package body;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.ws.http.HTTPException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
public class MyMain {
static HashMap sendGet(HashMap oldHash) {
HashMap newHash = new HashMap();
String oldLine = "";
try {
for (java.util.Map.Entry mapping : oldHash.entrySet()) {
if (!mapping.getValue()) {
oldLine = mapping.getKey();
Document doc = Jsoup.connect(oldLine).get();
// TODO 頁面資訊查找
Element info;
if ((info = doc.getElementById("info")) != null) {
String title = doc.title();
System.out.println("httpUrl ----" + oldLine);
System.out.println("title ----" +title);
findMovieInfo(info);
} else {
Elements elements = doc.select("[href]");
for (Element element : elements) {
String newUrl = element.attr("href");
if (!oldHash.containsKey(newUrl) && !newHash.containsKey(newUrl))
if (newUrl.startsWith("http")) {
// System.out.println(newUrl);
newHash.put(newUrl, false);
}
}
}
}
oldHash.replace(oldLine, false, true);
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (HTTPException eh) {
eh.printStackTrace();
}
if (!newHash.isEmpty()) {
oldHash.putAll(newHash);
oldHash.putAll(sendGet(oldHash));
}
return oldHash;
}
static void myRequest(String httpUrl) {
HashMap oldHash = new HashMap<>();
oldHash.put(httpUrl, false);
oldHash = sendGet(oldHash);
}
public static void findMovieInfo(Element element) {
Elements elements = element.children();
for (Element info : elements) {
if (info.childNodeSize() > 0) {
String key = info.getElementsByAttributeValue("class", "pl").text();
if (!key.isEmpty()) {
if ("導演".equals(key) || "編劇".equals(key) || "主演".equals(key)) {
String value = info.getElementsByAttributeValue("class", "attrs").text();
System.out.println(key + "----" + value);
} else if ("類型:".equals(key)) {
String value = element.getElementsByAttributeValue("property", "v:genre").text();
System.out.println(key + "----" + value);
} else if ("制片國家/地區:".equals(key)) {
Pattern patternCountry = Pattern.compile(".制片國家/地區:.+\n
");
Matcher matcherCountry = patternCountry.matcher(element.html());
if (matcherCountry.find()) {
String value = matcherCountry.group().split("")[1].split("
")[0].trim();
System.out.println(key + "----" + value);
}
} else if ("語言:".equals(key)) {
Pattern patternCountry = Pattern.compile(".語言:.+\n
");
Matcher matcherCountry = patternCountry.matcher(element.html());
if (matcherCountry.find()) {
String value = matcherCountry.group().split("")[1].split("
")[0].trim();
System.out.println(key + "----" + value);
}
} else if ("上映日期:".equals(key)) {
String value = element.getElementsByAttributeValue("property", "v:initialReleaseDate").text();
System.out.println(key + "----" + value);
} else if ("片長:".equals(key)) {
String value = element.getElementsByAttributeValue("property", "v:runtime").text();
System.out.println(key + "----" + value);
} else if ("又名:".equals(key)) {
Pattern patternCountry = Pattern.compile(".又名:.+\n
");
Matcher matcherCountry = patternCountry.matcher(element.html());
if (matcherCountry.find()) {
String value = matcherCountry.group().split("")[1].split("
")[0].trim();
System.out.println(key + "----" + value);
}
}
}
}
}
}
public static void main(String[] args) {
myRequest("https://movie.douban.com");
}
}