1 package com.mock.utils;
2
3 import java.io.IOException;
4 import java.net.MalformedURLException;
5 import java.util.ArrayList;
6 import java.util.List;
7
8 import org.jsoup.Jsoup;
9 import org.jsoup.nodes.Document;
10 import org.jsoup.nodes.Element;
11 import org.jsoup.select.Elements;
12
13 import com.gargoylesoftware.htmlunit.BrowserVersion;
14 import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
15 import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
16 import com.gargoylesoftware.htmlunit.WebClient;
17 import com.gargoylesoftware.htmlunit.WebClientOptions;
18 import com.gargoylesoftware.htmlunit.html.HtmlPage;
19 import com.justsy.army.mgt.mock.model.City;
20
21 public class NationalBureauOfStatics {
22 private static final String ADDRESS = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/";
23 private static final String fix = ".html";
24
25 public static void main(String[] args) {
26 List<City> provinceList = new ArrayList<>();
27 List<City> cityList = new ArrayList<>();
28 List<City> countyList = new ArrayList<>();
29 List<City> townList = new ArrayList<>();
30 provinceList = getTVMall(provinceList, new City(), ADDRESS, 0);
31 for (City city : provinceList) {
32 cityList = getTVMall(cityList, city, city.getHtmlAddr(), 1);
33 }
34 for (City city : cityList) {
35 countyList = getTVMall(countyList, city, city.getHtmlAddr(), 2);
36 }
37 for (City city : countyList) {
38 townList = getTVMall(townList, city, city.getHtmlAddr(), 3);
39 }
40
41 for (City city : townList) {
42 System.out.println(city.toString());
43 }
44 }
45
46 public static List<City> getTVMall(List<City> list, City city, String address, int type) {
47 WebClient webClient = new WebClient(BrowserVersion.CHROME);
48 // webclient參數載體
49 WebClientOptions clientOptions = webClient.getOptions();
50 // 設定webClient的相關參數
51 clientOptions.setJavaScriptEnabled(true);
52 clientOptions.setCssEnabled(false);
53 webClient.setAjaxController(new NicelyResynchronizingAjaxController());
54 clientOptions.setTimeout(35000);
55 clientOptions.setThrowExceptionOnScriptError(false);
56 try {
57 HtmlPage htmlPage = webClient.getPage(address);
58 Document dom = Jsoup.parse(htmlPage.asXml());
59 Elements ele = null;
60 if (type == 0) {
61 ele = dom.getElementsByClass("provincetable");
62 } else if (type == 1) {
63 ele = dom.getElementsByClass("citytable");
64 } else if (type == 2) {
65 ele = dom.getElementsByClass("countytable");
66 } else if (type == 3) {
67 ele = dom.getElementsByClass("towntable");
68 }
69 dom = Jsoup.parse(ele.toString());
70 ele = dom.getElementsByTag("tr");
71 if (ele != null) {
72 getList(list, ele, city, type);
73 }
74 } catch (FailingHttpStatusCodeException e) {
75 e.printStackTrace();
76 } catch (MalformedURLException e) {
77 e.printStackTrace();
78 } catch (IOException e) {
79 e.printStackTrace();
80 }
81 return list;
82 }
83
84 private static List<City> getList(List<City> list, Elements ele, City city, int type) {
85 if (type == 0) {
86 for (int i = 3; i < ele.size(); i++) {
87 Element item = ele.get(i);
88 Elements aElements = item.getElementsByTag("a");
89 for (int j = 0; j < aElements.size(); j++) {
90 City c = new City();
91 String html = aElements.get(j).attr("href");
92 String name = aElements.get(j).text();
93 c.setProvince(name);
94 c.setHtmlAddr(ADDRESS + html);
95 c.setCode(html.replace(fix, "0000000000"));
96 list.add(c);
97 }
98 }
99 return list;
100 }
101 for (int i = 0; i < ele.size(); i++) {
102 Element item = ele.get(i);
103 Elements aElements = item.getElementsByTag("a");
104 if (aElements.size() > 0) {
105 City c = new City();
106 String html = aElements.get(0).attr("href");
107 String code = aElements.get(0).text();
108 String name = aElements.get(1).text();
109 if (type == 1) {
110 c.setProvince(city.getProvince());
111 c.setCity(name);
112 } else if (type == 2) {
113 c.setProvince(city.getProvince());
114 c.setCity(city.getCity());
115 c.setCounty(name);
116 } else if (type == 3) {
117 c.setProvince(city.getProvince());
118 c.setCity(city.getCity());
119 c.setCounty(city.getCounty());
120 c.setTown(name);
121 }
122 c.setCode(code);
123 String provinceCode = city.getCode().substring(0, 2);
124 if (!html.startsWith(provinceCode + "/")) {
125 html = provinceCode + "/" + html;
126 }
127 c.setHtmlAddr(ADDRESS + html);
128 list.add(c);
129 System.out.println(c.toString());
130 }
131 }
132 return list;
133 }
134 }