Jsoup解析網頁html
解析網頁demo:
利用 Jsoup 擷取下列 HTML 片段中的資料:

1 <!-- 目前基金檔案\計算\定投\開戶 start -->
2 <div class="wrapper">
3 <div class="wrapper_min">
4 <div class="merchandiseDetail">
5 <div class="fundDetail-header">
6 <div class="fundDetail-tit">
7 <div style="float: left">興全社會責任混合
8 <span>(</span>
9 <span class="ui-num">340007</span></div>)</div>
10 <div class="fundDetail-tools">
11 <a class="jijinba" href="http://guba.eastmoney.com/list,of340007.html">基金吧</a>
12 <!-- 未自選 start -->
13 <a class="addSel" id="addSel" href="javascript:;" target="_self">加自選</a>
14 <!-- 未自選 end -->
15 <a class="addCom" id="addCom" href="javascript:;" target="_self" onclick="common.addCompare()">加對比</a>
16 <a class="addDownApp" href="http://fundact.eastmoney.com/app/">手機版天天基金下載下傳</a></div>
17 </div>
18 <div class="fundDetail-main">
19 <!-- 檔案 start -->
20 <div class="fundInfoItem">
21 <!--開放式基金收益率子產品-->
22 <div class="dataOfFund">
23 <dl class="dataItem01">
24 <dt>
25 <p>
26 <span>
27 <span class="sp01">淨值估算</span></span>
28 <span id="gz_gztime">(17-12-20 15:00)</span>
29 <span class="infoTips">
30 <span class="tipsBubble" style="display: none;">淨值估算每個交易日9:30-15:00盤中實時更新(QDII基金為海外交易時段),是按照基金持倉、指數走勢和基金過往業績估算,估算資料并不代表真實淨值,僅供參考,請以基金管理人披露淨值為準。</span></span>
31 </p>
32 </dt>
33 <dd class="dataNums">
34 <dl class="floatleft">
35 <span class="ui-font-large ui-color-green ui-num" id="gz_gsz">3.7576</span></dl>
36 <dl id="gz_icon" class="gzdown"></dl>
37 <dl class="floatleft fundZdf">
38 <span class="ui-font-middle ui-color-green ui-num" id="gz_gszze">0.0594</span>
39 <span class="ui-font-middle ui-color-green ui-num" id="gz_gszzl">-1.56%</span></dl>
40 </dd>
41 <dd>
42 <span>近1月:</span>
43 <span class="ui-font-middle ui-color-green ui-num">-4.62%</span></dd>
44 <dd>
45 <span>近1年:</span>
46 <span class="ui-font-middle ui-color-red ui-num">44.20%</span></dd>
47 </dl>
48 <span class="dataOfFund-line"></span>
49 <dl class="dataItem02">
50 <dt>
51 <p>
52 <span class="ui-color-blue">
53 <span class="sp01">
54 <a href="http://fund.eastmoney.com/f10/jjjz_340007.html">機關淨值</a></span>(</span>2017-12-19)</p>
55 </dt>
56 <dd class="dataNums">
57 <span class="ui-font-large ui-color-red ui-num">3.8170</span>
58 <span class="ui-font-middle ui-color-red ui-num">1.41%</span></dd>
59 <dd>
60 <span>近3月:</span>
61 <span class="ui-font-middle ui-color-red ui-num">13.47%</span></dd>
62 <dd>
63 <span>近3年:</span>
64 <span class="ui-font-middle ui-color-red ui-num">113.48%</span></dd>
65 </dl>
66 <span class="dataOfFund-line"></span>
67 <dl class="dataItem03">
68 <dt>
69 <p>
70 <span class="ui-color-blue">
71 <span class="sp01">
72 <a href="http://fund.eastmoney.com/f10/jjjz_340007.html">累計淨值</a></span>
73 </span>
74 </p>
75 </dt>
76 <dd class="dataNums">
77 <span class="ui-font-large ui-color-red ui-num">4.0070</span></dd>
78 <dd>
79 <span>近6月:</span>
80 <span class="ui-font-middle ui-color-red ui-num">25.35%</span></dd>
81 <dd>
82 <span>成立來:</span>
83 <span class="ui-font-middle ui-color-red ui-num">332.92%</span></dd>
84 </dl>
85 </div>
86 <div class="infoOfFund">
87 <div class="infoOfFund-line"></div>
88 <table>
89 <tr>
90 <td>基金類型:
91 <a href="http://fund.eastmoney.com/HH_jzzzl.html#os_0;isall_0;ft_;pt_3">混合型</a> | 中高風險</td>
92 <td>
93 <a href="http://fund.eastmoney.com/f10/gmbd_340007.html">基金規模</a>:76.83億元(2017-09-30)</td>
94 <td>基金經理:
95 <a href="http://fund.eastmoney.com/f10/jjjl_340007.html">傅鵬博</a></td>
96 </tr>
97 <tr>
98 <td>
99 <span class="letterSpace01">成 立 日</span>:2008-04-30</td>
100 <td>
101 <span class="letterSpace01">管 理 人</span>:
102 <a href="http://fund.eastmoney.com/company/80036742.html">興全基金</a></td>
103 <td>
104 <a class="floatleft" href="http://fund.eastmoney.com/f10/jjpj_340007.html">基金評級</a>
105 <span class="floatleft">:</span>
106 <div class="jjpj4"></div>
107 </td>
108 </tr>
109 </table>
110 </div>
111 </div>
112 <!-- 檔案 end -->
/**
* Project Name:wlpc
* File Name:XyzqTask.java
* Package Name:com.xyzq.wlpc.task
* Date:2017年12月20日下午1:48:16
* Copyright (c) 2017 All Rights Reserved.
*
*/
import java.io.IOException;;import net.sf.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;/**
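* ClassName: XyzqTask
* Function: Demo task that downloads the page whose URL is configured under
*           "wlpc.web.url" and prints selected values from it to standard output.
* Reason: Shows how to pick elements with Jsoup selectors by class, id and tag.
* Date: 2017-12-20 13:48:16
* @author lizm
* @since JDK 1.6
*
*/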
public class XyzqTask extends BaseTask {

    /**
     * Fetches the page whose URL is stored under the "wlpc.web.url" key of the
     * "wlpc" properties and prints selected pieces of its text to standard output.
     *
     * <p>Printed, in order: the text of every {@code div.fundDetail-tit} up to its
     * first "(" together with its first {@code span.ui-num}; the text of
     * {@code span#gz_gztime} without parentheses; the text of {@code span#gz_gsz},
     * {@code span#gz_gszze} and {@code span#gz_gszzl}; and, for every
     * {@code dl.dataItem02}, the parenthesised part of its first {@code p} plus
     * the first and last {@code span} of its first {@code dd.dataNums}.
     *
     * <p>A missing node is reported on standard error instead of aborting the run,
     * and an I/O failure is reported together with the URL that was requested.
     */
    private void getHtml() {
        String url = Pub.getPropertiesValue("wlpc", "wlpc.web.url");
        if (url == null || url.trim().length() == 0) {
            // Fail with a readable message instead of handing an unusable URL to Jsoup.
            System.err.println("wlpc.web.url is not configured; nothing to fetch");
            return;
        }

        try {
            Document doc = Jsoup.connect(url).get();

            // div elements whose class is fundDetail-tit
            for (Element title : doc.select("div.fundDetail-tit")) {
                // Keep only what precedes the first '('.
                System.out.println("fundDetail_tit>>>>:" + textBefore(title.text(), '('));
                // First span with class ui-num below this div; an Element can be
                // queried directly, so no re-parsing of its HTML is needed.
                printText("elm>>>>:", title.select("span.ui-num").first());
            }

            // span whose id is gz_gztime
            for (Element time : doc.select("span#gz_gztime")) {
                System.out.println("gz_gztime>>>>:" + time.text().replace("(", "").replace(")", ""));
            }

            // span whose id is gz_gsz
            for (Element value : doc.select("span#gz_gsz")) {
                printText("gz_gsz>>>>:", value);
            }

            // span whose id is gz_gszze
            for (Element value : doc.select("span#gz_gszze")) {
                printText("gz_gszze>>>>:", value);
            }

            // span whose id is gz_gszzl
            for (Element value : doc.select("span#gz_gszzl")) {
                printText("gz_gszzl>>>>:", value);
            }

            // dl elements whose class is dataItem02
            for (Element item : doc.select("dl.dataItem02")) {
                Element header = item.getElementsByTag("p").first();
                if (header == null) {
                    System.err.println("elm>>>>: <no matching element>");
                } else {
                    // Take what is inside the parentheses rather than stripping a fixed
                    // label, so the result does not depend on the label's wording or on
                    // the whitespace between the label and '('.
                    System.out.println("elm>>>>:" + textInsideParens(header.text()));
                }

                Element nums = item.select("dd.dataNums").first();
                if (nums == null) {
                    System.err.println("elm_dd_span1>>>: <no matching element>");
                    continue;
                }
                Elements spans = nums.getElementsByTag("span");
                printText("elm_dd_span1>>>:", spans.first());
                printText("elm_dd_span2>>>:", spans.last());
            }
        } catch (IOException e) {
            // Best effort: report the failure with its context and carry on.
            System.err.println("Failed to fetch " + url + ": " + e);
            e.printStackTrace();
        }
    }

    /**
     * Prints {@code label} followed by the element's text, or reports on standard
     * error that the element is missing.
     *
     * @param label   prefix written before the text
     * @param element element to print; may be {@code null} when a selector matched nothing
     */
    private static void printText(String label, Element element) {
        if (element == null) {
            System.err.println(label + " <no matching element>");
            return;
        }
        System.out.println(label + element.text());
    }

    /**
     * Returns the part of {@code text} that precedes the first occurrence of
     * {@code delimiter}, or the whole text when the delimiter does not occur.
     *
     * @param text      text to cut
     * @param delimiter character that ends the wanted prefix
     * @return the prefix before {@code delimiter}
     */
    private static String textBefore(String text, char delimiter) {
        int cut = text.indexOf(delimiter);
        return cut < 0 ? text : text.substring(0, cut);
    }

    /**
     * Returns the text between the first "(" and the last ")" of {@code text}.
     * Without a "(" the text is returned with any ")" removed; without a closing
     * ")" after the "(" everything following the "(" is returned.
     *
     * @param text text such as {@code "label (2017-12-20)"}
     * @return the parenthesised part, trimmed
     */
    private static String textInsideParens(String text) {
        int open = text.indexOf('(');
        if (open < 0) {
            return text.replace(")", "").trim();
        }
        int close = text.lastIndexOf(')');
        int end = close > open ? close : text.length();
        return text.substring(open + 1, end).trim();
    }

    public static void main(String[] args) {
        XyzqTask client = new XyzqTask();
        client.getHtml();
    }
}
fundDetail_tit>>>>:興全社會責任混合
elm>>>>:340007
gz_gztime>>>>:17-12-21 15:00
gz_gsz>>>>:3.8583
gz_gszze>>>>:+0.0933
gz_gszzl>>>>:+2.48%
elm>>>>:2017-12-20
elm_dd_span1>>>:3.7650
elm_dd_span2>>>:-1.36%