天天看點

jsoup抓取借書記錄

1 package tushuguan; 
  2 
  3 import java.io.IOException;  
  4 import java.util.ArrayList;  
  5 import java.util.HashMap;  
  6 import java.util.Iterator;  
  7 import java.util.List;  
  8 import java.util.Set;  
  9   
 10 import org.apache.http.Header;  
 11 import org.apache.http.HeaderElement;  
 12 import org.apache.http.HttpEntity;  
 13 import org.apache.http.HttpResponse;  
 14 import org.apache.http.NameValuePair;  
 15 import org.apache.http.ParseException;  
 16 import org.apache.http.client.ClientProtocolException;  
 17 import org.apache.http.client.entity.UrlEncodedFormEntity;  
 18 import org.apache.http.client.methods.HttpGet;  
 19 import org.apache.http.client.methods.HttpPost;  
 20 import org.apache.http.client.params.ClientPNames;  
 21 import org.apache.http.impl.client.DefaultHttpClient;  
 22 import org.apache.http.message.BasicNameValuePair;  
 23 import org.apache.http.util.EntityUtils;  
 24 import org.jsoup.Jsoup;  
 25 import org.jsoup.nodes.Document;  
 26 import org.jsoup.nodes.Element;  
 27 import org.jsoup.select.Elements;  
 28   
 29 public class tushuguan {  
 30    private static String LoginUrl = "http://222.200.98.171:81/login.aspx";  
 31    private static String Host = "http://222.200.98.171:81";  
 32    private static String mainUrl = "";  
 33    private static String borrowedBooksUrl = "";  
 34    private static String cookie = "";  
 35    private static String location = "";  
 36  
 37    /** 
 38     * @param args 
 39     */  
 40    public static void main(String[] args) {  
 41        // TODO Auto-generated method stub  
 42        getMyBorrowedBooks();  
 43    }  
 44  
 45    public static void getMyBorrowedBooks() {  
 46        try {  
 47            Document document = Jsoup.parse(login());  
 48            Elements elements1 = document  
 49                    .getElementsContainingOwnText("目前借閱情況和續借");// 通過text關鍵字找到所要的<a>标簽  
 50            String url = elements1.first().attr("href");  
 51            for(int i=1;i<=4;i++){
 52            borrowedBooksUrl = "http://222.200.98.171:81/user/bookborrowedhistory.aspx?page="+i;// 取值和mainUrl進行拼湊組織借閱情況位址  
 53            System.out.println("連結如下:"+borrowedBooksUrl);
 54            getBookBorrowedData(getHtml(borrowedBooksUrl));  
 55            }
 56  
 57        } catch (IOException e) {  
 58            // TODO Auto-generated catch block  
 59            e.printStackTrace();  
 60        }  
 61    }  
 62  
 63    /** 
 64     * 擷取借書情況具體資料(List<BookEntity>) 
 65     *  
 66     * @param src 
 67     * @return List<BookEntity> 
 68     */  
 69    private static List<BookEntity> getBookBorrowedData(String src) {  
 70        List<BookEntity> data = new ArrayList<BookEntity>();  
 71        Document document = Jsoup.parse(src);  
 72        Element element = document.select("[id=UserMasterRight]").first()  
 73                .getElementsByTag("table").first();  
 74        Elements elements2 = element.getElementsByTag("tr");  
 75        for (Element temp2 : elements2) {  
 76            Elements elements3 = temp2.getElementsByTag("td");  
 77            BookEntity entity = new tushuguan().new BookEntity()  
 78                    .setIsFullData(elements3.get(4).text())  
 79                    .setData2Return(elements3.get(1).text())  
 80                    .setName(elements3.get(2).text())  
 81                    .setData2Borrowed(elements3.get(0).text());  
 82            data.add(entity);  
 83  
 84        }  
 85        data.remove(0);  
 86        System.out.println("借書情況\n");  
 87  
 88        for (BookEntity temp : data) {  
 89            System.out.println(temp.getName() + "\n" + temp.getData2Borrowed()  
 90                    + "\n" + temp.getData2Return() + "\n"  
 91                    + temp.getIsFullData());  
 92        }  
 93        return data;  
 94  
 95    }  
 96  
 97    /** 
 98     * 圖書館登陸 
 99     *  
100     * @param context 
101     * @return 傳回登陸後的界面Html代碼 
102     * @throws ClientProtocolException 
103     * @throws IOException 
104     */  
105    public static String login() throws ClientProtocolException, IOException {  
106        List<NameValuePair> parmasList = new ArrayList<NameValuePair>();  
107        parmasList = initLoginParmas("3113003802", "092137");  
108        HttpPost post = new HttpPost(LoginUrl);  
109        post.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS, false);  
110        // 阻止自動重定向,目的是擷取第一個ResponseHeader的Cookie和Location  
111        post.setHeader("Content-Type",  
112                "application/x-www-form-urlencoded;charset=gbk");  
113        // 設定編碼為GBK  
114        post.setEntity(new UrlEncodedFormEntity(parmasList, "GBK"));  
115        HttpResponse response = new DefaultHttpClient().execute(post);  
116        cookie = response.getFirstHeader("Set-Cookie").getValue();  
117        // 取得cookie并儲存起來  
118        // System.out.println("cookie= " + cookie);  
119        location = response.getFirstHeader("Location").getValue();  
120        // 重定向位址,目的是連接配接到首頁  
121        mainUrl = Host + location;  
122        // 建構首頁位址  
123        String html = getHtml(mainUrl);  
124        return html;  
125  
126    }  
127  
128    /** 
129     * 擷取網頁HTML源代碼 
130     *  
131     * @param url 
132     * @return  
133     * @throws ParseException 
134     * @throws IOException 
135     */  
136  
137    private static String getHtml(String url) throws ParseException,  
138            IOException {  
139        // TODO Auto-generated method stub  
140        HttpGet get = new HttpGet(url);  
141        if ("" != cookie) {  
142            get.addHeader("Cookie", cookie);  
143        }  
144        HttpResponse httpResponse = new DefaultHttpClient().execute(get);  
145        HttpEntity entity = httpResponse.getEntity();  
146        return EntityUtils.toString(entity);  
147    }  
148  
149    /** 
150     * 初始化參數 
151     *  
152     * @param userName 
153     * @param passWord 
154     * @return  
155     * @throws ParseException 
156     * @throws IOException 
157     */  
158    public static List<NameValuePair> initLoginParmas(String userName,  
159            String passWord) throws ParseException, IOException {  
160        List<NameValuePair> parmasList = new ArrayList<NameValuePair>();  
161        HashMap<String, String> parmasMap = getLoginFormData(LoginUrl);  
162        Set<String> keySet = parmasMap.keySet();  
163  
164        for (String temp : keySet) {  
165            if (temp.contains("Username")) {  
166                parmasMap.put(temp, userName);  
167            } else if (temp.contains("txtPas")) {  
168                parmasMap.put(temp, passWord);  
169            }  
170        }  
171  
172        Set<String> keySet2 = parmasMap.keySet();  
173        System.out.println("表單内容:");  
174        for (String temp : keySet2) {  
175            System.out.println(temp + " = " + parmasMap.get(temp));  
176        }  
177        for (String temp : keySet2) {  
178            parmasList.add(new BasicNameValuePair(temp, parmasMap.get(temp)));  
179        }  
180  
181        // System.out.println("initParams \n" + parmasMap);  
182  
183        return parmasList;  
184  
185    }  
186  
187    /** 
188     * 擷取登入表單input内容 
189     *  
190     * @param url 
191     * @return  
192     * @throws IOException 
193     * @throws ParseException 
194     */  
195    public static HashMap<String, String> getLoginFormData(String url)  
196            throws ParseException, IOException {  
197        Document document = Jsoup.parse(getHtml(url));  
198        Elements element1 = document.getElementsByTag("form");// 找出所有form表單  
199        Element element = element1.select("[method=post]").first();// 篩選出送出方法為post的表單  
200        Elements elements = element.select("input[name]");// 把表單中帶有name屬性的input标簽取出  
201        HashMap<String, String> parmas = new HashMap<String, String>();  
202        for (Element temp : elements) {  
203            parmas.put(temp.attr("name"), temp.attr("value"));// 把所有取出的input,取出其name,放入Map中  
204        }  
205        return parmas;  
206    }  
207  
208    class BookEntity {  
209        /** 
210         * 書名 
211         *  
212         */  
213        private String name;  
214        /** 
215         * 可借數 
216         */  
217        private String leandableNum;  
218        /** 
219         * 索引号 
220         */  
221        private String callNumber;  
222        /** 
223         * 作者 
224         */  
225        private String writer;  
226        /** 
227         * 出版社 
228         */  
229        private String publisher;  
230        /** 
231         * 還書時間 
232         */  
233        private String data2Return;  
234        /** 
235         * 借書時間 
236         */  
237        private String data2Borrowed;  
238        /** 
239         * 是否續滿 
240         */  
241        private String isFullData;  
242  
243        public BookEntity() {  
244  
245        }  
246  
247        public String getName() {  
248            return name;  
249        }  
250  
251        public String getLeandableNum() {  
252            return leandableNum;  
253        }  
254  
255        public String getCallNumber() {  
256            return callNumber;  
257        }  
258  
259        public String getWriter() {  
260            return writer;  
261        }  
262  
263        public String getPublisher() {  
264            return publisher;  
265        }  
266  
267        public BookEntity setName(String name) {  
268            this.name = name;  
269            return this;  
270        }  
271  
272        public BookEntity setLeandableNum(String leandableNum) {  
273            this.leandableNum = leandableNum;  
274            return this;  
275        }  
276  
277        public BookEntity setCallNumber(String callNumber) {  
278            this.callNumber = callNumber;  
279            return this;  
280        }  
281  
282        public BookEntity setWriter(String writer) {  
283            this.writer = writer;  
284            return this;  
285        }  
286  
287        public BookEntity setPublisher(String publisher) {  
288            this.publisher = publisher;  
289            return this;  
290        }  
291  
292        public String getData2Return() {  
293            return data2Return;  
294        }  
295  
296        public String getData2Borrowed() {  
297            return data2Borrowed;  
298        }  
299  
300        public String getIsFullData() {  
301            return isFullData;  
302        }  
303  
304        public BookEntity setData2Return(String data2Return) {  
305            this.data2Return = data2Return;  
306            return this;  
307        }  
308  
309        public BookEntity setData2Borrowed(String data2Borrowed) {  
310            this.data2Borrowed = data2Borrowed;  
311            return this;  
312        }  
313  
314        public BookEntity setIsFullData(String isFullData) {  
315            this.isFullData = isFullData;  
316            return this;  
317        }  
318  
319    }  
320  
321 }        
jsoup抓取借書記錄

結果如下:

表單内容:
__VIEWSTATE = /wEPDwULLTE0MjY3MDAxNzcPZBYCZg9kFgoCAQ8PFgIeCEltYWdlVXJsBRt+XGltYWdlc1xoZWFkZXJvcGFjNGdpZi5naWZkZAICDw8WAh4EVGV4dAUt5bm/5Lic5bel5Lia5aSn5a2m5Zu+5Lmm6aaG5Lmm55uu5qOA57Si57O757ufZGQCAw8PFgIfAQUcMjAxNeW5tDEy5pyIMjHml6UgIOaYn+acn+S4gGRkAgQPZBYEZg9kFgQCAQ8WAh4LXyFJdGVtQ291bnQCCBYSAgEPZBYCZg8VAwtzZWFyY2guYXNweAAM55uu5b2V5qOA57SiZAICD2QWAmYPFQMTcGVyaV9uYXZfY2xhc3MuYXNweAAM5YiG57G75a+86IiqZAIDD2QWAmYPFQMOYm9va19yYW5rLmFzcHgADOivu+S5puaMh+W8lWQCBA9kFgJmDxUDCXhzdGIuYXNweAAM5paw5Lmm6YCa5oqlZAIFD2QWAmYPFQMUcmVhZGVycmVjb21tZW5kLmFzcHgADOivu+iAheiNkOi0rWQCBg9kFgJmDxUDE292ZXJkdWVib29rc19mLmFzcHgADOaPkOmGkuacjeWKoWQCBw9kFgJmDxUDEnVzZXIvdXNlcmluZm8uYXNweAAP5oiR55qE5Zu+5Lmm6aaGZAIID2QWAmYPFQMbaHR0cDovL2xpYnJhcnkuZ2R1dC5lZHUuY24vAA/lm77kuabppobpppbpobVkAgkPZBYCAgEPFgIeB1Zpc2libGVoZAIDDxYCHwJmZAIBD2QWBAIDD2QWBAIBDw9kFgIeDGF1dG9jb21wbGV0ZQUDb2ZmZAIHDw8WAh8BZWRkAgUPZBYGAgEPEGRkFgFmZAIDDxBkZBYBZmQCBQ8PZBYCHwQFA29mZmQCBQ8PFgIfAQWlAUNvcHlyaWdodCAmY29weTsyMDA4LTIwMDkuIFNVTENNSVMgT1BBQyA0LjAxIG9mIFNoZW56aGVuIFVuaXZlcnNpdHkgTGlicmFyeS4gIEFsbCByaWdodHMgcmVzZXJ2ZWQuPGJyIC8+54mI5p2D5omA5pyJ77ya5rex5Zyz5aSn5a2m5Zu+5Lmm6aaGIEUtbWFpbDpzenVsaWJAc3p1LmVkdS5jbmRkZBFPBFe3T/k7AJVSx8iKDmNVbdHT
ctl00$ContentPlaceHolder1$txtPas_Lib = 你猜你猜
ctl00$ContentPlaceHolder1$btnLogin_Lib = 登入
ctl00$ContentPlaceHolder1$txtlogintype = 0
ctl00$ContentPlaceHolder1$txtUsername_Lib = 3113003802
__EVENTVALIDATION = /wEWBQKs47i8AwKOmK5RApX9wcYGAsP9wL8JAqW86pcIDebecgohSzUlmvgecvTU4k49zAw=
連結如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=1
借書情況

回鄉記 [專著]/賀雪峰主編
2015-09-01
2015-10-15
A3226253
土木工程CAD基礎 [專著]:AutoCAD軟體基礎教程=CAD foundation of civil engineering:AutoCAD software basic course/鄧芃主編
2015-07-20
2015-10-15
A3138201
李光耀傳 [專著]/淩翔著
2015-07-20
2015-10-15
A3210306
工程CAD基礎理論與上機操作習題集 [專著]/于奕峰,楊松林主編
2015-07-20
2015-10-15
A3258522
消失的17歲 [專著]/(美) 諾瓦·倫·蘇瑪著=17 & gone/Nova Ren Suma;劉麗潔譯
2015-06-03
2015-09-01
A3213437
湯姆叔叔的小屋 [專著]=Uncle tom's cabin:插圖·中文導讀英文版/(美)比徹·斯托夫人著;王勳,紀飛等編譯
2015-03-27
2015-06-01
A3002490
商務口譯 [專著]=Business interpreting/劉建珠主編
2015-03-27
2015-06-01
A3003500
2014年季度精選集 [彙編]·春季卷/《讀者·鄉土人文版》編輯部主編
2015-03-27
2015-06-01
A3210150
可口可樂不規則營銷 [專著]/(美)洛威爾著;龍文元譯
2015-03-17
2015-06-16
A1501833
工程經濟學 [專著]/關罡, 郝彤主編
2015-03-17
2015-04-29
A3109697
連結如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=2
借書情況

讴歌母愛 關注人生 [專著]:冰心小說全集/冰 心著
2015-03-03
2015-06-01
A5143376
林徽因小說:九十九度中/林徽因[著];陳學勇編選
2015-03-03
2015-06-01
A5188772
駱駝祥子·黑白李 [專著]/老舍著
2015-03-03
2015-06-01
A0957524
1937年的愛情 [專著]/葉兆言著
2014-11-27
2015-01-10
A1509614
理工大風流往事 [專著]/zt著
2014-11-27
2014-12-16
A1847222
酒殇 [專著]:一個酒業王國的興衰/楊小凡著
2014-11-27
2015-01-10
A1948680
那時年少 [專著]/一草著
2014-11-27
2014-12-16
A2992422
不能承受的生命之輕 [專著]/(捷克斯洛伐克)米蘭·昆德拉(Milan Kundera)著=L'insoutenable legerete de l'etre/許鈞譯
2014-11-18
2015-01-10
A0520872
讀者精華本 [彙編]/萬文海主編
2014-11-18
2015-01-10
A1547276
誰在讓子彈飛 [專著]/曹保印著
2014-11-18
2014-12-16
A3147373
連結如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=3
借書情況

孤獨是不人道的 [專著]/郭鵬著
2014-11-18
2014-12-16
A3147367
且聽風吟 [專著]/(日)村上春樹著;林少華譯
2014-09-23
2014-11-10
A2516969
可怕的巧合 [專著]/石岩編著
2014-09-23
2014-11-13
A3158433
你好,總統 [專著]:烏戈·查韋斯與他的委内瑞拉=Comandante:inside Hugo Chavez's venezuela/(英)洛裡·卡洛爾(Rory Carroll)著;徐天鵬譯
2014-09-23
2014-11-13
A3129490
肝膽相照 [專著]:吳孟超傳/方鴻輝著
2014-09-23
2014-11-10
A3139385
林徽因經典作品 [專著]:你是人間的四月天九十九度中/林徽因著
2014-05-19
2014-07-10
A2386519
梁思成的山河歲月 [專著]/林與舟編著
2014-05-19
2014-05-27
A1210449
人物中國 [彙編]/龔莉主編;《人物中國》編委會編
2014-05-19
2014-07-10
A2603584
百年大案追蹤 [專著]/郭學德,崔愛鵬,李海濤著
2014-04-24
2014-06-11
A0283139
聚焦名人名案 [專著]/窦欣平,葉知秋著
2014-04-24
2014-06-11
A0547714
連結如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=4
借書情況

孫子兵法經典故事 [專著]/李濟生編著
2014-04-24
2014-06-17
A0565277
危險遊戲 [彙編]:典型犯罪案例評說/郭春孚,張翔鷹主編
2014-04-24
2014-06-03
A1360621
家庭常用藥物手冊 [專著]/白禾夏主編
2014-03-17
2014-03-25
A0483737
藥用觀賞植物栽培與利用 [專著]/張永清編著
2014-03-17
2014-04-13
A0614935
排毒不如無毒 [專著]:遠離生活中的有毒物質/(美) 黛布拉·林恩·戴德著 ;常媛譯=Toxic free: how to protect your health and home from the chemicals that are making you sick
2014-03-17
2014-03-25
A3116154
新版以案說法 [專著]/曾憲義總主編
2014-02-27
2014-03-17
A1595640
飛去的詩人:徐志摩傳 [專著]/展望之,張方晦著
2014-02-25
2014-03-20
A8152588
高四兇猛 [專著]/耿蕭著
2014-02-25
2014-02-27
A0547642
      

其實這段程式碼是我轉載後稍作修改而成,原文:http://my.oschina.net/dfsfsdf/blog/116279?fromerr=jQsroe5A

如果您認為閱讀這篇部落格讓您有些收穫,不妨拿出手機【微信掃一掃】

jsoup抓取借書記錄

您的資助是我最大的動力!

金額随意,歡迎來賞!