天天看點

java.net.URLConnection類_java模拟浏覽器

最近做個産品要用爬蟲獲取資訊,可是目標網頁要求請求看起來像是由浏覽器發出的。我參考了網上很多做法,都是在 setRequestProperty 裡把 User-Agent 寫成 Mozilla/5.0,結果還是不能訪問;最後研究了一下,寫出了下面這段可用的代碼。

public static string urlpostconnreturnhtml(string ip){

url url = null ;

httpurlconnection conn = null ;

bufferedreader br = null ;

inputstreamreader isr = null ;

inputstream is = null ;

outputstream os = null ;

outputstreamwriter writer ;

int responsecode ;

string readerline = "" ;

stringbuffer htmlcode = new stringbuffer() ;

string htmlcodestr = "" ;

try {

url = new url("http://www.yougetsignal.com/tools/whois-lookup/php/get-whois-lookup-json-data.php") ;

conn= (httpurlconnection)url.openconnection() ;

conn.setconnecttimeout(time_out) ;

conn.setreadtimeout(time_out) ;

//設定可以輸出

conn.setdooutput(true) ;

//設定可以讀取預設是true

conn.setdoinput(true) ;

//設定不适用緩存,因為我們的資料是實時更新的

conn.setusecaches(false) ;

//設定請求方式為post,預設是get

conn.setrequestmethod("post") ;

//設定所有的http連接配接自動處理重定向

httpurlconnection.setfollowredirects(true) ;

//設定本次連接配接自動處理重定向

conn.setinstancefollowredirects(true) ;

//模拟ios 谷歌浏覽器通路

conn.setrequestproperty("user-agent","mozilla/5.0 (macintosh; intel mac os x 10_7_5) applewebkit/537.11 (khtml, like gecko) chrome/23.0.1271.64 safari/537.11") ;

//conn.setrequestproperty("content-type", "application/x-www-form-urlencoded") ;

os = conn.getoutputstream() ;

writer = new outputstreamwriter(os ,"utf-8") ;

writer.write("remoteaddress="+ip) ;

writer.flush() ;

writer.close() ;

responsecode= conn.getresponsecode() ;

//todo:如果得到傳回html代碼,則讀取

if(responsecode == 200){

is = conn.getinputstream() ;

isr = new inputstreamreader(is) ;

br = new bufferedreader(isr) ;

readerline = br.readline() ; 

while(readerline != null){

htmlcode.append(readerline) ;

readerline = br.readline() ;

}

}else{

system.out.println(errorresponsestr + responsecode);

htmlcodestr = htmlcode.tostring() ;

} catch (exception e) {

e.printstacktrace() ;

retry_count++;

system.out.println(errornetconnstr + retry_count);

if (retry_count < retry_count_break) {

htmlcodestr = urlpostconnreturnhtml(ip);

system.out.println("網絡連接配接失敗!");

}finally{

if(br!=null){

br.close() ;

if(isr!=null){

isr.close() ;

is.close() ;

if(os!=null){

os.close() ;

} catch (ioexception e) {

// todo auto-generated catch block

system.out.println("io流關閉錯誤--->");

e.printstacktrace();

if(conn!=null){

conn.disconnect() ;

system.out.println(htmlcodestr);

return htmlcodestr ;