最近做個產品,要用爬蟲獲取資訊,可是這個網頁要求模擬瀏覽器送出請求。我參考了網上很多範例,setRequestProperty 裡面的 User-Agent 都只寫了 Mozilla/5.0,結果還是不能訪問;最後研究了一下,才寫出下面這段程式。
/**
 * Sends an HTTP post to the yougetsignal whois-lookup endpoint with the form
 * body {@code remoteaddress=<ip>}, using a browser-like user-agent header, and
 * returns the response text.
 *
 * On a 200 response the body is read line by line and the lines are appended
 * back-to-back (no separator is added between them). On any other status the
 * code is printed together with errorresponsestr. If an exception is thrown,
 * retry_count is incremented and the method calls itself again while
 * retry_count is below retry_count_break.
 *
 * NOTE(review): as pasted, this snippet has fewer closing braces than opening
 * ones, the second catch has no visible try, and every identifier is
 * lowercased, so it will not compile as shown. The exact nesting of the
 * statements after each else/if therefore cannot be confirmed from here;
 * restore the structure from the original source before relying on it.
 *
 * @param ip value appended after "remoteaddress=" in the request body
 * @return htmlcodestr: the collected response text, or the result of the
 *         recursive retry; it is initialised to the empty string
 */
public static string urlpostconnreturnhtml(string ip){
url url = null ;
httpurlconnection conn = null ;
bufferedreader br = null ;
inputstreamreader isr = null ;
inputstream is = null ;
outputstream os = null ;
outputstreamwriter writer ;
int responsecode ;
string readerline = "" ;
stringbuffer htmlcode = new stringbuffer() ;
string htmlcodestr = "" ;
try {
url = new url("http://www.yougetsignal.com/tools/whois-lookup/php/get-whois-lookup-json-data.php") ;
conn= (httpurlconnection)url.openconnection() ;
// Same timeout value (time_out, declared outside this snippet) for connecting and reading
conn.setconnecttimeout(time_out) ;
conn.setreadtimeout(time_out) ;
// Enable output so that a request body can be written
conn.setdooutput(true) ;
// Enable input (reading the response); the default is already true
conn.setdoinput(true) ;
// Do not use caches, because our data is updated in real time
conn.setusecaches(false) ;
// Set the request method to post; the default is get
conn.setrequestmethod("post") ;
// Make all http connections follow redirects automatically
httpurlconnection.setfollowredirects(true) ;
// Make this particular connection follow redirects automatically
conn.setinstancefollowredirects(true) ;
// Imitate a browser visit: the user-agent below identifies as chrome/safari on mac os x
conn.setrequestproperty("user-agent","mozilla/5.0 (macintosh; intel mac os x 10_7_5) applewebkit/537.11 (khtml, like gecko) chrome/23.0.1271.64 safari/537.11") ;
// The explicit content-type header is disabled (left commented out):
//conn.setrequestproperty("content-type", "application/x-www-form-urlencoded") ;
os = conn.getoutputstream() ;
writer = new outputstreamwriter(os ,"utf-8") ;
// Form body; ip is concatenated as-is, without any url-encoding
writer.write("remoteaddress="+ip) ;
writer.flush() ;
writer.close() ;
responsecode= conn.getresponsecode() ;
// todo: if html was returned (status 200), read it
if(responsecode == 200){
is = conn.getinputstream() ;
// No charset is passed to the reader here, whereas the writer above names utf-8
isr = new inputstreamreader(is) ;
br = new bufferedreader(isr) ;
// Collect the response line by line; nothing is inserted between lines
readerline = br.readline() ;
while(readerline != null){
htmlcode.append(readerline) ;
readerline = br.readline() ;
}
}else{
// Non-200 status: report it (errorresponsestr is declared outside this snippet)
system.out.println(errorresponsestr + responsecode);
htmlcodestr = htmlcode.tostring() ;
} catch (exception e) {
e.printstacktrace() ;
// Count this failure; retry_count is declared outside this snippet and is not reset here
retry_count++;
system.out.println(errornetconnstr + retry_count);
// Retry by calling this method again until retry_count reaches retry_count_break
if (retry_count < retry_count_break) {
htmlcodestr = urlpostconnreturnhtml(ip);
system.out.println("網絡連接配接失敗!");
}finally{
// Close the reader chain and the output stream, then drop the connection
if(br!=null){
br.close() ;
if(isr!=null){
isr.close() ;
// NOTE(review): no null check for `is` is visible here, unlike br/isr/os — confirm against the original
is.close() ;
if(os!=null){
os.close() ;
} catch (ioexception e) {
// todo auto-generated catch block
system.out.println("io流關閉錯誤--->");
e.printstacktrace();
if(conn!=null){
conn.disconnect() ;
// Echo the result to stdout before returning it
system.out.println(htmlcodestr);
return htmlcodestr ;