天天看點

關于HttpClient 擷取頁面,出現亂碼的問題

HttpClient亂碼,網上有好多解決方法,在這裡,僅說下自己擷取頁面時出現亂碼的情況:

讀取回應資料流時,為 InputStreamReader 明确指定字元集,這樣就可以解決這個問題:

如:“BufferedReader br = new BufferedReader(new InputStreamReader(in,"UTF-8"));”

源碼如下:

/**
 * Fetches the page at {@code url} with an HTTP GET and returns its body as text.
 *
 * <p>The body is read line by line and the lines are concatenated without
 * their line terminators, so the result is a single unbroken string.
 *
 * <p>Charset used to decode the body:
 * <ol>
 *   <li>{@code encode}, when the caller supplies one;</li>
 *   <li>otherwise the value reported by {@code getResponseCharSet()}, which per
 *       the HttpClient 3.x documentation is the charset of the response
 *       {@code Content-Type} header, falling back to the configured
 *       {@code HTTP_CONTENT_CHARSET} ("gbk" here).</li>
 * </ol>
 *
 * @param url    address of the page to fetch
 * @param encode charset name used to decode the response body; may be
 *               {@code null}, in which case the charset is resolved as above
 * @return the page content, or {@code null} when the status code is not 200,
 *         the response has no body, or any exception occurs during the request
 */
private String getPage(String url, String encode) {
    // Create the HttpClient instance.
    HttpClient httpClient = new HttpClient();

    // Default content charset for the client: the caller's choice, else "gbk".
    String defaultCharset = (encode != null) ? encode : "gbk";
    httpClient.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, defaultCharset);

    // Ignore cookies.
    httpClient.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES);

    // Create the GET request for the given URL.
    GetMethod getMethod = new GetMethod(url);
    BufferedReader br = null;
    try {
        // Execute the request; only a 200 response is treated as success.
        int statusCode = httpClient.executeMethod(getMethod);
        if (statusCode != 200) {
            return null;
        }

        InputStream in = getMethod.getResponseBodyAsStream();
        if (in == null) {
            // No response body available.
            return null;
        }

        // Decode with the requested charset instead of a hard-coded one, so the
        // encode argument actually controls how the bytes are interpreted.
        String charset = (encode != null) ? encode : getMethod.getResponseCharSet();
        br = new BufferedReader(new InputStreamReader(in, charset));

        // Read the HTML source; readLine() strips the line terminators.
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
            sb.append(line);
        }
        return sb.toString();
    } catch (Exception ex) {
        // Best-effort fetch: report the failure and signal it with null,
        // which is the contract existing callers rely on.
        ex.printStackTrace();
        return null;
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if closing the reader fails.
            }
        }
        // Always hand the connection back to the connection manager,
        // on success and on failure alike.
        getMethod.releaseConnection();
    }
}