Â
/**
 * Downloads the resource at {@code url} with an HTTP GET and decodes the
 * response body into a string.
 *
 * <p>The bytes are decoded with the charset reported by
 * {@code JsoupUtils.getCharset(url)} (per the original author's note, the
 * charset is obtained from the page via jsoup). That lookup is only performed
 * when there is a body to decode.
 *
 * <p>The HTTP connection is always released, including on non-200 responses
 * and when an exception is thrown while executing the request or reading the
 * body.
 *
 * @param url address of the page to fetch
 * @return the decoded body; the empty string when the status code is not
 *         {@code HttpStatus.SC_OK}; {@code null} when the response has no body
 * @throws Exception if the request fails, the charset lookup fails, or the
 *         reported charset name is not supported for decoding
 */
public static String getContent(String url) throws Exception{
    HttpClient hc = new HttpClient();
    HttpMethod hm = new GetMethod(url);
    byte[] result;
    try {
        // Anything other than 200 OK is reported to the caller as empty content.
        if (hc.executeMethod(hm) != HttpStatus.SC_OK) {
            return "";
        }
        // Read the body once; it must be read before the connection is released.
        result = hm.getResponseBody();
    } finally {
        // Release on every path so the connection is never leaked.
        hm.releaseConnection();
    }
    if (result == null) {
        return null;
    }
    // Ask the jsoup-based helper which charset the page declares, then decode.
    String charset = JsoupUtils.getCharset(url);
    return new String(result, charset);
}
// [Code] Get the character set
/**
 * Determines the character set declared by the HTML page at {@code siteurl}.
 *
 * <p>The page is fetched and parsed with jsoup using a 6000 ms timeout. The
 * first {@code <meta http-equiv="Content-Type">} element, if any, is rendered
 * to a string and handed to {@code RegularUtils.matchCharset}. When the page
 * has no such element, an HTML5-style {@code <meta charset="...">}
 * declaration is used instead.
 *
 * @param siteurl absolute URL of the page to inspect
 * @return the charset name found in the page, or {@code "gb2312"} when the
 *         page carries neither kind of declaration
 * @throws Exception if {@code siteurl} is malformed or the page cannot be
 *         fetched or parsed
 */
public static String getCharset (String siteurl) throws Exception{
    URL url = new URL(siteurl);
    Document doc = Jsoup.parse(url, 6*1000);

    // Legacy declaration: <meta http-equiv="Content-Type" content="...; charset=X">
    Element contentTypeMeta = doc.select("meta[http-equiv=Content-Type]").first();
    if (contentTypeMeta != null) {
        return RegularUtils.matchCharset(contentTypeMeta.toString());
    }

    // HTML5 declaration: <meta charset="X">
    Element charsetMeta = doc.select("meta[charset]").first();
    if (charsetMeta != null) {
        String declared = charsetMeta.attr("charset").trim();
        if (!declared.isEmpty()) {
            return declared;
        }
    }

    return "gb2312";
}
// [Code] Use a regular expression to get the page's character set
/**
 * Matches a {@code charset=...} declaration, case-insensitively, with an
 * optional opening single or double quote; group 1 is the bare charset name.
 * Compiled once because {@link Pattern} instances are immutable and reusable.
 */
private static final Pattern CHARSET_ATTRIBUTE_PATTERN =
        Pattern.compile("charset\\s*=\\s*[\"']?([\\w.:-]+)", Pattern.CASE_INSENSITIVE);

/**
 * Extracts the charset name from a piece of HTML such as a {@code <meta>} tag.
 *
 * <p>Only the charset token itself is returned: surrounding quotes and any
 * attributes that follow the declaration are not included.
 *
 * @param content markup to search; may be {@code null}
 * @return the first charset name found in {@code content}, or
 *         {@code "gb2312"} when {@code content} is {@code null} or contains
 *         no charset declaration
 */
public static String matchCharset(String content) {
    String chs = "gb2312";
    if (content == null) {
        return chs;
    }
    Matcher m = CHARSET_ATTRIBUTE_PATTERN.matcher(content);
    if (m.find()) {
        return m.group(1);
    }
    return chs;
}
Â