Java 過濾 HTML 標記 / Java 刪除 HTML 標籤

/**
* 删除input字元串中的html格式
*
* @param input
* @param length
* 顯示的字元的個數
* @return
*/
public static string splitandfilterstring(string input, int length) {
if (input == null || input.trim().equals("")) {
return "";
}
// 去掉所有html元素,
string str = input.replaceall("\\&[a-za-z]{1,10};", "").replaceall(
"<[^>]*>", "");
str = str.replaceall("[(/>)<]", "").trim();
str=systemhwutil.deleteallcrlf(str);//delete all crlf
int len = str.length();
if (length==systemhwutil.negative_one|| len <= length) {
return str;
} else {
str = str.substring(0, length);
str += "......";
return str;
}
/**
* 傳回純文字,去掉html的所有标簽,并且去掉空行
public static string splitandfilterstring(string input) {
str = str.replaceall("[(/>)<]", "");
return systemhwutil.deletecrlf(str);
應用:

// Example 1: store a cleaned copy of the content, limited to a hard-coded 120 characters.
papernews.setsplitandfilterstring(systemhwutil.splitandfilterstring(papernews.getcontent(), 120/*todo */));
// Example 2: clean the content in place, with the limit read via dictionaryparam.getint.
String content=papernews.getcontent();
papernews.setcontent(systemhwutil.splitandfilterstring(content, dictionaryparam.getint("news_settings", "content_max")));