如何轉義 HTML 標籤

/**
* 去除html字串中的控制字元及不可視字元
*
* @param str
* html字串
* @return 傳回的字串
*/
public static string escapehtml(string str) {
int length = str.length();
int newlength = length;
boolean somecharacterescaped = false;
for (int i = 0; i < length; i++) {
char c = str.charat(i);
int cint = 0xffff & c;
if (cint < 32)
switch (c) {
case 11:
default:
newlength--;
somecharacterescaped = true;
break;
case '\t':
case '\n':
case '\f':
case '\r':
}
else
case '"':
newlength += 5;
case '&':
case '\'':
newlength += 4;
case '<':
case '>':
newlength += 3;
}
if (!somecharacterescaped)
return str;
stringbuffer sb = new stringbuffer(newlength);
sb.append(c);
sb.append("&quot;");
sb.append("&apos;");
sb.append("&amp;");
sb.append("&lt;");
sb.append("&gt;");
return sb.tostring();
}
測試:

@test
public void test_001(){
string input="<html><input type=\"button\" onlick=\"abc()\" > </html>";
system.out.println(input);
system.out.println(stringutil.escapehtml(input));
運作結果:
(2)Java 如何去除 HTML 標籤,只留下文本

* 删除input字元串中的html格式
* @param input
* @param length
* 顯示的字元的個數
* @return
public static string splitandfilterstring(string input, int length) {
if (input == null || input.trim().equals("")) {
return "";
// 去掉所有html元素,
string str = input.replaceall("\\&[a-za-z]{1,10};", "").replaceall(
"<[^>]*>", "");
str = str.replaceall("[(/>)<]", "");
int len = str.length();
if (len <= length) {
} else {
str = str.substring(0, length);
str += "......";
return str;
/**
* 傳回純文字,去掉html的所有标簽,并且去掉空行
public static string splitandfilterstring(string input) {
return systemhwutil.deletecrlf(str);
/***
* delete all spaces
public static string deleteallcrlf(string input) {
return input.replaceall("((\r\n)|\n)[\\s\t ]*", "").replaceall(
"^((\r\n)|\n)", "");
* delete crlf; delete empty line ;delete blank lines
public static string deletecrlf(string input) {
input = systemhwutil.deletecrlfonce(input);
return systemhwutil.deletecrlfonce(input);