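// Given web page source that has already been fetched (see the companion article on how to fetch
// the HTML source of a page), this utility extracts specified content from that page source.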
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class GetTheSpecifyHtmlTagCode {
public static List match(String source, String element, String byAttr) {
List result = new ArrayList();
String reg = String.format("]*?\\s?%s=['\"](.*?)['\"]\\s?.*?>(.*?)%s>", element, byAttr, element);
Matcher m = Pattern.compile(reg).matcher(source);
while (m.find()) {
String elementResult = m.group(0);
String attrResult = m.group(1);
result.add(elementResult + VS
+ attrResult);
}
return result;
}
public static String getAttrValueByAttr(String source, String element, String byAttr) {
String result = "";
String reg = String.format("]*?\\s?%s=['\"](.*?)['\"]\\s?.*?>(.*?)%s>", element, byAttr, element);
Matcher m = Pattern.compile(reg).matcher(source);
while (m.find()) {
result = m.group(1);
}
return result;
}
public static String getAttrValue(String htmlCode, String element, String byAttr1, String attrValue, String byAttr2) {
List list = match(htmlCode, element, byAttr1);
String tempr = "";
for (int i = 0; i < list.size(); i++) {
String tempResult = list.get(i);
String[] temp = tempResult
.split(VS);
if (temp[1].equals(attrValue)) {
tempr = getAttrValueByAttr(htmlCode, element, byAttr2);
}
}
return tempr;
}
public static String getElementById(String htmlCode, String element, String byAttr, String attrValue) {
List list = match(htmlCode, element, byAttr);
String tempr = "";
for (int i = 0; i < list.size(); i++) {
String tempResult = list.get(i);
String[] temp = tempResult
.split(VS);
// System.out.println("--------> " + (i + 1) + ". " + list.get(i));
if (temp[1].equals(attrValue)) {
// System.out.println("内部打印 " + temp[0]);
tempr = temp[0];
}
}
return tempr;
}
public static String getElementValueByAttr(String htmlCode, String element, String byAttr, String attrValue) {
String elementCode = getElementById(htmlCode, element, byAttr, attrValue);
String regTagStart = String.format("]*?\\s?%s=['\"](.*?)['\"]\\s?.*?>", element,byAttr);
String tagStart="";
Matcher m = Pattern.compile(regTagStart).matcher(elementCode);
while (m.find()) {
tagStart = m.group(0);
}
String regTagEnd = String.format("%s>", element);
String result = elementCode.replace(tagStart, "").replace(regTagEnd, "");
return result;
}
public static String getElementValueByElementCode(String elementCode, String element,String byAttr) {
String regTagStart = String.format("]*?\\s?%s=['\"](.*?)['\"]\\s?.*?>", element,byAttr);
String tagStart="";
Matcher m = Pattern.compile(regTagStart).matcher(elementCode);
while (m.find()) {
tagStart = m.group(0);
}
String regTagEnd = String.format("%s>", element);
String result = elementCode.replace(tagStart, "").replace(regTagEnd, "");
return result;
}
private static String VS = "--------";//split标记自定义
}