java+selenium,40行代码完成支付宝账单爬取
需要jar selenium-server-4.0.0-alpha-5.jar
需要驱动 chromedriver.exe
驱动需要和浏览器版本对应,我用的是chrome版本 81.0.4044.138
chrome驱动地址:http://chromedriver.storage.googleapis.com/index.html
selenium jar地址:http://selenium-release.storage.googleapis.com/
完整爬取代码(这里我简单写了一下)
package main;

import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;

/**
 * Opens Chrome through Selenium, waits until the browser lands on the Alipay
 * bill page, then prints one line per row of the bill table to standard output.
 */
public class LuanchChrome {

    /** System property that tells Selenium where the chromedriver binary is. */
    private static final String DRIVER_PROPERTY = "webdriver.chrome.driver";

    /**
     * Fallback chromedriver location, used only when {@link #DRIVER_PROPERTY}
     * has not been set (e.g. with {@code -Dwebdriver.chrome.driver=...}).
     * NOTE(review): the path separators were missing from the original literal
     * ("D:workactivemqttlibchromedriver.exe"); the directory split below is a
     * reconstruction - confirm against the local installation.
     */
    private static final String DEFAULT_DRIVER_PATH =
            "D:\\work\\activemqtt\\lib\\chromedriver.exe";

    /** First page that is opened; the bill page is requested afterwards. */
    private static final String ENTRY_URL =
            "https://www.baidu.com/link?url=HP64htK5pNF11bUxlJjnv_QBMpTb73o9Gf6m-uj9KcidD8_fE-RS80Yn9ScqtW_w&wd=&eqid=8cb4a2a3000f3c4d000000035eb6296c";

    /** Address of the bill list that gets scraped. */
    private static final String BILL_URL = "https://consumeprod.alipay.com/record/standard.htm";

    /** Pause between two attempts to reach the bill page, in milliseconds. */
    private static final long RETRY_DELAY_MS = 10000L;

    /** Matches a single line break: CRLF, lone CR or lone LF. Compiled once. */
    private static final Pattern LINE_BREAK = Pattern.compile("\\r\\n|\\r|\\n");

    /** Browser session shared by {@link #main} and {@link #time2getData()}. */
    static WebDriver driver;

    /**
     * Timer intended for periodic scraping. Nothing in this class schedules
     * it; a call such as {@code tm.schedule(ts, 10000, 10000)} would be needed.
     */
    static Timer tm = new Timer();

    /** Timer task that runs one scrape of the bill page. */
    static TimerTask ts = new TimerTask() {
        @Override
        public void run() {
            time2getData();
        }
    };

    /**
     * Starts Chrome, opens the entry page and keeps requesting the bill page
     * every {@value #RETRY_DELAY_MS} ms until the browser's current URL equals
     * it, then scrapes the bill table once.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Respect a driver path supplied on the command line; otherwise fall back.
        if (System.getProperty(DRIVER_PROPERTY) == null) {
            System.setProperty(DRIVER_PROPERTY, DEFAULT_DRIVER_PATH);
        }

        driver = new ChromeDriver();
        driver.manage().window().maximize();
        // Implicit wait applied to element lookups.
        driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);

        driver.get(ENTRY_URL);
        System.out.println("当前打开页面的标题是 " + driver.getTitle());

        // Poll until a request for the bill page actually ends up on the bill
        // page (presumably it redirects to the login page until the user has
        // signed in - confirm).
        String currentUrl = driver.getCurrentUrl();
        while (!BILL_URL.equals(currentUrl)) {
            try {
                Thread.sleep(RETRY_DELAY_MS);
            } catch (InterruptedException e) {
                // Restore the flag and stop: continuing would make every later
                // sleep fail immediately and turn this loop into a busy loop.
                Thread.currentThread().interrupt();
                return;
            }
            driver.get(BILL_URL);
            currentUrl = driver.getCurrentUrl();
        }

        time2getData();
        // The browser is intentionally left open; call driver.quit() to close it.
    }

    /**
     * Loads the bill page and prints icon URL, name, time, amount, trade
     * number and remark for every {@code //table/tbody/tr} row.
     */
    private static void time2getData() {
        driver.get(BILL_URL);
        List<WebElement> rows = driver.findElements(By.xpath("//table/tbody/tr"));
        for (WebElement tr : rows) {
            String icon = tr.findElement(By.tagName("img")).getAttribute("src");
            String time = repalceLine(tr.findElement(By.className("time")).getText());
            String name = repalceLine(tr.findElement(By.className("name")).getText());
            // The amount is read from the fourth cell of the row.
            String price = tr.findElements(By.tagName("td")).get(3).getText();
            // Single quotes inside the XPath avoid escaping within the Java literal.
            String trandno = tr.findElement(By.xpath(".//*/li[@seed='trade-memo']"))
                    .getAttribute("data-bizid");
            String remark = tr.findElement(By.xpath(".//*[@data-type='memo']"))
                    .getAttribute("data-info");

            String str = String.format("头像:%s 名称:%s 时间:%s 金额:%s 流水:%s 备注:%s",
                    icon, name, time, price, trandno, remark);
            System.out.println(str);
        }
    }

    /**
     * Replaces every line break (CRLF, CR or LF) in the text with one space.
     *
     * @param myString text to flatten; may be {@code null}
     * @return the text on a single line; the input itself when it contains no
     *         line break; {@code null} only when the input is {@code null}
     */
    public static String repalceLine(String myString) {
        if (myString == null) {
            return null;
        }
        Matcher m = LINE_BREAK.matcher(myString);
        return m.replaceAll(" ");
    }
}
实现效果

需要代码的评论区留言