天天看点

java selenium_java+selenium,40行代码完成支付宝账单爬取java+selenium,40行代码完成支付宝账单爬取

java+selenium,40行代码完成支付宝账单爬取

需要jar selenium-server-4.0.0-alpha-5.jar

需要驱动 chromedriver.exe

驱动需要和浏览器版本对应,我用的是chrome版本 81.0.4044.138

chrome驱动地址:http://chromedriver.storage.googleapis.com/index.html

selenium jar地址:http://selenium-release.storage.googleapis.com/

完整爬取代码(这里我简单写了一下)

package main;import java.util.List;import java.util.Timer;import java.util.TimerTask;import java.util.concurrent.TimeUnit;import java.util.regex.Matcher;import java.util.regex.Pattern;import org.openqa.selenium.By;import org.openqa.selenium.WebDriver;import org.openqa.selenium.WebElement;import org.openqa.selenium.chrome.ChromeDriver;public class LuanchChrome {static WebDriver driver;    public static void main(String[] args) {            //设置驱动        System.setProperty("webdriver.chrome.driver", "D:workactivemqttlibchromedriver.exe");                  //初始化一个chrome浏览器实例,实例名称叫driver          driver = new ChromeDriver();          //最大化窗口          driver.manage().window().maximize();          //设置隐性等待时间          driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);                    // get()打开一个站点          driver.get("https://www.baidu.com/link?url=HP64htK5pNF11bUxlJjnv_QBMpTb73o9Gf6m-uj9KcidD8_fE-RS80Yn9ScqtW_w&wd=&eqid=8cb4a2a3000f3c4d000000035eb6296c");          //getTitle()获取当前页面title的值          System.out.println("当前打开页面的标题是 "+ driver.getTitle());        //模拟点击//        driver.findElement(By.xpath(".//*[@data-status='show_login']")).click();//        获取当前地址        String current_url = driver.getCurrentUrl();        //监测当前窗口切换为支付宝界登陆主页后跳转账单页面        while (!current_url.equals("https://consumeprod.alipay.com/record/standard.htm")) {          try {Thread.sleep(10000);} catch (InterruptedException e) {// TODO Auto-generated catch blocke.printStackTrace();}  driver.get("https://consumeprod.alipay.com/record/standard.htm");  current_url = driver.getCurrentUrl();};time2getData();//tm.schedule(ts, 10000, 10000);            //关闭并退出浏览器        // driver.quit();                }private static void time2getData() {driver.get("https://consumeprod.alipay.com/record/standard.htm");   List  links = driver.findElements(By.xpath("//table/tbody/tr"));//         List  linkstd = driver.findElements(By.xpath("//table/tbody/tr/td")); for (int i = 0; i < links.size(); i++) { 
WebElement tr = links.get(i); String icon=tr.findElement(By.tagName("img")).getAttribute("src");String time=repalceLine(tr.findElement(By.className("time")).getText());String name=repalceLine(tr.findElement(By.className("name")).getText());String price = tr.findElements(By.tagName("td")).get(3).getText();//String price=repalceLine(tr.findElement(By.xpath(".//span[@class="amount-pay"]")).getText());String trandno=tr.findElement(By.xpath(".//*/li[@seed="trade-memo"]")).getAttribute("data-bizid");String remark=tr.findElement(By.xpath(".//*[@data-type="memo"]")).getAttribute("data-info");//System.out.println("头像:"+icon);//System.out.println("时间:"+time);//System.out.println("名称:"+name);//System.out.println("金额:"+price);//System.out.println("流水:"+trandno);//System.out.println("备注:"+remark);String str = String.format("头像:%s 名称:%s 时间:%s 金额:%s 流水:%s 备注:%s", icon,name,time,price,trandno,remark); System.out.println(str);  }}        public static String repalceLine(String myString){String newString=null;Pattern CRLF = Pattern.compile("(|||)");Matcher m = CRLF.matcher(myString);if (m.find()) {  newString = m.replaceAll(" ");}return newString;}static Timer tm=new Timer();static TimerTask ts=new TimerTask() {@Overridepublic void run() {time2getData();}};  }
           

实现效果

java selenium_java+selenium,40行代码完成支付宝账单爬取java+selenium,40行代码完成支付宝账单爬取

需要代码的评论区留言