天天看點

java selenium_java+selenium,40行代碼完成支付寶賬單爬取java+selenium,40行代碼完成支付寶賬單爬取

java+selenium,40行代碼完成支付寶賬單爬取

需要jar selenium-server-4.0.0-alpha-5.jar

需要驅動 chromedriver.exe

驅動需要和浏覽器版本對應,我用的是chrome版本 81.0.4044.138

chrome驅動位址:http://chromedriver.storage.googleapis.com/index.html

selenium jar位址:http://selenium-release.storage.googleapis.com/

完整爬取代碼(這裡我簡單寫了一下)

package main;

import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;

/**
 * Opens Chrome through Selenium, waits until the browser reaches the Alipay
 * bill page, then prints one line per row of the bill table to standard output.
 *
 * <p>The class name keeps its original spelling because it is the public entry point.
 */
public class LuanchChrome {

    /** Bill list page; reaching this exact URL is the signal that the polling loop can stop. */
    private static final String BILL_URL = "https://consumeprod.alipay.com/record/standard.htm";

    /** First page opened after the browser starts (a Baidu redirect link). */
    private static final String ENTRY_URL =
            "https://www.baidu.com/link?url=HP64htK5pNF11bUxlJjnv_QBMpTb73o9Gf6m-uj9KcidD8_fE-RS80Yn9ScqtW_w&wd=&eqid=8cb4a2a3000f3c4d000000035eb6296c";

    /** System property ChromeDriver reads to locate the chromedriver executable. */
    private static final String DRIVER_PROPERTY = "webdriver.chrome.driver";

    /**
     * Fallback chromedriver location.
     * NOTE(review): the published listing had lost its path separators
     * ("D:workactivemqttlibchromedriver.exe"); the directory split below is a best
     * guess. Adjust it for your machine or pass -Dwebdriver.chrome.driver=...
     */
    private static final String DEFAULT_DRIVER_PATH = "D:\\work\\activemqtt\\lib\\chromedriver.exe";

    /** Pause between two attempts to open the bill page. */
    private static final long LOGIN_POLL_MILLIS = 10000;

    /** Matches one line break: CRLF, lone CR, or lone LF. */
    private static final Pattern LINE_BREAK = Pattern.compile("\\r\\n|\\r|\\n");

    /** Browser session shared by {@link #main} and the scraping routine. */
    static WebDriver driver;

    /** Timer available for periodic scraping, e.g. {@code tm.schedule(ts, 10000, 10000)}; not scheduled by default. */
    static Timer tm = new Timer();

    /** Task that re-runs the bill scrape; intended to be scheduled on {@link #tm}. */
    static TimerTask ts = new TimerTask() {
        @Override
        public void run() {
            time2getData();
        }
    };

    /**
     * Starts Chrome, opens the entry page, then retries the bill page every
     * {@value #LOGIN_POLL_MILLIS} ms until the browser's current URL equals it,
     * and finally prints the bill rows. The browser is left open on exit.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Respect a driver path supplied on the command line; otherwise use the fallback.
        if (System.getProperty(DRIVER_PROPERTY) == null) {
            System.setProperty(DRIVER_PROPERTY, DEFAULT_DRIVER_PATH);
        }

        driver = new ChromeDriver();
        driver.manage().window().maximize();
        // Implicit wait applied to every element lookup.
        driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);

        driver.get(ENTRY_URL);
        System.out.println("目前打開頁面的标題是 " + driver.getTitle());

        String currentUrl = driver.getCurrentUrl();
        // Keep requesting the bill page until the browser actually stays on it.
        while (!BILL_URL.equals(currentUrl)) {
            try {
                Thread.sleep(LOGIN_POLL_MILLIS);
            } catch (InterruptedException e) {
                // Restore the flag and stop polling instead of spinning on an interrupted thread.
                Thread.currentThread().interrupt();
                return;
            }
            driver.get(BILL_URL);
            currentUrl = driver.getCurrentUrl();
        }

        time2getData();
    }

    /**
     * Loads the bill page and prints icon, name, time, amount, transaction
     * number and memo for every {@code //table/tbody/tr} row.
     */
    private static void time2getData() {
        driver.get(BILL_URL);
        List<WebElement> rows = driver.findElements(By.xpath("//table/tbody/tr"));
        for (WebElement tr : rows) {
            String icon = tr.findElement(By.tagName("img")).getAttribute("src");
            String time = repalceLine(tr.findElement(By.className("time")).getText());
            String name = repalceLine(tr.findElement(By.className("name")).getText());
            // The amount is read from the fourth cell of the row.
            String price = tr.findElements(By.tagName("td")).get(3).getText();
            // XPath string literals use single quotes so they can sit inside a Java string.
            String trandno = tr.findElement(By.xpath(".//*/li[@seed='trade-memo']"))
                    .getAttribute("data-bizid");
            String remark = tr.findElement(By.xpath(".//*[@data-type='memo']"))
                    .getAttribute("data-info");

            String str = String.format("頭像:%s 名稱:%s 時間:%s 金額:%s 流水:%s 備注:%s",
                    icon, name, time, price, trandno, remark);
            System.out.println(str);
        }
    }

    /**
     * Replaces every line break in the given text with a single space.
     *
     * @param myString text to flatten; must not be {@code null}
     * @return the text with each CRLF/CR/LF replaced by one space, or the text
     *         unchanged when it contains no line break
     * @throws NullPointerException if {@code myString} is {@code null}
     */
    public static String repalceLine(String myString) {
        Matcher m = LINE_BREAK.matcher(myString);
        return m.replaceAll(" ");
    }
}
           

實作效果

java selenium_java+selenium,40行代碼完成支付寶賬單爬取java+selenium,40行代碼完成支付寶賬單爬取

需要代碼的評論區留言