
Java Programming Tip: A Small Crawler Program

This article shows how to write a small crawler program in Java.

  Masamaso (http://www.masamaso.com/index.shtml) runs a flash sale every day at 10:00, offering five items at 90% off. The menswear is decent quality, so I often try to grab them, but doing it by hand felt too slow. So I wrote a small crawler program to do the checking for me: if a page turns out to be a gold page (a free item), it is opened automatically and I can snap it up. Sharing it here with everyone.

  The approach:

  1. Read the links of all the products you want into the program (a sample link file is shown after this list).

  2. Open each link in turn and read the page source.

  3. Check whether it is a gold item (the page source contains the string free_msg).

  4. If it is gold, open the link in IE.
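
  For step 1, the program expects a plain-text link file with one product URL per line (main() in the Digger class below reads it from D:/allan/craber.txt). A hypothetical sample, with placeholder paths rather than real product pages:

http://www.masamaso.com/placeholder-product-1.shtml
http://www.masamaso.com/placeholder-product-2.shtml
http://www.masamaso.com/placeholder-product-3.shtml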

  Source code:

  Reading the link file:

package com.gogler.net;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;

public class FileReader {

    private String fileName;

    public FileReader() {}

    public FileReader(String fileName) {
        this.fileName = fileName;
    }

    // Read every line of the link file into a list (one product URL per line).
    public List<String> getLines() {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(this.fileName)));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        List<String> lines = new LinkedList<String>();
        String line = null;
        try {
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return lines;
    }
}

The Url class:

package com.gogler.net;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

public class Url {

    String url;

    public Url() {}

    public Url(String url) {
        this.url = url;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    // Open a URLConnection to the product page.
    public URLConnection getConnection() {
        URL httpURL = null;
        try {
            httpURL = new URL(this.url);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        URLConnection conn = null;
        if (httpURL != null) {
            try {
                conn = httpURL.openConnection();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return conn;
    }

    // Wrap the connection's input stream in a BufferedReader.
    public BufferedReader getBuffer() {
        URLConnection conn = this.getConnection();
        BufferedReader br = null;
        if (conn == null) {
            return null;
        }
        conn.setConnectTimeout(1000 * 10);
        try {
            conn.connect();
            br = new BufferedReader(new InputStreamReader(conn.getInputStream()));
        } catch (IOException e) {
            e.printStackTrace();
        }
        return br;
    }

    // Return true if the page source contains the given marker string.
    public boolean isExit(String str) {
        BufferedReader bis = getBuffer();
        if (bis == null) {
            return false;
        }
        boolean exit = false;
        String line = null;
        try {
            while ((line = bis.readLine()) != null) {
                exit = line.contains(str);
                if (exit) {
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                bis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return exit;
    }
}

The Digger class:

package com.gogler.net;

import java.io.IOException;
import java.util.List;

public class Digger extends Thread {

    private Url url;

    public Digger() {
        super();
    }

    public Digger(Url url) {
        this.url = url;
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        FileReader reader = new FileReader("D:/allan/craber.txt");
        List<String> urls = reader.getLines();
        // Start one crawler thread per product link so the pages are checked in parallel.
        for (String s : urls) {
            Url url = new Url(s);
            Digger digger = new Digger(url);
            digger.start();
        }
    }

    @Override
    public void run() {
        // A gold page contains the free_msg marker (step 3); open it in IE when found.
        if (url.isExit("free_msg")) {
            try {
                // Use the String[] form of exec: the IE path contains a space,
                // so the single-string form would be tokenized incorrectly.
                Runtime.getRuntime().exec(new String[] { "C:/Program Files/Internet Explorer/iexplore.exe", url.getUrl() });
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
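
As a closing note, the hard-coded IE path ties the program to one particular Windows install. On Java 6 or later, a sketch along the following lines could open the gold page in the system default browser instead; the BrowserOpener class is my own hypothetical helper, not part of the original program:

import java.awt.Desktop;
import java.net.URI;

public class BrowserOpener {

    // Hypothetical helper (not part of the original program): open a page in the
    // system default browser instead of launching iexplore.exe from a fixed path.
    public static void open(String pageUrl) {
        try {
            if (Desktop.isDesktopSupported()) {
                Desktop.getDesktop().browse(new URI(pageUrl));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

In Digger.run(), the exec call could then be replaced by BrowserOpener.open(url.getUrl()).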