天天看點

Maven項目初開發(二)定時任務的開發

前兩章記錄了Maven項目開發過程的配置,下面是簡單的開發過程。這裡開發的是一個定時任務,初衷是能夠定時根據儲存的URL去網上搜尋資源,但沒那麼多時間去完善,只能算是個半成品,在此記錄一下開發的過程。

Web.xml

接上面的配置,在web.xml中配置一個listener:

<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.4"
         xmlns="http://java.sun.com/xml/ns/j2ee"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd">

  <!-- Context parameters and listeners ================================= -->

  <!--
    - Location of the Spring bean definition file, looked up on the classpath.
    -->
   <context-param>
		<param-name>contextConfigLocation</param-name>
		<param-value>classpath:/applicationContext.xml</param-value>
	</context-param>
  <!-- Spring listener that loads the root application context. -->
  <listener>
    <listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
  </listener>

  <!-- Spring listener for request-scoped context support. -->
  <listener>
    <listener-class>org.springframework.web.context.request.RequestContextListener</listener-class>
  </listener>
  <!-- Application entry point: this listener initialises the task Context when the web application starts. -->
  <listener>
	<listener-class>
		com.cff.study.TaskContextListener
	</listener-class>
  </listener>
    
 <!-- Servlet filters: apply UTF-8 character encoding to every request URL. -->
 <filter>
		<filter-name>encodingFilter</filter-name>
		<filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class>
		<init-param>
			<param-name>encoding</param-name>
			<param-value>UTF-8</param-value>
		</init-param>
	</filter>
	<filter-mapping>
		<filter-name>encodingFilter</filter-name>
		<url-pattern>/*</url-pattern>
	</filter-mapping>
  
 	<welcome-file-list>
		<welcome-file>index.jsp</welcome-file>
	</welcome-file-list>
</web-app>
        
           

利用listener作為程式的入口。

package com.cff.study;

import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;

/**
 * Servlet context listener used as the application's entry point.
 *
 * <p>On start-up it obtains the shared {@link Context} and initialises it; on
 * shutdown it closes that context again.
 */
public class TaskContextListener implements ServletContextListener{
	/** Context obtained during start-up; remains {@code null} if start-up never got that far. */
	private Context context = null;

	/**
	 * Closes the application context when the web application is shut down.
	 *
	 * @param arg0 the servlet context event supplied by the container
	 */
	public void contextDestroyed(ServletContextEvent arg0) {
		// Guard against shutdown after a start-up that never assigned the field,
		// otherwise the close call would fail with a NullPointerException.
		if (context != null) {
			context.close();
		}
	}

	/**
	 * Obtains the shared context and initialises it. Initialisation failures are
	 * reported to the servlet context log and do not abort application start-up.
	 *
	 * @param arg0 the servlet context event supplied by the container
	 */
	public void contextInitialized(ServletContextEvent arg0) {
		context = Context.getInstance();
		try {
			context.init();
		} catch (ClassNotFoundException e) {
			reportInitFailure(arg0, e);
		} catch (InstantiationException e) {
			reportInitFailure(arg0, e);
		} catch (IllegalAccessException e) {
			reportInitFailure(arg0, e);
		}
	}

	/** Writes an initialisation failure, including its stack trace, to the servlet context log. */
	private void reportInitFailure(ServletContextEvent event, Exception cause) {
		event.getServletContext().log("TaskContextListener: task context initialisation failed", cause);
	}
	
}
           

在Context中定義多個線程,每個線程啟動一個定時任務。這個小項目我只添加了一個任務:利用百度搜尋的方法去網上搜尋資源。它並不完整,這裡只稍做介紹。

package com.cff.study;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import java.util.TimerTask;

import org.apache.commons.logging.*;

import com.cff.study.util.FileUtil;
import com.cff.study.util.Logger;


/**
 * Application-wide singleton that starts one {@link TaskThread} per task class
 * listed in {@code /class-config.properties}.
 */
public class Context {
	private static Context instance = null;
	private static Log log = LogFactory.getLog(Context.class);
	/** Task class configuration; filled by {@link #init()}. */
	Properties classConfig = new Properties();

	/**
	 * Returns the single shared instance, creating it on the first call.
	 *
	 * @return the shared {@code Context}
	 */
	synchronized public static Context getInstance() {
		if(null == instance){
			log.info("執行個體不存在!正在建立... ");
			instance = new Context();
		}
		// Return the cached instance directly; calling getInstance() again here
		// would recurse forever once the instance exists.
		return instance;
	}
	
	/**
	 * Loads the task configuration and starts one thread per configured task.
	 *
	 * <p>Entries are read under the keys {@code class0}, {@code class1}, ... up to the
	 * number of properties in the file; each value is a simple class name resolved
	 * inside the {@code com.cff.study} package and instantiated as a {@link TimerTask}.
	 * Thread {@code i} is given a start delay of {@code 5 + i}.
	 *
	 * @throws ClassNotFoundException if a configured class cannot be found
	 * @throws InstantiationException if a configured class cannot be instantiated
	 * @throws IllegalAccessException if a configured class or its constructor is not accessible
	 */
	public void init() throws ClassNotFoundException, InstantiationException, IllegalAccessException{
		log.info("應用上下文初始化中...");
		FileUtil.loadConfig(classConfig,"/class-config.properties");
		int classNum = classConfig.size();
		for(int i=0;i<classNum;i++){
			TaskThread taskThread = new TaskThread();
			String simpleName = classConfig.getProperty("class"+i);
			Logger.info(simpleName);
			Class<?> tempTask = Class.forName("com.cff.study."+simpleName);
			TimerTask currentTask = (TimerTask)tempTask.newInstance();
			taskThread.setCurrentTask(currentTask);
			taskThread.setTimeDelay(5+i);
			taskThread.start();
			Logger.info(""+i+"個線程啟動中...");
		}
	}
	
	/**
	 * Called on application shutdown; currently only logs a message.
	 */
	public void close(){
		log.info("應用上下文正在登出...");
	}

}
           

任務線程的啟動

package com.cff.study;

import java.util.Calendar;
import java.util.Timer;
import java.util.TimerTask;

import com.cff.study.util.Logger;

/**
 * Thread that schedules a single {@link TimerTask} on its own {@link Timer}.
 *
 * <p>The task first fires {@code timeDelay} seconds after the thread runs and is
 * then repeated at a fixed rate of 30 minutes.
 */
public class TaskThread extends Thread {
	// One timer per thread: a shared static field would be overwritten by every
	// thread that starts, without any synchronisation.
	private Timer importDataTimer = null;
	/** Task to schedule; must be set before the thread is started. */
	TimerTask currentTask = null;
	/** Delay before the first execution, in seconds. */
	int timeDelay = 5;

	/** @return the delay before the first execution, in seconds */
	public int getTimeDelay() {
		return timeDelay;
	}

	/** @param timeDelay the delay before the first execution, in seconds */
	public void setTimeDelay(int timeDelay) {
		this.timeDelay = timeDelay;
	}

	/** @return the task this thread schedules */
	public TimerTask getCurrentTask() {
		return currentTask;
	}

	/** @param currentTask the task this thread schedules */
	public void setCurrentTask(TimerTask currentTask) {
		this.currentTask = currentTask;
	}

	public TaskThread() {
		super();
	}
	
	/**
	 * Creates the timer and schedules the current task at a fixed rate.
	 */
	@Override
	public void run(){
		// Repeat interval in minutes; an empty value or "0" disables scheduling.
		String importMins = "30";
		
		if(importMins!=null&&!importMins.trim().equals("")&&!importMins.trim().equals("0")){
			importDataTimer = new Timer();
			Calendar firstRun = Calendar.getInstance();
			firstRun.add(Calendar.SECOND, timeDelay);
			// Compute the period in long arithmetic so large intervals cannot overflow int.
			long periodMillis = 60L * Integer.parseInt(importMins) * 1000L;
			importDataTimer.scheduleAtFixedRate(currentTask, firstRun.getTime(), periodMillis);
			Logger.info("定時任務"+timeDelay+"秒鐘後啟動,掃描時間間隔為"+importMins+"分鐘");
		}
	}
}
           

百度搜尋任務

package com.cff.study;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TimerTask;

import com.cff.study.search.ContentFromUrl;
import com.cff.study.search.JSoupBaiduSearcher;
import com.cff.study.search.SearchResult;
import com.cff.study.search.Searcher;
import com.cff.study.search.UrlOpen;
import com.cff.study.search.Webpage;
import com.cff.study.util.DateUtil;
import com.cff.study.util.Logger;
import com.cff.study.util.FileUtil;

public class BaiDuSearchTask extends TimerTask {
	private  int UrlLevel = 5; //Url最大層數 
	private  int UrlStep = 10; //Url每層最大數量
	private  int ExecuteUrlNum=100;//一次定時任務最大處理量
	private  int MaxUrls = 100000;
	Searcher searcher = null;
	SearchResult searchResult = null;
	List<Webpage> webpages = null;
	String today = "";
	List<String> keywords = new ArrayList<String>();
	List<Integer> CurrentLocations = new ArrayList<Integer>();
	int CurrentUrlIndex = 1;
	private Properties contextConfig = new Properties();
	private Properties urlCollectionFile = new Properties();
	private Properties contentCollectionFile = new Properties();
	String contextConfigFileName = "F:/Work_Study/TimerTask/context-config.properties";
	String urlCollectionFileName = "F:/Work_Study/TimerTask/url-collect.properties";
	String contentCollectionFileName = "F:/Work_Study/TimerTask/content-collect.properties";
	
	public BaiDuSearchTask() {

	}

	@Override
	public void run() {
		today = DateUtil.format(DateUtil.now(), "yyyyMMdd");
		Logger.info("今天是" + today.substring(0, 4) + "年" + today.substring(4, 6)
				+ "月" + today.substring(6, 8) + "日,我啟動了一個線程,叫百度搜尋,嘿嘿,厲害吧?");
		Logger.info("搜尋正在啟動,請不要關閉電腦!");
		String searchDest = "陳付菲";
		keywords.add("偉大的人");
		keywords.add("華中科技大學");
		FileUtil.loadAbsoluteConfig(contextConfig, contextConfigFileName);
		FileUtil.loadAbsoluteConfig(urlCollectionFile, urlCollectionFileName);
		FileUtil.loadAbsoluteConfig(contentCollectionFile, contentCollectionFileName);
		doSearch(searchDest);
	}

	private void doSearch(String searchDest) {
		int endRunning = Integer.parseInt(contextConfig.getProperty(
				"endRunning").trim());
		if (1 == endRunning)
			return;
		int SearchMethod = Integer.parseInt(contextConfig.getProperty(
				"SearchMethod").trim());
		//最大層數
		UrlLevel = Integer.parseInt(contextConfig.getProperty("MaxUrlLevel").trim()); 			
		//最大Url數量
		MaxUrls = Integer.parseInt(contextConfig.getProperty("MaxUrls").trim());
		//最大步長
		UrlStep = Integer.parseInt(contextConfig.getProperty("MaxUrlStep").trim());
		if (1 == SearchMethod){
			FileUtil.clearInfoForFile(urlCollectionFileName);
			doSearchMethod(searchDest);
			FileUtil.writeAbsoluteProperties(contextConfigFileName,
					"SearchMethod", String.valueOf(0));
		}	
		else {
			int size = urlCollectionFile.size();
			if (MaxUrls < size) {
				int pageNo = Integer.parseInt(contextConfig.getProperty(
						"pageNo").trim());
				FileUtil.writeAbsoluteProperties(contextConfigFileName,
						"pageNo", String.valueOf(pageNo + 1));
				return;
			}
			readCurrentLocation(CurrentLocations);
			int DsetUrlLevel = (int) (UrlLevel-1- Math.log(ExecuteUrlNum) / Math.log(UrlStep));; //遞歸開始的Url層數
			int CurrentUrlLevel = Integer.parseInt(contextConfig.getProperty("CurrentUrlLevel").trim());//目前處理層
			Logger.info("CurrentLocations size:"+CurrentLocations.size());
			//比較如果DsetUrlLevel和CurrentUrlLevel,若相同則從DsetUrlLevel開始處理,不同則從CurrentUrlLevel開始處理
			if(DsetUrlLevel==CurrentUrlLevel){
				Logger.info("CurrentUrlLevel"+CurrentUrlLevel);
				
				Logger.info("DsetUrlLevel:"+DsetUrlLevel);
				int CurrentUrlNo=CurrentLocations.get(CurrentUrlLevel);
				doUrlOpenMethod(DsetUrlLevel,CurrentUrlNo);
				CurrentLocations.set(CurrentUrlLevel, CurrentUrlNo+1);
				writeCurrentLocations(DsetUrlLevel,CurrentLocations);
			}
			else{
				Logger.info("CurrentUrlLevel else"+CurrentUrlLevel);
				int CurrentUrlNo=CurrentLocations.get(CurrentUrlLevel);
				int result = doUrlOpenToDsetMethod(CurrentUrlNo,CurrentUrlLevel,DsetUrlLevel);
				if(-1==result){
					FileUtil.writeAbsoluteProperties(contextConfigFileName,
							"endRunning", String.valueOf(1));
				}
				else if(DsetUrlLevel==result){
					for(int i=CurrentUrlLevel+1;i<DsetUrlLevel;i++)
						CurrentLocations.set(i, 0);
					doUrlOpenMethod(DsetUrlLevel,0);
					CurrentLocations.set(DsetUrlLevel, 1);
				}
				else if(CurrentUrlLevel==result){
					for(int i=CurrentUrlLevel+1;i<DsetUrlLevel;i++)
						CurrentLocations.set(i, -1);
					CurrentLocations.set(CurrentUrlLevel, CurrentUrlNo+1);
				}
				else{
					if(CurrentUrlLevel!=result){
						for(int k=CurrentUrlLevel+1;k<result;k++ ){
							CurrentLocations.set(k,0);							
						}
						CurrentLocations.set(result, 1);
						for(int i=result+1;i<DsetUrlLevel;i++)
							CurrentLocations.set(i, -1);
					}
				}
				writeCurrentLocations(DsetUrlLevel,CurrentLocations);
			}
		}
	}
	
	/**
	 * 寫入目前處理位置
	 * @param dsetUrlLevel	目标處理層
	 * @param currentLocations	位置數組
	 */
	private void writeCurrentLocations(int dsetUrlLevel, List<Integer> currentLocations) {
		int size = currentLocations.size()-1;
		int CurrentUrlLevel = dsetUrlLevel;
		for(int i=size;i>0;i--){
			if(currentLocations.get(i)>9){
				currentLocations.set(i, -1);
				int LevelUpNo = currentLocations.get(i-1);
				currentLocations.set(i-1, LevelUpNo+1);
				CurrentUrlLevel = i-1;
			}
		}
		if(currentLocations.get(0)>9){
			FileUtil.writeAbsoluteProperties(contextConfigFileName,
					"endRunning", String.valueOf(1));
			return;
		}
		
		StringBuffer location = new StringBuffer();
		location.append(currentLocations.get(0));
		for(int i=1;i<=size;i++){
			location.append("#");
			location.append(currentLocations.get(i));
		}
		FileUtil.writeAbsoluteProperties(contextConfigFileName,
				"CurrentLocation", location.toString());
		FileUtil.writeAbsoluteProperties(contextConfigFileName,
				"CurrentUrlLevel", String.valueOf(CurrentUrlLevel));
	}
	
	/**
	 * 讀取目前處理位置
	 * @param currentLocations	位置數組
	 */
	public static void main(String[] args){
		//readCurrentLocation(CurrentLocations);
	}
	private  void readCurrentLocation(List<Integer> currentLocations) {
		//FileUtil.loadAbsoluteConfig(contextConfig, contextConfigFileName);
		String location = contextConfig.getProperty("CurrentLocation").trim();
		String[] locations = location.split("#", -1);
		Logger.info(locations.length);
		for(int i=0;i<locations.length;i++){
			currentLocations.add(Integer.parseInt(locations[i].trim()));
		}
	}

	/**
	 * 到指定Url層數的解析過程
	 * @param CurrentUrlNo 目前url維數位置
	 * @param tempUrlNo	目前url層數位置
	 * @param CurrentUrlLevel	目前層
	 */
	private int doUrlOpenToDsetMethod(int CurrentUrlNo, int CurrentUrlLevel, int dsetUrlLevel) {
		if(CurrentUrlNo>UrlStep||CurrentUrlLevel>UrlLevel){
			return -1;
		}
		Logger.info("目前處理層:"+CurrentUrlLevel);
		Logger.info("目前第"+CurrentUrlNo+"個Url");
		Logger.info("Url辨別為:"+"Url"+(CurrentUrlNo+CurrentUrlLevel*UrlStep));
		String url = urlCollectionFile.getProperty("Url"+(CurrentUrlNo+CurrentUrlLevel*UrlStep));
		Logger.info("目前Url為:"+url);
		if(!CheckUrlIllegalOrNot(url)){
			Logger.info("目前Url不合法!");
			return CurrentUrlLevel;
		}
		Logger.info("目前Url合法,可以繼續執行! ");
		doUrlOpenMethodDetail(url,0,CurrentUrlLevel);
		CurrentUrlLevel++;
		while(CurrentUrlLevel!=dsetUrlLevel){
			url = urlCollectionFile.getProperty("Url"+CurrentUrlLevel*UrlStep);
			Logger.info("目前處理層:"+CurrentUrlLevel);
			Logger.info("目前第"+0+"個Url");
			Logger.info("Url辨別為:"+"Url"+CurrentUrlLevel*UrlStep);
			Logger.info("目前Url為:"+url);
			if(!CheckUrlIllegalOrNot(url)){
				Logger.info("目前Url不合法!");
				return CurrentUrlLevel+1;
			}
			Logger.info("目前Url合法,可以繼續執行! ");
			doUrlOpenMethodDetail(url,0,CurrentUrlLevel);
			CurrentUrlLevel++;
		}
		return dsetUrlLevel;
	}

	/**
	 * 打開url連結的方法
	 * @param CurrentUrlLevel	
	 * @param CurrentUrlNo
	 */
	private void doUrlOpenMethod(int CurrentUrlLevel,int CurrentUrlNo) {
		if(CurrentUrlNo>UrlStep||CurrentUrlLevel>UrlLevel){
			return;
		}
		String url = urlCollectionFile.getProperty("Url"+(CurrentUrlNo+CurrentUrlLevel*UrlStep));
		Logger.info("目前處理目标層:"+CurrentUrlLevel);
		Logger.info("目前第"+0+"個Url");
		Logger.info("Url辨別為:"+"Url"+(CurrentUrlNo+CurrentUrlLevel*UrlStep));
		Logger.info("目前Url為:"+url);
		if(!CheckUrlIllegalOrNot(url)){
			Logger.info("目前Url不合法!");
			return;
		}
		Logger.info("目前Url合法,可以繼續執行! ");
		doUrlOpenMethodDetail(url,CurrentUrlNo,CurrentUrlLevel);
		for(int i=0;i<UrlStep;i++){
			doUrlOpenMethod(CurrentUrlLevel+1,i);
		}
		
	}

	/**
	 * 搜尋方法主體
	 * @param searchDest 搜尋詞
	 */
	private void doSearchMethod(String searchDest) {
		int pageNo = Integer.parseInt(contextConfig.getProperty("pageNo")
				.trim());
		Logger.info("目前搜尋頁碼:" + pageNo);
		
		if (UrlStep < pageNo) {
			FileUtil.writeAbsoluteProperties(contextConfigFileName,
					"endRunning", "1");
			return;
		}
		doSearchMethodDetail(searchDest, pageNo);
		FileUtil.writeAbsoluteProperties(contextConfigFileName, "pageNo",
				String.valueOf(pageNo + 1));

	}

	/**
	 * 搜尋方法詳細
	 * @param searchDest 百度搜尋詞
	 * @param pageNo	顯示頁碼
	 */
	private void doSearchMethodDetail(String searchDest, int pageNo) {
		searcher = new JSoupBaiduSearcher();
		searchResult = searcher.search(searchDest, pageNo);
		webpages = searchResult.getWebpages();
		Map<String, String> map = new HashMap();
		int size = urlCollectionFile.size();
		if (webpages != null) {
			int i = 0;
			Logger.info("搜尋結果 目前第 " + searchResult.getPage() + " 頁,頁面大小為:"
					+ searchResult.getPageSize() + " 共有結果數:"
					+ searchResult.getTotal());
			for (Webpage webpage : webpages) {
				// Logger.info("搜尋結果 "+ i + " :");
				// Logger.info("标題:" + webpage.getTitle());
				Logger.info("URL:" + webpage.getUrl());
				// Logger.info("摘要:" + webpage.getSummary());
				// Logger.info("正文:" + webpage.getContent());
				// Logger.info("");				
				map.put("Url" + i, webpage.getUrl());
				FileUtil.writeAbsolutePropertiesFile(urlCollectionFileName, map);
				i++;
				if(i>UrlStep)break;
			}
			if(map.size()<UrlStep){
				for(int j=map.size();j<UrlStep;j++){
					map.put("Url" + j, " ");
					FileUtil.writeAbsolutePropertiesFile(urlCollectionFileName, map);
				}
			}
		} else {
			Logger.error("沒有搜尋到結果");
		}
	}

	/**
	 * 處理url位址的内容并添加urls
	 * @param url	url連結
	 * @param currentUrlNo	Url目前位置
	 * @param currentUrlLevel Url層數
	 */
	private void doUrlOpenMethodDetail(String url, int currentUrlNo, int currentUrlLevel) {
		if(currentUrlNo>UrlStep||currentUrlLevel>UrlLevel){
			return;
		}
		UrlOpen urlOpen = new ContentFromUrl();
		SearchResult searchResult = urlOpen.urlOpen(url);
		List<Webpage> webpages = searchResult.getWebpages();
		Map<String, String> map = new HashMap();
		int size = urlCollectionFile.size();
		if (webpages != null) {
			int i = 0;
			Logger.info("搜尋結果 目前第 " + searchResult.getPage() + " 頁,頁面大小為:"
					+ searchResult.getPageSize() + " 共有結果數:"
					+ searchResult.getTotal());
			for (Webpage webpage : webpages) {
				// Logger.info("搜尋結果 "+ i + " :");
				// Logger.info("标題:" + webpage.getTitle());
				for (int k = 0; k < webpage.getUrls().size(); k++) {
					Logger.info("URL:" + webpage.getUrls().get(k));
					if (k > UrlStep)
						break;
					map.put("Url" + ((currentUrlLevel+1) * UrlStep + k), webpage.getUrls().get(k));
					FileUtil.writeAbsolutePropertiesFile(urlCollectionFileName,
							map);
				}
				// Logger.info("摘要:" + webpage.getSummary());
				// Logger.info("正文:" + webpage.getContent());
				// Logger.info("");
				int sizeOfcontentCollectionFile = contentCollectionFile.size();
				FileUtil.writeAbsoluteProperties(contentCollectionFileName,
						"内容" + (sizeOfcontentCollectionFile + i + 1),
						webpage.getContent());
				if (webpage.getUrls().size() < UrlStep) {
					for (int k = webpage.getUrls().size(); k < UrlStep; k++) {
						map.put("Url" + ((currentUrlLevel+1) * UrlStep + k), " ");
						FileUtil.writeAbsolutePropertiesFile(
								urlCollectionFileName, map);
					}
				}

				i++;
			}
		} else {
			Logger.error("沒有搜尋到結果");
		}
	}
	
	/**
	 * 檢查url合法
	 * @param Url
	 * @return
	 */
	public boolean CheckUrlIllegalOrNot(String Url){
		Logger.info(Url);
		if("".equals(Url))return false;
		if(null==Url)return false;
		if(!Url.contains("http:"))return false;
		if(Url.length()<10)return false;
		if (Url.contains(".js") || Url.contains(".jpg")
				|| Url.contains(".jpeg") || Url.contains(".mp4")
				|| Url.contains(".avi") || Url.contains(".flv")
				|| Url.contains("ico") || Url.contains(".css"))
			return false;
		return true;
	}
	
}
           

這個任務其實沒寫完整,但是不想再花心思完善了。百度搜尋利用的是jsoup工具。如果有人感興趣可以留下郵箱,我把源代碼發給他。