天天看點

java用Digester解析xml檔案——高效率的xml解析

  Digester不是JDK自帶的,需要以下依賴包:commons-beanutils.jar、commons-collections.jar、commons-digester.jar、commons-logging-1.1.3.jar。下載位址:點選打開連結

  Digester底層采用SAX解析XML檔案,所以很自然地,對象轉換由"事件"驅動,即在識别出特定XML元素時(實際被細分為begin、body、end、finish四個時點),将執行特定的動作,比如建立特定的Java對象,或調用特定對象的方法等。此處的XML元素根據匹配模式(matching pattern)識别,而相關操作由規則(rule)定義。在轉換過程中,Digester維持了一個對象棧,可以看作對象轉換的工作台,用來存放轉換中生成的、或是為轉換臨時建立的Java對象。對輸入XML檔案作了一趟完整的掃描後,對象棧的棧頂元素即為目标對象。由于Digester屏蔽了SAX解析的細節,使用者僅需關注轉換操作本身,大大簡化了轉換操作。Digester按規則調用方法時,是通過Java反射來執行的。

  本文示例的項目源代碼下載位址:點選打開連結

  上xml檔案内容:

<?xml version="1.0" encoding="UTF-8"?>

<CCMS_DATA>
  <CCMS_MAXAMOUNT_DATA>
    <ROW>
      <SYSCODE>BEPS</SYSCODE>
      <SENDBANK>ALL</SENDBANK>
      <RECVBANK>ALL</RECVBANK>
      <MSGTYPE>ALL</MSGTYPE>
      <BIZTYPE>0</BIZTYPE>
      <AMTLMT>9999999999999.99</AMTLMT>
      <CHKLEVEL>10</CHKLEVEL>
    </ROW>
    <ROW>
      <SYSCODE>BEPS</SYSCODE>
      <SENDBANK>ALL</SENDBANK>
      <RECVBANK>ALL</RECVBANK>
      <MSGTYPE>PKG001</MSGTYPE>
      <BIZTYPE>0</BIZTYPE>
      <AMTLMT>50000.00</AMTLMT>
      <CHKLEVEL>11</CHKLEVEL>
    </ROW>
    <ROW>
      <SYSCODE>BEPS</SYSCODE>
      <SENDBANK>ALL</SENDBANK>
      <RECVBANK>ALL</RECVBANK>
      <MSGTYPE>PKG003</MSGTYPE>
      <BIZTYPE>0</BIZTYPE>
      <AMTLMT>9999999999999.99</AMTLMT>
      <CHKLEVEL>11</CHKLEVEL>
    </ROW>
  </CCMS_MAXAMOUNT_DATA>

</CCMS_DATA>
           

  上代碼:

  entity/bean層:

package com.bosspay.entity;

/**
 * Plain bean describing one amount-limit record.
 *
 * <p>All business fields are kept as strings. The accessors follow the JavaBean naming
 * convention so that reflection-based tools can locate them by property name.
 */
public class DicAmtLimit {
	/** Numeric identifier of the record. */
	private long id;

	/** System code. */
	private String syscode;
	/** Message type. */
	private String mt;
	/** Business/transaction type. */
	private String txtp;
	/** Sending bank. */
	private String sndrbk;
	/** Receiving bank. */
	private String rcvbk;
	/** Check level. */
	private String chcklvl;
	/** Upper amount limit, stored as text. */
	private String amtupperlmt;

	/** @return the record identifier */
	public long getId() {
		return this.id;
	}

	/** @param id the record identifier */
	public void setId(long id) {
		this.id = id;
	}

	/** @return the system code */
	public String getSyscode() {
		return this.syscode;
	}

	/** @param syscode the system code */
	public void setSyscode(String syscode) {
		this.syscode = syscode;
	}

	/** @return the message type */
	public String getMt() {
		return this.mt;
	}

	/** @param mt the message type */
	public void setMt(String mt) {
		this.mt = mt;
	}

	/** @return the business/transaction type */
	public String getTxtp() {
		return this.txtp;
	}

	/** @param txtp the business/transaction type */
	public void setTxtp(String txtp) {
		this.txtp = txtp;
	}

	/** @return the sending bank */
	public String getSndrbk() {
		return this.sndrbk;
	}

	/** @param sndrbk the sending bank */
	public void setSndrbk(String sndrbk) {
		this.sndrbk = sndrbk;
	}

	/** @return the receiving bank */
	public String getRcvbk() {
		return this.rcvbk;
	}

	/** @param rcvbk the receiving bank */
	public void setRcvbk(String rcvbk) {
		this.rcvbk = rcvbk;
	}

	/** @return the check level */
	public String getChcklvl() {
		return this.chcklvl;
	}

	/** @param chcklvl the check level */
	public void setChcklvl(String chcklvl) {
		this.chcklvl = chcklvl;
	}

	/** @return the upper amount limit as text */
	public String getAmtupperlmt() {
		return this.amtupperlmt;
	}

	/** @param amtupperlmt the upper amount limit as text */
	public void setAmtupperlmt(String amtupperlmt) {
		this.amtupperlmt = amtupperlmt;
	}
}
           

  dao層:

package com.bosspay.dao;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

import com.bosspay.common.DataUtil;
import com.bosspay.entity.DicAmtLimit;

/**
 * Data-access object that batch-inserts {@link DicAmtLimit} rows into the
 * {@code DIC_AMT_LIMIT} table.
 */
public class DicAmtLimitDao {

	public static final String TABLE_NAME = "DIC_AMT_LIMIT";
	
	public static final String ID = "id";

	public static final String SYSCODE = "syscode";
	public static final String MT = "mt";
	public static final String TXTP = "txtp";
	public static final String SNDRBK = "sndrbk";
	public static final String RCVBK = "rcvbk";
	public static final String CHCKLVL = "chcklvl";
	public static final String AMTUPPERLMT = "amtupperlmt";
	
	/** Parameterized insert; the column order matches the setString indexes in insertBatch. */
	private static final String INSERT_SQL = "insert into " + TABLE_NAME +"("
		+SYSCODE+","+MT+","+TXTP+","+SNDRBK+","+RCVBK+","+CHCKLVL+","+AMTUPPERLMT
		+ ")values(?,?,?,?,?,?,?)";

	private static final DicAmtLimitDao instance = new DicAmtLimitDao();
	
	/**
	 * Returns the shared instance of this DAO.
	 *
	 * @return the shared instance
	 */
	public static DicAmtLimitDao getInstance(){
		return instance;
	}
	
	/**
	 * Inserts all given rows with a single JDBC batch.
	 *
	 * <p>Failures are printed to standard error and reported through the return value;
	 * no exception is propagated to the caller. The statement and connection are
	 * released whether or not the batch succeeds.
	 *
	 * @param dicAmtLimits rows to insert; {@code null} or empty means there is nothing to do
	 * @return {@code true} if the batch was executed (or there was nothing to insert),
	 *         {@code false} if any step failed
	 */
	public boolean insertBatch(List<DicAmtLimit> dicAmtLimits){
		if (dicAmtLimits == null || dicAmtLimits.isEmpty()) {
			// Nothing to write, so do not borrow a connection at all.
			return true;
		}
		Connection conn = null;
		PreparedStatement stm = null;
		try {
			conn = DataUtil.getInstance().getConnection(); // obtain a connection
			stm = conn.prepareStatement(INSERT_SQL);
			for (DicAmtLimit dicAmtLimit : dicAmtLimits) {
				stm.setString(1, dicAmtLimit.getSyscode());
				stm.setString(2, dicAmtLimit.getMt());
				stm.setString(3, dicAmtLimit.getTxtp());
				stm.setString(4, dicAmtLimit.getSndrbk());
				stm.setString(5, dicAmtLimit.getRcvbk());
				stm.setString(6, dicAmtLimit.getChcklvl());
				stm.setString(7, dicAmtLimit.getAmtupperlmt());
				stm.addBatch();
			}
			stm.executeBatch();
			return true;
		} catch (Exception e) {
			e.printStackTrace();
			return false;
		} finally {
			release(stm, conn);
		}
	}

	/**
	 * Releases the statement and connection, never throwing.
	 *
	 * <p>When a statement exists, both resources are handed to {@code DataUtil.close}, as the
	 * success path has always done. When only the connection was obtained, it is closed
	 * directly so that {@code DataUtil.close} is never called with a null statement.
	 */
	private void release(PreparedStatement stm, Connection conn) {
		try {
			if (stm != null) {
				DataUtil.getInstance().close(stm, conn);  // close statement and connection
			} else if (conn != null) {
				conn.close();
			}
		} catch (Exception e) {
			// A failure while cleaning up must not mask the outcome of the insert itself.
			e.printStackTrace();
		}
	}

}
           

  處理層(我自定義的,其實這層可以和dao層合并成一層):

package com.bosspay.processor;
import java.util.ArrayList;
import java.util.List;

import com.bosspay.dao.DicAmtLimitDao;
import com.bosspay.entity.DicAmtLimit;


/**
 * Buffers parsed {@link DicAmtLimit} rows and writes them to the database in batches.
 *
 * <p>Rows are handed in one at a time through {@link #addDicAmtLimit(DicAmtLimit)}; once the
 * buffer holds {@code BATCH_SIZE} rows it is flushed before the next row is stored.
 * {@link #lastInsert()} flushes whatever remains.
 */
public class DicAmtLimitProcessor {
	/** Number of buffered rows that triggers a flush to the database. */
	private static final int BATCH_SIZE = 5000;

	private final List<DicAmtLimit> dicAmtLimits = new ArrayList<>();
	private final DicAmtLimitDao dicAmtLimitDao = DicAmtLimitDao.getInstance();
	/** Total number of rows received so far. */
	private long num;
	
	/**
	 * Buffers one row, flushing the buffer first if it is already full.
	 *
	 * @param dicAmtLimit the row to buffer
	 */
	public void addDicAmtLimit(DicAmtLimit dicAmtLimit){
		if (this.dicAmtLimits.size() >= BATCH_SIZE) {
			this.dicAmtLimitDao.insertBatch(this.dicAmtLimits);
			this.dicAmtLimits.clear();
			System.out.println("目前處理的資料有"+num+"條!");
		}
		this.dicAmtLimits.add(dicAmtLimit);
		num += 1;
	}
	
	/**
	 * Flushes the rows still in the buffer and prints the total row count.
	 *
	 * <p>The buffer is emptied afterwards, so calling this method again does not insert
	 * the same rows a second time.
	 */
	public void lastInsert(){
		if (!this.dicAmtLimits.isEmpty()) {
			this.dicAmtLimitDao.insertBatch(this.dicAmtLimits);
			this.dicAmtLimits.clear();
		}
		System.out.println("總共處理的資料有"+num+"條!");
	}
}
           

  解析規則層:

package com.bosspay.processor;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.digester.Digester;
import org.xml.sax.SAXException;

import com.bosspay.entity.DicAmtLimit;



/**
 * Configures a {@link Digester} with the rules for the CCMS max-amount XML layout and
 * parses a file with it.
 *
 * <p>Each {@code CCMS_DATA/CCMS_MAXAMOUNT_DATA/ROW} element becomes a {@link DicAmtLimit}
 * that is passed to a {@link DicAmtLimitProcessor}; when the enclosing
 * {@code CCMS_MAXAMOUNT_DATA} element ends, {@link #clearList(DicAmtLimitProcessor)} is
 * invoked to flush the remaining rows.
 */
public class DicAmtLimitParse {
	private static final String CCMS_MAXAMOUNT_DATA = "CCMS_DATA/CCMS_MAXAMOUNT_DATA";
	private static final String ROW = CCMS_MAXAMOUNT_DATA+"/ROW";

	private Digester digester = new Digester();
	/** The digester instance on which the rules have already been registered, if any. */
	private Digester configuredDigester;
	
	/**
	 * Parses the XML file at the given path.
	 *
	 * <p>The rules are registered only once per digester instance, so calling this method
	 * repeatedly does not stack duplicate rules. The input stream is closed even when
	 * parsing fails.
	 *
	 * @param path file-system path of the XML file
	 * @throws IOException if the file cannot be opened or read
	 * @throws SAXException if the XML cannot be parsed
	 */
	public void parse(String path) throws IOException, SAXException{
		if (configuredDigester != digester) {
			configureRules(digester);
			configuredDigester = digester;
		}
		// Push this object so the set-next rule on CCMS_MAXAMOUNT_DATA has a parent
		// (this parser) on which to call clearList.
		digester.push(this);
		// Open the XML file and let the digester consume it.
		try (InputStream stream = new FileInputStream(new File(path))) {
			digester.parse(stream);
		}
	}

	/** Registers the object-creation, property-setter and set-next rules on the given digester. */
	private void configureRules(Digester target) {
		target.setValidating(false);
		// addObjectCreate: when the pattern (first argument) matches, create an instance of the
		// class (second argument) and push it onto the stack.
		// CCMS_DATA/CCMS_MAXAMOUNT_DATA -> DicAmtLimitProcessor
		target.addObjectCreate(CCMS_MAXAMOUNT_DATA, DicAmtLimitProcessor.class);
		// CCMS_DATA/CCMS_MAXAMOUNT_DATA/ROW -> DicAmtLimit
		target.addObjectCreate(ROW, DicAmtLimit.class);
		// Inside a ROW the stack therefore holds DicAmtLimitParse, DicAmtLimitProcessor, DicAmtLimit.
		// addBeanPropertySetter: when the pattern matches, call the setter of the named property
		// on the top-of-stack object, passing the element's text content.
		target.addBeanPropertySetter(ROW+"/SYSCODE", "syscode");
		target.addBeanPropertySetter(ROW+"/SENDBANK", "sndrbk");
		target.addBeanPropertySetter(ROW+"/RECVBANK", "rcvbk");
		target.addBeanPropertySetter(ROW+"/MSGTYPE", "mt");
		target.addBeanPropertySetter(ROW+"/BIZTYPE", "txtp");
		target.addBeanPropertySetter(ROW+"/AMTLMT", "amtupperlmt");
		target.addBeanPropertySetter(ROW+"/CHKLEVEL", "chcklvl");
		// addSetNext: at the end of the matched element, call the named method on the object
		// just below the top of the stack, passing the top object as the argument.
		// End of ROW -> DicAmtLimitProcessor.addDicAmtLimit(DicAmtLimit)
		target.addSetNext(ROW, "addDicAmtLimit");
		// End of CCMS_MAXAMOUNT_DATA -> DicAmtLimitParse.clearList(DicAmtLimitProcessor)
		target.addSetNext(CCMS_MAXAMOUNT_DATA, "clearList");
	}
	
	/**
	 * Callback for the end of the {@code CCMS_MAXAMOUNT_DATA} element; flushes the rows that
	 * the processor still has buffered.
	 *
	 * @param xmlProcessor the processor created for the element that just ended
	 */
	public void clearList(DicAmtLimitProcessor xmlProcessor){
		xmlProcessor.lastInsert();
	}
	
	/** @return the digester used by {@link #parse(String)} */
	public Digester getDigester() {
		return digester;
	}
	
	/**
	 * Replaces the digester; the rules are registered on the new instance by the next
	 * {@link #parse(String)} call.
	 *
	 * @param digester the digester to use from now on
	 */
	public void setDigester(Digester digester) {
		this.digester = digester;
	}
}
           

  要注意的是,digester實際上就是設定解析規則,當碰到定義好的規則時就執行對應的方法,當xml結構重複的時候它就重複執行對應的方法。有的時候可能會碰到這樣的情況:xml轉換成java對象時屬性重複(xml元素重複),這時我們可以修改java bean屬性的setter方法,用數組、集合或者字元串拼接等方法存儲重複屬性的值,反正digester只認setter方法,就算沒有對應的屬性都行,我用代碼加注釋來說明:

xml檔案:

    <ROW>

      <mmbCd>10086</mmbCd>

      <mmbCd>10010</mmbCd>

      <mmbCd>10000</mmbCd>

    </ROW>

bean中:

private String mmbCds = ""; // only mmbCds is declared; there is no mmbCd field

/**
 * Setter for the "mmbCd" property, which has no backing field of its own.
 *
 * <p>Each call appends the given value to {@code mmbCds}, separating values with a comma,
 * so repeated calls accumulate a comma-separated list.
 *
 * @param mmbCd the value to append
 */
public void setMmbCd(String mmbCd) {
	this.mmbCds = this.mmbCds.isEmpty() ? mmbCd : this.mmbCds + "," + mmbCd;
}

規則設定時:

// For every ROW/mmbCd element, pass its text to the "mmbCd" property setter of the top-of-stack object.
digester.addBeanPropertySetter(ROW+"/mmbCd", "mmbCd");

解析生成的對象mmbCds屬性的值是 10086,10010,10000 。

digester用的是反射機制,所以我們可以根據反射原理适當更改代碼,将複雜的結構簡單解析。