天天看点

用Java解压、修改并重新打包docx文档的写法

package com;  

import static org.junit.Assert.assertEquals;  

import java.io.ByteArrayOutputStream;  
import java.io.File;  
import java.io.FileOutputStream;  
import java.io.IOException;  
import java.io.InputStream;  
import java.util.Enumeration;  
import java.util.zip.ZipEntry;  
import java.util.zip.ZipException;  
import java.util.zip.ZipFile;  
import java.util.zip.ZipOutputStream;  

import javax.xml.parsers.DocumentBuilderFactory;  
import javax.xml.parsers.ParserConfigurationException;  
import javax.xml.transform.Transformer;  
import javax.xml.transform.TransformerConfigurationException;  
import javax.xml.transform.TransformerException;  
import javax.xml.transform.TransformerFactory;  
import javax.xml.transform.dom.DOMSource;  
import javax.xml.transform.stream.StreamResult;  

import org.junit.Test;  
import org.w3c.dom.Document;  
import org.w3c.dom.Element;  
import org.xml.sax.SAXException;  

public class InputToword {  

    @Test public void modifyDocumentAndSave()throws IOException, ZipException,   
    SAXException,ParserConfigurationException,TransformerException,TransformerConfigurationException  
    {  
        //读取e盘下的hello.docx文档  
        ZipFile docxFile =new ZipFile(new File("e:\\hello.docx"));  
        //解压缩后获得里面和内容相关的xml,word文档是可以解压的,大家可以解压了试试  
        ZipEntry documentXML =docxFile.getEntry("word/document.xml");  
        InputStream documentXMLIS =docxFile.getInputStream(documentXML);  
        DocumentBuilderFactory dbf =DocumentBuilderFactory.newInstance();  
        Document doc =dbf.newDocumentBuilder().parse(documentXMLIS);  

        //获得文档里相关的节点  
        Element docElement = doc.getDocumentElement();  
        assertEquals("w:document", docElement.getTagName());  

        Element bodyElement = (Element)docElement.getElementsByTagName("w:body").item();  
        assertEquals("w:body", bodyElement.getTagName());  

        Element pElement = (Element)bodyElement.getElementsByTagName("w:p").item();  
        assertEquals("w:p", pElement.getTagName());  

        Element rElement = (Element)pElement.getElementsByTagName("w:r").item();  
        assertEquals("w:r", rElement.getTagName());  

        Element tElement = (Element)rElement.getElementsByTagName("w:t").item();  
        assertEquals("w:t", tElement.getTagName());  

        //查找文档中的Hello, from Office !文字部分  
        assertEquals("Hello, from Office 2007!",tElement.getTextContent());  
        //写入新的内容  
        tElement.setTextContent("哈哈,终于可以用java写word了,Hello, Office 2007, from Java6!");  

        Transformer t =TransformerFactory.newInstance().newTransformer();  
        ByteArrayOutputStream baos =new ByteArrayOutputStream();  
        t.transform(new DOMSource(doc),  
        new StreamResult(baos));  

        //创建新的要输出的word文档,按钮原来word文档的内容写入新的文档中。  
        ZipOutputStream docxOutFile = new ZipOutputStream(new FileOutputStream("e:\\response.docx"));  
        Enumeration entriesIter =docxFile.entries();  
        while (entriesIter.hasMoreElements())  
        {  
            ZipEntry entry = (ZipEntry) entriesIter.nextElement();  

            if (entry.getName().equals("word/document.xml"))  
            {  
                byte[] data = baos.toByteArray();  
                docxOutFile.putNextEntry(new ZipEntry(entry.getName()));  
                docxOutFile.write(data, , data.length);  
                docxOutFile.closeEntry();  
            }  
            else  
            {  
                InputStream incoming =docxFile.getInputStream(entry);  
                byte[] data = new byte[ * ];  
                int readCount =incoming.read(data, , data.length);  
                docxOutFile.putNextEntry(new ZipEntry(entry.getName()));  
                docxOutFile.write(data, , readCount);  
                docxOutFile.closeEntry();  
            }  
        }  
        docxOutFile.close();  

    }  

}  
           

可以将.docx文件看作是ZIP格式的压缩文件。