天天看點

POI實作word轉html(帶圖檔),實作word線上預覽

項目後端使用了springboot,maven,前端使用了ckeditor富文本編輯器。目前從html轉換的word為doc格式,而圖檔處理支援的是docx格式,是以需要手動把doc另存為docx,然後才可以進行圖檔替換。

一.添加maven依賴

主要使用了以下和poi相關的依賴,為了便於擷取html中的圖檔元素,還使用了jsoup:

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>      

二.word轉換為html

在springboot項目的resources目錄下建立static檔案夾,將需要轉換的word檔案(test.doc或test.docx,檔名需與下方程式碼一致)粘貼進去。由於static是springboot的預設靜態資源目錄,所以不需要在配置檔案裡面另行配置;如果改成其他名字,需要在application.yml進行相應配置。

doc格式轉換為html:

/**
 * Converts the binary Word document {@code static/test.doc} (resolved against the
 * classpath root) into {@code static/test2.html}.
 *
 * <p>Pictures embedded in the document are written to {@code static/image/} and are
 * referenced from the generated HTML with the relative URL {@code image/<name>}.
 *
 * @return the absolute path of the generated HTML file
 * @throws Exception if the source document cannot be read or the HTML cannot be written
 */
public static String docToHtml() throws Exception {
    File path = new File(ResourceUtils.getURL("classpath:").getPath());
    // Compose paths through File so the separator matches the host OS; a literal
    // "\\" only works on Windows.
    File staticDir = new File(path.getAbsolutePath(), "static");
    File imageDir = new File(staticDir, "image");
    File sourceFile = new File(staticDir, "test.doc");
    File targetFile = new File(staticDir, "test2.html");
    if (!imageDir.exists()) {
        imageDir.mkdirs();
    }

    // The input stream is released as soon as the document has been constructed,
    // including when parsing fails.
    HWPFDocument wordDocument;
    try (FileInputStream in = new FileInputStream(sourceFile)) {
        wordDocument = new HWPFDocument(in);
    }

    org.w3c.dom.Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
    // Save each picture and return the relative path used in the HTML.
    wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
        try (FileOutputStream out = new FileOutputStream(new File(imageDir, name))) {
            out.write(content);
        } catch (Exception e) {
            // Best effort: a picture that cannot be saved must not abort the whole
            // conversion; the HTML will simply reference a missing image.
            e.printStackTrace();
        }
        return "image/" + name;
    });
    wordToHtmlConverter.processDocument(wordDocument);

    DOMSource domSource = new DOMSource(wordToHtmlConverter.getDocument());
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    // Write through a stream this method owns so the file handle is closed
    // deterministically, even if the transformation fails.
    try (FileOutputStream htmlOut = new FileOutputStream(targetFile)) {
        serializer.transform(domSource, new StreamResult(htmlOut));
    }
    return targetFile.getAbsolutePath();
}
/**
 * Converts the OOXML Word document {@code static/test.docx} (resolved against the
 * classpath root) into {@code static/test.html}.
 *
 * <p>Pictures are extracted into {@code static/image} and are referenced from the
 * generated HTML through the relative base URI {@code image}.
 *
 * @return the absolute path of the generated HTML file
 * @throws Exception if the source document cannot be read or the HTML cannot be written
 */
public static String docxToHtml() throws Exception {
    File path = new File(ResourceUtils.getURL("classpath:").getPath());
    // Compose paths through File so the separator matches the host OS; a literal
    // "\\" only works on Windows.
    File staticDir = new File(path.getAbsolutePath(), "static");
    File imageDir = new File(staticDir, "image");
    File sourceFile = new File(staticDir, "test.docx");
    File targetFile = new File(staticDir, "test.html");

    // Load the document first and release the input stream right away, so a missing
    // or corrupt source neither leaks a handle nor truncates an existing target file.
    XWPFDocument document;
    try (FileInputStream in = new FileInputStream(sourceFile)) {
        document = new XWPFDocument(in);
    }

    XHTMLOptions options = XHTMLOptions.create();
    // Folder that receives the extracted pictures.
    options.setExtractor(new FileImageExtractor(imageDir));
    // Path prefix used for pictures inside the HTML.
    options.URIResolver(new BasicURIResolver("image"));

    try (OutputStreamWriter outputStreamWriter = new OutputStreamWriter(
            new FileOutputStream(targetFile), java.nio.charset.StandardCharsets.UTF_8)) {
        XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
        xhtmlConverter.convert(document, outputStreamWriter, options);
    }
    return targetFile.getAbsolutePath();
}
/**
 * Reads the whole file at {@code filePath} and returns its content decoded as UTF-8.
 *
 * <p>The bytes are decoded in a single pass. Decoding fixed-size chunks one by one
 * would corrupt any multi-byte character that happens to straddle a chunk boundary.
 *
 * @param filePath path of the file to read
 * @return the file content as a string; empty if the file is empty
 * @throws java.io.UncheckedIOException if the file does not exist or cannot be read
 */
public static String readfile(String filePath) {
    try {
        byte[] content = java.nio.file.Files.readAllBytes(new File(filePath).toPath());
        return new String(content, java.nio.charset.StandardCharsets.UTF_8);
    } catch (IOException e) {
        // Fail with the real cause instead of returning truncated text or hitting a
        // NullPointerException on a stream that was never opened.
        throw new java.io.UncheckedIOException("Failed to read file: " + filePath, e);
    }
}