使用XDocReport将HTML格式資料轉換為Word

文檔位址：https://github.com/opensagres/xdocreport/wiki/DocxReportingQuickStart

本文采用XDocReport結合Freemarker進行處理

1. 引入Maven依賴：

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>2.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.velocity</groupId>
    <artifactId>velocity-engine-core</artifactId>
    <version>2.0</version>
</dependency>
<dependency>
    <groupId>org.freemarker</groupId>
    <artifactId>freemarker</artifactId>
    <version>2.3.23</version>
</dependency>

2. 建立Word模版

建立Word，在光标處通過快捷鍵Ctrl+F9 或工具欄“插入”->“文檔部件或文本”->“域”

根據電腦系統不同出現的界面不同，但内容都差不多，${text} 這個text就是後期要替換的變量了。

3. Java代碼處理邏輯

1 String templateFilePath = request.getSession().getServletContext().getRealPath("/WEB-INF/templates/freemarkerTest.docx");
 2 File file = new File(templateFilePath);
 3 InputStream in = new FileInputStream(file);
 4 IXDocReport report;
 5 String targetPath = basePath + lawDownDto.getLawsName() + ".docx";
 6 try {
 7     report = XDocReportRegistry.getRegistry().loadReport(in, TemplateEngineKind.Freemarker);
 8     // 設定内容為HTML格式
 9     FieldsMetadata metadata = report.createFieldsMetadata();
10     metadata.addFieldAsTextStyling("text", SyntaxKind.Html);   
11 
12     // 建立内容-text為模版中對應都變量名稱
13     IContext context = report.createContext();
14     context.put("text", content);
15             
16     // 生成檔案
17     OutputStream out = new FileOutputStream(targetPath);
18     report.process(context, out);
19 } catch (XDocReportException e) {
20     e.printStackTrace();
21 }

檔案下載：在生成檔案邏輯後建立讀取流傳回即可。

=============================================================

如果檔案中有圖檔需要處理：

　　圖檔方案一：單個圖檔且位置固定，可通過XDocReport配置模版處理

　　圖檔方案二：多個圖檔且位置不固定，可通過POI結合Freemarker進行處理

圖檔方案一：

　　1. 在模版中插入臨時圖檔，選中圖檔并添加“書簽”，書簽名稱是後續作為替換的變量

　　2. 代碼中追加邏輯

　　在上面代碼第10行後追加

// "logo" is the name of the bookmark placed on the image in the template
metadata.addFieldAsImage("logo");
report.setFieldsMetadata(metadata);

　　在上面代碼14行後追加

// An IImageProvider can be created in three ways (from a File, an I/O stream, or a
// classpath resource); see the "Dynamic Image" section of the documentation linked at the top.
File logoFile = new File("1950737_195902644.png");
IImageProvider logoProvider = new FileImageProvider(logoFile);
context.put("logo", logoProvider);

圖檔方案二：

　　1. 在上面讀取模版之前進行資料替換

// Replace every <img> tag in the HTML with an ${imgReplaceN} placeholder and collect,
// keyed by that placeholder, the data needed to insert the picture afterwards.
Map<String, Object> param = new HashMap<String, Object>();
if (StringUtils.isNotBlank(content)) {
    content = HtmlUtils.htmlUnescape(content);
    List<HashMap<String, String>> imgs = getImgStrContent(content);
    int count = 0;
    for (HashMap<String, String> img : imgs) {
        count++;
        String placeholder = "${imgReplace" + count + "}";
        // Replace the img tag written with a "/>" ending
        content = content.replace(img.get("img"), placeholder);
        // Replace the img tag written with a ">" ending
        content = content.replace(img.get("img1"), placeholder);

        Map<String, Object> header = new HashMap<String, Object>();
        // Default to 150x150 when either dimension is missing. The same default is used
        // when a value is not a plain number (e.g. "100px" or "50%"), which previously
        // raised NumberFormatException.
        int width = 150;
        int height = 150;
        String rawWidth = img.get("width");
        String rawHeight = img.get("height");
        if (rawWidth != null && rawHeight != null) {
            try {
                int parsedWidth = (int) Double.parseDouble(rawWidth);
                int parsedHeight = (int) Double.parseDouble(rawHeight);
                width = parsedWidth;
                height = parsedHeight;
            } catch (NumberFormatException ignored) {
                // Non-numeric size attribute: keep the 150x150 default.
            }
        }
        header.put("width", width);
        header.put("height", height);

        String src = img.get("src");
        if (StringUtils.isNotBlank(src)) {
            // "type" is the file extension including the dot. A src without any dot
            // previously made substring(-1, ...) throw; now "type" is simply omitted.
            // NOTE(review): confirm the consumer of param tolerates a missing "type".
            int dotIndex = src.lastIndexOf('.');
            if (dotIndex >= 0) {
                header.put("type", src.substring(dotIndex));
            }
            header.put("content", this.imageToInputStream(src));
        }
        param.put(placeholder, header);
    }
}

/**
 * Collects information about every {@code img} element found in the given HTML.
 *
 * <p>Each returned map holds: {@code "img1"} (the element's markup as rendered by
 * {@code toString()}), {@code "img"} (the same markup with its last character replaced
 * by {@code "/>"}), {@code "src"}, and, only when the attribute is non-empty,
 * {@code "width"} and {@code "height"}.
 *
 * @param htmlStr HTML text to scan
 * @return one map per {@code img} element, in selection order; empty when none is found
 */
private  List<HashMap<String, String>> getImgStrContent(String htmlStr) {
    List<HashMap<String, String>> result = new ArrayList<HashMap<String, String>>();
    Document parsed = Jsoup.parse(htmlStr);
    if (parsed == null) {
        return result;
    }
    Elements imageElements = parsed.select("img");
    if (imageElements == null || imageElements.size() == 0) {
        return result;
    }
    for (Element imageElement : imageElements) {
        String markup = imageElement.toString();
        String widthAttr = imageElement.attr("width");
        String heightAttr = imageElement.attr("height");

        HashMap<String, String> info = new HashMap<String, String>();
        if (!"".equals(widthAttr)) {
            info.put("width", widthAttr);
        }
        if (!"".equals(heightAttr)) {
            info.put("height", heightAttr);
        }
        // Self-closing variant: drop the trailing character and append "/>".
        info.put("img", markup.substring(0, markup.length() - 1) + "/>");
        info.put("img1", markup);
        info.put("src", imageElement.attr("src"));
        result.add(info);
    }
    return result;
}

// Read the generated file fully into memory.
readStream = new FileInputStream(targetPath);
ByteArrayOutputStream docxOs = new ByteArrayOutputStream();
try {
    int b = 0;
    byte[] buf = new byte[1024];
    while ((b = readStream.read(buf)) != -1) {
        docxOs.write(buf, 0, b);
    }
} finally {
    // The file stream was never closed in the original listing.
    readStream.close();
}
byte[] docxBytes = docxOs.toByteArray();
docxResponseStream = new ByteArrayInputStream(docxBytes);
// Create the Word document object (this reads docxResponseStream).
XWPFDocument document = new XWPFDocument(docxResponseStream);
newOS = new ByteArrayOutputStream();
// "document" comes straight from "new", so only param needs a null check.
if (param != null) {
    // Generate the Word document with images (email the author for the WordUtil helper class).
    XWPFDocument customXWPFDocument = WordUtil.getWord(param, document);
    // Set table border styles (covered in a separate article)
    // List<XWPFTable> list = formatTableBorder(customXWPFDocument);
    // Handle merged cells (covered in a separate article)
    // mergeCell(content, list);
    // Write to the output stream and return it.
    customXWPFDocument.write(newOS);
    document.close();
    customXWPFDocument.close();
    resultInpu = new ByteArrayInputStream(newOS.toByteArray());
} else {
    // docxResponseStream has already been read by the XWPFDocument constructor, so
    // return a fresh stream over the same bytes, and release the document as well.
    document.close();
    resultInpu = new ByteArrayInputStream(docxBytes);
}

以上内容即可完成Word中多圖檔的動态展示。

後續會寫處理表格邊框、單元格合并及段落的相關内容。