前導:
- 開發過程中經常需要使用 Java 將 Office 系列文檔轉換為 PDF,一般都使用 Apache 提供的 OpenOffice + jodconverter 來實作文檔轉換。
- openoffice既有windows版本也有linux版。不用擔心生産環境是linux系統。
- 關于linux系統安裝openoffice軟體請參照:待更新...
java使用SWFTools将PDF轉成swf并使用flexpaper播放PDF
1、openoffice依賴jar,以maven為例:
<dependency>
<groupId>com.artofsolving</groupId>
<artifactId>jodconverter</artifactId>
<version>2.2.1</version>
</dependency>
<dependency>
<groupId>org.openoffice</groupId>
<artifactId>jurt</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.openoffice</groupId>
<artifactId>ridl</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.openoffice</groupId>
<artifactId>juh</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.openoffice</groupId>
<artifactId>unoil</artifactId>
<version>3.0.1</version>
</dependency>
<!-- jodconverter 2.2.1 必須依賴 slf4j-jdk14,而且必須是這個版本(1.4.3),否則源碼中的日志調用會報錯,這是一個很低級的問題 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-jdk14</artifactId>
<version>1.4.3</version>
</dependency>
2、直接上轉換代碼。執行前需先啟動 OpenOffice 服務,並讓它監聽 8100 端口,例如:soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard
/**
 * Converts {@code sourceFile} to {@code targetFile} through an OpenOffice
 * connection on port 8100. The input and output formats are looked up from
 * the extensions of the two file paths.
 *
 * <p>A {@link ConnectException} is logged together with its cause and is not
 * rethrown. The connection is closed again whether or not the conversion
 * succeeds.
 *
 * @param sourceFile the document handed to the converter as input
 * @param targetFile the file handed to the converter as output
 */
public void convert(File sourceFile, File targetFile) {
    // Created outside the try block so the finally clause can always release it.
    OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);
    try {
        // 1: open the connection
        connection.connect();
        DocumentConverter converter = new OpenOfficeDocumentConverter(connection);

        // 2: look up the formats by file extension
        // NOTE(review): a freshly constructed BasicDocumentFormatRegistry has no
        // formats registered, so both lookups presumably return null and the
        // converter falls back to its own format detection - confirm, or switch
        // to DefaultDocumentFormatRegistry.
        DocumentFormatRegistry factory = new BasicDocumentFormatRegistry();
        DocumentFormat inputDocumentFormat = factory
                .getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath()));
        DocumentFormat outputDocumentFormat = factory
                .getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath()));

        // 3: run the conversion
        converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);
    } catch (ConnectException e) {
        // Keep the cause in the log; otherwise the failure cannot be diagnosed.
        log.error("文檔轉換PDF失敗", e);
    } finally {
        // 4: release the socket, otherwise every call leaks one connection
        if (connection.isConnected()) {
            connection.disconnect();
        }
    }
}
3、需注意:jodconverter 在轉換 2007 及以後版本的 xxx.docx 文檔時會報錯。原因大家都明白:Office 2003 的字尾名是 xxx.doc,2007 以後的版本則是 xxx.docx。
檢視 jodconverter 源碼發現,documentFormat 不支援 xxx.docx 格式:BasicDocumentFormatRegistry 中的 public DocumentFormat getFormatByFileExtension(String extension) 只會比對已註冊的字尾名,而預設註冊的是 doc 格式,沒有 docx。
BasicDocumentFormatRegistry類源碼
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <[email protected]>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * A {@link DocumentFormatRegistry} that keeps its formats in a list and
 * resolves them by file extension or MIME type with a linear scan.
 * The registry starts out empty; formats are added with
 * {@link #addDocumentFormat(DocumentFormat)}.
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

    private List/*<DocumentFormat>*/ documentFormats = new ArrayList();

    /**
     * Appends a format to the registry.
     *
     * @param documentFormat the format to register
     */
    public void addDocumentFormat(DocumentFormat documentFormat) {
        documentFormats.add(documentFormat);
    }

    /**
     * @return the backing list of registered formats (not a copy)
     */
    protected List/*<DocumentFormat>*/ getDocumentFormats() {
        return documentFormats;
    }

    /**
     * Finds the first registered format whose extension equals the given one,
     * ignoring case.
     *
     * @param extension the file extension
     * @return the DocumentFormat for this extension, or null if the extension is
     *         null or not mapped
     */
    public DocumentFormat getFormatByFileExtension(String extension) {
        if (extension == null) {
            return null;
        }
        // Use a fixed locale: with the JVM default locale the result depends on the
        // machine (e.g. "GIF" lower-cases to "g\u0131f" under a Turkish locale).
        String lowerExtension = extension.toLowerCase(java.util.Locale.ENGLISH);
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            if (format.getFileExtension().equals(lowerExtension)) {
                return format;
            }
        }
        return null;
    }

    /**
     * Finds the first registered format whose MIME type equals the given one.
     *
     * @param mimeType the MIME type to look for
     * @return the matching DocumentFormat, or null if none is registered
     */
    public DocumentFormat getFormatByMimeType(String mimeType) {
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            if (format.getMimeType().equals(mimeType)) {
                return format;
            }
        }
        return null;
    }
}
BasicDocumentFormatRegistry的預設實作類DefaultDocumentFormatRegistry 中支援的檔案格式如下
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <[email protected]>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;
/**
 * A {@link BasicDocumentFormatRegistry} that registers a fixed set of document
 * formats in its constructor, each with its export filter names per
 * {@link DocumentFamily}.
 */
public class DefaultDocumentFormatRegistry extends BasicDocumentFormatRegistry {

    /** Filter name shared by the CSV and TSV formats, for import and export. */
    private static final String STAR_CALC_TEXT_FILTER = "Text - txt - csv (StarCalc)";

    /**
     * Registers all formats. The groups are registered in a fixed order:
     * family-independent export formats, text, spreadsheet, presentation, drawing.
     */
    public DefaultDocumentFormatRegistry() {
        registerGeneralExportFormats();
        registerTextFormats();
        registerSpreadsheetFormats();
        registerPresentationFormats();
        registerDrawingFormats();
    }

    /** Registers PDF, Flash and XHTML, which are declared without a document family. */
    private void registerGeneralExportFormats() {
        final DocumentFormat pdfFormat = new DocumentFormat("Portable Document Format", "application/pdf", "pdf");
        pdfFormat.setExportFilter(DocumentFamily.DRAWING, "draw_pdf_Export");
        pdfFormat.setExportFilter(DocumentFamily.PRESENTATION, "impress_pdf_Export");
        pdfFormat.setExportFilter(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
        pdfFormat.setExportFilter(DocumentFamily.TEXT, "writer_pdf_Export");
        addDocumentFormat(pdfFormat);

        final DocumentFormat flashFormat = new DocumentFormat("Macromedia Flash", "application/x-shockwave-flash", "swf");
        flashFormat.setExportFilter(DocumentFamily.DRAWING, "draw_flash_Export");
        flashFormat.setExportFilter(DocumentFamily.PRESENTATION, "impress_flash_Export");
        addDocumentFormat(flashFormat);

        final DocumentFormat xhtmlFormat = new DocumentFormat("XHTML", "application/xhtml+xml", "xhtml");
        xhtmlFormat.setExportFilter(DocumentFamily.PRESENTATION, "XHTML Impress File");
        xhtmlFormat.setExportFilter(DocumentFamily.SPREADSHEET, "XHTML Calc File");
        xhtmlFormat.setExportFilter(DocumentFamily.TEXT, "XHTML Writer File");
        addDocumentFormat(xhtmlFormat);
    }

    /** Registers HTML, ODT, SXW, DOC, RTF, WordPerfect, plain text and MediaWiki. */
    private void registerTextFormats() {
        // As input HTML belongs to the TEXT family; as output it can also be
        // produced from spreadsheets and presentations.
        final DocumentFormat htmlFormat = new DocumentFormat("HTML", DocumentFamily.TEXT, "text/html", "html");
        htmlFormat.setExportFilter(DocumentFamily.PRESENTATION, "impress_html_Export");
        htmlFormat.setExportFilter(DocumentFamily.SPREADSHEET, "HTML (StarCalc)");
        htmlFormat.setExportFilter(DocumentFamily.TEXT, "HTML (StarWriter)");
        addDocumentFormat(htmlFormat);

        final DocumentFormat odtFormat = new DocumentFormat("OpenDocument Text", DocumentFamily.TEXT, "application/vnd.oasis.opendocument.text", "odt");
        odtFormat.setExportFilter(DocumentFamily.TEXT, "writer8");
        addDocumentFormat(odtFormat);

        final DocumentFormat sxwFormat = new DocumentFormat("OpenOffice.org 1.0 Text Document", DocumentFamily.TEXT, "application/vnd.sun.xml.writer", "sxw");
        sxwFormat.setExportFilter(DocumentFamily.TEXT, "StarOffice XML (Writer)");
        addDocumentFormat(sxwFormat);

        final DocumentFormat wordFormat = new DocumentFormat("Microsoft Word", DocumentFamily.TEXT, "application/msword", "doc");
        wordFormat.setExportFilter(DocumentFamily.TEXT, "MS Word 97");
        addDocumentFormat(wordFormat);

        final DocumentFormat rtfFormat = new DocumentFormat("Rich Text Format", DocumentFamily.TEXT, "text/rtf", "rtf");
        rtfFormat.setExportFilter(DocumentFamily.TEXT, "Rich Text Format");
        addDocumentFormat(rtfFormat);

        // WordPerfect gets no export filter.
        final DocumentFormat wordPerfectFormat = new DocumentFormat("WordPerfect", DocumentFamily.TEXT, "application/wordperfect", "wpd");
        addDocumentFormat(wordPerfectFormat);

        final DocumentFormat plainTextFormat = new DocumentFormat("Plain Text", DocumentFamily.TEXT, "text/plain", "txt");
        // FilterName "Text" keeps OOo from trying to show the "ASCII Filter Options"
        // dialog; "Text (encoded)" plus FilterOptions would be the alternative when
        // an explicit encoding is needed.
        plainTextFormat.setImportOption("FilterName", "Text");
        plainTextFormat.setExportFilter(DocumentFamily.TEXT, "Text");
        addDocumentFormat(plainTextFormat);

        final DocumentFormat mediaWikiFormat = new DocumentFormat("MediaWiki wikitext", "text/x-wiki", "wiki");
        mediaWikiFormat.setExportFilter(DocumentFamily.TEXT, "MediaWiki");
        addDocumentFormat(mediaWikiFormat);
    }

    /** Registers ODS, SXC, XLS, CSV and TSV. */
    private void registerSpreadsheetFormats() {
        final DocumentFormat odsFormat = new DocumentFormat("OpenDocument Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.oasis.opendocument.spreadsheet", "ods");
        odsFormat.setExportFilter(DocumentFamily.SPREADSHEET, "calc8");
        addDocumentFormat(odsFormat);

        final DocumentFormat sxcFormat = new DocumentFormat("OpenOffice.org 1.0 Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.sun.xml.calc", "sxc");
        sxcFormat.setExportFilter(DocumentFamily.SPREADSHEET, "StarOffice XML (Calc)");
        addDocumentFormat(sxcFormat);

        final DocumentFormat excelFormat = new DocumentFormat("Microsoft Excel", DocumentFamily.SPREADSHEET, "application/vnd.ms-excel", "xls");
        excelFormat.setExportFilter(DocumentFamily.SPREADSHEET, "MS Excel 97");
        addDocumentFormat(excelFormat);

        // Field separator ',' (44), text delimiter '"' (34).
        final String commaOptions = "44,34,0";
        final DocumentFormat csvFormat = new DocumentFormat("CSV", DocumentFamily.SPREADSHEET, "text/csv", "csv");
        csvFormat.setImportOption("FilterName", STAR_CALC_TEXT_FILTER);
        csvFormat.setImportOption("FilterOptions", commaOptions);
        csvFormat.setExportFilter(DocumentFamily.SPREADSHEET, STAR_CALC_TEXT_FILTER);
        csvFormat.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", commaOptions);
        addDocumentFormat(csvFormat);

        // Field separator '\t' (9), text delimiter '"' (34).
        final String tabOptions = "9,34,0";
        final DocumentFormat tsvFormat = new DocumentFormat("Tab-separated Values", DocumentFamily.SPREADSHEET, "text/tab-separated-values", "tsv");
        tsvFormat.setImportOption("FilterName", STAR_CALC_TEXT_FILTER);
        tsvFormat.setImportOption("FilterOptions", tabOptions);
        tsvFormat.setExportFilter(DocumentFamily.SPREADSHEET, STAR_CALC_TEXT_FILTER);
        tsvFormat.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", tabOptions);
        addDocumentFormat(tsvFormat);
    }

    /** Registers ODP, SXI and PPT. */
    private void registerPresentationFormats() {
        final DocumentFormat odpFormat = new DocumentFormat("OpenDocument Presentation", DocumentFamily.PRESENTATION, "application/vnd.oasis.opendocument.presentation", "odp");
        odpFormat.setExportFilter(DocumentFamily.PRESENTATION, "impress8");
        addDocumentFormat(odpFormat);

        final DocumentFormat sxiFormat = new DocumentFormat("OpenOffice.org 1.0 Presentation", DocumentFamily.PRESENTATION, "application/vnd.sun.xml.impress", "sxi");
        sxiFormat.setExportFilter(DocumentFamily.PRESENTATION, "StarOffice XML (Impress)");
        addDocumentFormat(sxiFormat);

        final DocumentFormat powerPointFormat = new DocumentFormat("Microsoft PowerPoint", DocumentFamily.PRESENTATION, "application/vnd.ms-powerpoint", "ppt");
        powerPointFormat.setExportFilter(DocumentFamily.PRESENTATION, "MS PowerPoint 97");
        addDocumentFormat(powerPointFormat);
    }

    /** Registers ODG and SVG. */
    private void registerDrawingFormats() {
        final DocumentFormat odgFormat = new DocumentFormat("OpenDocument Drawing", DocumentFamily.DRAWING, "application/vnd.oasis.opendocument.graphics", "odg");
        odgFormat.setExportFilter(DocumentFamily.DRAWING, "draw8");
        addDocumentFormat(odgFormat);

        final DocumentFormat svgFormat = new DocumentFormat("Scalable Vector Graphics", "image/svg+xml", "svg");
        svgFormat.setExportFilter(DocumentFamily.DRAWING, "draw_svg_Export");
        addDocumentFormat(svgFormat);
    }
}
解決方法:重寫 BasicDocumentFormatRegistry 類中的 public DocumentFormat getFormatByFileExtension(String extension) 方法(即在專案中以相同的包名 com.artofsolving.jodconverter 與類名建立該類),只要字尾名包含 doc,就使用 doc 的 documentFormat 文檔格式;ppt、xls 同理。
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <[email protected]>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* 重寫 BasicDocumentFormatRegistry 文檔格式
* @author HuGuangJun
*/
/**
 * Replacement for JODConverter's {@code BasicDocumentFormatRegistry} that maps
 * any extension containing "doc", "ppt" or "xls" (for example docx, pptx,
 * xlsx) to the plain "doc", "ppt" or "xls" extension before the lookup.
 *
 * @author HuGuangJun
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

    private List/* <DocumentFormat> */ documentFormats = new ArrayList();

    /**
     * Appends a format to the registry.
     *
     * @param documentFormat the format to register
     */
    public void addDocumentFormat(DocumentFormat documentFormat) {
        documentFormats.add(documentFormat);
    }

    /**
     * @return the backing list of registered formats (not a copy)
     */
    protected List/* <DocumentFormat> */ getDocumentFormats() {
        return documentFormats;
    }

    /**
     * Finds the first registered format for the given extension. The extension
     * is lower-cased first and then mapped to "doc", "ppt" or "xls" when it
     * contains one of them, so "docx", "DOCX" and "Doc" all resolve to "doc".
     *
     * @param extension
     *            the file extension
     * @return the DocumentFormat for this extension, or null if the extension
     *         is null or not mapped
     */
    public DocumentFormat getFormatByFileExtension(String extension) {
        if (extension == null) {
            return null;
        }
        // Lower-case BEFORE the substring checks so upper- or mixed-case extensions
        // are mapped too. A fixed locale keeps the result machine-independent
        // (e.g. "XLSI" would lower-case to "xls\u0131" under a Turkish locale).
        String lowerExtension = toLegacyOfficeExtension(
                extension.toLowerCase(java.util.Locale.ENGLISH));
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            if (format.getFileExtension().equals(lowerExtension)) {
                return format;
            }
        }
        return null;
    }

    /**
     * Maps an already lower-cased extension to "doc", "ppt" or "xls" when it
     * contains one of them (checked in that order); any other extension is
     * returned unchanged.
     */
    private static String toLegacyOfficeExtension(String lowerExtension) {
        if (lowerExtension.indexOf("doc") >= 0) {
            return "doc";
        }
        if (lowerExtension.indexOf("ppt") >= 0) {
            return "ppt";
        }
        if (lowerExtension.indexOf("xls") >= 0) {
            return "xls";
        }
        return lowerExtension;
    }

    /**
     * Finds the first registered format whose MIME type equals the given one.
     *
     * @param mimeType the MIME type to look for
     * @return the matching DocumentFormat, or null if none is registered
     */
    public DocumentFormat getFormatByMimeType(String mimeType) {
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            if (format.getMimeType().equals(mimeType)) {
                return format;
            }
        }
        return null;
    }
}