天天看點

Java使用openoffice将office系列文檔轉換為PDF

前導:

  1. 開發過程中經常需要使用java将office系列文檔轉換為PDF,一般都使用 Apache OpenOffice(開源辦公套件,並非微軟産品)+ jodconverter 實作文檔轉換。
  2. openoffice既有windows版本也有linux版。不用擔心生産環境是linux系統。
  3. 關于linux系統安裝openoffice軟體請參照:待更新...

java使用SWFTools将PDF轉成swf并使用flexpaper播放PDF

1、openoffice依賴jar,以maven為例:

<dependency>
			<groupId>com.artofsolving</groupId>
			<artifactId>jodconverter</artifactId>
			<version>2.2.1</version>
		</dependency>
		<dependency>
			<groupId>org.openoffice</groupId>
			<artifactId>jurt</artifactId>
			<version>3.0.1</version>
		</dependency>
		<dependency>
			<groupId>org.openoffice</groupId>
			<artifactId>ridl</artifactId>
			<version>3.0.1</version>
		</dependency>
		<dependency>
			<groupId>org.openoffice</groupId>
			<artifactId>juh</artifactId>
			<version>3.0.1</version>
		</dependency>
		<dependency>
			<groupId>org.openoffice</groupId>
			<artifactId>unoil</artifactId>
			<version>3.0.1</version>
		</dependency>

		<!-- jodconverter 2.2.1 必須依賴 slf4j-jdk14,而且必須是這個版本,否則其源碼中的日志調用會報錯(一個很低級的問題) -->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-jdk14</artifactId>
			<version>1.4.3</version>
		</dependency>
           

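2、直接上轉換代碼。運行前需先以服務方式啟動openoffice並讓它監聽8100端口(例如:soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard),代碼連接該端口即可。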

/**
 * Converts {@code sourceFile} into {@code targetFile} through an OpenOffice
 * instance reached over a socket connection on port 8100.
 *
 * <p>The connection is always released when the method returns, whether the
 * conversion succeeded or not. A failure to connect is logged (with its cause)
 * and otherwise swallowed, so the caller is not notified of it.
 *
 * @param sourceFile the document to convert; its extension selects the input format
 * @param targetFile the file to write; its extension selects the output format
 */
public void convert(File sourceFile, File targetFile) {
	OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);
	try {
		// 1: open the connection
		connection.connect();

		DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
		// 2: resolve the input/output formats from the file extensions
		// NOTE(review): a freshly constructed BasicDocumentFormatRegistry has no
		// formats registered, so both lookups below return null unless formats are
		// added first; presumably DefaultDocumentFormatRegistry was intended, or the
		// converter is relied on to guess the formats itself -- confirm.
		DocumentFormatRegistry factory = new BasicDocumentFormatRegistry();
		DocumentFormat inputDocumentFormat = factory
				.getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath()));
		DocumentFormat outputDocumentFormat = factory
				.getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath()));
		// 3: run the conversion
		converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);
	} catch (ConnectException e) {
		// Keep the cause so the connection failure can actually be diagnosed.
		log.error("文檔轉換PDF失敗", e);
	} finally {
		// Release the socket on every path; the original never disconnected.
		if (connection.isConnected()) {
			connection.disconnect();
		}
	}
}
           

3、需注意:jodconverter 在轉換2007版本以後的xxx.docx文檔時會報錯。原因大家都明白:03版的字尾名是xxx.doc,07以後版本的字尾名是xxx.docx。

檢視jodconverter源碼發現documentFormat不支援xxx.docx格式:BasicDocumentFormatRegistry中的public DocumentFormat getFormatByFileExtension(String extension)只按已註冊的字尾名做精確比對,而預設註冊的是doc格式,沒有docx。

BasicDocumentFormatRegistry類源碼

//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Simple list-backed {@link DocumentFormatRegistry}: formats are kept in the
 * order they were added and looked up by a linear scan.
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

	/** Registered formats, in registration order. */
	private List/*<DocumentFormat>*/ documentFormats = new ArrayList();

	/**
	 * Appends a format to the registry.
	 *
	 * @param documentFormat the format to register
	 */
	public void addDocumentFormat(DocumentFormat documentFormat) {
		documentFormats.add(documentFormat);
	}

	/**
	 * @return the backing list of registered formats (not a copy)
	 */
	protected List/*<DocumentFormat>*/ getDocumentFormats() {
		return documentFormats;
	}

	/**
	 * Finds the first registered format whose file extension equals the
	 * lower-cased argument.
	 *
	 * @param extension the file extension
	 * @return the DocumentFormat for this extension, or null if the extension is
	 *         null or not mapped
	 */
	public DocumentFormat getFormatByFileExtension(String extension) {
		if (extension != null) {
			final String wanted = extension.toLowerCase();
			final Iterator candidates = documentFormats.iterator();
			while (candidates.hasNext()) {
				final DocumentFormat candidate = (DocumentFormat) candidates.next();
				if (candidate.getFileExtension().equals(wanted)) {
					return candidate;
				}
			}
		}
		return null;
	}

	/**
	 * Finds the first registered format whose MIME type equals the argument.
	 *
	 * @param mimeType the MIME type to look up
	 * @return the matching DocumentFormat, or null if none is registered
	 */
	public DocumentFormat getFormatByMimeType(String mimeType) {
		final Iterator candidates = documentFormats.iterator();
		while (candidates.hasNext()) {
			final DocumentFormat candidate = (DocumentFormat) candidates.next();
			if (candidate.getMimeType().equals(mimeType)) {
				return candidate;
			}
		}
		return null;
	}
}
           

BasicDocumentFormatRegistry的預設實作類DefaultDocumentFormatRegistry  中支援的檔案格式如下

//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;

/**
 * A {@link BasicDocumentFormatRegistry} that registers a fixed set of document
 * formats in its constructor: PDF, SWF, XHTML, HTML, text formats, spreadsheet
 * formats, presentation formats and drawing formats.
 *
 * <p>Each format is registered under exactly one file extension. No format is
 * registered for the "docx", "xlsx" or "pptx" extensions.
 */
public class DefaultDocumentFormatRegistry extends BasicDocumentFormatRegistry {

	/**
	 * Builds every supported {@link DocumentFormat}, attaches its export filter
	 * names (keyed by the {@link DocumentFamily} being exported) and any import
	 * or export options, and adds it to the registry.
	 */
	public DefaultDocumentFormatRegistry() {
		// pdf, swf and xhtml are created without a DocumentFamily argument and only
		// receive export filters -- presumably output-only formats; TODO confirm
		// against DocumentFormat.
		final DocumentFormat pdf = new DocumentFormat("Portable Document Format", "application/pdf", "pdf");
        pdf.setExportFilter(DocumentFamily.DRAWING, "draw_pdf_Export");
		pdf.setExportFilter(DocumentFamily.PRESENTATION, "impress_pdf_Export");
		pdf.setExportFilter(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
		pdf.setExportFilter(DocumentFamily.TEXT, "writer_pdf_Export");
		addDocumentFormat(pdf);
		
		final DocumentFormat swf = new DocumentFormat("Macromedia Flash", "application/x-shockwave-flash", "swf");
        swf.setExportFilter(DocumentFamily.DRAWING, "draw_flash_Export");
		swf.setExportFilter(DocumentFamily.PRESENTATION, "impress_flash_Export");
		addDocumentFormat(swf);
		
		final DocumentFormat xhtml = new DocumentFormat("XHTML", "application/xhtml+xml", "xhtml");
		xhtml.setExportFilter(DocumentFamily.PRESENTATION, "XHTML Impress File");
		xhtml.setExportFilter(DocumentFamily.SPREADSHEET, "XHTML Calc File");
		xhtml.setExportFilter(DocumentFamily.TEXT, "XHTML Writer File");
		addDocumentFormat(xhtml);

		// HTML is treated as Text when supplied as input, but as an output it is also
		// available for exporting Spreadsheet and Presentation formats
		final DocumentFormat html = new DocumentFormat("HTML", DocumentFamily.TEXT, "text/html", "html");
		html.setExportFilter(DocumentFamily.PRESENTATION, "impress_html_Export");
		html.setExportFilter(DocumentFamily.SPREADSHEET, "HTML (StarCalc)");
		html.setExportFilter(DocumentFamily.TEXT, "HTML (StarWriter)");
		addDocumentFormat(html);
		
		// --- Formats declared with DocumentFamily.TEXT ---
		final DocumentFormat odt = new DocumentFormat("OpenDocument Text", DocumentFamily.TEXT, "application/vnd.oasis.opendocument.text", "odt");
		odt.setExportFilter(DocumentFamily.TEXT, "writer8");
		addDocumentFormat(odt);

		final DocumentFormat sxw = new DocumentFormat("OpenOffice.org 1.0 Text Document", DocumentFamily.TEXT, "application/vnd.sun.xml.writer", "sxw");
		sxw.setExportFilter(DocumentFamily.TEXT, "StarOffice XML (Writer)");
		addDocumentFormat(sxw);

		// Only the "doc" extension is registered for Microsoft Word.
		final DocumentFormat doc = new DocumentFormat("Microsoft Word", DocumentFamily.TEXT, "application/msword", "doc");
		doc.setExportFilter(DocumentFamily.TEXT, "MS Word 97");
		addDocumentFormat(doc);

		final DocumentFormat rtf = new DocumentFormat("Rich Text Format", DocumentFamily.TEXT, "text/rtf", "rtf");
		rtf.setExportFilter(DocumentFamily.TEXT, "Rich Text Format");
		addDocumentFormat(rtf);

		// WordPerfect is registered without any export filter.
		final DocumentFormat wpd = new DocumentFormat("WordPerfect", DocumentFamily.TEXT, "application/wordperfect", "wpd");
		addDocumentFormat(wpd);

		final DocumentFormat txt = new DocumentFormat("Plain Text", DocumentFamily.TEXT, "text/plain", "txt");
        // set FilterName to "Text" to prevent OOo from trying to display the "ASCII Filter Options" dialog
        // alternatively FilterName could be "Text (encoded)" and FilterOptions used to set encoding if needed
        txt.setImportOption("FilterName", "Text");
		txt.setExportFilter(DocumentFamily.TEXT, "Text");
		addDocumentFormat(txt);

		// Created without a DocumentFamily argument; only a TEXT export filter is set.
		final DocumentFormat wikitext = new DocumentFormat("MediaWiki wikitext", "text/x-wiki", "wiki");
		wikitext.setExportFilter(DocumentFamily.TEXT, "MediaWiki");
        addDocumentFormat(wikitext);
		
		// --- Formats declared with DocumentFamily.SPREADSHEET ---
		final DocumentFormat ods = new DocumentFormat("OpenDocument Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.oasis.opendocument.spreadsheet", "ods");
		ods.setExportFilter(DocumentFamily.SPREADSHEET, "calc8");
		addDocumentFormat(ods);

		final DocumentFormat sxc = new DocumentFormat("OpenOffice.org 1.0 Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.sun.xml.calc", "sxc");
		sxc.setExportFilter(DocumentFamily.SPREADSHEET, "StarOffice XML (Calc)");
		addDocumentFormat(sxc);

		// Only the "xls" extension is registered for Microsoft Excel.
		final DocumentFormat xls = new DocumentFormat("Microsoft Excel", DocumentFamily.SPREADSHEET, "application/vnd.ms-excel", "xls");
		xls.setExportFilter(DocumentFamily.SPREADSHEET, "MS Excel 97");
		addDocumentFormat(xls);

        // CSV and TSV share the same filter and differ only in the first
        // FilterOptions value (the field separator character code).
        final DocumentFormat csv = new DocumentFormat("CSV", DocumentFamily.SPREADSHEET, "text/csv", "csv");
        csv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
        csv.setImportOption("FilterOptions", "44,34,0");  // Field Separator: ','; Text Delimiter: '"'  
        csv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
        csv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "44,34,0");
        addDocumentFormat(csv);

        final DocumentFormat tsv = new DocumentFormat("Tab-separated Values", DocumentFamily.SPREADSHEET, "text/tab-separated-values", "tsv");
        tsv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
        tsv.setImportOption("FilterOptions", "9,34,0");  // Field Separator: '\t'; Text Delimiter: '"'
        tsv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
        tsv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "9,34,0");
        addDocumentFormat(tsv);

		// --- Formats declared with DocumentFamily.PRESENTATION ---
		final DocumentFormat odp = new DocumentFormat("OpenDocument Presentation", DocumentFamily.PRESENTATION, "application/vnd.oasis.opendocument.presentation", "odp");
		odp.setExportFilter(DocumentFamily.PRESENTATION, "impress8");
		addDocumentFormat(odp);

		final DocumentFormat sxi = new DocumentFormat("OpenOffice.org 1.0 Presentation", DocumentFamily.PRESENTATION, "application/vnd.sun.xml.impress", "sxi");
		sxi.setExportFilter(DocumentFamily.PRESENTATION, "StarOffice XML (Impress)");
		addDocumentFormat(sxi);

		// Only the "ppt" extension is registered for Microsoft PowerPoint.
		final DocumentFormat ppt = new DocumentFormat("Microsoft PowerPoint", DocumentFamily.PRESENTATION, "application/vnd.ms-powerpoint", "ppt");
		ppt.setExportFilter(DocumentFamily.PRESENTATION, "MS PowerPoint 97");
		addDocumentFormat(ppt);
        
        // --- Drawing formats ---
        final DocumentFormat odg = new DocumentFormat("OpenDocument Drawing", DocumentFamily.DRAWING, "application/vnd.oasis.opendocument.graphics", "odg");
        odg.setExportFilter(DocumentFamily.DRAWING, "draw8");
        addDocumentFormat(odg);
        
        // Created without a DocumentFamily argument; only a DRAWING export filter is set.
        final DocumentFormat svg = new DocumentFormat("Scalable Vector Graphics", "image/svg+xml", "svg");
        svg.setExportFilter(DocumentFamily.DRAWING, "draw_svg_Export");
        addDocumentFormat(svg);
  	}
}
           

解決方法:重寫BasicDocumentFormatRegistry類中public DocumentFormat getFormatByFileExtension(String extension)方法,只要字尾名包含doc,就使用doc的documentFormat文檔格式(ppt、xls同理)。

//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

/**
 * Replacement for JODConverter's {@code BasicDocumentFormatRegistry} whose
 * extension lookup also resolves Office 2007+ extensions (for example
 * {@code docx}, {@code pptx}, {@code xlsx}) to the formats registered under
 * {@code doc}, {@code ppt} and {@code xls}.
 *
 * @author HuGuangJun
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

	/** Registered formats, in registration order. */
	private List/* <DocumentFormat> */ documentFormats = new ArrayList();

	/**
	 * Appends a format to the registry.
	 *
	 * @param documentFormat
	 *            the format to register
	 */
	public void addDocumentFormat(DocumentFormat documentFormat) {
		documentFormats.add(documentFormat);
	}

	/**
	 * @return the backing list of registered formats (not a copy)
	 */
	protected List/* <DocumentFormat> */ getDocumentFormats() {
		return documentFormats;
	}

	/**
	 * Looks up a format by file extension, case-insensitively. Any extension
	 * containing "doc", "ppt" or "xls" is first reduced to that legacy
	 * extension, so e.g. "docx" and "DOCX" both resolve to the "doc" format.
	 *
	 * @param extension
	 *            the file extension
	 * @return the DocumentFormat for this extension, or null if the extension
	 *         is null or not mapped
	 */
	public DocumentFormat getFormatByFileExtension(String extension) {
		if (extension == null) {
			return null;
		}
		// Lower-case BEFORE normalising: the previous order left upper-case
		// extensions such as "DOCX" unmapped. Locale.ROOT keeps the result
		// independent of the JVM default locale (e.g. Turkish dotless i).
		String lowerExtension = normalizeOfficeExtension(extension.toLowerCase(Locale.ROOT));
		for (Iterator it = documentFormats.iterator(); it.hasNext();) {
			DocumentFormat format = (DocumentFormat) it.next();
			if (format.getFileExtension().equals(lowerExtension)) {
				return format;
			}
		}
		return null;
	}

	/**
	 * Looks up a format by MIME type.
	 *
	 * @param mimeType
	 *            the MIME type to look up
	 * @return the first registered format with that MIME type, or null if none
	 *         matches
	 */
	public DocumentFormat getFormatByMimeType(String mimeType) {
		for (Iterator it = documentFormats.iterator(); it.hasNext();) {
			DocumentFormat format = (DocumentFormat) it.next();
			if (format.getMimeType().equals(mimeType)) {
				return format;
			}
		}
		return null;
	}

	/**
	 * Maps an already lower-cased extension onto its legacy Office extension.
	 * The checks keep the original precedence: doc, then ppt, then xls.
	 */
	private static String normalizeOfficeExtension(String lowerExtension) {
		if (lowerExtension.indexOf("doc") >= 0) {
			return "doc";
		}
		if (lowerExtension.indexOf("ppt") >= 0) {
			return "ppt";
		}
		if (lowerExtension.indexOf("xls") >= 0) {
			return "xls";
		}
		return lowerExtension;
	}
}