天天看點

Java 使用 Aspose 批量將 PDF 轉為 Word

最近有一些學習資料,大概幾個 G,全都是 PDF,沒辦法編輯,所以就想轉成 Word。但是搜了很多軟體,都沒有批量轉換功能,只能一個一個處理,太浪費時間,而且幾乎全部收費。所以決定自己寫一個,還能節省時間。

需要注意的事項:jar包必須破解,如果不是破解版每個文檔隻能轉換4頁。

怎麼破解jar包網上有教程感興趣的可以自己破解一下。我就不寫了。

jar包資源:https://download.csdn.net/download/qq_35771266/15084899

1. 將需要的 aspose.pdf.jar 包引入項目

2.封裝讀取檔案夾裡面PDF檔案的工具類

package com.question.syncdemo.utils;

import cn.hutool.core.io.file.FileReader;
import cn.hutool.core.util.StrUtil;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * description: FileUtils  檔案操作工具類<br>
 *
 * @date: 2020/11/17 0017 下午 5:06 <br>
 * @author: William <br>
 * version: 1.0 <br>
 */
public class FileUtils {



    //因為我這個是臨時用是以沒有考慮并發,如果并發自己修改一下就好了
    public static List<String> resultList = new ArrayList<>();


    /**
     *@description: 通過檔案路徑,修改該路徑下所有檔案的名字
     * @param path  檔案夾路徑
     * @return:
     * @author: William
     * @date 2019/8/8 14:52
     */
    public static List<String> getFilesPaths(String path,List<String> stringList){
        File file = new File(path);
        if(file.exists()){
            File[] files = file.listFiles();
            if (null == files || files.length == 0) {
                System.out.println("檔案夾是空的!");
            } else {
                for (File file2 : files) {
                    if (file2.isDirectory()) {
                        getFilesPaths(file2.getAbsolutePath(),stringList);
                    } else {
                        String filePath = file2.getAbsolutePath();
                        stringList.add(filePath);
                    }
                }
            }
        }else{
            System.out.println("該路徑不存在");
        }
        return stringList;
    }

}
           

3.封裝PDF處理工具類

package com.question.syncdemo.utils;

import com.aspose.pdf.Document;
import com.aspose.pdf.License;
import com.aspose.pdf.SaveFormat;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicReference;

/**
 * description: PDFUtil <br>
 *
 * @date: 2021/2/4 0004 上午 10:09 <br>
 * @author: William <br>
 * version: 1.0 <br>
 */
public class PDFUtil {
    private static InputStream license;


    public static void main(String[] args) throws Exception {
        pdf2word();
    }




    //多線程處理需要轉換格式的檔案
    public static void produceData(List<String> list) throws InterruptedException {
        //每個線程處理的資料,我這裡隻開了三個線程,
        int threadSize = list.size()/3;
        //int threadSize = 500;  可以每個線程處理500條資料
        int remainder = list.size() % threadSize;
        //線程數
        int threadNum = 0;
        if (remainder == 0) {
            threadNum = list.size() / threadSize;
        } else {
            threadNum = list.size() / threadSize + 1;
        }
        long begin = System.currentTimeMillis();
        //建立一個線程池
        ExecutorService eService = Executors.newFixedThreadPool(threadNum);
        List<Callable<String>> cList = new ArrayList<>();
        Callable<String> task = null;
        List<String> sList = null;
        for (int i = 0; i < threadNum; i++) {
            if (i == threadNum - 1) {
                sList = list.subList(i * threadSize, list.size());
            } else {
                sList = list.subList(i * threadSize, (i + 1) * threadSize);
            }
            final List<String> nowList = sList;
            task = new Callable<String>() {
                @Override
                public String call() throws Exception {
                    nowList.forEach(filesPath -> {
                        if(filesPath.contains(".pdf")){
                            File file = new File(filesPath);
                            String paperName = file.getName();
                            paperName = paperName.substring(0,paperName.lastIndexOf("."));
                            String tempFilesPath = filesPath.substring(0,filesPath.lastIndexOf(File.separator));
                            tempFilesPath = tempFilesPath +"\\"+paperName+".docx";
                            System.out.println(tempFilesPath);
                            try {
                                saveAsWord(filesPath,tempFilesPath);
                            } catch (Exception e) {
                                e.printStackTrace();
                            }
                        }
                    });
                    return "ok";
                }
            };
            cList.add(task);
        }
        List<Future<String>> results = eService.invokeAll(cList);
        for (Future<String> str : results) {
            //System.out.println(str.get());
        }
        eService.shutdown();
        long end = System.currentTimeMillis();
        System.out.println("執行耗時:" + (end - begin));
    }



    public static void pdf2word() throws Exception {
        List<String> strings = new ArrayList<>();
        List<String> filesPaths = FileUtils.getFilesPaths("D:\\work\\temp\\中學學段2019科目二", strings);
        produceData(filesPaths);
    }



    //将PDF儲存為word
    public static void saveAsWord(String targetFile,String newFile) throws Exception {
        File target = new File(targetFile);
        if(!target.exists()){
            target.mkdirs();
        }
        FileInputStream targetInputStream = new FileInputStream(target);
        //調用去水印的方法 讀取license.xml檔案
        if (!getLicense()) {
            System.out.println("擷取驗證失敗");
        }
        Document targetDocument = new Document(targetInputStream);
        targetDocument.save(newFile, SaveFormat.DocX);
        targetInputStream.close();
        targetDocument.close();
    }





     //證書擷取
    public static synchronized boolean getLicense() {
        boolean result = false;
        try {

            String license2 = "<License>\n"
                    + "  <Data>\n"
                    + "    <Products>\n"
                    + "      <Product>Aspose.Total for Java</Product>\n"
                    + "      <Product>Aspose.Words for Java</Product>\n"
                    + "    </Products>\n"
                    + "    <EditionType>Enterprise</EditionType>\n"
                    + "    <SubscriptionExpiry>20991231</SubscriptionExpiry>\n"
                    + "    <LicenseExpiry>20991231</LicenseExpiry>\n"
                    + "    <SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber>\n"
                    + "  </Data>\n"
                    + "  <Signature>111</Signature>\n"
                    + "</License>";
            license = new ByteArrayInputStream(license2.getBytes("UTF-8"));

            License aposeLic = new License();
            aposeLic.setLicense(license);
            result = true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result;
    }

}