最近有一些學習資料,大概幾個G,全都是PDF,沒辦法編輯,所以就想轉成Word。但是搜了很多軟體,都沒有批量轉換功能,只能一個一個處理,太浪費時間;最主要的是全部都要收費。所以決定自己寫一個,還能節省時間。
需要注意的事項:jar包必須破解,如果不是破解版每個文檔隻能轉換4頁。
怎麼破解jar包網上有教程感興趣的可以自己破解一下。我就不寫了。
jar包資源:https://download.csdn.net/download/qq_35771266/15084899
1.將需要的aspose.pdf.jar包引入項目
2.封裝讀取檔案夾裡面PDF檔案的工具類
package com.question.syncdemo.utils;
import cn.hutool.core.io.file.FileReader;
import cn.hutool.core.util.StrUtil;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
* description: FileUtils 檔案操作工具類<br>
*
* @date: 2020/11/17 0017 下午 5:06 <br>
* @author: William <br>
* version: 1.0 <br>
*/
public class FileUtils {
//因為我這個是臨時用是以沒有考慮并發,如果并發自己修改一下就好了
public static List<String> resultList = new ArrayList<>();
/**
*@description: 通過檔案路徑,修改該路徑下所有檔案的名字
* @param path 檔案夾路徑
* @return:
* @author: William
* @date 2019/8/8 14:52
*/
public static List<String> getFilesPaths(String path,List<String> stringList){
File file = new File(path);
if(file.exists()){
File[] files = file.listFiles();
if (null == files || files.length == 0) {
System.out.println("檔案夾是空的!");
} else {
for (File file2 : files) {
if (file2.isDirectory()) {
getFilesPaths(file2.getAbsolutePath(),stringList);
} else {
String filePath = file2.getAbsolutePath();
stringList.add(filePath);
}
}
}
}else{
System.out.println("該路徑不存在");
}
return stringList;
}
}
3.封裝PDF處理工具類
package com.question.syncdemo.utils;
import com.aspose.pdf.Document;
import com.aspose.pdf.License;
import com.aspose.pdf.SaveFormat;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicReference;
/**
 * description: PDFUtil - batch-converts the PDF files found under a directory to DOCX
 * using Aspose.PDF, spreading the files over a small fixed thread pool.<br>
 *
 * @date: 2021/2/4 0004 上午 10:09 <br>
 * @author: William <br>
 * version: 1.0 <br>
 */
public class PDFUtil {

    /** Upper bound on the number of worker threads started by {@link #produceData(List)}. */
    private static final int MAX_WORKERS = 3;

    /** Directory scanned by the no-argument {@link #pdf2word()}. */
    private static final String DEFAULT_SOURCE_DIR = "D:\\work\\temp\\中學學段2019科目二";

    /**
     * Entry point. Converts the directory given as the first argument, or the built-in
     * default directory when no argument is supplied.
     *
     * @param args optional: {@code args[0]} is the directory to scan
     * @throws Exception if the batch run is interrupted
     */
    public static void main(String[] args) throws Exception {
        if (args != null && args.length > 0) {
            pdf2word(args[0]);
        } else {
            pdf2word();
        }
    }

    /**
     * Converts every PDF path in {@code list} to a DOCX file placed next to the source,
     * using at most {@value #MAX_WORKERS} threads. Entries that are not PDF files are
     * skipped. A failure on one file is printed and does not stop the others.
     *
     * <p>The list is split into sub-list views, so it must not be modified while this
     * method is running.
     *
     * @param list file paths to process; {@code null} or empty is a no-op
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public static void produceData(List<String> list) throws InterruptedException {
        // Fewer than three entries used to give a chunk size of 0 and a division by zero.
        if (list == null || list.isEmpty()) {
            return;
        }

        int total = list.size();
        int workers = Math.min(MAX_WORKERS, total);
        // Balanced split: the first `extra` chunks take one additional element, so there
        // are exactly `workers` non-empty chunks and never more threads than intended.
        int base = total / workers;
        int extra = total % workers;

        long begin = System.currentTimeMillis();

        List<Callable<String>> tasks = new ArrayList<>(workers);
        int from = 0;
        for (int i = 0; i < workers; i++) {
            int to = from + base + (i < extra ? 1 : 0);
            final List<String> slice = list.subList(from, to);
            tasks.add(() -> {
                slice.forEach(PDFUtil::convertIfPdf);
                return "ok";
            });
            from = to;
        }

        ExecutorService pool = Executors.newFixedThreadPool(workers);
        try {
            // invokeAll blocks until every task has finished.
            List<Future<String>> results = pool.invokeAll(tasks);
            for (Future<String> result : results) {
                try {
                    result.get();
                } catch (java.util.concurrent.ExecutionException e) {
                    // Per-file errors are handled inside the task; this surfaces anything
                    // unexpected that escaped it instead of dropping it silently.
                    e.printStackTrace();
                }
            }
        } finally {
            // Always release the worker threads, even when interrupted.
            pool.shutdown();
        }

        long end = System.currentTimeMillis();
        System.out.println("執行耗時:" + (end - begin));
    }

    /**
     * Converts all PDFs under the built-in default directory.
     *
     * @throws Exception if the batch run is interrupted
     */
    public static void pdf2word() throws Exception {
        pdf2word(DEFAULT_SOURCE_DIR);
    }

    /**
     * Converts all PDFs found (recursively) under {@code directory}.
     *
     * @param directory directory to scan for PDF files
     * @throws Exception if the batch run is interrupted
     */
    public static void pdf2word(String directory) throws Exception {
        List<String> filesPaths = FileUtils.getFilesPaths(directory, new ArrayList<>());
        produceData(filesPaths);
    }

    /**
     * Saves a PDF as a Word (DOCX) document.
     *
     * @param targetFile path of the source PDF to read
     * @param newFile    path of the DOCX file to write
     * @throws java.io.FileNotFoundException if {@code targetFile} is not an existing file
     * @throws Exception if reading or converting the document fails
     */
    public static void saveAsWord(String targetFile, String newFile) throws Exception {
        File target = new File(targetFile);
        // The source must already exist; creating a directory at its path (as the code
        // previously did) only left junk behind before failing on open.
        if (!target.isFile()) {
            throw new java.io.FileNotFoundException("PDF 檔案不存在: " + targetFile);
        }

        // Make sure the destination directory exists so save() can create the file.
        File outputDir = new File(newFile).getAbsoluteFile().getParentFile();
        if (outputDir != null && !outputDir.exists()) {
            outputDir.mkdirs();
        }

        // Apply the license before loading the document.
        if (!getLicense()) {
            System.out.println("擷取驗證失敗");
        }

        try (FileInputStream targetInputStream = new FileInputStream(target)) {
            Document targetDocument = new Document(targetInputStream);
            try {
                targetDocument.save(newFile, SaveFormat.DocX);
            } finally {
                // Release the document even when save() throws.
                targetDocument.close();
            }
        }
    }

    /**
     * Builds the license XML in memory and applies it through {@code License.setLicense}.
     *
     * <p>NOTE(review): the XML below is a hard-coded stand-in rather than a vendor-issued
     * license file. For legitimate use, load a purchased license file here instead.
     *
     * @return {@code true} when the license was applied without an exception,
     *         {@code false} otherwise (the exception is printed)
     */
    public static synchronized boolean getLicense() {
        boolean result = false;
        try {
            String license2 = "<License>\n"
                    + " <Data>\n"
                    + " <Products>\n"
                    + " <Product>Aspose.Total for Java</Product>\n"
                    + " <Product>Aspose.Words for Java</Product>\n"
                    + " </Products>\n"
                    + " <EditionType>Enterprise</EditionType>\n"
                    + " <SubscriptionExpiry>20991231</SubscriptionExpiry>\n"
                    + " <LicenseExpiry>20991231</LicenseExpiry>\n"
                    + " <SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber>\n"
                    + " </Data>\n"
                    + " <Signature>111</Signature>\n"
                    + "</License>";
            // A local stream is enough; no shared static state is needed.
            InputStream license = new ByteArrayInputStream(license2.getBytes("UTF-8"));
            License aposeLic = new License();
            aposeLic.setLicense(license);
            result = true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result;
    }

    /** Converts one path to DOCX if it names a PDF; prints and continues on failure. */
    private static void convertIfPdf(String filesPath) {
        if (!isPdf(filesPath)) {
            return;
        }
        String docxPath = toDocxPath(filesPath);
        System.out.println(docxPath);
        try {
            saveAsWord(filesPath, docxPath);
        } catch (Exception e) {
            // Best-effort batch: report which file failed and move on to the next one.
            System.out.println("轉換失敗:" + filesPath);
            e.printStackTrace();
        }
    }

    /** Returns true when the path ends with ".pdf", ignoring case. */
    private static boolean isPdf(String filesPath) {
        if (filesPath == null) {
            return false;
        }
        String suffix = ".pdf";
        // regionMatches returns false for a negative offset, so short names are safe.
        return filesPath.regionMatches(
                true, filesPath.length() - suffix.length(), suffix, 0, suffix.length());
    }

    /** Maps "dir/name.pdf" to "dir/name.docx" using the platform's path separator. */
    private static String toDocxPath(String filesPath) {
        File source = new File(filesPath);
        String fileName = source.getName();
        String baseName = fileName.substring(0, fileName.lastIndexOf('.'));
        return new File(source.getParentFile(), baseName + ".docx").getPath();
    }
}