PDF 檔案是簽名唯讀的,所以要先讀出來,然後再寫出去。
使用前請先安裝 PyPDF2;中文的坑請參考 https://github.com/mstamy2/PyPDF2/pull/463
Talk is cheap, show you the code:
from PyPDF2 import PdfFileReader, PdfFileWriter
from multiprocessing import Process, Queue
import os,time
import getopt, sys,shutil
def update_metadata(pdf):
    """Copy one PDF to a new file with its document metadata overwritten.

    The source file is never modified: its pages are read and re-emitted
    through a fresh writer, and the result is stored at the target path.
    The source's existing document info is printed before the copy.

    Args:
        pdf: Mapping with two keys, ``'source'`` (path of the PDF to read)
            and ``'to'`` (path of the PDF to write).
    """
    source_path = pdf['source']
    target_path = pdf['to']
    # Keep the source stream open until the write has finished, in case the
    # reader still needs it while the pages are being serialized.
    with open(source_path, 'rb') as source_file:
        # Build a PdfFileReader over the source document.
        pdf_reader = PdfFileReader(source_file)
        print(pdf_reader.getDocumentInfo())
        # Build the PdfFileWriter that will produce the copy.
        pdf_writer = PdfFileWriter()
        # Metadata to stamp onto the copy; extend this dict to set more fields.
        pdf_writer.addMetadata({'/Author':'youngboy','/Title':'youngboy','/Creator':'youngboy'})
        # Add every page of the source document to the writer.
        pdf_writer.appendPagesFromReader(pdf_reader)
        # Write the result; the context manager guarantees flush and close.
        with open(target_path, 'wb') as target_file:
            pdf_writer.write(target_file)
def long_time_task(q):
    """Worker loop: drain *q* and rewrite the metadata of every queued PDF.

    Args:
        q: Queue of task dicts in the shape ``update_metadata`` accepts
            (``'source'`` and ``'to'`` keys).
    """
    # Function-scope import so the block does not depend on a new
    # module-level import.
    from queue import Empty

    while not q.empty():
        print("剩餘任務"+str(q.qsize()))
        try:
            # Another worker may take the last item between the empty() check
            # and this call; a bounded wait avoids blocking forever.
            v = q.get(timeout=1)
        except Empty:
            # Nothing arrived in time: re-check the queue and exit if drained.
            continue
        update_metadata(v)
def usage():
    """Print the command-line help text to standard output."""
    # Leading and trailing empty entries reproduce the blank lines that
    # surround the option list in the help output.
    help_lines = (
        "",
        "- r root 目錄",
        "- p 程序數(程池不會用是以這個參數沒意義)",
        "",
    )
    print("\n".join(help_lines))
if __name__ == '__main__':
    # Script entry point: mirror the directory tree under <root> into
    # <root>/dest and queue every PDF for a metadata rewrite, then process
    # the queue with two worker processes.
    print(sys.argv[1:])
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hr:p:")
    except getopt.GetoptError as err:
        # Print help information and exit.
        print(err)
        usage()
        sys.exit(2)
    root = None
    # -p is accepted for compatibility but not used: the number of workers
    # started below is fixed at two.
    pnum = 3
    for o, a in opts:
        if o == "-r":
            root = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-p", "--process"):
            pnum = a
        else:
            # Explicit raise instead of `assert`, which is stripped under -O.
            raise AssertionError("unhandled option")
    if root is None:
        # -r is mandatory; without it there is no tree to process.
        usage()
        sys.exit(2)

    # Start from a clean output tree; on the first run there is nothing to
    # remove yet.
    dest_root = os.path.join(root, 'dest')
    if os.path.isdir(dest_root):
        shutil.rmtree(dest_root)

    q = Queue()
    # Counted by hand because Queue.qsize() is not available on every platform.
    task_count = 0
    # Load the tasks into the queue.
    for (r, dirs, files) in os.walk(root):
        # Path of this directory relative to root ('.' for root itself).
        rel_dir = os.path.relpath(r, root)
        dest_dir = os.path.normpath(os.path.join(dest_root, rel_dir))
        for f in files:
            os.makedirs(dest_dir, exist_ok=True)
            print(rel_dir+"--"+r+"--"+root)
            # Match on the extension, not on "pdf" anywhere in the name.
            if f.lower().endswith('.pdf'):
                q.put({
                    'source': os.path.join(r, f),
                    'to': os.path.join(dest_dir, f)
                })
                task_count += 1
    print(task_count)

    # Two worker processes, created by hand rather than through a pool.
    workers = [Process(target=long_time_task, args=(q,)) for _ in range(2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print('All subprocesses done.')
使用示例
python xx.py -r D:/pdf