天天看點

python修改pdf元資訊 metadata

pdf 檔案是簽名只讀的,所以要先讀出來然後再寫出去
使用前先安裝 PyPDF2;中文的坑請參考 https://github.com/mstamy2/PyPDF2/pull/463
talk is cheap,show you the code
from PyPDF2 import PdfFileReader, PdfFileWriter
from multiprocessing import Process, Queue
import os,time
import getopt, sys,shutil



def update_metadata(pdf):
    """Copy a PDF to a new file, replacing its document metadata.

    The pages of the source document are copied unchanged into a new
    document whose ``/Author``, ``/Title`` and ``/Creator`` entries are set
    to fixed values. The source document's info dictionary is printed
    before the copy is made.

    Args:
        pdf: Mapping with two keys: ``'source'`` (path of the PDF to read)
            and ``'to'`` (path of the PDF to write).
    """
    source_path = pdf['source']
    target_path = pdf['to']
    # Both files are opened through context managers so the handles are
    # closed (and the output flushed) even when PyPDF2 raises. The source
    # must stay open until the write finishes, hence the nesting.
    with open(source_path, 'rb') as source_file:
        # Reader over the existing document.
        pdfReader = PdfFileReader(source_file)
        print(pdfReader.getDocumentInfo())
        # Writer that will hold the copy.
        pdfWriter = PdfFileWriter()
        # Metadata entries to store in the output document; more keys can
        # be added to this dictionary as needed.
        pdfWriter.addMetadata({'/Author':'youngboy','/Title':'youngboy','/Creator':'youngboy'})
        # Copy every page of the source document into the writer.
        pdfWriter.appendPagesFromReader(pdfReader)
        # Serialise the new document to the target path.
        with open(target_path, 'wb') as target_file:
            pdfWriter.write(target_file)

def long_time_task(q):
    """Worker loop: take jobs from *q* and process them until it is empty.

    Each job is handed to ``update_metadata``. The remaining queue size is
    printed before every attempt to fetch a job.

    Args:
        q: Queue of job mappings as accepted by ``update_metadata``. It must
            provide ``empty()``, ``qsize()`` and ``get(timeout=...)``.
    """
    # Imported locally so the function does not rely on a module-level
    # import; multiprocessing queues raise queue.Empty on a timed-out get.
    import queue

    while not q.empty():
        print("剩餘任務"+str(q.qsize()))
        try:
            # Another worker may take the last job between the empty() check
            # and this call; an unbounded get() would then block forever.
            v = q.get(timeout=1)
        except queue.Empty:
            # Nothing arrived in time: re-check emptiness instead of hanging.
            continue
        update_metadata(v)

def usage():
    """Print the command-line help text to standard output."""
    help_text = """
        - r root 目錄
        - p 程序數(程池不會用是以這個參數沒意義)
    """
    print(help_text)

def _collect_pdf_jobs(root, dest_root):
    """Build one job per PDF file found under *root*.

    The directory layout below *root* is mirrored under *dest_root*: every
    directory that contains files gets a matching output directory.

    Args:
        root: Directory to scan recursively.
        dest_root: Directory that receives the rewritten PDFs.

    Returns:
        A list of ``{'source': ..., 'to': ...}`` mappings, one per file whose
        name ends in ``.pdf`` (case-insensitive).
    """
    dest_abs = os.path.abspath(dest_root)
    jobs = []
    for (r, dirs, files) in os.walk(root):
        # Prune the output tree in place so it is never scanned as input.
        dirs[:] = [d for d in dirs
                   if os.path.abspath(os.path.join(r, d)) != dest_abs]
        if not files:
            continue
        # relpath strips only the leading root; str.replace would also
        # remove any later occurrence of the same text inside the path.
        rel = os.path.relpath(r, root)
        dest_dir = os.path.normpath(os.path.join(dest_root, rel))
        os.makedirs(dest_dir, exist_ok=True)
        for f in files:
            print(rel+"--"+r+"--"+root)
            # Match on the extension; a substring test would also pick up
            # names such as "pdf_notes.txt" and miss "REPORT.PDF".
            if f.lower().endswith('.pdf'):
                jobs.append({
                    'source': os.path.join(r, f),
                    'to': os.path.join(dest_dir, f)
                })
    return jobs


if __name__=='__main__':
    # Command-line entry point:
    #   -r DIR            root directory to scan (required)
    #   -p N, --process N number of worker processes (default 2)
    #   -h, --help        print the help text and exit
    # Rewritten PDFs are written to DIR/dest, which is recreated on each run.

    print(sys.argv[1:])
    try:
        # Long options are registered so the "--help"/"--process" branches
        # below are actually reachable.
        opts, args = getopt.getopt(sys.argv[1:], "hr:p:", ["help", "process="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(err)
        usage()
        sys.exit(2)
    root = None
    pnum = 2
    for o, a in opts:
        if o == "-r":
            root = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-p", "--process"):
            # getopt returns strings; the worker count must be an integer.
            try:
                pnum = int(a)
            except ValueError:
                print("-p expects an integer, got " + repr(a))
                usage()
                sys.exit(2)
        else:
            raise AssertionError("unhandled option")

    if root is None:
        print("missing required option -r")
        usage()
        sys.exit(2)
    if not os.path.isdir(root):
        print("not a directory: " + root)
        sys.exit(2)
    if pnum < 1:
        print("-p must be at least 1")
        sys.exit(2)

    # Start from a clean output tree; on the first run there is none yet.
    dest_root = os.path.join(root, 'dest')
    if os.path.isdir(dest_root):
        shutil.rmtree(dest_root)

    # Load every job into the queue before any worker starts.
    jobs = _collect_pdf_jobs(root, dest_root)
    q = Queue()
    for job in jobs:
        q.put(job)
    # Count from the list: Queue.qsize() is not available on every platform.
    print(len(jobs))

    # One worker process per requested slot, all draining the same queue.
    workers = [Process(target=long_time_task, args=(q,)) for _ in range(pnum)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print('All subprocesses done.')
           

使用示例

python xx.py -r D:/pdf