天天看點

Python學習源碼分享:Word文檔批量轉換為PDF

# -*- coding:utf-8 -*-
import os 
from win32com.client import Dispatch, DispatchEx 
from win32com.client import constants  
from win32com.client import gencache    
import re  



def getfilenames(filepath='', filelist_out=None, file_ext='all'):
    """Recursively collect the paths of files found under ``filepath``.

    Args:
        filepath: Root directory handed to ``os.walk``.
        filelist_out: Optional list that matching paths are appended to
            (it is modified in place). A new list is created when omitted.
        file_ext: Selects which files are kept:
            ``'all'`` keeps every file; ``'.doc'`` keeps files whose
            extension is ``.doc`` or ``.docx`` and rewrites backslashes in
            their paths to ``/``; any other value keeps files whose
            extension equals it exactly (case-sensitive, dot included).

    Returns:
        The list the paths were appended to, sorted in place.
    """
    # A literal ``[]`` default would be shared by every call and keep
    # accumulating results, so the list is created per call instead.
    if filelist_out is None:
        filelist_out = []

    for fpath, _dirs, fs in os.walk(filepath):
        for f in fs:
            fi_d = os.path.join(fpath, f)
            ext = os.path.splitext(fi_d)[1]
            if file_ext == '.doc':
                if ext in ('.doc', '.docx'):
                    # Only Word documents get forward-slash normalisation.
                    filelist_out.append(fi_d.replace('\\', '/'))
            elif file_ext == 'all' or ext == file_ext:
                filelist_out.append(fi_d)

    # Sort once after the walk rather than once per visited directory.
    filelist_out.sort()
    return filelist_out

def wordtopdf(filelist, targetpath, digit):
    """Convert Word documents to PDF through Word's COM automation interface.

    Each document is exported into ``targetpath`` under a name built from its
    position in ``filelist``, zero-padded to ``digit`` characters, plus
    ``.pdf`` (for example ``0000.pdf`` when ``digit`` is 4).

    Args:
        filelist: Iterable of Word document paths.
        targetpath: Output directory; it is created if it does not exist.
        digit: Minimum width of the zero-padded index used as the PDF name.

    Returns:
        A list with the absolute path of every generated PDF when all
        documents were converted (an empty list for empty input), or
        ``False`` as soon as one document fails to convert or a
        ``TypeError`` is raised.
    """
    valueList = []
    w = None
    try:
        gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
        # Start the conversion.
        w = Dispatch("Word.Application")

        # Paths handed to Word must be absolute because Word's working
        # directory is not the script's. They are resolved up front instead
        # of via os.chdir, so the process working directory is left alone and
        # a relative targetpath is not re-resolved against each source folder.
        targetdir = os.path.abspath(targetpath)
        os.makedirs(targetdir, exist_ok=True)

        for index, fullfilename in enumerate(filelist):
            source = os.path.abspath(fullfilename)
            pdf_name = os.path.join(targetdir, str(index).zfill(digit) + ".pdf")

            # Require a clean export AND an existing file: a PDF left over
            # from an earlier run must not mask a failed conversion.
            if _export_to_pdf(w, source, pdf_name) and os.path.isfile(pdf_name):
                valueList.append(pdf_name)
            else:
                print('轉換失敗!')
                return False
        return valueList
    except TypeError as e:
        print('出錯了!')
        print(e)
        return False
    finally:
        # Always shut Word down, including on the early failure return and
        # when an exception escapes, so no Word process is left behind.
        if w is not None:
            try:
                w.Quit(constants.wdDoNotSaveChanges)
            except Exception as e:
                print(e)


def _export_to_pdf(word, source, output):
    """Open ``source`` read-only in ``word``, export it to ``output`` as PDF.

    Returns True when the export call completed without raising; errors are
    printed and reported as False. The document is closed in every case.
    """
    document = None
    try:
        document = word.Documents.Open(source, ReadOnly=1)
        document.ExportAsFixedFormat(
            output, constants.wdExportFormatPDF,
            Item=constants.wdExportDocumentWithMarkup,
            CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        return True
    except Exception as e:
        print(e)
        return False
    finally:
        if document is not None:
            try:
                document.Close(constants.wdDoNotSaveChanges)
            except Exception as e:
                print(e)
if __name__ == '__main__':
    # Folder that is searched (including sub-folders) for Word documents.
    sourcepath = r"E:/learn/test/doc/temp"
    # Folder that receives the generated PDF files.
    targetpath = r"E:/learn/test/doc/pdf/"

    # '.doc' selects both .doc and .docx files; start from a fresh list.
    filelist = getfilenames(filepath=sourcepath, filelist_out=[], file_ext='.doc')
    # PDF names are the document index zero-padded to 4 digits.
    valueList = wordtopdf(filelist, targetpath, 4)

    # An empty list and False are both falsy, so one message covers
    # "nothing to convert" and "conversion failed".
    print("轉換成功" if valueList else "沒有要轉換的Word文檔或者轉換失敗!")