天天看點

Python讀取Unicode編碼格式的檔案(轉換為UTF-8)

我不說話，只寫代碼。

import io
import re

def filter_null(c):
    """Tell whether *c* should be kept when stripping UTF-16 byte residue.

    Written as a predicate for ``filter()``: it rejects the NUL padding
    byte (``\\x00``) and the two byte-order-mark bytes (``\\xff``,
    ``\\xfe``) and accepts everything else.

    ``re.match`` only inspects the start of the string, so for a longer
    string only its first character decides the result; an empty string
    is accepted.

    Args:
        c: A string, normally a single character.

    Returns:
        ``True`` if *c* should be kept, ``False`` otherwise.
    """
    # Always return a real bool instead of falling through to None for
    # rejected characters.
    return re.match(r'[\x00\xff\xfe]', c) is None

def convert_to_utf8(src_path, dst_path, src_encoding='utf-16', echo=False):
    """Transcode a text file to UTF-8, streaming it line by line.

    The source is decoded with a real codec rather than by deleting NUL
    and byte-order-mark bytes, so non-ASCII text survives the conversion.
    With the default ``'utf-16'`` codec the byte order is taken from the
    file's BOM, and the BOM itself is not copied to the output.

    Args:
        src_path: Path of the file to read.
        dst_path: Path of the UTF-8 file to create or overwrite.
        src_encoding: Codec name used to decode *src_path*.
        echo: When true, print each line (without its line ending) as it
            is converted.

    Raises:
        IOError / OSError: If either file cannot be opened.
        UnicodeError: If the source is not valid *src_encoding* data.
    """
    # newline='' on both handles turns off newline translation, so the
    # line endings found in the source are written back unchanged.
    with io.open(src_path, 'r', encoding=src_encoding, newline='') as src:
        with io.open(dst_path, 'w', encoding='utf-8', newline='') as dst:
            for line in src:
                dst.write(line)
                if echo:
                    print(line.rstrip('\r\n'))


if __name__ == '__main__':
    # sec.txt is the "Unicode" (UTF-16) encoded input; it does not have to
    # be a .txt file. save.txt receives the UTF-8 result.
    convert_to_utf8('sec.txt', 'save.txt', echo=True)

    # Read the result back to show that it is valid UTF-8.
    with io.open('save.txt', 'r', encoding='utf-8') as f_utf8:
        print(f_utf8.read())