我不說話,只寫代碼。
import re
# Matches a NUL byte or either byte of a UTF-16 byte-order mark (0xFF / 0xFE).
# Compiled once so the per-character predicate does not repeat the lookup.
_NULL_OR_BOM_RE = re.compile(r'[\x00\xff\xfe]')


def filter_null(c):
    """Return True when *c* should be kept, False when it should be dropped.

    Intended as a predicate for ``filter()``. *c* is rejected when it starts
    with ``\\x00``, ``\\xff`` or ``\\xfe``; anything else, including an empty
    string, is accepted. Only the start of *c* is examined, because the
    pattern is applied with ``match`` rather than ``search``.
    """
    # Explicit boolean instead of falling off the end with an implicit None.
    return _NULL_OR_BOM_RE.match(c) is None
def convert_to_utf8(src_path, dst_path, src_encoding='utf-16', echo=False):
    """Transcode the text file *src_path* into a UTF-8 file at *dst_path*.

    The input is decoded with *src_encoding* rather than by stripping NUL and
    BOM bytes, so non-ASCII characters survive the conversion and line breaks
    are kept in the output.

    Args:
        src_path: Path of the file to read.
        dst_path: Path of the UTF-8 file to create or overwrite.
        src_encoding: Codec name used to decode the input. The default
            ``'utf-16'`` codec consumes a leading byte-order mark.
        echo: When true, print each converted line as it is written.

    Raises:
        IOError/OSError: If either file cannot be opened.
        UnicodeError: If the input is not valid in *src_encoding*.
    """
    # Imported here so this block does not rely on a module-level `import io`.
    import io

    # The source is opened first: a missing input must not create or truncate
    # the destination file.
    with io.open(src_path, 'r', encoding=src_encoding) as src:
        with io.open(dst_path, 'w', encoding='utf-8') as dst:
            for line in src:
                dst.write(line)
                if echo:
                    # print() supplies its own newline, so drop the line's.
                    print(line.rstrip(u'\n'))


if __name__ == '__main__':
    import io

    # sec.txt is the "Unicode" (UTF-16) encoded input; it does not have to be
    # a .txt file. save.txt receives the UTF-8 result of the conversion.
    convert_to_utf8('sec.txt', 'save.txt', echo=True)

    # Read the result back to show what was written.
    with io.open('save.txt', 'r', encoding='utf-8') as f_utf8:
        print(f_utf8.read())