起步
中華文化博大精深,是中華民族的財富,吸收和繼承發揚中華文化,是現代每個炎黃子孫無可推卸的天職。
今天小編就教大家用 Python 寫一個腳本,實作漢字和拼音之間的轉換
pinyin.py
漢字轉拼音,With Python
Example:
from pinyin import PinYin
test = PinYin()
test.load_word()
test.hanzi2pinyin(string='釣魚島是中國的')
Out:
test.hanzi2pinyin(string='釣魚島是中國的')
['diao', 'yu', 'dao', 'shi', 'zhong', 'guo', 'de']
test.hanzi2pinyin_split(string='釣魚島是中國的', split="-")
diao-yu-dao-shi-zhong-guo-de
主程式
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Author:cleverdeng
E-mail:[email protected]
"""
__version__ = '0.9'
__all__ = ["PinYin"]
import os.path
class PinYin(object):
    """Convert Chinese characters to toneless, lower-case pinyin.

    The lookup table is read from a text file in which every line holds the
    upper-case hexadecimal Unicode code point of one character followed by
    one or more pinyin readings ending in a tone digit, for example
    ``3401 TIAN3 TIAN4``.
    """

    def __init__(self, dict_file='word.data'):
        """Store the dictionary path; the table stays empty until load_word()."""
        self.word_dict = {}
        self.dict_file = dict_file

    def load_word(self):
        """Fill ``self.word_dict`` from the file at ``self.dict_file``.

        Each key is a hexadecimal code point and each value is the
        whitespace-separated list of readings found on that line. Blank or
        malformed lines (no reading after the code point) are skipped.

        Raises:
            IOError: if the dictionary file does not exist.
        """
        if not os.path.exists(self.dict_file):
            raise IOError("NotFoundFile")

        with open(self.dict_file) as f_obj:
            # Iterate lazily instead of materialising every line first.
            for f_line in f_obj:
                # Split on the first run of whitespace so that both
                # space- and tab-separated dictionaries are accepted.
                fields = f_line.split(None, 1)
                if len(fields) < 2:
                    continue
                # Keys are looked up with '%X', which yields upper-case hex.
                self.word_dict[fields[0].upper()] = fields[1].strip()

    def hanzi2pinyin(self, string=""):
        """Return a list with one entry per character of ``string``.

        Characters present in the table are replaced by their first reading,
        lower-cased and with the trailing tone digits removed. Characters
        that are not in the table (ASCII letters, punctuation, whitespace,
        ...) are passed through unchanged.

        ``string`` may also be UTF-8 encoded bytes.
        """
        if isinstance(string, bytes):
            string = string.decode("utf-8")

        result = []
        for char in string:
            readings = self.word_dict.get('%X' % ord(char), "").split()
            if readings:
                # Strip only digits so a reading without a tone mark
                # does not lose its last letter.
                result.append(readings[0].rstrip("0123456789").lower())
            else:
                result.append(char)
        return result

    def hanzi2pinyin_split(self, string="", split=""):
        """Convert ``string`` and join the syllables with ``split``.

        When ``split`` is the empty string the list of syllables is returned
        as-is (same result as hanzi2pinyin); otherwise a single joined string
        is returned.
        """
        result = self.hanzi2pinyin(string=string)
        if split == "":
            return result
        return split.join(result)
if __name__ == "__main__":
    # Small demo: load the default dictionary, then show the sample text
    # as a syllable list and as a hyphen-joined string.
    converter = PinYin()
    converter.load_word()
    sample = "釣魚島是中國的"
    print("in: %s" % sample)
    syllables = converter.hanzi2pinyin(string=sample)
    print("out: %s" % str(syllables))
    hyphenated = converter.hanzi2pinyin_split(string=sample, split="-")
    print("out: %s" % hyphenated)
字典
這裡我們需要一個轉換庫(即程式預設讀取的 word.data),每行為漢字的 Unicode 十六進位碼位,後面接著對應的拼音與聲調數字:
3400 QIU1
3401 TIAN3 TIAN4
3404 KUA4
3405 WU3
3406 YIN3
340C SI4 YI2
3416 YE4
341C CHOU2
3421 NUO4
3424 QIU2
3428 XU4
3429 XING2
342B XIONG1
342C LIU2
342D LIN3
342E XIANG1
342F YONG1
3430 XIN4
3431 ZHEN3
3432 DAI4
3433 WU4
3434 PAN1
3437 MA3 MA4 MIAN2