时间:2021-07-01 10:21:17 帮助过:58人阅读
下面的这个 Python 库可以很容易地将汉字转换成拼音,其中用到了一个名为 word.data 的字典文件,可点击此处从本站下载。
- #!/usr/bin/env python
- # -*- coding:utf-8 -*-
- __version__ = '0.9'
- __all__ = ["PinYin"]
- import os.path
class PinYin(object):
    """Convert Chinese characters to pinyin through a code-point lookup table.

    The table is read from ``dict_file`` by :meth:`load_word`.  Until that
    method has been called ``word_dict`` is empty, so every character is
    returned unchanged.
    """

    def __init__(self, dict_file='word.data'):
        """Remember the dictionary path; the file itself is not read here.

        Args:
            dict_file: Path of the dictionary file parsed by ``load_word``.
        """
        # Maps an upper-case hex code point (e.g. '6B22') to its reading(s).
        self.word_dict = {}
        self.dict_file = dict_file

    def load_word(self):
        """Populate ``word_dict`` from ``dict_file``.

        Every usable line consists of a key, a run of whitespace (spaces or
        tabs) and the reading(s).  Lines with fewer than two fields, such as
        blank lines, are skipped instead of aborting the whole load.

        Raises:
            IOError: With the message "NotFoundFile" when ``dict_file`` does
                not exist.
        """
        if not os.path.exists(self.dict_file):
            raise IOError("NotFoundFile")
        # open() instead of file(): the latter only exists on Python 2.
        with open(self.dict_file) as f_obj:
            for f_line in f_obj:
                # maxsplit=1 keeps all readings together in the second field
                # and tolerates any width/kind of separator whitespace.
                fields = f_line.split(None, 1)
                if len(fields) < 2:
                    continue
                self.word_dict[fields[0]] = fields[1].strip()

    def hanzi2pinyin(self, string=""):
        """Return a list holding one pinyin string per character of *string*.

        For a character found in ``word_dict`` the first reading is taken,
        its final character is dropped (presumably a tone digit -- confirm
        against the dictionary file) and the rest is lower-cased.  Characters
        without a usable reading (ASCII, punctuation, whitespace, ...) are
        passed through unchanged rather than being emptied or raising.

        Args:
            string: Text to convert; byte strings are decoded as UTF-8.

        Returns:
            A list with exactly one entry per input character.
        """
        # On Python 2 ``bytes`` is ``str``, so this works on both 2 and 3.
        if isinstance(string, bytes):
            string = string.decode("utf-8")
        result = []
        for char in string:
            key = '%X' % ord(char)
            readings = self.word_dict.get(key, "").split()
            pinyin = readings[0][:-1].lower() if readings else ""
            # Fall back to the character itself when nothing usable is left.
            result.append(pinyin or char)
        return result

    def hanzi2pinyin_split(self, string="", split=""):
        """Convert *string* and optionally join the pieces with *split*.

        Args:
            string: Text to convert, see :meth:`hanzi2pinyin`.
            split: Separator placed between the converted characters.

        Returns:
            The list from :meth:`hanzi2pinyin` when *split* is the empty
            string, otherwise that list joined into one string by *split*.
        """
        result = self.hanzi2pinyin(string=string)
        if split == "":
            return result
        return split.join(result)
if __name__ == "__main__":
    # Small demo: load the default dictionary file and convert a sample
    # phrase, first as a list and then joined with "-".
    test = PinYin()
    test.load_word()
    string = "欢迎来到"
    # Parenthesised single-argument print works both as the Python 2
    # statement and as the Python 3 function.
    print("in: %s" % string)
    print("out: %s" % str(test.hanzi2pinyin(string=string)))
    print("out: %s" % test.hanzi2pinyin_split(string=string, split="-"))
希望本文所述对大家的Python程序设计有所帮助。