"""Interactive transliterator from Chinese text to Cyrillic "ositien" syllables.

Run as a script: each input line is segmented with jieba, every Chinese
character is replaced by a syllable derived from its GB2312 code point, and
the resulting sentence is printed.
"""

# Punctuation ("biaodian") that is replaced one-for-one in the output.
# Tokens found here are glued to the preceding word and are not followed by
# a space.
# NOTE(review): several keys are ASCII and map to themselves; presumably the
# full-width forms were intended -- confirm against the original file.
biaodian = {'。':'.','?':'?','!':'!',',':',',';':';',':':':','“':'"','(':'(',')':')'}
# Vowels ("yuanyin"): the middle letter of every syllable.
yuanyin = ['а','и','й','у','е','о','я','ё']
# Consonants ("fuyin"): used for both the initial and the final letter of a
# syllable; the empty string means "no consonant".
fuyin = ['','к','с','т','н','х','м','р','л','г','з','д','б','п','в','ш','ч','ж']

# GB2312 layout: Chinese characters use lead bytes 0xB0..0xF7 and trail bytes
# 0xA1..0xFE.  Each lead byte is given a stride of 96 index slots.
_GB_FIRST_LEAD = 0xB0
_GB_LAST_LEAD = 0xF7
_GB_TRAIL_BASE = 0xA0
_GB_ROW_STRIDE = 96

# Mixed-radix split of the index: 18 initials x 8 vowels x 16 finals, with
# three consecutive indices sharing one final (18 * 384 == 72 * 96).
_PER_INITIAL = 384
_PER_VOWEL = 48
_PER_FINAL = 3

# Part-of-speech flags whose words are always capitalised.
_PROPER_NOUN_FLAGS = frozenset(('nr', 'ns', 'nt', 'nw', 'nz', 'PER', 'ORG'))
# Output tokens after which the next word is capitalised.
_CAPITALIZE_AFTER = ('.', '"')


def ositien(zi):
    """Transliterate a single character.

    Args:
        zi: A one-character string.

    Returns:
        For a Chinese character that GB2312 can encode: a syllable made of an
        initial consonant, a vowel and a final consonant, chosen from the
        character's position in the GB2312 table.  For a key of ``biaodian``:
        the mapped punctuation.  Anything else, including Chinese characters
        that GB2312 cannot encode, is returned unchanged.
    """
    if "\u4e00" <= zi <= "\u9fff":
        try:
            encoded = zi.encode('gb2312')
        except UnicodeEncodeError:
            # Not part of GB2312 (e.g. a traditional form): leave it as is
            # instead of aborting the whole sentence.
            return zi
        lead, trail = encoded[0], encoded[1]
        if not _GB_FIRST_LEAD <= lead <= _GB_LAST_LEAD:
            return zi
        # Both bytes contribute their full value; weighting the trail byte's
        # high nibble by anything other than 16 would make half of the vowel
        # table unreachable.
        index = (lead - _GB_FIRST_LEAD) * _GB_ROW_STRIDE + (trail - _GB_TRAIL_BASE)
        initial, rest = divmod(index, _PER_INITIAL)
        vowel, rest = divmod(rest, _PER_VOWEL)
        final = rest // _PER_FINAL
        return fuyin[initial] + yuanyin[vowel] + fuyin[final]
    if zi in biaodian:
        return biaodian[zi]
    return zi


def _render_sentence(tagged_tokens):
    """Build the output sentence from ``(token, pos_flag)`` pairs.

    Words are separated by single spaces, punctuation is attached to the word
    before it, and a word is capitalised when it is a proper noun, opens the
    sentence, or follows ``.`` or ``"``.
    """
    sentence = ''
    previous = ''  # transliteration of the previous token; '' at the start
    for token, flag in tagged_tokens:
        if any(zi in biaodian for zi in token):
            # Drop the separator so punctuation hugs the preceding word.
            sentence = sentence.rstrip()
        word = ''.join(ositien(zi) for zi in token)
        # Quote marks have no case, so they need no special handling here.
        if flag in _PROPER_NOUN_FLAGS or previous == '' or previous in _CAPITALIZE_AFTER:
            word = word.capitalize()
        sentence += word
        if token not in biaodian:
            sentence += ' '
        previous = word
    return sentence.rstrip()


def main():
    """Prompt for text until EOF or Ctrl-C and print each transliteration."""
    # Imported here rather than at module level so that the pure
    # transliteration helpers can be imported without jieba installed.
    import jieba
    import jieba.posseg as pseg

    def tagged(text):
        for token in jieba.cut(text):
            # The tagger may split the token again; only the flag of its
            # first piece is used.
            first = next(iter(pseg.cut(token)), None)
            yield token, (first.flag if first is not None else '')

    while True:
        try:
            wenben = input('输入文本:')
        except (EOFError, KeyboardInterrupt):
            print()
            break
        print(_render_sentence(tagged(wenben)))


if __name__ == '__main__':
    main()