需要用到的库:jieba    编辑器:VS Code    运行语言:Python

jieba 安装:在命令行(cmd)中输入 pip install jieba

import jieba
from collections import Counter
# Word-frequency counter: read a UTF-8 text file, delete unwanted symbols,
# segment the text with jieba, then print every distinct word with its count
# followed by the total number of words.

# Characters deleted from the text before segmentation.
# NOTE: the digit "1" is part of the original removal list and is kept as-is.
SYMBOLS_TO_REMOVE = ".,“;-”?1:'"


def remove_symbols(text, symbols=SYMBOLS_TO_REMOVE):
    """Return *text* with every character listed in *symbols* deleted."""
    # One str.translate pass replaces the previous per-character
    # str.replace loop, which rescanned the whole text for every match.
    return text.translate(str.maketrans("", "", symbols))


def keep_words(tokens):
    """Return *tokens* as a list without empty or whitespace-only entries.

    jieba emits spaces and line breaks as tokens of their own; they are not
    words, so they must not appear in the per-word output or the total.
    """
    return [token for token in tokens if token.strip()]


def main(path="test.txt"):
    """Print the segmentation of the file at *path* and its word counts.

    Output, in order: the words joined by "/", one "word:count" line per
    distinct word (in order of first appearance), and the total word count.
    """
    # Explicit utf-8 so reading does not depend on the platform default
    # encoding; the with-block closes the file even if read() raises.
    with open(path, "r", encoding="utf-8") as handle:
        text = handle.read()

    words = keep_words(jieba.lcut(remove_symbols(text)))
    print("/".join(words))  # segmentation result, joined with forward slashes

    # Counter preserves first-seen order, so the report is deterministic
    # (iterating a set of the words gave an arbitrary order).
    frequencies = Counter(words)
    for word, count in frequencies.items():
        print(word + ":" + str(count))
    print("总个数:" + str(sum(frequencies.values())))


if __name__ == "__main__":
    main()

 提示:如果代码没有高亮显示,请刷新页面。