Python使用jieba库统计《西游记》中出现次数最高的20个词语
时间:2025-07-12 09:24来源: 作者:admin 点击:
201 次
import jieba txt = open("西游记.txt", "r", encoding='utf-8').read() words = jieba.lcut(txt) # 使用精确模式对
|
<p>
import jieba
# Path of the UTF-8 text to analyse and the number of top entries to print.
TEXT_PATH = "西游记.txt"
TOP_N = 20

# Maps every alias of a character to the single name it is counted under.
# NOTE(review): "行者", "孙行者", "猴子", "呆子", "沙和尚" and "白马" were
# restored from spellings that appear to have been mangled in transit
# ("止者", "孙止者", "山公", "皂痴", "沙僧人", "皂马") -- confirm against the
# original script.
_ALIASES = {
    "大圣": "孙悟空",
    "老孙": "孙悟空",
    "行者": "孙悟空",
    "孙大圣": "孙悟空",
    "孙行者": "孙悟空",
    "猴王": "孙悟空",
    "悟空": "孙悟空",
    "齐天大圣": "孙悟空",
    "猴子": "孙悟空",
    "师父": "唐僧",
    "三藏": "唐僧",
    "圣僧": "唐僧",
    "呆子": "猪八戒",
    "八戒": "猪八戒",
    "老猪": "猪八戒",
    "沙和尚": "沙僧",
    "妖精": "妖怪",
    "妖魔": "妖怪",
    "妖道": "妖怪",
    "佛祖": "如来",
    "三太子": "白马",
}


def count_words(words):
    """Count occurrences of each word, merging known aliases.

    Args:
        words: Iterable of segmented words (strings).

    Returns:
        A dict mapping each canonical word to its number of occurrences.
        Words of length 1 are skipped; words listed in ``_ALIASES`` are
        counted under their canonical name.
    """
    counts = {}
    for word in words:
        # Single-character tokens (punctuation, particles) are not counted.
        if len(word) == 1:
            continue
        name = _ALIASES.get(word, word)
        counts[name] = counts.get(name, 0) + 1
    return counts


def top_items(counts, n=TOP_N):
    """Return up to ``n`` ``(word, count)`` pairs, highest count first.

    Ties keep the order in which the words were first counted (the sort is
    stable). Fewer than ``n`` pairs are returned when ``counts`` is small,
    instead of raising ``IndexError``.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:n]


def main():
    """Read the text, segment it with jieba and print the top words."""
    # The context manager closes the file even if reading fails.
    with open(TEXT_PATH, "r", encoding="utf-8") as text_file:
        txt = text_file.read()
    # jieba.lcut with default arguments segments in precise mode.
    words = jieba.lcut(txt)
    for word, count in top_items(count_words(words)):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()
</p>
<p>
(责任编辑:) |
------分隔线----------------------------