代码如下:
from os import path
from wordcloud import WordCloud
import matplotlib
matplotlib.use('TkAgg')

# Locate the script's directory and read the input text.
d = path.dirname(__file__)
# FIX: use a context manager with an explicit encoding; the original
# leaked the file handle and depended on the locale's default codec.
with open(path.join(d, 'haha.txt'), encoding='utf-8') as f:
    text = f.read()

# Build a word cloud image from the raw text.
wordcloud = WordCloud().generate(text)

# Display via PIL (no matplotlib figure needed).
image = wordcloud.to_image()
image.show()
from os import path
from PIL import Image
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
from wordcloud import WordCloud, STOPWORDS

d = path.dirname(__file__)

# Read the whole text.
# FIX: context manager + explicit encoding; the original leaked the handle.
with open(path.join(d, 'haha.txt'), encoding='utf-8') as f:
    text = f.read()

# Load the mask image; its non-white area defines where words may be drawn.
alice_mask = np.array(Image.open(path.join(d, "heart.png")))

# Extend the built-in stop-word list.
stopwords = set(STOPWORDS)
stopwords.add("HaHa")

# Configure the word cloud.
wc = WordCloud(background_color="black", max_words=2000, mask=alice_mask,
               stopwords=stopwords)

# Generate the cloud.
# FIX: removed an injected advertising line that was not valid Python and
# would have raised a SyntaxError at import time.
wc.generate(text)

# Display via PIL (works even without matplotlib).
image = wc.to_image()
image.show()
from os import path
from PIL import Image
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
import jieba
d = path.dirname(__file__)

# Read the whole (Chinese) text.
# FIX: context manager + explicit UTF-8 encoding; the original leaked the
# handle and relied on the locale's default codec.
with open(path.join(d, '中文.txt'), encoding='utf-8') as f:
    text = f.read()

# Load the mask image that shapes the word cloud.
alice_mask = np.array(Image.open(path.join(d, "dog.jpeg")))

stopwords_path = 'stopwords.txt'  # stop-word list, one word per line
my_words_list = ['碌碌无为']  # new words to register with jieba's dictionary
def add_word(words):
    """Register each word in *words* with jieba's user dictionary so the
    tokenizer keeps it as one token instead of splitting it."""
    # FIX: renamed the parameter from `list`, which shadowed the builtin.
    for word in words:
        jieba.add_word(word)


add_word(my_words_list)
# Tokenize with jieba (precise mode) and join tokens with spaces — the
# format WordCloud expects for pre-segmented text.
wordlist = jieba.cut(text, cut_all=False)
wl = " ".join(wordlist)

# Drop stop words and single-character tokens.
mywordlist = []
# FIX: read the stop-word file with a context manager and an explicit
# UTF-8 encoding instead of the Python-2-only `unicode()` call, which
# raises NameError on Python 3.
with open(stopwords_path, encoding='utf-8') as f_stop:
    f_stop_text = f_stop.read()
f_stop_seg_list = f_stop_text.split('\n')

for myword in wl.split(" "):
    for stopword in f_stop_seg_list:
        if myword.strip() == stopword or len(myword.strip()) <= 1:
            break
    else:
        # for/else: runs only when the inner loop finished without a
        # break, i.e. no stop word matched and the token is long enough.
        mywordlist.append(myword)
mywordlist = " ".join(mywordlist)

# Configure the word cloud.
wc = WordCloud(background_color="black",  # background colour
               mask=alice_mask,           # mask image shaping the cloud
               max_words=2000,            # cap on displayed words
               # font_path="fangsong_GB2312.ttf",
               font_path="华文宋体.ttf",   # a font with CJK glyphs is required
               max_font_size=50,          # largest font size used
               random_state=30,           # number of random colour schemes
               )

myword = wc.generate(mywordlist)  # build the cloud

# Show it with matplotlib.
plt.imshow(myword)
plt.axis("off")
plt.show()
词云图模糊怎么解决?
默认的参数图片分辨率较低,设置scale参数,参数越大,分辨率越高。
词云图重复怎么解决?
默认collocations=True,我们将它设置为False就行了,具体原理,好像是把相邻的两词算作一个词。collocations=False #对于关键词重复的问题,我们可以使用collocations参数来解决
其他
在 Python 2 中直接输出中文字符串列表,会展示为 unicode 转义后的格式,可以借助 json 还原为可读的中文:
import json
print(json.dumps(f_stop_seg_list, ensure_ascii=False))