黑马程序员技术交流社区

标题: Python生成词云图 [打印本页]

作者: 专注的一批    时间: 2020-4-2 13:44
标题: Python生成词云图
代码如下:

from os import path

from wordcloud import WordCloud

import matplotlib

matplotlib.use('TkAgg')

# Locate the directory that contains this script and read the source text.
d = path.dirname(__file__)

# `with` closes the file as soon as it has been read (the original left the
# handle open).  The explicit encoding avoids platform-dependent decoding;
# NOTE(review): assumes haha.txt is UTF-8 -- adjust if the file is not.
with open(path.join(d, 'haha.txt'), encoding='utf-8') as text_file:
    text = text_file.read()

# Build a word cloud from the text using WordCloud's default settings.
wordcloud = WordCloud().generate(text)

# Convert the cloud to a PIL image and display it with PIL's viewer.
image = wordcloud.to_image()
image.show()

from os import path

from PIL import Image

import numpy as np

import matplotlib

matplotlib.use('TkAgg')

from wordcloud import WordCloud, STOPWORDS

d = path.dirname(__file__)

# Read the whole source text.  `with` closes the file promptly and the
# explicit encoding avoids platform-dependent decoding.
# NOTE(review): assumes haha.txt is UTF-8 -- adjust if the file is not.
with open(path.join(d, 'haha.txt'), encoding='utf-8') as text_file:
    text = text_file.read()

# Load the mask image into a NumPy array.  Converting inside the `with`
# block copies the pixel data before the image file is closed.
with Image.open(path.join(d, "heart.png")) as mask_image:
    alice_mask = np.array(mask_image)

# Start from wordcloud's built-in stop words and add a custom one.
stopwords = set(STOPWORDS)
stopwords.add("HaHa")

# Configure the cloud: black background, at most 2000 words, drawn inside
# the mask, skipping the stop words collected above.
wc = WordCloud(
    background_color="black",
    max_words=2000,
    mask=alice_mask,
    stopwords=stopwords,
)

# Generate the word cloud from the text.
# (A stray advertisement line that had been pasted into the code here was
# removed: it was not valid Python and prevented the script from parsing.)
wc.generate(text)

# Display the generated image with PIL (works even without matplotlib).
image = wc.to_image()
image.show()

from os import path

from PIL import Image

import numpy as np

import matplotlib

matplotlib.use('TkAgg')

import matplotlib.pyplot as plt

from wordcloud import WordCloud, STOPWORDS

import jieba

d = path.dirname(__file__)

# Read the whole Chinese source text.  `with` closes the file promptly; the
# encoding is explicit because the platform default may not be UTF-8.
# NOTE(review): assumes 中文.txt is UTF-8 -- adjust if the file is not.
with open(path.join(d, '中文.txt'), encoding='utf-8') as text_file:
    text = text_file.read()

# Load the mask image into a NumPy array.  Converting inside the `with`
# block copies the pixel data before the image file is closed.
with Image.open(path.join(d, "dog.jpeg")) as mask_image:
    alice_mask = np.array(mask_image)

# Stop-word list file.
# NOTE(review): this path is relative to the current working directory,
# unlike the text and image above, which are resolved against `d`.
stopwords_path = 'stopwords.txt'

# Extra words to register in jieba's dictionary.
my_words_list = ['碌碌无为']

# Register user-supplied words with jieba's segmentation dictionary.
def add_word(list):  # noqa: A002 - parameter name kept for compatibility
    """Add every word in *list* to jieba's dictionary.

    Args:
        list: Iterable of words to register.  The name shadows the builtin
            but is kept so existing keyword callers keep working.

    Returns:
        None.  Each word is passed to ``jieba.add_word``; an empty iterable
        is a no-op.
    """
    for word in list:
        jieba.add_word(word)

# Register the custom vocabulary before segmenting the text.
add_word(my_words_list)

# Segment the text with jieba (cut_all=False) and join the tokens with
# single spaces.
wordlist = jieba.cut(text, cut_all=False)
wl = " ".join(wordlist)

# Load the stop-word list.  Decoding happens once, at the file boundary
# (this replaces the Python 2-only `unicode(...)` call), and `with` closes
# the file even if reading fails.
with open(stopwords_path, encoding='utf-8') as f_stop:
    f_stop_text = f_stop.read()

# One stop word per line.  splitlines() + strip() copes with CRLF line
# endings and stray spaces, which a plain split('\n') left attached to the
# words so they could never match a token.
f_stop_seg_list = [line.strip() for line in f_stop_text.splitlines()]

# Set membership makes each lookup O(1) instead of rescanning the whole
# stop-word list for every token.
stopword_set = set(f_stop_seg_list)

# Keep a token only if it is longer than one character after stripping and
# is not a stop word (the original expressed this with a for/else loop),
# then re-join the survivors into the space-separated string that
# WordCloud.generate() receives.
mywordlist = " ".join(
    word
    for word in wl.split(" ")
    if len(word.strip()) > 1 and word.strip() not in stopword_set
)

# Word-cloud settings, gathered in one place.  Stop words were already
# removed from the text, so no `stopwords` option is passed here.
cloud_options = {
    "background_color": "black",   # canvas colour
    "mask": alice_mask,            # image array that shapes the cloud
    "max_words": 2000,             # upper bound on the number of words drawn
    # A font file is given explicitly for the Chinese text
    # ("fangsong_GB2312.ttf" was the author's alternative choice).
    "font_path": "华文宋体.ttf",
    "max_font_size": 50,           # largest font size used
    # Integer seed for wordcloud's random generator; per the wordcloud
    # documentation this fixes the random state (it is not a count of
    # colour schemes).
    "random_state": 30,
}
wc = WordCloud(**cloud_options)

# Generate the cloud from the filtered, space-separated text.
myword = wc.generate(mywordlist)

# Show the cloud with matplotlib, hiding the axes.
plt.imshow(myword)
plt.axis("off")
plt.show()

词云图模糊怎么解决?

默认参数生成的图片分辨率较低。创建 WordCloud 时可以设置 scale 参数(默认为 1),数值越大,输出图片的分辨率越高,例如 WordCloud(scale=4)。

词云图重复怎么解决?

默认 collocations=True,此时词云会把经常相邻出现的两个词(二元词组)也当作一个词来统计,所以同一个关键词可能在图中重复出现。创建 WordCloud 时将其设置为 False 即可解决,例如 WordCloud(collocations=False)。

其他

在 Python 2 中直接打印包含中文字符串的列表时,中文会显示为 \uXXXX 形式的 Unicode 转义序列。可以用 json.dumps 并设置 ensure_ascii=False,让中文按原样输出:

import json

# Print the stop-word list as JSON with the Chinese characters left as-is
# (ensure_ascii=False) instead of \uXXXX escapes.  The original line used
# curly quotes and a Python 2 print statement, neither of which parses in
# Python 3, and passed `encoding=`, which Python 3's json.dumps does not
# accept.
print(json.dumps(f_stop_seg_list, ensure_ascii=False))





欢迎光临 黑马程序员技术交流社区 (http://bbs.itheima.com/) 黑马程序员IT技术论坛 X3.2