def main(input_filename):
    """Render one word-cloud image per template picture for a text file.

    The text in ``input_filename`` (read as UTF-8) is segmented with
    ``jieba.cut``; each segment is stripped and lower-cased, and segments
    that are at most one character long or listed in the stopword file are
    dropped. The remaining words are counted, and the 4000 most frequent
    ones are drawn into a ``WordCloud`` for every ``.png``/``.jpg`` file
    found in ``template_dir``, using that picture both as mask and as the
    colour source.

    Each image is saved in the current working directory as
    ``<template name up to its first dot>_<input base name without its
    last extension>.png``.

    Relies on the module-level names ``stopwords_filename``,
    ``template_dir`` and ``font_filename`` being defined elsewhere in the
    file.

    Args:
        input_filename: Path of the UTF-8 text file to analyse.
    """
    # Local import so the block does not depend on how the file imports os.
    from os.path import basename

    # Context managers close the files; the original leaked both handles.
    with codecs.open(input_filename, 'r', 'utf-8') as input_file:
        stripped_lines = (line.strip() for line in input_file)
        content = '\n'.join([line for line in stripped_lines if line])

    with codecs.open(stopwords_filename, 'r', 'utf-8') as stopwords_file:
        stopwords = set(line.strip() for line in stopwords_file)

    words = []
    for seg in jieba.cut(content):
        word = seg.strip().lower()
        if len(word) > 1 and word not in stopwords:
            words.append(word)

    words_df = pandas.DataFrame({'word': words})
    # groupby(...).size() replaces agg({'number': np.size}): the dict-renaming
    # form of agg was deprecated and later removed from pandas.
    words_stat = words_df.groupby('word').size().reset_index(name='number')
    words_stat = words_stat.sort_values(by='number', ascending=False)
    print('# of different words = %d' % len(words_stat))

    # The frequency table does not depend on the template, so build it once.
    frequencies = dict(words_stat.head(4000).itertuples(index=False))

    # Use only the base name: keeping directory components would produce an
    # output path such as "<template>_some/dir/name.png", which points into a
    # directory that normally does not exist.
    input_prefix = basename(input_filename).rsplit('.', 1)[0]

    for template_name in listdir(template_dir):
        if not template_name.endswith(('.png', '.jpg')):
            continue
        background_picture_filename = join(template_dir, template_name)
        if not isfile(background_picture_filename):
            continue

        prefix = template_name.split('.')[0]
        bimg = imread(background_picture_filename)
        wordcloud = WordCloud(
            font_path=font_filename,
            background_color='white',
            mask=bimg,
            max_font_size=600,
            random_state=100,
        )
        wordcloud = wordcloud.fit_words(frequencies)
        # Recolour the words with the colours of the template picture.
        wordcloud.recolor(color_func=ImageColorGenerator(bimg))

        output_filename = prefix + '_' + input_prefix + '.png'
        print('Saving %s' % output_filename)
        wordcloud.to_file(output_filename)
create_word_cloud.py 文件源码
python
阅读 19
收藏 0
点赞 0
评论 0
评论列表
文章目录