def showData(self):
    """Display ``self.data`` as a word cloud shaped and colored by an image.

    The image at ``self.picfile`` is read once and used both as the cloud
    mask and as the input of an ``ImageColorGenerator`` that colors the
    words. Word weights come from ``self.data`` via
    ``generate_from_frequencies``. The result is shown with ``plt.show()``.
    """
    print('???,????···')
    shape_image = imread(self.picfile)
    cloud_options = dict(
        font_path='./msyhl.ttc',
        mask=shape_image,
        background_color='white',
        max_font_size=200,
        max_words=300,
        color_func=ImageColorGenerator(shape_image),
    )
    rendered = WordCloud(**cloud_options).generate_from_frequencies(self.data)

    plt.figure()
    plt.imshow(rendered)
    plt.axis('off')
    print('?????')
    plt.show()
# Example source code using the Python class WordCloud()
def create_wordcloud(corpus, output, stopword_dict):
    """Build a word cloud from ``corpus`` and save it to ``output``.

    Args:
        corpus: Input handed to ``build_lex_dic``.
        output: Destination passed to ``Figure.savefig``.
        stopword_dict: Forwarded to ``build_lex_dic`` as ``stopword_dict``.
    """
    lex_dic = build_lex_dic(corpus, stopword_dict=stopword_dict)
    total_words = get_total_words(lex_dic)
    ordered_freq_list = build_freq_list(lex_dic, total_words)

    fig = plt.figure(figsize=(10, 8), frameon=False)
    try:
        # One axes spanning the whole figure, with all decorations hidden.
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)

        # Only the first 100 entries of the frequency list are drawn.
        wordcloud = WordCloud(
            width=1000, height=800, max_words=100, background_color='white',
            relative_scaling=0.7, random_state=15, prefer_horizontal=0.5,
        ).generate_from_frequencies(ordered_freq_list[0:100])
        wordcloud.recolor(random_state=42, color_func=my_color_func)

        ax.imshow(wordcloud)
        fig.savefig(output, facecolor='white')
    finally:
        # Figures created through pyplot stay registered until closed;
        # release this one even when rendering or saving fails.
        plt.close(fig)
def get_plot(limit, txt, wc_mask=wc_mask, stop=english_stopwords):
    """Render a word cloud of ``txt`` and return it as base64-encoded PNG text.

    Args:
        limit: Passed to ``WordCloud`` as ``max_words``.
        txt: Text handed to ``WordCloud.generate``.
        wc_mask: Passed to ``WordCloud`` as ``mask``.
        stop: Passed to ``WordCloud`` as ``stopwords``.

    Returns:
        The PNG image bytes, base64-encoded and decoded to ``str``.
    """
    wordcloud = WordCloud(
        max_words=limit,
        stopwords=stop,
        mask=wc_mask,
    ).generate(txt)

    fig = plt.figure()
    try:
        fig.set_figwidth(8)
        fig.set_figheight(8)
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis('off')
        figfile = BytesIO()
        fig.savefig(figfile, format='png')
    finally:
        # Without this every call leaves one more open figure behind.
        plt.close(fig)

    # getvalue() returns the whole buffer regardless of the stream position.
    return base64.b64encode(figfile.getvalue()).decode()
def create_cloud(self):
    """Draw a word cloud of ``self.return_txt()`` over ``self.return_img()``.

    The image is drawn first in gray scale; the cloud, recolored from the
    same image, is then drawn on top with 0.8 alpha.

    Returns:
        The ``plt`` module object, with both layers drawn.
    """
    # Original notes: the text is Bing search snippets, the image comes from Bing.
    snippet_text = self.return_txt()
    mask_pixels = np.array(self.return_img())

    excluded = set(STOPWORDS)
    # Potential feature: excluded.add(search_modifier)

    cloud = WordCloud(background_color="white", mask=mask_pixels, stopwords=excluded)
    cloud.generate(snippet_text)
    palette = ImageColorGenerator(mask_pixels)

    plt.imshow(mask_pixels, cmap=plt.cm.gray, interpolation="None")
    recolored = cloud.recolor(color_func=palette)
    plt.imshow(recolored, alpha=.8, interpolation='None')
    plt.axis("off")
    return plt
def full_wordcloud():
    """Generate one word cloud image from the content of all posts.

    The ``content`` of every ``Post`` is concatenated, passed through
    ``words_wo_stopwords`` and rendered; the picture is saved as
    ``images/wordcloud.png`` under ``settings.STATIC_ROOT``. Any exception
    raised along the way is caught and printed.
    """
    try:
        rows = Post.objects.filter().values("content")
        combined = "".join(row["content"] + " " for row in rows)
        cleaned = words_wo_stopwords(text=combined)

        cloud = WordCloud(
            max_font_size=40,
            background_color="rgba(255, 255, 255, 0)",
            width=350,
            height=600,
            mode="RGBA",
        ).generate(cleaned)

        # Frameless figure whose single axes fills the whole canvas.
        figure = plt.figure(frameon=False)
        figure.patch.set_visible(False)
        axes = figure.add_axes([0, 0, 1, 1])
        axes.axis('off')
        axes.imshow(cloud, interpolation='bilinear')

        target = join(settings.STATIC_ROOT, 'images', 'wordcloud.png')
        plt.savefig(target)
        plt.close()
    except Exception as err:
        print(err)
def posts_wordcloud():
    """Generate a word cloud image for each post that lacks one.

    Posts with empty content are excluded. For every remaining post the
    image is only created when no file exists yet at
    ``<STATIC_ROOT>/wordcloud/<slug>.png`` and ``len()`` of the filtered
    text exceeds 100; the post's ``wordcloud`` field is then set and the
    post saved. Exceptions are caught per post and printed.
    """
    # NOTE(review): the nesting below was reconstructed from a paste that
    # had lost its indentation -- confirm against the original project.
    for post in Post.objects.filter().exclude(content=""):
        try:
            image_file = join(settings.STATIC_ROOT, "wordcloud", "{0}.png".format(post.slug))
            if isfile(image_file):
                continue

            text = words_wo_stopwords(text=post.content)
            if len(text) <= 100:
                continue

            cloud = WordCloud(
                max_font_size=40,
                background_color="rgba(255, 255, 255, 0)",
                width=800,
                height=350,
                mode="RGBA",
            ).generate(text)

            figure = plt.figure(frameon=False)
            figure.patch.set_visible(False)
            axes = figure.add_axes([0, 0, 1, 1])
            axes.axis('off')
            axes.imshow(cloud, interpolation='bilinear')
            plt.savefig(image_file)
            plt.close()

            post.wordcloud = "static/wordcloud/{0}.png".format(post.slug)
            post.save()
        except Exception as err:
            print(err)
def make_wordcloud(entry):
    """Create the word cloud image for a single post.

    The image is written to ``<STATIC_ROOT>/wordcloud/<slug>.png`` and
    ``entry.wordcloud`` is set, but only when ``len()`` of the filtered
    text of ``entry.content`` exceeds 100. ``entry`` is not saved here.

    Returns:
        The same ``entry`` object that was passed in.
    """
    # NOTE(review): the nesting was reconstructed from a paste that had
    # lost its indentation -- confirm against the original project.
    text = words_wo_stopwords(text=entry.content)
    if len(text) <= 100:
        return entry

    cloud = WordCloud(
        max_font_size=60,
        background_color="rgba(255, 255, 255, 0)",
        mode="RGBA",
    ).generate(text)

    figure = plt.figure(frameon=False)
    figure.patch.set_visible(False)
    axes = figure.add_axes([0, 0, 1, 1])
    axes.axis('off')
    axes.imshow(cloud, interpolation='bilinear')

    target = join(settings.STATIC_ROOT, "wordcloud", "{0}.png".format(entry.slug))
    plt.savefig(target)
    plt.close()

    entry.wordcloud = "static/wordcloud/{0}.png".format(entry.slug)
    return entry
def populateCaches(self):
    """Refresh ``self.serverCache`` from the database and redraw the cloud.

    The ``msgs`` column of ``self.tablename`` is read through
    ``self.bot.conn_wc``. When no rows come back, or when any exception
    occurs, ``self.backupArr`` is used instead. The cached strings are then
    joined with spaces, rendered, and written to ``self.serverImage``.
    """
    try:
        cursor = self.bot.conn_wc.cursor()
        # The table name is concatenated into the statement; no LIMIT clause.
        cursor.execute("SELECT msgs FROM "+self.tablename)
        messages = [row[0] for row in cursor.fetchall()]
        self.serverCache = messages if len(messages) >= 1 else self.backupArr
    except Exception as e:
        print("server cache retrieval error: \n", e)
        self.serverCache = self.backupArr

    joined = " ".join(self.serverCache)
    print("generating word cloud")
    cloud = WordCloud(
        width=1024,
        height=1024,
        max_words=200000,
        stopwords=self.STOPWORDS,
    ).generate(joined)
    cloud.to_file(self.serverImage)
def get_result(url_set):
    """Scrape ``span.ctt`` text from each URL and show it as a word cloud.

    Args:
        url_set: Iterable of URLs fetched with ``requests.get`` using the
            module-level ``headers``.
    """
    line_set = []
    for url in url_set:
        wb_data = requests.get(url, headers=headers)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        for span in soup.select('span.ctt'):
            text = re.sub(r'<[^>]*>', '', span.text)
            # NOTE(review): this pattern looks mis-encoded (the original
            # characters were lost); as written '??' is not a valid regular
            # expression and re.sub raises re.error. Restore the intended
            # characters from the original source.
            text = re.sub('??', ' ', text)
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            text = re.sub(r'[\W]+', ' ', text)
            line_set.append(text)

    # Segment every line with jieba, then join everything with spaces.
    word_list = [" ".join(jieba.cut(sentence)) for sentence in line_set]
    new_text = ' '.join(word_list)

    wordcloud = WordCloud(
        font_path="C:/Python34/Lib/site-packages/wordcloud/simhei.ttf",
        background_color="black",
    ).generate(new_text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
def _get_wordcloud(img, patch, words, word_to_frequency=None, **wordcloud_kwargs):
    """Create a word cloud restricted to the area covered by ``patch``.

    Args:
        img: Object providing ``pixel_coordinates``, ``x_resolution`` and
            ``y_resolution``.
        patch: Object whose ``get_path()`` result supports ``contains_points``.
        words: Words to draw.
        word_to_frequency: Optional mapping from word to weight. When it is
            falsy the words are joined into a text and ``generate`` is used.
        **wordcloud_kwargs: Extra keyword arguments for ``WordCloud``.

    Returns:
        The generated ``WordCloud`` instance.
    """
    outline = patch.get_path()
    inside = outline.contains_points(img.pixel_coordinates)
    inside = inside.reshape((img.y_resolution, img.x_resolution))

    # Invert and scale to uint8 (per the original note: black marks the
    # patch position), then flip vertically because the image origin is
    # in the upper left.
    mask = np.flipud((~inside * 255).astype(np.uint8))

    cloud = WordCloud(mask=mask, background_color=None, mode="RGBA", **wordcloud_kwargs)
    if word_to_frequency:
        weights = {word: word_to_frequency[word] for word in words}
        cloud.generate_from_frequencies(weights)
    else:
        cloud.generate(" ".join(words))
    return cloud
def world_cloud():
    """Draw a word cloud from ``data/post_pre_desc_counter.csv``.

    Column 0 of each CSV row is the word and column 1 its integer count;
    when a word appears more than once the first count is kept. The cloud
    is shown on screen and also written to ``images/wc.jpg``.
    """
    counter = {}
    csv_path = os.path.join("data", "post_pre_desc_counter.csv")
    with open(csv_path, "r", encoding="utf-8") as f:
        for row in csv.reader(f):
            # setdefault keeps an existing entry and only inserts new words.
            counter.setdefault(row[0], int(row[1]))
    pprint(counter)

    font_file = os.path.join("font", "msyh.ttf")
    cloud = WordCloud(font_path=font_file, max_words=100, height=600, width=1200)
    wc = cloud.generate_from_frequencies(counter)

    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    wc.to_file(os.path.join("images", "wc.jpg"))
def plot_cloud(text):
    """Render a word cloud of ``text`` and return it as PNG bytes.

    The mask is ``mav_mask.png`` in the module-level directory ``d``; the
    stop words are ``STOPWORDS`` combined with ``common_words``.

    Args:
        text: Text handed to ``WordCloud.generate``.

    Returns:
        The bytes written by ``print_png`` for the rendered figure.
    """
    # Alternative mask: "uno_mask.png" with max_words = 200.
    mask, max_words = np.array(Image.open(path.join(d, "mav_mask.png"))), 300
    stopwords = STOPWORDS.union(common_words)
    wordcloud = WordCloud(
        background_color="white", width=2400, height=2400, mask=mask,
        stopwords=stopwords, max_words=max_words,
    ).generate(text)

    # Use a dedicated figure: drawing on plt.gcf() without ever closing it
    # stacks a new image onto the same shared figure on every call.
    fig = plt.figure()
    try:
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis("off")
        fig.set_size_inches(18.5, 10.5)
        png_output = BytesIO()
        FigureCanvas(fig).print_png(png_output)
    finally:
        plt.close(fig)
    return png_output.getvalue()
def wcloud(text):
    """Generate a masked word cloud from ``text`` and save it as a PNG.

    The mask is read from ``face_mask.png``. ``text`` is passed through
    ``clean_text`` before generation. The output file name is asked for
    interactively and ``.png`` is appended to it.
    """
    face = np.array(Image.open("face_mask.png"))  # choose mask
    settings = dict(
        background_color="white",
        mask=face,
        max_words=80,
        stopwords=set(STOPWORDS),
        width=800,
        height=400,
        mode="RGB",
        relative_scaling=0.5,
    )
    cloud = WordCloud(**settings)
    cloud.generate(clean_text(text))

    # Save the image under a user-chosen name.
    file_name = raw_input("Enter any name for the Word Cloud image:") +'.png'
    cloud.to_file(file_name)
    return
def lyrics():
    """Segment the texts in ``lyrics.json`` and show them as a word cloud.

    Every value of the JSON object is cut with ``jieba``; segments that are
    whitespace-only or a single character long are dropped. The ten most
    common tokens are printed before the cloud is displayed.
    """
    with open('lyrics.json', 'r', encoding='utf-8') as f:
        songs = json.load(f)

    tokens = []
    for body in songs.values():
        # Keep segments that contain non-whitespace and are longer than 1.
        tokens.extend(seg for seg in jieba.cut(body) if seg.split() and len(seg) > 1)

    # Count how often each token occurs.
    counter = Counter(tokens)
    print(counter.most_common(10))

    # The cloud itself is generated from the space-joined token list.
    joined = ' '.join(tokens)
    wcloud = WordCloud(font_path='NotoSansMonoCJKtc-Regular.otf').generate(joined)
    plt.imshow(wcloud)
    plt.axis('off')
    plt.show()
def generateWordCloud(text):
    """Show a word cloud of ``text`` masked by ``us-mask-white.png``.

    The mask image is loaded from the directory of this file. ``"said"`` is
    added to the default stop words.
    """
    # Read the mask / color image located next to this module.
    # Original note: taken from
    # http://jirkavinse.deviantart.com/art/quot-Real-Life-quot-Alice-282261010
    here = path.dirname(__file__)
    cloud_coloring = np.array(Image.open(path.join(here, "us-mask-white.png")))

    ignored = set(STOPWORDS)
    ignored.add("said")

    wc = WordCloud(
        background_color="black",
        max_words=2000,
        mask=cloud_coloring,
        stopwords=ignored,
        max_font_size=40,
        random_state=42,
    )
    wc.generate(text)

    # NOTE(review): the color generator is built but never applied to the
    # cloud -- confirm whether a recolor() call was intended.
    image_colors = ImageColorGenerator(cloud_coloring)

    plt.imshow(wc)
    plt.axis("off")
    plt.show()
def generateTable(text, n=5):
    """Print the ``n`` most frequent terms of ``text`` as a table.

    Frequencies come from ``WordCloud.process_text``, using the same
    settings and stop words (``STOPWORDS`` plus ``"said"``) as the cloud.

    Args:
        text: Text to analyze.
        n: Number of rows to print (default 5).
    """
    # Start by getting a frequency dictionary.
    d = path.dirname(__file__)
    cloud_coloring = np.array(Image.open(path.join(d, "us-mask-white.png")))
    stopwords = set(STOPWORDS)
    stopwords.add("said")
    wc = WordCloud(background_color="black", max_words=2000, mask=cloud_coloring,
                   stopwords=stopwords, max_font_size=40, random_state=42)
    frequenciesDict = wc.process_text(text)

    # Materialize the dict views so the frame gets plain lists on Python 3.
    frequencies = pd.DataFrame({
        'words': list(frequenciesDict.keys()),
        'frequencies': list(frequenciesDict.values()),
    })
    frequencies.sort_values('frequencies', ascending=False, inplace=True)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    # The header reflects the requested row count instead of a fixed "5".
    print('\nTop %s Terms\n' % n)
    print(frequencies.head(n=n).to_string(index=False))
    print('\n')
def generate_image():
    """Build a keyword word cloud from ``weibo1.txt`` and save it.

    Up to 20 tags are extracted from every line with ``jieba.analyse``
    (stop words from ``./stopwords.txt``). The cloud is masked by
    ``./52f90c9a5131c.jpg``, recolored with ``grey_color_func`` and saved
    to ``./heart2.jpg`` at 1600 dpi.
    """
    jieba.analyse.set_stop_words("./stopwords.txt")
    with codecs.open("weibo1.txt", 'r', encoding="utf-8") as f:
        keywords = [
            tag
            for line in f.readlines()
            for tag in jieba.analyse.extract_tags(line, topK=20)
        ]
    joined = " ".join(keywords)

    mask_img = imread('./52f90c9a5131c.jpg', flatten=True)
    cloud = WordCloud(font_path='msyh.ttc', background_color='white', mask=mask_img)
    wordcloud = cloud.generate(joined)

    recolored = wordcloud.recolor(color_func=grey_color_func, random_state=3)
    plt.imshow(recolored, interpolation="bilinear")
    plt.axis('off')
    plt.savefig('./heart2.jpg', dpi=1600)
def save_cloud(frequencies, output, options={}, color_func=None,canvas_width=0, canvas_height=0):
    """Render ``frequencies`` as a word cloud and save it to ``output``.

    Args:
        frequencies: Passed to ``WordCloud.generate_from_frequencies``.
        output: Destination handed to ``Image.save``.
        options: Overrides merged on top of ``WORD_CLOUD_DEFAULTS``; entries
            whose value is ``None`` are dropped. The argument is only read,
            never mutated.
        color_func: Optional color function applied through ``recolor``.
        canvas_width: Width of the canvas the cloud is centered on.
        canvas_height: Height of the canvas the cloud is centered on.

    Returns:
        Whatever ``Image.save`` returns.
    """
    base_options = copy(WORD_CLOUD_DEFAULTS)
    base_options.update(options)
    clean_options = {x: base_options[x] for x in base_options if base_options[x] is not None}

    wordcloud = WordCloud(**clean_options).generate_from_frequencies(frequencies)
    if color_func:
        wordcloud = wordcloud.recolor(color_func=color_func)
    image = wordcloud.to_image()

    # When no canvas size is given and the configured height and width
    # differ, use the configured size as the canvas.
    if clean_options.get("height") != clean_options.get("width") and not canvas_width and not canvas_height:
        canvas_height = clean_options.get("height")
        canvas_width = clean_options.get("width")

    if canvas_width and canvas_height:
        final_image = Image.new(image.mode, (canvas_width, canvas_height), clean_options.get("background_color"))
        # Floor division keeps the offset integral: plain "/" yields floats
        # on Python 3, which cannot be used as paste coordinates.
        offset = ((final_image.size[0] - image.size[0]) // 2,
                  (final_image.size[1] - image.size[1]) // 2)
        final_image.paste(image, offset)
        return final_image.save(output)
    return image.save(output)
def main():
    """Run the title and image worker threads and wait for both.

    Opens the ``articles`` collection of the local ``cnblogs`` MongoDB
    database, makes sure the ``wordcloudimgs`` directory exists, then starts
    one ``MyThread`` running ``getTitle`` and one running ``getImg``; both
    share a single queue.
    """
    connection = pymongo.MongoClient(host='127.0.0.1', port=27017)
    articles = connection['cnblogs']['articles']
    cloud = WordCloud(
        font_path='msyh.ttc', background_color='#ccc', width=600, height=600)

    if not os.path.exists('wordcloudimgs'):
        os.mkdir('wordcloudimgs')

    shared_queue = Queue()
    title_worker = MyThread(getTitle, (shared_queue, articles))
    image_worker = MyThread(getImg, (shared_queue, cloud))

    # The image worker is started first, as in the original ordering.
    workers = [image_worker, title_worker]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def generate_wordcloud(words_list, mask_path):
    """Show a masked word cloud built from ``words_list``.

    Args:
        words_list: Words that are joined with spaces to form the text.
        mask_path: Path of the image used as the cloud mask.
    """
    joined = ' '.join(words_list)
    shape = np.array(Image.open(mask_path))
    # Custom stop words were disabled in the original:
    #   stopwords = set(STOPWORDS); stopwords.add(u'')
    cloud = WordCloud(
        font_path='data/SourceHanSerifCN-Regular.otf',
        background_color='white',
        max_words=2000,
        mask=shape,
    )
    cloud.generate(joined)
    # To save instead of showing: cloud.to_file('data/path/to/file')
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
def generate_ciyun_pic():
    """Show a word cloud of the lyric file for the module-level ``singer``.

    Reads ``./<singer>lyric.txt``, removes two fixed substrings, segments
    the text with ``jieba`` in full mode and renders the result with
    ``./mask.jpg`` as the mask.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud
    import jieba
    from cv2 import imread

    # Read inside a with-block so the file handle is closed deterministically.
    with open('./{}lyric.txt'.format(singer), 'r') as lyric_file:
        raw_text = lyric_file.read()
    text_from_file_with_apath = raw_text.replace('??', '').replace('??', '')

    wordlist_after_jieba = jieba.cut(text_from_file_with_apath, cut_all=True)
    wl_space_split = " ".join(wordlist_after_jieba)

    mask_img = imread('./mask.jpg')  # , flatten=True)
    my_wordcloud = WordCloud(
        font_path='msyh.ttc',
        background_color='white',
        mask=mask_img
    ).generate(wl_space_split)

    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()
def title_word_cloud():
    """Show a word cloud built from the ``title`` column of ``rent_ave.csv``.

    All titles are concatenated, each followed by a space, then passed
    through ``jieba_clear_text``; the cleaned text is printed and rendered.
    """
    cloud_settings = dict(
        background_color='white',  # background color
        stopwords=STOPWORDS,
        max_words=1000,  # maximum number of words
        # font file used for rendering
        font_path='C:/Python27/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/simhei.ttf',
        max_font_size=50,  # largest font size
        random_state=30,  # fixed random state
    )
    wc = WordCloud(**cloud_settings)

    with open('rent_ave.csv') as csvfile:
        rows = list(csv.DictReader(csvfile))
    text = ''.join(row[u'title'] + ' ' for row in rows)

    print(jieba_clear_text(text))
    wc.generate(jieba_clear_text(text))
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
def save_cloud(frequencies, output, options={}, color_func=None,canvas_width=0, canvas_height=0):
    """Render ``frequencies`` as a word cloud image and save it to ``output``.

    ``options`` is merged over ``WORD_CLOUD_DEFAULTS`` and entries whose
    value is ``None`` are discarded. When a canvas size is known, either
    given explicitly or taken from differing configured height and width,
    the cloud is pasted centered onto a new image of that size before
    saving.

    Returns:
        The result of the ``save`` call on the image that was written.
    """
    merged = copy(WORD_CLOUD_DEFAULTS)
    merged.update(options)
    clean_options = {key: value for key, value in merged.items() if value is not None}

    cloud = WordCloud(**clean_options).generate_from_frequencies(frequencies)
    if color_func:
        cloud = cloud.recolor(color_func=color_func)
    image = cloud.to_image()

    configured_height = clean_options.get("height")
    configured_width = clean_options.get("width")
    no_canvas_given = not canvas_width and not canvas_height
    if configured_height != configured_width and no_canvas_given:
        canvas_height, canvas_width = configured_height, configured_width

    if not (canvas_width and canvas_height):
        return image.save(output)

    final_image = Image.new(image.mode, (canvas_width, canvas_height), clean_options.get("background_color"))
    # Center the cloud on the canvas.
    left = int((final_image.size[0] - image.size[0]) / 2)
    top = int((final_image.size[1] - image.size[1]) / 2)
    final_image.paste(image, (left, top))
    return final_image.save(output)
def drawWordCloud(word_text, filename):
    """Show a word cloud of ``word_text`` shaped by ``hello.jpg`` and save it.

    Args:
        word_text: Text handed to ``WordCloud.generate``.
        filename: Path the finished cloud is written to with ``to_file``.
    """
    shape = imread('hello.jpg')
    settings = dict(
        background_color='white',  # background color
        mask=shape,  # mask image
        max_words=2000,  # maximum number of words
        stopwords=STOPWORDS,  # stop words
        font_path='/System/Library/Fonts/Hiragino Sans GB W6.ttc',  # font file
        max_font_size=50,  # largest font size
        random_state=30,  # fixed random state
        scale=1,
    )
    my_wordcloud = WordCloud(**settings).generate(word_text)

    # Recolor the words from the colors of the mask image.
    palette = ImageColorGenerator(shape)
    my_wordcloud.recolor(color_func=palette)

    # Display the cloud.
    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()

    # Write the cloud to disk.
    my_wordcloud.to_file(filename=filename)
    print()
def drawWordCloud(word_text, filename):
    """Show a word cloud of ``word_text`` shaped by ``bike.jpg`` and save it.

    Args:
        word_text: Text handed to ``WordCloud.generate``.
        filename: Path the finished cloud is written to with ``to_file``.
    """
    bike_mask = imread('bike.jpg')
    generator = WordCloud(
        background_color='white',  # background color
        mask=bike_mask,  # mask image
        max_words=2000,  # maximum number of words
        stopwords=STOPWORDS,  # stop words
        font_path='/System/Library/Fonts/Hiragino Sans GB W6.ttc',  # font file
        max_font_size=50,  # largest font size
        random_state=30,  # fixed random state
        scale=1.3,
    )
    my_wordcloud = generator.generate(word_text)

    # Take the word colors from the mask image.
    my_wordcloud.recolor(color_func=ImageColorGenerator(bike_mask))

    # Display, then persist, the result.
    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()
    my_wordcloud.to_file(filename=filename)
    print()
def analyze(content):
    """Show a word cloud of the 100 most common nouns in ``content``.

    Args:
        content: Converted with ``str()`` and handed to the module-level
            tagger ``t`` for noun extraction.
    """
    nouns = t.nouns(str(content))

    # Words to discard. Build a new list rather than calling remove() while
    # iterating the same list: that skips elements, so adjacent duplicates
    # of an unwanted word survived.
    trash = ["??","????","??","??","??","??","?????"]
    nouns = [noun for noun in nouns if noun not in trash]

    ko = nltk.Text(nouns, name="??")
    # Keep the 100 most common entries as a word -> count mapping.
    ranking = ko.vocab().most_common(100)
    tmpData = dict(ranking)

    wordcloud = WordCloud(
        font_path="/Library/Fonts/AppleGothic.ttf",
        relative_scaling=0.2,
        background_color="white",
    ).generate_from_frequencies(tmpData)

    plt.figure(figsize=(16,8))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
def wordcloud_visualization(corpus, topics, num_docs=None, min_df=0.1,
                            ngrams=1, weighting='tf', max_df=0.7, mds='pcoa',
                            *args, **kwargs):
    """Render one word cloud per topic category and return the saved output.

    A model is built with ``build_model`` and passed to ``prepare``; for
    each ``Category`` in the resulting ``topic_info`` the 20 rows with the
    highest ``Total`` are drawn as a word cloud.

    Returns:
        A list of ``(label, data)`` tuples, where ``data`` is what was read
        back from the buffer ``plt.savefig`` wrote to.
    """
    font = pkg_resources.resource_filename(__name__,
                                           "fonts/ZillaSlab-Medium.ttf")
    print(font)
    model, doc_term_matrix, vectorizer = build_model(
        corpus, topics, num_docs, ngrams, weighting, min_df, max_df
    )
    prep_data = prepare(model.model, doc_term_matrix, vectorizer, mds=mds)
    ti = prep_data.topic_info
    # Sample of ``topic_info`` given in the original source:
    #
    #      Category        Freq    Term       Total  loglift  logprob
    # term
    # 478   Default  738.000000  specie  738.000000   1.0000   1.0000
    # ...       ...         ...     ...         ...      ...      ...
    # 191   Topic10   25.344278   space  145.983738   1.8935  -5.0376
    # 190   Topic10   32.076070   green  193.201661   1.8488  -4.8020
    # 319   Topic10   12.129367  aspect   73.063725   1.8488  -5.7745
    topic_labels = ti.groupby(['Category']).groups.keys()

    plt.clf()
    rendered = []
    for label in topic_labels:
        # NOTE(review): savefig writes binary image data -- confirm that the
        # StringIO in scope accepts it.
        out = StringIO()
        top_terms = ti[ti.Category == label].sort_values(by='Total',
                                                         ascending=False)[:20]
        term_totals = dict(top_terms[['Term', 'Total']].to_dict('split')['data'])
        cloud = wordcloud.WordCloud(font_path=font, width=600, height=300,
                                    background_color='white')
        cloud.fit_words(term_totals)
        plt.imshow(cloud)
        plt.axis('off')
        plt.savefig(out)
        out.seek(0)
        rendered.append((label, out.read()))
    return rendered
def plot_topic(self, topic_idx):
    """Plot, save and show a word cloud for one topic.

    The plot title is asked for interactively. The image is saved as
    ``topic_<topic_idx>.png`` under ``images/positive`` when
    ``self.pro_or_con`` equals ``'pro'``, otherwise under ``images/negative``.

    INPUT:
        topic_idx: index of topic from NMF clustering
    """
    title = raw_input('Enter a title for this plot: ')
    num_reviews = self.labels[:, topic_idx].sum()
    word_freq = self.topic_word_frequency(topic_idx)

    cloud = WordCloud(width=2000, height=1000, max_words=150,
                      background_color='white')
    cloud.fit_words(word_freq)

    figure = plt.figure(figsize=(16, 8))
    axes = figure.add_subplot(111)
    heading = 'Topic {}: {}\nNumber of Reviews in Topic: {}'.format(
        topic_idx, title, num_reviews)
    axes.set_title(heading, fontsize=24)
    axes.axis('off')
    axes.imshow(cloud)

    name = 'topic_' + str(topic_idx) + '.png'
    subfolder = 'positive' if self.pro_or_con == 'pro' else 'negative'
    img_path = os.path.join('images', subfolder)
    plt.savefig(os.path.join(img_path, name))
    plt.show()
def get_wc(word_dic,fontname,savename,photoname):
    """Generate a masked word cloud from word frequencies and save it.

    Args:
        word_dic: Passed to ``generate_from_frequencies``.
        fontname: Font path used to draw the words.
        savename: File the finished cloud is written to.
        photoname: Image read with ``imread`` and used as the mask.
    """
    shape = imread(photoname)
    cloud = WordCloud(
        background_color='white',
        mask=shape,
        font_path=fontname,
        max_font_size=150,
    )
    cloud.generate_from_frequencies(word_dic)
    plt.imshow(cloud)
    plt.axis('off')
    cloud.to_file(savename)
    print('get the photo {} !'.format(savename))
# stackoverflow_users_taginfo.py -- file source
# Project: stackoverflow_tag_cloud
# Author: droyed
# Project source
# File source
# Views: 20
# Favorites: 0
# Likes: 0
# Comments: 0
def tag_cloud(link=22656, lim_num_tags=200, image_dims=(400, 200),
              out_filepath="TagCloud.png"):
    """Generate a tag cloud and save it as an image.

    Parameters
    ----------
    link : same as used for the function taginfo.
    lim_num_tags : same as used for the function taginfo.
    image_dims : tuple of two elements.
        Image dimensions (width, height) of the tag cloud image to be saved.
    out_filepath : string
        Output image filepath.

    Returns
    -------
    None
        An error message is printed when ``taginfo`` returns ``None`` or an
        empty result; otherwise the image is written and a confirmation
        is printed.
    """
    width, height = image_dims  # word cloud image size
    font_file = "fonts/ShortStack-Regular.ttf"

    info = taginfo(link=link, lim_num_tags=lim_num_tags)
    if info is None:
        print("Error : No webpage found!")
        return
    if len(info) == 0:
        print("Error : No tags found!")
        return

    # Tag info was extracted successfully: render and save it.
    cloud = WordCloud(font_path=font_file, width=width, height=height,
                      max_words=len(info))
    rendered = cloud.generate_from_frequencies(info)
    rendered.to_image().save(out_filepath)
    print("Tag Cloud Saved as " + out_filepath)