def get_result(url_set, font_path="C:/Python34/Lib/site-packages/wordcloud/simhei.ttf"):
    """Fetch pages, collect their ``span.ctt`` text and display a word cloud.

    Each URL is downloaded with ``requests.get`` (using the ``headers`` name
    defined outside this function), parsed with BeautifulSoup's ``lxml``
    parser, and the text of every ``span.ctt`` element is cleaned and
    collected. The collected lines are segmented with ``jieba.cut``, joined
    into one space-separated string, rendered by ``WordCloud`` and shown via
    ``plt`` (presumably ``matplotlib.pyplot`` -- confirm against the file's
    imports).

    Args:
        url_set: Iterable of URLs to fetch.
        font_path: Path to the font file handed to ``WordCloud``. Defaults to
            the path that was previously hard-coded.

    Returns:
        None. The result is a displayed plot.
    """
    # Compile the patterns once instead of on every matched element.
    tag_pattern = re.compile(r'<[^>]*>')
    # NOTE(review): the original pattern here was the bare string '??', which
    # is not a valid regular expression (re raises "nothing to repeat"), so
    # the function failed on the first matched element. It is matched
    # literally now; it may have been a mis-encoded non-ASCII literal --
    # confirm against the original source.
    placeholder_pattern = re.compile(re.escape('??'))
    # Raw string: '\W' in a plain string literal is an invalid escape sequence.
    non_word_pattern = re.compile(r'\W+')

    line_set = []
    for url in url_set:
        wb_data = requests.get(url, headers=headers)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        for node in soup.select('span.ctt'):
            text = tag_pattern.sub('', node.text)
            text = placeholder_pattern.sub(' ', text)
            # Collapse every run of non-word characters into a single space.
            text = non_word_pattern.sub(' ', text)
            line_set.append(text)

    # Segment each line into words so the word cloud can split on whitespace.
    word_list = [" ".join(jieba.cut(sentence)) for sentence in line_set]
    new_text = ' '.join(word_list)
    wordcloud = WordCloud(font_path=font_path, background_color="black").generate(new_text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
评论列表
文章目录