python类WordCloud()的实例源码

analyse.py 文件源码 项目:lyricswordcloud 作者: qwertyyb 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def showData(self):
    """Show the frequencies in ``self.data`` as an image-shaped word cloud.

    The image read from ``self.picfile`` is used both as the cloud mask and
    as the source of the word colours. The result is displayed with
    ``plt.show()``; nothing is returned.
    """
    print('???,????···')
    shape_image = imread(self.picfile)
    cloud_settings = dict(
        font_path='./msyhl.ttc',
        mask=shape_image,
        background_color='white',
        max_font_size=200,
        max_words=300,
        color_func=ImageColorGenerator(shape_image),
    )
    rendered = WordCloud(**cloud_settings).generate_from_frequencies(self.data)

    plt.figure()
    plt.imshow(rendered)
    plt.axis('off')
    print('?????')
    plt.show()
lexical_analysis.py 文件源码 项目:SFBIStats 作者: royludo 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def create_wordcloud(corpus, output, stopword_dict):
    """Build a word cloud from *corpus* and save the figure to *output*.

    The corpus is turned into a frequency list via ``build_lex_dic``,
    ``get_total_words`` and ``build_freq_list``; only the first 100 entries
    of that list are drawn.
    """
    lexicon = build_lex_dic(corpus, stopword_dict=stopword_dict)
    word_total = get_total_words(lexicon)
    frequencies = build_freq_list(lexicon, word_total)

    # Frameless figure whose single axes covers the whole canvas.
    figure = plt.figure(figsize=(10, 8), frameon=False)
    axes = plt.Axes(figure, [0., 0., 1., 1.])
    axes.set_axis_off()
    figure.add_axes(axes)

    cloud = WordCloud(
        width=1000,
        height=800,
        max_words=100,
        background_color='white',
        relative_scaling=0.7,
        random_state=15,
        prefer_horizontal=0.5,
    )
    cloud = cloud.generate_from_frequencies(frequencies[0:100])
    cloud.recolor(random_state=42, color_func=my_color_func)

    axes.imshow(cloud)
    figure.savefig(output, facecolor='white')
compute.py 文件源码 项目:WebAppEx 作者: karlafej 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def get_plot(limit, txt, wc_mask=wc_mask, stop = english_stopwords):
    """Render *txt* as a word cloud and return it as a base64 PNG string.

    Args:
        limit: maximum number of words, passed to ``WordCloud(max_words=...)``.
        txt: text to build the cloud from.
        wc_mask: mask passed to ``WordCloud(mask=...)``.
        stop: stop words passed to ``WordCloud(stopwords=...)``.

    Returns:
        The PNG image encoded as a base64 ``str``.
    """
    wordcloud = WordCloud(
        max_words=limit,
        stopwords=stop,
        mask=wc_mask
        ).generate(txt)
    fig = plt.figure()
    try:
        fig.set_figwidth(8)
        fig.set_figheight(8)
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis('off')

        figfile = BytesIO()
        plt.savefig(figfile, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leave one more figure in memory.
        plt.close(fig)
    # getvalue() returns the whole buffer regardless of the stream position.
    return base64.b64encode(figfile.getvalue()).decode()
cloudysearch.py 文件源码 项目:cloudy_search 作者: tim-shane 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def create_cloud(self):
    """Draw a word cloud over its mask image and return the ``plt`` module.

    Text comes from ``self.return_txt()`` and the mask from
    ``self.return_img()`` (per the original comments, Bing search snippets
    and a Bing image respectively). The words are recoloured from the mask
    and drawn semi-transparently on top of the mask itself.
    """
    snippet_text = self.return_txt()
    mask_pixels = np.array(self.return_img())

    # Potential feature: also exclude the search modifier, e.g.
    # excluded.add(search_modifier)
    excluded = set(STOPWORDS)

    cloud = WordCloud(background_color="white", mask=mask_pixels, stopwords=excluded)
    cloud.generate(snippet_text)

    palette = ImageColorGenerator(mask_pixels)
    plt.imshow(mask_pixels, cmap=plt.cm.gray, interpolation="None")
    recoloured = cloud.recolor(color_func=palette)
    plt.imshow(recoloured, alpha=.8, interpolation='None')
    plt.axis("off")
    return plt
tasks.py 文件源码 项目:QProb 作者: quant-trade 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def full_wordcloud():
    """
    Generates wordcloud for the site.

    Concatenates the ``content`` of every post, strips stop words with
    ``words_wo_stopwords`` and saves the cloud as ``images/wordcloud.png``
    under ``settings.STATIC_ROOT``. Any error is printed, not raised.
    """
    try:
        posts = Post.objects.filter().values("content")
        # Single join instead of repeated string concatenation in a loop.
        text = "".join(post["content"] + " " for post in posts)

        text = words_wo_stopwords(text=text)
        word_cloud = WordCloud(max_font_size=40, background_color="rgba(255, 255, 255, 0)", width=350, height=600, mode="RGBA").generate(text)
        fig = plt.figure(frameon=False)
        try:
            fig.patch.set_visible(False)
            ax = fig.add_axes([0, 0, 1, 1])
            ax.axis('off')
            ax.imshow(word_cloud, interpolation='bilinear')
            plt.savefig(join(settings.STATIC_ROOT, 'images', 'wordcloud.png'))
        finally:
            # Close the figure even when drawing or saving fails, otherwise
            # it stays registered in pyplot.
            plt.close(fig)
    except Exception as err:
        print(err)
tasks.py 文件源码 项目:QProb 作者: quant-trade 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def posts_wordcloud():
    """
    Generates a wordcloud for each post that does not have one yet.

    Posts with empty content are excluded by the query. A post is skipped
    when its image file already exists or when its stop-word-free text is
    100 characters or shorter. Errors are printed per post so one failure
    does not stop the loop.
    """
    posts = Post.objects.filter().exclude(content="")
    for post in posts:
        try:
            image_file = join(settings.STATIC_ROOT, "wordcloud", "{0}.png".format(post.slug))
            if isfile(image_file):
                continue

            text = words_wo_stopwords(text=post.content)
            if len(text) <= 100:
                continue

            word_cloud = WordCloud(max_font_size=40, background_color="rgba(255, 255, 255, 0)", width=800, height=350, mode="RGBA").generate(text)
            fig = plt.figure(frameon=False)
            try:
                fig.patch.set_visible(False)
                ax = fig.add_axes([0, 0, 1, 1])
                ax.axis('off')
                ax.imshow(word_cloud, interpolation='bilinear')
                plt.savefig(image_file)
            finally:
                # Close the figure even when drawing or saving fails.
                plt.close(fig)
            post.wordcloud = "static/wordcloud/{0}.png".format(post.slug)
            post.save()
        except Exception as err:
            print(err)
tasks.py 文件源码 项目:QProb 作者: quant-trade 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def make_wordcloud(entry):
    """
    Makes singular wordcloud for a post.

    When the stop-word-free text of ``entry.content`` is longer than 100
    characters, the cloud is saved under ``STATIC_ROOT/wordcloud`` and
    ``entry.wordcloud`` is set to its static path. The entry is returned in
    either case and is not saved here.
    """
    text = words_wo_stopwords(text=entry.content)
    if len(text) <= 100:
        return entry

    word_cloud = WordCloud(max_font_size=60, background_color="rgba(255, 255, 255, 0)", mode="RGBA").generate(text)
    fig = plt.figure(frameon=False)
    try:
        fig.patch.set_visible(False)
        ax = fig.add_axes([0, 0, 1, 1])
        ax.axis('off')
        ax.imshow(word_cloud, interpolation='bilinear')
        plt.savefig(join(settings.STATIC_ROOT, "wordcloud", "{0}.png".format(entry.slug)))
    finally:
        # Close the figure even when drawing or saving raises.
        plt.close(fig)
    entry.wordcloud = "static/wordcloud/{0}.png".format(entry.slug)

    return entry
wordart.py 文件源码 项目:csss-minion 作者: henrymzhao 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def populateCaches(self):
    """Fill ``self.serverCache`` from the database and render the server cloud.

    Reads the ``msgs`` column of ``self.tablename``. When the query fails or
    returns no rows, ``self.backupArr`` is used instead. The joined messages
    are rendered to ``self.serverImage``.
    """
    try:
        cursor = self.bot.conn_wc.cursor()
        cursor.execute("SELECT msgs FROM "+self.tablename) # hashtag no limits
        messages = [row[0] for row in cursor.fetchall()]
        self.serverCache = messages if len(messages) >= 1 else self.backupArr
    except Exception as e:
        print("server cache retrieval error: \n", e)
        self.serverCache = self.backupArr

    joined_messages = " ".join(self.serverCache)
    print("generating word cloud")
    # take it to the limit
    cloud = WordCloud(width=1024, height=1024, max_words=200000, stopwords=self.STOPWORDS)
    cloud.generate(joined_messages).to_file(self.serverImage)
get_data2.py 文件源码 项目:text_analysis 作者: mathlf2015 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def get_result(url_set):
    """Fetch each URL, collect the text of its ``span.ctt`` elements and
    display a word cloud of the jieba-segmented result.

    Args:
        url_set: iterable of page URLs to download.
    """
    line_set = []
    for url in url_set:
        wb_data = requests.get(url,headers = headers)
        soup = BeautifulSoup(wb_data.text,'lxml')
        for span in soup.select('span.ctt'):
            text = re.sub('<[^>]*>', '', span.text)
            # NOTE(review): this pattern looks like an encoding-damaged
            # literal; as written, '??' is not a valid regular expression.
            # Restore the intended characters from the original project.
            text = re.sub('??', ' ', text)
            # Raw string: a bare backslash-W in a normal literal is an
            # invalid escape sequence.
            text = re.sub(r'[\W]+', ' ', text)
            line_set.append(text)
    word_list = [" ".join(jieba.cut(sentence)) for sentence in line_set]
    new_text = ' '.join(word_list)
    wordcloud = WordCloud(font_path="C:/Python34/Lib/site-packages/wordcloud/simhei.ttf", background_color="black").generate(new_text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
_main.py 文件源码 项目:matplotlib_venn_wordcloud 作者: paulbrodersen 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def _get_wordcloud(img, patch, words, word_to_frequency=None, **wordcloud_kwargs):
    """Return a ``WordCloud`` of *words* restricted to the area of *patch*.

    When *word_to_frequency* is falsy the cloud is generated from the words
    joined by spaces; otherwise from the frequencies looked up for each word.
    Extra keyword arguments are forwarded to ``WordCloud``.
    """
    # Boolean grid marking which pixels fall inside the patch outline.
    outline = patch.get_path()
    inside = outline.contains_points(img.pixel_coordinates)
    inside = inside.reshape((img.y_resolution, img.x_resolution))

    # Invert and scale to uint8 (original note: black indicates the mask
    # position), then flip vertically (original note: origin is upper left).
    mask = np.flipud((~inside * 255).astype(np.uint8))

    wc = WordCloud(mask=mask, background_color=None, mode="RGBA", **wordcloud_kwargs)

    if word_to_frequency:
        frequencies = {word: word_to_frequency[word] for word in words}
        wc.generate_from_frequencies(frequencies)
    else:
        wc.generate(" ".join(words))

    return wc
job_spider.py 文件源码 项目:51job 作者: chenjiandongx 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def world_cloud():
    """Show and save a word cloud built from ``data/post_pre_desc_counter.csv``.

    Each CSV row supplies a word (column 0) and a count (column 1); for a
    repeated word the first count is kept. The cloud is displayed and then
    written to ``images/wc.jpg``.
    """
    counter = {}
    csv_path = os.path.join("data", "post_pre_desc_counter.csv")
    with open(csv_path, "r", encoding="utf-8") as f:
        for row in csv.reader(f):
            # setdefault keeps an existing count and only stores new words.
            counter.setdefault(row[0], int(row[1]))
        pprint(counter)

    cloud = WordCloud(
        font_path=os.path.join("font", "msyh.ttf"),
        max_words=100,
        height=600,
        width=1200,
    )
    cloud = cloud.generate_from_frequencies(counter)
    plt.imshow(cloud)
    plt.axis('off')
    plt.show()
    cloud.to_file(os.path.join("images", "wc.jpg"))
gen_cloud.py 文件源码 项目:courses.uno 作者: BenDoan 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def plot_cloud(text):
    """Render *text* as a masked word cloud and return the PNG bytes."""
    # The original kept an alternative setup commented out:
    # "uno_mask.png" with max_words = 200.
    max_words = 300
    mask = np.array(Image.open(path.join(d, "mav_mask.png")))
    excluded = STOPWORDS.union(common_words)
    # An optional .recolor(color_func=grey_color_func, random_state=3) step
    # was left disabled in the original.
    cloud = WordCloud(
        background_color="white",
        width=2400,
        height=2400,
        mask=mask,
        stopwords=excluded,
        max_words=max_words,
    ).generate(text)

    # Draw the generated image on the current figure.
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")

    figure = plt.gcf()
    figure.set_size_inches(18.5, 10.5)
    png_buffer = BytesIO()
    FigureCanvas(figure).print_png(png_buffer)

    return png_buffer.getvalue()
WC.py 文件源码 项目:FacebookGraphAPI-Examples 作者: nikhilkumarsingh 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def wcloud(text):
    """Build a masked word cloud from *text* and save it as a PNG.

    The mask is read from ``face_mask.png``. The output file name is asked
    for interactively and ``.png`` is appended to it.
    """
    face_mask = np.array(Image.open("face_mask.png"))   # choose mask
    cloud = WordCloud(
        background_color="white",
        mask=face_mask,
        max_words=80,
        stopwords=set(STOPWORDS),
        width=800,
        height=400,
        mode="RGB",
        relative_scaling=0.5,
    )
    cloud.generate(clean_text(text))

    # save image
    file_name = raw_input("Enter any name for the Word Cloud image:") +'.png'
    cloud.to_file(file_name)
lyric_nlp.py 文件源码 项目:web-crawler-tutorial 作者: jwlin 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def lyrics():
    """Tokenise the lyrics in ``lyrics.json``, print the ten most common
    tokens and display a word cloud of all tokens."""
    with open('lyrics.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    tokens = []
    for lyric in data.values():
        # Keep segments that are not pure whitespace and are longer than
        # one character.
        tokens.extend(
            seg for seg in jieba.cut(lyric) if seg.split() and len(seg) > 1
        )

    # Frequency of each token.
    print(Counter(tokens).most_common(10))

    # A font file is given explicitly for rendering the cloud.
    cloud = WordCloud(font_path='NotoSansMonoCJKtc-Regular.otf')
    cloud = cloud.generate(' '.join(tokens))
    plt.imshow(cloud)
    plt.axis('off')
    plt.show()
peruse.py 文件源码 项目:TwitterPeruser 作者: ilyauts 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def generateWordCloud(text):
    """Display a word cloud of *text* shaped by ``us-mask-white.png``.

    The mask image is loaded from the directory containing this module.
    """
    module_dir = path.dirname(__file__)
    mask_image = np.array(Image.open(path.join(module_dir, "us-mask-white.png")))
    excluded = set(STOPWORDS) | {"said"}

    cloud = WordCloud(
        background_color="black",
        max_words=2000,
        mask=mask_image,
        stopwords=excluded,
        max_font_size=40,
        random_state=42,
    )
    cloud.generate(text)

    # A colour generator is built from the mask, as in the original, but it
    # is not applied to the cloud below.
    image_colors = ImageColorGenerator(mask_image)

    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
peruse.py 文件源码 项目:TwitterPeruser 作者: ilyauts 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def generateTable(text, n=5):
    """Print the *n* most frequent terms of *text* as a table.

    Frequencies come from ``WordCloud.process_text``, configured with the
    same mask, stop words and limits as ``generateWordCloud``.

    Args:
        text: text to analyse.
        n: number of rows to print (default 5).
    """
    # Start by getting a frequency dictionary
    d = path.dirname(__file__)

    cloud_coloring = np.array(Image.open(path.join(d, "us-mask-white.png")))
    stopwords = set(STOPWORDS)
    stopwords.add("said")

    wc = WordCloud(background_color="black", max_words=2000, mask=cloud_coloring,
                   stopwords=stopwords, max_font_size=40, random_state=42)

    frequenciesDict = wc.process_text(text)

    # Materialise the keys/values as lists so the frame is built the same
    # way whether the dict methods return lists or views.
    frequencies = pd.DataFrame({
        'words': list(frequenciesDict.keys()),
        'frequencies': list(frequenciesDict.values()),
    })
    frequencies.sort_values('frequencies', ascending=False, inplace=True)

    # Single-argument print(...) works as both a statement and a function.
    # The heading reports the requested n instead of a fixed "5".
    print('\nTop {0} Terms\n'.format(n))
    print(frequencies.head(n=n).to_string(index=False))
    print('\n')
heart.py 文件源码 项目:crawler_html2pdf 作者: lzjun567 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def generate_image():
    """Build a masked word cloud from ``weibo1.txt`` and save ``heart2.jpg``.

    Up to 20 keywords are extracted from every line with jieba, using the
    stop words in ``./stopwords.txt``. The cloud is recoloured with
    ``grey_color_func`` before saving.
    """
    jieba.analyse.set_stop_words("./stopwords.txt")

    keywords = []
    with codecs.open("weibo1.txt", 'r', encoding="utf-8") as f:
        for line in f.readlines():
            keywords.extend(jieba.analyse.extract_tags(line, topK=20))

    keyword_text = " ".join(keywords)
    mask_img = imread('./52f90c9a5131c.jpg', flatten=True)
    cloud = WordCloud(
        font_path='msyh.ttc',
        background_color='white',
        mask=mask_img,
    ).generate(keyword_text)

    recoloured = cloud.recolor(color_func=grey_color_func, random_state=3)
    plt.imshow(recoloured, interpolation="bilinear")
    plt.axis('off')
    plt.savefig('./heart2.jpg', dpi=1600)
clouds.py 文件源码 项目:wordclouds 作者: inmagik 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def save_cloud(frequencies, output, options={}, color_func=None,canvas_width=0, canvas_height=0):
    """Render *frequencies* as a word cloud and save the image to *output*.

    Args:
        frequencies: word frequencies for ``generate_from_frequencies``.
        output: destination passed to ``Image.save``.
        options: ``WordCloud`` keyword overrides merged over
            ``WORD_CLOUD_DEFAULTS``; entries whose value is ``None`` are
            dropped. The dict is only read, never mutated.
        color_func: optional colour function applied with ``recolor``.
        canvas_width, canvas_height: size of an enclosing canvas on which
            the cloud is centred. When both are falsy and the configured
            width and height differ, the configured size is used.

    Returns:
        Whatever ``Image.save`` returns.
    """
    base_options = copy(WORD_CLOUD_DEFAULTS)
    base_options.update(options)
    clean_options = {x: base_options[x] for x in base_options if base_options[x] is not None}

    wordcloud = WordCloud(**clean_options).generate_from_frequencies(frequencies)

    if color_func:
        wordcloud = wordcloud.recolor(color_func=color_func)

    image = wordcloud.to_image()

    if clean_options.get("height") != clean_options.get("width") and not canvas_width and not canvas_height:
        canvas_height = clean_options.get("height")
        canvas_width = clean_options.get("width")

    if canvas_width and canvas_height:
        final_image = Image.new(image.mode, (canvas_width, canvas_height), clean_options.get("background_color"))
        # Floor division keeps the offset integral on both Python 2 and 3;
        # plain "/" yields floats on Python 3, which paste() does not accept.
        offset = ((final_image.size[0] - image.size[0]) // 2,
                  (final_image.size[1] - image.size[1]) // 2)
        final_image.paste(image, offset)
        return final_image.save(output)

    return image.save(output)
cnblogsWC.py 文件源码 项目:cnblogs 作者: hao15239129517 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def main():
    """Start one thread running ``getTitle`` and one running ``getImg``,
    connected by a queue, and wait for both to finish.

    ``getTitle`` receives the ``articles`` collection of the local
    ``cnblogs`` MongoDB database; ``getImg`` receives a shared ``WordCloud``.
    """
    client = pymongo.MongoClient(host='127.0.0.1', port=27017)
    table = client['cnblogs']['articles']
    wc = WordCloud(
        font_path='msyh.ttc', background_color='#ccc', width=600, height=600)

    # Make sure the output directory exists.
    if not os.path.exists('wordcloudimgs'):
        os.mkdir('wordcloudimgs')

    queue = Queue()
    titleThread = MyThread(getTitle, (queue, table))
    imgThread = MyThread(getImg, (queue, wc))
    workers = [imgThread, titleThread]

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
text_analysis.py 文件源码 项目:CloudMusic-Crawler 作者: GreatV 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def generate_wordcloud(words_list, mask_path):
    """Display a word cloud of *words_list* shaped by the image at *mask_path*."""
    joined_words = ' '.join(words_list)
    mask = np.array(Image.open(mask_path))

    # Stop words were left disabled in the original:
    # stopwords = set(STOPWORDS); stopwords.add(u'')
    cloud = WordCloud(
        font_path = 'data/SourceHanSerifCN-Regular.otf',
        background_color = 'white',
        max_words = 2000,
        mask = mask,
    )
    cloud.generate(joined_words)

    # Saving is disabled in the original: wc.to_file('data/path/to/file')

    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
WangYiYunCiYun.py 文件源码 项目:LizardDance 作者: guerbai 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def generate_ciyun_pic():
    """Display a word cloud of the lyrics in ``./<singer>lyric.txt``.

    ``singer`` is read from the enclosing module. The text is segmented with
    jieba in full mode and the cloud is shaped by ``./mask.jpg``.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud
    import jieba
    from cv2 import imread

    # Context manager so the lyrics file is closed after reading.
    with open('./{}lyric.txt'.format(singer), 'r') as lyric_file:
        text_from_file_with_apath = lyric_file.read().replace('??', '').replace('??', '')

    wordlist_after_jieba = jieba.cut(text_from_file_with_apath, cut_all = True)
    wl_space_split = " ".join(wordlist_after_jieba)

    mask_img = imread('./mask.jpg')# , flatten=True)
    my_wordcloud = WordCloud(
            font_path='msyh.ttc',
            background_color='white',
            mask=mask_img
            ).generate(wl_space_split)

    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()
Datavisualization.py 文件源码 项目:lianjia 作者: learrn 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def title_word_cloud():
    """
    Display a word cloud built from the ``title`` column of ``rent_ave.csv``.

    The titles are concatenated, cleaned with ``jieba_clear_text``, printed
    and then rendered.
    """
    wc = WordCloud(background_color='white',
                   stopwords=STOPWORDS,
                   max_words=1000,
                   font_path='C:/Python27/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/simhei.ttf',
                   max_font_size=50,
                   random_state=30,
                   )
    with open('rent_ave.csv') as csvfile:
        # Single join instead of repeated string concatenation in a loop.
        text = ''.join(row[u'title'] + ' ' for row in csv.DictReader(csvfile))
    # Clean once and reuse the result for printing and rendering.
    cleaned_text = jieba_clear_text(text)
    # Single-argument print(...) works as both a statement and a function.
    print(cleaned_text)
    wc.generate(cleaned_text)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
clouds.py 文件源码 项目:newsclouds-engine 作者: inmagik 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def save_cloud(frequencies, output, options={}, color_func=None,canvas_width=0, canvas_height=0):
    """Render *frequencies* as a word cloud and save the image to *output*.

    *options* is merged over ``WORD_CLOUD_DEFAULTS`` and entries whose value
    is ``None`` are dropped before being passed to ``WordCloud``. When a
    canvas size is given, or when the configured width and height differ,
    the cloud is centred on a new canvas filled with the configured
    background colour. Returns whatever ``Image.save`` returns.
    """
    merged = copy(WORD_CLOUD_DEFAULTS)
    merged.update(options)
    clean_options = {key: value for key, value in merged.items() if value is not None}

    cloud = WordCloud(**clean_options).generate_from_frequencies(frequencies)
    if color_func:
        cloud = cloud.recolor(color_func=color_func)
    image = cloud.to_image()

    # With no explicit canvas and a non-square configuration, use the
    # configured size as the canvas.
    sizes_differ = clean_options.get("height") != clean_options.get("width")
    if sizes_differ and not canvas_width and not canvas_height:
        canvas_height = clean_options.get("height")
        canvas_width = clean_options.get("width")

    if not (canvas_width and canvas_height):
        return image.save(output)

    canvas = Image.new(image.mode, (canvas_width, canvas_height), clean_options.get("background_color"))
    left = int((canvas.size[0] - image.size[0]) / 2)
    top = int((canvas.size[1] - image.size[1]) / 2)
    canvas.paste(image, (left, top))
    return canvas.save(output)
captureInfo.py 文件源码 项目:InterestingCrawler 作者: Maicius 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def drawWordCloud(word_text, filename):
    """Show a word cloud of *word_text* shaped and coloured by ``hello.jpg``,
    then save it to *filename*."""
    mask = imread('hello.jpg')
    cloud_settings = dict(
        background_color='white',
        mask=mask,
        max_words=2000,
        stopwords=STOPWORDS,
        font_path='/System/Library/Fonts/Hiragino Sans GB W6.ttc',
        max_font_size=50,
        random_state=30,
        scale=1,
    )
    my_wordcloud = WordCloud(**cloud_settings).generate(word_text)

    # Take the word colours from the mask image.
    my_wordcloud.recolor(color_func=ImageColorGenerator(mask))

    # Display the cloud.
    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()

    # Write the cloud to disk.
    my_wordcloud.to_file(filename=filename)
    print()
drawWordCloud.py 文件源码 项目:InterestingCrawler 作者: Maicius 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def drawWordCloud(word_text, filename):
    """Show a word cloud of *word_text* shaped and coloured by ``bike.jpg``,
    then save it to *filename*."""
    mask = imread('bike.jpg')
    cloud_settings = dict(
        background_color='white',
        mask=mask,
        max_words=2000,
        stopwords=STOPWORDS,
        font_path='/System/Library/Fonts/Hiragino Sans GB W6.ttc',
        max_font_size=50,
        random_state=30,
        scale=1.3,
    )
    my_wordcloud = WordCloud(**cloud_settings).generate(word_text)

    # Take the word colours from the mask image.
    my_wordcloud.recolor(color_func=ImageColorGenerator(mask))

    # Display the cloud.
    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()

    # Write the cloud to disk.
    my_wordcloud.to_file(filename=filename)
    print()
webcrawling0203.py 文件源码 项目:webcrawling 作者: etilelab 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def analyze(content):
    """Display a word cloud of the 100 most common nouns in *content*.

    *content* is converted to ``str`` and its nouns are extracted with
    ``t.nouns``. Nouns listed in ``trash`` are discarded before counting.
    """
    nouns=t.nouns(str(content))

    # Unwanted nouns. Build a filtered list rather than removing items from
    # `nouns` while iterating over it, which skips the element following
    # each removal and so misses adjacent unwanted entries.
    trash=["??","????","??","??","??","??","?????"]
    nouns = [noun for noun in nouns if noun not in trash]

    ko=nltk.Text(nouns,name="??")

    # The 100 most frequent nouns, as a word -> count mapping.
    ranking=ko.vocab().most_common(100)
    tmpData=dict(ranking)

    wordcloud=WordCloud(font_path="/Library/Fonts/AppleGothic.ttf",relative_scaling=0.2,background_color="white",).generate_from_frequencies(tmpData)

    plt.figure(figsize=(16,8))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()




# ??? ??(??? ????? ???? ???? ? ?????? ??? ??)
topics.py 文件源码 项目:eea.corpus 作者: eea 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def wordcloud_visualization(corpus, topics, num_docs=None, min_df=0.1,
                            ngrams=1, weighting='tf', max_df=0.7, mds='pcoa',
                            *args, **kwargs):
    """Render one word cloud per topic category.

    A model is built with ``build_model`` and passed through ``prepare``.
    For each category of the resulting ``topic_info`` table, the 20 rows
    with the highest ``Total`` are drawn as a cloud weighted by ``Total``.

    Returns a list of ``(label, image_data)`` tuples, where ``image_data``
    is what was read back from the buffer the figure was saved into.
    Extra positional and keyword arguments are accepted but not used.
    """
    font = pkg_resources.resource_filename(__name__,
                                           "fonts/ZillaSlab-Medium.ttf")
    print(font)
    model, doc_term_matrix, vectorizer = build_model(
        corpus, topics, num_docs, ngrams, weighting, min_df, max_df
    )
    prep_data = prepare(model.model, doc_term_matrix, vectorizer, mds=mds)
    ti = prep_data.topic_info
    # Sample rows recorded by the original author (presumably of this table):
    #
    #      Category         Freq            Term        Total  loglift  logprob
    # term
    # 478   Default   738.000000          specie   738.000000   1.0000   1.0000
    # ...       ...          ...             ...          ...      ...      ...
    # 191   Topic10    25.344278           space   145.983738   1.8935  -5.0376
    # 190   Topic10    32.076070           green   193.201661   1.8488  -4.8020
    # 319   Topic10    12.129367          aspect    73.063725   1.8488  -5.7745
    topic_labels = ti.groupby(['Category']).groups.keys()

    plt.clf()
    rendered = []
    for label in topic_labels:
        out = StringIO()
        category_rows = ti[ti.Category == label]
        top_terms = category_rows.sort_values(by='Total', ascending=False)[:20]
        tf = dict(top_terms[['Term', 'Total']].to_dict('split')['data'])

        wc = wordcloud.WordCloud(font_path=font, width=600, height=300,
                                 background_color='white')
        wc.fit_words(tf)
        plt.imshow(wc)
        plt.axis('off')
        plt.savefig(out)
        out.seek(0)
        rendered.append((label, out.read()))

    return rendered
topic_modeling.py 文件源码 项目:glassdoor-analysis 作者: THEdavehogue 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def plot_topic(self, topic_idx):
    '''
    Plot, save and show a wordcloud for one topic.

    The plot title is asked for interactively. The image is saved as
    topic_<topic_idx>.png under images/positive when self.pro_or_con is
    'pro', otherwise under images/negative.

    INPUT:
        topic_idx: index of topic from NMF clustering
    '''
    title = raw_input('Enter a title for this plot: ')
    num_reviews = self.labels[:, topic_idx].sum()
    word_freq = self.topic_word_frequency(topic_idx)

    cloud = WordCloud(width=2000, height=1000, max_words=150,
                      background_color='white')
    cloud.fit_words(word_freq)

    fig = plt.figure(figsize=(16, 8))
    ax = fig.add_subplot(111)
    heading = 'Topic {}: {}\nNumber of Reviews in Topic: {}'.format(
        topic_idx, title, num_reviews)
    ax.set_title(heading, fontsize=24)
    ax.axis('off')
    ax.imshow(cloud)

    sentiment_dir = 'positive' if self.pro_or_con == 'pro' else 'negative'
    file_name = 'topic_' + str(topic_idx) + '.png'
    plt.savefig(os.path.join(os.path.join('images', sentiment_dir), file_name))
    plt.show()
analysis.py 文件源码 项目:Jobs-search 作者: Hopetree 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def get_wc(word_dic,fontname,savename,photoname):
    '''Build a word cloud from the frequencies in word_dic and save it.

    word_dic:  word frequencies for generate_from_frequencies
    fontname:  font path passed to WordCloud
    savename:  file the cloud image is written to
    photoname: image read with imread and used as the mask
    '''
    mask_image = imread(photoname)
    cloud = WordCloud(
        background_color='white',
        mask=mask_image,
        font_path=fontname,
        max_font_size=150,
    )
    cloud.generate_from_frequencies(word_dic)
    plt.imshow(cloud)
    plt.axis('off')
    cloud.to_file(savename)
    print('get the photo {} !'.format(savename))
stackoverflow_users_taginfo.py 文件源码 项目:stackoverflow_tag_cloud 作者: droyed 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def tag_cloud(link=22656, lim_num_tags=200, image_dims=(400, 200),
              out_filepath="TagCloud.png"):
    """ Generate tag cloud and save it as an image.

    Parameters
    ----------
    link : same as used for the function taginfo.

    lim_num_tags : same as used for the function taginfo.

    image_dims : tuple of two elements.
        Image dimensions (width, height) of the tag cloud image to be saved.

    out_filepath : string
        Output image filepath.

    Output
    ------
    None. An error message is printed, and nothing is saved, when taginfo
    returns None or an empty result.
    """

    width, height = image_dims
    font_path = "fonts/ShortStack-Regular.ttf"
    info = taginfo(link=link, lim_num_tags=lim_num_tags)

    if info is None:
        print("Error : No webpage found!")
        return
    if len(info) == 0:
        print("Error : No tags found!")
        return

    # Tag info extracted successfully: render every tag and save the image.
    cloud = WordCloud(font_path=font_path, width=width, height=height,
                      max_words=len(info))
    cloud = cloud.generate_from_frequencies(info)
    cloud.to_image().save(out_filepath)
    print("Tag Cloud Saved as " + out_filepath)


问题


面经


文章

微信
公众号

扫码关注公众号