StatisticsModule.py 文件源码

python
阅读 34 收藏 0 点赞 0 评论 0

项目:ForumAnalysis 作者: consjuly542 项目源码 文件源码
def get_article_statistics(self, recompute_statistics=True):
        """
        Agregate statistics from both forum.
        """
        if recompute_statistics:
            self.get_article_index()
            data_generator = loadDataGenerator()
            cnt_not_match_links = 0
            links_cnt = 0
            l2a = Link2Article()
            # log = open("./logs", "w")
            # error_link = []
            for question_batch in data_generator:
                for question in question_batch:

                    links = LinksSearcher(question.get_all_text()).get_simple_links()
                    for link in links:
                        # log.write(link.link_text + "\n")
                        # log.flush()
                        # function from Alexandrina
                        article = l2a.link2article(link)
                        # print (article)
                        if article:
                            # print (article.article_ID)
                            links_cnt += 1
                            self.article_index[article.article_ID].add_question(question, link)
                        else:
                            cnt_not_match_links += 1

                    sys.stderr.write("\r\t\t\t\t\tALL LINKS: %d; CAN't MATCH: %d" % (links_cnt, cnt_not_match_links))

            with open("./../data/statistics/article_statistics", "wb") as f:
                cPickle.dump(self.article_index, f, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open("./../data/statistics/article_statistics", "rb") as f:
                self.article_index = cPickle.load(f)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号