all_topics.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:feedlark 作者: CPSSD 项目源码 文件源码
def main():
    """Print summary statistics for the topics with the most recorded values.

    Expects exactly one command line argument: the number of topics to
    report on. User documents are requested through the ``db-get`` Gearman
    job on ``localhost:4730`` (the ``words`` projection of every document in
    the ``user`` collection of the ``feedlark`` database) and passed to
    ``get_all_topic_data``. Topics are ordered by how many values each one
    has, largest first, and one line is printed per reported topic: the
    topic, its number of values, then the mean, mode and median of those
    values. A mode of ``None`` is shown as ``X``.

    Bad arguments and a failed database request are reported by printing a
    message and returning ``None``; no exception is raised for them.
    """
    if len(sys.argv) != 2:
        print('This tool takes 1 command line argument; the number of topics to output data on. See README.md')
        return

    # Reject a non-numeric count with a message instead of letting int()
    # abort the tool with a ValueError traceback.
    try:
        num_requested_topics = int(sys.argv[1])
    except ValueError:
        print('The number of topics must be an integer, got %r. See README.md' % (sys.argv[1],))
        return

    gearman_client = gearman.GearmanClient(['localhost:4730'])
    request = {
        'key': getenv('SECRETKEY'),
        'database': 'feedlark',
        'collection': 'user',
        'query': {},
        'projection': {
            'words': 1,
        },
    }
    # The request and the reply are both BSON documents.
    job = gearman_client.submit_job('db-get', str(bson.BSON.encode(request)))
    result = bson.BSON.decode(bson.BSON(job.result))

    if result[u'status'] != u'ok':
        print('Error getting user data from database')
        print(result['description'])
        return

    users = result['docs']
    print(len(users))
    # NOTE(review): assumes get_all_topic_data returns a mapping of
    # topic -> collection of values; confirm against its definition.
    topic_data = get_all_topic_data(users)
    num_output_topics = min(num_requested_topics, len(topic_data))
    print('%s %s' % (len(topic_data), num_output_topics))

    sorted_topics = sorted(
        topic_data,
        key=lambda topic: len(topic_data[topic]),
        reverse=True,
    )
    # Clamp at zero: a negative request must print no topic lines, whereas a
    # negative slice bound would count from the end of the list.
    for topic in sorted_topics[:max(num_output_topics, 0)]:
        sorted_values = sorted(topic_data[topic])
        mean_val = mean(sorted_values)
        mode_val = mode(sorted_values)
        median_val = median(sorted_values)

        if mode_val is None:
            mode_val = 'X'
        print('%s %s %s %s %s' % (
            topic, len(sorted_values), mean_val, mode_val, median_val))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号