report.py 文件源码-python代码片段

report.py 文件源码

python

阅读 25 收藏 0 点赞 0 评论 0

项目：source-code-to-name 作者: zironycho 项目源码文件源码

def first_words(index='codetoname', language='python'):
    es = elasticsearch.Elasticsearch()

    # update first name
    s = elasticsearch_dsl.Search(using=es, index=index, doc_type=language)\
        .query('bool', filter=Q('exists', field='feature') & Q('missing', field='first_name'))
    for hit in s.scan():
        data = hit.to_dict()
        feature = json.loads(data['feature'])
        data['first_name'] = firstname(feature['name'], language)
        es.index(index=index, doc_type=language, id=hit.meta.id, body=data)
    es.indices.refresh(index=index)

    # aggregation
    s = elasticsearch_dsl.Search(using=es, index=index, doc_type=language)\
        .query('bool', filter=Q('exists', field='feature'))
    a = A('terms', field='first_name')
    s.aggs.bucket('first_name_terms', a)
    response = s.execute()

    words = []
    for item in response.aggregations.first_name_terms.buckets:
        percentage = item.doc_count / float(response.hits.total) * 100
        words.append({'word': item.key, 'percentage': percentage})
    return words