# contact.py source file - Python code snippet

def run_query(src, fn, rows=200):
    """Write comments from one source, one row per unique email, to a TSV file.

    Sets the ``analysis.source`` term of the module-level ``query`` to
    ``src``, pages through the ``fcc-comments`` index via the module-level
    ``es`` client in batches of 100 (at most 10 batches), and writes a
    tab-delimited row for the first hit seen for each distinct
    ``contact_email``.

    Args:
        src: Value to match against the ``analysis.source`` term.
        fn: Path of the output file; it is created or overwritten.
        rows: Maximum number of unique-email rows to write (header excluded).

    Returns:
        None. Output goes to ``fn``; progress is printed to stdout.
    """
    batch_size = 100
    max_batches = 10

    print('writing data for %s to %s' % (src, fn))
    # NOTE: this updates the shared module-level ``query`` in place, so the
    # source filter stays set to ``src`` after the call returns.
    query['query']['function_score']['query']['bool']['must'][0]['term']['analysis.source'] = src
    print(json.dumps(query))

    emails = set()
    batches = 0
    total = None
    # Explicit UTF-8 so free-text comments do not depend on the locale's
    # default encoding; newline='' is what the csv module expects.
    with open(fn, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['email', 'name', 'date', 'comment', 'url'])
        while len(emails) < rows and batches < max_batches:
            offset = batches * batch_size
            # ``is not None`` rather than truthiness: a total of 0 is a real
            # answer and must stop paging; ``>=`` because an offset equal to
            # the total is already past the last hit.
            if total is not None and offset >= total:
                break
            resp = es.search(index='fcc-comments', body=query,
                             size=batch_size, from_=offset)
            if batches == 0:
                total = resp['hits']['total']
                # Elasticsearch 7+ reports the total as
                # {'value': N, 'relation': ...} instead of a bare integer.
                if isinstance(total, dict):
                    total = total.get('value')
                print('\t%s matches' % (total,))
            else:
                print('\tbatch %s: have %s' % (batches + 1, len(emails)))
            batches += 1

            hits = resp['hits']['hits']
            if not hits:
                # An empty page means there is nothing further to fetch.
                break
            for doc in hits:
                if len(emails) >= rows:
                    break
                data = doc['_source']
                email = data['contact_email']
                if email in emails:
                    # Keep only the first comment seen for each address.
                    continue
                emails.add(email)
                writer.writerow([
                    email,
                    data['filers'][0]['name'],
                    data['date_received'],
                    data['text_data'],
                    'https://www.fcc.gov/ecfs/filing/%s' % doc['_id'],
                ])