def get_revisions_past_weeks(qids, weeks):
    """
    Get the revision IDs for revisions on `qids` items in the past `weeks` weeks.

    One query is issued per one-week window, counting back from the database
    server's ``NOW()``. Each query joins ``revision`` to ``page`` and keeps
    rows whose page has the "wikibase-item" content model and whose title is
    one of `qids`. The ``rev_id`` values of all windows are merged into a
    single set, so revisions returned by more than one window appear once.

    :param qids: iterable (e.g. set) of qid strings, matched against page_title
    :param weeks: int, number of one-week windows to look back
    :return: set of the ``rev_id`` values found; empty if `qids` is empty
    """
    revisions = set()

    # Materialize once so emptiness can be checked for any kind of iterable.
    qid_list = list(qids)
    if not qid_list:
        # Nothing to look up: skip sending `weeks` queries whose
        # `IN("")` clause could never match a page title.
        return revisions

    # NOTE(review): the qids are interpolated directly into the SQL text, so
    # they must come from a trusted source and must not contain double quotes.
    qids_str = '"' + '","'.join(qid_list) + '"'

    # The lower bound is inclusive (>=) and the upper bound exclusive (<), so a
    # revision stamped exactly on the boundary shared by two adjacent weekly
    # windows is not excluded by both of them.
    query_template = '''select rev_id, rev_page, rev_timestamp, page_id, page_namespace, page_title, page_touched FROM revision
    inner join page on revision.rev_page = page.page_id WHERE
    rev_timestamp >= DATE_FORMAT(DATE_SUB(DATE_SUB(NOW(),INTERVAL {week} WEEK), INTERVAL 1 WEEK),'%Y%m%d%H%i%s') AND
    rev_timestamp < DATE_FORMAT(DATE_SUB(NOW(), INTERVAL {week} WEEK),'%Y%m%d%H%i%s') AND
    page_content_model = "wikibase-item" AND
    page.page_title IN({qids});
    '''

    for week in tqdm(range(weeks)):
        query = query_template.format(qids=qids_str, week=week)
        # presumably returns a pandas DataFrame with a `rev_id` column -- verify
        # against query_wikidata_mysql's definition
        revision_df = query_wikidata_mysql(query)
        # Progress/debug output: row count plus first and last two rows.
        print(len(revision_df))
        print(revision_df.head(2))
        print(revision_df.tail(2))
        revisions.update(set(revision_df.rev_id))
    return revisions
# 评论列表 (web page residue: "Comment list")
# 文章目录 (web page residue: "Table of contents")