def query_all_tweets(query):
    """
    Query tweets matching ``query`` over the whole history of twitter.

    Starting in March 2006, every month whose first day lies before today
    is cut into three windows (day 1 to 10, day 10 to 20, and day 20 to the
    first day of the following month). One ``since:``/``until:`` search
    string is built per window, and the strings are handed to
    ``query_tweets_once`` through a pool of 20 workers, most recent window
    first. Results are collected in whatever order the workers finish.

    :param query: A twitter advanced search query.
    :return: A sorted list of the tweets gathered. If the run is interrupted
        by the user (KeyboardInterrupt), the tweets gathered so far are
        returned instead of raising.
    """
    year, month = 2006, 3
    today = date.today()
    limits = []
    while date(year=year, month=month, day=1) < today:
        nextmonth = month + 1 if month < 12 else 1
        nextyear = year + 1 if nextmonth == 1 else year
        first = date(year=year, month=month, day=1)
        tenth = date(year=year, month=month, day=10)
        twentieth = date(year=year, month=month, day=20)
        following = date(year=nextyear, month=nextmonth, day=1)
        limits.extend([
            (first, tenth),
            (tenth, twentieth),
            (twentieth, following),
        ])
        year, month = nextyear, nextmonth

    # Newest windows are queued first.
    queries = ['{} since:{} until:{}'.format(query, since, until)
               for since, until in reversed(limits)]

    pool = Pool(20)
    all_tweets = []
    try:
        for new_tweets in pool.imap_unordered(query_tweets_once, queries):
            all_tweets.extend(new_tweets)
            logging.info("Got %s tweets (%s new).",
                         len(all_tweets), len(new_tweets))
    except KeyboardInterrupt:
        logging.info("Program interrupted by user. Returning all tweets "
                     "gathered so far.")
    finally:
        # Always shut the workers down: without this the pool leaks its
        # worker processes, and after an interrupt they would keep running
        # queries whose results nobody collects.
        pool.terminate()
        pool.join()
    return sorted(all_tweets)
评论列表
文章目录