query.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:twitterscraper 作者: taspinar 项目源码 文件源码
def query_all_tweets(query):
    """
    Queries *all* tweets in the history of twitter for the given query. This
    will run in parallel for each ~10 days.

    :param query: A twitter advanced search query.
    :return: A list of tweets.
    """
    year = 2006
    month = 3

    limits = []
    while date(year=year, month=month, day=1) < date.today():
        nextmonth = month + 1 if month < 12 else 1
        nextyear = year + 1 if nextmonth == 1 else year

        limits.append(
            (date(year=year, month=month, day=1),
             date(year=year, month=month, day=10))
        )
        limits.append(
            (date(year=year, month=month, day=10),
             date(year=year, month=month, day=20))
        )
        limits.append(
            (date(year=year, month=month, day=20),
             date(year=nextyear, month=nextmonth, day=1))
        )
        year, month = nextyear, nextmonth

    queries = ['{} since:{} until:{}'.format(query, since, until)
               for since, until in reversed(limits)]

    pool = Pool(20)
    all_tweets = []
    try:
        for new_tweets in pool.imap_unordered(query_tweets_once, queries):
            all_tweets.extend(new_tweets)
            logging.info("Got {} tweets ({} new).".format(
                len(all_tweets), len(new_tweets)))
    except KeyboardInterrupt:
        logging.info("Program interrupted by user. Returning all tweets "
                     "gathered so far.")

    return sorted(all_tweets)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号