# common.py source file - Python code snippet

def get_tweets(tweet_ids, consumer_key, consumer_secret, access_token, access_token_secret, nlp):
    """
    Expands tweets from Twitter

    Progress is persisted in the file 'tweet_temp' (one "<tweet id>TAB<text>" record per line)
    so that a re-started run only downloads the tweets that are still missing.

    :param tweet_ids: the list of tweet IDs to expand
    :param consumer_key: passed to twitter.Api as consumer_key
    :param consumer_secret: passed to twitter.Api as consumer_secret
    :param access_token: passed to twitter.Api as access_token_key
    :param access_token_secret: passed to twitter.Api as access_token_secret
    :param nlp: callable applied to the tweet text; must yield tokens exposing a `lower_`
                attribute (presumably a spaCy pipeline - confirm with the caller)
    :return: a dictionary of tweet ID to tweet text; tweets that could not be read are mapped
             to 'TWEET IS NOT AVAILABLE'
    :raises twitter.TwitterError: re-raised when the error message reports 'Rate limit exceeded'
    """

    # Load the tweets saved by a previous run, in case the script stopped working and re-started
    tweets = {}
    if os.path.exists('tweet_temp'):
        with codecs.open('tweet_temp', 'r', 'utf-8') as f_in:
            for line in f_in:
                # Split on the first tab only, so a text that itself contains tabs stays intact
                cached_id, separator, cached_text = line.rstrip('\r\n').partition('\t')

                # Blank or truncated line (e.g. the script was killed in the middle of a write):
                # skip it, the tweet will simply be downloaded again
                if not separator:
                    continue

                tweets[cached_id.lstrip()] = cached_text.rstrip()

    api = twitter.Api(consumer_key=consumer_key, consumer_secret=consumer_secret, access_token_key=access_token,
                      access_token_secret=access_token_secret)

    # sleeptime: pause after every request; resettime: timestamp (compared with time.time())
    # after which both values are refreshed
    sleeptime, resettime = reset_sleep_time(api)

    with codecs.open('tweet_temp', 'a', 'utf-8') as f_out:

        for tweet_id in tweet_ids:

            # Already downloaded (in a previous run or earlier in this one)
            if tweet_id in tweets:
                continue

            try:
                curr_tweet = api.GetStatus(tweet_id, include_entities=False)
                tweets[tweet_id] = clean_tweet(' '.join([t.lower_ for t in nlp(curr_tweet.text)]))

            except twitter.TwitterError as err:
                error = str(err)

                # If the rate limit exceeded, this script should be stopped and resumed the next day
                if 'Rate limit exceeded' in error:
                    raise

                # Other error - the tweet is not available :(
                print('Error reading tweet id: %s : %s' % (tweet_id, error))
                tweets[tweet_id] = 'TWEET IS NOT AVAILABLE'

            # NOTE(review): a text containing a line break would span several cache lines and not
            # be restored correctly on resume - confirm that clean_tweet removes line breaks
            f_out.write('\t'.join((tweet_id, tweets[tweet_id])) + '\n')

            # Flush right away, otherwise a crash loses the buffered records and they have to be
            # downloaded again
            f_out.flush()

            time.sleep(sleeptime)
            if time.time() >= resettime:
                sleeptime, resettime = reset_sleep_time(api)

    return tweets