def _get_user_tweets(self, screen_name, max_tweets=None):
    """Fetch a user's recent tweets and annotate each with retweet info.

    Twitter's ``user_timeline`` endpoint only exposes a user's most
    recent ~3240 tweets; they are fetched here in pages of 200 (the API
    maximum), walking backwards with ``max_id`` to avoid duplicates.

    Args:
        screen_name: Twitter handle whose timeline is fetched.
        max_tweets: optional cap on the number of tweets collected;
            ``None`` (the default) collects everything the API allows.

    Returns:
        dict mapping tweet id (str) to a dict with keys
        ``'created'``, ``'text'``, ``'retweet'``, ``'rt_author'``.

    NOTE(review): text may have encoding issues, see:
    https://stackoverflow.com/questions/6539881/python-converting-from-iso-8859-1-latin1-to-utf-8
    """
    # Accumulates tweepy Tweet objects across pages.
    alltweets = []
    # Initial request for the most recent page (200 = API maximum count).
    new_tweets = self._api.user_timeline(screen_name=screen_name, count=200)
    alltweets.extend(new_tweets)
    # Keep paging until the API returns an empty page (or the cap is hit).
    # Computing `oldest` inside the loop also fixes an IndexError on
    # accounts with zero tweets (the original indexed alltweets[-1]
    # unconditionally before the first page was checked).
    while new_tweets and (max_tweets is None or len(alltweets) < max_tweets):
        # id of the oldest tweet seen so far, less one, so the next page
        # starts strictly before it — prevents duplicates.
        oldest = alltweets[-1].id - 1
        new_tweets = self._api.user_timeline(
            screen_name=screen_name, count=200, max_id=oldest)
        alltweets.extend(new_tweets)
    if max_tweets is not None:
        alltweets = alltweets[:max_tweets]
    # Index tweets by their string id for the caller.
    outtweets = {tweet.id_str: {'created': tweet.created_at, 'text': tweet.text}
                 for tweet in alltweets}
    # Twitter-aware tokenizer (handles @mentions, #hashtags, emoticons).
    tknzr = TweetTokenizer()
    # Extend each record with linguistic processing.
    for tweet in outtweets.values():
        tweet_tokens = tknzr.tokenize(tweet['text'])
        # Parts-of-speech tags for the tokenized text.
        tweet_pos = nltk.pos_tag(tweet_tokens)
        # A retweet starts with the literal token 'RT'. Guard against an
        # empty token list (empty/whitespace-only text) — the original
        # raised IndexError there.
        tweet['retweet'] = bool(tweet_pos) and tweet_pos[0][0] == 'RT'
        # Original author is the token following 'RT' (usually '@handle');
        # guard against a tweet consisting of the lone token 'RT'.
        if tweet['retweet'] and len(tweet_pos) > 1:
            tweet['rt_author'] = tweet_pos[1][0]
        else:
            tweet['rt_author'] = ''
    return outtweets
# (removed scraped-page residue that broke the file's syntax: "评论列表" / "文章目录",
#  i.e. "comment list" / "table of contents" navigation text from the source webpage)