def crawl_target(api, target_type, target_list):
    """Crawl tweets for each target and persist unseen tweets/users.

    For each target, iterates statuses (rate-limit-wrapped via
    ``limit_handled``) newer than ``catastrophe_period_start`` and writes
    any tweet/author not already present in ``tweet_db`` / ``user_db``.

    Parameters
    ----------
    api : tweepy.API
        Authenticated Tweepy client (provides ``user_timeline`` / ``search``).
    target_type : str
        ``'user'`` to crawl a user timeline, ``'hashtag'`` to run a search.
    target_list : iterable
        User ids/screen names (for 'user') or search query strings
        (for 'hashtag').

    Raises
    ------
    ValueError
        If ``target_type`` is neither ``'user'`` nor ``'hashtag'``.
        (The original code left ``statuses`` unassigned in that case and
        crashed later with an opaque ``NameError``.)
    """
    for target in target_list:
        if target_type == 'user':
            statuses = limit_handled(
                tweepy.Cursor(api.user_timeline, id=target).items())
        elif target_type == 'hashtag':
            statuses = limit_handled(
                tweepy.Cursor(api.search, target).items())
        else:
            # Fail fast with a clear message instead of the NameError the
            # original raised on the first loop iteration below.
            raise ValueError('unknown target_type: %r' % (target_type,))
        print('Crawling %s' % target)
        for status in statuses:
            if status.created_at.timestamp() > catastrophe_period_start:
                if not tweet_db.get(bytes(status.id_str, 'utf-8')):
                    print('Saving tweet: %s' % status.id_str)
                    write_to_tweet_db(status)
                if not user_db.get(bytes(status.author.id_str, 'utf-8')):
                    print('Saving user: %s' % status.author.id_str)
                    write_to_user_db(status.author)
            else:
                # NOTE(review): the break assumes statuses arrive
                # newest-first (typical for timelines/search) — confirm,
                # otherwise older-but-relevant tweets could be skipped.
                print('Reached {time}, on to the next {ttype}'.format(
                    time=status.created_at.strftime('%Y %h %d %H:%M:%S'),
                    ttype=target_type))
                break
# Source file: py_twitter_scrape.py
# (blog-page boilerplate — view/favorite/like/comment counters and page
#  navigation labels — removed; it was not part of the program.)