def run(consumer_key, consumer_secret, access_key, access_secret,
        connection_string, threshold=5000, seed_only=True):
    """Collect friend ids for stored users and record the relationships.

    Selects rows from the ``user`` table that have ``friends_collected == 0``,
    ``friends_count < threshold`` and the requested ``is_seed`` value, then
    for each one fetches its friend ids, inserts any missing users, creates
    the connections and marks the row with ``friends_collected = 1``.

    A failure on one user is reported and the loop moves on to the next
    user (best-effort). The loop pauses for one minute after every user,
    whether or not it succeeded, so that repeated failures cannot hammer
    the rate-limited endpoint.

    Args:
        consumer_key: Passed through to ``get_api``.
        consumer_secret: Passed through to ``get_api``.
        access_key: Passed through to ``get_api``.
        access_secret: Passed through to ``get_api``.
        connection_string: Database URL handed to ``dataset.connect``.
        threshold: Only users whose ``friends_count`` is strictly below
            this value are processed.
        seed_only: When true, process rows with ``is_seed == 1``; otherwise
            process rows with ``is_seed == 0`` (non-seed rows only, not
            all rows).
    """
    db = dataset.connect(connection_string)
    api = get_api(consumer_key, consumer_secret, access_key, access_secret)
    is_seed = 1 if seed_only else 0
    user_table = db['user']
    # Materialize the result set up front: the loop below writes to the
    # same table, and the total is needed for the progress estimate.
    users = list(user_table.find(
        user_table.table.columns.friends_count < threshold,
        friends_collected=0, is_seed=is_seed))
    remaining = len(users)
    for u in users:
        try:
            print('Getting friend ids for ' + u['screen_name'])
            # The first two returned values are not used here.
            _next, _prev, friend_ids = get_friend_ids(
                api, screen_name=u['screen_name'])
            print('Adding ' + str(len(friend_ids)) + ' user ids to db')
            insert_if_missing(db, user_ids=friend_ids)
            print('Creating relationships for ' + str(u['user_id']))
            create_connections(db, u['user_id'], friend_ids=friend_ids)
            update_dict = dict(id=u['id'], friends_collected=1)
            user_table.update(update_dict, ['id'])
        except Exception as exc:
            # Best-effort: report and move on. ``Exception`` (not a bare
            # ``except``) lets KeyboardInterrupt/SystemExit stop the run.
            print('Skipping user after error: ' + repr(exc))
        # Can only make 15 calls in a 15 minute window to this endpoint,
        # so wait one minute per user even when the attempt failed.
        remaining -= 1
        # One minute per remaining user, expressed in hours.
        time_left = remaining / 60.0
        print(str(time_left) + ' hours to go')
        print('Sleeping for 1 minute, timestamp: ' + str(datetime.now()))
        time.sleep(60)
# Comment list
# Table of contents