def update_numberfire_history():
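    """Refresh our saved numberfire prediction and salary data.

    Scrapes the numberfire overview page, matches players to BREF IDs, then scrapes each
    matched player's history page and merges the new predictions into the saved data.
    """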
    # Start by updating our slug dict and overall numberfire player information
    overall_stats = scrape_numberfire_overview_page()
    save_nf_overview_data(sport, overall_stats)

    # We only load & update numberfire slug information for players appearing in the most recent
    # batch of overview data, and only if we are also able to match the player to a BREF ID.
    # A side effect of this is that we will make no predictions for any NBA players who haven't
    # played yet this year.
    pids_to_load = []
    for ix, row in overall_stats.iterrows():
        pid, confidence = name2nbaid(row['name_player'], player_team=row['name_team'], get_confidence=True)
        if confidence > 75:
            pids_to_load.append((pid, row['slug_player']))
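    # Load whatever predictions we've already saved for these players so the freshly
    # scraped rows can be merged into the existing history instead of replacing it.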
    old_predictions = load_nf_histplayerinfo(sport, identifiers_to_load=pids_to_load)
    scraped_salaries = {}

    new_dataframes, updated_dataframes = 0, 0
    print "Scraping updated player predictions from Numberfire..."
    pbar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()])
    for pid, slug in pbar(pids_to_load):
        time.sleep(1)  # rate-limit our requests so we don't hammer numberfire
        player_df, salary_df = load_stats_tables_from_history_page(nf_player_url.format(slug=slug))
        old_player_df = old_predictions.get(pid)
        if old_player_df is None:
            # First time we've seen this player: store the scraped table as-is
            old_predictions[pid] = player_df
            new_dataframes += 1
        else:
            try:
                # combine_first keeps the values we already have and fills any gaps
                # with the newly scraped rows
                new_data = old_player_df.combine_first(player_df)
                old_predictions[pid] = new_data
            except ValueError:
                logging.exception('Failed to merge new predictions for %s; keeping previously saved data', pid)
            updated_dataframes += 1
        scraped_salaries[pid] = salary_df

    logging.info('Saving scraped predictions (%d updated, %d added)', updated_dataframes, new_dataframes)
    save_nf_histplayerinfo(sport, old_predictions)
    save_nf_salary_info(sport, scraped_salaries)
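
# A minimal sketch of how this updater might be run as a standalone refresh script; the
# __main__ guard and logging setup below are illustrative additions, not part of the module.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    update_numberfire_history()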