def update_numberfire_history():
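    """Refresh our saved numberfire prediction and salary data.

    Scrapes the numberfire overview page, matches players to BREF IDs, then scrapes each
    matched player's history page and merges the new predictions into the saved data.
    """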
    # Start by updating our slug dict and overall numberfire player information
    overall_stats = scrape_numberfire_overview_page()
    save_nf_overview_data(sport, overall_stats)

    # We only load & update numberfire slug information for players appearing in the most recent
    # batch of overview data, and only if we are also able to match the player to a BREF ID.
    # A side effect of this is that we will make no predictions for any NBA players who haven't
    # played yet this year.
    pids_to_load = []
    for ix, row in overall_stats.iterrows():
        pid, confidence = name2nbaid(row['name_player'], player_team=row['name_team'], get_confidence=True)
        if confidence > 75:
            pids_to_load.append((pid, row['slug_player']))
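    # Load whatever predictions we've already saved for these players so the freshly
    # scraped rows can be merged into the existing history instead of replacing it.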
    old_predictions = load_nf_histplayerinfo(sport, identifiers_to_load=pids_to_load)
    scraped_salaries = {}

    new_dataframes, updated_dataframes = 0, 0
    print "Scraping updated player predictions from Numberfire..."
    pbar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()])
    for pid, slug in pbar(pids_to_load):
        time.sleep(1)  # rate-limit our requests so we don't hammer numberfire
        player_df, salary_df = load_stats_tables_from_history_page(nf_player_url.format(slug=slug))
        old_player_df = old_predictions.get(pid)
        if old_player_df is None:
            # First time we've seen this player: store the scraped table as-is
            old_predictions[pid] = player_df
            new_dataframes += 1
        else:
            try:
                # combine_first keeps the values we already have and fills any gaps
                # with the newly scraped rows
                new_data = old_player_df.combine_first(player_df)
                old_predictions[pid] = new_data
            except ValueError:
                logging.exception('Failed to merge new predictions for %s; keeping previously saved data', pid)
            updated_dataframes += 1
        scraped_salaries[pid] = salary_df

    logging.info('Saving scraped predictions (%d updated, %d added)', updated_dataframes, new_dataframes)
    save_nf_histplayerinfo(sport, old_predictions)
    save_nf_salary_info(sport, scraped_salaries)
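
# A minimal sketch of how this updater might be run as a standalone refresh script; the
# __main__ guard and logging setup below are illustrative additions, not part of the module.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    update_numberfire_history()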