def load_names_data():
fp = os.path.join(tempfile.gettempdir(), ZIP_NAME)
if not os.path.exists(fp):
r = requests.get(URL_NAMES)
with open(fp, 'wb') as f:
f.write(r.content)
post = collections.OrderedDict()
with zipfile.ZipFile(fp) as zf:
# get ZipInfo instances
for zi in sorted(zf.infolist(), key=lambda zi: zi.filename):
fn = zi.filename
if fn.startswith('yob'):
year = int(fn[3:7])
df = pd.read_csv(
zf.open(zi),
header=None,
names=('name', 'gender', 'count'))
df['year'] = year
post[year] = df
df = pd.concat(post.values())
df.set_index('name', inplace=True, drop=True)
return df
评论列表
文章目录