def create_subset(src, dest, n=250):
    """Write a random subset of the CSV `src` to `dest`.

    Picks up to `n` distinct values of the "License #" column at random and
    keeps every row of `src` whose license is among them, so each selected
    entity is written together with all of its rows.  Column names are passed
    through `clean_column_name` before writing; the row index is not written.

    Args:
        src: Input CSV, in any form accepted by `pd.read_csv`.
        dest: Output target, in any form accepted by `DataFrame.to_csv`.
        n: Number of unique licenses to sample.  If `src` contains fewer
            unique licenses than `n`, all of them are kept.

    Raises:
        KeyError: If `src` has no "License #" column.
    """
    df = pd.read_csv(src)
    lics = pd.unique(df["License #"])

    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the request at the number of unique licenses.
    sample_size = min(n, len(lics))

    # Sample positions and index into the array of unique values.
    picked = random.sample(range(len(lics)), sample_size)
    sublics = lics[picked]

    subset = df[df["License #"].isin(sublics)]

    # Make the column names a little more readable.  Build a concrete list
    # rather than handing pandas a lazy, single-use `map` iterator.
    subset.columns = [clean_column_name(col) for col in subset.columns]
    subset.to_csv(dest, index=False)
评论列表
文章目录