def _country_initial_fixer(df, country, limit):
    """Return the rows of one country, randomly down-sampled to ``limit`` rows.

    Input:
        df      = dataframe that contains a 'countrycode' column [dataframe]
        country = value matched against 'countrycode' with ``==``
                  (e.g. a 2 capital letter country code) [string]
        limit   = max number of rows to keep in the returned dataframe [int]
    Output:
        dataframe holding only rows whose 'countrycode' equals ``country``.
        When more than ``limit`` rows match, exactly ``limit`` of them are
        drawn without replacement and returned in the order they were drawn;
        otherwise every matching row is returned in its original order.
    note:
        the draw uses numpy's global random state (``np.random.choice``) and
        this function does not seed it; call ``np.random.seed(...)`` before
        calling if a reproducible sample is needed.
    """
    # Filter once and reuse the result in both branches.
    df_c = df[df['countrycode'] == country]

    # Compare the number of ROWS with the limit. Counting non-null values of
    # the first column (``.count()[0]``) under-counts when that column has
    # NaNs, which let more than ``limit`` rows through.
    if len(df_c) <= limit:
        return df_c

    # Same sampling call as before so seeded callers keep getting the same draw.
    random_c = np.random.choice(list(df_c.index), limit, replace=False)
    return df_c.loc[list(random_c)]
ensemble_method_func.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录