def _df_initial_fixer(df, word, sample=60000):
    '''
    Randomly pick ``sample`` rows from ``df`` (without replacement) and
    drop the columns that are not used for ensemble-method modeling.

    input:
        df = dataframe holding one row per image; it must contain every
             column named in the drop list below [pd.DataFrame]
        word = name of the topic, e.g. "cat"; only used in the printed
               message [str]
        sample = number of rows to extract from df [int]
    output:
        new dataframe with exactly ``sample`` rows and without the
        dropped columns; ``df`` itself is not modified.
    raises:
        ValueError if ``sample`` is larger than the number of rows in df.
    '''
    n_rows = len(df)
    # Single-argument parenthesised print works on both Python 2 and 3.
    print("total number of images for df_{}: {}".format(word, n_rows))

    if sample > n_rows:
        # np.random.choice would raise ValueError here as well; fail early
        # with a message that names both sizes.
        raise ValueError(
            "cannot sample {} rows from df_{}: only {} rows available".format(
                sample, word, n_rows))

    # Sample row *positions* rather than index labels: with duplicated
    # labels a label-based .loc lookup would return more than `sample` rows.
    random_positions = np.random.choice(n_rows, sample, replace=False)
    df_sampled = df.iloc[random_positions]

    # Raw / per-stroke columns that the ensemble models do not consume.
    unused_columns = [
        'drawing', 'key_id', 'timestamp', 'recognized', 'X', 'Y', 'time',
        'X_per_stroke', 'Y_per_stroke', 'time_per_stroke',
        'total_time_of_stroke', 'dp_per_stroke', 'dp_percent_per_stroke',
        'direction',
    ]
    df_test = df_sampled.drop(unused_columns, axis=1)
    return df_test
ensemble_method_func.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录