def load_csv(self, file_name="data/jra_race_resultNN.csv"):
    """Load the race-result CSV and build train/test feature and label arrays.

    The categorical columns are replaced by integer codes, the feature
    columns are collected into a NumPy matrix (missing values become 0),
    and the ``finish_order`` column is vectorized into the label matrix.
    The rows are then split in file order: the first ``train_size`` rows
    form the training set and the remaining rows form the test set.

    Args:
        file_name: Path of the CSV file to read. Defaults to the path the
            method originally had hard-coded.

    Sets on ``self``:
        vectorizer: The ``DictVectorizer`` fitted on ``finish_order``.
        n_classes: Number of output features produced by ``vectorizer``.
        train_size: Number of training rows (one fifth of all rows).
        batch_size: Same value as ``train_size``.
        x_train, x_test: Feature matrices.
        y_train, y_test: Label matrices.
    """
    df = pd.read_csv(file_name)

    # Replace every categorical column with integer codes.  The encoder is
    # re-fitted per column, so codes are only meaningful within a column.
    categorical_columns = [
        'area_name', 'race_name', 'track', 'run_direction',
        'track_condition', 'horse_name', 'horse_sex', 'jockey_name',
        'margin', 'is_blinkers', 'trainer_name', 'comments_by_trainer',
        'evaluation_by_trainer', 'dhorse_weight',
    ]
    label_encoder = preprocessing.LabelEncoder()
    for column in categorical_columns:
        df[column] = label_encoder.fit_transform(df[column])

    # Feature matrix: column order here defines the column order of x_np.
    feature_columns = [
        'area_name', 'race_number', 'race_name', 'track', 'run_direction',
        'distance', 'track_condition', 'purse', 'heads_count',
        'post_position', 'horse_number', 'horse_name', 'horse_sex',
        'horse_age', 'jockey_name', 'time', 'margin', 'time3F',
        'load_weight', 'horse_weight', 'dhorse_weight', 'odds_order',
        'odds', 'is_blinkers', 'trainer_name', 'comments_by_trainer',
        'evaluation_by_trainer',
    ]
    x_np = np.array(df[feature_columns].fillna(0))

    # Labels: vectorize finish_order.  'records' is the full orient name;
    # the abbreviated 'record' alias is rejected by pandas >= 2.0.
    label_records = df[['finish_order']].to_dict('records')
    self.vectorizer = DictVectorizer(sparse=False)
    y_np = self.vectorizer.fit_transform(label_records)
    # feature_names_ is available on every scikit-learn release, whereas
    # get_feature_names() was removed in scikit-learn 1.2.
    self.n_classes = len(self.vectorizer.feature_names_)

    # NOTE(review): only the first fifth of the rows is used for training
    # and the whole training set is one batch -- confirm this is intended.
    self.train_size = len(df) // 5
    self.batch_size = self.train_size

    # Split features and labels at the same row index.
    self.x_train, self.x_test = np.vsplit(x_np, [self.train_size])
    self.y_train, self.y_test = np.vsplit(y_np, [self.train_size])
# Create model
# [web page residue] Comment list
# [web page residue] Table of contents