def extract_features_from_json():
    """Convert the JSON files in the data directory into one flat text file.

    Every entry of ``../../data/20_5_from_2008/`` (except the output file
    itself) is loaded with ``pd.read_json(..., orient='columns')``.  Rows
    containing missing values are dropped, the frame is sorted by index in
    descending order and re-indexed from 0, and ``data_norm`` is called on
    it (defined elsewhere; its return value is ignored, so it presumably
    normalises the frame in place -- TODO confirm).

    For each remaining row one line is appended to
    ``../../data/20_5_from_2008/data`` in the form::

        <code without its first two characters>;0 <features>;1 <value>

    where ``<value>`` is the row's ``real_up_after_240`` entry formatted with
    ``%f`` and ``<features>`` are the space-separated values of every column
    other than ``datetime``, ``code`` and ``real_up_after_240``.  Rows whose
    features contain ``+inf`` or ``-inf`` are skipped.

    The output file is opened in append mode, so repeated runs add to any
    existing content rather than replacing it.

    Returns:
        None.  The only effect is the text appended to the output file.
    """
    input_path = '../../data/20_5_from_2008/'
    output_name = 'data'
    output_path = os.path.join(input_path, output_name)

    for json_file in os.listdir(input_path):
        # The output file lives inside the input directory; skip it so that a
        # re-run does not try to parse previously written output as JSON.
        if json_file == output_name:
            continue

        train_data = pd.read_json(os.path.join(input_path, json_file), orient='columns')
        train_data.dropna(inplace=True)
        train_data.sort_index(ascending=False, inplace=True)
        train_data.index = range(len(train_data))
        if len(train_data) == 0:
            # Nothing left after dropping incomplete rows.
            continue

        data_norm(train_data)

        # Pull the target and identifier columns out before they are dropped,
        # so that only feature columns remain in the frame.
        values = train_data['real_up_after_240'].tolist()
        codes = train_data['code'].tolist()
        train_data.drop(['datetime', 'code', 'real_up_after_240'], axis=1, inplace=True)
        features = train_data.values.tolist()

        with open(output_path, 'a') as f:
            for code, row, value in zip(codes, features, values):
                # Skip rows with infinite feature values.
                if np.inf in row or -np.inf in row:
                    continue
                f.write('%s;0 %s;1 %f\n' % (code[2:], ' '.join([str(x) for x in row]), value))
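# Scraped-page residue (not code): "Comment list"
# Scraped-page residue (not code): "Article table of contents"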