def train(ENV, args):
    processed_train_data_path = os.path.join(ENV.processed_data_path, 'processed_train.pkl')
    processed_test_data_path = os.path.join(ENV.processed_data_path, 'processed_test.pkl')
    if os.path.exists(processed_train_data_path) and os.path.exists(processed_test_data_path):
        # Pickle files are binary; open them in 'rb'/'wb' mode.
        processed_train_data = pickle.load(open(processed_train_data_path, 'rb'))
        processed_test_data = pickle.load(open(processed_test_data_path, 'rb'))
    else:
        train_wav_files, train_phn_files = load_data(ENV.train_data)
        print('Process train data...')
        processed_train_data = process_data(train_wav_files, train_phn_files)
        test_wav_files, test_phn_files = load_data(ENV.test_data)
        print('Process test data...')
        processed_test_data = process_data(test_wav_files, test_phn_files)
        pickle.dump(processed_train_data, open(processed_train_data_path, 'wb'))
        pickle.dump(processed_test_data, open(processed_test_data_path, 'wb'))
    # print(processed_train_data[0][1])
    print("Define graph...")
    train_model(ENV, processed_train_data, processed_test_data)
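The load_data helper used here is not included in this snippet; for TIMIT-style speech corpora it usually pairs each .wav file with its .phn phoneme transcription. A minimal sketch under that assumption (the glob pattern and the '.phn' extension casing are guesses):

import glob
import os

def load_data(data_dir):
    # Assumed behaviour: collect wav files and derive the matching .phn paths.
    wav_files = sorted(glob.glob(os.path.join(data_dir, '**', '*.wav'), recursive=True))
    phn_files = [os.path.splitext(w)[0] + '.phn' for w in wav_files]
    return wav_files, phn_files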
def skoptTUNE(args, model, n_calls):
    """
    Hyper-parameter optimization using scikit-optimize.
    It offers three algorithms: forest_minimize (decision-tree regression search),
    gbrt_minimize (gradient-boosted-tree search),
    and gp_minimize (Gaussian-process regression search).
    """
    hyperparameters = {
        'batch_size': (40, 120),
        'num_hidden': (100, 500),
        'dropout_output': (0.3, 1.0),
        'dropout_input': (0.3, 1.0),
        'clip_norm': (0.5, 1.0),
    }
    data = load_data(args, args.data, saved=args.load_data)
    all_res = skopt_search(args, data, model, hyperparameters, gp_minimize, n_calls=n_calls)
    print(all_res)
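skopt_search is defined elsewhere; a minimal sketch of how such a wrapper could turn the (low, high) bounds above into scikit-optimize dimensions and hand them to gp_minimize. The name skopt_search_sketch, the reuse of run_network, and the convention that eval_score[0] is a validation accuracy are assumptions:

from skopt import gp_minimize
from skopt.space import Integer, Real

def skopt_search_sketch(args, data, model, hyperparameters, minimizer, n_calls=10):
    names = sorted(hyperparameters)
    # Integer bounds become Integer dimensions, float bounds become Real.
    dimensions = [Integer(*hyperparameters[n]) if isinstance(hyperparameters[n][0], int)
                  else Real(*hyperparameters[n]) for n in names]

    def objective(values):
        for name, value in zip(names, values):
            setattr(args, name, value)
        # gp_minimize minimises, so negate the score being maximised.
        _, eval_score = run_network(args, data, model, tuning=True)
        return -eval_score[0]

    return minimizer(objective, dimensions, n_calls=n_calls)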
def hyperoptTUNE(args, model, n_calls):
    """
    Search the hyper-parameter space with the Tree-structured Parzen
    Estimator (TPE), a Bayesian optimisation approach.
    """
    hyperparameters = {
        'batch_size': hp.choice('batch_size', range(40, 130, 20)),
        'num_hidden': hp.quniform('num_hidden', 100, 500, 1),
        # 'learning_rate': hp.choice('learning_rate', [0.0005]),
        'dropout_output': hp.quniform('dropout_output', 0.3, 1.0, 0.1),
        'dropout_input': hp.quniform('dropout_input', 0.3, 1.0, 0.1),
        'clip_norm': hp.quniform('clip_norm', 0.5, 1.0, 0.1),
    }
    data = load_data(args, args.data, saved=args.load_data)
    best_params, all_res = hyperopt_search(args, data, model, hyperparameters, max_evals=n_calls)
    print(best_params)
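hyperopt_search is likewise external; with the hp space defined above, the idiomatic TPE call is fmin with algo=tpe.suggest. A sketch under the assumption that it reuses run_network and returns the best parameters plus all trial results (note that fmin reports hp.choice parameters by index, not value):

from hyperopt import fmin, tpe, Trials, STATUS_OK

def hyperopt_search_sketch(args, data, model, space, max_evals=10):
    trials = Trials()

    def objective(params):
        for name, value in params.items():
            setattr(args, name, value)
        _, eval_score = run_network(args, data, model, tuning=True)
        # hyperopt minimises the 'loss' entry, so negate the score.
        return {'loss': -eval_score[0], 'status': STATUS_OK}

    best_params = fmin(objective, space, algo=tpe.suggest,
                       max_evals=max_evals, trials=trials)
    return best_params, trials.results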
def main(args):
    '''Module main method'''
    random.seed()
    problem = MathProblem()
    database = utils.initialize_database(args, 'MathDatabase')
    database.set_objective_names(['cos', 'sinc'])
    generation = database.properties['highest_population']
    population_size = database.properties['population_size']
    genetic_algorithm = evolution.NSGA(problem, population_size)
    if generation > 0:
        parents, children = utils.load_data(database)
        genetic_algorithm.set_population(parents)
        genetic_algorithm.set_children(children)
    for _ in range(args.iterations):
        generation += 1
        print('Starting generation ' + str(generation))
        genetic_algorithm.iterate()
        database.create_population()
        utils.save_data(genetic_algorithm, database)
        print('=' * (SCREEN_WIDTH - 1))
def read_gram_from_file(i, n, l, comment=""):
    data = load_data(_get_gram_file_name(i, n, l, comment=comment))
    return data
def run(result_csv_path):
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully ......')
    rf = RandomForestRegressor(
        n_estimators=2000,   # [1500, 2000]
        min_samples_split=2,
        max_depth=15,        # [10, 15]
        n_jobs=-1
    )
    rf.fit(train_x, train_y)
    ###### save model ##################
    joblib.dump(rf, 'weights/' + Model_Name + '.m')
    y_pred = rf.predict(test_x)
    ####### save results ###########################
    save_results(result_csv_path, y_pred)
    ###### generate report #######################
    feature_importances = rf.feature_importances_
    dic_feature_importances = dict(zip(fields, feature_importances))
    # dict.iteritems() is Python 2 only; items() works on both.
    dic = sorted(dic_feature_importances.items(), key=lambda d: d[1], reverse=True)
    print('feature_importances:')
    for name, importance in dic:
        print(name + ":\t" + str(importance))
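save_results appears throughout these snippets but is never shown. A minimal sketch, assuming it writes one prediction per row to the given CSV path (the header name is hypothetical):

import csv

def save_results(result_csv_path, y_pred):
    with open(result_csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['prediction'])  # hypothetical header
        for value in y_pred:
            writer.writerow([value])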
def step_data(FPATH, end_date=None):
    all_data = load_data(FPATH, EPS, end_date=end_date, use_sensors=[5])
    return all_data

def step_data():
    EPS = 1e-6
    all_data = load_data(FPATH, EPS)
    return all_data

def step_data(FPATH, end_date=None):
    all_data = load_data(FPATH, EPS, end_date=end_date)
    return all_data

def step_data(FPATH, end_date=None, use_sensors=None, use_datetime=False):
    all_data = load_data(FPATH, EPS, use_sensors=use_sensors, use_datetime=use_datetime)
    return all_data
def TUNE(args, model, mode, n_calls=5):
    hyperparameters_all = {
        'batch_size': range(40, 130, 20),
        'seq_len': [42],
        'num_hidden': np.random.randint(100, 501, 10),
        'learning_rate': [0.0005],
        'dropout_output': np.arange(0.3, 1.1, 0.1),
        'dropout_input': np.arange(0.3, 1.1, 0.1),
        'clip_norm': np.arange(0.5, 1.01, 0.1),
    }
    maxx = 0
    best_score, hyperparameters_best = None, None
    data = load_data(args, args.data, saved=args.load_data)
    if mode == 'rand':
        samp = random_search(hyperparameters_all, n_calls)  # random search
    else:
        samp = expand_grid(hyperparameters_all)  # grid search
    for hyperparameters in samp:
        print("Evaluating hyperparameters:", hyperparameters)
        for attr, value in hyperparameters.items():
            setattr(args, attr, value)
        scores = run_network(args, data, model, tuning=args.tune)
        test_score, eval_score = scores
        if eval_score[0] > maxx:
            maxx = eval_score[0]
            best_score = test_score
            hyperparameters_best = hyperparameters
        tf.reset_default_graph()
    print()
    print("Optimisation finished.")
    print("Optimised hyperparameters:")
    with open(os.path.dirname(args.checkpoint_file) + '/checkpoint', 'w') as fp:
        fp.write('%s:"%s"\n' % ('model', args.model))
        for attr, value in sorted(hyperparameters_best.items()):
            print("{}={}".format(attr.upper(), value))
            fp.write('%s:"%s"\n' % (attr, value))
    print()
    print("Final Test Data Accuracy = {:.5f}; 3-class F1 = {:.5f}; 2-class F1 = {:.5f}"
          .format(best_score[0], best_score[1], best_score[2]))
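expand_grid and random_search are not shown; their call sites imply they yield dicts of hyper-parameter assignments. Minimal sketches consistent with that usage:

import itertools
import random

def expand_grid(hyperparameters_all):
    # Full Cartesian product of all value lists (grid search).
    names = sorted(hyperparameters_all)
    return [dict(zip(names, values))
            for values in itertools.product(*(hyperparameters_all[n] for n in names))]

def random_search(hyperparameters_all, n_calls):
    # n_calls independent uniformly random configurations.
    return [{name: random.choice(list(values))
             for name, values in hyperparameters_all.items()}
            for _ in range(n_calls)]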
def TRAIN(args, model):
    t0 = time.time()
    print("\nParameters:")
    for attr, value in sorted(vars(args).items()):
        print("{}={}".format(attr.upper(), value))
    print()
    print("Graph initialized..")
    t1 = time.time()
    print("time taken:", t1 - t0)
    print()
    data = load_data(args, args.data, saved=args.load_data)
    run_network(args, data, model, tuning=args.tune)
def main(args):
    if os.path.isfile(args.vocab_file):
        en_dict, cn_dict, en_total_words, cn_total_words = pickle.load(open(args.vocab_file, "rb"))
    else:
        print("vocab file does not exist!")
        exit(-1)
    args.en_total_words = en_total_words
    args.cn_total_words = cn_total_words
    inv_en_dict = {v: k for k, v in en_dict.items()}
    inv_cn_dict = {v: k for k, v in cn_dict.items()}
    if os.path.isfile(args.model_file):
        model = torch.load(args.model_file)
    else:
        print("model file does not exist!")
        exit(-1)
    if args.use_cuda:
        model = model.cuda()
    crit = utils.LanguageModelCriterion()
    test_en, test_cn = utils.load_data(args.test_file)
    args.num_test = len(test_en)
    test_en, test_cn = utils.encode(test_en, test_cn, en_dict, cn_dict)
    test_data = utils.gen_examples(test_en, test_cn, args.batch_size)
    translate(model, test_data, en_dict, inv_en_dict, cn_dict, inv_cn_dict)
    correct_count, loss, num_words = eval(model, test_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("test loss %s" % (loss))
    print("test accuracy %f" % (acc))
    print("test total number of words %f" % (num_words))
def sample(args):
    print('Loading data')
    x, y, vocabulary, vocabulary_inv = utils.load_data()
    text = [list(args.text)]
    sentences_padded = utils.pad_sentences(text, maxlen=x.shape[1])
    raw_x, dummy_y = utils.build_input_data(sentences_padded, [0], vocabulary)
    checkpoint_file = tf.train.latest_checkpoint(args.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            predicted_result = sess.run(predictions, {input_x: raw_x, dropout_keep_prob: 1.0})
    if predicted_result[0] == 0:
        print(args.text + ": negative")
    else:
        print(args.text + ": positive")
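For reference, the same lookups can be written with get_tensor_by_name; in a TF1 graph the k-th output of an op is named "<op_name>:k":

input_x = graph.get_tensor_by_name("input_x:0")
dropout_keep_prob = graph.get_tensor_by_name("dropout_keep_prob:0")
predictions = graph.get_tensor_by_name("output/predictions:0")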
def train_model(opt, logger):
    logger.info('---START---')
    # initialize for reproducibility
    np.random.seed(opt.seed)
    # load data
    logger.info('---LOAD DATA---')
    opt, training, training_snli, validation, test_matched, test_mismatched = load_data(opt)
    if not opt.skip_train:
        logger.info('---TRAIN MODEL---')
        for train_counter in range(opt.max_epochs):
            if train_counter == 0:
                model = build_model(opt)
            else:
                model = load_model_local(opt)
            np.random.seed(train_counter)
            # mix a fresh random 20% slice of SNLI into the training data each epoch
            lens = len(training_snli[-1])
            perm = np.random.permutation(lens)
            idx = perm[:int(lens * 0.2)]
            train_data = [np.concatenate((training[0], training_snli[0][idx])),
                          np.concatenate((training[1], training_snli[1][idx])),
                          np.concatenate((training[2], training_snli[2][idx]))]
            csv_logger = CSVLogger('{}{}.csv'.format(opt.log_dir, opt.model_name), append=True)
            cp_filepath = opt.save_dir + "cp-" + opt.model_name + "-" + str(train_counter) + "-{val_acc:.2f}.h5"
            cp = ModelCheckpoint(cp_filepath, monitor='val_acc', save_best_only=True, save_weights_only=True)
            callbacks = [cp, csv_logger]
            model.fit(train_data[:-1], train_data[-1], batch_size=opt.batch_size, epochs=1,
                      validation_data=(validation[:-1], validation[-1]), callbacks=callbacks)
            save_model_local(opt, model)
    else:
        logger.info('---LOAD MODEL---')
        model = load_model_local(opt)
    # predict
    logger.info('---TEST MODEL---')
    preds_matched = model.predict(test_matched[:-1], batch_size=128, verbose=1)
    preds_mismatched = model.predict(test_mismatched[:-1], batch_size=128, verbose=1)
    save_preds_matched_to_csv(preds_matched, test_matched[-1], opt)
    save_preds_mismatched_to_csv(preds_mismatched, test_mismatched[-1], opt)
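One portability caveat: this loop monitors 'val_acc', the metric name in older Keras; under Keras >= 2.3 / tf.keras the metric is 'val_accuracy', so the checkpoint would be constructed as below (the filename template is illustrative):

from keras.callbacks import ModelCheckpoint

cp = ModelCheckpoint('cp-{val_accuracy:.2f}.h5', monitor='val_accuracy',
                     save_best_only=True, save_weights_only=True)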
def main(args):
    '''Module main function'''
    global database
    global genetic_algorithm
    global joint_positions
    global goal_positions
    pygame.init()
    random.seed()
    database = utils.initialize_database(args, 'RobotTrainingData')
    database.set_objective_names(['Tiempo', r'Error en $\theta_1$', r'Error en $\theta_2$',
                                  r'Error en $\theta_3$', 'Energía'])
    problem = EV3Problem()
    generation = database.properties['highest_population']
    population_size = database.properties['population_size']
    genetic_algorithm = evolution.NSGA(problem, population_size)
    x_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'x_train.txt'))
    y_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'y_train.txt'))
    batch_start = (generation % 10) * N_GOALS
    joint_positions = np.loadtxt(x_path)[batch_start:batch_start + N_GOALS, :]
    goal_positions = np.loadtxt(y_path)[batch_start:batch_start + N_GOALS, :]
    if generation > 0:
        parents, children = utils.load_data(database)
        genetic_algorithm.set_population(parents)
        genetic_algorithm.set_children(children)
    for _ in range(args.iterations):
        generation += 1
        print('Starting generation ' + str(generation))
        genetic_algorithm.iterate()
        database.create_population()
        utils.save_data(genetic_algorithm, database)
        print('=' * (SCREEN_WIDTH - 1))
def run(result_csv_path):
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully.........')
    layer1_rf_parameters = {
        'max_depth': range(15, 21),
        'max_features': [0.5, 0.6, 0.8],
        'min_samples_leaf': [1, 3, 10]
    }
    print('layer 1 train..........')
    layer1_rf = RandomForestRegressor(
        n_estimators=2500,
        n_jobs=-1
    )
    layer1_gs_rf = GridSearchCV(layer1_rf, param_grid=layer1_rf_parameters)
    layer1_gs_rf.fit(train_x, train_y)
    ################# save model ##################
    joblib.dump(layer1_gs_rf, 'weights/layer1_' + Model_Name + '.m')
    # layer1_rf = joblib.load('weights/layer1_' + Model_Name + '.m')
    tr_pred = layer1_gs_rf.predict(train_x)
    train_x = feature_engineer(layer1_gs_rf, train_x, tr_pred)
    te_pred = layer1_gs_rf.predict(test_x)
    test_x = feature_engineer(layer1_gs_rf, test_x, te_pred)
    print('layer 2 train ............')
    layer2_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=1000,
        max_features='sqrt',
        max_depth=18,
        bootstrap=False
    )
    layer2_rf.fit(train_x, train_y)
    joblib.dump(layer2_rf, 'weights/layer2_' + Model_Name + '.m')
    y_pred = layer2_rf.predict(test_x)
    ############ save results ########################
    save_results(result_csv_path, y_pred)
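feature_engineer is not shown either. Judging from its call sites (model, features, predictions -> new features), the usual stacking move is to append the previous layer's predictions as an extra feature column. A minimal sketch (the model argument is unused here but kept for signature compatibility):

import numpy as np

def feature_engineer(model, x, pred):
    # Augment the feature matrix with the previous layer's predictions.
    return np.column_stack([x, pred])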
def run(result_csv_path):
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully.........')
    print('layer 1 train..........')
    layer1_rf = RandomForestRegressor(
        n_estimators=2500,
        max_features=0.8,
        bootstrap=False,
        max_depth=15,
        n_jobs=-1
    )
    layer1_rf.fit(train_x, train_y)
    ################# save model ##################
    joblib.dump(layer1_rf, 'weights/layer1_' + Model_Name + '.m')
    # layer1_rf = joblib.load('weights/layer1_' + Model_Name + '.m')
    tr_pred = layer1_rf.predict(train_x)
    train_x = feature_engineer(layer1_rf, train_x, tr_pred)
    te_pred = layer1_rf.predict(test_x)
    test_x = feature_engineer(layer1_rf, test_x, te_pred)
    print('layer 2 train ............')
    layer2_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=800,  # [600]
        max_features='sqrt',
        max_depth=20,
        bootstrap=False
    )
    layer2_rf.fit(train_x, train_y)
    joblib.dump(layer2_rf, 'weights/layer2_' + Model_Name + '.m')
    tr_pred = layer2_rf.predict(train_x)
    train_x = feature_engineer(layer2_rf, train_x, tr_pred)
    te_pred = layer2_rf.predict(test_x)
    test_x = feature_engineer(layer2_rf, test_x, te_pred)
    print('layer 3 train ..............')
    layer3_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=600,  # [500]
        max_features='sqrt',
        max_depth=20,
        bootstrap=False
    )
    layer3_rf.fit(train_x, train_y)
    joblib.dump(layer3_rf, 'weights/layer3_' + Model_Name + '.m')
    y_pred = layer3_rf.predict(test_x)
    ############ save results ########################
    save_results(result_csv_path, y_pred)
def run():
    tr_x, tr_y = load_data(TRAIN, True)
    te_x = load_data(TEST, False)
    rf = RandomForestClassifier(
        n_estimators=500,
        max_depth=11,
        min_samples_split=2,
        bootstrap=True,
        warm_start=True,
        max_features='sqrt',
        criterion='entropy',
        class_weight='balanced',
        n_jobs=-1
    )
    #rf.fit(tr_x, tr_y)
    ##feature_importances = rf.feature_importances_
    ##dic_feature_importances = dict(zip(Features, feature_importances))
    ##dic = sorted(dic_feature_importances.items(), key=lambda d: d[1], reverse=True)
    ##print('===========================\n')
    ##print('feature_importances:')
    ##for i in range(len(dic)):
    ##    print(dic[i][0] + ":\t" + str(dic[i][1]))
    #te_pred = rf.predict(te_x)
    #save_results(result_csv_path, te_pred)
    #sum_acc = 0
    #cv = 10
    #kf = KFold(tr_x.shape[0], n_folds=cv, shuffle=True)
    #for train, val in kf:
    #    x_tr, x_val, y_tr, y_val = tr_x[train], tr_x[val], tr_y[train], tr_y[val]
    #    rf.fit(x_tr, y_tr)
    #    pred_val = rf.predict(x_val)
    #    true_count = 0
    #    for i in range(len(y_val)):
    #        if y_val[i] == pred_val[i]:
    #            true_count += 1
    #    acc = true_count * 1.0 / len(pred_val)
    #    sum_acc += acc
    #    print('acc :' + str(acc))
    #print('avg acc:' + str(sum_acc / cv))
    cv = 10
    scores = cross_val_score(rf, tr_x, tr_y, cv=cv, scoring='f1_weighted')
    avg_score = sum(scores) / cv
    print(str(scores))
    print('scores:\t' + str(avg_score))
    #while True:
    #    #rf.fit(tr_x, tr_y)
    #    scores = cross_val_score(rf, tr_x, tr_y, cv=cv, scoring='f1_weighted')
    #    avg_score = sum(scores) / cv
    #    print(str(scores))
    #    print('scores:\t' + str(avg_score))
    #    if avg_score > 0.6:
    #        te_pred = rf.predict(te_x)
    #        save_results(result_csv_path, te_pred)
    #        break
    #print(str(scores))
    #print(str(sum(scores) / cv))
########################################################################################