def main():
    """Run cross-validation training and record the test accuracy per fold.

    For every fold (numbered 1..``n_folds``) this loads the fold's data,
    restores a model from ``<base_train_dir>/<fold>/model.json`` and
    ``model.h5`` when a saved one exists (otherwise builds one with
    ``setup_model``), and calls ``train``.  The value returned by ``train``
    is logged and collected; once all folds are done, the per-fold values
    and their mean are written to
    ``<base_train_dir>/final_test_accuracy.txt``.

    Relies on names defined outside this function: ``embedding_path``,
    ``n_folds``, ``data_path``, ``base_train_dir``, ``base_lr`` and
    ``decay_rate``, plus the helpers it calls.
    """
    # read pre-trained embeddings
    embeddings = load_embeddings(embedding_path, 'word2vec')
    test_accus = []  # Collect test accuracy for each fold
    root_logger = logging.getLogger()

    for fold in range(1, n_folds + 1):
        logging.info('Fold {} of {}...'.format(fold, n_folds))
        # read data
        (train_data, train_labels, test_data, test_labels,
         seq_len, vocab_size) = load_data_MR(data_path, fold=fold)

        # update train directory according to fold number
        train_dir = base_train_dir + '/' + str(fold)
        # create train directory if not exist
        if not os.path.exists(train_dir):
            os.makedirs(train_dir)

        # create log file handler for this fold only
        file_handler = logging.FileHandler(pjoin(train_dir, "log.txt"))
        root_logger.addHandler(file_handler)
        try:
            # check whether the model has been trained, if not, create a new one
            model_json_path = train_dir + '/model.json'
            if os.path.exists(model_json_path):
                # load json and create model; `with` closes the file even if
                # reading fails
                with open(model_json_path, 'r') as json_file:
                    loaded_model_json = json_file.read()
                model = model_from_json(loaded_model_json)
                # load weights into new model
                model.load_weights(train_dir + "/model.h5")
                model.compile(loss={'output': 'binary_crossentropy'},
                              optimizer=Adadelta(lr=base_lr, epsilon=1e-6,
                                                 decay=decay_rate),
                              metrics=["accuracy"])
                print("Loaded model from disk!")
            else:
                model = setup_model(embeddings, seq_len, vocab_size)
                print("Created a new model!")

            # train the model
            test_accu = train(model, train_data, train_labels, test_data,
                              test_labels, embeddings, train_dir)
            # log test accuracy result
            logging.info("\nTest Accuracy for fold {}: {}".format(fold, test_accu))
            test_accus.append(test_accu)
        finally:
            # Detach and close the handler so later folds do not also write
            # into this fold's log.txt and the file descriptor is released.
            root_logger.removeHandler(file_handler)
            file_handler.close()

    # write log of test accuracy for all folds; folds are numbered from 1 to
    # match the numbering used for the directories and log messages above
    summary_lines = ['Fold {} Test Accuracy: {}'.format(fold_no, accu)
                     for fold_no, accu in enumerate(test_accus, 1)]
    with open(base_train_dir + "/final_test_accuracy.txt", 'w') as test_accu_log:
        test_accu_log.write('\n'.join(summary_lines))
        test_accu_log.write('\nAvg test acc: {}'.format(np.mean(test_accus)))
# (Web-page navigation residue, "comment list" / "table of contents"; not part of the program.)