fasttext.py 文件源码-python代码片段

def main():

    # read pre-trained embeddings
    embeddings = load_embeddings(embedding_path, 'word2vec')

    test_accus = [] # Collect test accuracy for each fold
    for i in xrange(n_folds):
        fold = i + 1
        logging.info('Fold {} of {}...'.format(fold, n_folds))
        # read data
        train_data, train_labels, test_data, test_labels, seq_len, vocab_size = load_data_MR_fasttext(data_path, fold=fold)

        # update train directory according to fold number
        train_dir = base_train_dir + '/' + str(fold)
        # create train directory if not exist
        if not os.path.exists(train_dir):
            os.makedirs(train_dir)
        # create log file handler
        file_handler = logging.FileHandler(pjoin(train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # check whether the model has been trained, if not, create a new one
        if os.path.exists(train_dir + '/model.json'):
            # load json and create model
            json_file = open(train_dir + '/model.json', 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            model = model_from_json(loaded_model_json)
            # load weights into new model
            model.load_weights(train_dir + "/model.h5")
            model.compile(loss={'output':'binary_crossentropy'},
                        optimizer=Adadelta(lr=base_lr, epsilon=1e-6, decay=decay_rate),
                        metrics=["accuracy"])
            print("Loaded model from disk!")
        else:
            model = setup_model(embeddings, seq_len, vocab_size)
            print("Created a new model!")

        # train the model
        test_accu = train(model, train_data, train_labels, test_data, test_labels, embeddings, train_dir)

        # log test accuracy result
        logging.info("\nTest Accuracy for fold {}: {}".format(fold, test_accu))
        test_accus.append(test_accu)

    # write log of test accuracy for all folds
    test_accu_log = open(base_train_dir + "/final_test_accuracy.txt", 'w')
    test_accu_log.write('\n'.join(['Fold {} Test Accuracy: {}'.format(fold, test_accu) for fold, test_accu in enumerate(test_accus)]))
    test_accu_log.write('\nAvg test acc: {}'.format(np.mean(test_accus)))