def demo_command(args):
    """Run a small end-to-end demo: create data files, train a model, evaluate it.

    Creates ``args.directory`` if it does not exist, writes ``train.csv``
    (at most 1000 rows) and ``test.csv`` (at most 100 rows) built from the
    "train" and "test" partitions returned by ``fetch_20newsgroups``, and then
    calls ``default_main`` twice: first with a ``train bow`` command that saves
    the model under ``<args.directory>/model``, then with an ``evaluate``
    command that runs that model against the test file.

    :param args: parsed command line arguments; only ``args.directory`` is read.
    """

    def create_data_file(partition, filename, samples):
        """Write up to ``samples`` rows of ``partition`` to ``filename`` as CSV.

        Each row holds one text (column ``TEXT_NAME``) and the name of its
        target label (column ``LABEL_NAME``). Rows with missing values are
        dropped before the first ``samples`` rows are taken.

        :param partition: object exposing ``data``, ``target`` and ``target_names``.
        :param filename: path of the CSV file to write.
        :param samples: maximum number of rows to keep.
        :return: ``filename``, unchanged, for convenient chaining.
        """
        data = pandas.DataFrame(
            {TEXT_NAME: partition.data,
             # Translate numeric target indexes into their human-readable names.
             LABEL_NAME: [partition.target_names[target] for target in partition.target]}
        ).dropna()[:samples]
        data.to_csv(filename, index=False)
        return filename

    def run(argv):
        """Echo ``argv`` as a ``mycroft`` command line, then execute it."""
        # The trailing "\n" reproduces the blank line printed after each command.
        print("mycroft " + " ".join(argv) + "\n")
        # Pass the argument list as-is. Formatting a string and calling
        # str.split() on it would break any path containing whitespace into
        # several separate arguments.
        default_main(argv)

    os.makedirs(args.directory, exist_ok=True)
    print("Download a portion of the 20 Newsgroups data and create train.csv and test.csv.")
    newsgroups_train = fetch_20newsgroups(subset="train", remove=("headers", "footers", "quotes"))
    newsgroups_test = fetch_20newsgroups(subset="test", remove=("headers", "footers", "quotes"))
    train_filename = create_data_file(newsgroups_train, os.path.join(args.directory, "train.csv"), 1000)
    test_filename = create_data_file(newsgroups_test, os.path.join(args.directory, "test.csv"), 100)
    model_directory = os.path.join(args.directory, "model")

    print("Train a model.\n")
    run(["train", "bow", train_filename,
         "--save-model", model_directory,
         "--epochs", "5",
         "--logging", "progress"])

    print("\nEvaluate it on the test data.\n")
    run(["evaluate", model_directory, test_filename])

    print("\n(Note that there is not enough training data here to generate accurate predictions.)")
评论列表
文章目录