def main():
    """Entry point: prepare the dataset, build the pipeline, then train or test.

    Steps, in order:
      1. Parse command-line arguments and set the logger level from
         ``args.verbosity`` (upper-cased and looked up on ``logging``).
      2. Call ``build_constants()`` and read the JSON records at ``DATA_PATH``
         into a DataFrame.
      3. Pass the frame through the ``utils`` helpers: crop, row filter on
         ``TARGET_COL``, cleaners, genre normalization.
      4. Split into ``X`` / ``y`` and obtain the model pipeline.
      5. Persist both the processed frame and the pipeline object.
      6. Run training when ``args.train`` is set, otherwise k-fold testing
         when ``args.test`` is set; if neither is set, nothing further runs.

    The total wall-clock duration is logged at the end.
    """
    started_at = time.time()

    args = parse_args()
    log_level = getattr(logging, args.verbosity.upper())
    logger.setLevel(log_level)
    logger.info("Started")

    build_constants()

    # --- load -------------------------------------------------------------
    df = pd.read_json(path_or_buf=DATA_PATH, orient='records', encoding="UTF8")
    row_count = len(df)
    logger.debug("Loaded {} rows into df".format(row_count))

    # --- subset / clean / normalize ---------------------------------------
    subset = utils.get_data_subset
    df = subset.crop(df, None, None)
    # NOTE(review): presumably keeps only rows whose target column matches one
    # of these genre strings -- confirm against filter_rows_by_string.
    wanted_genres = ['Rock', 'Hip Hop']
    df = subset.filter_rows_by_string(df, [TARGET_COL], wanted_genres)
    df = utils.clean_data.execute_cleaners(df)
    df = utils.normalize_data.normalize_genres(df, TARGET_COL)

    # --- features, labels and model ---------------------------------------
    X, y = subset.get_x_y(df, SAMPLE_COL, TARGET_COL)
    clf = model_pipeline.get_pipeline(SAMPLE_COL)

    # Both dumps happen before the train/test branch below is reached.
    utils.persistence.dump(DF_DUMP_NAME, df)
    utils.persistence.dump(CLF_DUMP_NAME, clf)

    # --- run the requested mode -------------------------------------------
    if args.train:
        train_and_test.train_and_dump(X, y, clf)
    elif args.test:
        train_and_test.test_using_kfold(X, y, clf)

    elapsed = time.time() - started_at
    logger.info("Finished in {0:.2f} seconds".format(elapsed))
# Comment list
# Table of contents