build_model.py 文件源码

python
阅读 28 收藏 0 点赞 0 评论 0

项目:Guess-Genre-By-Lyrics 作者: ormatt 项目源码 文件源码
def main():
    """Run the model-building script from start to finish.

    Steps, in order:
      1. Parse the command-line arguments and set the logger level from
         ``args.verbosity``.
      2. Call ``build_constants()`` and load the JSON records at
         ``DATA_PATH`` into a DataFrame.
      3. Pass the frame through the project's subset, cleaning and genre
         normalization helpers, then split it into ``X`` and ``y``.
      4. Build the pipeline and dump both the prepared frame and the
         pipeline through ``utils.persistence``.
      5. Train when ``args.train`` is set, otherwise run the k-fold test
         when ``args.test`` is set.

    The total wall-clock time is logged at the end.
    """
    started_at = time.time()

    cli_args = parse_args()
    log_level = getattr(logging, cli_args.verbosity.upper())
    logger.setLevel(log_level)
    logger.info("Started")

    build_constants()

    frame = pd.read_json(path_or_buf=DATA_PATH,
                         orient='records',
                         encoding="UTF8")
    row_count = len(frame)
    logger.debug("Loaded {} rows into df".format(row_count))

    # Subset selection; presumably keeps only rows whose TARGET_COL matches
    # one of the listed strings -- confirm against utils.get_data_subset.
    subset_tools = utils.get_data_subset
    frame = subset_tools.crop(frame, None, None)
    wanted_strings = ['Rock', 'Hip Hop']
    frame = subset_tools.filter_rows_by_string(frame,
                                               [TARGET_COL],
                                               wanted_strings)

    frame = utils.clean_data.execute_cleaners(frame)
    frame = utils.normalize_data.normalize_genres(frame, TARGET_COL)
    samples, targets = subset_tools.get_x_y(frame, SAMPLE_COL, TARGET_COL)

    pipeline = model_pipeline.get_pipeline(SAMPLE_COL)

    # Both artifacts are dumped before any training or testing happens.
    utils.persistence.dump(DF_DUMP_NAME, frame)
    utils.persistence.dump(CLF_DUMP_NAME, pipeline)

    # Training takes precedence when both flags are set.
    if args_train := cli_args.train:
        train_and_test.train_and_dump(samples, targets, pipeline)
    elif cli_args.test:
        train_and_test.test_using_kfold(samples, targets, pipeline)

    elapsed_seconds = time.time() - started_at
    logger.info("Finished in {0:.2f} seconds".format(elapsed_seconds))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号