def run_experiment(experiment):
    '''
    Main entry point for a single experiment.
    It takes a string 'experiment' that drives the naming of all generated files:
    - generates the filenames for the datasources, schemas and recipes
    - shuffles the local data and sends the randomized version to the S3 bucket
    - creates the train, valid and test datasources
    - creates the model
    - creates the evaluations on the valid and test datasources
    '''
    # start by generating all the filenames for the current experiment
    filenames = generate_filenames(experiment)
    # load the data from local disk, shuffle it and save it back to the original file
    df = pd.read_csv(filepath + filenames['main'])
    df = df.reindex(np.random.permutation(df.index))
    df.to_csv(filepath + filenames['main'], quoting=csv.QUOTE_NONNUMERIC, index=False)
    # send the shuffled file to S3
    os.system("aws s3 cp %s%s %s" % (filepath, filenames['main'], s3_path))
    # write the CLI JSON parameter files and create the datasources, model and evaluations
    create_dsrc("train", 0, 60)
    create_dsrc("valid", 60, 80)
    create_dsrc("test", 80, 100)
    create_model()
    create_eval("valid")
    create_eval("test")
# ----------------------------------------------------------------------------
# datasource, model, evaluation and batch prediction functions
# These functions all follow the same two-step pattern:
# 1. write the JSON parameters to a JSON formatted file in the json_path folder
# 2. execute the AWS CLI command that creates the object: datasource, model, ...
# ----------------------------------------------------------------------------
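
# ----------------------------------------------------------------------------
# Illustrative sketch only, not the original create_dsrc implementation: a
# minimal stand-in showing the two-step pattern described above, using the
# Amazon Machine Learning CLI call "create-data-source-from-s3" with a
# --cli-input-json parameter file. The name create_dsrc_sketch, the parameter
# JSON layout and the S3 filenames are assumptions; it also assumes that
# json_path and s3_path are defined at module level and that os is already
# imported at the top of the file (it is used in run_experiment above).
# ----------------------------------------------------------------------------
import json  # assumed available; shown here so the sketch is self-contained

def create_dsrc_sketch(experiment, purpose, percent_begin, percent_end):
    # 1. write the JSON parameters to a JSON formatted file in the json_path folder
    params = {
        "DataSourceId": "ds_%s_%s" % (experiment, purpose),
        "DataSourceName": "ds %s %s" % (experiment, purpose),
        "DataSpec": {
            # assumed S3 locations for the shuffled data and its schema
            "DataLocationS3": "%s%s.csv" % (s3_path, experiment),
            "DataSchemaLocationS3": "%s%s.csv.schema" % (s3_path, experiment),
            # keep only the rows falling in the [percent_begin, percent_end) split
            "DataRearrangement": json.dumps(
                {"splitting": {"percentBegin": percent_begin, "percentEnd": percent_end}}
            ),
        },
        "ComputeStatistics": True,
    }
    json_file = "%sdsrc_%s_%s.json" % (json_path, experiment, purpose)
    with open(json_file, "w") as f:
        json.dump(params, f, indent=4)
    # 2. execute the AWS CLI command that creates the datasource
    os.system(
        "aws machinelearning create-data-source-from-s3 --cli-input-json file://%s" % json_file
    )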