def main(argv=None):
    """Entry point for the MLR training pipeline.

    Parses command-line arguments, configures logging verbosity, creates
    the Spark/Hive contexts, ensures the output directory does not already
    exist, and runs the training pipeline.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments. When None the parser reads sys.argv.
    """
    args = parse_arguments(argv)
    # Verbosity flags only control logging; strip them afterwards so the
    # remaining args can be passed straight to run_pipeline as kwargs.
    if args['very_verbose']:
        logging.basicConfig(level=logging.DEBUG)
    elif args['verbose']:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig()
    del args['verbose']
    del args['very_verbose']
    # TODO: Set spark configuration? Some can't actually be set here though,
    # so best might be to set all of it on the command line for consistency.
    sc = SparkContext(appName="MLR: training pipeline")
    sc.setLogLevel('WARN')
    sqlContext = HiveContext(sc)
    output_dir = args['output_dir']
    if os.path.exists(output_dir):
        # Refuse to clobber results from a previous run.
        # Lazy %-style args: let logging do the formatting.
        logging.error('Output directory (%s) already exists', output_dir)
        sys.exit(1)
    # Maybe this is a bit early to create the path ... but should be fine.
    # The annoyance might be that an error in training requires deleting
    # this directory to try again.
    os.mkdir(output_dir)
    try:
        run_pipeline(sc, sqlContext, **args)
    except:  # noqa: E722
        # If the directory we created is still empty delete it
        # so it doesn't need to be manually re-created.
        # glob('*') deliberately ignores dotfiles, matching prior behavior.
        if not glob.glob(os.path.join(output_dir, '*')):
            os.rmdir(output_dir)
        raise