def run_cat(filename,modelname,fileout,embeddings,new_run=True,run_parse=True,
            model_type='logreg',C=10.0,
            alpha=1.0, cutoff=0.50, n_iter=1):
    """Read a CSV, categorize it with a new or saved model, and save both.

    The input CSV is loaded into a DataFrame, passed to ``cat_df`` together
    with the model, the city lookup table and the embeddings, and the
    resulting DataFrame is written to ``fileout``. The model object is then
    pickled to ``modelname``.

    Args:
        filename: Path of the input CSV. If it has exactly two columns they
            are renamed to ``'raw'`` and ``'amount'``.
        modelname: Path of the pickled model. Read when ``new_run`` is
            False; always (over)written at the end of the run.
        fileout: Path the categorized DataFrame is written to as CSV
            (without the index).
        embeddings: Passed through unchanged to ``cat_df``.
        new_run: If True, build a fresh model according to ``model_type``;
            otherwise load the model pickled at ``modelname``. Also
            forwarded to ``cat_df``.
        run_parse: Forwarded to ``cat_df``.
        model_type: ``'logreg'``, ``'passive-aggressive'`` or
            ``'naive-bayes'``. Only validated when ``new_run`` is True;
            always forwarded to ``cat_df``.
        C: ``C`` argument for ``PassiveAggressiveClassifier``.
        alpha: ``alpha`` argument for ``SGDClassifier``.
        cutoff: Forwarded to ``cat_df``.
        n_iter: ``n_iter`` argument for ``SGDClassifier``.

    Returns:
        None. Results are written to ``fileout`` and ``modelname``.

    Raises:
        NameError: If ``new_run`` is True and ``model_type`` is not one of
            the three supported names.
    """
    # pull relevant data and run parsing and classification
    df = pd.read_csv(filename)
    if len(df.columns) == 2:  # make sure columns have the right names
        df.columns = ['raw','amount']

    if new_run:  # initialize the model
        if model_type == 'logreg':
            # NOTE(review): `loss='log'` and `n_iter` look like keyword names
            # from older scikit-learn releases -- confirm against the
            # installed version before upgrading.
            model = linear_model.SGDClassifier(loss='log',warm_start=True,
                                               n_iter=n_iter,alpha=alpha)
        elif model_type == 'passive-aggressive':
            model = linear_model.PassiveAggressiveClassifier(C=C,warm_start=True)
        elif model_type == 'naive-bayes':
            model = naive_bayes.GaussianNB()
        else:
            raise NameError('model_type must be logreg, passive-aggressive, or naive-bayes')
    else:  # load a saved, pre-trained model
        # NOTE(review): unpickling executes arbitrary code; `modelname` must
        # point to a trusted file.
        # The context manager closes the handle, which was previously leaked.
        with open(modelname, 'rb') as model_file:
            model = pickle.load(model_file)

    cities_path = dirs.data_dir + 'cities_by_state.pickle'
    us_cities = pd.read_pickle(cities_path)

    df = cat_df(df,model,us_cities,embeddings,new_run,run_parse,cutoff=cutoff,
                model_type=model_type)
    df.to_csv(fileout,index=False)

    # Persist the model (whichever type it is) for later runs.
    # NOTE(review): presumably cat_df fits `model` in place, since the same
    # object is what gets pickled here -- confirm against cat_df.
    with open(modelname, 'wb') as model_file:
        pickle.dump(model, model_file)
# ------ testing functions
# (web-page residue) Comment list
# (web-page residue) Table of contents