def MultinomialNBPredictModel(localTrainLabel, config):
    """Fit a TF-IDF + multinomial naive Bayes model and predict on the test set.

    Reads ``../feature/trainQlist.csv`` and ``../feature/testQlist.csv``
    (comma separated, first row is the header), fits a ``TfidfVectorizer``
    on the training ``qlist`` column, trains a ``MultinomialNB`` classifier
    on the resulting features and scores the test ``qlist`` column.

    Args:
        localTrainLabel: Target labels passed as ``y`` to ``MultinomialNB.fit``.
            Presumably one label per row of trainQlist.csv, in file order --
            verify against the caller.
        config: Mapping read for the key ``'prob'``. A value equal to
            ``False`` selects hard class predictions; any other value selects
            per-class log-probabilities.

    Returns:
        A 2-tuple ``(predictions, uids)`` where ``predictions`` is the result
        of ``model.predict`` or ``model.predict_log_proba`` on the test
        features and ``uids`` is ``test['uid'].values``.
    """
    train = pd.read_csv('../feature/trainQlist.csv', header=0, sep=",")
    test = pd.read_csv('../feature/testQlist.csv', header=0, sep=",")

    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print("Train tf-idf vector Model...")
    encode = TfidfVectorizer(
        decode_error='ignore',
        norm="l2",
        binary=False,
        sublinear_tf=True,
        min_df=50,
    )
    # The vocabulary and IDF weights are learned from the training text only.
    localTrainFeature = encode.fit_transform(train['qlist'].values)
    # Bug fix: vectorize the *test* queries. The previous code transformed
    # train['qlist'] again, so the returned predictions described training
    # rows while being paired with test uids.
    localTestFeature = encode.transform(test['qlist'].values)
    print("%s %s" % (localTrainFeature.shape, localTestFeature.shape))

    print('train...')
    model = MultinomialNB(alpha=1.0, fit_prior=True, class_prior=None)
    model.fit(X=localTrainFeature, y=localTrainLabel)

    print('predict...')
    # Explicit comparison kept on purpose: only a value equal to False picks
    # hard labels; anything else (including None) yields log-probabilities.
    if config['prob'] == False:  # noqa: E712
        return model.predict(localTestFeature), test['uid'].values
    return model.predict_log_proba(localTestFeature), test['uid'].values
#-- xgboost local cross-validation model frame
评论列表
文章目录