def test_cat():
    """Run the categorization pipeline end to end on the lookup test file.

    Steps, in order:

    1. Read ``test_lookup.csv`` into a DataFrame.
    2. Split the rows on whether their ``category`` column is null.
    3. Print the percentage of rows that already have a category.
    4. Call ``ts.train_model`` with the categorized rows and
       ``ts.use_model`` with the uncategorized rows (threshold ``0.0``).
    5. Concatenate both subsets, restore the original row order by
       sorting on the index, and write the result to ``test_cat.csv``
       without the index column.

    ``pd``, ``linear_model``, ``ts`` and ``embeddings`` are not defined in
    this function; they are expected to come from the enclosing module.

    Returns:
        None. The result is written to ``test_cat.csv``.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Testing categorization...')
    filein = 'test_lookup.csv'
    fileout = 'test_cat.csv'
    df = pd.read_csv(filein)
    # NOTE(review): scikit-learn renamed loss='log' to 'log_loss' in 1.1 and
    # dropped the old spelling later -- confirm against the installed version.
    model = linear_model.SGDClassifier(loss='log')

    has_category = df.category.notnull()
    # Explicit copies: the return value of ts.use_model is ignored below, so
    # it presumably writes predictions into the frame it is given (TODO
    # confirm). Copying keeps such writes from raising pandas'
    # SettingWithCopyWarning on a slice of `df`.
    categorized = df[has_category].copy()
    uncategorized = df[~has_category].copy()

    total = len(df)
    # Guard against an input file with no data rows (would divide by zero).
    if total:
        pct_categorized = float(len(categorized)) / float(total) * 100.
    else:
        pct_categorized = 0.0
    print(str(pct_categorized) + "% of transactions categorized with lookup.")

    ts.train_model(categorized, model, embeddings,
                   model_type='logreg', new_run=True)
    ts.use_model(uncategorized, model, embeddings, 0.0, model_type='logreg')

    # Re-assemble both subsets and restore the original row order.
    df = pd.concat([categorized, uncategorized])
    df.sort_index(inplace=True)
    df.to_csv(fileout, index=False)
评论列表
文章目录