unit_tests.py 文件源码-python代码片段

unit_tests.py 文件源码

python

阅读 19 收藏 0 点赞 0 评论 0

项目：banking-class 作者: eli-goodfriend 项目源码文件源码

def test_cat():
    """Run the categorization pipeline end to end on the lookup test data.

    Reads ``test_lookup.csv``, splits its rows on whether the ``category``
    column is populated, trains an SGD classifier on the categorized rows,
    applies it to the uncategorized rows, and writes the recombined frame
    (restored to its original row order) to ``test_cat.csv``.

    The function makes no assertions; it only prints progress and writes
    the output file for inspection.

    Relies on names defined outside this function: ``pd``,
    ``linear_model``, ``ts`` and ``embeddings``.
    """
    # Parenthesized single-argument print works as a statement on
    # Python 2 and as a function call on Python 3.
    print('Testing categorization...')
    filein = 'test_lookup.csv'
    fileout = 'test_cat.csv'
    df = pd.read_csv(filein)

    # NOTE(review): recent scikit-learn releases renamed this loss to
    # 'log_loss'; 'log' is kept for the library version this file targets.
    model = linear_model.SGDClassifier(loss='log')

    # Compute the mask once and take explicit copies: the helpers below
    # presumably write into the frames they receive (use_model's return
    # value is discarded), and writing into a bare slice of df triggers
    # pandas' SettingWithCopyWarning.
    has_category = df.category.notnull()
    catData = df[has_category].copy()
    uncatData = df[~has_category].copy()
    pct_categorized = float(len(catData)) / float(len(df)) * 100.
    print(str(pct_categorized)
          + "% of transactions categorized with lookup.")

    ts.train_model(catData, model, embeddings,
                   model_type='logreg', new_run=True)
    # 0.0 is passed positionally; presumably a confidence cutoff --
    # TODO confirm against ts.use_model.
    ts.use_model(uncatData, model, embeddings, 0.0, model_type='logreg')

    # Stitch both halves back together and restore the original row order.
    df = pd.concat([catData, uncatData])
    df.sort_index(inplace=True)

    df.to_csv(fileout, index=False)