ClassifierStuff.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:VERSE 作者: jakelever 项目源码 文件源码
def buildVectorizer(classes, examples, parameters):
    """Vectorize the training examples and optionally trim the features.

    Args:
        classes: Class labels; handed to ``Vectorizer`` and also used as the
            target values when fitting the chi-squared feature selector.
        examples: Training examples; handed to ``Vectorizer`` unchanged.
        parameters: Mapping of option names to values. Recognised keys:
            ``featureChoice`` (passed straight to ``Vectorizer``, default
            ``None``), ``doFeatureSelection`` and ``tfidf`` (enabled only
            when the value is exactly the string ``"True"``), and
            ``featureSelectPerc`` (converted with ``int``, default 10).

    Returns:
        A tuple ``(vectors, vectorizer, featureSelector)``. When feature
        selection is enabled, ``vectors`` holds the selected features wrapped
        in ``coo_matrix`` and ``featureSelector`` is the fitted selector;
        otherwise ``vectors`` is the vectorizer output as-is and
        ``featureSelector`` is ``None``.
    """
    featureChoice = None
    if "featureChoice" in parameters:
        featureChoice = parameters["featureChoice"]

    # Flags are compared against the literal text "True"; any other value
    # (including a real boolean True) leaves the option switched off.
    doFeatureSelection = (
        "doFeatureSelection" in parameters
        and parameters["doFeatureSelection"] == "True"
    )
    tfidf = "tfidf" in parameters and parameters["tfidf"] == "True"

    featureSelectPerc = 10
    if "featureSelectPerc" in parameters:
        featureSelectPerc = int(parameters["featureSelectPerc"])

    # Single-argument print(...) calls emit the same text under Python 2 and 3.
    print("Starting vectorizer...")
    vectorizer = Vectorizer(classes, examples, featureChoice, tfidf)
    vectors = vectorizer.getTrainingVectors()
    print("Vectors of size: %s" % (vectors.shape,))

    if not doFeatureSelection:
        return vectors, vectorizer, None

    print("Trimming training vectors...")
    # Imported lazily so scikit-learn's feature selection module is only
    # loaded when trimming is actually requested.
    from sklearn.feature_selection import SelectPercentile, chi2

    # 'percentile' is keyword-only in current scikit-learn releases; the
    # keyword form is also accepted by older ones.
    featureSelector = SelectPercentile(chi2, percentile=featureSelectPerc)
    vectorsTrimmed = featureSelector.fit_transform(vectors, classes)
    # NOTE(review): coo_matrix is not imported in this block; presumably it
    # comes from a module-level `from scipy.sparse import coo_matrix` - confirm.
    vectorsTrimmed = coo_matrix(vectorsTrimmed)
    print("Trimmed training vectors of size: %s" % (vectorsTrimmed.shape,))

    return vectorsTrimmed, vectorizer, featureSelector
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号