def transformTFIDF(X_train_all, X_test_all):
    """Apply TF-IDF weighting to bag-of-events data.

    The TF-IDF transformer is fitted on the training data only and is
    then used to transform both the training and the test data, so no
    statistics from the test data are used when fitting.

    Arguments
    ---------
    X_train_all: pandas DataFrame
        Training bag-of-events data.
    X_test_all: pandas DataFrame
        Test bag-of-events data.

    Returns
    -------
    X_train_t: CSR matrix
        TF-IDF transformed training data.
    X_test_t: CSR matrix
        TF-IDF transformed test data.
    """
    # Sub-linear term frequency, smoothed IDF, L2-normalised output.
    options = {
        'norm': 'l2',
        'use_idf': True,
        'sublinear_tf': True,
        'smooth_idf': True,
    }
    weigher = TfidfTransformer(**options)

    # Convert both inputs to CSR form before handing them to the transformer.
    train_csr, test_csr = [
        scipy.sparse.csr_matrix(data) for data in (X_train_all, X_test_all)
    ]

    # Fit on the training split only.
    weigher.fit(train_csr)

    # Apply the fitted weighting to both splits.
    return weigher.transform(train_csr), weigher.transform(test_csr)
评论列表
文章目录