def feature(terms):
    """Build a TF-IDF weighted term-presence matrix for the test data file.

    Reads the '|'-delimited file named by the module-level ``finaltest``
    (header line skipped), marks for every row which of ``terms`` occur as
    space-separated tokens in the lower-cased first field, re-weights that
    presence matrix with ``TfidfTransformer`` and writes the dense result to
    ``occurencetest.csv``.

    Args:
        terms: Sequence of vocabulary terms; column ``i`` of the result
            corresponds to ``terms[i]``. Terms are compared against string
            tokens, so they are expected to be (hashable) strings.

    Returns:
        A tuple ``(occurence, dataMatrix)``: the dense TF-IDF array with one
        row per data row and one column per term, and the array loaded from
        the file.
    """
    # A file with a single data row comes back as a 0-d array, which cannot be
    # iterated; promote it so the loop below sees exactly one row.
    data_matrix = np.atleast_1d(
        np.genfromtxt(finaltest, delimiter='|', dtype=None, skip_header=True)
    )

    # Count rows along the first axis: ``.size`` would count every element if
    # genfromtxt returned a 2-D array, adding all-zero rows that skew the IDF.
    n_rows = len(data_matrix)
    n_terms = len(terms)

    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    occurence = np.zeros((n_rows, n_terms), dtype=int)

    for row_idx, row in enumerate(data_matrix):
        raw_text = row[0]
        # genfromtxt yields bytes or str depending on NumPy version/encoding.
        if isinstance(raw_text, bytes):
            text = raw_text.lower().decode('UTF-8')
        else:
            text = raw_text.lower()

        # Build the token set once per row for O(1) membership checks.
        tokens = set(text.split(' '))
        for col_idx, term in enumerate(terms):
            if term in tokens:
                # Binary presence flag, not a count of repeats in the row.
                occurence[row_idx, col_idx] = 1

    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(occurence)
    occurence = tfidf.toarray()
    np.savetxt('occurencetest.csv', occurence, delimiter=',')
    return occurence, data_matrix
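# 评论列表 (comment list) -- leftover web-page navigation text, not code
# 文章目录 (table of contents) -- leftover web-page navigation text, not code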