def getFeature(path="data", k=20000):
    """Load labelled sparse samples and keep the ``k`` best chi-squared features.

    The file is read in full, its lines are shuffled with ``random.shuffle``,
    and each non-blank line is split on spaces and colons. The first token is
    parsed as an integer rating; the remaining tokens are consumed as
    alternating ``column value`` integers (so a line presumably looks like
    ``<rating> <col>:<value> <col>:<value> ...`` -- confirm against the data).

    Ratings of 7 or more are labelled ``1``, ratings of 4 or less are labelled
    ``-1``, and lines with any other rating are dropped.

    Args:
        path: Location of the input file. Defaults to ``"data"``.
        k: Number of features passed to ``SelectKBest``. Defaults to 20000.

    Returns:
        A tuple ``(featureMNew, evalRes)``: the matrix returned by
        ``SelectKBest(chi2, k=k).fit_transform`` and the list of ``1``/``-1``
        labels, one per kept line, in the shuffled order.

    Raises:
        ValueError: If a token that should be an integer cannot be parsed.
    """
    token_sep = re.compile(r" |:")

    # Read everything up front so the handle is closed before the heavy work.
    with open(path) as fileData:
        fileList = fileData.readlines()
    random.shuffle(fileList)

    row = []
    col = []
    data = []
    evalRes = []
    for line in fileList:
        line = line.rstrip('\n')
        if not line.strip():
            # A blank line (e.g. a trailing newline at EOF) carries no sample.
            continue
        dataList = token_sep.split(line)
        rating = int(dataList[0])
        if rating >= 7:
            label = 1
        elif rating <= 4:
            label = -1
        else:
            # Ratings strictly between 4 and 7 are excluded from the data set.
            continue

        # The index of this sample is the number of samples kept so far.
        rowIndex = len(evalRes)
        evalRes.append(label)

        tokens = dataList[1:]
        row.extend([rowIndex] * (len(tokens) // 2))
        col.extend(map(int, tokens[::2]))
        data.extend(map(int, tokens[1::2]))

    # Pass the shape explicitly: when inferred from the indices, a final sample
    # with no features would leave the matrix with fewer rows than labels.
    n_cols = max(col) + 1 if col else 0
    featureMatrix = csr_matrix(
        (data, (row, col)), shape=(len(evalRes), n_cols)
    )
    featureMNew = SelectKBest(chi2, k=k).fit_transform(featureMatrix, evalRes)
    return featureMNew, evalRes
# Comment list
# Table of contents