def validation(self, X, Y, wv_X, kind):
    """
    Estimate the accuracy of ``self.stacking`` with stratified 2-fold validation.

    For each fold a TF-IDF vectorizer is fitted on the training texts only,
    both splits are transformed with it, and ``self.stacking`` predicts the
    labels of the held-out split. The fold score is the fraction of
    predictions equal to the true labels.

    :param X: train text
    :param Y: train label (any sequence; converted to an ndarray here)
    :param wv_X: train wv_vec; must support indexing with an index array
    :param kind: age/gender/education, passed through to ``self.stacking``
    :return: mean score of 2-fold validation
    """
    # NOTE(review): this message looks mis-encoded (non-ASCII text lost);
    # it is runtime output, so it is left unchanged here.
    print('????...')

    # Index arrays from the splitter only work on ndarrays, so convert both
    # the texts and the labels (a plain list of labels would raise TypeError).
    X = np.asarray(X)
    Y = np.asarray(Y)
    # wv_X is intentionally not converted: it may be a sparse matrix.

    fold_n = 2
    # random_state only matters when shuffle=True; kept for parity with the
    # original call.
    folds = StratifiedKFold(Y, n_folds=fold_n, shuffle=False, random_state=0)
    score = np.zeros(fold_n)

    for j, (train_idx, test_idx) in enumerate(folds):
        print('%s -fold' % (j + 1))

        y_train, y_test = Y[train_idx], Y[test_idx]
        wv_X_train, wv_X_test = wv_X[train_idx], wv_X[test_idx]

        # Fit the vocabulary/IDF on the training split only so that nothing
        # from the held-out texts leaks into the features.
        vec = TfidfVectorizer(use_idf=True, sublinear_tf=False,
                              max_features=50000, binary=True)
        X_train = vec.fit_transform(X[train_idx], y_train)
        X_test = vec.transform(X[test_idx])
        print('shape %s' % (X_train.shape,))

        ypre = self.stacking(X_train, y_train, X_test,
                             wv_X_train, wv_X_test, kind)

        # Accuracy on the held-out split.
        score[j] = np.mean(y_test == np.asarray(ypre))

    print(score)
    mean_score = score.mean()
    print('%s %s' % (mean_score, kind))
    return mean_score
# Comment list
# Table of contents