def gmmRemovingOutlierForClassifier():
    """
    Use a two-component GMM to remove outliers from the training set.

    The arrays ``X_train``, ``y_train`` and ``y_train_price`` are loaded from
    ``inputClf_small/``.  A GMM with one component per class (labels 0 and 1)
    is fitted on the features, with its means seeded from the per-class
    centers so that component ``k`` corresponds to class ``k``.  Samples whose
    predicted component differs from their label are treated as outliers and
    dropped; the remaining rows are saved under
    ``inputClf_GMMOutlierRemoval/``.

    :return: NA
    """
    # load data
    X_train = np.load('inputClf_small/X_train.npy')
    y_train = np.load('inputClf_small/y_train.npy')
    y_train_price = np.load('inputClf_small/y_train_price.npy')
    # Flatten the labels once so every comparison below is per-sample and
    # does not depend on whether y_train is stored as (n,) or (n, 1).
    labels = y_train.ravel()

    # classifier initialize
    # init_params deliberately omits 'm': with 'm' present, fit() would
    # re-initialize the means itself and throw away the per-class means
    # assigned below, so component indices would no longer match the labels.
    classifier = GMM(n_components=2, covariance_type='full',
                     init_params='wc', n_iter=20)

    # cluster initializing: one center per class (a single-cluster KMeans)
    center0 = KMeans(init='random', n_clusters=1,
                     random_state=0).fit(X_train[labels == 0]).cluster_centers_
    center1 = KMeans(init='random', n_clusters=1,
                     random_state=0).fit(X_train[labels == 1]).cluster_centers_
    classifier.means_ = np.concatenate((center0, center1), axis=0)

    # Train the other parameters using the EM algorithm.
    classifier.fit(X_train)

    # predict
    y_train_pred = classifier.predict(X_train)
    # A sample is kept when the GMM component it falls in matches its label.
    keep = y_train_pred.ravel() == labels
    train_accuracy = np.mean(keep) * 100
    print("Keep {}% data.".format(train_accuracy))

    # keep the data which are not outliers (boolean mask selects rows)
    np.save('inputClf_GMMOutlierRemoval/X_train', X_train[keep])
    np.save('inputClf_GMMOutlierRemoval/y_train', y_train[keep])
    np.save('inputClf_GMMOutlierRemoval/y_train_price', y_train_price[keep])
ClassficationBase.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录