def rand_forest_train(self):
    """Train and compare three tree-based classifiers, then keep them on self.

    Reads ``names.csv``, uses the ``similarity``, ``platform``,
    ``reputation`` and ``entropy`` columns as features and
    ``human_or_machine`` as the label, and holds out 25% of the rows for
    evaluation. A decision tree, a random forest and a gradient-boosting
    classifier are fitted on the training split; for each one the test
    accuracy and a classification report are printed.

    Afterwards the same feature columns are read from ``values.csv`` and the
    random forest's predictions for those rows are printed.

    Side effects:
        Sets ``self.dtc``, ``self.rfc`` and ``self.gbc`` to the fitted
        models and writes the evaluation output to stdout.

    Returns:
        None.
    """
    # train_test_split moved to sklearn.model_selection in 0.18 and the old
    # sklearn.cross_validation module was removed in 0.20; prefer the new
    # location but keep working on very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.metrics import classification_report
    from sklearn.tree import DecisionTreeClassifier

    feature_columns = ['similarity', 'platform', 'reputation', 'entropy']

    # Load the labelled data set and separate features from the target.
    users = pd.read_csv('names.csv')
    X = users[feature_columns]
    y = users['human_or_machine']

    # Hold out 25% of the rows for testing; the seed is fixed so the split
    # is reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=33)

    # Turn each row into a feature dict and vectorize it. The vectorizer is
    # fitted on the training rows only and reused for every later transform.
    # 'records' is the documented orient value; the abbreviation 'record'
    # is rejected by pandas >= 2.0.
    vec = DictVectorizer(sparse=False)
    X_train = vec.fit_transform(X_train.to_dict(orient='records'))
    X_test = vec.transform(X_test.to_dict(orient='records'))

    # Single decision tree.
    dtc = DecisionTreeClassifier()
    dtc.fit(X_train, y_train)
    dtc_y_pred = dtc.predict(X_test)

    # Random forest.
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    rfc_y_pred = rfc.predict(X_test)

    # Gradient boosting.
    gbc = GradientBoostingClassifier()
    gbc.fit(X_train, y_train)
    gbc_y_pred = gbc.predict(X_test)

    # Print test accuracy and a classification report for each model.
    # NOTE(review): the label strings appear to have lost their original
    # encoding in this file; they are kept verbatim because they are
    # runtime output.
    evaluations = (
        ("??????????", dtc, dtc_y_pred),
        ("????????????", rfc, rfc_y_pred),
        ("????????????", gbc, gbc_y_pred),
    )
    for label, model, y_pred in evaluations:
        print(label, model.score(X_test, y_test))
        # classification_report expects (y_true, y_pred); passing them the
        # other way round swaps precision and recall in the report.
        print(classification_report(y_test, y_pred))

    # Predict the rows in values.csv with the random forest, reusing the
    # vectorizer fitted on the training data.
    users = pd.read_csv('values.csv')
    X = users[feature_columns]
    X = vec.transform(X.to_dict(orient='records'))
    print(rfc.predict(X))

    # Keep the fitted models on the instance.
    self.dtc = dtc
    self.rfc = rfc
    self.gbc = gbc