def get_classification_data(self, division_dummies=True, samples=None, percentile=100):
    """Build feature-selected, standardized train/test splits for classification.

    ``self.raw`` is run through the ``PlayerCollection`` helpers
    (``filter_by_class`` -> ``raw_to_df`` -> ``aggregate_df`` -> ``to_matrix``
    -> ``subsample``), then split with ``train_test_split`` using a fixed
    ``random_state`` and stratification on the division labels. Feature
    selection and scaling are both fitted on the training split only and then
    applied to the test split.

    Args:
        division_dummies: When truthy, the label vectors are one-hot encoded
            with ``pd.get_dummies`` and returned as 2-D arrays; otherwise the
            labels are returned as produced by the split.
        samples: Forwarded unchanged to ``PlayerCollection.subsample``.
        percentile: Forwarded to ``SelectPercentile(f_classif, ...)`` as its
            ``percentile`` argument.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    raw = PlayerCollection.filter_by_class(self.raw)
    df = PlayerCollection.raw_to_df(raw)
    players, divisions = PlayerCollection.aggregate_df(df)
    players, divisions = PlayerCollection.to_matrix(players, divisions)
    players, divisions = PlayerCollection.subsample(players, divisions, samples)

    X_train, X_test, y_train, y_test = train_test_split(
        players, divisions, random_state=42, stratify=divisions)

    # Fit the selector on the training split only so no information from the
    # test split influences which features are kept.
    selector = SelectPercentile(f_classif, percentile=percentile)
    selector.fit(X_train, y_train)
    X_train = selector.transform(X_train)
    X_test = selector.transform(X_test)

    if division_dummies:
        # Encode both splits against one shared, sorted label set. Calling
        # get_dummies independently on each split would give the two matrices
        # different columns whenever a label is absent from one of them.
        labels = pd.Index(y_train).union(pd.Index(y_test))
        # ``.values`` replaces ``DataFrame.as_matrix()``, which was removed in
        # pandas 1.0; ``.values`` is available on old and new releases alike.
        y_train = pd.get_dummies(
            pd.Series(pd.Categorical(y_train, categories=labels))).values
        y_test = pd.get_dummies(
            pd.Series(pd.Categorical(y_test, categories=labels))).values

    # Scaling statistics likewise come from the training split only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test
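# 评论列表 (comment list) -- leftover web-page navigation text, not part of the code
# 文章目录 (table of contents) -- leftover web-page navigation text, not part of the code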