def create_stratum(self, column_names, **kwargs):
    '''
    Add a ``<column>_strata`` column to ``self.data`` for each covariate.

    For every name in ``column_names`` the corresponding column of
    ``self.data`` is assigned a stratum label per row:

    * columns with at most two distinct values keep their own values,
      cast to ``int``, as the stratum label (if that cast would merge
      the distinct values, e.g. 0.2 and 0.7, the rank of each value
      among the sorted distinct values is used instead);
    * all other columns are clustered with ``AP(damping=0.9)`` and the
      fitted model's ``labels_`` are used as the stratum labels.

    Parameters
    ----------
    column_names : iterable of str
        Names of the covariate columns in ``self.data`` to stratify.
    **kwargs
        Accepted for interface compatibility; currently unused.

    Returns
    -------
    The whole ``self.data`` object, modified in place with one extra
    ``<column>_strata`` column per processed covariate.

    Raises
    ------
    ValueError
        If a requested column contains NaN values. Columns processed
        before the offending one have already been added.
    '''
    for colname in column_names:
        # Work on a plain ndarray: ``Series.reshape`` was removed from
        # pandas, so reshaping must happen on the NumPy side.
        values = np.asarray(self.data[colname])
        if np.isnan(values).any():
            raise ValueError(
                "There are NaN values in self.data[%s] that the "
                "clustering algorithm can't handle" % colname)

        string_name = colname + '_strata'
        distinct, codes = np.unique(values, return_inverse=True)
        if distinct.shape[0] <= 2:
            as_int = self.data[colname].astype(int)
            if np.unique(as_int).shape[0] < distinct.shape[0]:
                # Integer truncation would merge the two values into a
                # single stratum; label rows by value rank instead.
                self.data[string_name] = codes
            else:
                self.data[string_name] = as_int
        else:
            # NOTE(review): AP is defined elsewhere in the file; it is
            # presumably sklearn's AffinityPropagation -- confirm. It
            # is fitted on an (n_samples, 1) array, one feature per row.
            af_model = AP(damping=0.9)
            strata_groups = af_model.fit(values.reshape(-1, 1))
            self.data[string_name] = strata_groups.labels_
    return self.data
# In the main function, create_stratum must be called before create_unique_strata.
评论列表
文章目录