def n2_ratio_intra_extra_class_nearest_neighbor_distance(data):
    """Return the N2 ratio of intra- to inter-class nearest-neighbor distance.

    For every row, the Euclidean distance to its nearest neighbor of the
    same class and to its nearest neighbor of any other class is looked up.
    The result is the sum of the intra-class distances divided by the sum
    of the inter-class distances.

    Args:
        data: pandas DataFrame whose last column holds the class label and
            whose remaining columns hold the features passed to ``pdist``.
            Rows are addressed purely by position, so the frame's index
            (default, shuffled, filtered, non-unique) does not matter.

    Returns:
        The ratio as a float. If the inter-class sum is exactly 0 it is
        replaced by 1 to avoid a division by zero. The result is NaN when
        some row has no other row of its own class, or no row of a
        different class, because the measure is undefined in that case.
    """
    feature_values = data.iloc[:, :-1]
    class_labels = data.iloc[:, -1].to_numpy()
    dist_matrix = squareform(pdist(feature_values, 'euclidean'))

    undefined = float('nan')
    intra_min = 0
    inter_min = 0
    for i in range(data.shape[0]):
        # Fresh boolean masks per row: positional, so independent of the
        # DataFrame index and of any particular label value.
        same_class = class_labels == class_labels[i]
        other_class = ~same_class
        # A point is not its own neighbor.
        same_class[i] = False

        distances = dist_matrix[:, i]
        nearest_same = (
            distances[same_class].min() if same_class.any() else undefined
        )
        nearest_other = (
            distances[other_class].min() if other_class.any() else undefined
        )
        intra_min = intra_min + nearest_same
        inter_min = inter_min + nearest_other

    # Handle the case inter_min == 0 (avoid dividing by zero).
    if inter_min == 0:
        inter_min = 1
    return (1.0 * intra_min) / inter_min
评论列表
文章目录