def corr_fea(df, cols, de=None, bar=0.9):
    """Add difference/sum features for strongly correlated column pairs.

    Every unordered pair ``(i, j)`` of distinct entries in ``cols`` is
    visited in order (``i`` precedes ``j`` in ``cols``).  The correlation
    of ``df[i]`` and ``df[j]`` is computed with ``Series.corr`` (Pearson
    by default in pandas), then:

    * if it is greater than ``bar``, a column named ``"<i>-<j>"`` holding
      ``df[i] - df[j]`` is added;
    * if it is less than ``-bar``, a column named ``"<i>+<j>"`` holding
      ``df[i] + df[j]`` is added.

    Correlations are always measured on ``df``; when ``de`` is given, the
    same derived columns are added to it using its own values.

    Args:
        df: Table to analyse; modified in place.  Presumably a pandas
            DataFrame -- it must support ``df[name]`` returning an object
            with ``.corr`` and support column assignment.
        cols: Column names to compare pairwise.
        de: Optional second table (e.g. a hold-out set -- TODO confirm
            with callers) that receives the same derived columns.  It is
            modified in place and must contain every column in ``cols``
            that takes part in a selected pair.
        bar: Correlation threshold.  A NaN correlation (e.g. for a
            constant column) satisfies neither comparison, so no feature
            is produced for that pair.

    Returns:
        A list with the second column name ``j`` of every pair that
        produced a feature.  Names may repeat when a column correlates
        with several earlier columns.
    """
    from itertools import combinations

    xcols = []
    # combinations() yields exactly the (earlier, later) pairs the original
    # enumerate/slice double loop produced, in the same order.
    for i, j in combinations(cols, 2):
        if i == j:
            # Duplicate names in `cols`: self-correlation is meaningless.
            continue

        score = df[i].corr(df[j])

        # Two independent checks (not `elif`): with a negative `bar` both
        # conditions can hold for the same pair, and both features are added.
        if score > bar:
            name = "%s-%s" % (i, j)
            df[name] = df[i] - df[j]
            if de is not None:
                de[name] = de[i] - de[j]
            xcols.append(j)
        if score < -bar:
            name = "%s+%s" % (i, j)
            df[name] = df[i] + df[j]
            if de is not None:
                de[name] = de[i] + de[j]
            xcols.append(j)
    return xcols
评论列表
文章目录