def loadDataSet(filename):
    """Load one sheet of an Excel workbook and build feature/target matrices.

    The sheet at position 1 (pandas counts sheet positions from zero) is read
    without a header row, skipping the first row of the sheet, so columns are
    addressed by integer position. Missing cells are replaced with 0.

    Column 2 is used as the target; columns 3, 4, 6, 9, 11, 14, 15, 16 and 17
    plus the encoded forms of columns 10 and 13 are used as features.

    NOTE(review): the original per-column comments were lost to an encoding
    problem, so the business meaning of most columns is unknown here. The
    helpers ``standard``, ``map_01``, ``map_rate``, ``map_sub_rate`` and
    ``transcoding`` are defined elsewhere; their exact behaviour should be
    confirmed against their definitions.

    Args:
        filename: Path (or other object accepted by ``pd.read_excel``) of the
            workbook to load.

    Returns:
        A pair ``(features, target)`` where ``features`` is ``mat(data)`` for
        the assembled feature frame and ``target`` is ``mat(df[2]).T``.
    """
    # ``sheet_name`` replaces the old ``sheetname`` keyword, which current
    # pandas releases no longer accept. Passing the integer directly returns
    # the DataFrame itself instead of a one-entry dict keyed by sheet index.
    df = pd.read_excel(filename, sheet_name=1, header=None, skiprows=1)
    df = df.fillna(0)

    # Rescale the target column before standardising it.
    df[2] = df[2] / 100000000

    # Columns passed straight through ``standard``.
    for col in (2, 3, 4, 6):
        df[col] = standard(df[col])

    # Columns mapped element-wise with ``map_01`` (presumably to 0/1 flags;
    # verify against the helper).
    for col in (9, 11):
        df[col] = df[col].apply(map_01)

    df[14] = standard(df[14])
    # Columns 15 and 16 are first mapped element-wise, then standardised.
    df[15] = standard(df[15].apply(map_rate))
    df[16] = standard(df[16].apply(map_sub_rate))
    df[17] = standard(df[17])

    # ``transcoding`` returns an encoded frame/series plus a lookup dict; only
    # the encoded part is needed here.
    prov_coding, _ = transcoding(df[10])   # province
    enter_coding, _ = transcoding(df[13])  # enterprise

    target = df[2]
    data = pd.concat(
        [df[[3, 4, 6, 9, 11, 14, 15, 16, 17]], prov_coding, enter_coding],
        axis=1,
    )
    return mat(data), mat(target).T
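# Comment list        (web-page navigation residue, not part of the code)
# Table of contents   (web-page navigation residue, not part of the code)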