lasso.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:forward 作者: yajun0601 项目源码 文件源码
def loadDataSet(filename):
    """Load the modelling table from an Excel workbook.

    Reads the second worksheet (index 1) of ``filename`` without a header
    row, skipping the first row, drops incomplete rows, normalises a fixed
    set of columns and returns them as NumPy matrices.

    As side effects the function prints the value counts of column 15
    before and after it is re-encoded, and draws a seaborn regression
    pair plot of every feature column against the target column.

    Args:
        filename: Path (or file-like object) accepted by ``pandas.read_excel``.

    Returns:
        A ``(data, target)`` tuple of ``numpy.matrix`` objects: ``data`` has
        one row per retained spreadsheet row and nine feature columns
        (spreadsheet columns 3, 4, 6, 9, 11, 14, 15, 16, 17); ``target`` is
        the matching single-column matrix built from spreadsheet column 2.
    """
    sheet = 1
    # `sheet_name` replaces the removed `sheetname` keyword; passing a scalar
    # returns the DataFrame directly instead of a one-entry dict.
    df = pd.read_excel(filename, sheet_name=sheet, header=None, skiprows=1)

    # Keep only fully populated rows.  The original chained
    # dropna(thresh=half the columns) -> dropna(how='any') -> fillna(0);
    # the middle call already subsumes the first and leaves nothing for
    # fillna to fill, so a single call yields the same frame.  (Recent pandas
    # also rejects passing `how` and `thresh` together.)
    df = df.dropna(how='any')

    # Rescale the target column by 1e8.
    # NOTE(review): presumably converts a currency amount to units of
    # 100 million -- confirm against the spreadsheet.
    df[2] = df[2] / 100000000
    df = df.sort_values(2).reset_index()

    # Column-by-column normalisation.  The helpers (`standard`,
    # `transcoding`, `map_01`, `map_rate`, `map_sub_rate`) are defined
    # elsewhere in this module.
    df[3] = standard(df[3])
    df[4] = standard(df[4])
    # The transcoded categorical columns (5, 8, 12) are computed but not part
    # of the returned feature matrix; they were only consumed by a
    # pd.concat([data, rate_type, platform, nature], axis=1) step that is
    # currently disabled.  The calls are kept so helper behaviour is unchanged.
    rate_type, rate_dict = transcoding(df[5])
    df[6] = standard(df[6])
    platform, platform_dict = transcoding(df[8])
    df[9] = df[9].apply(map_01)
    df[11] = df[11].apply(map_01)
    nature, nature_dict = transcoding(df[12])
    df[14] = standard(df[14])
    print(df.groupby(15).size())  # distribution of column 15 before mapping
    df[15] = standard(df[15].apply(map_rate))
    print(df.groupby(15).size())  # distribution of column 15 after mapping
    df[16] = standard(df[16].apply(map_sub_rate))
    df[17] = standard(df[17])

    target = df[2]
    data = df[[3, 4, 6, 9, 11, 14, 15, 16, 17]]

    # Imported locally so seaborn is only required when this loader runs.
    import seaborn as sns
    # `height` is the current name of the former `size` keyword.
    sns.pairplot(df, x_vars=[3, 17, 4, 14, 15, 16, 6, 9, 11], y_vars=2,
                 height=5, aspect=0.8, kind='reg')

    # np.asmatrix is the surviving name of np.mat (removed in NumPy 2.0).
    # A Series converts to a 1 x n matrix, hence the transpose for the target.
    return np.asmatrix(data), np.asmatrix(target).T
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号