v1.3.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码
def init(l_sys, l_namenode, l_FS, l_RPC, d, dwhite, winsize=200, sleeptime=15, cont=0.01,limit = 300):
    """Collect an initial window of observations and fit four IsolationForests.

    The function first polls ``getdata()`` in a loop, feeding each result
    through ``loadvalue`` and ``extract`` and accumulating one row per metric
    group (sys, namenode, FS, RPC).  It then pulls stored rows for each group
    from InfluxDB via ``sampleWithDecay``, stacks the freshly collected window
    underneath them, and fits one IsolationForest per group on the result.
    The predictions of each model on the freshly collected window are printed
    before returning.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: column labels of each metric group;
            used both to select columns from the queried frames and as the
            column names of the collected window.
        d: object handed to ``loadvalue`` and ``extract`` on every poll.
        dwhite: object handed to ``loadvalue`` on every poll.
        winsize: polling stops once MORE than ``winsize`` rows are collected
            (i.e. ``winsize + 1`` rows for integer values).
        sleeptime: argument passed to ``sleep`` between two polls.
        cont: ``contamination`` value given to every IsolationForest.
        limit: forwarded to ``sampleWithDecay`` (presumably the number of
            stored rows to keep -- confirm against that helper).

    Returns:
        Tuple ``(ilf_sys, ilf_namenode, ilf_FS, ilf_RPC)`` of fitted models.
    """
    recent_sys, recent_namenode, recent_fs, recent_rpc = [], [], [], []

    # Phase 1: poll until the in-memory window holds more than `winsize` rows.
    while True:
        print("fetching at %s" % ctime())
        loadvalue(getdata(), d, dwhite)
        row_sys, row_namenode, row_fs, row_rpc = extract(d, l_sys, l_namenode, l_FS, l_RPC)
        recent_sys.append(row_sys)
        recent_namenode.append(row_namenode)
        recent_fs.append(row_fs)
        recent_rpc.append(row_rpc)
        window_full = len(recent_sys) > winsize
        if window_full:
            break
        # Not enough rows yet: wait before the next poll.
        sleep(sleeptime)

    # Phase 2: one untrained model per metric group, all with the same settings.
    forest_sys, forest_namenode, forest_fs, forest_rpc = [
        IsolationForest(n_estimators=100, contamination=cont) for _ in range(4)
    ]

    # Phase 3: fetch stored rows for each group.
    # NOTE(review): connection settings and credentials are hard-coded here.
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')

    def stored_rows(query, columns):
        # Run the sampling helper for `query` and keep only the group's columns.
        return sampleWithDecay(client, limit, query)[columns]

    # Query order (sys, fs, namenode, rpc) is kept as-is on purpose.
    hist_sys = stored_rows('select * from ganglia where w_system >0 ORDER BY time DESC limit 1500', l_sys)
    hist_fs = stored_rows('select * from ganglia where w_fs >0 ORDER BY time DESC limit 1500', l_FS)
    hist_namenode = stored_rows('select * from ganglia where w_namenode >0 ORDER BY time DESC limit 1500', l_namenode)
    hist_rpc = stored_rows('select * from ganglia where w_rpc >0 ORDER BY time DESC limit 1500', l_RPC)

    # Phase 4: training set = stored rows followed by the freshly polled window.
    train_sys = pd.concat([hist_sys, pd.DataFrame(recent_sys, columns=l_sys)])
    train_namenode = pd.concat([hist_namenode, pd.DataFrame(recent_namenode, columns=l_namenode)])
    train_fs = pd.concat([hist_fs, pd.DataFrame(recent_fs, columns=l_FS)])
    train_rpc = pd.concat([hist_rpc, pd.DataFrame(recent_rpc, columns=l_RPC)])

    # Phase 5: fit every model (sys, namenode, FS, RPC order).
    fit_plan = (
        (forest_sys, train_sys),
        (forest_namenode, train_namenode),
        (forest_fs, train_fs),
        (forest_rpc, train_rpc),
    )
    for forest, training_frame in fit_plan:
        forest.fit(training_frame)

    # Show how each fitted model labels the rows collected in phase 1.
    report_plan = (
        (forest_sys, recent_sys),
        (forest_namenode, recent_namenode),
        (forest_fs, recent_fs),
        (forest_rpc, recent_rpc),
    )
    for forest, window_rows in report_plan:
        print(forest.predict(window_rows))

    return forest_sys, forest_namenode, forest_fs, forest_rpc
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号