def test_write_points_from_dataframe(self):
    """write_points should serialize a DataFrame to InfluxDB line protocol."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=[start, start + timedelta(hours=1)],
        columns=["column_one", "column_two", "column_three"])
    expected_body = (
        b"foo column_one=\"1\",column_two=1i,column_three=1.0 0\n"
        b"foo column_one=\"2\",column_two=2i,column_three=2.0 "
        b"3600000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        # tags=None must behave exactly like omitting tags entirely.
        for extra_kwargs in ({}, {'tags': None}):
            client.write_points(frame, 'foo', **extra_kwargs)
            self.assertEqual(mock.last_request.body, expected_body)
# Python DataFrameClient() usage examples (scraped section header)
def test_write_points_from_dataframe_with_period_index(self):
    """A PeriodIndex is converted to nanosecond epoch timestamps on write."""
    frame = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=[pd.Period('1970-01-01'), pd.Period('1970-01-02')],
        columns=["column_one", "column_two", "column_three"])
    expected_body = (
        b"foo column_one=\"1\",column_two=1i,column_three=1.0 0\n"
        b"foo column_one=\"2\",column_two=2i,column_three=2.0 "
        b"86400000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        client.write_points(frame, "foo")
        self.assertEqual(mock.last_request.body, expected_body)
def test_get_list_database(self):
    """get_list_database should unpack the JSON series rows into dicts."""
    response = {'results': [
        {'series': [
            {'measurement': 'databases',
             'values': [['new_db_1'], ['new_db_2']],
             'columns': ['name']}]}
    ]}
    client = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(client, 'get', 200, json.dumps(response)):
        databases = client.get_list_database()
        self.assertListEqual(
            databases,
            [{'name': 'new_db_1'}, {'name': 'new_db_2'}])
def updateWindow(l_sys, l_namenode, l_FS, l_RPC, cont):
    """Refit one IsolationForest per metric group from a fresh InfluxDB sample.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: column-name lists selecting the
            feature columns of each metric group from the 'ganglia' measurement.
        cont: contamination ratio passed to every IsolationForest.

    Returns:
        Tuple of four fitted IsolationForest models:
        (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    # Bounded sample of rows where all window flags are positive.
    query = 'select * from ganglia where w_fs >0 and w_namenode>0 and w_rpc >0 limit 1024;'
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    result = client.query(query, chunked=False)
    data = result['ganglia']
    d_sys = data[l_sys]
    d_namenode = data[l_namenode]
    d_FS = data[l_FS]
    d_RPC = data[l_RPC]
    # Fix: the original also built an unused IsolationForest ('ilf') that was
    # immediately discarded; it has been removed.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)
    # Parenthesized print works identically under Python 2 and 3.
    print("update finished")
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def test_write_points_from_dataframe_in_batches(self):
    """Batched writes should complete and report success (True)."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=[start, start + timedelta(hours=1)],
        columns=["column_one", "column_two", "column_three"])
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        self.assertTrue(client.write_points(frame, "foo", batch_size=1))
def test_write_points_from_dataframe_with_tag_columns(self):
    """Columns listed in tag_columns are emitted as tags, not fields."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[['blue', 1, "1", 1, 1.0],
              ['red', 0, "2", 2, 2.0]],
        index=[start, start + timedelta(hours=1)],
        columns=["tag_one", "tag_two", "column_one",
                 "column_two", "column_three"])
    expected_body = (
        b"foo,tag_one=blue,tag_two=1 "
        b"column_one=\"1\",column_two=1i,column_three=1.0 "
        b"0\n"
        b"foo,tag_one=red,tag_two=0 "
        b"column_one=\"2\",column_two=2i,column_three=2.0 "
        b"3600000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        # tags=None must behave exactly like omitting the tags argument.
        for extra_kwargs in ({}, {'tags': None}):
            client.write_points(frame, 'foo',
                                tag_columns=['tag_one', 'tag_two'],
                                **extra_kwargs)
            self.assertEqual(mock.last_request.body, expected_body)
def test_write_points_from_dataframe_with_tag_cols_and_global_tags(self):
    """Global tags are merged with per-row tag columns in the output."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[['blue', 1, "1", 1, 1.0],
              ['red', 0, "2", 2, 2.0]],
        index=[start, start + timedelta(hours=1)],
        columns=["tag_one", "tag_two", "column_one",
                 "column_two", "column_three"])
    expected_body = (
        b"foo,global_tag=value,tag_one=blue,tag_two=1 "
        b"column_one=\"1\",column_two=1i,column_three=1.0 "
        b"0\n"
        b"foo,global_tag=value,tag_one=red,tag_two=0 "
        b"column_one=\"2\",column_two=2i,column_three=2.0 "
        b"3600000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        client.write_points(frame, 'foo',
                            tag_columns=['tag_one', 'tag_two'],
                            tags={'global_tag': 'value'})
        self.assertEqual(mock.last_request.body, expected_body)
def test_write_points_from_dataframe_with_tag_escaped(self):
    """Commas and equals signs inside tag values must be backslash-escaped."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[['blue', 1, "1", 1, 1.0, 'hot'],
              ['red,green=orange', 0, "2", 2, 2.0, 'cold']],
        index=[start, start + timedelta(hours=1)],
        columns=["tag_one", "tag_two", "column_one",
                 "column_two", "column_three", "tag_three"])
    expected_escaped_tags = (
        b"foo,tag_one=blue "
        b"column_one=\"1\",column_two=1i "
        b"0\n"
        b"foo,tag_one=red\\,green\\=orange "
        b"column_one=\"2\",column_two=2i "
        b"3600000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        # Only the listed field/tag columns are written; the rest are dropped.
        client.write_points(frame, 'foo',
                            field_columns=['column_one', 'column_two'],
                            tag_columns=['tag_one'])
        self.assertEqual(mock.last_request.body, expected_escaped_tags)
def test_write_points_from_dataframe_fails_without_time_index(self):
    """write_points must reject a DataFrame without a datetime-like index."""
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             columns=["column_one", "column_two",
                                      "column_three"])
    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/db/db/series",
                       status_code=204)
        cli = DataFrameClient(database='db')
        # Fix: the test name promises a failure but the original body made no
        # assertion (a nose @raises decorator was likely lost). DataFrameClient
        # raises TypeError for an index that is not a DatetimeIndex/PeriodIndex.
        with self.assertRaises(TypeError):
            cli.write_points(dataframe, "foo")
def test_write_points_from_dataframe_fails_with_series(self):
    """write_points must reject a Series -- only DataFrames are accepted."""
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    series = pd.Series(data=[1.0, 2.0],
                       index=[now, now + timedelta(hours=1)])
    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/db/db/series",
                       status_code=204)
        cli = DataFrameClient(database='db')
        # Fix: the test name promises a failure but the original body made no
        # assertion (a nose @raises decorator was likely lost). DataFrameClient
        # raises TypeError when given anything other than a DataFrame.
        with self.assertRaises(TypeError):
            cli.write_points(series, "foo")
def test_query_into_dataframe(self):
    """A GROUP BY query yields one DataFrame per (measurement, tags) key."""
    data = {
        "results": [{
            "series": [
                {"measurement": "network",
                 "tags": {"direction": ""},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422]]},
                {"measurement": "network",
                 "tags": {"direction": "in"},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422]]},
            ]
        }]
    }

    def make_expected(values):
        # UTC-indexed single-column frame matching the mocked result rows.
        frame = pd.DataFrame(
            [[v] for v in values], columns=['value'],
            index=pd.to_datetime(["2009-11-10T23:00:00Z"] * len(values)))
        frame.index = frame.index.tz_localize('UTC')
        return frame

    expected = {
        ('network', (('direction', ''),)): make_expected([23422]),
        ('network', (('direction', 'in'),)): make_expected([23422] * 3),
    }
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(cli, 'GET', 200, data):
        result = cli.query('select value from network group by direction;')
    for key in expected:
        assert_frame_equal(expected[key], result[key])
def test_query_with_empty_result(self):
    """An empty result set comes back as an empty dict, not None."""
    client = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(client, 'GET', 200, {"results": [{}]}):
        self.assertEqual(client.query('select column_one from foo;'), {})
def test_datetime_to_epoch(self):
    """_datetime_to_epoch scales the UNIX epoch by the precision unit."""
    timestamp = pd.Timestamp('2013-01-01 00:00:00.000+00:00')
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    base = 1356998400.0  # 2013-01-01T00:00:00Z in seconds
    # Default precision is seconds.
    self.assertEqual(cli._datetime_to_epoch(timestamp), base)
    cases = [
        ('h', base / 3600),
        ('m', base / 60),
        ('s', base),
        ('ms', base * 1000),
        ('u', base * 1000000),
        ('n', base * 1000000000),
    ]
    for precision, expected in cases:
        self.assertEqual(
            cli._datetime_to_epoch(timestamp, time_precision=precision),
            expected)
def connect(host=settings.INFLUX_HOST, port=settings.INFLUX_PORT):
    """Build a DataFrameClient from the application settings.

    host/port default to the configured InfluxDB endpoint; credentials and
    database name always come from settings.
    """
    return DataFrameClient(host, port,
                           settings.INFLUX_USER,
                           settings.INFLUX_PASSWORD,
                           settings.INFLUX_DBASE)
def __init__(self, ip_or_url, port, username, password, database, series_name):
    """Hold a DataFrameClient bound to *database* and a target series name."""
    self._series_name = series_name
    self._client = DataFrameClient(ip_or_url, port, username, password, database)
def _conn(self):
    """Create a fresh DataFrameClient from the stored connection settings."""
    return DataFrameClient(self.host, self.port, self.user,
                           self.password, self.db_name)
def analyseWarn(name, qname, topk=5):
    """Contrast recent anomalous vs. normal samples for one metric column.

    name: feature column to analyse; qname: warn-factor column used to split
    rows into positive (normal) and negative (anomalous) samples.
    topk is currently unused but kept for interface compatibility.
    """
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # NOTE(review): qname is spliced into the query unescaped -- acceptable
    # only because it is an internal column name, never untrusted input.
    positive_query = 'select * from ganglia where '+qname+' >0 ORDER BY time DESC limit 10'
    negative_query = 'select * from ganglia where '+qname+' <0 ORDER BY time DESC limit 5'
    # Most recent rows with a positive warn factor: treated as normal.
    normalSample = client.query(positive_query, chunked=False)['ganglia'][name]
    # Most recent rows with a negative warn factor: treated as anomalous.
    anamolySample = client.query(negative_query, chunked=False)['ganglia'][name]
    return analyseReasonWithTreeBaesd(anamolySample, normalSample, name)
def updateWindow(l_sys, l_namenode, l_FS, l_RPC, cont, limit):
    """Refit the four IsolationForest models on decay-sampled InfluxDB data.

    Each metric group is fetched newest-first (capped at 1500 rows) through
    sampleWithDecay, which down-samples to roughly *limit* rows.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        cont: contamination ratio for every IsolationForest.
        limit: target sample size passed to sampleWithDecay.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # Fetch each group's rows and keep only its feature columns.
    data_sys = sampleWithDecay(client, limit, 'select * from ganglia where w_system >0 ORDER BY time DESC limit 1500')
    d_sys = data_sys[l_sys]
    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC limit 1500')
    d_FS = data_fs[l_FS]
    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC limit 1500')
    d_namenode = data_namenode[l_namenode]
    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC limit 1500')
    d_RPC = data_rpc[l_RPC]
    # Fix: the original built an extra unused IsolationForest ('ilf'); removed.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)
    # Parenthesized print works identically under Python 2 and 3.
    print("update finished")
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def updateWindow(l_sys, l_namenode, l_FS, l_RPC, cont, limit):
    """Refit the four IsolationForest models on decay-sampled InfluxDB data.

    Unlike the capped variant, the queries here carry no row limit, so
    sampleWithDecay sees the full newest-first history.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        cont: contamination ratio for every IsolationForest.
        limit: target sample size passed to sampleWithDecay.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # Fetch each group's rows and keep only its feature columns.
    data_sys = sampleWithDecay(client, limit, 'select * from ganglia where w_system >0 ORDER BY time DESC')
    d_sys = data_sys[l_sys]
    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC')
    d_FS = data_fs[l_FS]
    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC')
    d_namenode = data_namenode[l_namenode]
    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC')
    d_RPC = data_rpc[l_RPC]
    # Fix: the original built an extra unused IsolationForest ('ilf'); removed.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)
    # Parenthesized print works identically under Python 2 and 3.
    print("update finished")
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def init(l_sys, l_namenode, l_FS, l_RPC, sleeptime=15, cont=0.01, limit=300):
    """Bootstrap four IsolationForest models from decay-sampled InfluxDB data.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        sleeptime: unused here; kept for interface compatibility with the
            windowed init variants.
        cont: contamination ratio for every IsolationForest.
        limit: target sample size passed to sampleWithDecay.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    # NOTE(review): 50 estimators here vs. 100 everywhere else -- confirm
    # whether this is intentional tuning for the FS group.
    ilf_FS = IsolationForest(n_estimators=50, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # Fetch each group's rows newest-first and keep only its feature columns.
    data_sys = sampleWithDecay(client, limit, 'select * from ganglia where w_system >0 ORDER BY time DESC')
    d_sys = data_sys[l_sys]
    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC')
    d_FS = data_fs[l_FS]
    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC')
    d_namenode = data_namenode[l_namenode]
    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC')
    d_RPC = data_rpc[l_RPC]
    # Parenthesized print works identically under Python 2 and 3.
    print(len(d_sys))
    print(len(d_FS))
    print(len(d_namenode))
    print(len(d_RPC))
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)
    print(ilf_FS.predict(d_FS))
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def test_write_points_from_dataframe_with_numeric_precision(self):
    """numeric_precision controls float rounding in the line protocol."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    # No explicit columns -> numeric column names 0, 1, 2.
    frame = pd.DataFrame(data=[["1", 1, 1.1111111111111],
                               ["2", 2, 2.2222222222222]],
                         index=[start, start + timedelta(hours=1)])
    cases = [
        # (write_points kwargs, expected request body)
        ({},
         b'foo,hello=there 0=\"1\",1=1i,2=1.11111111111 0\n'
         b'foo,hello=there 0=\"2\",1=2i,2=2.22222222222 3600000000000\n'),
        ({'numeric_precision': 4},
         b'foo,hello=there 0=\"1\",1=1i,2=1.1111 0\n'
         b'foo,hello=there 0=\"2\",1=2i,2=2.2222 3600000000000\n'),
        ({'numeric_precision': 'full'},
         b'foo,hello=there 0=\"1\",1=1i,2=1.1111111111111 0\n'
         b'foo,hello=there 0=\"2\",1=2i,2=2.2222222222222 3600000000000\n'),
    ]
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        for kwargs, expected in cases:
            client = DataFrameClient(database='db')
            client.write_points(frame, "foo", {"hello": "there"}, **kwargs)
            self.assertEqual(mock.last_request.body, expected)
def init(l_sys, l_namenode, l_FS, l_RPC, d, dwhite, winsize=200, sleeptime=15, cont=0.01):
    """Warm up a live observation window, then fit one IsolationForest per group.

    First polls getdata() every *sleeptime* seconds until more than *winsize*
    observations are collected, then combines those live rows with a batch of
    historical rows from InfluxDB and fits the four models on the union.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        d, dwhite: state dicts consumed by loadvalue/extract (opaque here --
            their schema is defined elsewhere in this project).
        winsize: number of live observations to collect before fitting.
        sleeptime: seconds between polls.
        cont: contamination ratio for every IsolationForest.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    win_sys = []
    win_namenode = []
    win_FS = []
    win_RPC = []
    # Collect live observations until the window is full.
    while True:
        # Parenthesized print works identically under Python 2 and 3.
        print("fetching at %s" % ctime())
        data = getdata()
        loadvalue(data, d, dwhite)
        o_sys, o_namenode, o_FS, o_RPC = extract(d, l_sys, l_namenode, l_FS, l_RPC)
        win_sys.append(o_sys)
        win_namenode.append(o_namenode)
        win_FS.append(o_FS)
        win_RPC.append(o_RPC)
        if len(win_sys) > winsize:
            break
        sleep(sleeptime)
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    # Bounded historical sample where all window flags are positive.
    query = 'select * from ganglia where w_fs >0 and w_namenode>0 and w_rpc >0 limit 256;'
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    result = client.query(query, chunked=False)
    data = result['ganglia']
    d_sys = data[l_sys]
    d_namenode = data[l_namenode]
    d_FS = data[l_FS]
    d_RPC = data[l_RPC]
    # Turn the live window into frames and append it to the historical data.
    append_sys = pd.DataFrame(win_sys, columns=l_sys)
    append_namenode = pd.DataFrame(win_namenode, columns=l_namenode)
    append_FS = pd.DataFrame(win_FS, columns=l_FS)
    append_RPC = pd.DataFrame(win_RPC, columns=l_RPC)
    out_sys = pd.concat([d_sys, append_sys])
    out_namenode = pd.concat([d_namenode, append_namenode])
    out_FS = pd.concat([d_FS, append_FS])
    out_RPC = pd.concat([d_RPC, append_RPC])
    ilf_sys.fit(out_sys)
    ilf_namenode.fit(out_namenode)
    ilf_FS.fit(out_FS)
    ilf_RPC.fit(out_RPC)
    # Diagnostic: score the live window under each freshly fitted model.
    print(ilf_sys.predict(win_sys))
    print(ilf_namenode.predict(win_namenode))
    print(ilf_FS.predict(win_FS))
    print(ilf_RPC.predict(win_RPC))
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def init(l_sys, l_namenode, l_FS, l_RPC, d, dwhite, winsize=200, sleeptime=15, cont=0.01, limit=300):
    """Warm up a live window, then fit models on decay-sampled history + window.

    Like the plain-query variant, but the historical rows are drawn per metric
    group through sampleWithDecay (newest-first, capped at 1500 rows each).

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        d, dwhite: state dicts consumed by loadvalue/extract (opaque here --
            their schema is defined elsewhere in this project).
        winsize: number of live observations to collect before fitting.
        sleeptime: seconds between polls.
        cont: contamination ratio for every IsolationForest.
        limit: target sample size passed to sampleWithDecay.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    win_sys = []
    win_namenode = []
    win_FS = []
    win_RPC = []
    # Collect live observations until the window is full.
    while True:
        # Parenthesized print works identically under Python 2 and 3.
        print("fetching at %s" % ctime())
        data = getdata()
        loadvalue(data, d, dwhite)
        o_sys, o_namenode, o_FS, o_RPC = extract(d, l_sys, l_namenode, l_FS, l_RPC)
        win_sys.append(o_sys)
        win_namenode.append(o_namenode)
        win_FS.append(o_FS)
        win_RPC.append(o_RPC)
        if len(win_sys) > winsize:
            break
        sleep(sleeptime)
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # Decay-sampled history for each metric group, feature columns only.
    data_sys = sampleWithDecay(client, limit, 'select * from ganglia where w_system >0 ORDER BY time DESC limit 1500')
    d_sys = data_sys[l_sys]
    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC limit 1500')
    d_FS = data_fs[l_FS]
    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC limit 1500')
    d_namenode = data_namenode[l_namenode]
    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC limit 1500')
    d_RPC = data_rpc[l_RPC]
    # Turn the live window into frames and append it to the historical data.
    append_sys = pd.DataFrame(win_sys, columns=l_sys)
    append_namenode = pd.DataFrame(win_namenode, columns=l_namenode)
    append_FS = pd.DataFrame(win_FS, columns=l_FS)
    append_RPC = pd.DataFrame(win_RPC, columns=l_RPC)
    out_sys = pd.concat([d_sys, append_sys])
    out_namenode = pd.concat([d_namenode, append_namenode])
    out_FS = pd.concat([d_FS, append_FS])
    out_RPC = pd.concat([d_RPC, append_RPC])
    ilf_sys.fit(out_sys)
    ilf_namenode.fit(out_namenode)
    ilf_FS.fit(out_FS)
    ilf_RPC.fit(out_RPC)
    # Diagnostic: score the live window under each freshly fitted model.
    print(ilf_sys.predict(win_sys))
    print(ilf_namenode.predict(win_namenode))
    print(ilf_FS.predict(win_FS))
    print(ilf_RPC.predict(win_RPC))
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC