def test_write_points_from_dataframe(self):
    """write_points should serialize a DataFrame to InfluxDB line protocol."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=[start, start + timedelta(hours=1)],
        columns=["column_one", "column_two", "column_three"])
    expected_body = (
        b"foo column_one=\"1\",column_two=1i,column_three=1.0 0\n"
        b"foo column_one=\"2\",column_two=2i,column_three=2.0 "
        b"3600000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        # tags=None must behave exactly like omitting tags entirely.
        for extra_kwargs in ({}, {'tags': None}):
            client.write_points(frame, 'foo', **extra_kwargs)
            self.assertEqual(mock.last_request.body, expected_body)
# Python DataFrameClient() usage examples (scraped section header)
def test_write_points_from_dataframe_with_period_index(self):
    """A PeriodIndex is converted to nanosecond epoch timestamps on write."""
    frame = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=[pd.Period('1970-01-01'), pd.Period('1970-01-02')],
        columns=["column_one", "column_two", "column_three"])
    expected_body = (
        b"foo column_one=\"1\",column_two=1i,column_three=1.0 0\n"
        b"foo column_one=\"2\",column_two=2i,column_three=2.0 "
        b"86400000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        client.write_points(frame, "foo")
        self.assertEqual(mock.last_request.body, expected_body)
def test_get_list_database(self):
    """get_list_database should unpack the JSON series rows into dicts."""
    response = {'results': [
        {'series': [
            {'measurement': 'databases',
             'values': [['new_db_1'], ['new_db_2']],
             'columns': ['name']}]}
    ]}
    client = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(client, 'get', 200, json.dumps(response)):
        databases = client.get_list_database()
        self.assertListEqual(
            databases,
            [{'name': 'new_db_1'}, {'name': 'new_db_2'}])
def updateWindow(l_sys, l_namenode, l_FS, l_RPC, cont):
    """Refit one IsolationForest per metric group from a fresh InfluxDB sample.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: column-name lists selecting the
            feature columns of each metric group from the 'ganglia' measurement.
        cont: contamination ratio passed to every IsolationForest.

    Returns:
        Tuple of four fitted IsolationForest models:
        (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    # Bounded sample of rows where all window flags are positive.
    query = 'select * from ganglia where w_fs >0 and w_namenode>0 and w_rpc >0 limit 1024;'
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    result = client.query(query, chunked=False)
    data = result['ganglia']
    d_sys = data[l_sys]
    d_namenode = data[l_namenode]
    d_FS = data[l_FS]
    d_RPC = data[l_RPC]
    # Fix: the original also built an unused IsolationForest ('ilf') that was
    # immediately discarded; it has been removed.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)
    # Parenthesized print works identically under Python 2 and 3.
    print("update finished")
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def test_write_points_from_dataframe_in_batches(self):
    """Batched writes should complete and report success (True)."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=[start, start + timedelta(hours=1)],
        columns=["column_one", "column_two", "column_three"])
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        self.assertTrue(client.write_points(frame, "foo", batch_size=1))
def test_write_points_from_dataframe_with_tag_columns(self):
    """Columns listed in tag_columns are emitted as tags, not fields."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[['blue', 1, "1", 1, 1.0],
              ['red', 0, "2", 2, 2.0]],
        index=[start, start + timedelta(hours=1)],
        columns=["tag_one", "tag_two", "column_one",
                 "column_two", "column_three"])
    expected_body = (
        b"foo,tag_one=blue,tag_two=1 "
        b"column_one=\"1\",column_two=1i,column_three=1.0 "
        b"0\n"
        b"foo,tag_one=red,tag_two=0 "
        b"column_one=\"2\",column_two=2i,column_three=2.0 "
        b"3600000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        # tags=None must behave exactly like omitting the tags argument.
        for extra_kwargs in ({}, {'tags': None}):
            client.write_points(frame, 'foo',
                                tag_columns=['tag_one', 'tag_two'],
                                **extra_kwargs)
            self.assertEqual(mock.last_request.body, expected_body)
def test_write_points_from_dataframe_with_tag_cols_and_global_tags(self):
    """Global tags are merged with per-row tag columns in the output."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[['blue', 1, "1", 1, 1.0],
              ['red', 0, "2", 2, 2.0]],
        index=[start, start + timedelta(hours=1)],
        columns=["tag_one", "tag_two", "column_one",
                 "column_two", "column_three"])
    expected_body = (
        b"foo,global_tag=value,tag_one=blue,tag_two=1 "
        b"column_one=\"1\",column_two=1i,column_three=1.0 "
        b"0\n"
        b"foo,global_tag=value,tag_one=red,tag_two=0 "
        b"column_one=\"2\",column_two=2i,column_three=2.0 "
        b"3600000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        client.write_points(frame, 'foo',
                            tag_columns=['tag_one', 'tag_two'],
                            tags={'global_tag': 'value'})
        self.assertEqual(mock.last_request.body, expected_body)
def test_write_points_from_dataframe_with_tag_escaped(self):
    """Commas and equals signs inside tag values must be backslash-escaped."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[['blue', 1, "1", 1, 1.0, 'hot'],
              ['red,green=orange', 0, "2", 2, 2.0, 'cold']],
        index=[start, start + timedelta(hours=1)],
        columns=["tag_one", "tag_two", "column_one",
                 "column_two", "column_three", "tag_three"])
    expected_escaped_tags = (
        b"foo,tag_one=blue "
        b"column_one=\"1\",column_two=1i "
        b"0\n"
        b"foo,tag_one=red\\,green\\=orange "
        b"column_one=\"2\",column_two=2i "
        b"3600000000000\n"
    )
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        client = DataFrameClient(database='db')
        # Only the listed field/tag columns are written; the rest are dropped.
        client.write_points(frame, 'foo',
                            field_columns=['column_one', 'column_two'],
                            tag_columns=['tag_one'])
        self.assertEqual(mock.last_request.body, expected_escaped_tags)
def test_write_points_from_dataframe_fails_without_time_index(self):
    """write_points must reject a DataFrame without a datetime-like index."""
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             columns=["column_one", "column_two",
                                      "column_three"])
    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/db/db/series",
                       status_code=204)
        cli = DataFrameClient(database='db')
        # Fix: the test name promises a failure but the original body made no
        # assertion (a nose @raises decorator was likely lost). DataFrameClient
        # raises TypeError for an index that is not a DatetimeIndex/PeriodIndex.
        with self.assertRaises(TypeError):
            cli.write_points(dataframe, "foo")
def test_write_points_from_dataframe_fails_with_series(self):
    """write_points must reject a Series -- only DataFrames are accepted."""
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    series = pd.Series(data=[1.0, 2.0],
                       index=[now, now + timedelta(hours=1)])
    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/db/db/series",
                       status_code=204)
        cli = DataFrameClient(database='db')
        # Fix: the test name promises a failure but the original body made no
        # assertion (a nose @raises decorator was likely lost). DataFrameClient
        # raises TypeError when given anything other than a DataFrame.
        with self.assertRaises(TypeError):
            cli.write_points(series, "foo")
def test_query_into_dataframe(self):
    """A GROUP BY query yields one DataFrame per (measurement, tags) key."""
    data = {
        "results": [{
            "series": [
                {"measurement": "network",
                 "tags": {"direction": ""},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422]]},
                {"measurement": "network",
                 "tags": {"direction": "in"},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422]]},
            ]
        }]
    }

    def make_expected(values):
        # UTC-indexed single-column frame matching the mocked result rows.
        frame = pd.DataFrame(
            [[v] for v in values], columns=['value'],
            index=pd.to_datetime(["2009-11-10T23:00:00Z"] * len(values)))
        frame.index = frame.index.tz_localize('UTC')
        return frame

    expected = {
        ('network', (('direction', ''),)): make_expected([23422]),
        ('network', (('direction', 'in'),)): make_expected([23422] * 3),
    }
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(cli, 'GET', 200, data):
        result = cli.query('select value from network group by direction;')
    for key in expected:
        assert_frame_equal(expected[key], result[key])
def test_query_with_empty_result(self):
    """An empty result set comes back as an empty dict, not None."""
    client = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(client, 'GET', 200, {"results": [{}]}):
        self.assertEqual(client.query('select column_one from foo;'), {})
def test_datetime_to_epoch(self):
    """_datetime_to_epoch scales the UNIX epoch by the precision unit."""
    timestamp = pd.Timestamp('2013-01-01 00:00:00.000+00:00')
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    base = 1356998400.0  # 2013-01-01T00:00:00Z in seconds
    # Default precision is seconds.
    self.assertEqual(cli._datetime_to_epoch(timestamp), base)
    cases = [
        ('h', base / 3600),
        ('m', base / 60),
        ('s', base),
        ('ms', base * 1000),
        ('u', base * 1000000),
        ('n', base * 1000000000),
    ]
    for precision, expected in cases:
        self.assertEqual(
            cli._datetime_to_epoch(timestamp, time_precision=precision),
            expected)
def connect(host=settings.INFLUX_HOST, port=settings.INFLUX_PORT):
    """Build a DataFrameClient from the application settings.

    host/port default to the configured InfluxDB endpoint; credentials and
    database name always come from settings.
    """
    return DataFrameClient(host, port,
                           settings.INFLUX_USER,
                           settings.INFLUX_PASSWORD,
                           settings.INFLUX_DBASE)
def __init__(self, ip_or_url, port, username, password, database, series_name):
    """Hold a DataFrameClient bound to *database* and a target series name."""
    self._series_name = series_name
    self._client = DataFrameClient(ip_or_url, port, username, password, database)
def _conn(self):
    """Create a fresh DataFrameClient from the stored connection settings."""
    return DataFrameClient(self.host, self.port, self.user,
                           self.password, self.db_name)
def analyseWarn(name, qname, topk=5):
    """Contrast recent anomalous vs. normal samples for one metric column.

    name: feature column to analyse; qname: warn-factor column used to split
    rows into positive (normal) and negative (anomalous) samples.
    topk is currently unused but kept for interface compatibility.
    """
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # NOTE(review): qname is spliced into the query unescaped -- acceptable
    # only because it is an internal column name, never untrusted input.
    positive_query = 'select * from ganglia where '+qname+' >0 ORDER BY time DESC limit 10'
    negative_query = 'select * from ganglia where '+qname+' <0 ORDER BY time DESC limit 5'
    # Most recent rows with a positive warn factor: treated as normal.
    normalSample = client.query(positive_query, chunked=False)['ganglia'][name]
    # Most recent rows with a negative warn factor: treated as anomalous.
    anamolySample = client.query(negative_query, chunked=False)['ganglia'][name]
    return analyseReasonWithTreeBaesd(anamolySample, normalSample, name)
def updateWindow(l_sys, l_namenode, l_FS, l_RPC, cont, limit):
    """Refit the four IsolationForest models on decay-sampled InfluxDB data.

    Each metric group is fetched newest-first (capped at 1500 rows) through
    sampleWithDecay, which down-samples to roughly *limit* rows.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        cont: contamination ratio for every IsolationForest.
        limit: target sample size passed to sampleWithDecay.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # Fetch each group's rows and keep only its feature columns.
    data_sys = sampleWithDecay(client, limit, 'select * from ganglia where w_system >0 ORDER BY time DESC limit 1500')
    d_sys = data_sys[l_sys]
    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC limit 1500')
    d_FS = data_fs[l_FS]
    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC limit 1500')
    d_namenode = data_namenode[l_namenode]
    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC limit 1500')
    d_RPC = data_rpc[l_RPC]
    # Fix: the original built an extra unused IsolationForest ('ilf'); removed.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)
    # Parenthesized print works identically under Python 2 and 3.
    print("update finished")
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def updateWindow(l_sys, l_namenode, l_FS, l_RPC, cont, limit):
    """Refit the four IsolationForest models on decay-sampled InfluxDB data.

    Unlike the capped variant, the queries here carry no row limit, so
    sampleWithDecay sees the full newest-first history.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        cont: contamination ratio for every IsolationForest.
        limit: target sample size passed to sampleWithDecay.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # Fetch each group's rows and keep only its feature columns.
    data_sys = sampleWithDecay(client, limit, 'select * from ganglia where w_system >0 ORDER BY time DESC')
    d_sys = data_sys[l_sys]
    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC')
    d_FS = data_fs[l_FS]
    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC')
    d_namenode = data_namenode[l_namenode]
    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC')
    d_RPC = data_rpc[l_RPC]
    # Fix: the original built an extra unused IsolationForest ('ilf'); removed.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)
    # Parenthesized print works identically under Python 2 and 3.
    print("update finished")
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def init(l_sys, l_namenode, l_FS, l_RPC, sleeptime=15, cont=0.01, limit=300):
    """Bootstrap four IsolationForest models from decay-sampled InfluxDB data.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        sleeptime: unused here; kept for interface compatibility with the
            windowed init variants.
        cont: contamination ratio for every IsolationForest.
        limit: target sample size passed to sampleWithDecay.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    # NOTE(review): 50 estimators here vs. 100 everywhere else -- confirm
    # whether this is intentional tuning for the FS group.
    ilf_FS = IsolationForest(n_estimators=50, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # Fetch each group's rows newest-first and keep only its feature columns.
    data_sys = sampleWithDecay(client, limit, 'select * from ganglia where w_system >0 ORDER BY time DESC')
    d_sys = data_sys[l_sys]
    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC')
    d_FS = data_fs[l_FS]
    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC')
    d_namenode = data_namenode[l_namenode]
    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC')
    d_RPC = data_rpc[l_RPC]
    # Parenthesized print works identically under Python 2 and 3.
    print(len(d_sys))
    print(len(d_FS))
    print(len(d_namenode))
    print(len(d_RPC))
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)
    print(ilf_FS.predict(d_FS))
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def test_write_points_from_dataframe_with_numeric_precision(self):
    """numeric_precision controls float rounding in the line protocol."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    # No explicit columns -> numeric column names 0, 1, 2.
    frame = pd.DataFrame(data=[["1", 1, 1.1111111111111],
                               ["2", 2, 2.2222222222222]],
                         index=[start, start + timedelta(hours=1)])
    cases = [
        # (write_points kwargs, expected request body)
        ({},
         b'foo,hello=there 0=\"1\",1=1i,2=1.11111111111 0\n'
         b'foo,hello=there 0=\"2\",1=2i,2=2.22222222222 3600000000000\n'),
        ({'numeric_precision': 4},
         b'foo,hello=there 0=\"1\",1=1i,2=1.1111 0\n'
         b'foo,hello=there 0=\"2\",1=2i,2=2.2222 3600000000000\n'),
        ({'numeric_precision': 'full'},
         b'foo,hello=there 0=\"1\",1=1i,2=1.1111111111111 0\n'
         b'foo,hello=there 0=\"2\",1=2i,2=2.2222222222222 3600000000000\n'),
    ]
    with requests_mock.Mocker() as mock:
        mock.register_uri(requests_mock.POST,
                          "http://localhost:8086/write",
                          status_code=204)
        for kwargs, expected in cases:
            client = DataFrameClient(database='db')
            client.write_points(frame, "foo", {"hello": "there"}, **kwargs)
            self.assertEqual(mock.last_request.body, expected)
def init(l_sys, l_namenode, l_FS, l_RPC, d, dwhite, winsize=200, sleeptime=15, cont=0.01):
    """Warm up a live observation window, then fit one IsolationForest per group.

    First polls getdata() every *sleeptime* seconds until more than *winsize*
    observations are collected, then combines those live rows with a batch of
    historical rows from InfluxDB and fits the four models on the union.

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        d, dwhite: state dicts consumed by loadvalue/extract (opaque here --
            their schema is defined elsewhere in this project).
        winsize: number of live observations to collect before fitting.
        sleeptime: seconds between polls.
        cont: contamination ratio for every IsolationForest.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    win_sys = []
    win_namenode = []
    win_FS = []
    win_RPC = []
    # Collect live observations until the window is full.
    while True:
        # Parenthesized print works identically under Python 2 and 3.
        print("fetching at %s" % ctime())
        data = getdata()
        loadvalue(data, d, dwhite)
        o_sys, o_namenode, o_FS, o_RPC = extract(d, l_sys, l_namenode, l_FS, l_RPC)
        win_sys.append(o_sys)
        win_namenode.append(o_namenode)
        win_FS.append(o_FS)
        win_RPC.append(o_RPC)
        if len(win_sys) > winsize:
            break
        sleep(sleeptime)
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    # Bounded historical sample where all window flags are positive.
    query = 'select * from ganglia where w_fs >0 and w_namenode>0 and w_rpc >0 limit 256;'
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    result = client.query(query, chunked=False)
    data = result['ganglia']
    d_sys = data[l_sys]
    d_namenode = data[l_namenode]
    d_FS = data[l_FS]
    d_RPC = data[l_RPC]
    # Turn the live window into frames and append it to the historical data.
    append_sys = pd.DataFrame(win_sys, columns=l_sys)
    append_namenode = pd.DataFrame(win_namenode, columns=l_namenode)
    append_FS = pd.DataFrame(win_FS, columns=l_FS)
    append_RPC = pd.DataFrame(win_RPC, columns=l_RPC)
    out_sys = pd.concat([d_sys, append_sys])
    out_namenode = pd.concat([d_namenode, append_namenode])
    out_FS = pd.concat([d_FS, append_FS])
    out_RPC = pd.concat([d_RPC, append_RPC])
    ilf_sys.fit(out_sys)
    ilf_namenode.fit(out_namenode)
    ilf_FS.fit(out_FS)
    ilf_RPC.fit(out_RPC)
    # Diagnostic: score the live window under each freshly fitted model.
    print(ilf_sys.predict(win_sys))
    print(ilf_namenode.predict(win_namenode))
    print(ilf_FS.predict(win_FS))
    print(ilf_RPC.predict(win_RPC))
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def init(l_sys, l_namenode, l_FS, l_RPC, d, dwhite, winsize=200, sleeptime=15, cont=0.01, limit=300):
    """Warm up a live window, then fit models on decay-sampled history + window.

    Like the plain-query variant, but the historical rows are drawn per metric
    group through sampleWithDecay (newest-first, capped at 1500 rows each).

    Args:
        l_sys, l_namenode, l_FS, l_RPC: feature-column lists per metric group.
        d, dwhite: state dicts consumed by loadvalue/extract (opaque here --
            their schema is defined elsewhere in this project).
        winsize: number of live observations to collect before fitting.
        sleeptime: seconds between polls.
        cont: contamination ratio for every IsolationForest.
        limit: target sample size passed to sampleWithDecay.

    Returns:
        Tuple of four fitted models: (ilf_sys, ilf_namenode, ilf_FS, ilf_RPC).
    """
    win_sys = []
    win_namenode = []
    win_FS = []
    win_RPC = []
    # Collect live observations until the window is full.
    while True:
        # Parenthesized print works identically under Python 2 and 3.
        print("fetching at %s" % ctime())
        data = getdata()
        loadvalue(data, d, dwhite)
        o_sys, o_namenode, o_FS, o_RPC = extract(d, l_sys, l_namenode, l_FS, l_RPC)
        win_sys.append(o_sys)
        win_namenode.append(o_namenode)
        win_FS.append(o_FS)
        win_RPC.append(o_RPC)
        if len(win_sys) > winsize:
            break
        sleep(sleeptime)
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    # Decay-sampled history for each metric group, feature columns only.
    data_sys = sampleWithDecay(client, limit, 'select * from ganglia where w_system >0 ORDER BY time DESC limit 1500')
    d_sys = data_sys[l_sys]
    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC limit 1500')
    d_FS = data_fs[l_FS]
    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC limit 1500')
    d_namenode = data_namenode[l_namenode]
    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC limit 1500')
    d_RPC = data_rpc[l_RPC]
    # Turn the live window into frames and append it to the historical data.
    append_sys = pd.DataFrame(win_sys, columns=l_sys)
    append_namenode = pd.DataFrame(win_namenode, columns=l_namenode)
    append_FS = pd.DataFrame(win_FS, columns=l_FS)
    append_RPC = pd.DataFrame(win_RPC, columns=l_RPC)
    out_sys = pd.concat([d_sys, append_sys])
    out_namenode = pd.concat([d_namenode, append_namenode])
    out_FS = pd.concat([d_FS, append_FS])
    out_RPC = pd.concat([d_RPC, append_RPC])
    ilf_sys.fit(out_sys)
    ilf_namenode.fit(out_namenode)
    ilf_FS.fit(out_FS)
    ilf_RPC.fit(out_RPC)
    # Diagnostic: score the live window under each freshly fitted model.
    print(ilf_sys.predict(win_sys))
    print(ilf_namenode.predict(win_namenode))
    print(ilf_FS.predict(win_FS))
    print(ilf_RPC.predict(win_RPC))
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC