python类IsolationForest()的实例源码

house_prices.py 文件源码 项目:HousePrices 作者: MizioAnd 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def outlier_prediction(x_train, y_train):
        """Drop the training samples that an isolation forest flags as outliers.

        An ``IsolationForest`` (``max_samples=100``, seeded with 42) is fitted on
        every feature of ``x_train`` and then used to label the same samples.

        :param x_train: feature matrix; it is indexed with a boolean mask, so it
            is presumably a NumPy array -- TODO confirm against the caller.
        :param y_train: targets aligned row-by-row with ``x_train``.
        :return: tuple ``(x_train_modified, y_train_modified)`` holding only the
            samples predicted as inliers.
        """
        # Design note kept from the original: an alternative would be to compare
        # predicted vs. actual values, compute squared residuals for every point
        # and apply a threshold criterion for inclusion.

        rng = np.random.RandomState(42)
        clf_all_features = IsolationForest(max_samples=100, random_state=rng)
        clf_all_features.fit(x_train)

        # predict() returns 1 for an inlier and -1 for an outlier.
        y_pred_train = clf_all_features.predict(x_train)

        # Vectorised comparison instead of np.array(map(...)): on Python 3 map()
        # is a lazy iterator and np.array() would wrap it in a 0-d object array,
        # which cannot be used as a row mask.
        inlier_mask = np.asarray(y_pred_train) == 1

        # Exclude suggested outlier samples to improve prediction power/score.
        x_train_modified = x_train[inlier_mask]
        y_train_modified = y_train[inlier_mask]

        return x_train_modified, y_train_modified
OD_numpy_buf.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def updateWindow(window,buf,maxContainSize):
    """Merge ``buf`` into ``window`` and refit an isolation forest on the result.

    When ``buf`` holds at least ``maxContainSize`` items the window is first
    passed through ``clusteringReminMost`` (defined elsewhere in the project;
    presumably it shrinks the window -- TODO confirm). In both cases every item
    of ``buf`` is then appended and a new 60-tree ``IsolationForest`` is fitted.

    :param window: container of samples exposing ``append`` (presumably a list).
    :param buf: iterable of new samples.
    :param maxContainSize: buffer size threshold that triggers clustering.
    :return: tuple ``(window, ilf)`` -- the updated window and the fitted model.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if len(buf) >= maxContainSize:
        print("buffer full ")
        window = clusteringReminMost(window)
        print("window size after clustering without adding buffer : %s" % len(window))
    else:
        print("higher than threads")

    # The tail was duplicated verbatim in both branches; it now lives here once.
    for sample in buf:
        window.append(sample)
    ilf = IsolationForest(n_estimators=60)
    ilf.fit(window)
    print("isolation update finished")
    return window,ilf
OD_numpy_buf.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def init(idlist,d,dblack,outcome,winsize=200,sleeptime = 5):
    """Collect an initial window of samples and fit the first isolation forest.

    Samples are fetched with the project helpers ``getdata``, ``loadvalue`` and
    ``extract`` until the window holds more than ``winsize`` entries, sleeping
    ``sleeptime`` seconds between fetches.

    :param idlist: forwarded to ``extract``.
    :param d: forwarded to ``loadvalue`` and ``extract``.
    :param dblack: forwarded to ``loadvalue``.
    :param outcome: container with ``append``; receives one predicted label per
        window sample.
    :param winsize: the loop stops once ``len(window) > winsize``.
    :param sleeptime: pause in seconds between two fetches.
    :return: tuple ``(ilf, window)``.
    """
    # Fill the window.
    window =  []
    while True:
        print("fetching at %s" % ctime())
        data = getdata()
        loadvalue(data, d,dblack)
        window.append(extract(d,idlist))
        if len(window) > winsize:
            break
        sleep(sleeptime)

    # Fit on the collected window; predict once and reuse the labels for both
    # the log output and the outcome list (the original predicted twice).
    ilf = IsolationForest(n_estimators=60)
    ilf.fit(window)
    labels = ilf.predict(window)
    print(labels)
    for label in labels:
        outcome.append(label)
    return ilf,window
onlinedetectWithlittleData.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def updateWindow(window,buf,maxContainSize):
    """Merge ``buf`` into ``window`` and refit an isolation forest on the result.

    When ``buf`` holds at least ``maxContainSize`` items the window is first
    passed through ``clusteringReminMost`` (defined elsewhere in the project;
    presumably it shrinks the window -- TODO confirm). Every item of ``buf`` is
    then appended and a new 100-tree ``IsolationForest`` is fitted.

    :param window: object whose ``append`` returns the extended object
        (presumably a pandas DataFrame -- TODO confirm against the caller).
    :param buf: iterable of new samples.
    :param maxContainSize: buffer size threshold that triggers clustering.
    :return: tuple ``(window, ilf)`` -- the updated window and the fitted model.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if len(buf) >= maxContainSize:
        print("buffer full ")
        window = clusteringReminMost(window)
        print("window size after clustering without adding buffer : %s" % len(window))
    else:
        print("higher than threads")

    # The tail was duplicated verbatim in both branches; it now lives here once.
    for sample in buf:
        # The result of append() is rebound on purpose: this relies on an
        # append() that returns a new object rather than mutating in place.
        window = window.append(sample)
    ilf = IsolationForest(n_estimators=100,verbose=2,)
    ilf.fit(window)
    print("isolation update finished")
    return window,ilf
birchForChangeWindowSize.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def updateWindow(window,buf,maxContainSize):
    """Merge ``buf`` into ``window`` and refit an isolation forest on the result.

    When ``buf`` holds at least ``maxContainSize`` items the current window is
    printed and then passed through ``clusteringReminMost`` (defined elsewhere
    in the project; presumably it shrinks the window -- TODO confirm). Every
    item of ``buf`` is then appended and a new 100-tree ``IsolationForest`` is
    fitted.

    :param window: container of samples exposing ``append`` (presumably a list).
    :param buf: iterable of new samples.
    :param maxContainSize: buffer size threshold that triggers clustering.
    :return: tuple ``(window, ilf)`` -- the updated window and the fitted model.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if len(buf) >= maxContainSize:
        print(window)  # debug dump of the window before clustering
        print("buffer full ")
        window = clusteringReminMost(window)
        print("window size after clustering without adding buffer : %s" % len(window))
    else:
        print("higher than threads")

    # The tail was duplicated verbatim in both branches; it now lives here once.
    for sample in buf:
        window.append(sample)
    ilf = IsolationForest(n_estimators=100)
    ilf.fit(window)
    print("isolation update finished")
    return window,ilf
onlinedetect.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def updateWindow(window,buf,maxContainSize):
    """Merge ``buf`` into ``window`` and refit an isolation forest on the result.

    When ``buf`` holds at least ``maxContainSize`` items the window is first
    passed through ``clusteringReminMost`` (defined elsewhere in the project;
    presumably it shrinks the window -- TODO confirm). Every item of ``buf`` is
    then appended and a new verbose 100-tree ``IsolationForest`` is fitted.

    :param window: container of samples exposing ``append`` (presumably a list).
    :param buf: iterable of new samples.
    :param maxContainSize: buffer size threshold that triggers clustering.
    :return: tuple ``(window, ilf)`` -- the updated window and the fitted model.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if len(buf) >= maxContainSize:
        print("buffer full ")
        window = clusteringReminMost(window)
        print("window size after clustering without adding buffer : %s" % len(window))
    else:
        print("higher than threads")

    # The tail was duplicated verbatim in both branches; it now lives here once.
    for sample in buf:
        window.append(sample)
    ilf = IsolationForest(n_estimators=100,verbose=2,)
    ilf.fit(window)
    print("isolation update finished")
    return window,ilf
OD_V0.0.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def updateWindow(window,buf,maxContainSize):
    """Append ``buf`` to ``window`` and refit an isolation forest on the result.

    The size of ``buf`` relative to ``maxContainSize`` only selects which log
    message is printed; the update itself is the same in both cases.

    :param window: container of samples exposing ``append`` (presumably a list).
    :param buf: iterable of new samples.
    :param maxContainSize: buffer size threshold used for the log message.
    :return: tuple ``(window, ilf)`` -- the updated window and the fitted model
        (100 trees, ``contamination=0.01``).
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if len(buf) >= maxContainSize:
        print("buffer full ")
    else:
        print("higher than threads")

    # Both branches of the original ran exactly this code.
    for sample in buf:
        window.append(sample)
    ilf = IsolationForest(n_estimators=100,contamination=0.01)
    ilf.fit(window)
    print("isolation update finished")
    return window,ilf
OD_V0.0.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def init(idlist,d,dblack,outcome,winsize=200,sleeptime = 5):
    """Collect an initial window of samples and fit the first isolation forest.

    Samples are fetched with the project helpers ``getdata``, ``loadvalue`` and
    ``extract`` until the window holds more than ``winsize`` entries, sleeping
    ``sleeptime`` seconds between fetches.

    :param idlist: forwarded to ``extract``.
    :param d: forwarded to ``loadvalue`` and ``extract``.
    :param dblack: forwarded to ``loadvalue``.
    :param outcome: container with ``append``; receives one predicted label per
        window sample.
    :param winsize: the loop stops once ``len(window) > winsize``.
    :param sleeptime: pause in seconds between two fetches.
    :return: tuple ``(ilf, window)``; the model uses 100 trees and
        ``contamination=0.01``.
    """
    # Fill the window.
    window =  []
    while True:
        print("fetching at %s" % ctime())
        data = getdata()
        loadvalue(data, d,dblack)
        window.append(extract(d,idlist))
        if len(window) > winsize:
            break
        sleep(sleeptime)

    # Fit on the collected window; predict once and reuse the labels for both
    # the log output and the outcome list (the original predicted twice).
    ilf = IsolationForest(n_estimators=100,contamination=0.01)
    ilf.fit(window)
    labels = ilf.predict(window)
    print(labels)
    for label in labels:
        outcome.append(label)
    return ilf,window
v1.1.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def updateWindow(l_sys, l_namenode, l_FS, l_RPC,cont):
    """Refit the four per-subsystem isolation forests from stored ganglia data.

    Up to 1024 rows are read from the ``ganglia`` measurement of a local
    InfluxDB instance, and one ``IsolationForest`` is fitted per column group.

    :param l_sys: column selector for the system model.
    :param l_namenode: column selector for the namenode model.
    :param l_FS: column selector for the FS model.
    :param l_RPC: column selector for the RPC model.
    :param cont: ``contamination`` value passed to every forest.
    :return: tuple ``(ilf_sys, ilf_namenode, ilf_FS, ilf_RPC)`` of fitted models.
    """
    query = 'select * from ganglia where w_fs >0 and w_namenode>0 and w_rpc >0 limit 1024;'
    # NOTE(review): connection settings and credentials are hard-coded here.
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')
    result = client.query(query, chunked=False)
    data = result['ganglia']

    # Select every column group up front so a bad selector fails before any fit.
    subsets = [data[columns] for columns in (l_sys, l_namenode, l_FS, l_RPC)]

    # The unused extra estimator the original built at the top has been dropped.
    models = [IsolationForest(n_estimators=100, contamination=cont) for _ in subsets]
    for model, subset in zip(models, subsets):
        model.fit(subset)

    print("update finished")
    ilf_sys, ilf_namenode, ilf_FS, ilf_RPC = models
    return ilf_sys,ilf_namenode,ilf_FS,ilf_RPC
preprocessing.py 文件源码 项目:Bacchus 作者: surfstudio 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def transform(self, X, **transform_params):
        """Remove the rows that an isolation forest labels as outliers.

        If ``X`` has fewer than ``1 / self.contamination`` rows it is returned
        unchanged. Otherwise a new ``IsolationForest`` is fitted on the columns
        returned by ``self._columns_to_apply(X)`` (falling back to
        ``self._numeric_columns(X)`` when that is ``None``), and only the rows
        predicted as inliers (label ``1``) are kept.

        :param X: input frame (presumably a pandas DataFrame -- TODO confirm).
        :param transform_params: accepted for interface compatibility; unused.
        :return: ``X`` itself for small inputs, otherwise the column-wise
            concatenation of the filtered analysed columns and the filtered
            remaining columns.
        """
        if X.shape[0] < 1/self.contamination:
            return X
        self.isolation_forest = IsolationForest(contamination=self.contamination,
                                                n_estimators=self.n_estimators,
                                                n_jobs=self.n_jobs)
        to_analyze = self._columns_to_apply(X)
        if to_analyze is None:
            to_analyze = self._numeric_columns(X)
        rest = self._rest_columns(X, to_analyze)
        self.isolation_forest.fit(to_analyze)
        labels = self.isolation_forest.predict(to_analyze)

        # Filter with a boolean mask instead of writing a temporary '_outlier'
        # column: the original assigned that column onto the pre-filter frames,
        # where it stayed behind (and could leak into the caller's data if the
        # helpers return X or a view of it).
        inlier_mask = np.asarray(labels) == 1
        to_analyze = to_analyze[inlier_mask]
        rest = rest[inlier_mask]
        if self.verbose:
            print('%s Now has %s' % (self.class_name, to_analyze.shape[0]))
        return pd.concat((to_analyze, rest), axis=1)
test_iforest.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_iforest_error():
    """Check that fit() raises or warns on unusable ``max_samples`` values."""
    X = iris.data

    # Numeric max_samples values that must be rejected outright.
    for bad_max_samples in (-1, 0.0, 2.0):
        assert_raises(ValueError,
                      IsolationForest(max_samples=bad_max_samples).fit, X)

    # The dataset has less than 256 samples: an explicit max_samples larger
    # than n_samples should only warn, while the 'auto' default must stay silent.
    oversized = IsolationForest(max_samples=1000)
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         oversized.fit, X)
    automatic = IsolationForest(max_samples='auto')
    assert_no_warnings(automatic.fit, X)

    # An unknown string is rejected as well.
    unknown = IsolationForest(max_samples='foobar')
    assert_raises(ValueError, unknown.fit, X)
test_iforest.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def test_iforest_performance():
    """Test Isolation Forest performs well"""

    # Generate train/test data. The original also assigned
    # ``np.r_[X + 2, X - 2]`` to X_train and overwrote it on the very next
    # line; that dead store has been removed.
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    # 0 marks the 20 held-out normal points, 1 marks the 20 outliers.
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # NOTE(review): the original comment calls this a score where lower means
    # more normal; confirm that predict() has that orientation in the
    # scikit-learn version under test.
    y_pred = clf.predict(X_test)

    # check that the ROC AUC on the held-out points exceeds 0.98
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
isolation_forest.py 文件源码 项目:monasca-analytics 作者: openstack 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def __init__(self, _id, _config):
        """Initialise the base class and store the configured sample count.

        :param _id: identifier forwarded to the parent constructor.
        :param _config: configuration mapping; must contain ``'nb_samples'``.
        """
        super(IsolationForest, self).__init__(_id, _config)
        nb_samples = _config['nb_samples']
        # Stored as an int regardless of how the value was supplied.
        self._nb_samples = int(nb_samples)
isolation_forest.py 文件源码 项目:monasca-analytics 作者: openstack 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def get_default_config():
        """Return the default configuration mapping for this module.

        :return: dict with the class name under ``'module'`` and the
            module-level ``N_SAMPLES`` constant under ``'nb_samples'``.
        """
        defaults = {'module': IsolationForest.__name__}
        defaults['nb_samples'] = N_SAMPLES
        return defaults
isolation_forest.py 文件源码 项目:monasca-analytics 作者: openstack 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def _get_best_detector(self, train):
        """Fit a default-configured ``ensemble.IsolationForest`` on ``train``.

        :param train: training samples passed straight to ``fit``.
        :return: the fitted detector.
        """
        fitted_model = ensemble.IsolationForest()
        fitted_model.fit(train)
        return fitted_model
test_isolation_forest.py 文件源码 项目:monasca-analytics 作者: openstack 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def setUp(self):
        """Create the IsolationForest module under test with a fake config."""
        super(TestIsolationForest, self).setUp()
        fake_config = {"module": "fake", "nb_samples": 1000}
        self.if_sml = isolation_forest.IsolationForest("fakeid", fake_config)
test_isolation_forest.py 文件源码 项目:monasca-analytics 作者: openstack 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_learn_structure(self):
        """learn_structure() must hand back an ``ensemble.IsolationForest``."""
        samples = self.get_testing_data()
        learned = self.if_sml.learn_structure(samples)
        self.assertIsInstance(learned, ensemble.IsolationForest)
User_Interface.py 文件源码 项目:yttresearch-machine-learning-algorithms-analysis 作者: gdemos01 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def exportPresentationData(classifier,action):
        """Prompt for a data directory and run the selected classifier on it.

        :param classifier: number (or numeric string) from 1 to 11 selecting
            the estimator; any other value does nothing after the prompt.
        :param action: forwarded unchanged to ``classify`` / ``classify_type2``.
        """
        data_dir = input('Give Data Directory: ')
        choice = int(classifier)

        # Factories are lambdas so that only the chosen estimator is built.
        factories = {
                1: lambda: GradientBoostingClassifier(),
                2: lambda: LogisticRegression(),
                3: lambda: KNeighborsClassifier(n_neighbors=5),
                4: lambda: DecisionTreeClassifier(),
                5: lambda: svm.LinearSVC(),
                6: lambda: RandomForestClassifier(),
                7: lambda: ExtraTreesClassifier(),
                8: lambda: IsolationForest(),
                9: lambda: AdaBoostClassifier(n_estimators=100),
                10: lambda: BaggingClassifier(DecisionTreeClassifier()),
                # NOTE(review): the 'abdt'/'gbdt' labels look swapped relative
                # to the estimators they name; kept as in the original.
                11: lambda: VotingClassifier(
                        estimators=[('abdt', GradientBoostingClassifier()),
                                    ('gbdt', AdaBoostClassifier())],
                        voting='soft'),
        }
        if choice not in factories:
                return

        clf = factories[choice]()
        # LinearSVC (5) and IsolationForest (8) go through classify_type2.
        if choice in (5, 8):
                classify_type2(data_dir,clf,action)
        else:
                classify(data_dir,clf,action)
Exporter.py 文件源码 项目:yttresearch-machine-learning-algorithms-analysis 作者: gdemos01 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def exportPresentationData(classifier,action,dir):
        """Run the selected classifier on the data found in ``dir``.

        :param classifier: number (or numeric string) from 1 to 11 selecting
            the estimator; any other value does nothing.
        :param action: forwarded unchanged to ``classify`` / ``classify_type2``.
        :param dir: data directory forwarded to the classification helper.
        """
        choice = int(classifier)

        # Factories are lambdas so that only the chosen estimator is built.
        factories = {
                1: lambda: GradientBoostingClassifier(),
                2: lambda: LogisticRegression(),
                3: lambda: KNeighborsClassifier(n_neighbors=5),
                4: lambda: DecisionTreeClassifier(),
                5: lambda: svm.LinearSVC(),
                6: lambda: RandomForestClassifier(),
                7: lambda: ExtraTreesClassifier(),
                8: lambda: IsolationForest(),
                9: lambda: AdaBoostClassifier(n_estimators=100),
                10: lambda: BaggingClassifier(DecisionTreeClassifier()),
                # NOTE(review): the 'abdt'/'gbdt' labels look swapped relative
                # to the estimators they name; kept as in the original.
                11: lambda: VotingClassifier(
                        estimators=[('abdt', GradientBoostingClassifier()),
                                    ('gbdt', AdaBoostClassifier())],
                        voting='soft'),
        }
        if choice not in factories:
                return

        clf = factories[choice]()
        # LinearSVC (5) and IsolationForest (8) go through classify_type2.
        if choice in (5, 8):
                classify_type2(dir,clf,action)
        else:
                classify(dir,clf,action)
onlinedetectWithlittleData.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def init(idlist,d,dblack,winsize=50):
    """Collect an initial window of samples and fit the first isolation forest.

    One sample is fetched up front to seed the window; further samples are
    fetched every 5 seconds until the window holds more than ``winsize`` rows.
    Each sample comes from the project helpers ``getdata``, ``loadvalue`` and
    ``extract`` and is reshaped into a single-row frame.

    :param idlist: forwarded to ``extract``.
    :param d: forwarded to ``loadvalue`` and ``extract``.
    :param dblack: forwarded to ``loadvalue``.
    :param winsize: the loop stops once ``len(window) > winsize``.
    :return: tuple ``(ilf, window)``.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    data = getdata()
    loadvalue(data, d,dblack)
    outvalue = extract(d,idlist)
    print(len(outvalue))
    reshapevalue = np.array(outvalue).reshape(1,-1)
    window =  DataFrame(reshapevalue)

    # buf keeps the samples fetched inside the loop as separate one-row frames
    # (the seed sample above is not included).
    buf = []
    while True:
        print("fetching at %s" % ctime())
        data = getdata()
        loadvalue(data, d,dblack)
        outvalue = extract(d,idlist)
        reshapevalue = np.array(outvalue).reshape(1,-1)
        # NOTE(review): relies on DataFrame.append returning a new frame; that
        # method is not available in pandas 2.x -- confirm the pinned version.
        window = window.append(DataFrame(reshapevalue))
        buf.append(DataFrame(reshapevalue))
        print(len(window))
        if len(window) > winsize:
            break
        sleep(5)

    ilf = IsolationForest(n_estimators=100,verbose=2,)
    ilf.fit(window)
    print(ilf.predict(window))
    print("__________________")
    # Also show the prediction for each buffered sample on its own.
    for frame in buf:
        print(ilf.predict(frame))
    return ilf,window
v0.4.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def updateWindow(buf, cont):
    """Fit a fresh 100-tree ``IsolationForest`` on ``buf`` and return it.

    :param buf: training samples passed straight to ``fit``.
    :param cont: ``contamination`` value for the forest.
    :return: the fitted model.
    """
    ilf = IsolationForest(n_estimators=100, contamination=cont)
    ilf.fit(buf)  # the model is rebuilt from the buffer contents only
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("isolation update finished")
    return ilf
v0.4.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def init(l_sys, l_namenode, l_FS, l_RPC, d, dwhite, winsize=200, sleeptime=15, cont=0.01):
    """Collect four initial sample windows and fit one isolation forest each.

    Samples are fetched with the project helpers ``getdata``, ``loadvalue`` and
    ``extract`` until the system window holds more than ``winsize`` entries,
    sleeping ``sleeptime`` seconds between fetches.

    :param l_sys: selector for the system group, forwarded to ``extract``.
    :param l_namenode: selector for the namenode group.
    :param l_FS: selector for the FS group.
    :param l_RPC: selector for the RPC group.
    :param d: forwarded to ``loadvalue`` and ``extract``.
    :param dwhite: forwarded to ``loadvalue``.
    :param winsize: the loop stops once ``len(win_sys) > winsize``.
    :param sleeptime: pause in seconds between two fetches.
    :param cont: ``contamination`` value passed to every forest.
    :return: tuple ``(ilf_sys, ilf_namenode, ilf_FS, ilf_RPC)`` of fitted models.
    """
    # One window per metric group.
    win_sys = []
    win_namenode = []
    win_FS = []
    win_RPC = []
    while True:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("fetching at %s" % ctime())
        data = getdata()
        loadvalue(data, d, dwhite)
        o_sys, o_namenode, o_FS, o_RPC = extract(d, l_sys, l_namenode, l_FS, l_RPC)
        # Append each group's sample to its own window.
        win_sys.append(o_sys)
        win_namenode.append(o_namenode)
        win_FS.append(o_FS)
        win_RPC.append(o_RPC)

        # All windows grow in lockstep, so checking one of them is enough.
        if len(win_sys) > winsize:
            break
        sleep(sleeptime)

    # Build the models.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)

    # Fit each model on its own window.
    ilf_sys.fit(win_sys)
    ilf_namenode.fit(win_namenode)
    ilf_FS.fit(win_FS)
    ilf_RPC.fit(win_RPC)

    # Log the labels each model assigns to its own training window.
    print(ilf_sys.predict(win_sys))
    print(ilf_namenode.predict(win_namenode))
    print(ilf_FS.predict(win_FS))
    print(ilf_RPC.predict(win_RPC))

    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
v0.3.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def updateWindow(buf,cont):
    """Fit a fresh 100-tree ``IsolationForest`` on ``buf`` and return it.

    :param buf: training samples passed straight to ``fit``.
    :param cont: ``contamination`` value for the forest.
    :return: the fitted model.
    """
    ilf = IsolationForest(n_estimators=100,contamination=cont)
    ilf.fit(buf)  # the model is rebuilt from the buffer contents only
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("isolation update finished")
    return ilf
v0.3.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def init(l_sys,l_namenode,l_FS,l_RPC,l_queue,d,dwhite,winsize=200,sleeptime = 15,cont=0.01):
    """Collect five initial sample windows and fit one isolation forest each.

    Samples are fetched with the project helpers ``getdata``, ``loadvalue`` and
    ``extract`` until the system window holds more than ``winsize`` entries,
    sleeping ``sleeptime`` seconds between fetches.

    :param l_sys: selector for the system group, forwarded to ``extract``.
    :param l_namenode: selector for the namenode group.
    :param l_FS: selector for the FS group.
    :param l_RPC: selector for the RPC group.
    :param l_queue: selector for the queue group.
    :param d: forwarded to ``loadvalue`` and ``extract``.
    :param dwhite: forwarded to ``loadvalue``.
    :param winsize: the loop stops once ``len(win_sys) > winsize``.
    :param sleeptime: pause in seconds between two fetches.
    :param cont: ``contamination`` value passed to every forest.
    :return: tuple ``(ilf_sys, ilf_namenode, ilf_FS, ilf_queue, ilf_RPC)``.
        Note the queue model comes BEFORE the RPC model, unlike the parameter
        order; callers unpack positionally, so the order is kept as is.
    """
    # One window per metric group.
    win_sys =  []
    win_namenode = []
    win_FS = []
    win_RPC =[]
    win_queue = []

    while True:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("fetching at %s" % ctime())
        data = getdata()
        loadvalue(data, d,dwhite)
        o_sys,o_namenode,o_FS,o_RPC,o_queue  = extract(d,l_sys,l_namenode,l_FS,l_RPC,l_queue)
        # Append each group's sample to its own window.
        win_sys.append(o_sys)
        win_namenode.append(o_namenode)
        win_FS.append(o_FS)
        win_RPC.append(o_RPC)
        win_queue.append(o_queue)

        # All windows grow in lockstep, so checking one of them is enough.
        if len(win_sys) > winsize:
            break
        sleep(sleeptime)

    # Build the models.
    ilf_sys = IsolationForest(n_estimators=100,contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100,contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100,contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100,contamination=cont)
    ilf_queue = IsolationForest(n_estimators=100,contamination=cont)

    # Fit each model on its own window.
    ilf_sys.fit(win_sys)
    ilf_namenode.fit(win_namenode)
    ilf_FS.fit(win_FS)
    ilf_RPC.fit(win_RPC)
    ilf_queue.fit(win_queue)

    return ilf_sys,ilf_namenode,ilf_FS,ilf_queue,ilf_RPC
v0.1.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def updateWindow(buf,cont):
    """Fit a fresh 100-tree ``IsolationForest`` on ``buf`` and return it.

    :param buf: training samples passed straight to ``fit``.
    :param cont: ``contamination`` value for the forest.
    :return: the fitted model.
    """
    ilf = IsolationForest(n_estimators=100,contamination=cont)
    ilf.fit(buf)  # the model is rebuilt from the buffer contents only
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("isolation update finished")
    return ilf
v1.3.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def updateWindow(l_sys, l_namenode, l_FS, l_RPC,cont,limit):
    """Refit the four per-subsystem isolation forests from sampled ganglia data.

    For every subsystem the most recent rows (at most 1500, newest first) are
    queried from a local InfluxDB instance through the project helper
    ``sampleWithDecay``, and a fresh ``IsolationForest`` is fitted on the
    selected columns.

    :param l_sys: column selector for the system model.
    :param l_namenode: column selector for the namenode model.
    :param l_FS: column selector for the FS model.
    :param l_RPC: column selector for the RPC model.
    :param cont: ``contamination`` value passed to every forest.
    :param limit: forwarded to ``sampleWithDecay`` (presumably the number of
        samples to keep -- TODO confirm).
    :return: tuple ``(ilf_sys, ilf_namenode, ilf_FS, ilf_RPC)`` of fitted models.
    """
    # NOTE(review): connection settings and credentials are hard-coded here.
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')

    # Sample each subsystem; the query order (sys, fs, namenode, rpc) is kept
    # from the original in case the sampler draws random numbers.
    data_sys = sampleWithDecay(client,limit,'select * from ganglia where w_system >0 ORDER BY time DESC limit 1500')
    d_sys = data_sys[l_sys]

    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC limit 1500')
    d_FS = data_fs[l_FS]

    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC limit 1500')
    d_namenode = data_namenode[l_namenode]

    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC limit 1500')
    d_RPC = data_rpc[l_RPC]

    # The unused extra estimator the original built at the top has been dropped.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)

    # Fit each model on its own sample.
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("update finished")
    return ilf_sys,ilf_namenode,ilf_FS,ilf_RPC
simulatev1.3.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def updateWindow(l_sys, l_namenode, l_FS, l_RPC,cont,limit):
    """Refit the four per-subsystem isolation forests from sampled ganglia data.

    For every subsystem the rows are queried newest first from a local InfluxDB
    instance through the project helper ``sampleWithDecay``, and a fresh
    ``IsolationForest`` is fitted on the selected columns.

    :param l_sys: column selector for the system model.
    :param l_namenode: column selector for the namenode model.
    :param l_FS: column selector for the FS model.
    :param l_RPC: column selector for the RPC model.
    :param cont: ``contamination`` value passed to every forest.
    :param limit: forwarded to ``sampleWithDecay`` (presumably the number of
        samples to keep -- TODO confirm).
    :return: tuple ``(ilf_sys, ilf_namenode, ilf_FS, ilf_RPC)`` of fitted models.
    """
    # NOTE(review): connection settings and credentials are hard-coded here.
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')

    # Sample each subsystem; the query order (sys, fs, namenode, rpc) is kept
    # from the original in case the sampler draws random numbers.
    data_sys = sampleWithDecay(client,limit,'select * from ganglia where w_system >0 ORDER BY time DESC')
    d_sys = data_sys[l_sys]

    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC')
    d_FS = data_fs[l_FS]

    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC')
    d_namenode = data_namenode[l_namenode]

    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC')
    d_RPC = data_rpc[l_RPC]

    # The unused extra estimator the original built at the top has been dropped.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)

    # Fit each model on its own sample.
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("update finished")
    return ilf_sys,ilf_namenode,ilf_FS,ilf_RPC
simulatev1.3.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def init(l_sys, l_namenode, l_FS, l_RPC, sleeptime=15, cont=0.01,limit = 300):
    """Fit the initial per-subsystem isolation forests from stored ganglia data.

    For every subsystem the rows are queried newest first from a local InfluxDB
    instance through the project helper ``sampleWithDecay``, and an
    ``IsolationForest`` is fitted on the selected columns.

    :param l_sys: column selector for the system model.
    :param l_namenode: column selector for the namenode model.
    :param l_FS: column selector for the FS model.
    :param l_RPC: column selector for the RPC model.
    :param sleeptime: accepted for interface compatibility; not used here.
    :param cont: ``contamination`` value passed to every forest.
    :param limit: forwarded to ``sampleWithDecay`` (presumably the number of
        samples to keep -- TODO confirm).
    :return: tuple ``(ilf_sys, ilf_namenode, ilf_FS, ilf_RPC)`` of fitted models.
    """
    # Build the models.
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    # NOTE(review): the FS model uses 50 trees while the others use 100 --
    # confirm this is intentional.
    ilf_FS = IsolationForest(n_estimators=50, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)

    # NOTE(review): connection settings and credentials are hard-coded here.
    client = DataFrameClient(host='127.0.0.1', port=8086, username='root', password='root', database='testdb')

    # Sample each subsystem; the query order (sys, fs, namenode, rpc) is kept
    # from the original in case the sampler draws random numbers.
    data_sys = sampleWithDecay(client, limit, 'select * from ganglia where w_system >0 ORDER BY time DESC')
    d_sys = data_sys[l_sys]

    data_fs = sampleWithDecay(client, limit, 'select * from ganglia where w_fs >0 ORDER BY time DESC')
    d_FS = data_fs[l_FS]

    data_namenode = sampleWithDecay(client, limit, 'select * from ganglia where w_namenode >0 ORDER BY time DESC')
    d_namenode = data_namenode[l_namenode]

    data_rpc = sampleWithDecay(client, limit, 'select * from ganglia where w_rpc >0 ORDER BY time DESC')
    d_RPC = data_rpc[l_RPC]

    # Log how many rows each sample holds.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(len(d_sys))
    print(len(d_FS))
    print(len(d_namenode))
    print(len(d_RPC))

    # Fit each model on its own sample.
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)

    # Log the labels the FS model assigns to its own training sample.
    print(ilf_FS.predict(d_FS))

    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
v0.2.py 文件源码 项目:onlineDetectForHadoop 作者: DawnsonLi 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def updateWindow(buf,cont):
    """Fit a fresh 100-tree ``IsolationForest`` on ``buf`` and return it.

    :param buf: training samples passed straight to ``fit``.
    :param cont: ``contamination`` value for the forest.
    :return: the fitted model.
    """
    ilf = IsolationForest(n_estimators=100,contamination=cont)
    ilf.fit(buf)  # the model is rebuilt from the buffer contents only
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("isolation update finished")
    return ilf
test_iforest.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def test_iforest():
    """Smoke-test fit/predict across a grid of Isolation Forest parameters."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])

    search_space = {
        "n_estimators": [3],
        "max_samples": [0.5, 1.0, 3],
        "bootstrap": [True, False],
    }

    # Only checks that every combination runs; warnings are not of interest.
    with ignore_warnings():
        for settings in ParameterGrid(search_space):
            forest = IsolationForest(random_state=rng, **settings)
            forest.fit(X_train).predict(X_test)


问题


面经


文章

微信
公众号

扫码关注公众号