python类KNeighborsRegressor()的实例源码

test_bagging.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def test_regression():
    # Smoke test: fit and predict with BaggingRegressor for every base
    # estimator and every combination of sampling options. Nothing is
    # asserted; the test passes as long as no call raises.
    rng = check_random_state(0)
    features, targets = boston.data[:50], boston.target[:50]
    X_train, X_test, y_train, y_test = train_test_split(features,
                                                        targets,
                                                        random_state=rng)

    sampling_options = {
        "max_samples": [0.5, 1.0],
        "max_features": [0.5, 1.0],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
    }
    grid = ParameterGrid(sampling_options)

    # None exercises the ensemble's default base estimator.
    candidates = [None,
                  DummyRegressor(),
                  DecisionTreeRegressor(),
                  KNeighborsRegressor(),
                  SVR()]

    for base_estimator in candidates:
        for params in grid:
            ensemble = BaggingRegressor(base_estimator=base_estimator,
                                        random_state=rng,
                                        **params)
            ensemble.fit(X_train, y_train)
            ensemble.predict(X_test)
friedman_scores.py 文件源码 项目:mlens 作者: flennerhag 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def build_ensemble(**kwargs):
    """Build a SuperLearner with one preprocessed layer and a meta learner.

    Keyword arguments are forwarded unchanged to the SuperLearner
    constructor. The first layer maps each preprocessing case to its own
    list of estimators; a GradientBoostingRegressor is added as the meta
    estimator.
    """
    ensemble = SuperLearner(**kwargs)

    preprocessing = {
        'Standard Scaling': [StandardScaler()],
        'Min Max Scaling': [MinMaxScaler()],
        'No Preprocessing': [],
    }

    # Keys mirror the preprocessing cases above.
    estimators = {
        'Standard Scaling': [ElasticNet(), Lasso(), KNeighborsRegressor()],
        'Min Max Scaling': [SVR()],
        'No Preprocessing': [RandomForestRegressor(random_state=SEED),
                             GradientBoostingRegressor()],
    }

    ensemble.add(estimators, preprocessing)
    ensemble.add(GradientBoostingRegressor(), meta=True)
    return ensemble
Output.py 文件源码 项目:ModelFlow 作者: yuezPrincetechs 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def fit(self,X,y):
        '''
        Fit one single-feature model per column of X.

        Columns listed in ``self.feature_cate`` are modelled by the mean of y
        for each distinct value of the column; every other column gets its
        own KNeighborsRegressor using ``self.n_neighbors`` neighbours.
        The overall mean of y is stored in ``self.means``.

        :param X: feature table; copied and converted to a DataFrame with a fresh 0..n-1 index
        :param y: target values; copied and converted to a Series with a fresh 0..n-1 index
        :return: self
        '''
        features = pd.DataFrame(X.copy()).reset_index(drop=True)
        target = pd.Series(y.copy()).reset_index(drop=True)

        self.means = target.mean()
        self.models = {}
        for column in features.columns.tolist():
            if column not in self.feature_cate:
                # Numeric column: a KNN regressor on this feature alone.
                regressor = KNeighborsRegressor(n_neighbors=self.n_neighbors)
                regressor.fit(features[[column]], target)
                self.models[column] = copy.deepcopy(regressor)
                continue
            # Categorical column: lookup table value -> mean target.
            self.models[column] = target.groupby(features[column]).mean().to_dict()
        return self
modeltest.py 文件源码 项目:strategy 作者: kanghua309 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def model_cross_valid(X,Y):
    """Print the mean 10-fold cross-validation score of each candidate model.

    Scores come from the ``neg_mean_squared_error`` scorer, so values closer
    to zero are better.

    :param X: feature matrix passed to ``cross_val_score``
    :param Y: target values passed to ``cross_val_score``
    :return: None; one line per model is printed
    """
    # Folds are consecutive (not shuffled). ``random_state`` only matters
    # together with ``shuffle=True`` and recent scikit-learn releases raise
    # when it is given without shuffling, so it is intentionally not passed.
    # The resulting folds are the same as before.
    kfold = model_selection.KFold(n_splits=10)
    scoring = 'neg_mean_squared_error'

    # Further regressors (Ridge, Lasso, KNeighborsRegressor, SVR, tree and
    # boosting models, ...) can be appended to this list.
    for model_name in [LinearRegression,ElasticNet]:
        model = model_name()
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name,results.mean())
RegressionKNN.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def parameterChoosing(self):
        """Grid-search KNN hyper-parameters and print a report.

        Searches ``weights`` in {'uniform', 'distance'} and ``n_neighbors``
        in 2..99 with 5-fold cross-validation on ``self.X_train`` /
        ``self.y_train``, prints the best parameters and every grid score,
        then prints the mean squared error on ``self.X_test`` /
        ``self.y_test``. Returns None.
        """
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,100)
                             }
                            ]

        # NOTE(review): the 'mean_squared_error' scorer name and the
        # ``grid_scores_`` attribute used below belong to older scikit-learn
        # releases; confirm the pinned version before upgrading.
        reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        # Single-argument print(...) calls behave identically on Python 2
        # and Python 3, unlike the print statement.
        print("Best parameters set found on development set:\n")
        print(reg.best_params_)

        print("Grid scores on development set:\n")
        for params, mean_score, scores in reg.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

        print(reg.scorer_)

        print("MSE for test data set:")
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        # Argument order follows the (y_true, y_pred) signature.
        print(mean_squared_error(y_true, y_pred))
test_neighbors.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_kneighbors_regressor(n_samples=40,
                              n_features=5,
                              n_test_pts=10,
                              n_neighbors=3,
                              random_state=0):
    # Check k-neighbors regression: predictions for slightly perturbed
    # training points must stay within 0.3 of the training targets, for
    # every algorithm and every weighting scheme.
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1

    # Target: Euclidean norm of each sample, scaled so the maximum is 1.
    norms = np.sqrt((X ** 2).sum(1))
    y = norms / norms.max()

    queries = X[:n_test_pts]
    expected = y[:n_test_pts]
    weight_options = ['uniform', 'distance', _weight_func]

    for algorithm in ALGORITHMS:
        for weights in weight_options:
            regressor = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                                      weights=weights,
                                                      algorithm=algorithm)
            regressor.fit(X, y)
            # A fresh tiny offset is drawn for every configuration.
            epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
            predicted = regressor.predict(queries + epsilon)
            assert_true(np.all(abs(predicted - expected) < 0.3))
test_neighbors.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_KNeighborsRegressor_multioutput_uniform_weight():
    # Multi-output regression with uniform weights: predict() must equal the
    # plain average of the training targets of the neighbours returned by
    # kneighbors().
    n_samples, n_features, n_output = 40, 5, 4
    rng = check_random_state(0)

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    for algorithm in ALGORITHMS:
        for weights in (None, 'uniform'):
            regressor = neighbors.KNeighborsRegressor(weights=weights,
                                                      algorithm=algorithm)
            regressor.fit(X_train, y_train)

            neighbor_rows = regressor.kneighbors(X_test, return_distance=False)
            manual_average = np.array([y_train[rows].mean(axis=0)
                                       for rows in neighbor_rows])

            predicted = regressor.predict(X_test)

            assert_equal(predicted.shape, y_test.shape)
            assert_equal(manual_average.shape, y_test.shape)
            assert_array_almost_equal(predicted, manual_average)
test_neighbors.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_kneighbors_regressor_sparse(n_samples=40,
                                     n_features=5,
                                     n_test_pts=10,
                                     n_neighbors=5,
                                     random_state=0):
    # Test k-neighbors regression on sparse matrices: fit on every sparse
    # container type, then predict on both sparse and dense versions of the
    # same data. Rounded predictions must match the 0/1 target for more than
    # 95% of the samples.
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    # 1 for points within distance 0.5 of the origin, 0 otherwise.
    # The builtin ``int`` replaces ``np.int``, which was only an alias for it
    # and no longer exists in current NumPy.
    y = ((X ** 2).sum(axis=1) < .25).astype(int)

    for sparsemat in SPARSE_TYPES:
        knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                            algorithm='auto')
        knn.fit(sparsemat(X), y)
        for sparsev in SPARSE_OR_DENSE:
            X2 = sparsev(X)
            assert_true(np.mean(knn.predict(X2).round() == y) > 0.95)
test_neighbors.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_neighbors_iris():
    # Sanity checks on the iris dataset, repeated for every algorithm:
    #  * a 1-NN classifier reproduces the training labels exactly,
    #  * a 9-NN classifier is right on more than 95% of the training data,
    #  * a 5-NN regressor, once rounded, is right on more than 95% as well.
    data, target = iris.data, iris.target

    for algorithm in ALGORITHMS:
        classifier = neighbors.KNeighborsClassifier(n_neighbors=1,
                                                    algorithm=algorithm)
        classifier.fit(data, target)
        assert_array_equal(classifier.predict(data), target)

        classifier.set_params(n_neighbors=9, algorithm=algorithm)
        classifier.fit(data, target)
        accuracy = np.mean(classifier.predict(data) == target)
        assert_true(accuracy > 0.95)

        regressor = neighbors.KNeighborsRegressor(n_neighbors=5,
                                                  algorithm=algorithm)
        regressor.fit(data, target)
        rounded = regressor.predict(data).round()
        assert_greater(np.mean(rounded == target), 0.95)
FittedFQI.py 文件源码 项目:HrlPy 作者: snakeztc 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def learn(self, experiences, max_iter=20):
        """Run fitted Q-iteration on a batch of transitions.

        Each row of ``experiences`` is laid out as (s, a, r, ns): the first
        ``self.domain.state_space_dims`` columns are the state, followed by
        one action column, one reward column, and the next state in the
        remaining columns.

        On every iteration the regression target
        ``r + discount_factor * max_a Q(ns, a)`` is recomputed with the
        current "root" model, a new 2-nearest-neighbour regressor is fitted
        on it and stored as ``self.representation.models["root"]``.

        :param experiences: 2-D array of transitions, one per row
        :param max_iter: number of fitting iterations
        :return: None
        """
        dims = self.domain.state_space_dims
        states = experiences[:, 0:dims]
        actions = experiences[:, dims]
        # Keep rewards as an (n, 1) column. A 1-D reward vector added to the
        # (n, 1) ``best_nqs`` below would broadcast to an (n, n) matrix
        # instead of one target per transition.
        rewards = np.reshape(experiences[:, dims+1], (-1, 1))
        next_states = experiences[:, dims+2:]
        X = self.representation.phi_sa("root", states, actions)

        for _ in range(max_iter):
            nqs = self.representation.Qs("root", next_states)
            best_nqs = np.reshape(np.amax(nqs, axis=1), (-1, 1))
            y = rewards + self.domain.discount_factor * best_nqs
            model = KNeighborsRegressor(n_neighbors=2, n_jobs=-1)
            model.fit(X, y)
            # The next iteration's Qs() call uses this freshly fitted model.
            self.representation.models["root"] = model
modeltest.py 文件源码 项目:strategy 作者: kanghua309 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    """Fit each candidate model and print/plot residual diagnostics.

    For every model class the function fits on the training data, plots
    residuals against predictions, prints the model score, and prints the
    p-values of three residual tests on the test set: Jarque-Bera
    (normality), Breusch-Pagan (heteroskedasticity) and Ljung-Box with 10
    lags (autocorrelation).

    :param TrainX: training features
    :param TrainY: training targets
    :param TestX: test features
    :param TestY: test targets
    :return: None; results are printed and shown with matplotlib
    """
    from statsmodels.stats.stattools import jarque_bera
    import statsmodels.api as sms
    import statsmodels.stats.diagnostic as smd

    # The Breusch-Pagan test was first published under the misspelled name
    # ``het_breushpagan``; prefer the corrected spelling and fall back to the
    # old one on statsmodels versions that only provide that.
    het_breuschpagan = getattr(smd, "het_breuschpagan", None) or smd.het_breushpagan

    # Further regressors (Ridge, Lasso, KNeighborsRegressor, SVR, tree and
    # boosting models, ...) can be appended to this list.
    for model_name in [LinearRegression, ElasticNet]:
        model = model_name()
        model.fit(TrainX,TrainY)
        print(model_name)

        # Predict once and reuse for both the residuals and the plot.
        predicted = model.predict(TestX)
        resid = predicted - TestY
        # NOTE: despite the label, the printed value is the MEAN of the
        # squared residuals.
        print("Residual sum of squares: %f"% np.mean(resid ** 2))

        plt.scatter(predicted, resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        # Each message is formatted into one string so the output is the
        # same under Python 2 and Python 3 (no tuple repr, no print statement).
        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal %s" % (pvalue,))

        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = het_breuschpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity %s" % (pvalue1,))

        # NOTE(review): indexing with [1] assumes acorr_ljungbox returns a
        # (statistics, p-values) pair; confirm against the pinned statsmodels
        # version, since newer releases may return a DataFrame.
        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        print("Test Autocorrelation P-values: %s" % (ljung_box[1],))
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
RegressionKNN.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def __init__(self, isTrain):
        """Initialise the base class and create the KNN regressor.

        :param isTrain: forwarded unchanged to the parent constructor
        """
        super(RegressionKNN, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        # KNN regression with 86 neighbours. 'distance' weights each
        # neighbour by the inverse of its distance to the query point
        # ('uniform' would weight all neighbours equally); the metric is
        # left at the library default.
        self.regr = neighbors.KNeighborsRegressor(n_neighbors=86,
                                                  weights='distance')
models_actinf.py 文件源码 项目:smp_base 作者: x75 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __init__(self, conf):
        """smpKNN.__init__

        Run the smpModel initialiser with ``conf``, create the forward KNN
        regressor, start with empty sample buffers and call ``bootstrap()``.
        """
        smpModel.__init__(self, conf)

        # presumably self.n_neighbors is set from conf by smpModel.__init__
        # -- TODO confirm
        self.fwd = KNeighborsRegressor(n_neighbors=self.n_neighbors)

        # Empty input / target buffers.
        self.X_, self.y_ = [], []

        self.bootstrap()
main.py 文件源码 项目:coursera-machine-learning-yandex 作者: dstarcev 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def calculate(X, y):
    """Search the Minkowski exponent p for a distance-weighted 5-NN regressor.

    200 evenly spaced values of p in [1, 10] are evaluated with a shuffled
    5-fold split (fixed seed 42). A candidate's score is the maximum of its
    per-fold 'mean_squared_error' scores.

    Returns a tuple (best_p, best_score); the first p reaching the highest
    score wins.
    """
    folds = KFold(len(y), n_folds=5, shuffle=True, random_state=42)
    candidates = numpy.linspace(1, 10, num=200)

    best_p = 0
    best_score = -float('inf')
    for p in candidates:
        regressor = KNeighborsRegressor(n_neighbors=5, weights='distance', p=p)
        fold_scores = cross_val_score(regressor, X, y, cv=folds,
                                      scoring='mean_squared_error')
        score = max(fold_scores)
        if score > best_score:
            best_p, best_score = p, score

    return best_p, best_score
predictorWeekly.py 文件源码 项目:stock_prediction 作者: vishwajeetv 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def knnPredictor(df):
    """Pick the best k for a KNN regressor and plot its test predictions.

    The data frame is split with ``sample(df)``. For every k from 1 to 199
    (or the number of training rows, whichever is smaller) a
    KNeighborsRegressor is fitted and the Pearson correlation between its
    test predictions and the true test targets is recorded. The k with the
    highest correlation is refitted, reported on stdout and shown as a
    scatter plot of measured vs. predicted values.

    :param df: data frame handed to ``sample``
    :return: None
    """
    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)

    # A model cannot query more neighbours than there are training rows, so
    # the search stops there instead of failing at prediction time.
    max_k = min(199, len(dataTrainX))

    corelationCoefficiantDictionary = {}
    for k in range(1, max_k + 1):
        knnModel = KNeighborsRegressor(n_neighbors=k)
        knnModel.fit(dataTrainX, dataTrainY)
        knnpredicted = knnModel.predict(dataTestX)
        # pearsonr returns (coefficient, p-value); only the coefficient is kept.
        corelationCoefficiantDictionary[k] = pearsonr(dataTestY, knnpredicted)[0]

    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)

    knnModelBest = KNeighborsRegressor(n_neighbors=bestK)
    knnModelBest.fit(dataTrainX, dataTrainY)
    print("K = ")
    print(bestK)
    print("Corelation Coeff:")
    print(corelationCoefficiantDictionary[bestK])

    knnpredictedBest = knnModelBest.predict(dataTestX)

    fig, ax = plotter.subplots()
    corelationCoefficient = pearsonr(dataTestY, knnpredictedBest)
    print(corelationCoefficient[0])
    ax.set_ylabel('Predicted KNN Weekly')
    ax.scatter(dataTestY, knnpredictedBest)
    ax.set_xlabel('Measured')
    plotter.show()
predictor.py 文件源码 项目:stock_prediction 作者: vishwajeetv 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def predictKnn(data, priceToPredict):
    """Pick the best k for a KNN regressor and print a prediction.

    For every k from 1 to 99 (or the number of training rows, whichever is
    smaller) a KNeighborsRegressor is fitted on the opening/closing training
    prices and the Pearson correlation between its test predictions and the
    true closing prices is recorded and plotted. The k with the highest
    correlation is refitted and used to predict ``priceToPredict``.

    :param data: mapping with the keys "openingPriceTrain",
        "openingPriceTest", "closingPriceTrain" and "closingPriceTest"
    :param priceToPredict: opening price input; wrapped in an array of
        length one before being passed to ``predict``
    :return: None; best k and the prediction are printed
    """
    openingPriceTrain, openingPriceTest, closingPriceTrain, closingPriceTest = \
        data["openingPriceTrain"], data["openingPriceTest"], data["closingPriceTrain"], data["closingPriceTest"]

    # Loop-invariant: the true test targets only need flattening once.
    closingPriceTestArray = np.reshape(closingPriceTest,-1)

    # A model cannot query more neighbours than there are training rows, so
    # the search stops there instead of failing at prediction time.
    max_k = min(99, len(openingPriceTrain))

    corelationCoefficiantDictionary = {}
    corelationCoefficiantArray = []
    for k in range(1, max_k + 1):
        neigh = KNeighborsRegressor(n_neighbors=k)
        neigh.fit(openingPriceTrain, closingPriceTrain)

        predictedArray = np.reshape(neigh.predict(openingPriceTest),-1)

        # pearsonr returns (coefficient, p-value); only the coefficient is kept.
        corelationCoefficient = pearsonr(closingPriceTestArray,predictedArray)[0]
        corelationCoefficiantDictionary[k] = corelationCoefficient
        corelationCoefficiantArray.append(corelationCoefficient)
    # Drawn onto the current figure; showing it is left to the caller.
    plotter.plot(corelationCoefficiantArray)

    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)
    neighBest = KNeighborsRegressor(n_neighbors=bestK)
    neighBest.fit(openingPriceTrain, closingPriceTrain)
    openingPriceToPredict = np.array([priceToPredict])
    print("K = ")
    print(bestK)
    print(neighBest.predict(openingPriceToPredict))
friedman_memory.py 文件源码 项目:mlens 作者: flennerhag 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def build_ensemble(**kwargs):
    """Build a SuperLearner with two linear models and a KNN on top.

    Keyword arguments are forwarded unchanged to the SuperLearner
    constructor. ElasticNet and Lasso (both with ``copy_X=False``) are
    added first, then a KNeighborsRegressor is added in a second ``add``
    call.
    """
    ensemble = SuperLearner(**kwargs)

    linear_models = [
        ElasticNet(copy_X=False),
        Lasso(copy_X=False),
    ]
    ensemble.add(linear_models)

    ensemble.add(KNeighborsRegressor())
    return ensemble
friedman_memory.py 文件源码 项目:mlens 作者: flennerhag 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def knn():
    """Fit a default KNeighborsRegressor on the module-level X, y and time it.

    Prints a progress message, sleeps for ``SLEEP`` seconds, then reports
    the fit duration through ``print_time``.
    """
    print("Fitting KNN...", end=" ", flush=True)
    time.sleep(SLEEP)

    # The clock starts after the pause so only the fit is measured.
    started = time.time()
    KNeighborsRegressor().fit(X, y)
    print_time(started, "Done", end="")
knn_scikit.py 文件源码 项目:Photometric-Redshifts 作者: martiansideofthemoon 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def knn_regression(K, training_data, labels, test_data, weights='distance'):
    """Fit a K-nearest-neighbour regressor and return its test predictions.

    :param K: number of neighbours
    :param training_data: training features
    :param labels: training targets
    :param test_data: features to predict for
    :param weights: neighbour weighting scheme passed to the regressor
    :return: predictions for ``test_data``
    """
    regressor = neighbors.KNeighborsRegressor(K, weights=weights)
    regressor.fit(training_data, labels)
    return regressor.predict(test_data)
prediction_utils.py 文件源码 项目:ML-Predictions 作者: ltfschoen 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def generate_model(self, regressor, qty_neighbors, algorithm, distance_type):
        """Create an unfitted model for the requested regressor name.

        "knn" builds a KNeighborsRegressor from the three tuning arguments,
        "linear" a LinearRegression with an intercept, and "logistic" a
        LogisticRegression with balanced class weights. Any other name
        yields None.
        """
        model = None
        if regressor == "knn":
            model = KNeighborsRegressor(n_neighbors=qty_neighbors,
                                        algorithm=algorithm,
                                        p=distance_type)
        elif regressor == "linear":
            # Remaining options (copy_X, n_jobs, normalize) stay at defaults.
            model = LinearRegression(fit_intercept=True)
        elif regressor == "logistic":
            model = LogisticRegression(class_weight='balanced')
        return model
tbs_ml.py 文件源码 项目:eezzy 作者: 3Blades 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def spot_check(X, y, problem_type=None):
    """Score a fixed list of regressors with ``check_model`` and print them.

    :param X: features passed to ``check_model``
    :param y: targets passed to ``check_model``
    :param problem_type: kind of task; only 'regression' is supported.
        When omitted, the value is read from the module-level name ``type``
        as before (which is the builtin if the module defines no such
        global).
    :raises ValueError: if the task is not 'regression'. Previously this
        case crashed later with an unbound ``models`` variable.
    :return: None; one "name: score" line is printed per model
    """
    if problem_type is None:
        # Backward compatible: keep honouring a module-level ``type`` setting.
        problem_type = type
    if problem_type != 'regression':
        raise ValueError(
            "spot_check only supports problem_type='regression', got %r"
            % (problem_type,))

    models = [
        (LinearRegression(), 'Ordinary Least Squares'),
        (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
        (Ridge(), 'Ridge (alpha 1.0)'),
        (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
        (Lasso(), 'Lasso (alpha 1.0)'),
        (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
        (ElasticNet(), 'ElasticNet (alpha 1.0)'),
        (DecisionTreeRegressor(), 'Decision Tree'),
        (KNeighborsRegressor(), 'K-Nearest Neighbors'),
        # Slower candidates (random forest, bagging, gradient boosting, SVR)
        # are left out of the spot check.
    ]

    splits = 5
    # All models are scored before anything is printed.
    scores = [check_model(model, splits, X, y) for model, _ in models]

    for (_, name), score in zip(models, scores):
        print('%s: %f' % (name, score))
model.py 文件源码 项目:poormining 作者: bowenpay 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def get_classifier(self, X, Y):
        """ Fit and return a uniform-weight KNeighborsRegressor.

        Despite the method name, the estimator is a regressor.

        :param X: training features
        :param Y: training targets
        :return: the fitted estimator
        """
        model = KNeighborsRegressor(weights='uniform')
        model.fit(X, Y)
        return model
value.py 文件源码 项目:KerasRL 作者: aejax 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __init__(self, S, A, n_neighbors=5, weights='uniform', algorithm='auto', metric='minkowski', memory_fit=100, memory_size=100, **kwargs):
        """Set up a KNN-based value function.

        Stores ``S`` and ``A``, creates empty state/target queues, records
        the two memory settings and builds the KNeighborsRegressor from the
        neighbour options. Remaining keyword arguments go to the parent
        constructor, after which ``update_mode`` is forced to 'set'.
        """
        self.S, self.A = S, A
        self.states, self.targets = deque(), deque()
        self.memory_fit, self.memory_size = memory_fit, memory_size
        self.count = 0

        knn_options = dict(n_neighbors=n_neighbors,
                           weights=weights,
                           algorithm=algorithm,
                           metric=metric)
        self.neigh = KNeighborsRegressor(**knn_options)

        super(KNNQ, self).__init__(**kwargs)
        # Assigned after the parent initialiser so this value is the final one.
        self.update_mode = 'set'
functions.py 文件源码 项目:binet 作者: crisjf 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def calculatepRCA(data, y ='',c='',p='',x=''):
    r'''
    Returns the yearly RCA table with an added pRCA column.

    pRCA is the probability that (RCA_{y+1} > 1) given the volume of exports (x_{cpy}),
    and the 'baseline term' (\sum_c x_{cpy}  \sum_p x_{cpy} / \sum_c \sum_p x_{cpy}).
    It is estimated with a 200-nearest-neighbors regression (uniform weights) on the
    'log(x)' and 'T' columns produced by calculateRCA_by_year, using the indicator
    (log(RCA) > 0 in the following year) as target.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw data passed to calculateRCA_by_year.
    y,c,p,x : str (optional)
        Accepted for interface compatibility but currently ignored: the column
        labels 'year', 'ccode', 'pcode' and 'x' are hard-coded below.

    Returns
    -------
    df : pandas.DataFrame
        Output of calculateRCA_by_year restricted to rows that have a following
        year, with the extra columns 'RCA_y+1' (0/1) and 'pRCA'.
    '''
    df = calculateRCA_by_year(data,y ='year',c='ccode',p='pcode',x='x',log_terms = True)

    #Compute (RCA > 1) next year and merge it.
    #Shifting the year back by one aligns next year's outcome with this year's row.
    next_year = df.copy()
    next_year['year'] = next_year['year'] - 1
    next_year['RCA_y+1'] = (next_year['log(RCA)'] > 0).astype(int)
    next_year = next_year[['year','ccode','pcode','RCA_y+1']]
    df = df.merge(next_year)

    #Prepare dataset for knn and fit.
    #.values replaces DataFrame.as_matrix(), which was removed from pandas.
    M = df[['log(x)','T','RCA_y+1']].values
    features, target = M[:,:2], M[:, 2]
    knn = neighbors.KNeighborsRegressor(n_neighbors = 200, weights = 'uniform').fit(features, target)

    #To avoid memory error, compute predictions in split X. Predictions are output pRCA
    chunks = np.array_split(features, 10)
    df['pRCA'] = np.concatenate([knn.predict(chunk) for chunk in chunks])

    return df
actinf_models.py 文件源码 项目:actinf 作者: x75 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def __init__(self, idim = 1, odim = 1):
        """Create the 5-NN forward model, initialise the base class, bootstrap.

        :param idim: forwarded to ActInfModel.__init__
        :param odim: forwarded to ActInfModel.__init__
        """
        # The regressor is created before the base-class initialiser runs.
        self.fwd = KNeighborsRegressor(n_neighbors=5)
        ActInfModel.__init__(self, idim, odim)

        # Empty input / target buffers.
        self.X_, self.y_ = [], []

        self.bootstrap()
regression.py 文件源码 项目:stacking 作者: ikki407 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def build_model(self):
            """Create a KNeighborsRegressor configured from ``self.params``."""
            model = KNeighborsRegressor(**self.params)
            return model
4.2 KNN regressor.py 文件源码 项目:ML-note 作者: JasonK93 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_KNeighborsRegressor(*data):
    '''
    Fit a default KNN regressor and print its training and testing scores.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data

    model = neighbors.KNeighborsRegressor()
    model.fit(X_train, y_train)

    train_score = model.score(X_train, y_train)
    print("Training Score:{0}".format(train_score))
    test_score = model.score(X_test, y_test)
    print("Testing Score:{0}".format(test_score))
4.2 KNN regressor.py 文件源码 项目:ML-note 作者: JasonK93 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_KNeighborsRegressor_k_w(*data):
    '''
    Plot training and testing scores against n_neighbors for each weighting scheme.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    # 100 integer neighbour counts from 1 up to (excluding) the training size.
    neighbor_counts = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')

    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    for weight in ('uniform', 'distance'):
        test_curve = []
        train_curve = []
        for count in neighbor_counts:
            model = neighbors.KNeighborsRegressor(weights=weight, n_neighbors=count)
            model.fit(X_train, y_train)
            test_curve.append(model.score(X_test, y_test))
            train_curve.append(model.score(X_train, y_train))
        axes.plot(neighbor_counts, test_curve,
                  label="testing score:weight={0}".format(weight))
        axes.plot(neighbor_counts, train_curve,
                  label="training score:weight={0}".format(weight))

    axes.legend(loc='best')
    axes.set_xlabel("K")
    axes.set_ylabel("score")
    axes.set_ylim(0, 1.05)
    axes.set_title("KNeighborsRegressor")
    plt.show()
4.2 KNN regressor.py 文件源码 项目:ML-note 作者: JasonK93 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def test_KNeighborsRegressor_k_p(*data):
    '''
    Plot training and testing scores against n_neighbors for p = 1, 2 and 10.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    # Integer neighbour counts from 1 up to (excluding) the training size;
    # the number of points is linspace's default.
    neighbor_counts = np.linspace(1, y_train.size, endpoint=False, dtype='int')

    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    for power in (1, 2, 10):
        test_curve = []
        train_curve = []
        for count in neighbor_counts:
            model = neighbors.KNeighborsRegressor(p=power, n_neighbors=count)
            model.fit(X_train, y_train)
            test_curve.append(model.score(X_test, y_test))
            train_curve.append(model.score(X_train, y_train))
        axes.plot(neighbor_counts, test_curve,
                  label="testing score:p={0}".format(power))
        axes.plot(neighbor_counts, train_curve,
                  label="training score:p={0}".format(power))

    axes.legend(loc='best')
    axes.set_xlabel("K")
    axes.set_ylabel("score")
    axes.set_ylim(0, 1.05)
    axes.set_title("KNeighborsRegressor")
    plt.show()
rice_run.py 文件源码 项目:Black-Swan 作者: 12190143 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def knn(train_sample, validation_sample, features, seed):
    """Predict volume with a 1-nearest-neighbour model trained in log space.

    The regressor (p=1, distance weights) is fitted on
    ``log1p(volume) / log(base)`` with base e. Predictions are mapped back
    with ``base ** prediction - 1``, reported through ``print_mape`` and
    returned. ``seed`` is accepted but not used.
    """
    log_base = np.e
    log_volume = np.log1p(train_sample['volume']) / np.log(log_base)

    regressor = KNeighborsRegressor(n_neighbors=1, weights='distance',
                                    algorithm='auto', leaf_size=30, p=1)
    regressor.fit(train_sample[features], log_volume)

    log_prediction = regressor.predict(validation_sample[features])
    knn_prob = np.power(log_base, log_prediction) - 1
    print_mape(validation_sample['volume'], knn_prob, 'KNN')
    return knn_prob


问题


面经


文章

微信
公众号

扫码关注公众号