python类gaussian_kde()的实例源码

density.py 文件源码 项目:plotnine 作者: has2k1 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def kde_scipy(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scipy

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the desity will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions
    """
    kde = gaussian_kde(data.T, **kwargs)
    return kde.evaluate(grid.T)
graph_ABC.py 文件源码 项目:abcpy 作者: eth-cscs 项目源码 文件源码 阅读 54 收藏 0 点赞 0 评论 0
def plot(samples, path = None, true_value = 5, title = 'ABC posterior'): 
    Bayes_estimate = np.mean(samples, axis = 0)
    theta = true_value
    xmin, xmax = max(samples[:,0]), min(samples[:,0])
    positions = np.linspace(xmin, xmax, samples.shape[0])
    gaussian_kernel = gaussian_kde(samples[:,0].reshape(samples.shape[0],))
    values = gaussian_kernel(positions)
    plt.figure()
    plt.plot(positions,gaussian_kernel(positions))
    plt.plot([theta, theta],[min(values), max(values)+.1*(max(values)-min(values))])
    plt.plot([Bayes_estimate, Bayes_estimate],[min(values), max(values)+.1*(max(values)-min(values))])
    plt.ylim([min(values), max(values)+.1*(max(values)-min(values))])
    plt.xlabel(r'$\theta$')
    plt.ylabel('density')
    #plt.xlim([0,1])
    plt.rc('axes', labelsize=15) 
    plt.legend(loc='best', frameon=False, numpoints=1)
    font = {'size'   : 15}
    plt.rc('font', **font)
    plt.title(title)
    if path is not None :
        plt.savefig(path)
    return plt
lnn.py 文件源码 项目:lnn 作者: wgao9 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def KDE_entropy(x):
    '''
        Estimate the entropy H(X) from samples {x_i}_{i=1}^N
        Using Kernel Density Estimator (KDE) and resubstitution

        Input: x: 2D list of size N*d_x

        Output: one number of H(X)
    '''

    N = len(x)
    d = len(x[0])
    local_est = np.zeros(N)
    for i in range(N):
        kernel = sst.gaussian_kde(x.transpose())
        local_est[i] = kernel.evaluate(x[i].transpose())
    return -np.mean(map(log,local_est))
model.py 文件源码 项目:5th_place_solution_facebook_check_ins 作者: aikinogard 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def kde_opt2c(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt2c model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            kde = gaussian_kde(X_feat, "scott")
            y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
model.py 文件源码 项目:5th_place_solution_facebook_check_ins 作者: aikinogard 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def kde_opt3c(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt3c model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            kde = gaussian_kde(X_feat, "scott")
            kde = gaussian_kde(X_feat, kde.factor * 0.741379)
            y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
utils.py 文件源码 项目:lens 作者: ASIDataScience 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def _scipy_bivariate_kde(x, y, bw, gridsize, cut, clip):
    """Compute a bivariate kde using scipy."""
    data = np.c_[x, y]
    kde = stats.gaussian_kde(data.T)
    data_std = data.std(axis=0, ddof=1)
    if isinstance(bw, string_types):
        bw = "scotts" if bw == "scott" else bw
        bw_x = getattr(kde, "%s_factor" % bw)() * data_std[0]
        bw_y = getattr(kde, "%s_factor" % bw)() * data_std[1]
    elif np.isscalar(bw):
        bw_x, bw_y = bw, bw
    else:
        msg = ("Cannot specify a different bandwidth for each dimension "
               "with the scipy backend. You should install statsmodels.")
        raise ValueError(msg)
    x_support = _kde_support(data[:, 0], bw_x, gridsize, cut, clip[0])
    y_support = _kde_support(data[:, 1], bw_y, gridsize, cut, clip[1])
    xx, yy = np.meshgrid(x_support, y_support)
    z = kde([xx.ravel(), yy.ravel()]).reshape(xx.shape)
    return xx, yy, z
plotting.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
              column_num=None, stacking_id=None, **kwds):
        from scipy.stats import gaussian_kde
        from scipy import __version__ as spv

        y = remove_na(y)

        if LooseVersion(spv) >= '0.11.0':
            gkde = gaussian_kde(y, bw_method=bw_method)
        else:
            gkde = gaussian_kde(y)
            if bw_method is not None:
                msg = ('bw_method was added in Scipy 0.11.0.' +
                       ' Scipy version in use is %s.' % spv)
                warnings.warn(msg)

        y = gkde.evaluate(ind)
        lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
        return lines
rplot.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def work(self, fig=None, ax=None):
        """Draw a one dimensional kernel density plot.
        You can specify either a figure or an axis to draw on.

        Parameters:
        -----------
        fig: matplotlib figure object
        ax: matplotlib axis object to draw on

        Returns:
        --------
        fig, ax: matplotlib figure and axis objects
        """
        if ax is None:
            if fig is None:
                return fig, ax
            else:
                ax = fig.gca()
        from scipy.stats import gaussian_kde
        x = self.data[self.aes['x']]
        gkde = gaussian_kde(x)
        ind = np.linspace(x.min(), x.max(), 200)
        ax.plot(ind, gkde.evaluate(ind))
        return fig, ax
bam_accuracy.py 文件源码 项目:wub 作者: nanoporetech 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def estimate_mode(acc):
    """ Estimate the mode of a set of float values between 0 and 1.

    :param acc: Data.
    :returns: The mode of the sample
    :rtype: float
    """
    # Taken from sloika.
    if len(acc) > 1:
        da = gaussian_kde(acc)
        optimization_result = minimize_scalar(lambda x: -da(x), bounds=(0, 1), method='Bounded')
        if optimization_result.success:
            try:
                mode = optimization_result.x[0]
            except IndexError:
                mode = optimization_result.x
        else:
            sys.stderr.write("Mode computation failed")
            mode = 0
    else:
        mode = acc[0]
    return mode
qa_graphs.py 文件源码 项目:ndmg 作者: neurodata 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def density(data, nbins=500, rng=None):
    """
    Computes density for metrics which return vectors

    Required parameters:
        data:
            - Dictionary of the vectors of data
    """
    density = OrderedDict()
    xs = OrderedDict()
    for subj in data:
        hist = np.histogram(data[subj], nbins)
        hist = np.max(hist[0])
        dens = gaussian_kde(data[subj])
        if rng is not None:
            xs[subj] = np.linspace(rng[0], rng[1], nbins)
        else:
            xs[subj] = np.linspace(0, np.max(data[subj]), nbins)
        density[subj] = dens.evaluate(xs[subj])*np.max(data[subj]*hist)
    return {"xs": xs, "pdfs": density}
priors.py 文件源码 项目:openml-pimp 作者: janvanrijn 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def __init__(self, hyperparameter, param_name, data, oob_strategy='resample', bandwith=0.4):
        if oob_strategy not in ['resample', 'round', 'ignore']:
            raise ValueError()
        self.oob_strategy = oob_strategy
        self.param_name = param_name
        self.hyperparameter = hyperparameter
        reshaped = np.reshape(data, (len(data), 1))

        if self.hyperparameter.log:
            if isinstance(self.hyperparameter, UniformIntegerHyperparameter):
                # self.probabilities = {val: self.distrib.pdf(np.log2(val)) for val in range(self.hyperparameter.lower, self.hyperparameter.upper)}
                raise ValueError('Log Integer hyperparameter not supported: %s' %param_name)
            # self.distrib = gaussian_kde(np.log2(data))
            # self.distrib = KernelDensity(kernel='gaussian').fit(np.log2(np.reshape(data, (len(data), 1))))
            self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwith).fit(np.log2(reshaped))
        else:
            # self.distrib = gaussian_kde(data)
            self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwith).fit(reshaped)
        pass
test_ball_tree.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)

    for h in [0.01, 0.1, 1]:
        bt = BallTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            raise SkipTest("Old version of scipy, doesn't accept "
                           "explicit bandwidth.")

        dens_bt = bt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)

        assert_array_almost_equal(dens_bt, dens_gkde, decimal=3)
test_kd_tree.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 46 收藏 0 点赞 0 评论 0
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)

    for h in [0.01, 0.1, 1]:
        kdt = KDTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            raise SkipTest("Old scipy, does not accept explicit bandwidth.")

        dens_kdt = kdt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)

        assert_array_almost_equal(dens_kdt, dens_gkde, decimal=3)
metrics.py 文件源码 项目:bnn-analysis 作者: myshkov 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def kl_divergence(p_samples, q_samples):
    # estimate densities
    # p_samples = np.nan_to_num(p_samples)
    # q_samples = np.nan_to_num(q_samples)

    if isinstance(p_samples, tuple):
        idx, p_samples = p_samples

        if idx not in _cached_p_pdf:
            _cached_p_pdf[idx] = sc.gaussian_kde(p_samples)

        p_pdf = _cached_p_pdf[idx]
    else:
        p_pdf = sc.gaussian_kde(p_samples)

    q_pdf = sc.gaussian_kde(q_samples)

    # joint support
    left = min(min(p_samples), min(q_samples))
    right = max(max(p_samples), max(q_samples))

    p_samples_num = p_samples.shape[0]
    q_samples_num = q_samples.shape[0]

    # quantise
    lin = np.linspace(left, right, min(max(p_samples_num, q_samples_num), MAX_GRID_POINTS))
    p = p_pdf.pdf(lin)
    q = q_pdf.pdf(lin)

    # KL
    kl = min(sc.entropy(p, q), MAX_KL)

    return kl
metrics.py 文件源码 项目:bnn-analysis 作者: myshkov 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def ci_metrics(p_samples, q_samples, alpha=.95):
    if isinstance(p_samples, tuple):
        idx, p_samples = p_samples

    # compute CIs
    p_left, p_right = _compute_ci(p_samples, alpha)
    q_left, q_right = _compute_ci(q_samples, alpha)

    precision = 0
    iou = 0
    recall = 0
    f1 = 0

    if (p_right > q_left) and (q_right > p_left):
        # intersection
        int_left = max(q_left, p_left)
        int_right = min(p_right, q_right)

        union_left = min(p_left, q_left)
        union_right = max(p_right, q_right)

        iou = (int_right - int_left) / (union_right - union_left)

        precision = (int_right - int_left) / (q_right - q_left)
        recall = (int_right - int_left) / (p_right - p_left)

        f1 = 2 * precision * recall / (precision + recall)

        # estimate densities
        # p_pdf = sc.gaussian_kde(p_samples)
        # q_pdf = sc.gaussian_kde(q_samples)
        #
        # precision = q_pdf.integrate_box(int_left, int_right) / alpha
        # recall = p_pdf.integrate_box(int_left, int_right) / alpha

    return iou, precision, recall, f1
prep_wikiqa_data.py 文件源码 项目:answer-triggering 作者: jiez-osu 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def visualize_distribution(data):
    """ Visualize bucket distribution, a distribution over:
    (sentence_number, question_length, sentence_length)"""
    bucket_distribution = []
    for qid, value in data.iteritems():
      q_length = len(value['question'])
      s_number = len(value['sentences'])
      s_length = max([len(sent) for sent in value['sentences'].itervalues()])
      bucket_distribution.append([s_number, q_length, s_length])

    # pdb.set_trace()
    distribution = np.transpose(np.array(bucket_distribution))
    kde = stats.gaussian_kde(distribution)
    density = kde(distribution)

    idx = density.argsort()
    x = distribution[0, idx]
    y = distribution[1, idx]
    z = distribution[2, idx]
    density = density[idx]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z, c=density)
    ax.set_xlabel('sentence number')
    ax.set_ylabel('question length')
    ax.set_zlabel('sentence length')
    plt.show()
target.py 文件源码 项目:uncover-ml 作者: GeoscienceAustralia 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def fit(self, y):

        self.lb = y.min() - 1e6
        self.ub = y.max() + 1e6
        self.kde = gaussian_kde(y)
early_stopping.py 文件源码 项目:expan 作者: zalando 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def bayes_factor(x, y, distribution='normal', num_iters=25000, inference='sampling'):
    """
    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        distribution: name of the KPI distribution model, which assumes a
            Stan model file with the same name exists
        num_iters: number of iterations of bayes sampling
        inference: sampling or variational inference method for approximation the posterior

    Returns:
        dictionary with statistics
    """
    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(x, y, distribution=distribution, num_iters=num_iters,
                                                   inference=inference)
    trace_normalized_effect_size = get_trace_normalized_effect_size(distribution, traces)
    trace_absolute_effect_size = traces['delta']

    kde = gaussian_kde(trace_normalized_effect_size)
    prior = cauchy.pdf(0, loc=0, scale=1)
    # BF_01
    bf = kde.evaluate(0)[0] / prior
    stop = bf > 3 or bf < 1 / 3.

    credibleMass = 0.95                # another magic number
    leftOut      = 1.0 - credibleMass
    p1           = round(leftOut/2.0, 5)
    p2           = round(1.0 - leftOut/2.0, 5)
    credible_interval = HDI_from_MCMC(trace_absolute_effect_size, credibleMass)

    return {'stop'                  : bool(stop),
            'delta'                 : float(mu_x - mu_y),
            'confidence_interval'   : [{'percentile': p*100, 'value': v} for p, v in zip([p1, p2], credible_interval)],
            'treatment_sample_size' : int(n_x),
            'control_sample_size'   : int(n_y),
            'treatment_mean'        : float(mu_x),
            'control_mean'          : float(mu_y),
            'number_of_iterations'  : num_iters}
F1_communities.py 文件源码 项目:f1-communities 作者: GiulioRossetti 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def __plot_scatter(prl, max_points=None, fileout="PR_scatter.png", title="Precision Recall Scatter Plot"):
        """

        :param prl: list of tuples (precision, recall)
        :param max_points: max number of tuples to plot
        :param fileout: output filename
        :param title: plot title
        """

        prs = [i[0] for i in prl]
        recs = [i[1] for i in prl]

        if max_points is not None:
            prs = prs[:max_points]
            recs = recs[:max_points]

        xy = np.vstack([prs, recs])
        z = gaussian_kde(xy)(xy)
        x = np.array(prs)
        y = np.array(recs)
        base = min(z)
        rg = max(z) - base
        z = np.array(z)
        idx = z.argsort()
        x, y, z = x[idx], y[idx], (z[idx] - base) / rg
        fig, ax = plt.subplots()
        sca = ax.scatter(x, y, c=z, s=50, edgecolor='', cmap=plt.cm.jet)
        fig.colorbar(sca)
        plt.ylabel("Recall", fontsize=20, labelpad=15)
        plt.xlabel("Precision", fontsize=20)
        plt.ylim([-0.01, 1.01])
        plt.xlim([-0.01, 1.01])
        plt.title(title)
        if matplotlib.get_backend().lower() in ['agg', 'macosx']:
            fig.set_tight_layout(True)
        else:
            fig.tight_layout()

        plt.savefig("%s" % fileout)
_rangeslider.py 文件源码 项目:orange3-timeseries 作者: biolab 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def setHistogram(self, values=None, bins=None, use_kde=False, histogram=None):
        """ Set background histogram (or density estimation, violin plot)

        The histogram of bins is calculated from values, optionally as a
        Gaussian KDE. If histogram is provided, its values are used directly
        and other parameters are ignored.
        """
        if (values is None or not len(values)) and histogram is None:
            self.setPixmap(None)
            return
        if histogram is not None:
            self._histogram = hist = histogram
        else:
            if bins is None:
                bins = min(100, max(10, len(values) // 20))
            if use_kde:
                hist = gaussian_kde(values,
                                    None if isinstance(use_kde, bool) else use_kde)(
                    np.linspace(np.min(values), np.max(values), bins))
            else:
                hist = np.histogram(values, bins)[0]
            self._histogram = hist = hist / hist.max()

        HEIGHT = self.rect().height() / 2
        OFFSET = HEIGHT * .3
        pixmap = QPixmap(QSize(len(hist), 2 * (HEIGHT + OFFSET)))  # +1 avoids right/bottom frame border shadow
        pixmap.fill(Qt.transparent)
        painter = QPainter(pixmap)
        painter.setPen(QPen(Qt.darkGray))
        for x, value in enumerate(hist):
            painter.drawLine(x, HEIGHT * (1 - value) + OFFSET,
                             x, HEIGHT * (1 + value) + OFFSET)

        if self.orientation() != Qt.Horizontal:
            pixmap = pixmap.transformed(QTransform().rotate(-90))

        self.setPixmap(pixmap)
tools.py 文件源码 项目:lddmm-ot 作者: jeanfeydy 项目源码 文件源码 阅读 42 收藏 0 点赞 0 评论 0
def make_kde(self):
        """
        Makes the kernel density estimation(s) for create_distplot().

        This is called when curve_type = 'kde' in create_distplot().

        :rtype (list) curve: list of kde representations
        """
        curve = [None] * self.trace_number
        for index in range(self.trace_number):
            self.curve_x[index] = [self.start[index] +
                                   x * (self.end[index] - self.start[index])
                                   / 500 for x in range(500)]
            self.curve_y[index] = (scipy.stats.gaussian_kde
                                   (self.hist_data[index])
                                   (self.curve_x[index]))

            if self.histnorm == ALTERNATIVE_HISTNORM:
                self.curve_y[index] *= self.bin_size[index]

        for index in range(self.trace_number):
            curve[index] = dict(type='scatter',
                                x=self.curve_x[index],
                                y=self.curve_y[index],
                                xaxis='x1',
                                yaxis='y1',
                                mode='lines',
                                name=self.group_labels[index],
                                legendgroup=self.group_labels[index],
                                showlegend=False if self.show_hist else True,
                                marker=dict(color=self.colors[index]))
        return curve
plot_heating_correlations.py 文件源码 项目:CAAPR 作者: Stargrazer82301 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def getDensityKernel(x,y):

    # filter NaNs and infinite numbers
    idx = (np.isfinite(x) * np.isfinite(y))
    x = x[idx]
    y = y[idx]

    # Calculate the point density
    xy = np.vstack([x,y])
    z = gaussian_kde(xy)(xy)
    idx = z.argsort()
    return x[idx], y[idx], z[idx]

# RADIAL PROFILES
functions.py 文件源码 项目:Orbit-Fitting 作者: jacob-i-skinner 项目源码 文件源码 阅读 45 收藏 0 点赞 0 评论 0
def maximize(samples):
    '''
    Use Kernel Density Estimation to find a continuous PDF
    from the discrete sampling. Maximize that distribution.

    Parameters
    ----------
    samples : list (shape = (nsamples, ndim))
        Observations drawn from the distribution which is
        going to be fit.

    Returns
    -------
    maximum : list(ndim)
        Maxima of the probability distributions along each axis.
    '''
    from scipy.optimize import minimize
    from scipy.stats import gaussian_kde as kde

    # Give the samples array the proper shape.
    samples = np.transpose(samples)

    # Estimate the continous PDF.
    estimate = kde(samples)

    # Take the minimum of the estimate.
    def PDF(x):
        return -estimate(x)

    # Initial guess on maximum is made from 50th percentile of the sample.
    p0 = [np.percentile(samples[i], 50) for i in range(samples.shape[0])]

    # Calculate the maximum of the distribution.
    maximum = minimize(PDF, p0).x

    return maximum
model.py 文件源码 项目:5th_place_solution_facebook_check_ins 作者: aikinogard 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def kde_opt4(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt4 model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            BGK10_output = kdeBGK10(X_feat)
            if BGK10_output is None:
                kde = gaussian_kde(X_feat, "scott")
                kde = gaussian_kde(X_feat, kde.factor * 0.741379)
                y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
            else:
                bandwidth, mesh, density = BGK10_output
                kde = KernelDensity(kernel='gaussian', metric='manhattan', bandwidth=bandwidth)
                kde.fit(X_feat[:, np.newaxis])
                y_test_pred_i *= np.exp(kde.score_samples(df_cell_test_feats_kde[feat].values[:, np.newaxis]))
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
pumil.py 文件源码 项目:pumil 作者: levelfour 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def weighted_kde(Bn, conf):
  # N.B. apply WKDE function to the instance multiplied by its confindence value
  weighted_ins = np.vstack([conf[i] * B.data() for i, B in enumerate(Bn)])
  return stats.gaussian_kde(weighted_ins.T)
utils.py 文件源码 项目:lens 作者: ASIDataScience 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def _scipy_univariate_kde(data, bw, gridsize, cut, clip):
    """Compute a univariate kernel density estimate using scipy."""
    kde = stats.gaussian_kde(data, bw_method=bw)
    if isinstance(bw, string_types):
        bw = "scotts" if bw == "scott" else bw
        bw = getattr(kde, "%s_factor" % bw)() * np.std(data)
    grid = _kde_support(data, bw, gridsize, cut, clip)
    y = kde(grid)
    return grid, y
plot.py 文件源码 项目:CElegansBehaviour 作者: ChristophKirst 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def plot_distributions(data, cmap = plt.cm.Spectral_r, percentiles =  [5, 25, 50, 75, 95], percentiles_colors = ['gray', 'gray', 'red', 'gray', 'gray']):
  """Plots the data point color as local density"""
  npoints, ntimes = data.shape;

  for t in range(ntimes):
    cols = gaussian_kde(data[:,t])(data[:,t]);
    idx = cols.argsort();

    plt.scatter(np.ones(npoints)*t, data[idx,t], c=cols[idx], s=30, edgecolor='face', cmap = cmap)

  pct = np.percentile(data, percentiles, axis = 0); 
  for i in range(len(percentiles)):
    #plt.plot(iqr[s0][i,:],  c = plt.cm.Spectral(i/5.0), linewidth = 3);
    plt.plot(pct[i,:], c = percentiles_colors[i], linewidth = 2);
plot.py 文件源码 项目:CElegansBehaviour 作者: ChristophKirst 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def plot_embedding_contours(Y, contours = 10, cmap = plt.cm.plasma, xmin = None, xmax = None, ymin = None, ymax = None, npts = 100, density = False):
  """Plot a 2d density map of the embedding Y"""

  if xmin is None:
    xmin = np.min(Y[:,0]);
  if xmax is None:
    xmax = np.max(Y[:,0]);
  if ymin is None:
    ymin = np.min(Y[:,1]);
  if ymax is None:
    ymax = np.max(Y[:,1]);   

  #print xmin,xmax,ymin,ymax
  dx = float(xmax-xmin) / npts;
  dy = float(xmax-xmin) / npts;
  xx, yy = np.mgrid[xmin:xmax:dx, ymin:ymax:dy]
  positions = np.vstack([xx.ravel(), yy.ravel()])
  kernel = st.gaussian_kde(Y.T);
  #print xx.shape
  #print positions.shape
  #print Y.shape
  #print kernel(positions).shape
  f = kernel(positions)
  f = np.reshape(f, xx.shape)
  #print f.shape

  ax = plt.gca()
  ax.set_xlim(xmin, xmax)
  ax.set_ylim(ymin, ymax)
  # Contourf plot
  if density:
    cfset = ax.contourf(xx, yy, f, cmap=cmap)
  ## Or kernel density estimate plot instead of the contourf plot
  #ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
  # Contour plot
  if contours is not None:
    cset = ax.contour(xx, yy, f, contours, cmap=cmap)
  # Label plot
  #ax.clabel(cset, inline=1, fontsize=10)
  ax.set_xlabel('Y0')
  ax.set_ylabel('Y1')
plot.py 文件源码 项目:CElegansBehaviour 作者: ChristophKirst 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def plot_distributions(data, cmap = plt.cm.Spectral_r, percentiles =  [5, 25, 50, 75, 95], percentiles_colors = ['gray', 'gray', 'red', 'gray', 'gray']):
  """Plots the data point color as local density"""
  npoints, ntimes = data.shape;

  for t in range(ntimes):
    cols = gaussian_kde(data[:,t])(data[:,t]);
    idx = cols.argsort();

    plt.scatter(np.ones(npoints)*t, data[idx,t], c=cols[idx], s=30, edgecolor='face', cmap = cmap)

  pct = np.percentile(data, percentiles, axis = 0); 
  for i in range(len(percentiles)):
    #plt.plot(iqr[s0][i,:],  c = plt.cm.Spectral(i/5.0), linewidth = 3);
    plt.plot(pct[i,:], c = percentiles_colors[i], linewidth = 2);
cute_plot.py 文件源码 项目:cellcomplex 作者: VirtualPlants 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def violin_plot(figure,X,data,colors,xlabel="",ylabel="",n_points=400,violin_width=None,linewidth=3,marker_size=20):
    font = fm.FontProperties(family = 'Trebuchet', weight ='light')
    #font = fm.FontProperties(family = 'CenturyGothic',fname = '/Library/Fonts/Microsoft/Century Gothic', weight ='light')
    figure.patch.set_facecolor('white')
    axes = figure.add_subplot(111)
    if violin_width is None:
        if len(X)>1:
            violin_width = ((np.array(X)[1:] - np.array(X)[:-1]).mean())/3.
        else:
            violin_width = 0.33
    for x in xrange(len(X)):
        color = colors[x]

        Y = gaussian_kde(data[x])
        D_smooth = np.linspace(np.percentile(Y.dataset,1),np.percentile(Y.dataset,99),n_points)
        Y_smooth = Y.evaluate(D_smooth)
        Y_smooth = violin_width*Y_smooth/Y_smooth.max()
        axes.fill_betweenx(D_smooth,X[x],X[x]+Y_smooth,facecolor=color,alpha=0.1)
        axes.fill_betweenx(D_smooth,X[x],X[x]-Y_smooth,facecolor=color,alpha=0.1)
        axes.plot(X[x]+Y_smooth,D_smooth,color=color,linewidth=linewidth,alpha=0.8)
        axes.plot(X[x]-Y_smooth,D_smooth,color=color,linewidth=linewidth,alpha=0.8)
        axes.plot([X[x]-Y_smooth[0],X[x]+Y_smooth[0]],[D_smooth[0],D_smooth[0]],color=color,linewidth=linewidth,alpha=0.8)
        axes.plot([X[x]-Y_smooth[-1],X[x]+Y_smooth[-1]],[D_smooth[-1],D_smooth[-1]],color=color,linewidth=linewidth,alpha=0.8)
        axes.plot(X[x]-Y_smooth,D_smooth,color=color,linewidth=linewidth,alpha=0.8)
        axes.plot(X[x],np.percentile(data[x],50),'o',markersize=marker_size,markeredgewidth=linewidth,color=color)
        axes.plot([X[x],X[x]],[np.percentile(data[x],25),np.percentile(data[x],75)],color=color,linewidth=2*linewidth,alpha=0.5)
    axes.set_xlim(min(X)-1,max(X)+1)
    axes.set_xlabel(xlabel,fontproperties=font, size=10, style='italic')
    axes.set_xticklabels(axes.get_xticks(),fontproperties=font, size=12)
    axes.set_ylabel(ylabel, fontproperties=font, size=10, style='italic')
    axes.set_yticklabels(axes.get_yticks(),fontproperties=font, size=12)


问题


面经


文章

微信
公众号

扫码关注公众号