Example source code for Python's sum() (collected from open-source projects)

Source file: sudoku_steps.py (project: pyku, author: dubvulture)
def remove_artifacts(self, image):
        """
        Remove the connected components that are not within the parameters
        Operates in place
        :param image: sudoku's thresholded image w/o grid
        :return: None
        """
        labeled, features = label(image, structure=CROSS)
        lbls = np.arange(1, features + 1)
        areas = extract_feature(image, labeled, lbls, np.sum,
                                np.uint32, 0)
        sides = extract_feature(image, labeled, lbls, min_side,
                                np.float32, 0, True)
        diags = extract_feature(image, labeled, lbls, diagonal,
                                np.float32, 0, True)

        for index in lbls:
            area = areas[index - 1] / 255
            side = sides[index - 1]
            diag = diags[index - 1]
            if side < 5 or side > 20 \
                    or diag < 15 or diag > 25 \
                    or area < 40:
                image[labeled == index] = 0
        return None
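For reference, the per-component area measurement can be reproduced with scipy.ndimage alone; a minimal sketch (CROSS, extract_feature, min_side and diagonal above are pyku helpers, and the toy image below is illustrative):

import numpy as np
from scipy import ndimage

img = np.zeros((10, 10), np.uint8)
img[2:5, 2:5] = 255                  # a single 3x3 component
labeled, n = ndimage.label(img)      # default structure is 4-connected, like CROSS
lbls = np.arange(1, n + 1)
# per-component pixel sums, analogous to extract_feature(..., np.sum, ...)
areas = ndimage.labeled_comprehension(img, labeled, lbls, np.sum, np.uint32, 0)
print(areas / 255)                   # -> [9.]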
Source file: sudoku.py (project: pyku, author: dubvulture)
def remove_artifacts(self, image):
        """
        Remove the connected components that are not within the parameters
        Operates in place
        :param image: sudoku's thresholded image w/o grid
        :return: None
        """
        labeled, features = label(image, structure=CROSS)
        lbls = np.arange(1, features + 1)
        areas = extract_feature(image, labeled, lbls, np.sum,
                                np.uint32, 0)
        sides = extract_feature(image, labeled, lbls, min_side,
                                np.float32, 0, True)
        diags = extract_feature(image, labeled, lbls, diagonal,
                                np.float32, 0, True)

        for index in lbls:
            area = areas[index - 1] / 255
            side = sides[index - 1]
            diag = diags[index - 1]
            if side < 5 or side > 20 \
                    or diag < 15 or diag > 25 \
                    or area < 40:
                image[labeled == index] = 0
        return None
Source file: genderclassifier.py (project: namegenderclassifier, author: joaoalvarenga)
def evaluate(self, dataset):
        predictions = self.predict(dataset[:,0])
        confusion_matrix = sklearn_confusion_matrix(dataset[:,1], predictions, labels=self.__classes)

        precisions = []
        recalls = []

        for gender in self.__classes:
            idx = self.__classes_indexes[gender]
            precision = 1
            recall = 1
            # sklearn's confusion matrix puts true labels on rows and
            # predictions on columns, so precision normalizes by the column
            # sum and recall by the row sum (the original had the two swapped)
            if np.sum(confusion_matrix[:, idx]) > 0:
                precision = confusion_matrix[idx][idx]/np.sum(confusion_matrix[:, idx])
            if np.sum(confusion_matrix[idx, :]) > 0:
                recall = confusion_matrix[idx][idx]/np.sum(confusion_matrix[idx, :])
            precisions.append(precision)
            recalls.append(recall)

        precision = np.mean(precisions)
        recall = np.mean(recalls)
        f1 = (2*(precision*recall))/float(precision+recall)
        accuracy = np.sum(confusion_matrix.diagonal())/float(np.sum(confusion_matrix))

        return precision, recall, accuracy, f1
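A quick hand check of those confusion-matrix identities with illustrative numbers (rows are true classes, columns are predictions, per sklearn's convention):

import numpy as np

cm = np.array([[8., 2.],
               [1., 9.]])
precision_0 = cm[0, 0] / np.sum(cm[:, 0])         # 8 / 9  ~= 0.889
recall_0    = cm[0, 0] / np.sum(cm[0, :])         # 8 / 10 = 0.8
accuracy    = np.sum(cm.diagonal()) / np.sum(cm)  # 17 / 20 = 0.85
print(precision_0, recall_0, accuracy)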
Source file: tdose_utilities.py (project: TDOSE, author: kasperschmidt)
def reshape_array(array, newsize, pixcombine='sum'):
    """
    Reshape an array to a give size using either the sum, mean or median of the pixels binned

    Note that the old array dimensions have to be multiples of the new array dimensions

    --- INPUT ---
    array           Array to reshape (combine pixels)
    newsize         New size of array
    pixcombine      The method to combine the pixels with. Choices are sum, mean and median

    """
    sh = newsize[0], array.shape[0]//newsize[0], newsize[1], array.shape[1]//newsize[1]
    if pixcombine == 'sum':
        reshapedarray = array.reshape(sh).sum(-1).sum(1)
    elif pixcombine == 'mean':
        reshapedarray = array.reshape(sh).mean(-1).mean(1)
    elif pixcombine == 'median':
        # ndarrays have no .median() method, so use np.median over each axis
        reshapedarray = np.median(np.median(array.reshape(sh), axis=-1), axis=1)
    else:
        raise ValueError('Invalid pixcombine: ' + str(pixcombine))

    return reshapedarray
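A quick illustration of the reshape-and-sum rebinning used above, shrinking a 4x6 array to 2x3 (toy data):

import numpy as np

a = np.arange(24).reshape(4, 6)
sh = 2, 4 // 2, 3, 6 // 3              # (new0, bin0, new1, bin1)
binned = a.reshape(sh).sum(-1).sum(1)  # each output pixel sums one 2x2 block
print(binned)                          # [[14 22 30]
                                       #  [62 70 78]]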
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
Source file: run_mpi.py (project: pylspm, author: lseman)
def do_work_pso(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split']
                                     == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
                                  reg, 0, 50, HOC='true'))

            resid = results[i].residuals()[3]
            f1.append(resid)
        except Exception:
            f1.append(10000)
#    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Source file: run_mpi.py (project: pylspm, author: lseman)
def do_work_ga(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split']
                                     == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
                                  reg, 0, 50, HOC='true'))

            resid = results[i].residuals()[3]
            f1.append(resid)
        except Exception:
            f1.append(10000)
    return (1 / np.sum(f1))

# Main
Source file: pylspm.py (project: pylspm, author: lseman)
def xloads(self):
        # Xloadings
        A = self.data_.transpose().values
        B = self.fscores.transpose().values
        A_mA = A - A.mean(1)[:, None]
        B_mB = B - B.mean(1)[:, None]

        ssA = (A_mA**2).sum(1)
        ssB = (B_mB**2).sum(1)

        xloads_ = (np.dot(A_mA, B_mB.T) /
                   np.sqrt(np.dot(ssA[:, None], ssB[None])))
        xloads = pd.DataFrame(
            xloads_, index=self.manifests, columns=self.latent)

        return xloads
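The expression above is a vectorized Pearson correlation between every manifest/latent column pair; a self-contained sketch checking it against np.corrcoef (random data, shapes are illustrative):

import numpy as np

A = np.random.rand(3, 50)   # e.g. 3 manifest variables, 50 observations
B = np.random.rand(2, 50)   # e.g. 2 latent scores
A_mA = A - A.mean(1)[:, None]
B_mB = B - B.mean(1)[:, None]
ssA = (A_mA**2).sum(1)
ssB = (B_mB**2).sum(1)
xloads = np.dot(A_mA, B_mB.T) / np.sqrt(np.dot(ssA[:, None], ssB[None]))
assert np.allclose(xloads, np.corrcoef(A, B)[:3, 3:])  # off-diagonal block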
Source file: boot_mpi.py (project: pylspm, author: lseman)
def do_work_pso(item, population, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo):
    # item, population and nclusters are used below but were missing from the
    # original signature; they are added so the function is self-contained
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split']
                                     == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
                                  reg, 0, 50, HOC='true'))

            resid = results[i].residuals()[3]
            f1.append(resid)
        except Exception:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Source file: boot.py (project: pylspm, author: lseman)
def do_work_pso(self, item):
        output = pd.DataFrame(self.population[item].position)
        output.columns = ['Split']
        dataSplit = pd.concat([self.data, output], axis=1)
        f1 = []
        results = []
        for i in range(self.nclusters):
            dataSplited = (dataSplit.loc[dataSplit['Split']
                                         == i]).drop('Split', axis=1)
            dataSplited.index = range(len(dataSplited))

            try:
                results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                      self.reg, 0, 50, HOC='true'))

                resid = results[i].residuals()[3]
                f1.append(resid)
            except Exception:
                f1.append(10000)
        print((1 / np.sum(f1)))
        return (1 / np.sum(f1))
Source file: boot.py (project: pylspm, author: lseman)
def do_work_tabu(self, item):
        output = pd.DataFrame(self.population[item])
        output.columns = ['Split']
        dataSplit = pd.concat([self.data, output], axis=1)
        f1 = []
        results = []
        for i in range(self.nclusters):
            dataSplited = (dataSplit.loc[dataSplit['Split']
                                         == i]).drop('Split', axis=1)
            dataSplited.index = range(len(dataSplited))

            try:
                results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                      self.reg, 0, 50, HOC='true'))

                resid = results[i].residuals()[3]
                f1.append(resid)
            except Exception:
                f1.append(10000)

        cost = (np.sum(f1))
        print(1 / cost)
        return [self.population[item], cost]
Source file: adequacy.py (project: pylspm, author: lseman)
def KMO(data):

    cor_ = pd.DataFrame.corr(data)
    invCor = np.linalg.inv(cor_)
    rows = cor_.shape[0]
    cols = cor_.shape[1]
    A = np.ones((rows, cols))

    for i in range(rows):
        for j in range(i, cols):
            A[i, j] = - (invCor[i, j]) / (np.sqrt(invCor[i, i] * invCor[j, j]))
            A[j, i] = A[i, j]

    num = np.sum(np.sum((cor_)**2)) - np.sum(np.sum(np.diag(cor_**2)))
    den = num + (np.sum(np.sum(A**2)) - np.sum(np.sum(np.diag(A**2))))
    kmo = num / den

    return kmo
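A short usage sketch, assuming the KMO function above is in scope (synthetic correlated data):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
base = rng.randn(200, 1)
data = pd.DataFrame(base + 0.5 * rng.randn(200, 4))  # 4 correlated columns
print(KMO(data))  # values near 1 indicate good sampling adequacy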
Source file: CartPole_DRQN.py (project: GYM_DRL, author: Kyushik)
def xavier_initializer(shape):
    dim_sum = np.sum(shape)
    if len(shape) == 1:
        dim_sum += 1
    bound = np.sqrt(2.0 / dim_sum)
    return tf.random_uniform(shape, minval=-bound, maxval=bound)
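Here np.sum(shape) adds up the tensor dimensions, i.e. fan_in + fan_out for a 2-D weight matrix; the resulting uniform bound in concrete numbers:

import numpy as np

shape = (784, 256)
bound = np.sqrt(2.0 / np.sum(shape))  # sqrt(2/1040) ~= 0.044
print(bound)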

# # Assigning network variables to target network variables 
# def assign_network_to_target():
#   update_wfc = tf.assign(w_fc_target, w_fc)
#   update_bfc = tf.assign(b_fc_target, b_fc)

#   sess.run(update_wfc)
#   sess.run(update_bfc)

#   cell_target = cell 

# Input
Source file: example4.py (project: pytorch_tutorial, author: soravux)
def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        # A different (control flow based) way to control dropout
        if self.training:
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        else:
            x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        if self.training:
            x = F.dropout(x, training=True)
        x = self.fc2(x)

        # Check for NaNs and infinites
        nans = np.sum(np.isnan(x.data.numpy()))
        infs = np.sum(np.isinf(x.data.numpy()))
        if nans > 0:
            print("There are {} NaN values at the output layer".format(nans))
        if infs > 0:
            print("There are {} infinite values at the output layer".format(infs))

        return F.log_softmax(x)
Source file: example4.py (project: pytorch_tutorial, author: soravux)
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
Source file: utils.py (project: spyking-circus, author: spyking-circus)
def score_samples(self, X):
        """Return the log-likelihood of each sample
        See. "Pattern Recognition and Machine Learning"
        by C. Bishop, 12.2.1 p. 574
        or http://www.miketipping.com/papers/met-mppca.pdf
        Parameters
        ----------
        X: array, shape(n_samples, n_features)
            The data.
        Returns
        -------
        ll: array, shape (n_samples,)
            Log-likelihood of each sample under the current model
        """
        check_is_fitted(self, 'mean_')

        X = check_array(X)
        Xr = X - self.mean_
        n_features = X.shape[1]
        precision = self.get_precision()
        log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
        log_like -= .5 * (n_features * log(2. * np.pi)
                          - fast_logdet(precision))
        return log_like
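This is the precision-matrix form of the multivariate Gaussian log-density; a self-contained sketch checking the formula against scipy (fast_logdet is sklearn's helper, replaced here by np.log of the determinant):

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.RandomState(0)
X = rng.randn(5, 3)
mean = X.mean(0)
cov = np.cov(X.T) + 0.1 * np.eye(3)   # regularized to stay invertible
precision = np.linalg.inv(cov)

Xr = X - mean
ll = -.5 * (Xr * np.dot(Xr, precision)).sum(axis=1)
ll -= .5 * (3 * np.log(2. * np.pi) - np.log(np.linalg.det(precision)))
assert np.allclose(ll, multivariate_normal(mean, cov).logpdf(X))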
Source file: YM_labels_matrix.py (project: youtube-8m, author: wangheda)
def main():
    files = tf.gfile.Glob(flags.FLAGS.src_path_1)
    labels_uni = np.zeros([4716,1])
    labels_matrix = np.zeros([4716,4716])
    for file in files:
        labels_all = get_video_input_feature(file)
        print(len(labels_all[0][2]),len(labels_all[0][3]),len(labels_all[0][4]),len(labels_all[0][5]))
        """
        for labels in labels_all:
            for i in range(len(labels)):
                labels_uni[labels[i]] += 1
                for j in range(len(labels)):
                    labels_matrix[labels[i],labels[j]] += 1
    labels_matrix = labels_matrix/labels_uni
    labels_matrix = labels_matrix/(np.sum(labels_matrix,axis=0)-1.0)
    for i in range(4716):
        labels_matrix[i,i] = 1.0
    np.savetxt('labels_uni.out', labels_uni, delimiter=',')
    np.savetxt('labels_matrix.out', labels_matrix, delimiter=',')"""
Source file: eval_util.py (project: youtube-8m, author: wangheda)
def calculate_gap(predictions, actuals, top_k=20):
  """Performs a local (numpy) calculation of the global average precision.

  Only the top_k predictions are taken for each of the videos.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.
    top_k: How many predictions to use per video.

  Returns:
    float: The global average precision.
  """
  gap_calculator = ap_calculator.AveragePrecisionCalculator()
  sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
  gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
  return gap_calculator.peek_ap_at_n()
Source file: inference-sample-error-analysis.py (project: youtube-8m, author: wangheda)
def format_lines(video_ids, predictions, labels, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    n_recall = max(int(numpy.sum(labels[video_index])), 1)
    # labels
    label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
    label_predictions = [(class_index, predictions[video_index][class_index]) 
                           for class_index in label_indices]
    label_predictions = sorted(label_predictions, key=lambda p: -p[1])
    label_str = "\t".join(["%d\t%f"%(x,y) for x,y in label_predictions])
    # predictions
    top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    top_k_predictions = [(class_index, predictions[video_index][class_index])
                         for class_index in top_k_indices]
    top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
    top_k_str = "\t".join(["%d\t%f"%(x,y) for x,y in top_k_predictions])
    # compute PERR
    top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
    positives = [labels[video_index][class_index] 
                 for class_index in top_n_indices]
    perr = sum(positives) / float(n_recall)
    # URL
    url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')
    yield url + "\t" + str(1-perr) + "\t" + top_k_str + "\t" + label_str + "\n"
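The numpy.argpartition idiom above picks the k largest entries in O(n) without a full sort; a tiny standalone demonstration:

import numpy as np

scores = np.array([0.1, 0.9, 0.3, 0.7, 0.5])
k = 2
top_k = np.argpartition(scores, -k)[-k:]   # indices of the 2 largest, unordered
top_k = top_k[np.argsort(-scores[top_k])]  # sort them by descending score
print(top_k, scores[top_k])                # [1 3] [0.9 0.7]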
Source file: kernels.py (project: MKLMM, author: omerwe)
def getTrainKernel(self, params):
        self.checkParams(params)
        if (self.sameParams(params)): return self.cache['getTrainKernel']

        ell = np.exp(params[0])
        if (self.K_sq is None): K = sq_dist(self.X_scaled.T / ell)  #precompute squared distances
        else: K = self.K_sq / ell**2        
        self.cache['K_sq_scaled'] = K

        # # # #manual computation (just for sanity checks)
        # # # K1 = np.exp(-K / 2.0)
        # # # K2 = np.zeros((self.X_scaled.shape[0], self.X_scaled.shape[0]))
        # # # for i1 in xrange(self.X_scaled.shape[0]):
            # # # for i2 in xrange(i1, self.X_scaled.shape[0]):
                # # # diff = self.X_scaled[i1,:] - self.X_scaled[i2,:]
                # # # K2[i1, i2] = np.exp(-np.sum(diff**2) / (2*ell))
                # # # K2[i2, i1] = K2[i1, i2]               
        # # # print np.max((K1-K2)**2)
        # # # sys.exit(0)

        K_exp = np.exp(-K / 2.0)
        self.cache['getTrainKernel'] = K_exp
        self.saveParams(params)
        return K_exp
Source file: kernels.py (project: MKLMM, author: omerwe)
def getTrainTestKernel(self, params, Xtest):
        self.checkParams(params)
        ell2 = np.exp(2*params[0])

        z = Xtest / np.sqrt(Xtest.shape[1])
        S = 1 + self.X_scaled.dot(z.T)
        sz = 1 + np.sum(z**2, axis=1)
        sqrtEll2Psx = np.sqrt(ell2+self.sx)
        sqrtEll2Psz = np.sqrt(ell2+sz)
        K = S / np.outer(sqrtEll2Psx, sqrtEll2Psz)
        return np.arcsin(K)
Source file: stats.py (project: pyTBA, author: Thing342)
def match_matrix(event: Event):
    """Returns a numpy participation matrix for the qualification matches in this event, used for calculating OPR.

        Each row in the matrix corresponds to a single alliance in a match, meaning that there will be two rows (one for
    red, one for blue) per match. Each column represents a single team, ordered by team number. If a team participated
    on a certain alliance, the value at that row and column would be 1, otherwise, it would be 0. For example, an
    event with teams 1-7 that featured a match that pitted teams 1, 3, and 5 against 2, 4, and 6 would have a match
    matrix that looks like this (sans labels):

                                #1  #2  #3  #4  #5  #6  #7
                    qm1_red     1   0   1   0   1   0   0
                    qm1_blue    0   1   0   1   0   1   0
    """
    match_list = []
    for match in filter(lambda match: match['comp_level'] == 'qm', event.matches):
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['red']['teams'] else 0)
        match_list.append(matchRow)
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['blue']['teams'] else 0)
        match_list.append(matchRow)

    mat = numpy.array(match_list)
    sum_matches = numpy.sum(mat, axis=0)
    avg_team_matches = sum(sum_matches) / float(len(sum_matches))
    return mat[:, numpy.apply_along_axis(numpy.count_nonzero, 0, mat) > avg_team_matches - 2]
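The participation matrix is normally fed to a least-squares solve to estimate OPR: each row's team contributions should add up to that alliance's score. A hedged sketch with made-up scores (pyTBA's own solver may differ):

import numpy

mat = numpy.array([[1, 0, 1],      # qm1 red:  teams 1 and 3
                   [0, 1, 0]])     # qm1 blue: team 2
scores = numpy.array([40.0, 25.0]) # alliance scores for those two rows
opr, *_ = numpy.linalg.lstsq(mat, scores, rcond=None)
print(opr)                         # -> [20. 25. 20.]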
Source file: utils.py (project: pyku, author: dubvulture)
def compute_angle(pt0, pt1, pt2):
    """
    Given 3 points, compute the cosine of the angle from pt0
    :type pt0: numpy.array
    :type pt1: numpy.array
    :type pt2: numpy.array
    :return: cosine of angle
    """
    a = pt0 - pt1
    b = pt0 - pt2
    return (np.sum(a * b)) / (np.linalg.norm(a) * np.linalg.norm(b))
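A quick sanity check, assuming compute_angle above is in scope: perpendicular rays from pt0 give a cosine of 0:

import numpy as np

pt0 = np.array([0.0, 0.0])
pt1 = np.array([1.0, 0.0])
pt2 = np.array([0.0, 1.0])
print(compute_angle(pt0, pt1, pt2))  # 0.0, i.e. a 90 degree angle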
Source file: digit_classifier.py (project: pyku, author: dubvulture)
def _zoning(image):
        """
        It works better with DSIZE = 28
        ~0.9967 precision and recall
        :param image:
        :return: #pixels/area ratio of each zone (7x7) as feature vector
        """
        zones = []
        for i in range(0, 28, 7):
            for j in range(0, 28, 7):
                roi = image[i:i+7, j:j+7]
                val = (np.sum(roi)/255) / 49.
                zones.append(val)
        return np.array(zones, np.float32)
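The double loop is equivalent to a reshape-based block sum, which is a handy way to verify it (random toy image; 28 = 4 zones x 7 pixels per side):

import numpy as np

image = (np.random.rand(28, 28) > 0.5).astype(np.uint8) * 255
zones = image.reshape(4, 7, 4, 7).sum(axis=(1, 3)) / 255.0 / 49.0
print(zones.shape)  # (4, 4); flattening row-major matches the loop order above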
Source file: data_manager.py (project: AutoML5, author: djajetic)
def getTypeProblem (self, solution_filename):
        ''' Get the type of problem directly from the solution file (in case we do not have an info file)'''
        if 'task' not in self.info.keys():
            solution = np.array(data_converter.file_to_array(solution_filename))
            target_num = solution.shape[1]
            self.info['target_num']=target_num
            if target_num == 1: # if we have only one column
                solution = np.ravel(solution) # flatten
                nbr_unique_values = len(np.unique(solution))
                if nbr_unique_values < len(solution)/8:
                    # Classification
                    self.info['label_num'] = nbr_unique_values
                    if nbr_unique_values == 2:
                        self.info['task'] = 'binary.classification'
                        self.info['target_type'] = 'Binary'
                    else:
                        self.info['task'] = 'multiclass.classification'
                        self.info['target_type'] = 'Categorical'
                else:
                    # Regression
                    self.info['label_num'] = 0
                    self.info['task'] = 'regression'
                    self.info['target_type'] = 'Numerical'     
            else:
                # Multilabel or multiclass       
                self.info['label_num'] = target_num
                self.info['target_type'] = 'Binary' 
                if any(item > 1 for item in map(np.sum,solution.astype(int))):
                    self.info['task'] = 'multilabel.classification'     
                else:
                    self.info['task'] = 'multiclass.classification'        
        return self.info['task']
Source file: libscores.py (project: AutoML5, author: djajetic)
def binarize_predictions(array, task='binary.classification'):
    ''' Turn predictions into decisions {0,1} by selecting the class with largest 
    score for multiclass problems and thresholding at 0.5 for other cases.'''
    # add a very small random value as tie breaker (a bit bad because this changes the score every time)
    # so to make sure we get the same result every time, we seed it    
    #eps = 1e-15
    #np.random.seed(sum(array.shape))
    #array = array + eps*np.random.rand(array.shape[0],array.shape[1])
    bin_array = np.zeros(array.shape)
    if (task != 'multiclass.classification') or (array.shape[1]==1): 
        bin_array[array>=0.5] = 1
    else:        
        sample_num=array.shape[0]
        for i in range(sample_num):
            j = np.argmax(array[i,:])
            bin_array[i,j] = 1        
    return bin_array
Source file: libscores.py (project: AutoML5, author: djajetic)
def acc_stat (solution, prediction):
    ''' Return accuracy statistics TN, FP, TP, FN
     Assumes that solution and prediction are binary 0/1 vectors.'''
    # This uses floats so the results are floats
    TN = sum(np.multiply((1-solution), (1-prediction)))
    FN = sum(np.multiply(solution, (1-prediction)))
    TP = sum(np.multiply(solution, prediction))
    FP = sum(np.multiply((1-solution), prediction))
    #print "TN =",TN
    #print "FP =",FP
    #print "TP =",TP
    #print "FN =",FN
    return (TN, FP, TP, FN)
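Usage sketch, assuming acc_stat above is in scope: with 0/1 vectors the four sums recover the confusion counts, from which accuracy follows:

import numpy as np

solution   = np.array([1, 0, 1, 1, 0])
prediction = np.array([1, 0, 0, 1, 1])
TN, FP, TP, FN = acc_stat(solution, prediction)
print(TN, FP, TP, FN)                        # 1 1 2 1
accuracy = (TP + TN) / float(len(solution))  # 3 / 5 = 0.6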
Source file: libscores.py (project: AutoML5, author: djajetic)
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp 
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Source file: libscores.py (project: AutoML5, author: djajetic)
def auc_metric(solution, prediction, task='binary.classification'):
    ''' Normalized Area under ROC curve (AUC).
    Return Gini index = 2*AUC-1 for binary classification problems.
    Should work for a vector of binary 0/1 (or -1/1) "solution" values and any discriminant values
    for the predictions. If solution and prediction are not vectors, the AUC
    of the columns of the matrices are computed and averaged (with no weight).
    The same for all classification problems (in fact it treats well only the
    binary and multilabel classification problems).'''
    #auc = metrics.roc_auc_score(solution, prediction, average=None)
    # There is a bug in metrics.roc_auc_score: auc([1,0,0],[1e-10,0,0]) incorrect
    label_num=solution.shape[1]
    auc=np.empty(label_num)
    for k in range(label_num):
        r_ = tiedrank(prediction[:,k])
        s_ = solution[:,k]
        if sum(s_)==0: print('WARNING: no positive class example in class {}'.format(k+1))
        npos = sum(s_==1)
        nneg = sum(s_<1)
        auc[k] = (sum(r_[s_==1]) - npos*(npos+1)/2) / (nneg*npos)
    return 2*mvmean(auc)-1
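The per-column expression is the rank-sum (Mann-Whitney) form of AUC; a standalone sketch of that identity, using scipy.stats.rankdata in place of the tiedrank helper:

import numpy as np
from scipy.stats import rankdata
from sklearn.metrics import roc_auc_score

s = np.array([1, 0, 1, 0, 1])             # binary solution
p = np.array([0.9, 0.2, 0.7, 0.4, 0.3])   # predicted scores
r = rankdata(p)                           # average ranks, ties shared
npos, nneg = np.sum(s == 1), np.sum(s == 0)
auc = (np.sum(r[s == 1]) - npos * (npos + 1) / 2) / (npos * nneg)
assert np.isclose(auc, roc_auc_score(s, p))  # 5/6 ~= 0.833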


### END CLASSIFICATION METRICS 

# ======= Specialized scores ========
# We run all of them for all tasks even though they don't make sense for some tasks

