Example source code for Python's compress()
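The snippets below are collected from various open-source projects and all revolve around itertools.compress(data, selectors), which walks two iterables in parallel and yields only the data items whose corresponding selector is truthy. As a minimal, self-contained warm-up (the names here are illustrative and not taken from any project below):

from itertools import compress

letters = ['a', 'b', 'c', 'd', 'e']
mask = [1, 0, 1, 0, 1]
print(list(compress(letters, mask)))   # ['a', 'c', 'e']

# The recurring pattern in the snippets below: build a boolean mask, then
# keep only the elements where the mask is truthy.
values = [10, 25, 3, 42, 7]
keep = [v > 9 for v in values]
print(list(compress(values, keep)))    # [10, 25, 42]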

core.py (project: ConceptualSpaces, author: lbechberger)
def simplify(cuboids):
    """Simplifies the given set of cuboids by removing redundant ones."""

    keep = [True]*len(cuboids)
    for i in range(len(cuboids)):

        p_min = cuboids[i]._p_min
        p_max = cuboids[i]._p_max
        for j in range(len(cuboids)):
            if i == j or keep[j] == False:
                continue
            if cuboids[j].contains(p_min) and cuboids[j].contains(p_max):
                keep[i] = False
                break

    return list(compress(cuboids, keep))
utils_feature_selection.py (project: auto_ml, author: doordash)
def transform(self, X, y=None):

        if self.selector == 'KeepAll':
            return X

        if scipy.sparse.issparse(X):
            if X.getformat() == 'csr':
                # convert to a csc (column) matrix, rather than a csr (row) matrix
                X = X.tocsc()

            # Slice that column matrix to only get the relevant columns that we already calculated in fit:
            X = X[:, self.index_mask]

            # convert back to a csr matrix
            return X.tocsr()

        # If this is a dense matrix:
        else:
            pruned_X = [list(itertools.compress(row, self.support_mask)) for row in X]
            return pruned_X
loaddataSubClass.py (project: drmad, author: bigaidream-projects)
def select_subclassdata(X, y,totalClassNum,SubClassNum, subClassIndexList,normalize=True):


    X= np.array(list(itertools.compress(X, [subClassIndexList.__contains__(c) for c in y])))
    y= np.array(list(itertools.compress(y, [subClassIndexList.__contains__(c) for c in y])))


    d = {}
    for i in xrange(SubClassNum):
        d.update({subClassIndexList[i]: (totalClassNum+i)})

    d1 = {}
    for i in xrange(SubClassNum):
        d1.update({(totalClassNum+i): i})

    for k, v in d.iteritems():
        np.place(y,y==k,v)
    for k, v in d1.iteritems():
        np.place(y,y==k,v)
    return X,y
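A side note on select_subclassdata above: since X and y are converted to numpy arrays anyway, the same filtering can be expressed with a vectorized boolean mask instead of itertools.compress. A minimal sketch under that assumption (the data values here are made up for illustration):

import numpy as np

X = np.arange(12).reshape(6, 2)
y = np.array([0, 1, 2, 1, 3, 2])
sub_class_index_list = [1, 2]             # classes to keep, mirroring subClassIndexList

mask = np.isin(y, sub_class_index_list)   # plays the role of the compress() selector list
X_sub, y_sub = X[mask], y[mask]
print(y_sub)                              # [1 2 1 2]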
process.py (project: Lifting-from-the-Deep-release, author: DenisTome)
def import_json(path='json/MPI_annotations.json', order='json/MPI_order.npy'):
    """Get the json file containing the dataset.
    We want the data to be shuffled; however, the training has to be repeatable.
    This means that once shuffled, the order has to be maintained."""
    with open(path) as data_file:
        data_this = json.load(data_file)
        data_this = np.array(data_this['root'])
    num_samples = len(data_this)

    if os.path.exists(order):
        idx = np.load(order)
    else:
        idx = np.random.permutation(num_samples).tolist()
        np.save(order, idx)

    is_not_validation = [not data_this[i]['isValidation']
                         for i in range(num_samples)]
    keep_data_idx = list(compress(idx, is_not_validation))

    data = data_this[keep_data_idx]
    return data, len(keep_data_idx)
nvdm_nobatch.py (project: NVDM-For-Document-Classification, author: cryanzpj)
def train_step(x_batch, y_batch, epoch):
            """
            A single training step
            """
            x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_batch[0]))]
            feed_dict = {nvdm.input_x: x_batch, nvdm.x_id: x_batch_id}
            '''
            h1b = [v for v in tf.all_variables() if v.name == "h1/b:0"][0]
            h1w = [v for v in tf.all_variables() if v.name == "h1/w:0"][0]
            _, step, summaries, loss, kl, rc, p_xi_h, R, hb, hw, e  = sess.run(
                [nvdm.train_op, global_step, loss_summary, nvdm.loss, nvdm.KL, nvdm.recon_loss, nvdm.p_xi_h, nvdm.R, h1b, h1w, nvdm.e], feed_dict)
            '''
            _, step,  loss = sess.run([nvdm.train_op, nvdm.global_step, nvdm.loss], feed_dict)

            time_str = datetime.datetime.now().isoformat()
            if step % FLAGS.train_every == 0:
                print("time: {},  epoch: {}, step: {}, loss: {:g}".format(time_str,epoch, step, loss))
            if np.isnan(loss):
                import pdb
                pdb.set_trace()
            #train_summary_writer.add_summary(summaries, step)
condition.py (project: cryptoconditions, author: bigchaindb)
def from_asn1_dict(asn1_dict):
        asn1_type, value = asn1_dict.popitem()
        registered_type = TypeRegistry.find_by_asn1_type(asn1_type)
        # Instantiate condition
        condition = Condition()
        condition.type_id = registered_type['type_id']
        condition.hash = value['fingerprint']
        condition.cost = value['cost']
        condition._subtypes = set()
        if registered_type['class'].TYPE_CATEGORY == 'compound':
            subtypes = {
                TypeRegistry.find_by_type_id(type_id)['name']
                for type_id in compress(
                    range(Condition.MAX_SAFE_SUBTYPES),
                    map(lambda bit: int(bit), value['subtypes'])
                )
            }
            condition._subtypes.update(subtypes)

        return condition
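For context on the compress(range(...), bits) idiom used in from_asn1_dict above: it decodes a bit list into the positions that are set. A tiny illustrative sketch (not part of cryptoconditions):

from itertools import compress

bits = [1, 0, 0, 1, 1]
set_positions = list(compress(range(len(bits)), bits))
print(set_positions)   # [0, 3, 4]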
naive_bayes.py (project: mglex, author: fungs)
def maximize_likelihood(self, data, responsibilities, weights, cmask=None):

        if not (cmask is None or cmask.shape == () or np.all(cmask)):  # cluster reduction
            responsibilities = responsibilities[:, cmask]
            self.names = list(compress(self.names, cmask))  # TODO: make self.names a numpy array?

        weights_combined = responsibilities * weights

        self.variables = np.dot(weights_combined.T, data.frequencies)
        with np.errstate(invalid='ignore'):  # if no training data is available for any class
            np.divide(self.variables, weights_combined.sum(axis=0, keepdims=True, dtype=types.large_float_type).T, out=self.variables)  # normalize before update, self.variables is types.prob_type

        dimchange = self.update()  # create cache for likelihood calculations

        # TODO: refactor this block
        ll = self.log_likelihood(data)
        std_per_class = common.weighted_std(ll, weights_combined)
        weight_per_class = weights_combined.sum(axis=0, dtype=types.large_float_type)
        weight_per_class /= weight_per_class.sum()
        std_per_class_mask = np.isnan(std_per_class)
        skipped_classes = std_per_class_mask.sum()
        self.stdev = np.ma.dot(np.ma.MaskedArray(std_per_class, mask=std_per_class_mask), weight_per_class)
        stderr.write("LOG %s: mean class likelihood standard deviation is %.2f (omitted %i/%i classes due to invalid or insufficient data)\n" % (self._short_name, self.stdev, skipped_classes, self.num_components - skipped_classes))
        return dimchange, ll
word2vec-cbow.py (project: TensorFlowHub, author: MJFND)
def generate_batch(batch_size, num_skips, skip_window):
  global data_index
  assert batch_size % num_skips == 0
  assert num_skips <= 2 * skip_window
  batch = np.ndarray(shape=(batch_size,num_skips), dtype=np.int32)
  labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
  span = 2 * skip_window + 1 # [ skip_window target skip_window ]
  buffer = collections.deque(maxlen=span)
  for _ in range(span):
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  for i in range(batch_size):
    mask = [1] * span #[1 1 1]
    mask[skip_window] = 0 # [1 0 1] 
    batch[i, :] = list(compress(buffer, mask)) # all surrounding words
    labels[i, 0] = buffer[skip_window] # the word at the center 
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  return batch, labels
param_class.py (project: scikit-dataaccess, author: MITHaystack)
def perturb(self):
        ''' Perturb the list by selecting a random subset of the initial list '''
        # randomly index list elements to be kept
        index = [random.randint(0,1) for r in range(len(self.val_init))]
        # update list and keep list values where index is 1
        self.val_list = list(itertools.compress(self.val_init, index))
param_class.py (project: scikit-dataaccess, author: MITHaystack)
def perturb(self):
        ''' 
        Systematically change which item is absent from the list
        '''
        self.n = self.n + 1
        if self.n >= len(self.val_init):
            self.n = 0
        index = [1 for i in range(len(self.val_init))]
        index[self.n] = 0

        self.val_list = list(itertools.compress(self.val_init, index))
data_loader.py (project: multiNLI_encoder, author: easonnie)
def combine_two_set(set_1, set_2, rate=(1, 1), seed=0):
    np.random.seed(seed)
    len_1 = len(set_1)
    len_2 = len(set_2)
    # print(len_1, len_2)
    p1, p2 = rate
    c_1 = np.random.choice([0, 1], len_1, p=[1 - p1, p1])
    c_2 = np.random.choice([0, 1], len_2, p=[1 - p2, p2])
    iter_1 = itertools.compress(iter(set_1), c_1)
    iter_2 = itertools.compress(iter(set_2), c_2)
    for it in itertools.chain(iter_1, iter_2):
        yield it
__init__.py (project: sudokuextract, author: hbldh)
def create_mnist_dataset():
    images, labels = get_mnist_raw_data()
    mask = labels != 0
    print("Pre-zero removal:  Label / N : {0}".format([(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    images = list(itertools.compress(images, mask))
    labels = labels[mask]

    images = images[3::20]
    labels = labels[3::20]

    print("Pre-blobify:  Label / N : {0}".format([(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    y = y[mask]
    print("Post-blobify:  Label / N : {0}".format([(v, c) for v, c in zip(_range(10), np.bincount(y))]))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    try:
        os.makedirs(os.path.expanduser('~/sudokuextract'))
    except:
        pass

    try:
        for i, (img, lbl) in enumerate(zip(images, labels)):
            img = Image.fromarray(img, 'L')
            with open(os.path.expanduser('~/sudokuextract/{1}_{0:04d}.jpg'.format(i + 1, lbl)), 'wb') as f:  # binary mode for JPEG data
                img.save(f)
    except Exception as e:
        print(e)

    return images, labels, X, y
memoize.py (project: catalyst, author: enigmampc)
def alive(self):
        return all(item() is not None
                   for item in compress(self._items, self._selectors))
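The alive property above applies compress() to a sequence of weak references and checks that every selected referent is still alive. A self-contained illustration of the same pattern, with made-up names rather than catalyst's internals:

import weakref
from itertools import compress

class Payload:
    pass

objs = [Payload(), Payload(), Payload()]
refs = [weakref.ref(o) for o in objs]     # plays the role of self._items
selectors = [True, False, True]           # plays the role of self._selectors

print(all(r() is not None for r in compress(refs, selectors)))  # True
del objs[0]  # drop the only strong reference to the first object
print(all(r() is not None for r in compress(refs, selectors)))  # False (in CPython the object is collected immediately)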
eia_data_part.py (project: type2-fuzzy, author: h4iku)
def outlier_processing(intervals):
    """Outlier processing"""

    left = [x[0] for x in intervals]
    right = [x[1] for x in intervals]

    # Compute Q(0.25), Q(0.75) and IQR for left-ends
    lq25, lq75 = np.percentile(left, [25, 75])
    liqr = lq75 - lq25

    # Compute Q(0.25), Q(0.75) and IQR for right-ends
    rq25, rq75 = np.percentile(right, [25, 75])
    riqr = rq75 - rq25

    # Outlier processing for Left and Right bounds
    left_filtered = [x for x in intervals if (lq25 - 1.5 * liqr) <= x[0] <= (lq75 + 1.5 * liqr)]
    right_filtered = [x for x in left_filtered if (rq25 - 1.5 * riqr) <= x[1] <= (rq75 + 1.5 * riqr)]

    # Compute Q(0.25), Q(0.75) and IQR for interval length
    len_values = [x[1] - x[0] for x in right_filtered]
    lenq25, lenq75 = np.percentile(len_values, [25, 75])
    leniqr = lenq75 - lenq25

    # Outlier processing for interval length
    len_filtered = [x if (lenq25 - 1.5 * leniqr) <= x <= (lenq75 + 1.5 * leniqr) else None for x in len_values]
    selectors = [x is not None for x in len_filtered]
    filtered_intervals = list(itertools.compress(right_filtered, selectors))
    return filtered_intervals
eia_data_part.py (project: type2-fuzzy, author: h4iku)
def tolerance_limit_processing(intervals):
    """Tolerance limit processing"""

    left = [x[0] for x in intervals]
    right = [x[1] for x in intervals]
    mean_left = np.mean(left)
    std_left = np.std(left, ddof=1)
    mean_right = np.mean(right)
    std_right = np.std(right, ddof=1)

    limits = [32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967, 2.839,
        2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337, 2.31, 2.31, 2.31,
        2.31, 2.31, 2.208]
    k = limits[min(len(left) - 1, 24)]

    # Tolerance limit processing for Left and Right bounds
    left_filtered = [x for x in intervals if (mean_left - k * std_left) <= x[0] <= (mean_left + k * std_left)]
    right_filtered = [x for x in left_filtered if (mean_right - k * std_right) <= x[1] <= (mean_right + k * std_right)]

    # Tolerance limit processing for interval length
    len_values = [x[1] - x[0] for x in right_filtered]
    mean_len = np.mean(len_values)
    std_len = np.std(len_values, ddof=1)

    if std_len != 0:
        k = min(k, mean_len / std_len, (100 - mean_len) / std_len)

    len_filtered = [x if (mean_len - k * std_len) <= x <= (mean_len + k * std_len) else None for x in len_values]
    selectors = [x is not None for x in len_filtered]
    filtered_intervals = list(itertools.compress(right_filtered, selectors))
    return filtered_intervals
kinematic_chain.py (project: pybotics, author: nnadeau)
def optimization_vector(self) -> np.ndarray:
        """
        Get the values of parameters being optimized.

        :return: optimization parameter values
        """
        filtered_iterator = compress(self.vector, self.optimization_mask)
        optimization_vector = np.array(list(filtered_iterator))
        return optimization_vector
frame.py (project: pybotics, author: nnadeau)
def optimization_vector(self) -> np.ndarray:
        """
        Return the values of parameters being optimized.

        :return: optimization parameter values
        """
        filtered_iterator = compress(self.vector(), self.optimization_mask)
        vector = np.array(list(filtered_iterator))
        return vector
identify.py (project: Thrifty, author: swkrueger)
def filter_duplicates(detections):
    """Return detections with duplicates and unidentified detections removed,
    sorted by timestamp."""
    mask = identify_duplicates(detections)
    filtered = list(itertools.compress(detections, mask))
    filtered.sort(key=lambda x: x.timestamp)
    return filtered
tdoa_est.py (project: Thrifty, author: swkrueger)
def make_detection_extractor(detections, matches):
    rxpair_detections = collections.defaultdict(list)
    for group in matches:
        for det0_id, det1_id in itertools.combinations(group, 2):
            det0 = detections[det0_id]
            det1 = detections[det1_id]
            if det0.rxid > det1.rxid:
                det0, det1 = det1, det0
            rxpair_detections[(det0.rxid, det1.rxid)].append((det0, det1))

    timestamps = {}
    for pair, detections in rxpair_detections.iteritems():
        detections.sort(key=lambda x: x[0].timestamp)  # sort pairs by the first detection's timestamp
        timestamps[pair] = [d[0].timestamp for d in detections]

    def extract(rxid0, rxid1, timestamp_start, timestamp_stop):
        assert rxid0 < rxid1
        pair = (rxid0, rxid1)
        left = bisect_left(timestamps[pair], timestamp_start)
        right = bisect_right(timestamps[pair], timestamp_stop)
        detection_pairs = rxpair_detections[pair][left:right]

        if len(detection_pairs) > 1:
            sdoa = np.array([d[0].soa - d[1].soa for d in detection_pairs])
            is_outlier = stat_tools.is_outlier(sdoa)
            detection_pairs = list(itertools.compress(detection_pairs,
                                                      ~is_outlier))

        return detection_pairs

    return extract
store_csv.py (project: TFG, author: BraulioV)
def split_in_pairs(split_list):
    """
    Input: ["Element1", "Element2", "Element3", "Element4"]
    Output: (["Element1", "Element3"], ["Element2", "Element4"])
    """
    def compress_elements(split_list, elements, times):
        return compress(split_list, chain.from_iterable(repeat(elements, times)))

    n_times = len(split_list) // 2
    return compress_elements(split_list, [1,0], n_times), compress_elements(split_list, [0,1], n_times)


# separate Class names and file names in two different lists
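A possible variation on split_in_pairs above (illustrative only, not part of the TFG project): itertools.cycle can supply the alternating selectors without computing n_times, which also handles odd-length lists:

from itertools import compress, cycle

def split_in_pairs_cycled(split_list):
    # even-positioned elements first, then odd-positioned elements
    return (list(compress(split_list, cycle([1, 0]))),
            list(compress(split_list, cycle([0, 1]))))

print(split_in_pairs_cycled(["Element1", "Element2", "Element3", "Element4"]))
# (['Element1', 'Element3'], ['Element2', 'Element4'])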
timetable.py (project: TFG, author: BraulioV)
def assign_lab_hours(self):
        for group, it in zip(self.groups.values(), range(len(self.groups.items()))):
            # get subjects and its practical hours
            subject_list = self.__get_subj_list__(group)
            shuffle(subject_list)

            subject_list = self.recalculate_subjects(subject_list, group.numsubgroups)

            # compute range of shift
            if group.shift == 'M':
                start_range, end_range = 0, self.time_table.shape[1] // 2
            else:
                start_range, end_range = self.time_table.shape[1] // 2, self.time_table.shape[1]

            # compute the index
            subjects_index = [i for i in range(group.numsubgroups)]

            days_week = self.structure.shape[2]
            # compute the total lab hours, for each subject
            hours = list(map(lambda x: x*group.numsubgroups, [subject.practical_hours if type(subject) is not tuple
                     else subject[0].practical_hours + subject[1].practical_hours
                     for subject in subject_list]))
            # start loop
            for hour in range(start_range, end_range, 2):
                for day in range(days_week):
                    # if the cell is a lab cell, let's fill it
                    if (self.structure[it, hour, day] == 'L' or self.structure[it, hour, day] == 'E')\
                            and sum(compress(hours, map(lambda x: x in subjects_index, range(len(hours))))) > 0:
                        cell1, cell2 = self.compute_best_cells(group, subject_list, subjects_index, hours, hour, day)
                        self.time_table[it, hour, day] = cell1
                        self.time_table[it, hour + 1, day] = cell2

                        subjects_index = list(map(lambda x: (x + 1) % len(subject_list), subjects_index))
                if sum(hours) == 0: break
active_stock.py (project: bigfishtrader, author: xingetouzi)
def can_trade(self, *codes):
        if len(codes):
            return list(compress(codes, [self.cache.client.sismember('index', code) for code in codes]))
        else:
            return list(self.cache.client.smembers('index'))
prediction_data.py (project: ML-Predictions, author: ltfschoen)
def setup_training_columns(self):
        """ Return array of Training Columns.

        When "training_columns" array is empty it means return all columns except the "target_column"
        """

        training_columns = self.prediction_config.DATASET_LOCATION[self.dataset_choice]["training_columns"]

        if not training_columns and not isinstance(self.df_listings, type(None)):
            features = self.df_listings.columns.tolist()

            # Remove "target_column" (if already in the dataset, as may not yet have been generated by Clustering)
            if self.target_column in features:
                features.remove(self.target_column)

            # Remove columns containing Excluded full text
            for index, column_name in enumerate(self.prediction_config.EXCLUDE_TRAINING_COLUMNS_WITH_FULL_TEXT):
                if column_name in features:
                    features.remove(column_name)

            # Retain columns that do not contain Excluded partial text
            is_features_to_retain = [True] * len(features)
            for idx_outer, column_partial_name in enumerate(self.prediction_config.EXCLUDE_TRAINING_COLUMNS_WITH_PARTIAL_TEXT):
                for idx_inner, column_name in enumerate(features):
                    if column_partial_name in column_name:
                        is_features_to_retain[idx_inner] = False
            filtered = list(compress(features, is_features_to_retain))
            return filtered
        else:
            return training_columns
test_itertools.py (project: Mac-Python-3.X, author: L1nwatch)
def data_deal_function():
    # compress() filters a data iterable using a second iterable of selectors: it walks
    # both in parallel and yields only the data items whose corresponding selector
    # evaluates to True (any truthy value counts).
    # For comparison, itertools.filterfalse() is the complement of the built-in filter():
    # it yields the elements for which the predicate returns False rather than True.
    for item in it.compress([1, 2, 3, 4, 5], [False, True, False, 0, 1]):
        print(item)

    # dropwhile() and takewhile() are mirror images of each other. dropwhile() discards
    # items while the predicate is True and yields everything from the first False onwards;
    # takewhile() yields items while the predicate is True and stops at the first False.
    # For range(20) with a "single digit" predicate, dropwhile() gives 10..19 and takewhile() gives 0..9.
    def __single_digit(n):
        return n < 10

    for n in it.dropwhile(__single_digit, range(20)):
        print(n, end=" ")
    for n in it.takewhile(__single_digit, range(20)):
        print(n, end=" ")

    # accumulate() yields a running accumulation of the input (summation by default, or any
    # binary function you pass). For [1, 2, 3, 4] it yields 1, then 1 + 2 = 3, and so on.
    # It is like functools.reduce(), but it yields every intermediate result, not just the final one.
    for n in it.accumulate([1, 2, 3, 4, ]):
        print(n, end=" ")
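The first comment in data_deal_function contrasts compress() with itertools.filterfalse(); a tiny sketch of that contrast (illustrative values only):

import itertools as it

nums = [1, 2, 3, 4, 5, 6]
print(list(filter(lambda n: n % 2 == 0, nums)))          # [2, 4, 6]  -> elements where the predicate is True
print(list(it.filterfalse(lambda n: n % 2 == 0, nums)))  # [1, 3, 5]  -> elements where the predicate is False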
pipeline.py (project: eclipse2017, author: google)
def assemble(self, fnames):
        """
        Stitches together movies from an ordered list of filenames.
        Downloads new files from GCS then feeds files to ffmpeg.
        Returns list of files successfully stitched into movie & calls stats func
        """

        # Get files from GCS
        pool = Pool(min(len(fnames), constants.MOVIE_DAEMON_MAX_PROCESSES))
        results = pool.map(get_file_from_gcs, fnames)
        pool.terminate()

        # Start ffmpeg subprocess
        ffmpeg_cmd = ["ffmpeg","-y",        # Overwrite existing movie file
                    "-f", "image2pipe",
                    "-framerate", constants.MOVIE_FRAMERATE,
                    "-vcodec","mjpeg",
                    "-i", "-",              # Input pipe from stdin
                    "-vf", "scale=1024:-1",
                    "-loglevel", "panic",
                    "-vcodec", "libx264",
                    constants.MOVIE_FPATH]

        ffmpeg_ps = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)


        fnames = list(compress(fnames, results))
        files_read = self._pipe_to_ffmpeg(ffmpeg_ps, fnames)

        if files_read > constants.MOVIE_MIN_FRAMES:
            ffmpeg_ps.stdin.close()
            ffmpeg_ps.wait()
        else:
            ffmpeg_ps.kill()

        return fnames
dataframe.py (project: raccoon, author: rsheftel)
def select_index(self, compare, result='boolean'):
        """
        Finds the elements in the index that match the compare parameter and returns either a list of the values that
        match, or a boolean list the length of the index with True for each index that matches. If the indexes are
        tuples then the compare is a tuple where None in any field of the tuple will be treated as "*" and match all
        values.

        :param compare: value to compare as a singleton or tuple
        :param result: 'boolean' = returns a list of booleans, 'value' = returns a list of index values that match
        :return: list of booleans or values
        """
        if isinstance(compare, tuple):
            # this crazy list comprehension will match all the tuples in the list with None being an * wildcard
            booleans = [all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)])
                        for x, v in enumerate(self._index)]
        else:
            booleans = [False] * len(self._index)
            if self._sort:
                booleans[sorted_index(self._index, compare)] = True
            else:
                booleans[self._index.index(compare)] = True
        if result == 'boolean':
            return booleans
        elif result == 'value':
            return list(compress(self._index, booleans))
        else:
            raise ValueError('only valid values for result parameter are: boolean or value.')
dataframe.py (project: raccoon, author: rsheftel)
def get_rows(self, indexes, column, as_list=False):
        """
        For a list of indexes and a single column name return the values of the indexes in that column.

        :param indexes: either a list of index values or a list of booleans with same length as all indexes
        :param column: single column name
        :param as_list: if True return a list, if False return DataFrame
        :return: DataFrame if as_list is False, a list if as_list is True
        """
        c = self._columns.index(column)
        if all([isinstance(i, bool) for i in indexes]):  # boolean list
            if len(indexes) != len(self._index):
                raise ValueError('boolean index list must be same size of existing index')
            if all(indexes):  # the entire column
                data = self._data[c]
                index = self._index
            else:
                data = list(compress(self._data[c], indexes))
                index = list(compress(self._index, indexes))
        else:  # index values list
            locations = [sorted_index(self._index, x) for x in indexes] if self._sort \
                else [self._index.index(x) for x in indexes]
            data = [self._data[c][i] for i in locations]
            index = [self._index[i] for i in locations]
        return data if as_list else DataFrame(data={column: data}, index=index, index_name=self._index_name,
                                              sort=self._sort)
dataframe.py (project: raccoon, author: rsheftel)
def get_matrix(self, indexes, columns):
        """
        For a list of indexes and list of columns return a DataFrame of the values.

        :param indexes: either a list of index values or a list of booleans with same length as all indexes
        :param columns: list of column names
        :return: DataFrame
        """
        if all([isinstance(i, bool) for i in indexes]):  # boolean list
            is_bool_indexes = True
            if len(indexes) != len(self._index):
                raise ValueError('boolean index list must be same size of existing index')
            bool_indexes = indexes
            indexes = list(compress(self._index, indexes))
        else:
            is_bool_indexes = False
            locations = [sorted_index(self._index, x) for x in indexes] if self._sort \
                else [self._index.index(x) for x in indexes]

        if all([isinstance(i, bool) for i in columns]):  # boolean list
            if len(columns) != len(self._columns):
                raise ValueError('boolean column list must be same size of existing columns')
            columns = list(compress(self._columns, columns))

        col_locations = [self._columns.index(x) for x in columns]
        data_dict = dict()

        for c in col_locations:
            data_dict[self._columns[c]] = list(compress(self._data[c], bool_indexes)) if is_bool_indexes \
                else [self._data[c][i] for i in locations]

        return DataFrame(data=data_dict, index=indexes, columns=columns, index_name=self._index_name,
                         sort=self._sort)
dataframe.py (project: raccoon, author: rsheftel)
def get_location(self, location, columns=None, as_dict=False, index=True):
        """
        For an index location and list of columns return a DataFrame of the values. This is optimized for speed because
        it does not need to lookup the index location with a search. Also can accept relative indexing from the end of
        the DataFrame in standard python notation [-3, -2, -1]

        :param location: index location in standard python form of positive or negative number
        :param columns: list of columns, or None to include all columns
        :param as_dict: if True then return a dictionary
        :param index: if True then include the index in the dictionary if as_dict=True
        :return: DataFrame or dictionary
        """
        if columns is None:
            columns = self._columns
        elif all([isinstance(i, bool) for i in columns]):
            if len(columns) != len(self._columns):
                raise ValueError('boolean column list must be same size of existing columns')
            columns = list(compress(self._columns, columns))
        data = dict()
        for column in columns:
            c = self._columns.index(column)
            data[column] = self._data[c][location]
        index_value = self._index[location]
        if as_dict:
            if index:
                data[self._index_name] = index_value
            return data
        else:
            data = {k: [data[k]] for k in data}  # this makes the dict items lists
            return DataFrame(data=data, index=[index_value], columns=columns, index_name=self._index_name,
                             sort=self._sort)
dataframe.py (project: raccoon, author: rsheftel)
def get_slice(self, start_index=None, stop_index=None, columns=None, as_dict=False):
        """
        For sorted DataFrames will return either a DataFrame or dict of all of the rows where the index is greater than
        or equal to the start_index if provided and less than or equal to the stop_index if provided. If either the
        start or stop index is None then will include from the first or last element, similar to standard python
        slice of [:5] or [5:]. Both end points are considered inclusive.

        :param start_index: lowest index value to include, or None to start from the first row
        :param stop_index: highest index value to include, or None to end at the last row
        :param columns: list of column names to include, or None for all columns
        :param as_dict: if True then return a tuple of (list of index, dict of column names: list data values)
        :return: DataFrame or tuple
        """
        if not self._sort:
            raise RuntimeError('Can only use get_slice on sorted DataFrames')

        if columns is None:
            columns = self._columns
        elif all([isinstance(i, bool) for i in columns]):
            if len(columns) != len(self._columns):
                raise ValueError('boolean column list must be same size of existing columns')
            columns = list(compress(self._columns, columns))

        start_location = bisect_left(self._index, start_index) if start_index is not None else None
        stop_location = bisect_right(self._index, stop_index) if stop_index is not None else None

        index = self._index[start_location:stop_location]
        data = dict()
        for column in columns:
            c = self._columns.index(column)
            data[column] = self._data[c][start_location:stop_location]

        if as_dict:
            return index, data
        else:
            data = data if data else None  # if the dict is empty, convert to None
            return DataFrame(data=data, index=index, columns=columns, index_name=self._index_name, sort=self._sort,
                             use_blist=self._blist)

