Python fromiter() example source code
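
np.fromiter(iterable, dtype, count=-1) builds a one-dimensional NumPy array directly from any iterable; passing count lets NumPy preallocate the result instead of growing it while consuming items. A minimal sketch of the basic call (toy data, not taken from any of the projects below):

import numpy as np

# Build a float array from a generator expression; count=5 preallocates the buffer.
squares = np.fromiter((i * i for i in range(5)), dtype=np.float64, count=5)
print(squares)  # [ 0.  1.  4.  9. 16.]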

plyfile.py (project: pointnet, author: charlesq34)
def make2d(array, cols=None, dtype=None):
    '''
    Make a 2D array from an array of arrays.  The `cols' and `dtype'
    arguments can be omitted if the array is not empty.

    '''
    if (cols is None or dtype is None) and not len(array):
        raise RuntimeError("cols and dtype must be specified for empty "
                           "array")

    if cols is None:
        cols = len(array[0])

    if dtype is None:
        dtype = array[0].dtype

    return _np.fromiter(array, [('_', dtype, (cols,))],
                        count=len(array))['_']
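
For illustration, a minimal standalone sketch of the structured-dtype trick make2d relies on (toy data, not from the pointnet/plyfile sources): wrapping each row in a single-field structured dtype lets fromiter consume an iterable of equal-length arrays, and indexing that field yields the 2D result.

import numpy as _np

rows = [_np.array([1.0, 2.0, 3.0]), _np.array([4.0, 5.0, 6.0])]
# One throwaway field '_' holding a (3,)-subarray; selecting it returns a (2, 3) array.
out = _np.fromiter(rows, [('_', rows[0].dtype, (3,))], count=len(rows))['_']
print(out.shape)  # (2, 3)
# NumPy >= 1.23 also accepts a plain subarray dtype here, e.g. _np.dtype((float, 3)).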
predictors.py (project: triage, author: dssg)
def _load_saved_predictions(self, existing_predictions, matrix_store):
        index = matrix_store.matrix.index
        score_lookup = {}
        for prediction in existing_predictions:
            score_lookup[(
                prediction.entity_id,
                prediction.as_of_date.date().isoformat()
            )] = prediction.score
        if 'as_of_date' in index.names:
            score_iterator = (
                score_lookup[(
                    entity_id,
                    datetime.strptime(dt, self.expected_matrix_ts_format).date().isoformat()
                )]
                for (entity_id, dt) in index
            )
        else:
            as_of_date = matrix_store.metadata['end_time'].date().isoformat()
            score_iterator = (score_lookup[(row, as_of_date)] for row in index)
        return numpy.fromiter(score_iterator, float)
dict.py (project: ParlAI, author: facebookresearch)
def txt2vec(self, text, vec_type=list):
        """Converts a string to a vector (list of ints).

        First runs a sentence tokenizer, then a word tokenizer.

        ``vec_type`` is the type of the returned vector if the input is a string.
        """
        if vec_type == np.ndarray:
            res = np.fromiter(
                (self[token] for token in self.tokenize(str(text))),
                np.int
            )
        elif vec_type == list or vec_type == tuple or vec_type == set:
            res = vec_type((self[token] for token in self.tokenize(str(text))))
        else:
            raise RuntimeError('Type {} not supported by dict'.format(vec_type))
        assert type(res) == vec_type
        return res
hmath.py (project: luckyhorse, author: alexmbird)
def weighted_avg_and_std(values, weights=None):
  '''
  Return the weighted average and standard deviation.

  `values`  - np.ndarray of values to average.
  `weights` - Optional np.ndarray of weights.  Otherwise all values are assumed
              equally weighted.

  Note the helpful np.fromiter() function for building arrays from iterables.
  '''
  if not isinstance(values, np.ndarray):
    raise TypeError("Values must be an np.array")
  if len(values) == 0:
    raise ValueError("Can't calculate with no values")
  if weights is not None:
    if not isinstance(weights, np.ndarray):
      raise TypeError("Weights must be None or an np.array")
    if len(values) != len(weights):
      raise ValueError("Length of values and weights differ")

  average = np.average(values, weights=weights)
  variance = np.average((values-average)**2, weights=weights)  # Fast and numerically precise
  return (average, math.sqrt(variance))
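
A small self-contained usage sketch (toy values; it simply repeats the helper's two np.average calls rather than importing the module above), with both input arrays built via np.fromiter:

import math
import numpy as np

values  = np.fromiter((float(x) for x in range(1, 6)), dtype=np.float64, count=5)
weights = np.fromiter((1.0 / x for x in range(1, 6)), dtype=np.float64, count=5)

# Same computation as weighted_avg_and_std above: weighted mean and weighted std.
average  = np.average(values, weights=weights)
variance = np.average((values - average) ** 2, weights=weights)
print(average, math.sqrt(variance))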
isp_data_pollution.py (project: isp-data-pollution, author: essandess)
def draw_links(self,n=1,log_sampling=False):
        """ Draw multiple random links. """
        urls = []
        domain_array = np.array([dmn for dmn in self.domain_links])
        domain_count = np.array([len(self.domain_links[domain_array[k]]) for k in range(domain_array.shape[0])])
        p = np.array([np.float(c) for c in domain_count])
        count_total = p.sum()
        if log_sampling:  # log-sampling [log(x+1)] to bias lower count domains
            p = np.fromiter((np.log1p(x) for x in p), dtype=p.dtype)
        if count_total > 0:
            p = p/p.sum()
            cnts = npr.multinomial(n, pvals=p)
            if n > 1:
                for k in range(cnts.shape[0]):
                    domain = domain_array[k]
                    cnt = min(cnts[k],domain_count[k])
                    for url in random.sample(self.domain_links[domain],cnt):
                        urls.append(url)
            else:
                k = int(np.nonzero(cnts)[0])
                domain = domain_array[k]
                url = random.sample(self.domain_links[domain],1)[0]
                urls.append(url)
        return urls
main.py (project: senti, author: stevenxxiu)
def __init__(self):
        super().__init__()
        stack = self._stack
        # classes
        self.classes_ = [0, 1, 2]
        self.average_classes = [0, 2]
        # data
        self.data_dir = 'data/twitter/semeval_2016_submit'
        with temp_chdir(self.data_dir):
            self.train_objs = JSONDecoder(stack.enter_context(open('train.json')))
            self.train_docs = FieldExtractor(self.train_objs, 'text')
            self.train_labels = np.fromiter(FieldExtractor(self.train_objs, 'label'), 'int32')
            distant_srs = [stack.enter_context(open('../emote/class_{}.txt'.format(i), encoding='utf-8')) for i in [0, 2]]
            self.distant_docs = BalancedSlice(distant_srs)
            self.distant_labels = BalancedSlice((RepeatSr(0), RepeatSr(2)))
            unsup_sr = stack.enter_context(open('../unsup/all.txt', encoding='utf-8'))
            self.unsup_docs = BalancedSlice([unsup_sr])
            self.val_objs = JSONDecoder(stack.enter_context(open('val.json')))
            self.val_docs = FieldExtractor(self.val_objs, 'text')
            self.val_labels = FieldExtractor(self.val_objs, 'label')
            self.test_objs = JSONDecoder(stack.enter_context(open('test.json')))
            self.test_docs = FieldExtractor(self.test_objs, 'text')
            self.test_labels = FieldExtractor(self.test_objs, 'label')
main.py (project: senti, author: stevenxxiu)
def __init__(self):
        super().__init__()
        stack = self._stack
        # classes
        self.classes_ = [0, 1, 2]
        self.average_classes = [0, 2]
        # data
        self.data_dir = 'data/imdb'
        with temp_chdir(self.data_dir):
            self.train_objs = JSONDecoder(stack.enter_context(open('train.json')))
            self.train_docs = FieldExtractor(self.train_objs, 'text')
            self.train_labels = np.fromiter(FieldExtractor(self.train_objs, 'label'), 'int32')
            unsup_sr = stack.enter_context(open('unsup.json'))
            self.unsup_docs = BalancedSlice([FieldExtractor(unsup_sr, 'text')])
            self.val_objs = JSONDecoder(stack.enter_context(open('val.json')))
            self.val_docs = FieldExtractor(self.val_objs, 'text')
            self.val_labels = FieldExtractor(self.val_objs, 'label')
            self.test_objs = JSONDecoder(stack.enter_context(open('test.json')))
            self.test_docs = FieldExtractor(self.test_objs, 'text')
            self.test_labels = FieldExtractor(self.test_objs, 'label')
main.py (project: senti, author: stevenxxiu)
def __init__(self):
        super().__init__()
        stack = self._stack
        # classes
        self.classes_ = [1, 2, 3, 4, 5]
        self.average_classes = [1, 2, 3, 4, 5]
        # data
        self.data_dir = 'data/yelp'
        with temp_chdir(self.data_dir):
            self.train_objs = JSONDecoder(stack.enter_context(open('train.json')))
            self.train_docs = FieldExtractor(self.train_objs, 'text')
            self.train_labels = np.fromiter(FieldExtractor(self.train_objs, 'stars'), 'int32')
            self.val_objs = JSONDecoder(stack.enter_context(open('val.json')))
            self.val_docs = FieldExtractor(self.val_objs, 'text')
            self.val_labels = FieldExtractor(self.val_objs, 'stars')
            self.test_objs = JSONDecoder(stack.enter_context(open('test.json')))
            self.test_docs = FieldExtractor(self.test_objs, 'text')
            self.test_labels = FieldExtractor(self.test_objs, 'stars')
linear_regression.py (project: lazyprogrammer, author: inhwane)
def test():
    # create a bunch of random data for X-axis
    # uniformly generate 2-D vectors in [-50, 50]
    X = 100*np.random.random([NUM_SAMPLES, 2]) - 50

    # create a bunch of random data for Y-axis
    # let's say y = 5x1 - 2x2 + 3 + noise
    # true beta is then: [3, 5, -2]
    Y = np.fromiter((5*x1 - 2*x2 + 3 for x1, x2 in X), np.float, count=NUM_SAMPLES)
    Y += np.random.standard_normal(NUM_SAMPLES)

    # fit
    lr = LinearRegression()
    lr.fit(X,Y)
    print "beta estimated: %s" % lr.beta

    r2 = lr.score(X,Y)
    print "R-square is: %s" % r2

    # predict
    x = (100, 100)
    h = lr.predict(np.array([x]))
    y = 5*x[0] - 2*x[1] + 3
    print "Extrapolated prediction: %.2f\nActual: %.2f" % (h, y)
space_io.py (project: semspaces, author: pmandera)
def read_vectors(fin, dtype='float64', delim=' '):
        """Return a list with tuples (word, word_vector)."""
        reader = csv.reader(fin, delimiter=delim, quoting=csv.QUOTE_NONE)
        word_vectors = []

        ncol = None

        for row in reader:
            if ncol is None:
                if len(row) == 2:
                    ncol = int(row[1])
                    continue
                else:
                    ncol = len(row) - 1

            word = unicode(row[0], 'utf-8', errors='replace')

            word_vector = np.fromiter(
                [float(v) for v in row[1: ncol + 1]],
                dtype=dtype, count=ncol)

            word_vectors.append((word, word_vector))

        return word_vectors
processors.py (project: django-watermark-images, author: abarto)
def lsb_encode(data, image):
    bytes_io = BytesIO()
    dump(data, file=bytes_io)
    data_bytes = bytes_io.getvalue()
    data_bytes_array = np.fromiter(data_bytes, dtype=np.uint8)
    data_bits_list = np.unpackbits(data_bytes_array).tolist()
    data_bits_list += [0] * (image.size[0] * image.size[1] - len(data_bits_list))
    watermark = Image.frombytes(data=bytes(data_bits_list), size=image.size, mode='L')
    red, green, blue = image.split()
    watermarked_red = ImageMath.eval("convert(a&0xFE|b&0x1,'L')", a=red, b=watermark)
    watermarked_image = Image.merge("RGB", (watermarked_red, green, blue))
    return watermarked_image
groupby.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
    """
    reconstruct labels from observed group ids

    Parameters
    ----------
    xnull: boolean,
        if nulls are excluded; i.e. -1 labels are passed through
    """
    from pandas.hashtable import unique_label_indices

    if not xnull:
        lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8')
        shape = np.asarray(shape, dtype='i8') + lift

    if not _int64_overflow_possible(shape):
        # obs ids are deconstructable! take the fast route!
        out = decons_group_index(obs_ids, shape)
        return out if xnull or not lift.any() \
            else [x - y for x, y in zip(out, lift)]

    i = unique_label_indices(comp_ids)
    i8copy = lambda a: a.astype('i8', subok=False, copy=True)
    return [i8copy(lab[i]) for lab in labels]
util.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def cartesian_product(X):
    '''
    Numpy version of itertools.product or pandas.compat.product.
    Sometimes faster (for large inputs)...

    Examples
    --------
    >>> cartesian_product([list('ABC'), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
    array([1, 2, 1, 2, 1, 2])]

    '''

    lenX = np.fromiter((len(x) for x in X), dtype=int)
    cumprodX = np.cumproduct(lenX)

    a = np.roll(cumprodX, 1)
    a[0] = 1

    b = cumprodX[-1] / cumprodX

    return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]),
                    np.product(a[i]))
            for i, x in enumerate(X)]
Variogram.py (project: scikit-gstat, author: mmaelicke)
def r(self):
        """
        Pearson correlation of the fitted Variogram

        :return: Pearson correlation coefficient between the experimental and the modeled variogram
        """
        # get the experimental and theoretical variogram and calculate means
        experimental, model = self.__model_deviations()
        mx = np.nanmean(experimental)
        my = np.nanmean(model)

        # calculate the single pearson correlation terms
        term1 = np.nansum(np.fromiter(map(lambda x, y: (x-mx) * (y-my), experimental, model), np.float))

        t2x = np.nansum(np.fromiter(map(lambda x: (x-mx)**2, experimental), np.float))
        t2y = np.nansum(np.fromiter(map(lambda y: (y-my)**2, model), np.float))

        return term1 / (np.sqrt(t2x * t2y))
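
Apart from the NaN-aware sums, this is the ordinary Pearson correlation; for NaN-free inputs the same value is given by np.corrcoef(experimental, model)[0, 1].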
datasource.py (project: coordinates, author: markovmodel)
def trajectory_lengths(self, stride=1, skip=0):
        r""" Returns the length of each trajectory.

        Parameters
        ----------
        stride : int
            return value is the number of frames of the trajectories when
            running through them with a step size of `stride`.
        skip : int
            skip parameter

        Returns
        -------
        array(dtype=int) : containing length of each trajectory
        """
        n = self.number_of_trajectories()
        if isinstance(stride, np.ndarray):
            return np.fromiter((self.trajectory_length(itraj, stride)
                                for itraj in range(n)),
                               dtype=int, count=n)
        else:
            return np.fromiter(((l - skip - 1) // stride + 1 for l in self._lengths),
                               dtype=int, count=n)
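
As a quick check of the length formula: with l = 10 frames, skip = 2 and stride = 3, the frames visited are indices 2, 5 and 8, and (10 - 2 - 1) // 3 + 1 = 3 agrees.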
data_sources_test.py (project: speech_ml, author: coopie)
def test_ttv_array_like_data_source(self):
        dummy_data_source = DummyDataSource()
        subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
        ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

        array_ds = TTVArrayLikeDataSource(dummy_data_source, ttv)

        self.assertEqual(len(array_ds), 3)

        all_values = np.fromiter((x for x in array_ds[:]), dtype='int16')

        self.assertTrue(
            np.all(
                np.in1d(
                    all_values,
                    np.array([1, 2, 3])
                )
            )
        )
langid.py (project: CVProject, author: hieuxinhe94)
def set_languages(self, langs=None):
    logger.debug("restricting languages to: %s", langs)

    # Unpack the full original model. This is needed in case the language set
    # has been previously trimmed, and the new set is not a subset of the current
    # set.
    nb_ptc, nb_pc, nb_classes = self.__full_model

    if langs is None:
      self.nb_classes = nb_classes 
      self.nb_ptc = nb_ptc
      self.nb_pc = nb_pc

    else:
      # We were passed a restricted set of languages. Trim the arrays accordingly
      # to speed up processing.
      for lang in langs:
        if lang not in nb_classes:
          raise ValueError("Unknown language code %s" % lang)

      subset_mask = np.fromiter((l in langs for l in nb_classes), dtype=bool)
      self.nb_classes = [ c for c in nb_classes if c in langs ]
      self.nb_ptc = nb_ptc[:,subset_mask]
      self.nb_pc = nb_pc[subset_mask]
generate_stats.py (project: ekphrasis, author: cbaziotis)
def write_stats_to_file(filename, counts, mincount):
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename + ".txt", 'w', encoding="utf-8") as f:
        if args.perc == 0:
            percentile = 0
        else:
            percentile = numpy.percentile(numpy.fromiter(counts.values(), numpy.int32), args.perc)
        threshold = max(percentile, mincount)

        for k, v in counts.items():

            if v >= threshold:
                entry = k.split(SEPARATOR)
                entry.append(str(v))
                f.write('\t'.join(entry) + '\n')

    if args.pickle:
        with open(filename + ".pickle", 'wb') as f:
            pickle.dump(counts, f)
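
For illustration only (toy counts and a hypothetical mincount of 5), the percentile-thresholding step in isolation:

import numpy

counts = {'the cat': 12, 'a dog': 3, 'the dog': 7}  # toy n-gram counts
arr = numpy.fromiter(counts.values(), numpy.int32, count=len(counts))
threshold = max(numpy.percentile(arr, 90), 5)        # 90th percentile vs. mincount
print(threshold)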
sim_benchmark.py (project: VecShare, author: JaredFern)
def _eval_all(emb_simset):
    inp_emb = {}
    for wordvec in emb_simset.iterrows():
        word, vec = wordvec[1][0], wordvec[1][1:].tolist()
        vec = np.fromiter(map(float, vec[1:]), dtype = np.float32)
        norm = np.linalg.norm(vec)
        inp_emb[word] = vec/norm if (norm != 0) else [vec]
    score_dict = {}
    score_dict['score'] = 0
    for root,dirs,files in os.walk('/home/jared/vecshare/Test_Input'):
        files = [testfile for testfile in files if testfile[0]!='.']
        for testfile in files:
            f_path = '/home/jared/vecshare/Test_Input/'+testfile
            score_dict[testfile[:-4].strip().lower().replace(" ", "_").replace("-", "_")] = _eval_sim(f_path, inp_emb)
            if testfile != 'mc-30.csv':
                score_dict['score'] += _eval_sim(f_path, inp_emb)/(len(files)-1)
    return score_dict
astro_tools.py (project: scikit-discovery, author: MITHaystack)
def cdf_dlf(x, A, m1, a1, m2, a2, start=-26):
    ''' 
    Cumulative Schechter function. Second LF is set to be 2*A of first LF.

    @param x: magnitude
    @param A: Scale factor
    @param m1: Knee of distribution 1
    @param a1: Faint-end turnover of first lf
    @param m2: Knee of distribution 2
    @param a2: Faint-end turnover of second lf
    @param start: Brightest magnitude

    @return Probability that galaxy has a magnitude greater than x
    '''
    def integrate(in_x):
        return quad(dlf, start,in_x,args=(A,m1,a1,m2,a2))[0]

    if np.isscalar(x):
        x = np.array([x])

    return np.fromiter(map(integrate,x),np.float,count=len(x))
astro_tools.py (project: scikit-discovery, author: MITHaystack)
def inv_cdf_dlf(p, A, m1, a1, m2, a2, start=-26, end=-15):

    ''' 
    Inverse Cumulative Schechter function. Second LF is set to be 2*A of first LF.

    @param p: probability
    @param A: Scale factor
    @param m1: Knee of distribution 1
    @param a1: Faint-end turnover of first lf
    @param m2: Knee of distribution 2
    @param a2: Faint-end turnover of second lf
    @param start: Brightest magnitude
    @param end: Faintest possible magnitude

    @return Magnitude associated with cdf probability p
    '''
    def get_root(p):
        return root(lambda x: cdf_dlf(x,A,m1,a1,m2,a2,start)-p, (start + end)/2).x[0]

    if np.isscalar(p):
        return get_root(p)
    else:
        return np.fromiter(map(get_root,p),np.float,count=len(p))
dzh.py (project: kquant_data, author: wukan1986)
def _read_symbol(self):
        dividends = []

        rawsymbol = self.f.read(16)
        if rawsymbol == b'':
            raise EOFError

        symbol = unpack('16s', rawsymbol)[0].replace(b'\x00', b'')

        rawdate = self.f.read(4)

        dt = np.dtype([('time', np.int32),
                       ('split', np.float32),
                       ('purchase', np.float32),
                       ('purchase_price', np.float32),
                       ('dividend', np.float32)])
        while (rawdate) != b"\xff" * 4:
            dividend = np.frombuffer(rawdate + self.f.read(16), dtype=dt)
            dividends.append(dividend)

            rawdate = self.f.read(4)
            if rawdate == b'':
                break

        return (symbol, np.fromiter(dividends, dtype=dt))
points.py (project: demosys-py, author: Contraz)
def points_random_3d(count, range_x=(-10.0, 10.0), range_y=(-10.0, 10.0), range_z=(-10.0, 10.0), seed=None):
    """
    Generates random positions

    :param count: Number of points
    :param range_x: min-max range for x axis
    :param range_y: min-max range for y axis
    :param range_z: min-max range for z axis
    :param seed: The random seed to be used
    """
    random.seed(seed)

    def gen():
        for i in range(count):
            yield random.uniform(*range_x)
            yield random.uniform(*range_y)
            yield random.uniform(*range_z)

    data = numpy.fromiter(gen(), count=count * 3, dtype=numpy.float32)
    pos = VBO(data)
    vao = VAO("geometry:points_random_3d", mode=GL.GL_POINTS)
    vao.add_array_buffer(GL.GL_FLOAT, pos)
    vao.map_buffer(pos, "in_position", 3)
    vao.build()
    return vao
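
A minimal sketch of the same interleaved-generator pattern without the demosys GL/VAO objects (the reshape is an added illustration, not part of the original function):

import random
import numpy

def gen(count, rng=(-10.0, 10.0)):
    for _ in range(count):
        yield random.uniform(*rng)  # x
        yield random.uniform(*rng)  # y
        yield random.uniform(*rng)  # z

# count * 3 scalars stream through fromiter, then reshape into (count, 3) positions.
points = numpy.fromiter(gen(4), dtype=numpy.float32, count=4 * 3).reshape(-1, 3)
print(points.shape)  # (4, 3)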
ugoImage.py (project: sudomemo-utils, author: Sudomemo)
def parseNpf(self, buffer, imageWidth, imageHeight):
        # Read the header
        sectionLengths = self._readUgarHeader(buffer)
        # Read the palette data (section number 1)
        paletteData = np.frombuffer(buffer.read(roundToPower(sectionLengths[0])), dtype=np.uint16)
        # Read the image data (section number 2)
        imageData = np.frombuffer(buffer.read(sectionLengths[1]), dtype=np.uint8)
        # NPF image data uses 1 byte per 2 pixels, so we need to split that byte into two
        imageData = np.stack((np.bitwise_and(imageData, 0x0f), np.bitwise_and(imageData >> 4, 0x0f)), axis=-1).flatten()
        # Unpack palette colors
        palette = unpackColors(paletteData, useAlpha=False)
        # Convert each pixel from a palette index to full color
        pixels = np.fromiter((palette[i] if i > 0 else 0 for i in imageData), dtype=">u4")
        # Clip the image data and create a Pillow image from it
        return Image.fromarray(self._clipImageData(pixels, (imageWidth, imageHeight)), mode="RGBA")

    # Write the image as an npf to buffer
test_gradient_boosting.py (project: Parallel-SGD, author: angadgill)
def test_staged_predict():
    # Test whether staged decision function eventually gives
    # the same prediction.
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=1, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test = X[200:]
    clf = GradientBoostingRegressor()
    # test raise ValueError if not fitted
    assert_raises(ValueError, lambda X: np.fromiter(
        clf.staged_predict(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # test if prediction for last stage equals ``predict``
    for y in clf.staged_predict(X_test):
        assert_equal(y.shape, y_pred.shape)

    assert_array_equal(y_pred, y)
freq_conversions.py (project: Thymus-timeseries, author: sidorof)
def _filter_dates(dates, freq, kwargs):
    """
    This function filters dates to indicate end of periods for ordinals.
    """

    indicator = DATETIME_DICT[freq]

    if isinstance(indicator, str):
        # no special behavior
        indicators = np.fromiter(
            [date.__getattribute__(indicator) for date in dates],
            dtype=np.int32)

        return np.argwhere(indicators[1:] - indicators[:-1] > 0)

    else:
        # apply a function
        indicators = np.fromiter(
            [indicator(date, kwargs) for date in dates], dtype=np.int32)

        return np.argwhere(indicators[1:] - indicators[:-1] > 0)
market-memprof.py (project: DBAdapter, author: ContinuumIO)
def write_tables():
    import tables

    dtype = np.dtype("S7,f4,f4,f4,f4,i4")
    t0 = time()
    sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
                          for i in xrange(N)), dtype, count=N)
    t1 = time() - t0
    print "Created sarray with %d rows in %.3fs" % (N, t1)

    t0 = time()
    h5f = tables.openFile("market.h5", "w")
    table = h5f.createTable(h5f.root, "market", dtype)
    table.append(sarray)
    h5f.close()
    t1 = time() - t0
    print "[PyTables] Stored %d rows in %.3fs" % (N, t1)
market-memprof.py (project: DBAdapter, author: ContinuumIO)
def write_tables2():
    import tables

    dtype = np.dtype("S7,f4,f4,f4,f4,i4")
    # t0 = time()
    # sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
    #                       for i in xrange(N)), dtype, count=N)
    # t1 = time() - t0
    # print "Created sarray with %d rows in %.3fs" % (N, t1)

    t0 = time()
    h5f = tables.openFile("market.h5", "w")
    table = h5f.createTable(h5f.root, "market", dtype)
    count = 10000
    for j in xrange(count, N, count):
        sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
                              for i in xrange(j)), dtype)
        table.append(sarray)
    h5f.close()
    t1 = time() - t0
    print "[PyTables] Stored %d rows in %.3fs" % (N, t1)
__init__.py (project: cellranger, author: 10XGenomics)
def write_umi_info(pickles, filename):
    """ Write an H5 with (bc, chain, read_count) tuples """
    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)

    with tables.open_file(filename, 'w', filters=filters) as h5:
        umi_info = vdj_umi_info.create_arrays(h5)

        bc_to_int = {}
        chain_to_int = {}

        for pickle in pickles:
            bc_chain_umi_counts = cPickle.load(open(pickle))

            for bc, chain_umis in bc_chain_umi_counts.iteritems():
                for chain, umi_counts in chain_umis.iteritems():
                    n_umis = len(umi_counts)

                    if chain != cr_constants.MULTI_REFS_PREFIX and n_umis > 0:
                        if bc not in bc_to_int:
                            bc_to_int[bc] = len(bc_to_int)
                        if chain not in chain_to_int:
                            chain_to_int[chain] = len(chain_to_int)

                        umi_info['barcode_idx'].append(np.full(n_umis, bc_to_int[bc],
                                                               dtype=vdj_umi_info.get_dtype('barcode_idx')))
                        umi_info['chain_idx'].append(np.full(n_umis, chain_to_int[chain],
                                                             dtype=vdj_umi_info.get_dtype('chain_idx')))
                        umi_info['reads'].append(np.fromiter(umi_counts.itervalues(),
                                                             vdj_umi_info.get_dtype('reads'), count=n_umis))

        vdj_umi_info.set_ref_column(h5, 'barcodes', np.array(sorted(bc_to_int.keys(), key=bc_to_int.get)))
        vdj_umi_info.set_ref_column(h5, 'chains', np.array(sorted(chain_to_int.keys(), key=chain_to_int.get)))
find.py (project: circletracking, author: caspervdw)
def where_close(pos, separation, intensity=None):
    """ Returns indices of features that are closer than separation from other
    features. When intensity is given, the one with the lowest intensity is
    returned; otherwise the most top-left one is returned (to avoid randomness).

    To be implemented in trackpy v0.4"""
    if len(pos) == 0:
        return []
    separation = validate_tuple(separation, pos.shape[1])
    if any([s == 0 for s in separation]):
        return []
    # Rescale positions, so that pairs are identified below a distance
    # of 1.
    pos_rescaled = pos / separation
    duplicates = cKDTree(pos_rescaled, 30).query_pairs(1 - 1e-7)
    if len(duplicates) == 0:
        return []
    index_0 = np.fromiter((x[0] for x in duplicates), dtype=int)
    index_1 = np.fromiter((x[1] for x in duplicates), dtype=int)
    if intensity is None:
        to_drop = np.where(np.sum(pos_rescaled[index_0], 1) >
                           np.sum(pos_rescaled[index_1], 1),
                           index_1, index_0)
    else:
        intensity_0 = intensity[index_0]
        intensity_1 = intensity[index_1]
        to_drop = np.where(intensity_0 > intensity_1, index_1, index_0)
        edge_cases = intensity_0 == intensity_1
        if np.any(edge_cases):
            index_0 = index_0[edge_cases]
            index_1 = index_1[edge_cases]
            to_drop[edge_cases] = np.where(np.sum(pos_rescaled[index_0], 1) >
                                           np.sum(pos_rescaled[index_1], 1),
                                           index_1, index_0)
    return np.unique(to_drop)

