python类ifilter()的实例源码-面圈网

dstream.py 文件源码项目：MIT-Thesis 作者: alec-heif 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def countByValueAndWindow(self, windowDuration, slideDuration, numPartitions=None):
        """
        Return a new DStream holding, for each sliding window over this DStream,
        the (value, count) pairs whose count is greater than zero.

        @param windowDuration: width of the window; must be a multiple of this DStream's
                              batching interval
        @param slideDuration:  sliding interval of the window (i.e., the interval after which
                              the new DStream will generate RDDs); must be a multiple of this
                              DStream's batching interval
        @param numPartitions:  number of partitions of each RDD in the new DStream.
        """
        # Tag every element with an initial tally of one.
        pairs = self.map(lambda value: (value, 1))
        # operator.add / operator.sub are handed to reduceByKeyAndWindow
        # (presumably as the reduce and inverse-reduce functions).
        tallies = pairs.reduceByKeyAndWindow(
            operator.add, operator.sub, windowDuration, slideDuration, numPartitions)
        # Keep only the values whose tally is still positive.
        return tallies.filter(lambda pair: pair[1] > 0)

rdd.py 文件源码项目：MIT-Thesis 作者: alec-heif 项目源码文件源码阅读 36 收藏 0 点赞 0 评论 0

def lookup(self, key):
        """
        Collect every value stored under `key` in this RDD of pairs.

        When the RDD has a partitioner, the job is run only on the
        partition index that ``self.partitioner(key)`` returns; otherwise
        the matching values are collected from the whole RDD.

        >>> l = range(1000)
        >>> rdd = sc.parallelize(zip(l, l), 10)
        >>> rdd.lookup(42)  # slow
        [42]
        >>> sorted = rdd.sortByKey()
        >>> sorted.lookup(42)  # fast
        [42]
        >>> sorted.lookup(1024)
        []
        >>> rdd2 = sc.parallelize([(('a', 'b'), 'c')]).groupByKey()
        >>> list(rdd2.lookup(('a', 'b'))[0])
        ['c']
        """
        matches = self.filter(lambda pair: pair[0] == key).values()

        if self.partitioner is None:
            # No partitioner known: gather the matches from everywhere.
            return matches.collect()

        # Restrict the job to the single partition the key maps to.
        return self.ctx.runJob(matches, lambda part: part, [self.partitioner(key)])

vocab_getty_edu.py 文件源码项目：StrepHit 作者: Wikidata 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def load_into_db(self, table):
        def callback(response):
            """ Parse the CSV text in the response body and insert it into `table`.

            The first line of the body is skipped, every other line is split
            on its first comma, and only lines that produce exactly two
            fields are inserted. Afterwards everything returned by
            ``self.finalize_data(table)`` is yielded.
            """
            lines = response.body_as_unicode().split('\r\n')[1:]
            fields = (line.split(',', 1) for line in lines)
            rows = (pair for pair in fields if len(pair) == 2)

            cursor = self.db_connection.cursor()
            try:
                # The table name is interpolated because SQL placeholders
                # cannot stand in for identifiers; the values are bound.
                cursor.executemany('INSERT INTO %s(pk, data) VALUES (?, ?)' % table, rows)
            except sqlite3.Error:
                # Undo the partial insert and let the caller see the error.
                self.db_connection.rollback()
                raise
            else:
                self.db_connection.commit()
            finally:
                cursor.close()

            for item in self.finalize_data(table):
                yield item
        return callback

tags.py 文件源码项目：idascripts 作者: ctfhacker 项目源码文件源码阅读 36 收藏 0 点赞 0 评论 0

def everything(use_cache=False):
    '''Return all the tags within the database as (globals, contents, frames).

    With `use_cache` set, the globals and contents come from ``cached()``;
    otherwise they are gathered afresh. Frames are always gathered afresh.
    '''
    if use_cache:
        global_tags, content_tags = cached()

    else:
        print >>output, '--> Grabbing globals...'
        global_tags = {address : tags for address, tags in globals()}

        print >>output, '--> Grabbing contents from all functions...'
        content_tags = {}
        for ea in db.functions():
            entries = function(ea)
            # Functions that produced nothing are skipped.
            if entries:
                content_tags.update(dict(entries))

    print >>output, '--> Grabbing frames from all functions...'
    frame_tags = {address : tags for address, tags in frames()}
    return (global_tags, content_tags, frame_tags)

compat.py 文件源码项目：Price-Comparator 作者: Thejas-1 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def __and__(self, other):
                '''Return the multiset intersection of two counters.

                Every element present in both operands is kept with the
                smaller of its two counts; elements whose smaller count is
                not positive are left out.

                >>> Counter('abbb') & Counter('bcc')
                Counter({'b': 1})

                '''
                if not isinstance(other, Counter):
                    return NotImplemented
                # Walk the operand with fewer keys and probe the other one.
                if len(self) < len(other):
                    larger, smaller = other, self
                else:
                    larger, smaller = self, other
                common = Counter()
                for key in smaller:
                    if key not in larger:
                        continue
                    shared = min(larger[key], smaller[key])
                    if shared > 0:
                        common[key] = shared
                return common

filecmp.py 文件源码项目：sslstrip-hsts-openwrt 作者: adde88 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def phase1(self): # Compute common names
        # Index both listings by os.path.normcase(name) so the comparison
        # uses the normalised key while the original spelling is reported.
        left = dict((os.path.normcase(name), name) for name in self.left_list)
        right = dict((os.path.normcase(name), name) for name in self.right_list)
        self.common = [left[key] for key in left if key in right]
        self.left_only = [left[key] for key in left if key not in right]
        self.right_only = [right[key] for key in right if key not in left]

connection.py 文件源码项目：baiji 作者: bodylabs 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def glob(self, prefix, pattern):
        '''
        Lazily iterate over the full URLs listed under `prefix` that match
        the fnmatch-style pattern `prefix + pattern`.

        e.g.

        paths = list(s3.glob(
            prefix='s3://bodylabs-ants-go-marching/output/feet_on_floor/eff2a0e/',
            pattern='*_alignment.ply'
        ))

        '''
        import fnmatch
        full_pattern = prefix + pattern
        # The listing is requested right away; only the matching is lazy.
        candidates = self.ls(prefix, return_full_urls=True)
        return (url for url in candidates if fnmatch.fnmatch(url, full_pattern))

compat.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 41 收藏 0 点赞 0 评论 0

def __and__(self, other):
                '''Return the multiset intersection of two counters.

                Every element present in both operands is kept with the
                smaller of its two counts; elements whose smaller count is
                not positive are left out.

                >>> Counter('abbb') & Counter('bcc')
                Counter({'b': 1})

                '''
                if not isinstance(other, Counter):
                    return NotImplemented
                # Walk the operand with fewer keys and probe the other one.
                if len(self) < len(other):
                    larger, smaller = other, self
                else:
                    larger, smaller = self, other
                common = Counter()
                for key in smaller:
                    if key not in larger:
                        continue
                    shared = min(larger[key], smaller[key])
                    if shared > 0:
                        common[key] = shared
                return common

compat.py 文件源码项目：neighborhood_mood_aws 作者: jarrellmark 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def __and__(self, other):
                '''Return the multiset intersection of two counters.

                Every element present in both operands is kept with the
                smaller of its two counts; elements whose smaller count is
                not positive are left out.

                >>> Counter('abbb') & Counter('bcc')
                Counter({'b': 1})

                '''
                if not isinstance(other, Counter):
                    return NotImplemented
                # Walk the operand with fewer keys and probe the other one.
                if len(self) < len(other):
                    larger, smaller = other, self
                else:
                    larger, smaller = self, other
                common = Counter()
                for key in smaller:
                    if key not in larger:
                        continue
                    shared = min(larger[key], smaller[key])
                    if shared > 0:
                        common[key] = shared
                return common

Counter.py 文件源码项目：chewBBACA_deprecated 作者: mickaelsilva 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def __and__(self, other):
        '''Return the multiset intersection of two counters.

        Every element present in both operands is kept with the smaller
        of its two counts; elements whose smaller count is not positive
        are left out.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        # Walk the operand with fewer keys and probe the other one.
        if len(self) < len(other):
            larger, smaller = other, self
        else:
            larger, smaller = self, other
        common = Counter()
        for key in smaller:
            if key not in larger:
                continue
            shared = min(larger[key], smaller[key])
            if shared > 0:
                common[key] = shared
        return common

Counter.py 文件源码项目：chewBBACA_deprecated 作者: mickaelsilva 项目源码文件源码阅读 61 收藏 0 点赞 0 评论 0

def __and__(self, other):
        '''Return the multiset intersection of two counters.

        Every element present in both operands is kept with the smaller
        of its two counts; elements whose smaller count is not positive
        are left out.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        # Walk the operand with fewer keys and probe the other one.
        if len(self) < len(other):
            larger, smaller = other, self
        else:
            larger, smaller = self, other
        common = Counter()
        for key in smaller:
            if key not in larger:
                continue
            shared = min(larger[key], smaller[key])
            if shared > 0:
                common[key] = shared
        return common

dstream.py 文件源码项目：pyspark 作者: v-v-vishnevskiy 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def countByValueAndWindow(self, windowDuration, slideDuration, numPartitions=None):
        """
        Tally the elements seen in a sliding window over this DStream, drop
        the values whose tally is not positive, and return the result of
        calling ``count()`` on what remains.

        @param windowDuration: width of the window; must be a multiple of this DStream's
                              batching interval
        @param slideDuration:  sliding interval of the window (i.e., the interval after which
                              the new DStream will generate RDDs); must be a multiple of this
                              DStream's batching interval
        @param numPartitions:  number of partitions of each RDD in the new DStream.
        """
        # Tag every element with an initial tally of one.
        pairs = self.map(lambda value: (value, 1))
        # operator.add / operator.sub are handed to reduceByKeyAndWindow
        # (presumably as the reduce and inverse-reduce functions).
        tallies = pairs.reduceByKeyAndWindow(
            operator.add, operator.sub, windowDuration, slideDuration, numPartitions)
        positive = tallies.filter(lambda pair: pair[1] > 0)
        # NOTE(review): the trailing count() presumably collapses the
        # per-value tallies into a number of distinct values -- confirm that
        # is intended rather than returning the (value, count) pairs.
        return positive.count()

dstream.py 文件源码项目：pyspark 作者: v-v-vishnevskiy 项目源码文件源码阅读 44 收藏 0 点赞 0 评论 0

def updateStateByKey(self, updateFunc, numPartitions=None):
        """
        Return a new "state" DStream where the state for each key is updated by applying
        the given function on the previous state of the key and the new values of the key.

        @param updateFunc: State update function, called as
                           ``updateFunc(new_values, previous_state)``. If it returns None,
                           the corresponding state key-value pair will be eliminated.
        @param numPartitions: partition count used for the grouping steps; when None,
                           ``self._sc.defaultParallelism`` is used.
        """
        if numPartitions is None:
            numPartitions = self._sc.defaultParallelism

        def advance(t, previous, batch):
            # `previous` is None when there is no earlier state to merge with.
            if previous is not None:
                grouped = previous.cogroup(batch.partitionBy(numPartitions), numPartitions)
                # Pair each key's new values with its first earlier state (or None).
                grouped = grouped.mapValues(
                    lambda both: (list(both[1]), list(both[0])[0] if len(both[0]) else None))
            else:
                grouped = batch.groupByKey(numPartitions).mapValues(
                    lambda values: (list(values), None))
            updated = grouped.mapValues(lambda pair: updateFunc(pair[0], pair[1]))
            # Keys whose new state is None are dropped.
            return updated.filter(lambda entry: entry[1] is not None)

        jfunc = TransformFunction(self._sc, advance,
                                  self._sc.serializer, self._jrdd_deserializer)
        jstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jfunc)
        return DStream(jstream.asJavaDStream(), self._ssc, self._sc.serializer)

rdd.py 文件源码项目：pyspark 作者: v-v-vishnevskiy 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def lookup(self, key):
        """
        Collect every value stored under `key` in this RDD of pairs.

        When the RDD has a partitioner, the job is run only on the
        partition index that ``self.partitioner(key)`` returns; otherwise
        the matching values are collected from the whole RDD.

        >>> l = range(1000)
        >>> rdd = sc.parallelize(zip(l, l), 10)
        >>> rdd.lookup(42)  # slow
        [42]
        >>> sorted = rdd.sortByKey()
        >>> sorted.lookup(42)  # fast
        [42]
        >>> sorted.lookup(1024)
        []
        >>> rdd2 = sc.parallelize([(('a', 'b'), 'c')]).groupByKey()
        >>> list(rdd2.lookup(('a', 'b'))[0])
        ['c']
        """
        matches = self.filter(lambda pair: pair[0] == key).values()

        if self.partitioner is None:
            # No partitioner known: gather the matches from everywhere.
            return matches.collect()

        # Restrict the job to the single partition the key maps to.
        return self.ctx.runJob(matches, lambda part: part, [self.partitioner(key)])

compat.py 文件源码项目：FancyWord 作者: EastonLee 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def __and__(self, other):
                '''Return the multiset intersection of two counters.

                Every element present in both operands is kept with the
                smaller of its two counts; elements whose smaller count is
                not positive are left out.

                >>> Counter('abbb') & Counter('bcc')
                Counter({'b': 1})

                '''
                if not isinstance(other, Counter):
                    return NotImplemented
                # Walk the operand with fewer keys and probe the other one.
                if len(self) < len(other):
                    larger, smaller = other, self
                else:
                    larger, smaller = self, other
                common = Counter()
                for key in smaller:
                    if key not in larger:
                        continue
                    shared = min(larger[key], smaller[key])
                    if shared > 0:
                        common[key] = shared
                return common

ASLLearner.py 文件源码项目：SignGlove 作者: papachristoumarios 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def testSVM(clf, dataSet):
    labels, instances = dataSet.getLabelsAndInstances()
    predictionsLoL = map(clf.predict, instances)

    numWrong = 0
    numPred = 0
    for i in range(len(labels)):
        l = labels[i]
        predList = predictionsLoL[i]
        wrongList = it.ifilter(lambda x: x!=l, predList)

        numWrong += len(wrongList)
        numPred += len(predList)

    print "Wrong: ", numWrong
    print "Predicted: ", numPred
    print "Percent: ", float(numWrong)/numPred

compat.py 文件源码项目：beepboop 作者: nicolehe 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def __and__(self, other):
                '''Return the multiset intersection of two counters.

                Every element present in both operands is kept with the
                smaller of its two counts; elements whose smaller count is
                not positive are left out.

                >>> Counter('abbb') & Counter('bcc')
                Counter({'b': 1})

                '''
                if not isinstance(other, Counter):
                    return NotImplemented
                # Walk the operand with fewer keys and probe the other one.
                if len(self) < len(other):
                    larger, smaller = other, self
                else:
                    larger, smaller = self, other
                common = Counter()
                for key in smaller:
                    if key not in larger:
                        continue
                    shared = min(larger[key], smaller[key])
                    if shared > 0:
                        common[key] = shared
                return common

counter.py 文件源码项目：helios-server-mixnet 作者: RunasSudo 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def __and__(self, other):
        '''Return the multiset intersection of two counters.

        Every element present in both operands is kept with the smaller
        of its two counts; elements whose smaller count is not positive
        are left out.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        # Walk the operand with fewer keys and probe the other one.
        if len(self) < len(other):
            larger, smaller = other, self
        else:
            larger, smaller = self, other
        common = Counter()
        for key in smaller:
            if key not in larger:
                continue
            shared = min(larger[key], smaller[key])
            if shared > 0:
                common[key] = shared
        return common

compat.py 文件源码项目：kind2anki 作者: prz3m 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def __and__(self, other):
                '''Return the multiset intersection of two counters.

                Every element present in both operands is kept with the
                smaller of its two counts; elements whose smaller count is
                not positive are left out.

                >>> Counter('abbb') & Counter('bcc')
                Counter({'b': 1})

                '''
                if not isinstance(other, Counter):
                    return NotImplemented
                # Walk the operand with fewer keys and probe the other one.
                if len(self) < len(other):
                    larger, smaller = other, self
                else:
                    larger, smaller = self, other
                common = Counter()
                for key in smaller:
                    if key not in larger:
                        continue
                    shared = min(larger[key], smaller[key])
                    if shared > 0:
                        common[key] = shared
                return common

compat.py 文件源码项目：but_sentiment 作者: MixedEmotions 项目源码文件源码阅读 40 收藏 0 点赞 0 评论 0

def __and__(self, other):
                '''Return the multiset intersection of two counters.

                Every element present in both operands is kept with the
                smaller of its two counts; elements whose smaller count is
                not positive are left out.

                >>> Counter('abbb') & Counter('bcc')
                Counter({'b': 1})

                '''
                if not isinstance(other, Counter):
                    return NotImplemented
                # Walk the operand with fewer keys and probe the other one.
                if len(self) < len(other):
                    larger, smaller = other, self
                else:
                    larger, smaller = self, other
                common = Counter()
                for key in smaller:
                    if key not in larger:
                        continue
                    shared = min(larger[key], smaller[key])
                    if shared > 0:
                        common[key] = shared
                return common

backports.py 文件源码项目：TornadoWeb 作者: VxCoder 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def __and__(self, other):
        '''Return the multiset intersection of two counters.

        Every element present in both operands is kept with the smaller
        of its two counts; elements whose smaller count is not positive
        are left out.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        # Walk the operand with fewer keys and probe the other one.
        if len(self) < len(other):
            larger, smaller = other, self
        else:
            larger, smaller = self, other
        common = Counter()
        for key in smaller:
            if key not in larger:
                continue
            shared = min(larger[key], smaller[key])
            if shared > 0:
                common[key] = shared
        return common

walker.py 文件源码项目：biblio 作者: b1naryth1ef 项目源码文件源码阅读 42 收藏 0 点赞 0 评论 0

def visit_ClassDef(self, node):
        """Build a dict describing a class definition node.

        The result has the keys 'type' (always 'class'), 'name',
        'docstring', 'bases', 'attributes' and 'functions'. Bases named
        'object' are left out. Visited body members are sorted into
        'functions' or 'attributes' by their own 'type' entry; members of
        any other type are ignored.
        """
        class_name = node.name
        docstring = self.get_docstring(node)
        # Plain names are described directly; anything else is delegated
        # to self.visit.
        base_entries = [
            {'name': base.id} if isinstance(base, ast.Name) else self.visit(base)
            for base in node.bases
        ]
        described = {
            'type': 'class',
            'name': class_name,
            'docstring': docstring,
            'bases': [entry for entry in base_entries if entry.get('name') != 'object'],
            'attributes': [],
            'functions': [],
        }

        for member in node.body:
            info = self.visit(member)
            kind = info['type']
            if kind == 'function':
                described['functions'].append(info)
            elif kind == 'assign':
                described['attributes'].append(info)

        return described

Counter.py 文件源码项目：chewBBACA 作者: B-UMMI 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def __and__(self, other):
        '''Return the multiset intersection of two counters.

        Every element present in both operands is kept with the smaller
        of its two counts; elements whose smaller count is not positive
        are left out.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        # Walk the operand with fewer keys and probe the other one.
        if len(self) < len(other):
            larger, smaller = other, self
        else:
            larger, smaller = self, other
        common = Counter()
        for key in smaller:
            if key not in larger:
                continue
            shared = min(larger[key], smaller[key])
            if shared > 0:
                common[key] = shared
        return common

loader.py 文件源码项目：processtap 作者: firodj 项目源码文件源码阅读 44 收藏 0 点赞 0 评论 0

def __load_classes( self ):
        # Create one class declaration per UDT symbol, then repeatedly attach
        # to the declarations tree every class accepted by the
        # parent_exists_t predicate, until none is left or a pass makes no
        # progress.
        classes = {}#unique symbol id : class decl
        self.logger.info( 'building udt objects' )
        for smbl in self.symbols.itervalues():
            if smbl.symTag == msdia.SymTagUDT:
                classes[smbl.symIndexId] = self.__create_class(smbl)
        self.logger.info( 'building udt objects(%d) - done', len(classes) )

        self.logger.info( 'integrating udt objects with namespaces' )
        does_parent_exists = self.parent_exists_t( self.global_ns, classes, self.__id2decl )
        while classes:
            pending = len( classes )
            self.logger.info( 'there are %d classes to go', pending )
            ready = [ decl for decl in classes.itervalues() if does_parent_exists( decl ) ]
            # Attach every ready class first, then drop them from the pending map.
            for decl in ready:
                self.__update_decls_tree( decl )
            for decl in ready:
                classes.pop( decl.dia_symbols[0].symIndexId )
            if pending == len( classes ):
                # No class could be placed in this pass: report the leftovers and stop.
                for cls in classes.itervalues():
                    self.logger.warning( 'unable to integrate class "%s" into declarations tree', cls.dia_symbols[0].uname )
                break
        self.logger.info( 'integrating udt objects with namespaces - done' )

loader.py 文件源码项目：processtap 作者: firodj 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def __load_base_classes( self ):
        # Link every class declaration with its direct bases and its derived
        # classes, using the base-class symbols found in self.symbols.
        # Symbols flagged as indirect virtual bases are skipped.
        make_hi = declarations.hierarchy_info_t
        is_base_class = lambda smbl: smbl.symTag == msdia.SymTagBaseClass \
                                     and False == smbl.indirectVirtualBaseClass
        self.logger.info( 'building class hierarchies' )
        # Start at zero so the final log call also works when no base-class
        # symbol exists; enumerate from 1 so `count` ends up as the number of
        # processed symbols rather than the last index.
        count = 0
        for count, smbl in enumerate( itertools.ifilter( is_base_class, self.symbols.itervalues() ), 1 ):
            base_id = smbl.type.symIndexId
            derived_id = smbl.classParentId

            # Record the base on the derived class ...
            hi_base = make_hi( self.__id2decl[base_id]
                               , self.__guess_access_type( smbl )
                               , bool( smbl.virtualBaseClass ) )
            self.__id2decl[ derived_id ].bases.append( hi_base )

            # ... and the derived class on the base.
            hi_derived = make_hi( self.__id2decl[derived_id]
                                  , self.__guess_access_type( smbl )
                                  , bool( smbl.virtualBaseClass ) )
            self.__id2decl[ base_id ].derived.append( hi_derived )

        self.logger.info( 'building class hierarchies(%d) - done', count )

sets.py 文件源码项目：kinect-2-libras 作者: inessadl 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def intersection(self, other):
        """Return a new set holding the elements found in both sets.

        `other` is first wrapped in a Set unless it already is a BaseSet.
        The result is an instance of this set's own class.
        """
        if not isinstance(other, BaseSet):
            other = Set(other)
        # Iterate the smaller set and probe the larger one's storage.
        little, big = (self, other) if len(self) <= len(other) else (other, self)
        storage = big._data
        return self.__class__(elt for elt in little if elt in storage)

sets.py 文件源码项目：kinect-2-libras 作者: inessadl 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def difference_update(self, other):
        """Remove from this set, in place, every element of `other`."""
        contents = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if self is other:
            # Subtracting a set from itself leaves nothing behind.
            self.clear()
        for elt in other:
            if elt in contents:
                del contents[elt]

    # Python dict-like mass mutations: update, clear

filecmp.py 文件源码项目：kinect-2-libras 作者: inessadl 项目源码文件源码阅读 54 收藏 0 点赞 0 评论 0

def phase1(self): # Compute common names
        # Index both listings by os.path.normcase(name) so the comparison
        # uses the normalised key while the original spelling is reported.
        left = dict((os.path.normcase(name), name) for name in self.left_list)
        right = dict((os.path.normcase(name), name) for name in self.right_list)
        self.common = [left[key] for key in left if key in right]
        self.left_only = [left[key] for key in left if key not in right]
        self.right_only = [right[key] for key in right if key not in left]

search_command.py 文件源码项目：mongodb-monitoring 作者: jruaux 项目源码文件源码阅读 47 收藏 0 点赞 0 评论 0

def iteritems(self):
            # Lazily yield (name, value) for every configuration setting that
            # the command's protocol version supports and whose value is not
            # None. Written with generator expressions rather than
            # tuple-parameter lambdas, which only parse on Python 2.
            definitions = type(self).configuration_setting_definitions
            version = self.command.protocol_version
            supported = (
                setting for setting in definitions
                if setting.is_supported_by_protocol(version))
            pairs = ((setting.name, setting.__get__(self)) for setting in supported)
            return (pair for pair in pairs if pair[1] is not None)

streaming_command.py 文件源码项目：mongodb-monitoring 作者: jruaux 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def iteritems(self):
            # Start from the base SearchCommand settings and adjust them for
            # the protocol version in use. Generator expressions replace the
            # tuple-parameter lambdas, which only parse on Python 2, and the
            # local no longer shadows the method's own name.
            settings = SearchCommand.ConfigurationSettings.iteritems(self)
            version = self.command.protocol_version
            if version == 1:
                if self.required_fields is None:
                    settings = (
                        item for item in settings
                        if item[0] != 'clear_required_fields')
            else:
                settings = (item for item in settings if item[0] != 'distributed')
                if self.distributed:
                    # Report the 'type' setting as 'stateful' in this case.
                    settings = (
                        (name, 'stateful') if name == 'type' else (name, value)
                        for name, value in settings)
            return settings

        # endregion