Example source code for Python's compress() class

series.py (project: raccoon, author: rsheftel)
def select_index(self, compare, result='boolean'):
        """
        Finds the elements in the index that match the compare parameter and returns either a list of the values that
        match, or a boolean list the length of the index with True at each position that matches. If the indexes are
        tuples, then compare is a tuple where None in any field of the tuple is treated as "*" and matches all
        values.

        :param compare: value to compare as a singleton or tuple
        :param result: 'boolean' = returns a list of booleans, 'value' = returns a list of index values that match
        :return: list of booleans or values
        """
        if isinstance(compare, tuple):
            # this crazy list comprehension will match all the tuples in the list with None being an * wildcard
            booleans = [all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)])
                        for v in self._index]
        else:
            booleans = [False] * len(self._index)
            if self._sort:
                booleans[sorted_index(self._index, compare)] = True
            else:
                booleans[self._index.index(compare)] = True
        if result == 'boolean':
            return booleans
        elif result == 'value':
            return list(compress(self._index, booleans))
        else:
            raise ValueError('only valid values for result parameter are: boolean or value.')
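A quick standalone sketch of the result='value' branch above: itertools.compress pairs an iterable with a same-length boolean mask and keeps only the items whose mask entry is truthy (the sample data is made up, not from raccoon):

from itertools import compress

index = ['a', 'b', 'c', 'd']
booleans = [v in ('b', 'd') for v in index]   # boolean mask, one entry per index value
print(list(compress(index, booleans)))        # ['b', 'd']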
ch10_r07.py (project: Modern-Python-Cookbook, author: PacktPublishing)
def pass_outliers(data):
    return itertools.compress(data, (z >= 3.5 for z in z_mod(data)))
ch10_r07.py (project: Modern-Python-Cookbook, author: PacktPublishing)
def reject_outliers(data):
    return itertools.compress(data, (z < 3.5 for z in z_mod(data)))
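Both recipes assume a z_mod() helper that yields one modified z-score per data point. A hedged sketch of such a helper follows; it is an assumption, not necessarily the book's implementation, and returns absolute modified z-scores based on the median and the median absolute deviation so that the >= 3.5 / < 3.5 tests act as outlier checks:

import itertools
import statistics

def z_mod(data):
    # assumed helper: absolute modified z-scores via the median and the MAD
    median = statistics.median(data)
    mad = statistics.median(abs(x - median) for x in data)
    return [abs(0.6745 * (x - median) / mad) for x in data]

sample = [10, 11, 9, 10, 12, 10, 58]
print(list(pass_outliers(sample)))    # [58]
print(list(reject_outliers(sample)))  # [10, 11, 9, 10, 12, 10]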
nvdm_nobatch.py (project: NVDM-For-Document-Classification, author: cryanzpj)
def prediction(x_sample, y_sample): # sample has size 20
            '''
            Get the perplexity of the test set
            '''

            perplist = []
            for i in range(20):
                x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_sample[0]))]
                feed_dict = {nvdm.input_x: x_sample[i].reshape(1,10000)}
                step, p_xi_h = sess.run([nvdm.global_step, nvdm.p_xi_h], feed_dict)

                valid_p = np.mean(np.log(p_xi_h[x_batch_id]))
                perplist.append(valid_p)
            print("perplexity: {}".format(np.exp(-np.mean(perplist))))
nvdm_nobatch_new.py (project: NVDM-For-Document-Classification, author: cryanzpj)
def train_step(x_batch, y_batch, epoch,predicts,labels):
            """
            A single training step
            """
            y_batch = y_batch.reshape(1,-1)
            x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_batch[0]))]
            feed_dict = {nvdm.input_x: x_batch,
                         nvdm.input_y:y_batch,
                         nvdm.x_id: x_batch_id}
            '''
            h1b = [v for v in tf.all_variables() if v.name == "h1/b:0"][0]
            h1w = [v for v in tf.all_variables() if v.name == "h1/w:0"][0]
            _, step, summaries, loss, kl, rc, p_xi_h, R, hb, hw, e  = sess.run(
                [nvdm.train_op, global_step, loss_summary, nvdm.loss, nvdm.KL, nvdm.recon_loss, nvdm.p_xi_h, nvdm.R, h1b, h1w, nvdm.e], feed_dict)
            '''

            _, step,  loss,predict = sess.run([nvdm.train_op, nvdm.global_step, nvdm.loss,nvdm.predicts], feed_dict)


            time_str = datetime.datetime.now().isoformat()
            if step % FLAGS.train_every == 0:
                import pdb
                pdb.set_trace()


                score = f1_score_multiclass(np.array(predicts),np.array(labels))
                print("time: {},  epoch: {}, step: {}, loss: {:g}, score: {:g}".format(time_str,epoch, step, loss,score))

                return [],[]


            predicts.append(predict)
            labels.append(y_batch[0].astype(int))

            return predicts,labels

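            # NOTE: the statements below are unreachable; train_step has already returned above,
            # so the NaN check and the commented-out summary write never run.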
            if np.isnan(loss):
                import pdb
                pdb.set_trace()

            #train_summary_writer.add_summary(summaries, step)
nvdm_nobatch_new.py (project: NVDM-For-Document-Classification, author: cryanzpj)
def prediction(x_sample, y_sample): # sample has size 20
            '''
            Get the perplexity of the test set
            '''
            perplist = []
            for i in range(20):
                x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_sample[0]))]
                feed_dict = {nvdm.input_x: x_sample[i].reshape(1,10000),
                             nvdm.input_y: y_sample[i].reshape(1,103)}
                step, p_xi_h = sess.run([nvdm.global_step, nvdm.p_xi_h], feed_dict)

                valid_p = np.mean(np.log(p_xi_h[x_batch_id]))
                perplist.append(valid_p)
            print("perplexity: {}".format(np.exp(-np.mean(perplist))))
vae_imdb.py (project: NVDM-For-Document-Classification, author: cryanzpj)
def train(self, X_train, y_train):
        #self.saver.restore(self.sess, "./imdbmodel/model.ckpt")
        total_batch = X_train.shape[0] // self.batch_size 
        for e in range(self.epoch):
            perplist = []
            for i in range(total_batch):
                X_batch = X_train[i*self.batch_size:(i+1)*self.batch_size]
                y_batch = y_train[i*self.batch_size:(i+1)*self.batch_size]
                x_batch_id = [_ for _ in itertools.compress(range(self.feature_size), map(lambda x : x>0, X_batch[0].toarray()[0]))]
                feed_dict = {
                        self.input_x : X_batch.toarray(),
                        self.input_y : np.reshape(y_batch, [-1,1]),
                        self.x_id : x_batch_id
                        }
                _, loss =  self.sess.run([
                            self.train_op, 
                            self.loss], feed_dict)
                if np.isnan(loss):
                    import pdb
                    pdb.set_trace()
                if i % self.display_score == 0:
                    p_xi_h = self.sess.run([self.p_xi_h], feed_dict)
                    valid_p = np.mean(np.log(p_xi_h[0][x_batch_id]))
                    perplist.append(valid_p)
                    print("step: {}, perp: {:f}".format(i, np.exp(-np.mean(perplist))))
                # save the model every 2000 mini-batches
                if i > 0 and i % 2000 == 0:
                    self.savemodel()
base.py (project: MetaHeuristic, author: gonzalesMK)
def _evaluate(self, individual, X, y, cv=3):
        """ Evaluate method

        Parameters
        ----------
        individual: list [n_features]
                The input individual to be evaluated

        Return
        ----------
        Score of the individual : tuple(cross_val_score, feature score)
        """
        # Select Features
        features = list(compress(range(len(individual)), individual))
        train = np.reshape([X[:, i] for i in features],
                           [len(features), len(X)]).T

        if train.shape[1] == 0:
            return 0,1,

        # Applying K-Fold Cross Validation
        accuracies = cross_val_score(estimator=clone(self.estimator), X=train, 
                                     y=y, cv=cv, 
                                     scoring=self.cv_metric_function)

        if self.features_metric_function is None:
            feature_score = pow(sum(individual)/(len(individual)*5), 2)
        else:
            feature_score = self.features_metric_function(individual)

        return accuracies.mean() - accuracies.std(), feature_score
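The feature-selection step is the interesting part: compressing range(len(individual)) with the 0/1 individual turns the mask into column indices. A standalone sketch with toy data (not from MetaHeuristic); the NumPy fancy-indexing line selects the same columns as the reshape/transpose above:

import numpy as np
from itertools import compress

individual = [1, 0, 1, 1, 0]        # 0/1 mask over features
X = np.arange(20).reshape(4, 5)     # 4 samples, 5 features

features = list(compress(range(len(individual)), individual))
train = X[:, features]              # columns selected by the mask
print(features)                     # [0, 2, 3]
print(train.shape)                  # (4, 3)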
base_pareto.py (project: MetaHeuristic, author: gonzalesMK)
def _evaluate(self, individual, X, y, cv=3):
        """ Evaluate method

        Parameters
        ----------
        individual: list [n_features]
                The input individual to be evaluated

        Return
        ----------
        Score of the individual : tuple(cross_val_score, feature score)
        """
        # Select Features
        features = list(compress(range(len(individual)), individual))
        train = np.reshape([X[:, i] for i in features],
                           [len(features), len(X)]).T

        if train.shape[1] == 0:
            return 0,1,

        # Applying K-Fold Cross Validation
        accuracies = cross_val_score(estimator=clone(self.estimator), X=train, 
                                     y=y, cv=cv, 
                                     scoring=self.cv_metric_function)

        if self.features_metric_function == "log" :
            feature_score = np.log10(9*(sum(individual)/len(individual))+1) 
        elif self.features_metric_function == "poly" :
            feature_score = sum(individual)/len(individual)
        else:
            raise ValueError('Unknown evaluation')

        return accuracies.mean() - accuracies.std(), feature_score
doc2vec.py (project: tensorflow-playground, author: wangz10)
def generate_batch_pvdm(doc_ids, word_ids, batch_size, window_size):
    '''
    Batch generator for PV-DM (Distributed Memory Model of Paragraph Vectors).
    batch should be a shape of (batch_size, window_size+1)

    Parameters
    ----------
    doc_ids: list of document indices 
    word_ids: list of word indices
    batch_size: number of words in each mini-batch
    window_size: number of leading words before the target word 
    '''
    global data_index
    assert batch_size % window_size == 0
    batch = np.ndarray(shape=(batch_size, window_size + 1), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = window_size + 1
    buffer = collections.deque(maxlen=span) # used for collecting word_ids[data_index] in the sliding window
    buffer_doc = collections.deque(maxlen=span) # collecting id of documents in the sliding window
    # collect the first window of words
    for _ in range(span):
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    mask = [1] * span
    mask[-1] = 0 
    i = 0
    while i < batch_size:
        if len(set(buffer_doc)) == 1:
            doc_id = buffer_doc[-1]
            # all leading words and the doc_id
            batch[i, :] = list(compress(buffer, mask)) + [doc_id]
            labels[i, 0] = buffer[-1] # the last word at end of the sliding window
            i += 1
        # move the sliding window  
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    return batch, labels
word2vec.py (project: tensorflow-playground, author: wangz10)
def generate_batch_cbow(data, batch_size, num_skips, skip_window):
    '''
    Batch generator for CBOW (Continuous Bag of Words).
    batch should be a shape of (batch_size, num_skips)

    Parameters
    ----------
    data: list of index of words
    batch_size: number of words in each mini-batch
    num_skips: number of surrounding words in both directions (2: one word ahead and one word following)
    skip_window: number of words at both ends of a sentence to skip (1: skip the first and last word of a sentence)
    '''
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    batch = np.ndarray(shape=(batch_size, num_skips), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = 2 * skip_window + 1 # [ skip_window target skip_window ]
    buffer = collections.deque(maxlen=span) # used for collecting data[data_index] in the sliding window
    # collect the first window of words
    for _ in range(span):
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    # move the sliding window  
    for i in range(batch_size):
        mask = [1] * span
        mask[skip_window] = 0 
        batch[i, :] = list(compress(buffer, mask)) # all surrounding words
        labels[i, 0] = buffer[skip_window] # the word at the center 
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    return batch, labels
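The mask/compress step is what removes the centre word from the sliding window: build a mask of ones, zero the target position, and compress the deque. A standalone sketch with made-up word ids:

import collections
from itertools import compress

skip_window = 2
span = 2 * skip_window + 1
buffer = collections.deque([11, 12, 13, 14, 15], maxlen=span)

mask = [1] * span
mask[skip_window] = 0                  # hide the target word in the middle
print(list(compress(buffer, mask)))    # context words: [11, 12, 14, 15]
print(buffer[skip_window])             # target word: 13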
faam_data.py (project: faampy, author: ncasuk)
def simplified(self):
        """
        Returns the reduced number of coordinates
        """
        if not self.Simple_mask:
            self._simplify_()
        return list(itertools.compress(self, self.Simple_mask))
tools.py (project: asynq, author: quora)
def afilter(function, sequence):
    """Equivalent of filter() that takes an async filter function.

    Returns a list.

    """
    if function is None:
        result(filter(None, sequence)); return
    should_include = yield [function.asynq(elt) for elt in sequence]
    result(list(itertools.compress(sequence, should_include))); return
tools.py (project: asynq, author: quora)
def afilterfalse(function, sequence):
    """Equivalent of itertools.ifilterfalse() that takes an async filter function.

    Returns a list.

    """
    should_exclude = yield [function.asynq(elt) for elt in sequence]
    should_include = [not res for res in should_exclude]
    result(list(itertools.compress(sequence, should_include))); return
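Both helpers evaluate the predicate for every element up front (yielding a list of async calls lets asynq run the checks concurrently) and then use compress to keep or drop elements. A plain synchronous sketch of the same pattern, independent of asynq:

from itertools import compress

def sync_filter(function, sequence):
    should_include = [function(elt) for elt in sequence]
    return list(compress(sequence, should_include))

def sync_filterfalse(function, sequence):
    should_exclude = [function(elt) for elt in sequence]
    return list(compress(sequence, [not res for res in should_exclude]))

print(sync_filter(lambda x: x % 2 == 0, [1, 2, 3, 4]))       # [2, 4]
print(sync_filterfalse(lambda x: x % 2 == 0, [1, 2, 3, 4]))  # [1, 3]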
master_ops.py (project: open-database, author: mitaffinity)
def retrieve(self, table, cols, col_rules):
        """ Retrieves column values from a single table based on a given filtering rule.

        Example:
        <pre lang="python">
        my_db.retrieve(some_table_table,["num1","num2"],{"remainder_div_3":"{}==1 or {}==2", "sum":"{}<200"})
        </pre>
        will retrieve:
        <pre lang="python">
        columns named "num1" and "num2" from the table, keeping only rows whose "remainder_div_3" column has the
        value 1 or 2 and whose "sum" column is less than 200. All column rules are combined with an "AND" statement.
        </pre>

        :param table: string (name of the table to retrieve from)
        :param cols: list of strings (names of the columns to retrieve)
        :param col_rules: dictionary of rules that will be evaluated
        :return:
        Nested list in which each entry holds the requested column values for a row that passed the filtering rules
        """
        # todo: add string comp support
        cursor = self.conn.cursor()

        # from the table get all the columns to retrieve
        sql_cmd = "select " + " ,".join(cols) + " from \"" + table + "\""
        cursor.execute(sql_cmd)
        sel_sets = cursor.fetchall()

        if len(col_rules)==0:
            sel_vals = sel_sets
        else:
            # from the table select all the columns to filter for
            sql_cmd = "select " + ", ".join([key for key in col_rules]) + " from \"" + table + "\""
            cursor.execute(sql_cmd)
            filter_sets = cursor.fetchall()

            # repeat every argument number of times it appears in the selection
            mult = [len(re.findall("{}", col_rules[key])) for key in col_rules]

            def _repeat_vals(vals, repeats):
                rep_vals = []
                [[rep_vals.append(vals[i]) for _ in range(repeats[i])] for i in range(len(col_rules))]
                return rep_vals
            filter_sets = [_repeat_vals(set, mult) for set in filter_sets]

            # evaluate every row to get a boolean mask of examples
            rule_tmp = "(" + ") and (".join([col_rules[key] for key in col_rules]) + ")"
            sel_mask = [eval(rule_tmp.format(*val_set)) for val_set in filter_sets]

            # apply a boolean mask to take only entries that fit the selection rule
            sel_sets = list(compress(sel_sets, sel_mask))
            sel_vals = sel_sets
            #sel_vals = [list(x) for x in zip(*sel_sets)]
        return sel_vals
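The core of retrieve() is the last few lines: evaluate each rule against each fetched row to build a boolean mask, then compress the selected rows with that mask. A self-contained sketch without the database, using toy rows and a hypothetical rule that mirrors the docstring example:

from itertools import compress

sel_sets = [(1, 150), (2, 250), (4, 90)]              # rows of ("num1", "sum")
filter_sets = [(n % 3, s) for n, s in sel_sets]       # values the rules apply to
rule_tmp = "({} == 1 or {} == 2) and ({} < 200)"

sel_mask = [eval(rule_tmp.format(rem, rem, total)) for rem, total in filter_sets]
print(list(compress(sel_sets, sel_mask)))             # [(1, 150), (4, 90)]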
functions.py (project: pyshtrih, author: oleg-golovanov)
def handle_fr_flags(arg):
    def get_keys(revision):
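        # NOTE: the original Russian flag-name strings in this snippet were lost to an
        # encoding problem (they appear as runs of '?'); only the tuple structure and
        # the revision indexing survive.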
        return (
            (u'??????????? ???????? ??????????', u'????? ???????? ??????')[revision],
            u'???? ????? ?????????',
            (u'????? ?????? ??????? ????????', u'?????? ?? ?????? ?? ??????????')[revision],
            (u'????? ??????? ??????? ????????', u'?????? ?? ????? ? ?????????', u'?????? ????????')[revision],
            u'???????? ????',
            u'?????? ??????? ??',
            u'????? ???????????? ??????? ?????',
            u'????? ???????????? ??????????? ?????',
            u'?????????? ?????? ??????? ?????',
            u'?????????? ?????? ????????????? ???????',
            u'????',
            u'????????? ?????????? ?????',
            u'?????? ?????? ??????????? ?????????',
            u'??????? ?????? ??????????? ?????????',
            u'????? ??????? ?????',
            u'????? ????????????? ???????'
        )

    bits = misc.int_to_bits(arg, 16)

    a, b, c = 0, 1, 2
    flags_actual = {
        # ?????-??-?
        4: ((0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1), a),
        # ?????-?????-??-?
        9: ((0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0), a),
        # ?????-?????-??-? (?????? 02)
        12: ((0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0), a)
    }

    flags, rev = flags_actual.get(
        handle_fr_flags.model,
        ((1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), a)
    )

    return dict(
        zip(
            itertools.compress(get_keys(rev), flags),
            itertools.compress(bits, flags)
        )
    )
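Here compress is applied twice with the same model-specific mask: once over the flag names and once over the decoded bits, so zip() rebuilds a dict containing only the flags a given device model actually reports. A minimal sketch with made-up flag names:

import itertools

keys = ('paper out', 'cover open', 'drawer open', 'battery low')
bits = (1, 0, 1, 0)      # decoded status bits
flags = (1, 1, 0, 1)     # which flags this model reports

print(dict(zip(itertools.compress(keys, flags),
               itertools.compress(bits, flags))))
# {'paper out': 1, 'cover open': 0, 'battery low': 0}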
paragraph_vector.py (project: tensorflow-playground, author: wangz10)
def generate_batch_pvdm(batch_size, window_size):
    '''
    Batch generator for PV-DM (Distributed Memory Model of Paragraph Vectors).
    batch should be a shape of (batch_size, window_size+1)

    Parameters
    ----------
    batch_size: number of words in each mini-batch
    window_size: number of leading words before the target word
    '''
    global data_index
    assert batch_size % window_size == 0
    batch = np.ndarray(shape=(batch_size, window_size + 1), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = window_size + 1
    buffer = collections.deque(maxlen=span) # used for collecting word_ids[data_index] in the sliding window
    buffer_doc = collections.deque(maxlen=span) # collecting id of documents in the sliding window
    # collect the first window of words
    for _ in range(span):
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    mask = [1] * span
    mask[-1] = 0 
    i = 0
    while i < batch_size:
        if len(set(buffer_doc)) == 1:
            doc_id = buffer_doc[-1]
            # all leading words and the doc_id
            batch[i, :] = list(compress(buffer, mask)) + [doc_id]
            labels[i, 0] = buffer[-1] # the last word at end of the sliding window
            i += 1
            # print buffer
            # print list(compress(buffer, mask))
        # move the sliding window  
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    return batch, labels

## examining the batch generator function

