queries.py 文件源码

python
阅读 38 收藏 0 点赞 0 评论 0

项目:rankpy 作者: dmitru 项目源码 文件源码
def save_as_text(self, filepath, shuffle=False):
    '''
    Save queries into the specified file in svmlight format.

    Every document is written on its own line as::

        <relevance> qid:<query id> <feature index>:<value> ...

    The relevance score and query id are formatted as integers.
    Feature values are formatted with 12 decimal places, after
    which trailing zeros (and a dangling decimal point) are
    stripped, so ``0.25`` is written as ``0.25`` and ``10.0``
    as ``10``.

    Parameters:
    -----------
    filepath: string
        The filepath where this object will be saved. An existing
        file is overwritten.

    shuffle: bool
        Specify to shuffle the query document lists prior
        to writing into the file. Documents are permuted at random
        and then regrouped by query id, so the documents of one
        query still end up on consecutive lines.
    '''
    # Inflate the query_ids array such that each id covers the
    # corresponding feature vectors: the i-th id is repeated once
    # per document between query_indptr[i] and query_indptr[i + 1].
    query_ids = np.repeat(np.asarray(self.query_ids, dtype=int),
                          np.diff(self.query_indptr))

    relevance_scores = self.relevance_scores
    feature_vectors = self.feature_vectors
    feature_indices = self.feature_indices

    if shuffle:
        # Draw a random order for all documents, then sort that order
        # by query id so that each query's documents are contiguous
        # again while keeping the random order inside the query.
        permutation = np.random.permutation(self.document_count())
        regroup = np.argsort(query_ids[permutation])
        document_order = permutation[regroup]

        query_ids = query_ids[document_order]
        relevance_scores = relevance_scores[document_order]
        feature_vectors = feature_vectors[document_order]

    with open(filepath, 'w') as ofile:
        for score, qid, feature_vector in zip(relevance_scores,
                                               query_ids,
                                               feature_vectors):
            fields = ['%d qid:%d' % (score, qid)]
            # '%.12f' always emits a decimal point, so stripping
            # zeros can never eat into the integer part of the value.
            fields.extend(
                (' %d:%.12f' % feature).rstrip('0').rstrip('.')
                for feature in zip(feature_indices, feature_vector))
            fields.append('\n')
            # One write per document instead of one per feature.
            ofile.write(''.join(fields))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号