def save_as_text(self, filepath, shuffle=False):
    '''
    Save queries into the specified file in svmlight format.

    Each document is written on its own line as
    ``<score> qid:<query id> <feature index>:<value> ...``, where the
    score and the query id are formatted as integers and every value
    is printed with at most 12 decimal places (trailing zeros and a
    dangling decimal point are stripped).

    Parameters:
    -----------
    filepath: string
        The filepath where this object will be saved.

    shuffle: bool
        Specify to shuffle the query document lists prior
        to writing into the file. Only the order of the documents
        within each query is randomized (using the global NumPy
        random state); the queries themselves are written in their
        original order.
    '''
    # Number of documents per query, taken from consecutive offsets.
    document_counts = np.diff(self.query_indptr)

    # Inflate the query_ids array such that each id covers
    # the corresponding feature vectors.
    query_ids = np.repeat(np.asarray(self.query_ids, dtype=int),
                          document_counts)

    relevance_scores = self.relevance_scores
    feature_vectors = self.feature_vectors

    if shuffle:
        # Position of the owning query for every document. Grouping by
        # position (instead of by query id value) keeps the queries in
        # their original order even when the ids are not sorted, and
        # never merges distinct queries that happen to share an id.
        query_positions = np.repeat(np.arange(len(document_counts)),
                                    document_counts)

        # Permute all documents at random, then bring documents of the
        # same query back together. The stable (merge) sort preserves
        # the random relative order of documents within each query.
        permutation = np.random.permutation(self.document_count())
        regroup = np.argsort(query_positions[permutation], kind='mergesort')
        document_order = permutation[regroup]

        query_ids = query_ids[document_order]
        relevance_scores = relevance_scores[document_order]
        feature_vectors = feature_vectors[document_order]

    # Loop invariant: the feature indices are shared by all documents.
    feature_indices = self.feature_indices

    with open(filepath, 'w') as ofile:
        for score, qid, feature_vector in zip(relevance_scores,
                                              query_ids,
                                              feature_vectors):
            tokens = ['%d qid:%d' % (score, qid)]
            for feature in zip(feature_indices, feature_vector):
                output = ' %d:%.12f' % feature
                # Drop insignificant trailing zeros ('0.500...' -> '0.5')
                # and a decimal point left dangling ('1.' -> '1').
                tokens.append(output.rstrip('0').rstrip('.'))
            tokens.append('\n')
            # Emit the whole line with a single write call.
            ofile.write(''.join(tokens))
评论列表
文章目录