def score(self, x_or_y):
    """Score a batch of token-id sequences with the wrapped language model.

    Nematus is generally called on 1) tokenized, 2) truecased, 3) BPE data,
    so KenLM is trained on tokenized, truecased data. Therefore all that is
    needed here is to convert each id sequence to a string and undo the BPE
    segmentation before handing the sentences to the model.

    Args:
        x_or_y: Array of token ids. Either an ``x`` of shape (1, N, M) or a
            ``y`` of shape (N, M). Each column of the resulting 2-D array
            is treated as one sequence.

    Returns:
        Whatever ``self.model.score`` returns for the list of de-BPE'd
        sentence strings (one sentence per column of the input).

    Raises:
        ValueError: Raised by ``numpy.squeeze`` when the input has more
            than two dimensions and its leading axis is not of length 1.
    """
    # TODO: work with factors.
    if x_or_y.ndim > 2:
        # Drop the leading singleton axis so x and y share the same layout.
        x_or_y = numpy.squeeze(x_or_y, axis=0)

    # Iterating over the transpose yields one column (one sequence) at a time.
    sentences = [
        deBPE(seqs2words(seq, self.id_to_word)) for seq in x_or_y.T
    ]
    return self.model.score(sentences)
评论列表
文章目录