lm_remote.py 文件源码-python代码片段

lm_remote.py 文件源码

python

阅读 28 收藏 0 点赞 0 评论 0

def score(self, x_or_y):
        """Score a batch of token-id sequences with the language model.

        Nematus is generally called on 1) tokenized, 2) truecased, 3) BPE
        data, while KenLM is trained on tokenized, truecased data. Each
        sequence therefore only needs to be converted to a string and have
        its BPE segmentation undone (deBPE) before it is scored.

        Args:
            x_or_y: Array of token ids, either ``x`` with shape (1, N, M) or
                ``y`` with shape (N, M). Each column of the 2-D form is
                converted into one sentence.

        Returns:
            The result of ``self.model.score`` called on the list of
            deBPE'd sentences (one sentence per column of the input).

        Raises:
            ValueError: Raised by ``numpy.squeeze`` when the input has more
                than two dimensions and its leading axis is not of length 1.
        """
        # TODO: work with factors. For now the leading axis of a 3-D ``x``
        # is expected to be of length 1 and is simply dropped.
        if len(x_or_y.shape) > 2:
            x_or_y = numpy.squeeze(x_or_y, axis=0)

        # Iterating over the transpose yields one id sequence per column.
        sentences = [
            deBPE(seqs2words(seq, self.id_to_word)) for seq in x_or_y.T
        ]
        return self.model.score(sentences)