def __init__(self, text: str, delimiter: str, rnnlm_model_path: str):
    """Split ``text`` into sentences, score them, and keep the averages.

    The text is split with ``split_text`` and tokenized with ``tokenize``;
    per-sentence score lists are then computed and each list is reduced
    with ``average`` into one instance attribute.

    Args:
        text: Input text; stored unchanged as ``self.text``.
        delimiter: Passed to ``split_text`` together with ``text`` to
            obtain ``self.sentences``.
        rnnlm_model_path: Path that must point to an existing file; stored
            as ``self.rnnlm_model_path``. (Presumably an RNN language
            model, judging by the name -- the file is not opened here.)

    Raises:
        FileNotFoundError: If ``rnnlm_model_path`` is not an existing
            regular file. Note that the text has already been split and
            tokenized by the time this check runs.

    Attributes set:
        text, sentences, tss, rnnlm_model_path, word_freq, n_total_words,
        log_prob, mean_lp, norm_lp_div, norm_lp_sub, slor.
    """
    self.text = text
    self.sentences = split_text(text, delimiter)  # type: List[str]
    # tokenize() yields a pair; the first element is only needed locally,
    # the second is kept on the instance.
    sentence_lengths, self.tss = tokenize(self.sentences)

    model_file_present = os.path.isfile(rnnlm_model_path)
    if not model_file_present:
        # Build the error the same way the OS would report a missing file:
        # (errno, message, filename).
        raise FileNotFoundError(
            errno.ENOENT,
            os.strerror(errno.ENOENT),
            rnnlm_model_path,
        )
    self.rnnlm_model_path = rnnlm_model_path

    self.word_freq, self.n_total_words = self._load_word_freq(threshold=1)

    # Per-sentence score lists. The two base lists come from instance
    # methods; the derived lists are computed by module-level helpers.
    lp_per_sentence = self._calc_log_prob_scores()
    unigram_per_sentence = self._calc_unigram_scores()
    mean_lp_per_sentence = calc_mean_lp_scores(lp_per_sentence, sentence_lengths)
    div_per_sentence = calc_norm_lp_div_scores(lp_per_sentence, unigram_per_sentence)
    sub_per_sentence = calc_norm_lp_sub_scores(lp_per_sentence, unigram_per_sentence)
    # The SLOR list is derived from the "sub" list and the lengths.
    slor_per_sentence = calc_slor_scores(sub_per_sentence, sentence_lengths)

    # Collapse every per-sentence list into a single value.
    self.log_prob = average(lp_per_sentence)
    self.mean_lp = average(mean_lp_per_sentence)
    self.norm_lp_div = average(div_per_sentence)
    self.norm_lp_sub = average(sub_per_sentence)
    self.slor = average(slor_per_sentence)
评论列表
文章目录