def __init__(self, parameters, language):
    """Store the configuration and load the language-specific resources.

    All resource files are addressed by relative path, so they are resolved
    against the current working directory:

    * ``frequencies_<language>.json`` -- frequency list
    * ``embeddings_<language>.bin``   -- trained fastText model
    * ``embeddings_<language>.vec``   -- fastText vectors (path only stored)
    * ``stopwords_<language>.json``   -- stopword list

    Args:
        parameters: mapping providing the keys ``'comp_function'``,
            ``'include_misspelling'``, ``'include_oov_candidates'``,
            ``'window_size'``, ``'reciprocal'``, ``'remove_stopwords'``,
            ``'edit_distance'``, ``'oov_penalty'``, ``'ranking_method'``
            and ``'k-best'``.
        language: language code, either ``"en"`` or ``"nl"``.

    Raises:
        AssertionError: if ``language`` is not one of the supported codes.
        KeyError: if ``parameters`` lacks one of the keys listed above.
    """
    assert language in ["en", "nl"], "language must be 'en' or 'nl'"
    self.language = language

    # load frequency list
    pathtofrequencies = 'frequencies_' + language + '.json'
    # load trained fasttext model
    pathtomodel = 'embeddings_' + language + '.bin'
    # give path to fasttext vectors
    pathtovectors = 'embeddings_' + language + '.vec'

    # PHASE 1
    self.comp_function = parameters['comp_function']  # item from ["sum", "mult", "max"]
    self.include_misspelling = parameters['include_misspelling']  # boolean
    self.include_oov_candidates = parameters['include_oov_candidates']  # boolean
    self.pathtovectors = pathtovectors  # path to fasttext vectors
    self.model = fasttext.load_model(pathtomodel)  # trained fasttext model

    # PHASE 2
    self.window_size = parameters['window_size']  # number in range(0,11)
    self.reciprocal = parameters['reciprocal']  # boolean
    self.remove_stopwords = parameters['remove_stopwords']  # boolean
    # Context manager closes the handle deterministically; the explicit
    # UTF-8 encoding keeps non-ASCII entries intact regardless of locale.
    stopwords_path = 'stopwords_' + str(self.language) + '.json'
    with open(stopwords_path, 'r', encoding='utf-8') as stopwords_file:
        self.stopwords = frozenset(json.load(stopwords_file))

    # PHASE 3
    self.edit_distance = parameters['edit_distance']  # item from [1, 2, 3, 4]

    # PHASE 4
    self.oov_penalty = parameters['oov_penalty']  # oov penalty tuned with self.tune_oov()

    # OUTPUT
    # item from ["context", "noisy_channel", "frequency", "ensemble"]
    self.ranking_method = parameters['ranking_method']
    with open(pathtofrequencies, 'r', encoding='utf-8') as frequencies_file:
        self.frequency_dict = json.load(frequencies_file)  # frequency list
    self.k = parameters['k-best']  # positive natural number
评论列表
文章目录