def setup(self, config):
    """
    Prepare the state used for name detection.

    Compiles the stop, semi-stop and tokenizer patterns, initialises the
    scoring parameters and the candidate counter, and loads the name model
    (word list) from a gzip-compressed, UTF-8 encoded JSON file.

    :param config: Configuration object. The entries stored under
        ``helper.CODE_ROOT`` and ``helper.NAME_MODEL`` are joined to form
        the path of the model file.
    :type config: ``dict``
    """
    code_root = config[helper.CODE_ROOT]
    model_name = config[helper.NAME_MODEL]
    model_path = os.path.join(code_root, model_name)

    # Both stop patterns are case-insensitive and multi-line.
    pattern_flags = regex.I | regex.MULTILINE

    # Hard stops: conjunctions/prepositions and punctuation. Alternatives are
    # tried left to right, so the original order is preserved.
    stop_alternatives = [
        'and', 'or', 'og', 'eller', r'\?', '&', '<', '>', '@', ':', ';', '/',
        r'\(', r'\)', 'i', 'of', 'from', 'to', r'\n', '!',
    ]
    stop_pattern = '(%s)' % '|'.join(stop_alternatives)
    self.stopper = regex.compile(stop_pattern, pattern_flags)

    # Soft stop: currently only the comma.
    semistop_alternatives = [',']
    semistop_pattern = '(%s)' % '|'.join(semistop_alternatives)
    self.semistop = regex.compile(semistop_pattern, pattern_flags)

    # NOTE(review): presumably a probability per candidate size (index =
    # number of tokens) -- confirm against the code that consumes it.
    self.size_probability = [
        0.000, 0.000, 0.435, 0.489, 0.472, 0.004, 0.000]
    self.threshold = 0.25

    # Counter-like mapping; unseen keys start at 0.
    self.candidates = defaultdict(int)

    # The model is stored as gzip-compressed JSON encoded in UTF-8.
    with gzip.open(model_path, 'rb') as compressed:
        raw_bytes = compressed.read()
    self.model = json.loads(raw_bytes.decode('utf-8'))

    # Tokens are runs of 2 to 20 word characters.
    self.tokenizer = regex.compile(r'\w{2,20}')
评论列表
文章目录