def fetch_all_transitions(self, language, ngram_length):
    """Count every n-gram found in the word list of ``language``.

    The word list is read from ``wordlists/<language>.txt`` located next to
    this module, decoded as UTF-8.  Each line is lower-cased and split on
    whitespace; every resulting word is passed to ``self.find_ngrams`` and
    the items it returns are tallied.

    Args:
        language: Base name (without ``.txt``) of the word list file.
        ngram_length: Forwarded unchanged to ``self.find_ngrams`` as its
            second argument.

    Returns:
        A plain ``dict`` mapping each n-gram to its number of occurrences.
        An empty word list produces an empty dict.

    Raises:
        SystemError: If no word list file exists for ``language``.
    """
    wordlist = os.path.join(
        os.path.dirname(__file__), "wordlists/{0}.txt".format(language)
    )
    if not os.path.exists(wordlist):
        raise SystemError("Language '{0}' does not exist".format(language))

    # Count incrementally instead of concatenating every n-gram into one big
    # list first: this avoids quadratic list building and, more importantly,
    # copes with blank lines (an empty word sequence made the previous
    # reduce() call raise TypeError).
    counts = Counter()
    with codecs.open(wordlist, 'r', encoding='utf-8') as f:
        for line in f:
            # split() without arguments already discards the trailing
            # newline and any other surrounding whitespace.
            for word in line.lower().split():
                # NOTE(review): assumes find_ngrams returns an iterable of
                # hashable n-grams (not a mapping) - confirm against its
                # definition.
                counts.update(self.find_ngrams(word, ngram_length))
    return dict(counts)
评论列表
文章目录