def guess_by_frequency(self):
    """Fill in words containing '?' using the bundled word list.

    Reads ``Lingvo/wordlist.txt`` (located next to this module), splits
    ``self.text`` on whitespace and, for every distinct word that contains
    a ``'?'``, looks for the first word-list entry that
    ``check_similarity`` accepts. Each occurrence of such a word is
    replaced by its match, and ``self.text`` is rewritten as the resulting
    words joined by single spaces.

    Returns:
        True when the text was processed, False when the word list file
        could not be found (a critical message is logged in that case and
        ``self.text`` is left unchanged).
    """
    wordlist_path = os.path.join(os.path.dirname(__file__), "Lingvo/wordlist.txt")
    try:
        # Only the file access can raise FileNotFoundError, so keep the
        # guarded region limited to it.
        with open(wordlist_path, 'r') as words_file:
            input_data = words_file.read().split()
    except FileNotFoundError:
        logging.critical("Wordlist could not be found.")
        return False

    words = self.text.split()

    # Visit distinct words from most to least frequent; most_common()
    # yields each word exactly once, so no "already handled" check is needed.
    to_replace = {}
    for word, _count in nltk.FreqDist(words).most_common():
        if '?' not in word:
            continue
        # The first matching word-list entry wins.
        for sample_word in input_data:
            if check_similarity(word, sample_word):
                to_replace[word] = sample_word
                break

    # Replacing
    words = [to_replace.get(word, word) for word in words]

    # Join the tokens directly. nltk.Text(...).name is only a short label
    # built from the first few tokens, so using it dropped the rest of the text.
    self.text = " ".join(words)
    return True
评论列表
文章目录