def _load(self):
    """Read every ``*.txt`` file of the corpus directory into the corpus.

    Files matching ``*.txt`` directly inside ``self.corpus_directory`` are
    visited in sorted path order. Each file is read as UTF-8, its content is
    passed through ``self._preprocessing``, and the result is wrapped in a
    ``Text`` built from the file path, the file's base name and the
    processed content. Every ``Text`` is appended to ``self.__corpus``.
    """
    # Escape the directory so glob metacharacters in its name match literally;
    # only the trailing "*.txt" acts as a wildcard.
    pattern = os.path.join(glob.escape(self.corpus_directory), "*.txt")
    for txt_path in sorted(glob.glob(pattern)):
        # Pull the raw file content into memory.
        with open(txt_path, "r", encoding="utf8") as handle:
            raw = handle.read()
        # Run the preprocessing step on the raw content.
        cleaned = self._preprocessing(raw)
        # Wrap the result in a Text and store it in the corpus.
        document = Text(txt_path, os.path.basename(txt_path), cleaned)
        self.__corpus.append(document)
评论列表
文章目录