def read_folder(self, folder_name, number_of_files_to_read=10000):
    """
    Read the ``.txt`` files in a folder and return their sentences as one list.

    Each matching file is loaded through ``self.open_file_single_string``,
    stripped of non-ASCII characters, and split with ``sent_tokenize``.

    Args:
        folder_name: path of the folder to read files from.
        number_of_files_to_read: optional cap on how many ``.txt`` files are
            processed; a value of 0 or less means nothing is read.

    Returns:
        A flat list of the sentences from the processed files, in the order
        the files are yielded by ``os.listdir`` (which is arbitrary).
    """
    all_sentences = []
    count = 0
    for filename in os.listdir(folder_name):
        # Stop scanning once the cap is reached instead of walking the rest
        # of the directory listing for nothing.
        if count >= number_of_files_to_read:
            break
        if not filename.endswith(".txt"):
            continue
        main_text = self.open_file_single_string(
            os.path.join(folder_name, filename)
        )
        # Only byte strings need decoding; calling .decode() on text that is
        # already decoded fails on Python 3.
        if isinstance(main_text, bytes):
            main_text = main_text.decode("utf-8")
        # Drop every character that cannot be represented in ASCII.
        ascii_text = main_text.encode("ascii", "ignore")
        # On Python 3 encode() returns bytes; convert back to text before
        # tokenizing (sent_tokenize is presumably NLTK's, which expects text
        # -- confirm against the file's imports). On Python 2 the encoded
        # value is already a str and is passed through unchanged.
        if not isinstance(ascii_text, str):
            ascii_text = ascii_text.decode("ascii")
        all_sentences.extend(sent_tokenize(ascii_text))
        count += 1
    return all_sentences
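# Comment list (web-page navigation residue, not part of the code)
# Article table of contents (web-page navigation residue, not part of the code)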