def __iter__(self):
    """
    Yield one tokenized sentence per line of the corpus at ``self.fname``.

    ``self.fname`` may name a single file or a directory. For a directory,
    every regular file directly inside it is read, in sorted name order;
    sub-directories are skipped (they are not recursed into).

    Each line is lower-cased, stripped of every character found in
    ``string.punctuation``, and split on whitespace. The resulting words
    are wrapped in the ``self.begin`` / ``self.end`` markers.

    Yields:
        list: ``[self.begin, word1, word2, ..., self.end]``. A blank line,
        including the empty string that follows a trailing newline, yields
        ``[self.begin, self.end]``.
    """
    if os.path.isdir(self.fname):
        # os.listdir() returns names in arbitrary order and includes
        # sub-directories. Sort for reproducible iteration and keep only
        # regular files, because open() raises on a directory.
        candidates = (
            os.path.join(self.fname, name)
            for name in sorted(os.listdir(self.fname))
        )
        filenames = [path for path in candidates if os.path.isfile(path)]
    else:
        filenames = [self.fname]

    # Built once: set membership instead of scanning the punctuation string
    # for every character of every line.
    punctuation = frozenset(string.punctuation)

    for filename in filenames:
        # NOTE(review): the file is decoded with the platform default
        # encoding; confirm whether the corpus should be read as UTF-8.
        with open(filename) as f:
            doc = f.read()
        # The file is closed before yielding, so a suspended generator does
        # not hold an open handle.
        for line in doc.split("\n"):
            cleaned = "".join(ch for ch in line.lower() if ch not in punctuation)
            words = cleaned.strip().split()
            yield [self.begin] + words + [self.end]
评论列表
文章目录