def __iter__(self):
    """
    Stream tokenized sentences read from ``self.fname``.

    ``self.fname`` may name a single text file or a directory. For a
    directory, every entry directly inside it is opened as a text file
    (in the order returned by ``os.listdir``).

    Each line of the form "word1 word2 ..." is lower-cased, has every
    character in ``string.punctuation`` removed, and is split on
    whitespace.

    Yields lists of the form [self.begin, word1, word2, ..., self.end].
    Blank lines are not skipped: they yield [self.begin, self.end].
    """
    if os.path.isdir(self.fname):
        filenames = [os.path.join(self.fname, entry)
                     for entry in os.listdir(self.fname)]
    else:
        # A plain file path is the documented primary use; without this
        # branch `filenames` would be unbound for non-directory input.
        filenames = [self.fname]

    # Build the lookup once; set membership is O(1) per character.
    punctuation = set(string.punctuation)

    for filename in filenames:
        # Read everything up front so the file handle is not held open
        # while the generator is suspended between yields.
        with open(filename) as f:
            doc = f.read()

        # split("\n") (rather than iterating the file) keeps the trailing
        # empty piece after a final newline, matching the original output.
        for line in doc.split("\n"):
            cleaned = "".join(ch for ch in line.lower() if ch not in punctuation)
            yield [self.begin] + cleaned.split() + [self.end]
评论列表
文章目录