def get_iterator(self):
    """Yield the tweets stored in the CSV file at ``self.filepath``.

    The file is opened with ``bz2.open`` or ``gzip.open`` when
    ``self.compression`` is ``'bz2'`` or ``'gzip'`` respectively, and with
    the built-in ``open`` otherwise. For the two compressed cases
    ``self.mode`` is first replaced by ``binary_mode(self.mode)``.

    Each row produced by ``csv.DictReader`` is yielded only if it passes
    both ``self.filter`` and ``self.custom_filters``. When
    ``self.should_strip`` is true the row is passed through
    ``TweetParser.strip_tweet`` with ``self.keep_fields``; otherwise a
    plain ``dict`` copy of the row is yielded.

    ``self.limit`` caps the number of rows *read* from the file (filtered
    out rows count towards it); a limit of ``0`` means no cap.

    The file handle is closed when iteration finishes, when the limit is
    reached, when the consumer stops iterating early, or when an error is
    raised while reading or filtering.
    """
    tweet_parser = TweetParser()

    # NOTE(review): bz2.open/gzip.open raise ValueError when ``encoding`` is
    # given together with a binary mode, and csv.DictReader needs text rows.
    # ``binary_mode`` is defined elsewhere -- confirm the mode it returns is
    # compatible with both.
    if self.compression == 'bz2':
        self.mode = binary_mode(self.mode)
        csv_handle = bz2.open(self.filepath, self.mode, encoding=self.encoding)
    elif self.compression == 'gzip':
        self.mode = binary_mode(self.mode)
        csv_handle = gzip.open(self.filepath, self.mode, encoding=self.encoding)
    else:
        csv_handle = open(self.filepath, self.mode, encoding=self.encoding)

    # try/finally guarantees the handle is released even if the generator
    # is closed before exhaustion or a parser/filter call raises.
    try:
        for count, tweet in enumerate(csv.DictReader(csv_handle)):
            # ``count`` is zero-based, so ``count + 1`` is the number of
            # rows read so far, including the current one.
            if self.limit < count + 1 and self.limit != 0:
                return
            if tweet_parser.tweet_passes_filter(self.filter, tweet) \
                    and tweet_parser.tweet_passes_custom_filter_list(self.custom_filters, tweet):
                if self.should_strip:
                    yield tweet_parser.strip_tweet(self.keep_fields, tweet)
                else:
                    yield dict(tweet)
    finally:
        csv_handle.close()
评论列表
文章目录