input.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:ome-model 作者: ome 项目源码 文件源码
def parse(self):
        """Generator that parses the HTML source, yielding markup events.

        The source is read in 4K chunks. When ``self.encoding`` is set, the
        source is wrapped in the matching ``codecs`` stream reader first;
        otherwise ``self.source`` is read directly and must return unicode
        text. Each chunk is passed to ``self.feed()`` and the events that
        accumulate in ``self._queue`` are yielded and then discarded. At end
        of input ``self.close()`` is called and an ``END`` event is emitted
        for every tag still listed in ``self._open_tags``, in reverse list
        order.

        :return: a markup event stream
        :raises ParseError: if the HTML text is not well formed
        :raises UnicodeError: if the source returns non-unicode data (no
                              encoding was specified to decode it)
        """
        def _generate():
            # Decode on the fly only when an encoding was given.
            if self.encoding:
                reader = codecs.getreader(self.encoding)
                source = reader(self.source)
            else:
                source = self.source
            try:
                bufsize = 4 * 1024  # 4K
                done = False
                while True:
                    # Keep feeding input until at least one event is queued
                    # or the source is exhausted.
                    while not done and not self._queue:
                        data = source.read(bufsize)
                        if not data:  # end of data
                            self.close()
                            done = True
                        elif not isinstance(data, unicode):
                            raise UnicodeError("source returned bytes, but no encoding specified")
                        else:
                            self.feed(data)
                    for kind, data, pos in self._queue:
                        yield kind, data, pos
                    self._queue = []
                    if done:
                        # Emit END events for tags that were never closed.
                        # reversed() is used instead of list.reverse() so the
                        # parser's own list is not mutated as a side effect.
                        # ``pos`` is the position of the last event yielded
                        # from the queue above.
                        for tag in reversed(self._open_tags):
                            yield END, QName(tag), pos
                        break
            # "except X as e" is valid on Python 2.6+ as well as Python 3,
            # unlike the older "except X, e" spelling.
            except html.HTMLParseError as e:
                msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset)
                raise ParseError(msg, self.filename, e.lineno, e.offset)
        return Stream(_generate()).filter(_coalesce)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号