@gen.coroutine
def get_links_from_url(url):
    """Download the page at `url` and parse it for links.

    Returned links have had the fragment after `#` removed, and have been
    made absolute so, e.g. the URL 'gen.html#tornado.gen.coroutine' becomes
    'http://www.tornadoweb.org/en/stable/gen.html'.
    """
    try:
        response = yield httpclient.AsyncHTTPClient().fetch(url)
        print('fetched %s' % url)
        html = response.body if isinstance(response.body, str) \
            else response.body.decode()
        urls = [urljoin(url, remove_fragment(new_url))
                for new_url in get_links(html)]
    except Exception as e:
        print('Exception: %s %s' % (e, url))
        raise gen.Return([])

    raise gen.Return(urls)
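This snippet and the variants below call a `remove_fragment` helper that the excerpt never defines. A minimal sketch of it (following the shape of Tornado's webspider demo) and of driving the coroutine, with the seed URL as an illustration only:

    # Sketch of the undefined remove_fragment helper plus a simple driver;
    # assumes Python 3's urllib and the imports used by the snippet above.
    from urllib.parse import urldefrag

    from tornado.ioloop import IOLoop


    def remove_fragment(url):
        # urldefrag('gen.html#x') -> ('gen.html', 'x'); keep only the URL part.
        pure_url, fragment = urldefrag(url)
        return pure_url


    if __name__ == '__main__':
        links = IOLoop.current().run_sync(
            lambda: get_links_from_url('http://www.tornadoweb.org/en/stable/'))
        print('found %d links' % len(links))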
# Python 2 variant: `urljoin` lives in the `urlparse` module and
# `response.body` is already a `str`, so no decode step is needed.
@gen.coroutine
def get_links_from_url(url):
    """Download the page at `url` and parse it for links.

    Returned links have had the fragment after `#` removed, and have been
    made absolute so, e.g. the URL 'gen.html#tornado.gen.coroutine' becomes
    'http://www.tornadoweb.org/en/stable/gen.html'.
    """
    try:
        response = yield httpclient.AsyncHTTPClient().fetch(url)
        print('fetched %s' % url)
        urls = [urlparse.urljoin(url, remove_fragment(new_url))
                for new_url in get_links(response.body)]
    except Exception as e:
        print('Exception: %s %s' % (e, url))
        raise gen.Return([])

    raise gen.Return(urls)
@gen.coroutine
def get_links_from_url(url):  # collect all links found on the page at `url`
    """Download the page at `url` and parse it for links.

    Returned links have had the fragment after `#` removed, and have been
    made absolute so, e.g. the URL 'gen.html#tornado.gen.coroutine' becomes
    'http://www.tornadoweb.org/en/stable/gen.html'.
    """
    try:
        response = yield httpclient.AsyncHTTPClient().fetch(url)  # fetch the page asynchronously
        print('fetched %s' % url)
        html = response.body if isinstance(response.body, str) \
            else response.body.decode()  # decode the body to str if it arrived as bytes
        urls = [urljoin(url, remove_fragment(new_url))
                for new_url in get_links(html)]  # strip fragments and make each link absolute
    except Exception as e:
        print('Exception: %s %s' % (e, url))
        raise gen.Return([])  # Special exception to return a value from a coroutine.
    raise gen.Return(urls)  # If this exception is raised, its value argument is used as the result of the coroutine.
def get_links(html):  # parse the page's HTML for anchor links
    class URLSeeker(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)  # note: equivalent to super().__init__() here
            self.urls = []

        def handle_starttag(self, tag, attrs):
            href = dict(attrs).get('href')
            if href and tag == 'a':
                self.urls.append(href)

    url_seeker = URLSeeker()
    url_seeker.feed(html)
    print('@@' * 20)
    print(url_seeker.urls)
    print('@@' * 20)
    return url_seeker.urls  # every href collected from <a> start tags
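A quick way to exercise `get_links` on an inline document, assuming `HTMLParser` was imported as `from html.parser import HTMLParser` (Python 3):

    # Minimal check against a hand-written snippet; the HTML is illustrative.
    sample = ('<p><a href="gen.html#tornado.gen.coroutine">gen</a>'
              '<a href="web.html">web</a></p>')
    links = get_links(sample)
    print(links)  # ['gen.html#tornado.gen.coroutine', 'web.html']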
@gen.coroutine
def handle_stream(self, stream, address):
    """
    Handle one telnet connection.

    http://www.tornadoweb.org/en/stable/gen.html#tornado-gen-simplify-asynchronous-code
    """
    stream.write(TELNET_PROMPT_PREFIX)
    while True:
        try:
            command = yield stream.read_until(b'\n')
            result = self.handle_command(command.decode().strip())
            yield stream.write(result.encode() + TELNET_PROMPT_PREFIX)
        except StreamClosedError:
            break
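In the original project this method would live on a `tornado.tcpserver.TCPServer` subclass. A minimal sketch of such a host; `TelnetServer`, `handle_command`, and the prompt bytes are assumptions, not from the snippet:

    # Hypothetical host server for the coroutine above.
    from tornado import gen
    from tornado.ioloop import IOLoop
    from tornado.iostream import StreamClosedError
    from tornado.tcpserver import TCPServer

    TELNET_PROMPT_PREFIX = b'\r\n> '


    class TelnetServer(TCPServer):
        handle_stream = handle_stream  # the @gen.coroutine method shown above

        def handle_command(self, command):
            # Placeholder dispatch: echo the command back to the client.
            return 'echo: %s' % command


    if __name__ == '__main__':
        TelnetServer().listen(8023)
        IOLoop.current().start()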
def get_links(html):
    class URLSeeker(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            href = dict(attrs).get('href')
            if href and tag == 'a':
                self.urls.append(href)

    url_seeker = URLSeeker()
    url_seeker.feed(html)
    return url_seeker.urls
# Python 2 variant: HTMLParser is accessed as a module (``import HTMLParser``).
def get_links(html):
    class URLSeeker(HTMLParser.HTMLParser):
        def __init__(self):
            HTMLParser.HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            href = dict(attrs).get('href')
            if href and tag == 'a':
                self.urls.append(href)

    url_seeker = URLSeeker()
    url_seeker.feed(html)
    return url_seeker.urls
def fetch_next(self):
    """A Future used with `gen.coroutine`_ to asynchronously retrieve the
    next document in the result set, fetching a batch of documents from the
    server if necessary. Resolves to ``False`` if there are no more
    documents, otherwise :meth:`next_object` is guaranteed to return a
    document.

    .. _`gen.coroutine`: http://tornadoweb.org/en/stable/gen.html

    .. testsetup:: fetch_next

      MongoClient().test.test_collection.remove()
      collection = MotorClient().test.test_collection

    .. doctest:: fetch_next

      >>> @gen.coroutine
      ... def f():
      ...     yield collection.insert([{'_id': i} for i in range(5)])
      ...     cursor = collection.find().sort([('_id', 1)])
      ...     while (yield cursor.fetch_next):
      ...         doc = cursor.next_object()
      ...         sys.stdout.write(str(doc['_id']) + ', ')
      ...     print 'done'
      ...
      >>> IOLoop.current().run_sync(f)
      0, 1, 2, 3, 4, done

    .. note:: While it appears that fetch_next retrieves each document from
      the server individually, the cursor actually fetches documents
      efficiently in `large batches`_.

    .. _`large batches`: http://docs.mongodb.org/manual/core/read-operations/#cursor-behaviors
    """
    future = Future()

    if not self._buffer_size() and self.alive:
        if self._empty():
            # Special case, limit of 0
            future.set_result(False)
            return future

        def cb(batch_size, error):
            if error:
                future.set_exception(error)
            else:
                future.set_result(bool(batch_size))

        self._get_more(cb)
        return future
    elif self._buffer_size():
        future.set_result(True)
        return future
    else:
        # Dead
        future.set_result(False)
        return future
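The doctest in the docstring is Python 2. A minimal Python 3 sketch of the same consumption loop, assuming a MongoDB server on localhost and the `motor` package; the database and collection names are illustrative:

    # Drives the Future-based fetch_next API from a gen.coroutine.
    import motor
    from tornado import gen
    from tornado.ioloop import IOLoop

    @gen.coroutine
    def print_ids():
        collection = motor.MotorClient().test.test_collection
        cursor = collection.find().sort([('_id', 1)])
        while (yield cursor.fetch_next):
            doc = cursor.next_object()
            print(doc['_id'])

    IOLoop.current().run_sync(print_ids)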
def stream_to_handler(self, request_handler):
    """Write the contents of this file to a
    :class:`tornado.web.RequestHandler`. This method calls `flush` on
    the RequestHandler, so ensure all headers have already been set.
    For a more complete example see the implementation of
    :class:`~motor.web.GridFSHandler`.

    Takes an optional callback, or returns a Future.

    :Parameters:
     - `callback`: Optional function taking parameters (self, error)

    .. code-block:: python

        class FileHandler(tornado.web.RequestHandler):
            @tornado.web.asynchronous
            @gen.coroutine
            def get(self, filename):
                db = self.settings['db']
                fs = yield motor.MotorGridFS(db()).open()
                try:
                    gridout = yield fs.get_last_version(filename)
                except gridfs.NoFile:
                    raise tornado.web.HTTPError(404)

                self.set_header("Content-Type", gridout.content_type)
                self.set_header("Content-Length", gridout.length)
                yield gridout.stream_to_handler(self)
                self.finish()

    .. seealso:: Tornado `RequestHandler <http://tornadoweb.org/en/stable/web.html#request-handlers>`_
    """
    written = 0
    while written < self.length:
        # Reading chunk_size at a time minimizes buffering
        chunk = yield self.read(self.chunk_size)

        # write() simply appends the output to a list; flush() sends it
        # over the network and minimizes buffering in the handler.
        request_handler.write(chunk)
        request_handler.flush()
        written += len(chunk)
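To serve the `FileHandler` from the docstring example, it would be registered on a Tornado application. A hedged sketch of that wiring; the route, port, and database name are assumptions, and the callable passed as `db` matches the `self.settings['db']` followed by `db()` pattern in the example:

    # Hypothetical application setup for the FileHandler docstring example.
    import motor
    import tornado.web
    from tornado.ioloop import IOLoop

    database = motor.MotorClient().test

    application = tornado.web.Application(
        [(r'/files/(.*)', FileHandler)],
        db=lambda: database,  # the handler calls self.settings['db']()
    )

    if __name__ == '__main__':
        application.listen(8888)
        IOLoop.current().start()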