def _pre_visit_url_condense(self, url):
""" Reduce (condense) URLs into some canonical form before
visiting. All occurrences of equivalent URLs are treated as
identical.
All this does is strip the \"fragment\" component from URLs,
so that http://foo.com/blah.html\#baz becomes
http://foo.com/blah.html """
base, frag = urlparse.urldefrag(url)
return base
## URL Filtering functions. These all use information from the
## state of the Crawler to evaluate whether a given URL should be
## used in some context. Return value of True indicates that the
## URL should be used.
评论列表
文章目录