scraper.py 文件源码-python代码片段

scraper.py 文件源码

python

阅读 32 收藏 0 点赞 0 评论 0

项目：pi_romulus 作者: ArthurMoore85 项目源码文件源码

def _html_link_return(self, url, tag, key, value, deeper=False, second=False):
    """
    Fetch a page and return the URL of a link found in it.

    The page is searched for ``tag`` elements whose ``key`` attribute equals
    ``value``; only the first matching element is used.

    :param url: URL to fetch; if it starts with '/', self.url is prepended
    :param tag: Name of the HTML tag to search for
    :param key: Name of the attribute to match on that tag
    :param value: Value expected for that attribute
    :param deeper: If true, take the href of the first <a> inside the match
    :param second: If true (and ``deeper`` is false), take the href of the
        second <a> inside the match
    :return: self.url followed by the selected href
    :raises IndexError: if nothing matches, or the requested <a> is absent
    """
    if url.startswith('/'):
        url = '{0}{1}'.format(self.url, url)
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        soup = BeautifulSoup(response, 'html.parser')
    finally:
        # urllib2 responses are not context managers on Python 2, so close
        # the connection explicitly once the document has been parsed.
        response.close()
    # The attribute filter must be a dict ({name: value}). A set literal
    # ({key, value}) is interpreted by Beautiful Soup as a CSS-class search
    # and silently ignores the attribute name.
    first_match = soup.findAll(tag, {key: value})[0]
    if deeper:
        href = first_match.findAll('a')[0]['href']
    elif second:
        href = first_match.findAll('a')[1]['href']
    else:
        href = first_match['href']
    # NOTE(review): self.url is always prepended, so this assumes the page
    # uses site-relative hrefs -- confirm against the target site.
    return '{0}{1}'.format(self.url, href)