debates.py 文件源码-python代码片段

debates.py 文件源码

python

阅读 23 收藏 0 点赞 0 评论 0

项目：presidency 作者: jayrav13 项目源码文件源码

def all(self):
        """Fetch the debates index page and return every debate listed on it.

        The page is downloaded, parsed, and its ``td.docdate`` cells (those
        with non-empty text) are paired positionally with its ``td.doctext``
        cells. The resulting list is stored on ``self.data`` and returned.

        Returns:
            A list of dicts, one per debate, with the keys ``"date"``
            (text of the date cell), ``"debate"`` (text of the first link in
            the debate cell), ``"link"`` (that link's ``href``) and ``"id"``
            (integer parsed from the first query-string value of the
            ``href``).

        Raises:
            Exception: if the number of date cells and debate cells differ,
                since they could then no longer be paired reliably.
        """
        url = "http://www.presidency.ucsb.edu/debates.php"

        # Retrieve all debates as tree. A timeout keeps a stalled server
        # from blocking the caller forever.
        page = requests.get(url, timeout=30)
        tree = html.document_fromstring(page.text)

        # List of all debate and date elements. Date cells without any text
        # are dropped before pairing.
        dates = [cell for cell in tree.xpath('//td[@class="docdate"]') if cell.text_content()]
        debates = tree.xpath('//td[@class="doctext"]')

        # Throw error if lengths are off.
        if len(dates) != len(debates):
            raise Exception('Sorry - something went wrong! Please open an issue at https://github.com/jayrav13/presidency/issues and include the following timestamp: %s' % str(time.time()))

        # Curate list of all debates. Build into a local list first so that
        # self.data is only replaced once every row has parsed successfully.
        records = []

        for date_cell, debate_cell in zip(dates, debates):
            # Look the anchor and its href up once per row.
            anchor = debate_cell.xpath('a')[0]
            href = anchor.attrib['href']

            records.append({
                "date": date_cell.text_content(),
                "debate": anchor.text_content(),
                "link": href,
                # The id is the value after the first "=" in the query string.
                "id": int(href.split('?')[1].split('=')[1]),
            })

        self.data = records
        return self.data