Scraper.py 文件源码-python代码片段

Scraper.py 文件源码
python
阅读 24 收藏 0 点赞 0 评论 0
def get_act(self, scraper):
        """Build the callable that collects matching XML elements into the scraper.

        Higher-order function: nothing is searched when ``get_act`` is called;
        the returned closure does the work when it is invoked.

        Args:
            scraper: object providing ``xml_tree`` (root of the parsed tree) and
                ``xml_elements`` (list that matching elements are appended to).

        Returns:
            A zero-argument function. When called it parses the document if
            ``scraper.xml_tree`` is falsy, then walks the tree depth-first
            (parent before children) and appends every element that satisfies
            one of the ``self.tags`` / ``self.attributes`` pairs to
            ``scraper.xml_elements``. It returns ``None``.
        """

        def act():
            # NOTE(review): this is a truthiness test, not an ``is None`` test.
            # A childless xml.etree.ElementTree.Element is falsy, so such a root
            # would be treated as "not parsed" -- confirm what value xml_tree
            # holds before parsing before tightening this check.
            if not scraper.xml_tree:
                Parse_XML_Action().execute(scraper)
            if not scraper.xml_tree:
                # Still no tree after the parse attempt: nothing to search.
                return

            # Snapshot the current results: find() appends to
            # scraper.xml_elements, which must not grow under the loop below.
            start_elements = scraper.xml_elements[:]

            def matches(element):
                """Return True if the element satisfies any tag/attribute pair.

                ``self.attributes[i]`` holds the attribute values required for
                ``self.tags[i]``. With no tags configured there are no pairs to
                check, so every element matches.
                """
                if not self.tags:
                    return True
                # Check every occurrence of the tag, not only the first one, so
                # the same tag may be listed with alternative attribute sets.
                for index, tag in enumerate(self.tags):
                    if tag != element.tag:
                        continue
                    required = self.attributes[index]
                    if all(key in element.attrib
                           and element.attrib[key] == required[key]
                           for key in required):
                        return True
                return False

            def find(element):
                """Recursively collect matching elements, parent before children.

                Args:
                  element: the tree element that is about to be examined
                """
                if matches(element):
                    scraper.xml_elements.append(element)
                for sub_element in element:
                    find(sub_element)

            if self.find_subelements:
                # Search inside the elements found so far; new matches are
                # appended after the existing entries.
                for start in start_elements:
                    find(start)
            else:
                find(scraper.xml_tree)

        return act