def get_act(self, scraper):
    """Build the callable that collects matching XML elements into the scraper.

    Higher-order function: nothing is searched when ``get_act`` is called;
    the returned closure performs the search when invoked.

    The closure reads three attributes of ``self``:
    ``tags`` (sequence of tag names), ``attributes`` (sequence of mappings,
    paired by position with ``tags``, giving the attribute values an element
    with that tag must carry) and ``find_subelements`` (search below the
    elements already collected instead of from the tree root).

    Args:
        scraper: object exposing ``xml_tree`` (root element to traverse) and
            ``xml_elements`` (list that matching elements are appended to).

    Returns:
        A zero-argument function that appends every matching element to
        ``scraper.xml_elements`` in pre-order and returns ``None``.
    """

    def act():
        # Parse lazily: only when the scraper does not hold a tree yet.
        # NOTE(review): this is a truthiness test, not an ``is None`` test.
        # If xml_tree is an ElementTree-style Element, a root without
        # children is falsy and would be treated as "not parsed" -- confirm
        # what the scraper initialises xml_tree to before tightening this.
        if not scraper.xml_tree:
            Parse_XML_Action().execute(scraper)
            if not scraper.xml_tree:
                # Still nothing to search after the parse attempt.
                return

        # Snapshot the current results: find() appends to
        # scraper.xml_elements, so iterating the live list would also visit
        # the elements added during this very run.
        search_roots = scraper.xml_elements[:]

        def matches(element):
            """Return True if the element satisfies any tag/attribute pair."""
            if not self.tags:
                # No tag filter configured: every element qualifies.
                # (Previously this path called ``self.tags.index`` on an
                # empty list and raised ValueError.)
                return True
            # Check every pair, not just the first one with this tag, so the
            # same tag may be listed several times with different attributes.
            for index, tag in enumerate(self.tags):
                if tag != element.tag:
                    continue
                required = self.attributes[index]
                if all(
                    key in element.attrib
                    and element.attrib[key] == required[key]
                    for key in required
                ):
                    return True
            return False

        def find(element):
            """Recursively walk the tree below ``element`` and record matches.

            Args:
                element: the element about to be examined; it must expose
                    ``tag``, ``attrib`` and iteration over its children.
            """
            if matches(element):
                scraper.xml_elements.append(element)
            for sub_element in element:
                find(sub_element)

        if self.find_subelements:
            for start in search_roots:
                find(start)
        else:
            find(scraper.xml_tree)

    return act
# Comment list
# Table of contents