def get_related_document_ids(kamervraag_url):
    """Download a kamervraag page and collect the ids listed as its answers.

    The page is fetched and parsed as HTML. For every ``h2.divisiekop1``
    heading inside ``div#main-column`` whose text is exactly "Relaties", the
    ``tr/td/p`` cells below that heading's parent element are walked in
    document order, and the text of each cell that directly follows a cell
    reading 'is beantwoord in' is recorded.

    Args:
        kamervraag_url: URL of the kamervraag page to download.

    Returns:
        A list with the text of every cell that directly follows an
        'is beantwoord in' cell, in document order. The list is empty when
        no "Relaties" heading or no such cell is found.

    Raises:
        Nothing is caught here: any exception raised by ``requests.get``
        (the request uses a 60 second timeout) or by the HTML parser
        propagates to the caller.
    """
    logger.info('get related antwoord id for url: %s', kamervraag_url)
    page = requests.get(kamervraag_url, timeout=60)
    tree = lxml.html.fromstring(page.content)
    relation_headings = tree.xpath('//div[@id="main-column"]//h2[@class="divisiekop1"]')
    overheidnl_document_ids = []
    for heading in relation_headings:
        if heading.text_content() != "Relaties":
            continue
        # The leading '.' keeps the query relative to the heading's parent.
        # A bare '//' is an absolute path in lxml and would match cells
        # anywhere in the document, not just in this section.
        # NOTE(review): assumes the relations table is a descendant of the
        # heading's parent element -- confirm against a live page.
        cells = heading.getparent().xpath('.//tr/td/p')
        previous_was_marker = False
        for cell in cells:
            cell_text = cell.text_content()
            if previous_was_marker:
                overheidnl_document_ids.append(cell_text)
            # A cell can be recorded as an id and still act as the marker
            # for the cell after it.
            previous_was_marker = cell_text == 'is beantwoord in'
    return overheidnl_document_ids
评论列表
文章目录