# Assumes module-level `import logging` plus the project's `Item`, `xtract`,
# and `identify` helpers, which are defined elsewhere in this repo.
def parse_details(self, response):
    # The first <h2> on the page names the institution whose acts are listed.
    institution = response.xpath('//h2/text()').extract_first(default='').strip()
    logging.warning("scraping: %s - %s", response.url, institution)
    # Each row of the "fancy" table describes one draft act; header rows
    # have no <td> cells, so they are skipped.
    for tr in response.xpath('//table[@class="fancy"]/tr'):
        if tr.xpath('td[1]'):
            item = Item()
            # Column meanings (Romanian headers): titlu = title,
            # avizare = endorsement, avizori = endorsers,
            # termen_avize = endorsement deadline, reavizare = re-endorsement.
            titlu = xtract(tr, 'td[1]//div/text()')
            type_ = xtract(tr, 'td[2]//div//strong/text()')
            consult = xtract(tr, 'td[3]//div/text()')
            avizare = xtract(tr, 'td[4]//div/text()')
            avizori = xtract(tr, 'td[5]//div/text()')
            termen_avize = xtract(tr, 'td[6]//div/text()')
            mfp_mj = xtract(tr, 'td[7]//div/text()')
            reavizare = xtract(tr, 'td[8]//div/text()')
            # Columns 9-12 link to the initial and final document versions.
            init_1 = xtract(tr, 'td[9]//a/@href')
            init_2 = xtract(tr, 'td[10]//a/@href')
            final_1 = xtract(tr, 'td[11]//a/@href')
            final_2 = xtract(tr, 'td[12]//a/@href')
            # Keep only the links that are present; hrefs are site-relative.
            docs = [{"type": "nota", "url": response.urljoin(f)}
                    for f in [init_1, init_2, final_1, final_2] if f]
            item['identifier'] = identify(institution, titlu)
            item['title'] = titlu
            item['type'] = type_
            item['institution'] = "sgg"
            item['date'] = consult
            item['description'] = ""
            item['feedback_days'] = None
            item['contact'] = None
            item['documents'] = docs
            yield item
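

# `xtract` is a project helper used above but not shown here. A minimal
# sketch of what it presumably does (an assumption, not the project's actual
# implementation): return the first stripped XPath match, or None when the
# cell is missing or empty.
def xtract(selector, path):
    value = selector.xpath(path).extract_first()  # first match or None
    value = value.strip() if value else None      # drop surrounding whitespace
    return value or None                          # normalize '' to None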
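

# For context, a hedged sketch of how a Scrapy spider would reach this
# callback; the spider class, its name, and the link XPath below are
# assumptions, not the project's actual code:
#
#     class SggSpider(scrapy.Spider):   # hypothetical; parse_details above
#         name = "sgg"                  # would be a method of this spider
#
#         def parse(self, response):
#             for href in response.xpath('//a/@href').extract():
#                 yield scrapy.Request(response.urljoin(href),
#                                      callback=self.parse_details)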