def parse_detail_page(self, response):
    """
    Get details for each course.

    Takes the partially filled item from ``response.meta['item']``, adds
    the fields below from the detail page, and yields the item:

    * ``review_num``  -- integer parsed from the text of the
      ``<span itemprop="votes">`` node.
    * ``student_num`` -- integer parsed from the value that follows
      ``"mycourses-listed-count", 0,`` in the raw page text.
    * ``keywords``    -- result of ``self.get_keywords`` applied to the
      concatenated ``course-desc`` blocks plus ``item['name']``; an empty
      list when that call returns a falsy value.

    A count that is missing from the page is stored as ``0`` and a missing
    description is treated as an empty string, so an incomplete page does
    not abort the item. A count that is present but not a valid integer
    still raises ``ValueError``.
    """
    item = response.meta['item']

    # extract_first() may return None when the node is absent, so guard
    # before stripping.
    review_text = response.xpath('//span[@itemprop="votes"]/text()').extract_first()
    item['review_num'] = int((review_text or '').strip() or '0')

    # findall() returns an empty list when nothing matches; only index
    # into it when there is a hit.
    student_matches = re.findall(r'"mycourses-listed-count", 0, (.*), 0', response.text)
    student_text = student_matches[0].strip() if student_matches else ''
    item['student_num'] = int(student_text or '0')

    # extract() gives a list of strings; join them into one description
    # (an empty list simply yields '').
    course_desc = ''.join(response.xpath('//div[@class="course-desc"]').extract())
    item['keywords'] = self.get_keywords(course_desc + item['name']) or []
    yield item
class_central_spider.py 文件源码
python
阅读 53
收藏 0
点赞 0
评论 0
评论列表
文章目录