def parse_user_1(self, response):
    """Fill in profile fields on the pending user item and yield it.

    The item to complete is read from ``response.meta["item"]``. The text
    nodes directly under ``body/div[@class="c"]`` are joined with ``;`` and
    scanned for ``<label>:<value>`` entries (ASCII or full-width colon).
    Only the nickname, auth and intro entries are copied to the item; a key
    is left untouched when its label is not present in the page text.

    The item's ``t`` key is always set to today's local date
    (``YYYY-MM-DD``).

    Yields:
        The same item object that was passed in through ``response.meta``.
    """
    user_item = response.meta["item"]
    selector = Selector(response)
    # Every value is matched up to the next ';'. Append a final ';' so the
    # last text node is terminated too; otherwise a field that appears in
    # the last node could never match.
    text1 = ";".join(selector.xpath('body/div[@class="c"]/text()').extract()) + ";"

    # (item key, label as it appears in the page text)
    labelled_fields = (
        ("nickname", u'\u6635\u79f0'),
        ("auth", u'\u8ba4\u8bc1'),
        ("intro", u'\u7b80\u4ecb'),
    )
    for key, label in labelled_fields:
        # The separator is an ASCII ':' or a full-width colon. Note that '|'
        # inside a character class is a literal pipe, not alternation, so it
        # is deliberately left out of the class.
        found = re.search(label + u'[:\uff1a](.*?);', text1)
        if found:
            user_item[key] = found.group(1)

    # strftime() without a time tuple formats the current local time.
    user_item['t'] = time.strftime('%Y-%m-%d')
    yield user_item
评论列表
文章目录