# spider.py source file - Python code snippet

def parse_user_1(self, response):
        """Parse a user profile page and yield the populated user item.

        The item to fill is taken from ``response.meta["item"]``. The text
        nodes of every ``body/div[@class="c"]`` element are joined with ";"
        and searched for "<label>:<value>" pairs. The nickname, verification
        ("auth") and introduction ("intro") values are copied onto the item
        when their label is present; a missing label leaves the item field
        untouched.

        Args:
            response: The downloaded page. Its ``meta`` mapping must contain
                the partially-filled item under the key ``"item"``.

        Yields:
            The same item, with any found profile fields set and ``"t"`` set
            to the current local date formatted as ``YYYY-MM-DD``.

        Raises:
            KeyError: If ``response.meta`` has no ``"item"`` entry.
        """
        user_item = response.meta["item"]
        selector = Selector(response)
        # Joining with ";" gives every text node an explicit end delimiter,
        # except the last one, which is why the pattern below also accepts
        # end-of-text as a terminator.
        text1 = ";".join(selector.xpath('body/div[@class="c"]/text()').extract())

        # (item field, page label) pairs, in the order they are assigned.
        # NOTE(review): the page apparently also carries gender, region,
        # birthday, orientation, relationship-status and website labels; they
        # are not stored on the item, so they are not searched for here.
        labelled_fields = (
            ("nickname", u'\u6635\u79f0'),  # label meaning "nickname"
            ("auth", u'\u8ba4\u8bc1'),      # label meaning "verification"
            ("intro", u'\u7b80\u4ecb'),     # label meaning "introduction"
        )
        for field_name, label in labelled_fields:
            # The separator class accepts an ASCII colon, a full-width colon
            # (U+FF1A) and -- kept for backward compatibility -- a literal "|".
            # The value runs lazily up to the next ";" or the end of the text,
            # so a label found in the final text node is no longer dropped.
            found = re.search(label + u'[:|\uff1a](.*?)(?:;|$)', text1)
            if found is not None:
                user_item[field_name] = found.group(1)

        # strftime() without a time tuple formats the current local time.
        user_item['t'] = time.strftime('%Y-%m-%d')
        yield user_item