def parse1(self, response):
    """Parse a user profile page and yield the populated profile item.

    The item to fill and the user ID are taken from ``response.meta``
    (keys ``"item"`` and ``"ID"``).  Profile fields are located in the text
    nodes directly under ``body/div[@class="c"]`` as ``<label>:<value>``
    pairs; only the fields actually found on the page are set on the item.

    Args:
        response: The response for the profile page.  Its ``meta`` must
            carry ``"item"`` (a mutable mapping-like item) and ``"ID"``.

    Yields:
        The same item object received through ``response.meta["item"]``,
        with any discovered fields and ``"user_id"`` set.
    """
    selector = Selector(response)
    infoItem = response.meta["item"]
    ID = response.meta["ID"]

    # Collect every text node and join them with ";" so that each
    # "<label>:<value>" pair is terminated by a ";".  The trailing ";" is
    # appended explicitly: str.join only places separators *between*
    # elements, so without it a field in the last text node could never
    # match the patterns below.
    text1 = ";".join(
        selector.xpath('body/div[@class="c"]/text()').extract()
    ) + ";"

    def first_value(label):
        """Return the first value following *label* in text1, else None."""
        # The separator class accepts an ASCII colon or a full-width colon
        # (U+FF1A).  NOTE: the "|" inside the brackets is a literal pipe,
        # not alternation; it is kept so previously matching input still
        # matches.
        found = re.findall(label + u'[:|\uff1a](.*?);', text1)
        return found[0] if found else None

    nickname = first_value(u'\u6635\u79f0')            # label: nickname
    gender = first_value(u'\u6027\u522b')              # label: gender
    place = first_value(u'\u5730\u533a')               # label: region
    signature = first_value(u'\u7b80\u4ecb')           # label: short bio
    birthday = first_value(u'\u751f\u65e5')            # label: birthday
    sexorientation = first_value(u'\u6027\u53d6\u5411')  # label: sexual orientation
    marriage = first_value(u'\u611f\u60c5\u72b6\u51b5')  # label: relationship status
    url = first_value(u'\u4e92\u8054\u7f51')           # label: "Internet" (personal URL)

    if nickname is not None:
        infoItem['nickname'] = nickname
    if gender is not None:
        infoItem['gender'] = gender
    if place is not None:
        # The region is "<province>" or "<province> <city>".
        parts = place.split(" ")
        infoItem["province"] = parts[0]
        if len(parts) > 1:
            infoItem["city"] = parts[1]
    if signature is not None:
        infoItem["signature"] = signature
    if birthday is not None:
        try:
            parsed = datetime.datetime.strptime(birthday, "%Y-%m-%d")
        except ValueError:
            # Best effort: values that are not a full YYYY-MM-DD date are
            # skipped and the field is simply left unset.
            pass
        else:
            # Shift back by 8 hours -- presumably converting a UTC+8 local
            # date to UTC; confirm against the storage layer.
            infoItem["birthday"] = parsed - datetime.timedelta(hours=8)
    if sexorientation is not None and gender is not None:
        # The page states which gender the user is attracted to, so the
        # orientation is derived by comparing it with the user's own gender.
        # Without a known gender the comparison is impossible; the field is
        # left unset instead of raising IndexError as the old code did.
        if sexorientation == gender:
            infoItem["sexorientation"] = "gay"
        else:
            infoItem["sexorientation"] = "Heterosexual"
    if marriage is not None:
        infoItem["marriage"] = marriage
    if url is not None:
        infoItem["url"] = url

    infoItem["user_id"] = ID
    yield infoItem
评论列表
文章目录