def _extractBirthday(text):
    """Pull the birthday date out of a text fragment that mentions 'Birthday'.

    The date is taken to be the content of the first ``<div>...</div>``
    following the word 'Birthday'; anything from the first comma onward
    (typically the year) is dropped so dates group by day and month.

    Returns the date string, or None when no usable date is present.
    """
    # Only look at what follows the 'Birthday' label.
    _, label, afterLabel = text.partition('Birthday')
    if not label:
        return None
    # Without an opening <div> there is no date to read; slicing at a
    # failed find() offset would produce a garbage key instead.
    _, openTag, afterOpen = afterLabel.partition('<div>')
    if not openTag:
        return None
    # If the closing tag is missing, keep the remainder intact rather than
    # chopping its last character off.
    date = afterOpen.partition('</div>')[0]
    # Standardize the date by cutting off the year if there is one.
    date = date.partition(',')[0]
    return date or None


def getFriendsBirthdays(birthdays,friendsDict,s):
    """Scrape each friend's "about" page and group friends by birthday.

    birthdays   -- dict mapping a birthday string to a list of friend keys;
                   updated in place.
    friendsDict -- mapping of friend key to that friend's profile path,
                   which is appended to "https://facebook.com".
    s           -- object whose get(url) returns a response exposing .text
                   (presumably a logged-in requests session; confirm
                   against the caller).

    Friends whose page has no extractable birthday are skipped.
    Returns None; results are delivered through ``birthdays``.
    """
    # --------- Getting Birthday Info -----------
    # Compile once and reuse for both the strainer and the per-page search.
    birthdayPattern = re.compile("Birthday")
    relatStrainer = SoupStrainer(text=birthdayPattern)
    relatExt = "/about"
    relatExtBeta = "&sk=about"
    fbook = "https://facebook.com"
    #***** Note: will have to perform additional string methods because scraping from main page
    for friend, profilePath in friendsDict.items():
        # Paths containing "php" already carry a query string, so the
        # about section is requested with an extra query parameter.
        if "php" in profilePath:
            relatURL = fbook + profilePath + relatExtBeta
        else:
            relatURL = fbook + profilePath + relatExt
        relatInfo = s.get(relatURL)
        soup = BeautifulSoup(relatInfo.text,"lxml",parse_only=relatStrainer)
        subString = soup.find(text=birthdayPattern)
        if subString is None:
            continue
        birthday = _extractBirthday(subString)
        if birthday is None:
            continue
        birthdays.setdefault(birthday, []).append(friend)
        # Parenthesised single argument prints the same on Python 2 and 3.
        print(friend + " has birthday " + birthday)
    return
评论列表
文章目录