def get_my_content(r):
    """
    Parse a vk server response into an lxml HTML tree.

    The return from the server in vk is not a standard HTML document, which
    is why it must be cut up here and the regular 'get_real_content' helper
    cannot be used. Everything before the first '<input' is discarded and
    the remainder is wrapped in a minimal '<html><body>' skeleton before
    parsing. If the body contains no '<input' at all, the whole body is
    wrapped and parsed unchanged.

    :param r: response object exposing ``status_code`` and ``content``
        (the raw body as bytes).
    :return: the root element produced by ``lxml.html.fromstring``.
    :raises AssertionError: if the response status code is not 200.

    If the body cannot be read or decoded, the error and the raw content
    are printed and the process exits with status 1.
    """
    # Raised explicitly instead of via ``assert`` so the check still runs
    # under ``python -O``; the exception type is kept for existing callers.
    if r.status_code != 200:
        raise AssertionError(
            'unexpected status code: %s' % (r.status_code,)
        )

    try:
        content = r.content  # type: bytes
        # Undecodable bytes are dropped rather than treated as an error.
        str_content = content.decode(errors='ignore')
    except Exception as e:
        print(e)
        print('could not decode')
        print(r.content)
        sys.exit(1)

    # str.find returns -1 when the marker is missing; slicing from -1 would
    # keep only the last character, so only cut when the marker was found.
    start = str_content.find('<input')
    if start != -1:
        str_content = str_content[start:]

    # lxml is handed bytes, exactly as before.
    wrapped = '<html><body>' + str_content + '</body></html>'
    return lxml.html.fromstring(wrapped.encode())
评论列表
文章目录