def get_tsun(self, index: str):
    """Return one post from the tsuntsunlive Instagram profile page.

    The profile HTML is downloaded, the longest inline
    ``<script type="text/javascript">`` tag is taken as the data blob, and
    that blob is cut into per-post segments at every ``"caption": "`` marker.

    Args:
        index: Free-form text. If its first whitespace-separated token is an
            integer, it is used as a 1-based position into the list of
            captioned posts. If it is empty, ``None`` or not numeric, a
            random post is chosen.

    Returns:
        A two-element list ``[image_url, caption]``.

    Raises:
        requests.RequestException: the page could not be fetched (network
            failure, timeout or HTTP error status).
        IndexError: no script/post data was found, or the requested position
            is outside ``1..number_of_posts``.
        ValueError: the selected post segment lacks the expected
            ``", "likes"`` or ``"display_src": "`` markers.
    """
    # Function-scope import: only needed to decode the JSON-escaped caption.
    import json

    # Without a timeout a stalled connection would block the caller forever.
    r = requests.get('https://www.instagram.com/tsuntsunlive/', timeout=10)
    # Fail here on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    soup = bs4.BeautifulSoup(r.content, 'html.parser')

    scripts = [str(tag) for tag in soup.find_all("script", type="text/javascript")]
    if not scripts:
        raise IndexError("no text/javascript <script> tags found on the profile page")
    # The longest script tag is assumed to be the one embedding the post data.
    data_tag = max(scripts, key=len)

    # Each segment after a caption marker starts with that post's caption text.
    post_list = data_tag.split('"caption": "')[1:]
    if not post_list:
        raise IndexError("no captioned posts found in the profile page data")

    # Only the first token of the argument is considered; anything that is
    # not an integer means "pick a random post".
    tokens = (index or "").split()
    try:
        position = int(tokens[0])
    except (IndexError, ValueError):
        position = None

    if position is None:
        post = random.choice(post_list)
    elif 1 <= position <= len(post_list):
        post = post_list[position - 1]
    else:
        # Reject 0 and negatives explicitly: position - 1 would otherwise
        # wrap around and silently return a post from the end of the list.
        raise IndexError(
            "post index {} out of range (1..{})".format(position, len(post_list))
        )

    caption_end = post.find('", "likes"')
    if caption_end == -1:
        raise ValueError("could not find the end of the caption in the post data")
    raw_caption = post[:caption_end]
    try:
        # The caption is the body of a JSON string literal, so the JSON
        # decoder handles every escape at once, including \uXXXX surrogate
        # pairs (emoji), \n, \" and \/.
        caption = json.loads('"' + raw_caption + '"', strict=False)
    except ValueError:
        # Not a well-formed JSON string body: decode \uXXXX escapes and
        # newlines by hand and leave everything else untouched.
        caption = re.sub(
            r"\\u[0-9a-fA-F]{4}",
            lambda match: codecs.decode(match.group(0), "unicode_escape"),
            raw_caption,
        )
        caption = caption.replace("\\n", "\n")

    src_marker = '"display_src": "'
    src_start = post.find(src_marker)
    if src_start == -1:
        raise ValueError("could not find the image URL in the post data")
    img_part = post[src_start + len(src_marker):]
    # Stop at the closing quote of the URL string first, then drop the query
    # string, so a URL without "?" cannot swallow the fields that follow it.
    img = img_part.split('"', 1)[0].split('?', 1)[0]
    # Remove the backslashes left over from JSON escaping (e.g. "\/").
    img = img.replace("\\", "")

    return [img, caption]
评论列表
文章目录