def _parse_kk(url):
    """Scrape episode entries from a listing page on www.djkakt.us.

    The page is fetched, every ``<article>`` inside the element with class
    ``blog-list`` is inspected, and articles whose title yields a falsy
    result from ``_extract_episode_index`` are skipped.

    Args:
        url: Location of the listing page, resolved against
            ``https://www.djkakt.us/``.

    Returns:
        A list of ``utils.AttrDict`` objects with the keys ``date``
        (formatted as ``YYYY-MM-DD``), ``title``, ``index``, ``text`` and
        ``url`` (absolute link to the entry), sorted by ``date`` in
        descending order. An empty list is returned when the page contains
        no ``blog-list`` element.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        AttributeError: If a kept article lacks one of the expected
            elements (``find`` returns ``None`` for it).
        Also propagates whatever ``arrow.get`` raises when the dateline does
        not match ``MMMM D, YYYY``.
    """
    base_url = 'https://www.djkakt.us/'

    # A timeout keeps a stalled connection from blocking forever, and the
    # status check stops an error page from being parsed as a listing.
    response = requests.get(urllib.parse.urljoin(base_url, url), timeout=30)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'lxml')

    blog_list = soup.find(class_='blog-list')
    if blog_list is None:
        # Nothing to parse; avoids calling None as if it were a tag.
        return []

    episodes = []
    for epi in blog_list.find_all('article'):
        title_tag = epi.find(class_='entry-title')
        title = title_tag.text.strip()
        index = _extract_episode_index(title)
        if not index:
            # Decide whether to keep the article before touching its date or
            # body, so a skipped entry can never fail on those fields.
            continue

        # Collapse runs of whitespace/newlines so the date matches the
        # fixed format string.
        raw_date = ' '.join(epi.find(class_='entry-dateline-link').text.split())
        date = arrow.get(raw_date, 'MMMM D, YYYY').format('YYYY-MM-DD')

        text = epi.find(class_='sqs-block-content').text
        # Separate name: do not clobber the ``url`` argument.
        episode_url = urllib.parse.urljoin(base_url, title_tag.a['href'])

        episodes.append(utils.AttrDict(
            date=date, title=title, index=index, text=text, url=episode_url))

    # ISO-formatted date strings order chronologically when compared as text.
    episodes.sort(key=lambda episode: episode.date, reverse=True)
    return episodes
评论列表
文章目录