def extract_videos_from_page(self, page):
ids_in_page = []
titles_in_page = []
for mobj in re.finditer(self._VIDEO_RE, page):
# The link with index 0 is not the first video of the playlist (not sure if still actual)
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
continue
video_id = mobj.group('id')
video_title = unescapeHTML(mobj.group('title'))
if video_title:
video_title = video_title.strip()
try:
idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]:
titles_in_page[idx] = video_title
except ValueError:
ids_in_page.append(video_id)
titles_in_page.append(video_title)
return zip(ids_in_page, titles_in_page)
评论列表
文章目录