def soupit(j, genre):
    """Collect image URLs from one WikiArt paintings-by-genre listing page.

    The page at ``https://www.wikiart.org/en/paintings-by-genre/<genre>/<j>``
    is fetched and parsed, its parsed tags are rendered to text and split on
    whitespace. Scanning starts at the first token that is exactly ``data``
    and stops at the first token containing ``}];``. Every token in between
    that contains ``https`` is turned into a URL.

    Args:
        j: Page index; converted with ``str()`` when building the URL.
        genre: Genre path segment, inserted into the URL as-is.

    Returns:
        A list of URL strings (possibly empty) on success, or ``None`` if
        any exception was raised while fetching or parsing the page. The
        failure is reported on stdout rather than propagated.
    """
    try:
        url = "https://www.wikiart.org/en/paintings-by-genre/" + genre + "/" + str(j)
        # Close the HTTP response as soon as the document has been parsed
        # instead of leaving it to the garbage collector.
        with urllib.request.urlopen(url) as html:
            # NOTE(review): no parser is named, so bs4 picks whichever one is
            # installed; pin one explicitly once the expected output has been
            # confirmed against it.
            soup = BeautifulSoup(html)

        found = False
        urls = []
        for token in str(soup.findAll()).split():
            if token == 'data':
                found = True
            if not found:
                # Still before the 'data' marker: nothing to collect yet.
                continue
            if '}];' in token:
                # End-of-section marker: stop scanning.
                break
            if 'https' in token:
                # Drop the first six and last two characters of the token and
                # prefix "http". Presumably the token looks like
                # '"https://host/img.jpg",', which would yield
                # 'http://host/img.jpg' -- verify against a live page.
                urls.append("http" + token[6:-2])
        return urls
    except Exception as error:
        # Best-effort scraping: report the failing genre/page and the cause,
        # then signal failure with None so the caller can carry on.
        print('Failed to find the following genre page combo: ' + genre + str(j)
              + ' (' + repr(error) + ')')
        return None
#Given a URL for an image, download and save the image. Information about the painting is also recovered and stored in the saved file name; how much is recovered depends on the length of file.split('/') (which corresponds to how much information is available).
评论列表
文章目录