def run(self):
ind=self.qu.get()
url=self.url+str(ind)
soup =bs.BeautifulSoup(''.join( ul.urlopen(url).readlines() ))
bu = up.urlsplit(self.url)
print 'started with the ' ,str(url).split('/')[-1],
for i in soup.find_all(attrs = { "class" : "recipe-title"}):
sp = up.urlsplit(i.a.get('href'))
path = sp.path
print path
if re.search(pat, path):
path = bu.scheme+'://'+bu.netloc+path
filename = str(path).split('/')[-2]
filename = op.join(op.abspath(op.curdir),filename+'.py') # recipe will be stored in given location
# filename = op.join(op.abspath(op.curdir),filename+'.html')
#uncomment the above line if downloading the web page for teh recipe
print path
self.q.put((path,filename))
self.fetch_data()
time.sleep(1)
self.qu.task_done()
self.q.join()
print 'done with the ' ,str(url).split('/')[-1],
# [web page residue] Comment list
# [web page residue] Table of contents