from flask import request, jsonify
from urllib import parse as urlparse  # provides urlsplit; the original code used Python 2's urlparse module

# Crawler is defined elsewhere in the application.

def crawl():
    # Parse the optional depth parameter: reject non-numeric values,
    # and fall back to a depth of 1 when the parameter is absent.
    try:
        depth_limit = int(request.values['depth'])
    except ValueError:
        return "Depth parameter must be a number", 400
    except KeyError:
        depth_limit = 1

    if 'url' in request.values:
        url = request.values['url']
        parsed_url = urlparse.urlsplit(url)
        # Only plain web URLs with a host component are accepted.
        if parsed_url.scheme not in ['http', 'https']:
            return "Only http and https protocols are supported", 400
        if parsed_url.netloc == '':
            return "Missing domain", 400
        # Restrict the crawl to the domain of the submitted URL.
        allowed_domains = [parsed_url.netloc]
        crawler = Crawler(allowed_domains, depth_limit)
        crawler.crawl(url)
        return jsonify(**crawler.crawled)
    else:
        return "Missing url parameter", 400