def _open_target_file(path):
    """Open *path* for reading, reporting failures as argparse errors.

    argparse only turns ArgumentTypeError/TypeError/ValueError raised by a
    ``type=`` callable into a usage error; a bare ``open`` would let the
    I/O error escape as a traceback instead.
    """
    try:
        return open(path)
    except (IOError, OSError) as err:
        raise argparse.ArgumentTypeError("can't open '%s': %s" % (path, err))


def cmdparse(argv=None):
    """Build the command-line parser and parse the spider's arguments.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with attributes ``url``, ``file`` (an
        open file object or None; the caller is responsible for closing
        it), ``cookie_file``, ``tld``, ``keepon``, ``consumer``,
        ``producer``, ``mongo_db`` and ``redis_db``.

    Raises:
        SystemExit: On invalid arguments, on ``--version``/``--help``, or
            with status 1 (help text on stderr) when none of ``--url``,
            ``--file`` or ``--continue`` was supplied.
    """
    parser = argparse.ArgumentParser(
        usage='\n%(prog)s [options] [-u url|-f file.txt]'
              '\n%(prog)s [options] --continue',
        description='Yet Another Web Spider')
    # The ``version=`` constructor keyword only exists in Python 2.7's
    # argparse (deprecated there, rejected on Python 3); the explicit
    # 'version' action exposes the same -v/--version flags everywhere.
    parser.add_argument('-v', '--version', action='version', version=VERSION)

    parser.add_argument('-u', '--url', dest='url',
                        help='Target url, if no tld, only urls in this subdomain')
    parser.add_argument('-f', '--file', dest='file', type=_open_target_file,
                        help='Load target from file')
    parser.add_argument('--cookie-file', dest='cookie_file', metavar='FILE',
                        help='Cookie file from chrome export by EditThisCookie')
    parser.add_argument('--tld', action='store_true', dest='tld',
                        help='Crawl all subdomains')
    parser.add_argument('--continue', dest='keepon', action='store_true',
                        help='Continue last task, no init target [-u|-f] need')

    worker = parser.add_argument_group(
        title='Worker', description='[optional] options for worker')
    worker.add_argument('-c', '--consumer', metavar='N', type=int, default=5,
                        dest='consumer',
                        help='Max number of consumer processes to run, default 5')
    worker.add_argument('-p', '--producer', metavar='N', type=int, default=1,
                        dest='producer',
                        help='Max number of producer processes to run, default 1')

    db = parser.add_argument_group(
        title='Database', description='[optional] options for redis and mongodb')
    db.add_argument('--mongo-db', metavar='STRING', dest='mongo_db',
                    default=MongoConf.db,
                    help='Mongodb database name, default "tspider"')
    db.add_argument('--redis-db', metavar='NUMBER', dest='redis_db', type=int,
                    default=RedisConf.db,
                    help='Redis db index, default 0')

    args = parser.parse_args(argv)

    # A run needs either an initial target (-u / -f) or --continue.
    if not (args.url or args.file or args.keepon):
        # exit() takes (status, message); pass both explicitly so the help
        # text is the message and the exit status is a real integer.
        parser.exit(status=1, message=parser.format_help())
    return args
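# NOTE: the two lines that followed here ("Comment list" / "Table of contents")
# were navigation text from the web page this snippet was copied from, not code.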