def run(self):
    """Parse arguments, start Spark, execute the job, and shut Spark down.

    Steps, in order:

    1. Store the result of ``self.parse_arguments()`` on ``self.args``.
    2. Build a ``SparkConf`` with the job's fixed settings and create a
       ``SparkContext`` named ``self.name``, plus a ``SQLContext`` on it.
    3. Create three accumulators, each starting at 0, on ``self``:
       ``records_processed``, ``warc_input_processed`` and
       ``warc_input_failed``.
    4. Call ``self.run_job(sc, sqlc)``.
    5. Stop the ``SparkContext``.

    The context is stopped even when setup or ``run_job`` raises; the
    exception is then propagated to the caller unchanged.
    """
    self.args = self.parse_arguments()

    conf = SparkConf().setAll((
        ("spark.task.maxFailures", "10"),
        ("spark.locality.wait", "20s"),
        ("spark.serializer", "org.apache.spark.serializer.KryoSerializer"),
    ))
    sc = SparkContext(appName=self.name, conf=conf)

    # Everything after the context exists is guarded so that a failing job
    # cannot leave the SparkContext running.
    try:
        sqlc = SQLContext(sparkContext=sc)

        # Counters are created before run_job so the job can reference
        # them through self.
        self.records_processed = sc.accumulator(0)
        self.warc_input_processed = sc.accumulator(0)
        self.warc_input_failed = sc.accumulator(0)

        self.run_job(sc, sqlc)
    finally:
        sc.stop()
评论列表
文章目录