def _count_child(job, masterHostname):
    """Run a trivial distributed count against a standalone Spark cluster.

    Connects to the Spark master at ``spark://masterHostname:7077``,
    parallelizes the integers 0..9999 across 10 partitions, and asserts
    that a distributed ``count()`` returns 10000.

    :param job: job handle (unused here; presumably the Toil/child-job
        signature requires it — confirm against the caller).
    :param masterHostname: hostname of the Spark standalone master.
    :raises AssertionError: if the distributed count is not 10000.
    """
    # noinspection PyUnresolvedReferences
    from pyspark import SparkContext
    # start spark context and connect to cluster
    sc = SparkContext(master='spark://%s:7077' % masterHostname,
                      appName='count_test')
    try:
        # range (not the Python-2-only xrange) keeps this Python-3
        # compatible; 0-9999 split across 10 partitions.
        rdd = sc.parallelize(range(10000), 10)
        # and now, count it
        assert rdd.count() == 10000
    finally:
        # Always release the context's cluster resources, even when the
        # assertion fails — the original leaked the SparkContext.
        sc.stop()