def _test():
import doctest
from pyspark.sql import SparkSession
import pyspark.sql.column
globs = pyspark.sql.column.__dict__.copy()
spark = SparkSession.builder\
.master("local[4]")\
.appName("sql.column tests")\
.getOrCreate()
sc = spark.sparkContext
globs['sc'] = sc
globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \
.toDF(StructType([StructField('age', IntegerType()),
StructField('name', StringType())]))
(failure_count, test_count) = doctest.testmod(
pyspark.sql.column, globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
spark.stop()
if failure_count:
exit(-1)
python类REPORT_NDIFF的实例源码
def _test():
import doctest
from pyspark.context import SparkContext
from pyspark.sql import SQLContext
import pyspark.sql.column
globs = pyspark.sql.column.__dict__.copy()
sc = SparkContext('local[4]', 'PythonTest')
globs['sc'] = sc
globs['sqlContext'] = SQLContext(sc)
globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \
.toDF(StructType([StructField('age', IntegerType()),
StructField('name', StringType())]))
(failure_count, test_count) = doctest.testmod(
pyspark.sql.column, globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
globs['sc'].stop()
if failure_count:
exit(-1)
def _get_report_choice(key):
"""
This function returns the actual `doctest` module flag value, we want to do it as late as possible to avoid
importing `doctest` and all its dependencies when parsing options, as it adds overhead and breaks tests.
"""
import doctest
return {
DOCTEST_REPORT_CHOICE_UDIFF: doctest.REPORT_UDIFF,
DOCTEST_REPORT_CHOICE_CDIFF: doctest.REPORT_CDIFF,
DOCTEST_REPORT_CHOICE_NDIFF: doctest.REPORT_NDIFF,
DOCTEST_REPORT_CHOICE_ONLY_FIRST_FAILURE: doctest.REPORT_ONLY_FIRST_FAILURE,
DOCTEST_REPORT_CHOICE_NONE: 0,
}[key]
def _test():
import doctest
from pyspark.sql import Row, SparkSession
import pyspark.sql.group
globs = pyspark.sql.group.__dict__.copy()
spark = SparkSession.builder\
.master("local[4]")\
.appName("sql.group tests")\
.getOrCreate()
sc = spark.sparkContext
globs['sc'] = sc
globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \
.toDF(StructType([StructField('age', IntegerType()),
StructField('name', StringType())]))
globs['df3'] = sc.parallelize([Row(name='Alice', age=2, height=80),
Row(name='Bob', age=5, height=85)]).toDF()
globs['df4'] = sc.parallelize([Row(course="dotNET", year=2012, earnings=10000),
Row(course="Java", year=2012, earnings=20000),
Row(course="dotNET", year=2012, earnings=5000),
Row(course="dotNET", year=2013, earnings=48000),
Row(course="Java", year=2013, earnings=30000)]).toDF()
(failure_count, test_count) = doctest.testmod(
pyspark.sql.group, globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
spark.stop()
if failure_count:
exit(-1)
def _test():
import doctest
from pyspark.context import SparkContext
from pyspark.sql import Row, SQLContext, SparkSession
import pyspark.sql.dataframe
from pyspark.sql.functions import from_unixtime
globs = pyspark.sql.dataframe.__dict__.copy()
sc = SparkContext('local[4]', 'PythonTest')
globs['sc'] = sc
globs['sqlContext'] = SQLContext(sc)
globs['spark'] = SparkSession(sc)
globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')])\
.toDF(StructType([StructField('age', IntegerType()),
StructField('name', StringType())]))
globs['df2'] = sc.parallelize([Row(name='Tom', height=80), Row(name='Bob', height=85)]).toDF()
globs['df3'] = sc.parallelize([Row(name='Alice', age=2),
Row(name='Bob', age=5)]).toDF()
globs['df4'] = sc.parallelize([Row(name='Alice', age=10, height=80),
Row(name='Bob', age=5, height=None),
Row(name='Tom', age=None, height=None),
Row(name=None, age=None, height=None)]).toDF()
globs['sdf'] = sc.parallelize([Row(name='Tom', time=1479441846),
Row(name='Bob', time=1479442946)]).toDF()
(failure_count, test_count) = doctest.testmod(
pyspark.sql.dataframe, globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
globs['sc'].stop()
if failure_count:
exit(-1)
def _test():
import doctest
import os
import tempfile
import py4j
from pyspark.context import SparkContext
from pyspark.sql import SparkSession, Row
import pyspark.sql.readwriter
os.chdir(os.environ["SPARK_HOME"])
globs = pyspark.sql.readwriter.__dict__.copy()
sc = SparkContext('local[4]', 'PythonTest')
try:
spark = SparkSession.builder.enableHiveSupport().getOrCreate()
except py4j.protocol.Py4JError:
spark = SparkSession(sc)
globs['tempfile'] = tempfile
globs['os'] = os
globs['sc'] = sc
globs['spark'] = spark
globs['df'] = spark.read.parquet('python/test_support/sql/parquet_partitioned')
(failure_count, test_count) = doctest.testmod(
pyspark.sql.readwriter, globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
sc.stop()
if failure_count:
exit(-1)
def _test():
import doctest
import os
import tempfile
from pyspark.sql import Row, SparkSession, SQLContext
import pyspark.sql.streaming
os.chdir(os.environ["SPARK_HOME"])
globs = pyspark.sql.streaming.__dict__.copy()
try:
spark = SparkSession.builder.getOrCreate()
except py4j.protocol.Py4JError:
spark = SparkSession(sc)
globs['tempfile'] = tempfile
globs['os'] = os
globs['spark'] = spark
globs['sqlContext'] = SQLContext.getOrCreate(spark.sparkContext)
globs['sdf'] = \
spark.readStream.format('text').load('python/test_support/sql/streaming')
globs['sdf_schema'] = StructType([StructField("data", StringType(), False)])
globs['df'] = \
globs['spark'].readStream.format('text').load('python/test_support/sql/streaming')
(failure_count, test_count) = doctest.testmod(
pyspark.sql.streaming, globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
globs['spark'].stop()
if failure_count:
exit(-1)
def _get_report_choice(key):
"""
This function returns the actual `doctest` module flag value, we want to do it as late as possible to avoid
importing `doctest` and all its dependencies when parsing options, as it adds overhead and breaks tests.
"""
import doctest
return {
DOCTEST_REPORT_CHOICE_UDIFF: doctest.REPORT_UDIFF,
DOCTEST_REPORT_CHOICE_CDIFF: doctest.REPORT_CDIFF,
DOCTEST_REPORT_CHOICE_NDIFF: doctest.REPORT_NDIFF,
DOCTEST_REPORT_CHOICE_ONLY_FIRST_FAILURE: doctest.REPORT_ONLY_FIRST_FAILURE,
DOCTEST_REPORT_CHOICE_NONE: 0,
}[key]
def _get_report_choice(key):
"""
This function returns the actual `doctest` module flag value, we want to do it as late as possible to avoid
importing `doctest` and all its dependencies when parsing options, as it adds overhead and breaks tests.
"""
import doctest
return {
DOCTEST_REPORT_CHOICE_UDIFF: doctest.REPORT_UDIFF,
DOCTEST_REPORT_CHOICE_CDIFF: doctest.REPORT_CDIFF,
DOCTEST_REPORT_CHOICE_NDIFF: doctest.REPORT_NDIFF,
DOCTEST_REPORT_CHOICE_ONLY_FIRST_FAILURE: doctest.REPORT_ONLY_FIRST_FAILURE,
DOCTEST_REPORT_CHOICE_NONE: 0,
}[key]
def _get_report_choice(key):
"""
This function returns the actual `doctest` module flag value, we want to do it as late as possible to avoid
importing `doctest` and all its dependencies when parsing options, as it adds overhead and breaks tests.
"""
import doctest
return {
DOCTEST_REPORT_CHOICE_UDIFF: doctest.REPORT_UDIFF,
DOCTEST_REPORT_CHOICE_CDIFF: doctest.REPORT_CDIFF,
DOCTEST_REPORT_CHOICE_NDIFF: doctest.REPORT_NDIFF,
DOCTEST_REPORT_CHOICE_ONLY_FIRST_FAILURE: doctest.REPORT_ONLY_FIRST_FAILURE,
DOCTEST_REPORT_CHOICE_NONE: 0,
}[key]
def _test():
import doctest
from pyspark.context import SparkContext
from pyspark.sql import Row, SQLContext
import pyspark.sql.group
globs = pyspark.sql.group.__dict__.copy()
sc = SparkContext('local[4]', 'PythonTest')
globs['sc'] = sc
globs['sqlContext'] = SQLContext(sc)
globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \
.toDF(StructType([StructField('age', IntegerType()),
StructField('name', StringType())]))
globs['df3'] = sc.parallelize([Row(name='Alice', age=2, height=80),
Row(name='Bob', age=5, height=85)]).toDF()
globs['df4'] = sc.parallelize([Row(course="dotNET", year=2012, earnings=10000),
Row(course="Java", year=2012, earnings=20000),
Row(course="dotNET", year=2012, earnings=5000),
Row(course="dotNET", year=2013, earnings=48000),
Row(course="Java", year=2013, earnings=30000)]).toDF()
(failure_count, test_count) = doctest.testmod(
pyspark.sql.group, globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
globs['sc'].stop()
if failure_count:
exit(-1)
def _test():
import doctest
from pyspark.context import SparkContext
from pyspark.sql import Row, SQLContext
import pyspark.sql.dataframe
globs = pyspark.sql.dataframe.__dict__.copy()
sc = SparkContext('local[4]', 'PythonTest')
globs['sc'] = sc
globs['sqlContext'] = SQLContext(sc)
globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')])\
.toDF(StructType([StructField('age', IntegerType()),
StructField('name', StringType())]))
globs['df2'] = sc.parallelize([Row(name='Tom', height=80), Row(name='Bob', height=85)]).toDF()
globs['df3'] = sc.parallelize([Row(name='Alice', age=2),
Row(name='Bob', age=5)]).toDF()
globs['df4'] = sc.parallelize([Row(name='Alice', age=10, height=80),
Row(name='Bob', age=5, height=None),
Row(name='Tom', age=None, height=None),
Row(name=None, age=None, height=None)]).toDF()
(failure_count, test_count) = doctest.testmod(
pyspark.sql.dataframe, globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
globs['sc'].stop()
if failure_count:
exit(-1)
def _test():
import doctest
import os
import tempfile
from pyspark.context import SparkContext
from pyspark.sql import Row, SQLContext, HiveContext
import pyspark.sql.readwriter
os.chdir(os.environ["SPARK_HOME"])
globs = pyspark.sql.readwriter.__dict__.copy()
sc = SparkContext('local[4]', 'PythonTest')
globs['tempfile'] = tempfile
globs['os'] = os
globs['sc'] = sc
globs['sqlContext'] = SQLContext(sc)
globs['hiveContext'] = HiveContext(sc)
globs['df'] = globs['sqlContext'].read.parquet('python/test_support/sql/parquet_partitioned')
(failure_count, test_count) = doctest.testmod(
pyspark.sql.readwriter, globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
globs['sc'].stop()
if failure_count:
exit(-1)
def test_suite(*args):
return doctest.DocTestSuite(optionflags=(doctest.NORMALIZE_WHITESPACE|
doctest.ELLIPSIS|
doctest.REPORT_ONLY_FIRST_FAILURE|
doctest.REPORT_NDIFF
))
def setup_optionflags(self):
if 'optionflags' not in self._kw:
self._kw['optionflags'] = (
doctest.ELLIPSIS | doctest.REPORT_NDIFF |
doctest.NORMALIZE_WHITESPACE)