def __init__(self, filename, renmwo = True):
    """Open *filename* (or a sibling working file) and attach a TSV writer.

    Args:
        filename: Path the caller ultimately wants the data under; stored
            unchanged on ``self.filename``.
        renmwo: When true, the file actually opened is
            ``"<filename>.~renmwo<pid>~"``; when false, *filename* itself is
            opened. The path that was opened is stored on ``self.renmwo``.
    """
    self.filename = filename
    if renmwo:
        # The PID is embedded in the working name.
        # NOTE(review): presumably this file is renamed onto ``filename`` by
        # code outside this view -- confirm.
        self.renmwo = "%s.~renmwo%d~" % (filename, os.getpid())
    else:
        self.renmwo = filename
    # NOTE(review): on Python 3 the csv documentation recommends opening the
    # file with newline=''; left as-is because the target version is unknown.
    self.file = open(self.renmwo, "w")
    excel_tab = csv.get_dialect("excel-tab")
    self.writer = csv.writer(self.file, dialect = excel_tab)
Python csv.get_dialect() 函数的实例源码
def __init__(self, filename, renmwo = True):
    """Open *filename* (or a sibling working file) and attach a TSV writer.

    Args:
        filename: Path the caller ultimately wants the data under; stored
            unchanged on ``self.filename``.
        renmwo: When true, the file actually opened is
            ``"<filename>.~renmwo<pid>~"``; when false, *filename* itself is
            opened. The path that was opened is stored on ``self.renmwo``.
    """
    self.filename = filename
    if renmwo:
        # The PID is embedded in the working name.
        # NOTE(review): presumably this file is renamed onto ``filename`` by
        # code outside this view -- confirm.
        self.renmwo = "%s.~renmwo%d~" % (filename, os.getpid())
    else:
        self.renmwo = filename
    # NOTE(review): on Python 3 the csv documentation recommends opening the
    # file with newline=''; left as-is because the target version is unknown.
    self.file = open(self.renmwo, "w")
    excel_tab = csv.get_dialect("excel-tab")
    self.writer = csv.writer(self.file, dialect = excel_tab)
def __init__(self, filename, renmwo = True):
    """Open *filename* (or a sibling working file) and attach a TSV writer.

    Args:
        filename: Path the caller ultimately wants the data under; stored
            unchanged on ``self.filename``.
        renmwo: When true, the file actually opened is
            ``"<filename>.~renmwo<pid>~"``; when false, *filename* itself is
            opened. The path that was opened is stored on ``self.renmwo``.
    """
    self.filename = filename
    if renmwo:
        # The PID is embedded in the working name.
        # NOTE(review): presumably this file is renamed onto ``filename`` by
        # code outside this view -- confirm.
        self.renmwo = "%s.~renmwo%d~" % (filename, os.getpid())
    else:
        self.renmwo = filename
    # NOTE(review): on Python 3 the csv documentation recommends opening the
    # file with newline=''; left as-is because the target version is unknown.
    self.file = open(self.renmwo, "w")
    excel_tab = csv.get_dialect("excel-tab")
    self.writer = csv.writer(self.file, dialect = excel_tab)
def __init__(self, filename, renmwo = True):
    """Open *filename* (or a sibling working file) and attach a TSV writer.

    Args:
        filename: Path the caller ultimately wants the data under; stored
            unchanged on ``self.filename``.
        renmwo: When true, the file actually opened is
            ``"<filename>.~renmwo<pid>~"``; when false, *filename* itself is
            opened. The path that was opened is stored on ``self.renmwo``.
    """
    self.filename = filename
    if renmwo:
        # The PID is embedded in the working name.
        # NOTE(review): presumably this file is renamed onto ``filename`` by
        # code outside this view -- confirm.
        self.renmwo = "%s.~renmwo%d~" % (filename, os.getpid())
    else:
        self.renmwo = filename
    # NOTE(review): on Python 3 the csv documentation recommends opening the
    # file with newline=''; left as-is because the target version is unknown.
    self.file = open(self.renmwo, "w")
    excel_tab = csv.get_dialect("excel-tab")
    self.writer = csv.writer(self.file, dialect = excel_tab)
def test_registry(self):
class myexceltsv(csv.excel):
delimiter = "\t"
name = "myexceltsv"
expected_dialects = csv.list_dialects() + [name]
expected_dialects.sort()
csv.register_dialect(name, myexceltsv)
self.addCleanup(csv.unregister_dialect, name)
self.assertEqual(csv.get_dialect(name).delimiter, '\t')
got_dialects = sorted(csv.list_dialects())
self.assertEqual(expected_dialects, got_dialects)
def test_register_kwargs(self):
name = 'fedcba'
csv.register_dialect(name, delimiter=';')
self.addCleanup(csv.unregister_dialect, name)
self.assertEqual(csv.get_dialect(name).delimiter, ';')
self.assertEqual([['X', 'Y', 'Z']], list(csv.reader(['X;Y;Z'], name)))
def open(self):
    """Register the CSV dialects and open the shelve DB and output stream.

    Calling this on an already-open context only logs a debug message and
    returns. Otherwise ``self.dialect`` and ``self.writer_dialect`` (which
    must be mappings of dialect parameters at this point, as they are
    expanded with ``**``) are registered globally with the csv module and
    then replaced by the resulting dialect objects.
    """
    if self.is_open:
        self._ui.debug('OPEN CALLED ON ALREADY OPEN RUNCONTEXT')
        return

    # Flag is raised before any resource is acquired.
    self.is_open = True
    self._ui.debug('OPEN CALLED ON RUNCONTEXT')

    csv.register_dialect('dataset_dialect', **self.dialect)
    csv.register_dialect('writer_dialect', **self.writer_dialect)
    # Swap the parameter mappings for the registered dialect objects.
    self.dialect, self.writer_dialect = (
        csv.get_dialect('dataset_dialect'),
        csv.get_dialect('writer_dialect'),
    )

    db_path = self.file_context.file_name
    self.db = shelve.open(db_path, writeback=True)

    # Append mode in both cases: binary on Python 2, text with newline
    # translation disabled on Python 3.
    if six.PY2:
        self.out_stream = open(self.out_file, 'ab')
    elif six.PY3:
        self.out_stream = open(self.out_file, 'a', newline='')
def __init__(self, fd, encoding, ui):
    """Store the file object, encoding and UI, and look up the dataset dialect.

    Args:
        fd: Stored as ``self.fd``.
        encoding: Stored as ``self.encoding``.
        ui: Stored as ``self._ui``.

    Raises:
        csv.Error: If no dialect named ``'dataset_dialect'`` is registered.
    """
    self.fd = fd
    # dataset_dialect is set by investigate_encoding_and_dialect in utils,
    # so it has to be registered before this object is constructed.
    dataset_dialect = csv.get_dialect('dataset_dialect')
    self.dialect = dataset_dialect
    self.encoding = encoding
    self._ui = ui
def go(self):
    """Start a process running ``self._shove`` and return it.

    The process receives ``self.batch_gen_args``, a ``SerializableDialect``
    built from the registered ``'dataset_dialect'``, and ``self.queue``.
    It is also kept on ``self.p``.

    Returns:
        The started ``multiprocessing.Process``.
    """
    registered = csv.get_dialect('dataset_dialect')
    # NOTE(review): presumably converted because the csv dialect object has
    # to cross the process boundary -- confirm against SerializableDialect.
    worker_args = [
        self.batch_gen_args,
        SerializableDialect.from_dialect(registered),
        self.queue,
    ]
    self.p = multiprocessing.Process(
        target=self._shove,
        args=worker_args,
        name='Shovel_Proc',
    )
    self.p.start()
    return self.p
def test_investigate_encoding_and_dialect():
    """Check the encoding and dialect detected for the Windows fixture.

    The test expects ``'dataset_dialect'`` to be registered with the csv
    module once ``investigate_encoding_and_dialect`` has returned.
    """
    fixture_path = 'tests/fixtures/windows_encoded.csv'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        encoding = investigate_encoding_and_dialect(fixture_path, None, ui)
        detected = csv.get_dialect('dataset_dialect')
        assert encoding == 'iso-8859-2'
        assert detected.lineterminator == '\r\n'
        assert detected.quotechar == '"'
        assert detected.delimiter == ','
def test_investigate_encoding_and_dialect_skip_dialect():
    """With ``skip_dialect=True`` the Sniffer is never called.

    The encoding is still expected to be detected, and the registered
    ``'dataset_dialect'`` is expected to use a comma delimiter.
    """
    fixture_path = 'tests/fixtures/windows_encoded.csv'
    sniffer_target = 'datarobot_batch_scoring.reader.csv.Sniffer'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        with mock.patch(sniffer_target) as sn:
            encoding = investigate_encoding_and_dialect(
                fixture_path, None, ui,
                fast=False,
                encoding='',
                skip_dialect=True)
            assert encoding == 'iso-8859-2'
            assert not sn.called
            registered = csv.get_dialect('dataset_dialect')
            assert registered.delimiter == ','
def test_investigate_encoding_and_dialect_substitute_delimiter():
    """An explicit delimiter and encoding are taken over as given.

    With ``skip_dialect=True`` the Sniffer must not be called, and the
    registered ``'dataset_dialect'`` is expected to carry the ``'|'``
    delimiter that was passed in.
    """
    fixture_path = 'tests/fixtures/windows_encoded.csv'
    sniffer_target = 'datarobot_batch_scoring.reader.csv.Sniffer'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        with mock.patch(sniffer_target) as sn:
            encoding = investigate_encoding_and_dialect(
                fixture_path, '|', ui,
                fast=False,
                encoding='utf-8',
                skip_dialect=True)
            assert encoding == 'utf-8'  # Intentionally wrong
            assert not sn.called
            registered = csv.get_dialect('dataset_dialect')
            assert registered.delimiter == '|'
parsers.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 35
收藏 0
点赞 0
评论 0
def __init__(self, f, engine=None, **kwds):
    """Normalise the reader options and build the parsing engine.

    Args:
        f: Stored as ``self.f``.
        engine: Engine name; ``'python'`` is used when ``None``.
        **kwds: Reader options. Notable keys handled here:
            ``engine_specified`` overrides whether the engine counts as
            explicitly chosen; ``dialect`` (a registered dialect name or an
            object with dialect attributes) overwrites the individual CSV
            formatting options; ``header`` equal to ``'infer'`` (also the
            default) becomes ``0`` when ``names`` is ``None`` and ``None``
            otherwise; ``has_index_names`` is copied into the final options.

    ``kwds`` is mutated in place and kept as ``self.orig_options``.
    """
    self.f = f

    engine_specified = engine is not None
    if not engine_specified:
        engine = 'python'
    self._engine_specified = kwds.get('engine_specified', engine_specified)

    dialect = kwds.get('dialect')
    if dialect is not None:
        # A registered name is resolved to its dialect; anything else is
        # used directly and must expose the attributes read below.
        if dialect in csv.list_dialects():
            dialect = csv.get_dialect(dialect)
        for option in ('delimiter', 'doublequote', 'escapechar',
                       'skipinitialspace', 'quotechar', 'quoting'):
            kwds[option] = getattr(dialect, option)

    if kwds.get('header', 'infer') == 'infer':
        if kwds.get('names') is None:
            kwds['header'] = 0
        else:
            kwds['header'] = None

    self.orig_options = kwds

    # miscellanea
    self.engine = engine
    self._engine = None

    options = self._get_options_with_defaults(engine)

    self.chunksize = options.pop('chunksize', None)
    self.squeeze = options.pop('squeeze', False)

    # might mutate self.engine
    cleaned_options, chosen_engine = self._clean_options(options, engine)
    self.options = cleaned_options
    self.engine = chosen_engine
    if 'has_index_names' in kwds:
        self.options['has_index_names'] = kwds['has_index_names']

    self._make_engine(self.engine)