def Load(self, kind, data):
  """Parses CSV data, uses a Loader to convert to entities, and stores them.

  On error, fails fast. Returns a "bad request" HTTP response code and
  includes the traceback in the output.

  Args:
    kind: a string containing the entity kind that this loader handles
    data: a string containing the CSV data to load

  Returns:
    tuple (response code, output) where:
      response code: integer HTTP response code to return
      output: string containing the HTTP response body
  """
  data = data.encode('utf-8')
  Validate(kind, basestring)
  Validate(data, basestring)
  output = []

  try:
    loader = Loader.RegisteredLoaders()[kind]
  except KeyError:
    output.append('Error: no Loader defined for kind %s.' % kind)
    return (httplib.BAD_REQUEST, ''.join(output))

  buffer = StringIO.StringIO(data)
  reader = csv.reader(buffer, skipinitialspace=True)

  try:
    csv.field_size_limit(800000)
  except AttributeError:
    # Older csv modules don't provide field_size_limit.
    pass

  return self.LoadEntities(self.IterRows(reader), loader)
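The guarded csv.field_size_limit(800000) call matters because the csv module rejects any field longer than its default limit of 131072 bytes; the AttributeError handler covers old csv modules that predate the call. A minimal, self-contained sketch of the failure the higher limit avoids (the sample row is made up):

import csv

row = ['x' * 200000 + ',second']      # one field well past the 131072-byte default
try:
    next(csv.reader(row))
except csv.Error as err:
    print(err)                        # field larger than field limit (131072)

csv.field_size_limit(800000)          # the same limit the loader above chooses
print(len(next(csv.reader(row))[0]))  # 200000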
def __init__(self, limit):
  self.message = """
A field in your CSV input file has exceeded the current limit of %d.

You can raise this limit by adding the following lines to your config file:

import csv
csv.field_size_limit(new_limit)

where new_limit is a number larger than the size in bytes of the largest
field in your CSV.
""" % limit
  Error.__init__(self, self.message)
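The advice in the message needs a concrete new_limit. One hedged way to choose it: no field can be longer than the raw line that contains it, so the longest line is a safe upper bound (the bound does not hold for quoted fields with embedded newlines, and 'data.csv' is a placeholder path):

import csv

longest = 0
with open('data.csv', 'rb') as f:
    for line in f:
        longest = max(longest, len(line))

csv.field_size_limit(longest + 1)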
def ContentGenerator(csv_file,
                     batch_size,
                     create_csv_reader=csv.reader,
                     create_csv_writer=csv.writer):
  """Retrieves CSV data up to a batch size at a time.

  Args:
    csv_file: A file-like object for reading CSV data.
    batch_size: Maximum number of CSV rows to yield on each iteration.
    create_csv_reader, create_csv_writer: Used for dependency injection.

  Yields:
    Tuple (entity_count, csv_content) where:
      entity_count: Number of entities contained in the csv_content. Will be
        less than or equal to the batch_size and greater than 0.
      csv_content: String containing the CSV content containing the next
        entity_count entities.
  """
  try:
    csv.field_size_limit(800000)
  except AttributeError:
    pass

  reader = create_csv_reader(csv_file, skipinitialspace=True)
  exhausted = False
  while not exhausted:
    rows_written = 0
    content = StringIO.StringIO()
    writer = create_csv_writer(content)
    try:
      for i in xrange(batch_size):
        row = reader.next()
        writer.writerow(row)
        rows_written += 1
    except StopIteration:
      exhausted = True
    if rows_written > 0:
      yield rows_written, content.getvalue()
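A usage sketch for the generator (Python 2, matching the StringIO/xrange calls above; the sample rows are made up). Each yielded chunk re-serializes at most batch_size rows, using csv.writer's default '\r\n' terminator:

import StringIO

sample = StringIO.StringIO('a,1\nb,2\nc,3\n')
for entity_count, csv_content in ContentGenerator(sample, batch_size=2):
    print entity_count, repr(csv_content)
# 2 'a,1\r\nb,2\r\n'
# 1 'c,3\r\n'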
def init_csv_reader():
    # Hack: csv.field_size_limit() raises OverflowError when the value does
    # not fit in a C long (e.g. sys.maxsize on 64-bit Windows), so keep
    # dividing by 16 until the module accepts the limit.
    csv_max = sys.maxsize
    overflow = True
    while overflow:
        overflow = False
        try:
            csv.field_size_limit(csv_max)
        except OverflowError:
            overflow = True
            csv_max = int(csv_max / 16)
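The limit is module-global state, so calling the function once at startup is enough. A usage sketch (assuming csv and sys are imported at module level beside the function):

init_csv_reader()
print(csv.field_size_limit())  # sys.maxsize on most platforms; about 2**27 on 64-bit Windows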
def __iter__(self):
    """Iterate over all of the lines in the file."""
    import csv

    try:
        # For: _csv.Error: field larger than field limit (131072)
        if os.name == 'nt':
            # Using sys.maxsize throws an OverflowError on 64-bit Windows,
            # since the internal representation of int/long on Win64 is only
            # 32 bits wide. Ideally the limit on Win64 should not exceed
            # (2**31) - 1 as long as the internal representation uses int
            # and/or long.
            csv.field_size_limit((2 ** 31) - 1)
        else:
            csv.field_size_limit(sys.maxsize)
    except OverflowError:
        # Skip setting the limit for now.
        pass

    self.start()

    try:
        # Python 3.6 treats encoding=None as 'utf8', but Python 3.5 treats
        # it as 'ascii', so resolve the default explicitly.
        encoding = self.url.encoding or 'utf8'
        with open(self.url.path, encoding=encoding) as f:
            yield from csv.reader(f, delimiter=self.delimiter)
    except UnicodeError:
        raise

    self.finish()
def really_big_fields_enabled(self):
    old_limit = csv.field_size_limit()
    csv.field_size_limit(2 ** 28)
    yield
    csv.field_size_limit(old_limit)
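The method yields mid-function, so it is presumably used through contextlib.contextmanager (the decorator does not appear in this excerpt). A self-contained sketch of the same pattern, written standalone and with a try/finally added so the old limit is restored even when the body raises:

import contextlib
import csv

@contextlib.contextmanager
def really_big_fields_enabled():
    # Temporarily raise the CSV field size limit to 2**28 bytes.
    old_limit = csv.field_size_limit()
    csv.field_size_limit(2 ** 28)
    try:
        yield
    finally:
        csv.field_size_limit(old_limit)  # restore even if the body raised

with really_big_fields_enabled():
    pass  # parse CSV data with very large fields here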
def test_with_bunch(filename):
    """Test shaman with a code bunch and show statistics."""
    if not os.path.exists(filename):
        print('File does not exist: ' + filename)
        sys.exit(-1)

    # Read the CSV file.
    print('Load CSV file')
    csv.field_size_limit(sys.maxsize)  # Raise the CSV field limit to sys.maxsize.
    filedata = []
    with open(filename) as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            filedata.append(row)

    detector = shaman.Shaman.default()
    correct = 0
    totals = len(filedata)
    results = {}

    print('Start testing')
    for index, (language, code) in enumerate(filedata):
        print('Testing %s/%s ' % (index, len(filedata)), end="\r")

        if language not in shaman.SUPPORTING_LANGUAGES:
            totals -= 1
            continue

        try:
            glang = detector.detect(code)[0][0]
        except IndexError:
            glang = None

        if language not in results:
            results[language] = [0, 0, 0]

        if glang == language:
            correct += 1
            results[language][0] += 1

        results[language][1] += 1
        results[language][2] = results[language][0] / results[language][1]

    print("------------------------------------------------")
    print("Accuracy: %.2lf%% (Correct: %d / Valid Data: %d)" % (correct / totals * 100, correct, totals))
    print("------------------------------------------------")

    results = sorted(results.items(), key=lambda x: x[1][0], reverse=True)
    for lang, l in results:
        print("%s: %.2lf%% (%s/%s)" % (lang, l[2] * 100, l[0], l[1]))