def test_read_bigfield(self):
    # This exercises the buffer realloc functionality and field size
    # limits.
    limit = csv.field_size_limit()
    try:
        size = 50000
        bigstring = 'X' * size
        bigline = '%s,%s' % (bigstring, bigstring)
        self._read_test([bigline], [[bigstring, bigstring]])
        csv.field_size_limit(size)
        self._read_test([bigline], [[bigstring, bigstring]])
        self.assertEqual(csv.field_size_limit(), size)
        csv.field_size_limit(size-1)
        self.assertRaises(csv.Error, self._read_test, [bigline], [])
        self.assertRaises(TypeError, csv.field_size_limit, None)
        self.assertRaises(TypeError, csv.field_size_limit, 1, None)
    finally:
        csv.field_size_limit(limit)
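For reference, the get/set behaviour that the test above exercises can be reproduced outside the test suite. The sketch below is illustrative only (the field sizes and in-memory data are made up, not taken from the test): calling csv.field_size_limit() with no argument returns the current limit, and passing a size installs a new module-wide limit and returns the previous one.

# Minimal standalone sketch of csv.field_size_limit() get/set semantics.
# Sizes and data here are illustrative only.
import csv
import io

default_limit = csv.field_size_limit()       # no argument: returns the current limit (131072 by default)
data = 'a,%s\r\n' % ('X' * 200000)

csv.field_size_limit(300000)                  # with an argument: sets the limit, returns the old one
rows = list(csv.reader(io.StringIO(data)))    # succeeds, the 200000-char field is under the limit

csv.field_size_limit(100000)                  # lower the limit below the field size
try:
    list(csv.reader(io.StringIO(data)))
except csv.Error as exc:
    print(exc)                                # "field larger than field limit (100000)"
finally:
    csv.field_size_limit(default_limit)       # restore the previous module-wide limit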
def Records(self):
  """Reads the CSV data file and generates row records.

  Yields:
    Lists of strings

  Raises:
    ResumeError: If the progress database and data file indicate a different
      number of rows.
  """
  csv_file = self.openfile(self.csv_filename, 'rb')
  reader = self.create_csv_reader(csv_file, skipinitialspace=True)
  try:
    for record in reader:
      yield record
  except csv.Error, e:
    if e.args and e.args[0].startswith('field larger than field limit'):
      raise FieldSizeLimitError(csv.field_size_limit())
    else:
      raise
def main(filename):
    """
    Splits the big input file of 1000 users into 1000 files of one user each.
    """
    with open(filename) as origfile:
        dir = os.path.dirname(filename)
        csv_reader = csv.reader(origfile, delimiter='\t')
        # Fixes a bug:
        # http://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
        csv.field_size_limit(sys.maxsize)
        lastuser = None
        for row in csv_reader:
            if lastuser != row[0]:
                print(row[0])
                lastuser = row[0]
            with open(os.path.join(dir, "split", lastuser + ".tsv"), "a") as f:
                f.write("{}\n".format("\t".join(row)))
def getEdges(docTypes):
    import csv
    csv.field_size_limit(2147483647)
    for docType in docTypes:
        print(docType)
        with open("../output/edgelists/{}-edgelist.csv".format(docType.lower()), "r") as csvfile:
            datareader = csv.reader(csvfile)
            count = 0
            for row in datareader:
                if row[9].lower() in docTypes:
                    yield (row[0], row[2])
                    count += 1
                elif count < 2:
                    continue
                else:
                    return
def read_file(self, filename):
    # self.extractor.debug = True
    csv.field_size_limit(sys.maxsize)
    with open(filename, 'r') as csvfile:
        first = True
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            if first:
                first = False
            else:
                post = {'id': row[0],
                        'url': row[1],
                        'web_entity_id': row[2],
                        'web_entity': row[3],
                        'text': row[4]}
                self.process_post(post)
    print('main edges created: %s' % self.main_edges)
    print('extra edges created: %s' % self.extra_edges)
    print('ignored edges: %s' % self.ignored)
def _get_city_db():
    csv.field_size_limit(sys.maxsize)
    cities_file = os.path.join(os.path.dirname(__file__), 'cities.txt')
    with open(cities_file, 'rt') as f:
        r = csv.reader(f, delimiter='\t')
        city_db = list(r)
    return city_db
def run():
    if len(sys.argv) != 3:
        # Exception handling on starting program
        print('Usage: "shaman-trainer <code_bunch.csv> <result.json>"')
        sys.exit(-1)

    # Args
    codebunch_file = sys.argv[1]
    result_file = sys.argv[2]

    if not os.path.isfile(codebunch_file):
        # Exception handling of <code bunch> file
        print('"%s" is not a file' % codebunch_file)
        sys.exit(-1)

    # Read CSV file
    csv.field_size_limit(sys.maxsize)  # Set CSV limit to sys.maxsize
    filedata = []
    print('Load CSV file')
    with open(codebunch_file) as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            filedata.append(row)

    # Fetch keyword data
    trained_data = {}
    trained_data['keywords'] = fetch_keywords(filedata)
    trained_data['patterns'] = match_patterns(filedata)

    # Save result
    with open(result_file, 'w') as file:
        file.write(json.dumps(trained_data))
    print('Trained result is saved at "%s"' % result_file)
def main(dataset='proton-beam-xml'):
    csv.field_size_limit(430000)
    global mat, rel, turk_dic
    if dataset == 'proton-beam-xml':
        pub_dic_tmp = get_pub_dic_xml()
        # pub_dic items are already sorted by key
        [rec_nums, texts] = zip(*pub_dic.items())
        rel = get_relevant()
    else:
        pub_dic_tmp = get_pub_dic_csv(dataset)
        #[rec_nums, texts] = zip(*pub_dic.items())
        (turk_dic_tmp, rel_dic_tmp) = get_turk_data(dataset)
        texts = []
        pub_dic = {}; turk_dic = {}; rel_dic = {}
        for i in sorted(pub_dic_tmp.keys()):
            if pub_dic_tmp.has_key(i) and turk_dic_tmp.has_key(i) and rel_dic_tmp.has_key(i):
                texts.append(pub_dic_tmp[i])
                pub_dic[i] = pub_dic_tmp[i]
                turk_dic[i] = turk_dic_tmp[i]
                rel_dic[i] = rel_dic_tmp[i]
            #else:
            #    if pub_dic.has_key(i): pub_dic.pop(i)
            #    if turk_dic.has_key(i): turk_dic.pop(i)
            #    if rel_dic.has_key(i): rel_dic.pop(i)
        (_, rel) = zip(*sorted(rel_dic.items()))
        rel = map(int, rel)
    vectorizer = TfidfVectorizer()
    #save_texts = texts
    mat = vectorizer.fit_transform(texts)
    return (pub_dic, texts)
def parse_csv(self, doc, delim=','):
    """
    Csv reader
    =====
    Function to read in a csv file

    Parameters
    -----
    doc : str
        The name of the csv file

    Returns
    -----
    lines : list of lists
        Each list corresponds to the cell values of a row
    """
    csv.field_size_limit(sys.maxsize)
    try:
        lines = []
        with open(doc, 'r', encoding='utf-8') as csvfile:
            csv_reader = csv.reader(csvfile, delimiter=delim)
            for line in csv_reader:
                lines.append(line)
    except:
        lines = []
        csvfile = open(doc, 'r', encoding='utf-8')
        csv_reader = csv.reader(line.replace('\0', '') for line in csvfile.readlines())
        for line in csv_reader:
            lines.append(line)
    return lines
def __init__(self, test_daemon, netem_master, csv_file_path, algorithm):
    self.test_daemon = test_daemon
    # csv.field_size_limit(500 * 1024 * 1024)
    self.csv_writer = csv.writer(open(csv_file_path, 'w', newline=''))
    self.netem_master = netem_master
    self.algorithm = algorithm

# calls run function on test_daemon and saves results to csv
def Load(self, kind, data):
  """Parses CSV data, uses a Loader to convert to entities, and stores them.

  On error, fails fast. Returns a "bad request" HTTP response code and
  includes the traceback in the output.

  Args:
    kind: a string containing the entity kind that this loader handles
    data: a string containing the CSV data to load

  Returns:
    tuple (response code, output) where:
      response code: integer HTTP response code to return
      output: string containing the HTTP response body
  """
  data = data.encode('utf-8')
  Validate(kind, basestring)
  Validate(data, basestring)
  output = []

  try:
    loader = Loader.RegisteredLoaders()[kind]
  except KeyError:
    output.append('Error: no Loader defined for kind %s.' % kind)
    return (httplib.BAD_REQUEST, ''.join(output))

  buffer = StringIO.StringIO(data)
  reader = csv.reader(buffer, skipinitialspace=True)

  try:
    csv.field_size_limit(800000)
  except AttributeError:
    pass

  return self.LoadEntities(self.IterRows(reader), loader)
def __init__(self, limit):
  self.message = """
A field in your CSV input file has exceeded the current limit of %d.

You can raise this limit by adding the following lines to your config file:

import csv
csv.field_size_limit(new_limit)

where new_limit is a number larger than the size in bytes of the largest
field in your CSV.
""" % limit
  Error.__init__(self, self.message)
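The error message above tells the user how to raise the limit themselves. The sketch below does the same thing programmatically; it is illustrative only (the file name and the retry-once policy are assumptions, not part of the loader code above).

# Illustrative only: raise the field size limit and retry, as the message suggests.
# 'records.csv' is a hypothetical input file.
import csv

def read_rows(path):
    with open(path, newline='') as f:
        return list(csv.reader(f))

try:
    rows = read_rows('records.csv')
except csv.Error as e:
    if e.args and e.args[0].startswith('field larger than field limit'):
        csv.field_size_limit(10 * csv.field_size_limit())  # raise the module-wide limit
        rows = read_rows('records.csv')
    else:
        raise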
def __set_max_csv_length(self):
    maxInt = sys.maxsize
    decrement = True
    while decrement:
        # decrease the maxInt value by factor 10
        # as long as the OverflowError occurs.
        decrement = False
        try:
            csv.field_size_limit(maxInt)
        except OverflowError:
            maxInt = int(maxInt / 10)
            decrement = True
def all_fb_data(combined_ids, filename='local_data/FacebookCachedObjectEvent.csv'):
    csv.field_size_limit(1000000000)
    for row in csv.reader(open(filename)):
        source_id, row_id, row_type = row[0].split('.')
        if source_id == "701004" and row_type == 'OBJ_EVENT' and (not combined_ids or row_id in combined_ids):
            fb_event = json.loads(row[1])
            if fb_event and not fb_event.get('deleted') and not fb_event.get('empty') and fb_events.is_public(fb_event):
                yield row_id, fb_event
def set_csv_field_size():
    maxInt = sys.maxsize
    decrement = True
    while decrement:
        decrement = False
        try:
            csv.field_size_limit(maxInt)
        except OverflowError:
            maxInt = int(maxInt / 10)
            decrement = True
    return maxInt
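The divide-by-ten loop above exists because the csv module stores the limit internally in a C long, so on platforms where sys.maxsize does not fit in one (for example 64-bit Windows) the first call raises OverflowError and a smaller value has to be tried. A hedged usage sketch follows; the file name and delimiter are illustrative, not taken from the snippet above.

# Illustrative usage of set_csv_field_size(); 'huge_fields.tsv' is a made-up path.
import csv

effective_limit = set_csv_field_size()   # largest limit this platform accepts
print('csv field size limit set to', effective_limit)

with open('huge_fields.tsv', newline='') as f:
    for row in csv.reader(f, delimiter='\t'):
        pass  # rows with very large fields no longer raise csv.Error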