python类reader()的实例源码

museidelcibo_uploader.py 文件源码 项目:bots 作者: nemobis 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def run(self, filename):
        """Upload one Musei del cibo image per row of a tab-separated file.

        Args:
            filename (str): path to a TSV file whose header row names the
                columns (must include museo, inventario, nome, ambito, epoca,
                dimensioni, materia, descrizione, provenienza, note, biblio).
        """
        with open(filename, 'r') as f:
            source = csv.reader(f, delimiter='\t')
            header = next(source)
            pywikibot.output("Header of the input table: " + ', '.join(header))
            # Build a namedtuple type from the header so each row can be
            # addressed by column name (row.museo, row.inventario, ...).
            # Use a distinct name for the type instead of rebinding `titles`.
            row_type = namedtuple('titles', ', '.join(header))
            titles = [row_type._make(row) for row in source]

        if not titles:
            pywikibot.output("We were not able to extract the data to work on. Exiting.")
            return

        for row in titles:
            commons = "%s - Musei del cibo - %s - %s.jpg" % (row.nome, row.museo, row.inventario)
            description = u"""
{{Musei del cibo
| museo = %s
| inventario = %s
| nome = %s
| ambito = %s
| epoca = %s
| dimensioni = %s
| materia = %s
| descrizione = %s
| provenienza = %s
| note = %s
| bibliografia = %s
}}
""" % (row.museo, row.inventario, row.nome, row.ambito, row.epoca,
       row.dimensioni, row.materia, row.descrizione, row.provenienza, row.note, row.biblio)

            try:
                upload = UploadRobot(row.inventario + ".jpg", description=description,
                                     useFilename=commons, keepFilename=True,
                                     verifyDescription=False, ignoreWarning=False, aborts=True)
                upload.run()
            except Exception as error:
                # A bare `except:` used to swallow SystemExit/KeyboardInterrupt
                # and hide the cause; catch real errors only and report them.
                pywikibot.output("ERROR: The upload could not be completed: %s" % error)
test_actions.py 文件源码 项目:edx-enterprise 作者: edx 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _assert_correct_csv(self, actual_csv, expected_rows):
        """
        Asserts that CSV file ``actual_csv`` contains ``expected_rows``
        """
        parsed = unicodecsv.reader(actual_csv.getvalue().splitlines(), encoding="utf-8")
        # Normalize the expected side: every cell becomes a string so it can
        # be compared with what the CSV reader yields.
        stringified = []
        for row in expected_rows:
            stringified.append([str(cell) for cell in row])
        self.assertEqual(list(parsed), stringified)
puncnorm.py 文件源码 项目:epitran 作者: dmort27 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def _load_punc_norm_map(self):
        """Load the map table for normalizing 'down' punctuation."""
        path = pkg_resources.resource_filename(__name__, 'data/puncnorm.csv')
        mapping = {}
        with open(path, 'rb') as f:
            rows = csv.reader(f, encoding='utf-8', delimiter=str(','), quotechar=str('"'))
            next(rows)  # skip the header row
            for punc, norm in rows:
                mapping[punc] = norm
        return mapping
flite.py 文件源码 项目:epitran 作者: dmort27 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def _read_arpabet(self, arpabet):
        """Read a two-column CSV mapping ARPAbet symbols to IPA strings.

        Args:
            arpabet: path to the CSV file (no header row expected).
        """
        with open(arpabet, 'rb') as f:
            rows = csv.reader(f, encoding='utf-8')
            return {arpa: ipa for (arpa, ipa) in rows}
simple.py 文件源码 项目:epitran 作者: dmort27 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def _load_g2p_map(self, code):
        """Load the code table for the specified language.

        Args:
            code (str): ISO 639-3 code plus "-" plus ISO 15924 code for the
                        language/script to be loaded

        Returns:
            defaultdict(list): NFC-normalized grapheme -> list of phoneme
            strings it may map to.

        Raises:
            DatafileError: if the map file cannot be located or a data row
                does not contain exactly two fields.
            MappingError: raised by the duplicate-mapping check below.
        """
        g2p = defaultdict(list)
        # grapheme -> 0-based data-row indices where it appears (for errors)
        gr_by_line = defaultdict(list)
        try:
            path = os.path.join('data', 'map', code + '.csv')
            path = pkg_resources.resource_filename(__name__, path)
        except IndexError:
            raise DatafileError('Add an appropriately-named mapping to the data/maps directory.')
        with open(path, 'rb') as f:
            reader = csv.reader(f, encoding='utf-8')
            next(reader)  # discard the header row
            for (i, fields) in enumerate(reader):
                try:
                    graph, phon = fields
                except ValueError:
                    # i is 0-based and the header was consumed, hence i + 2
                    # to report a human-friendly 1-based file line number.
                    raise DatafileError('Map file is not well formed at line {}.'.format(i + 2))
                # Normalize both sides so lookups are canonical-form-stable.
                graph = unicodedata.normalize('NFC', graph)
                phon = unicodedata.normalize('NFC', phon)
                g2p[graph].append(phon)
                gr_by_line[graph].append(i)
        # NOTE(review): the predicate is evaluated on g2p, but the offending
        # grapheme/lines are then looked up in gr_by_line -- presumably the
        # helper flags the same graphemes for both dicts; confirm against
        # _one_to_many_gr_by_line_map before relying on this.
        if self._one_to_many_gr_by_line_map(g2p):
            graph, lines = self._one_to_many_gr_by_line_map(gr_by_line)
            lines = [l + 2 for l in lines]
            raise MappingError('One-to-many G2P mapping for "{}" on lines {}'.format(graph, ', '.join(map(str, lines))).encode('utf-8'))
        return g2p
simple.py 文件源码 项目:epitran 作者: dmort27 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _load_punc_norm_map(self):
        """Load the map table for normalizing 'down' punctuation."""
        rel_path = os.path.join('data', 'puncnorm.csv')
        abs_path = pkg_resources.resource_filename(__name__, rel_path)
        norm_map = {}
        with open(abs_path, 'rb') as f:
            rows = csv.reader(f, encoding='utf-8', delimiter=str(','), quotechar=str('"'))
            next(rows)  # drop the header row
            for punc, norm in rows:
                norm_map[punc] = norm
        return norm_map
count_phones.py 文件源码 项目:epitran 作者: dmort27 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def main(fn):
    """Print the count and sorted X-SAMPA forms of all phones found in the
    second column of the CSV file *fn* (header row is skipped)."""
    ft = panphon.FeatureTable()
    xs = epitran.xsampa.XSampa()
    phones = set()
    with open(fn, 'rb') as f:
        rows = csv.reader(f, encoding='utf-8')
        next(rows)  # header
        for _, phon in rows:
            # Accumulate the segments of every phonetic string.
            phones |= set(ft.segs_safe(phon))
    print(len(phones))
    print(sorted(list(map(xs.ipa2xs, phones))))
migraterules.py 文件源码 项目:epitran 作者: dmort27 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def main():
    """Convert every rule CSV in the current directory into a .txt rule file.

    Comment rows (first cell starting with optional whitespace then '%') are
    passed through; all other rows are run through build_rule() and have
    runs of spaces collapsed and trailing spaces stripped.
    """
    # Don't name the loop variable `csv`: it shadows the conventional csv
    # module name and invites confusion with unicodecsv below.
    for csv_fn in glob.glob('*.csv'):
        # Output name = leading alphabetic/hyphen prefix of the CSV name.
        txt_fn = re.match(r'[A-Za-z-]+', csv_fn).group(0) + '.txt'
        with open(csv_fn, 'rb') as f, io.open(txt_fn, 'w', encoding='utf-8') as g:
            reader = unicodecsv.reader(f, encoding='utf-8')
            next(reader)  # skip header
            for fields in reader:
                # Raw string: '\s' in a plain literal is a DeprecationWarning
                # on Python 3.6+ (and an error from 3.12).
                if re.match(r'\s*%', fields[0]):
                    print(','.join([x for x in fields if x]), file=g)
                else:
                    rule = build_rule(fields)
                    rule = re.sub('[ ]+', ' ', rule)
                    rule = re.sub('[ ]$', '', rule)
                    print(rule, file=g)
space2punc.py 文件源码 项目:epitran 作者: dmort27 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def main(fns, fnn):
    """Collect single-character punctuation marks from the second column of
    each CSV in *fns* and write them, sorted, one per row, to *fnn*.

    Args:
        fns: iterable of input CSV paths.
        fnn: output CSV path.
    """
    punc = set()
    for fn in fns:
        # Parenthesized print works on both Python 2 and 3; the old
        # `print fn` statement is a SyntaxError under Python 3.
        print(fn)
        with open(fn, 'rb') as f:
            reader = csv.reader(f, encoding='utf-8')
            for _, s in reader:
                # Keep only length-1 strings in the Unicode 'P*' categories.
                if len(s) == 1 and unicodedata.category(s)[0] == u'P':
                    punc.add(s)
    with open(fnn, 'wb') as f:
        writer = csv.writer(f, encoding='utf-8')
        for mark in sorted(list(punc)):
            writer.writerow([mark])
isbijective.py 文件源码 项目:epitran 作者: dmort27 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def read_map(fn):
    """Read a two-column CSV, skipping the header, as a list of (a, b) tuples."""
    with open(fn, 'rb') as f:
        rows = csv.reader(f, encoding='utf-8')
        next(rows)  # discard header
        pairs = []
        for first, second in rows:
            pairs.append((first, second))
        return pairs
xsampa.py 文件源码 项目:epitran 作者: dmort27 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def _read_ipa2xs(self):
        """Build a BytesTrie mapping IPA strings to UTF-8 encoded X-SAMPA.

        Reads self.ipa2xs_fn from the package's data directory; the header
        row and each row's third column are ignored.
        """
        data_path = pkg_resources.resource_filename(
            __name__, os.path.join('data', self.ipa2xs_fn))
        with open(data_path, 'rb') as f:
            rows = csv.reader(f, encoding='utf-8')
            next(rows)  # header
            entries = [(ipa, xs.encode('utf-8')) for ipa, xs, _ in rows]
        return marisa_trie.BytesTrie(entries)
utils_pdq.py 文件源码 项目:oim-cms 作者: parksandwildlife 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def csv_data(csv_path, skip_header=True):
    """Pass in the path to a CSV file, returns a CSV Reader object.

    Args:
        csv_path: path to the CSV file.
        skip_header: when True, advance the reader past the first row.

    NOTE: the file handle is deliberately left open — the returned reader
    streams from it lazily, so closing it here would break iteration.
    """
    csv_file = open(csv_path, 'r')
    # Determine the CSV dialect from a sample of the file.
    dialect = unicodecsv.Sniffer().sniff(csv_file.read(1024))
    csv_file.seek(0)
    data = unicodecsv.reader(csv_file, dialect)
    if skip_header:
        # next() works on Python 2 and 3; the old .next() is Py2-only.
        next(data)
    return data
featuretable.py 文件源码 项目:panphon 作者: dmort27 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def _read_weights(self, weights_fn):
        """Return the feature weights (second row of *weights_fn*) as floats."""
        full_path = pkg_resources.resource_filename(__name__, weights_fn)
        with open(full_path, 'rb') as f:
            rows = csv.reader(f, encoding='utf-8')
            next(rows)  # skip the header row
            return [float(w) for w in next(rows)]
_panphon.py 文件源码 项目:panphon 作者: dmort27 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def _read_weights(self, filename=os.path.join('data', 'feature_weights.csv')):
        """Load per-feature weights from the second row of *filename*."""
        filename = pkg_resources.resource_filename(__name__, filename)
        with open(filename, 'rb') as f:
            reader = csv.reader(f, encoding='utf-8')
            next(reader)  # discard the header
            second_row = next(reader)
        return [float(cell) for cell in second_row]
generate_ipa_all.py 文件源码 项目:panphon 作者: dmort27 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def write_ipa_all(ipa_bases, ipa_all, all_segments, sort_order):
    """Write every segment in *all_segments*, sorted by *sort_order*, to the
    CSV file *ipa_all*, reusing the column names from *ipa_bases*."""
    with open(ipa_bases, 'rb') as f:
        fieldnames = next(csv.reader(f, encoding='utf-8'))
    with open(ipa_all, 'wb') as f:
        writer = csv.DictWriter(f, encoding='utf-8', fieldnames=fieldnames)
        # Emit the header as an ordinary row (each column named after itself).
        writer.writerow({name: name for name in fieldnames})
        for seg in sort_all_segments(sort_order, all_segments):
            # Copy the feature dict so adding 'ipa' doesn't mutate the segment.
            record = copy.copy(seg.features)
            record['ipa'] = seg.form
            writer.writerow(record)
xsampa.py 文件源码 项目:panphon 作者: dmort27 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def read_xsampa_table(self):
        """Load data/ipa-xsampa.csv and return (regex over X-SAMPA keys,
        X-SAMPA -> IPA dict)."""
        table_path = pkg_resources.resource_filename(
            __name__, os.path.join('data', 'ipa-xsampa.csv'))
        xs2ipa = {}
        with open(table_path, 'rb') as f:
            for row in csv.reader(f, encoding='utf-8'):
                xs2ipa[row[1]] = row[0]
        # Longest keys first so the alternation prefers maximal matches.
        keys = sorted(xs2ipa.keys(), key=len, reverse=True)
        xs_regex = re.compile('|'.join(map(re.escape, keys)))
        return xs_regex, xs2ipa
permissive.py 文件源码 项目:panphon 作者: dmort27 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def _read_ipa_bases(self, fn):
        """Read the IPA bases CSV *fn* (first column: segment; remaining
        columns: feature values) and return ({segment: set of (value, name)
        pairs}, feature names)."""
        fn = pkg_resources.resource_filename(__name__, fn)
        bases = {}
        with open(fn, 'rb') as f:
            rows = csv.reader(f, encoding='utf-8', delimiter=str(','))
            names = next(rows)[1:]  # feature names from the header
            for row in rows:
                segment = row[0]
                values = row[1:]
                bases[segment] = set(zip(values, names))
        return bases, names
permissive.py 文件源码 项目:panphon 作者: dmort27 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def _read_weights(self, filename=os.path.join('data', 'feature_weights.csv')):
        """Return the feature weights from *filename* as a list of floats.

        The first CSV row is a header; the second holds the weights.
        """
        resolved = pkg_resources.resource_filename(__name__, filename)
        with open(resolved, 'rb') as f:
            rows = csv.reader(f, encoding='utf-8')
            next(rows)
            weights = list(map(float, next(rows)))
        return weights
models.py 文件源码 项目:helios-server-mixnet 作者: RunasSudo 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    """Yield CSV rows from *unicode_csv_data* as lists of unicode cells.

    Python 2's csv.py doesn't do Unicode, so the input is temporarily
    encoded as UTF-8 and each cell is decoded back afterwards.
    """
    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                            dialect=dialect, **kwargs)
    for row in csv_reader:
      # decode UTF-8 back to Unicode, cell by cell:
      try:
        yield [unicode(cell, 'utf-8') for cell in row]
      except UnicodeDecodeError:
        # The bare `except:` it replaces also trapped unrelated errors
        # (even KeyboardInterrupt). latin-1 cannot fail: it maps all bytes.
        yield [unicode(cell, 'latin-1') for cell in row]
models.py 文件源码 项目:helios-server-mixnet 作者: RunasSudo 项目源码 文件源码 阅读 16 收藏 0 点赞 0 评论 0
def itervoters(self):
    """Yield one dict per voter from the uploaded voter file.

    Each yielded dict has keys 'voter_id', 'email' and 'name'; missing
    columns fall back as documented inline. Reads either the in-memory
    self.voter_file_content or the file at self.voter_file.path.
    """
    if self.voter_file_content:
      # Python 2: ensure we hold bytes before wrapping in BytesIO.
      if type(self.voter_file_content) == unicode:
        content = self.voter_file_content.encode('utf-8')
      else:
        content = self.voter_file_content

      # now we have to handle non-universal-newline stuff
      # we do this in a simple way: replace all \r with \n
      # then, replace all double \n with single \n
      # this should leave us with only \n
      content = content.replace('\r','\n').replace('\n\n','\n')

      voter_stream = io.BytesIO(content)
    else:
      # "rU" opens the on-disk file with universal newlines (Python 2).
      voter_stream = open(self.voter_file.path, "rU")

    #reader = unicode_csv_reader(voter_stream)
    reader = unicodecsv.reader(voter_stream, encoding='utf-8')

    for voter_fields in reader:
      # bad line
      if len(voter_fields) < 1:
        continue

      # Column 0 is always the voter id.
      return_dict = {'voter_id': voter_fields[0].strip()}

      if len(voter_fields) > 1:
        return_dict['email'] = voter_fields[1].strip()
      else:
        # assume single field means the email is the same field
        return_dict['email'] = voter_fields[0].strip()

      if len(voter_fields) > 2:
        return_dict['name'] = voter_fields[2].strip()
      else:
        # No explicit name column: reuse the email as the display name.
        return_dict['name'] = return_dict['email']

      yield return_dict


问题


面经


文章

微信
公众号

扫码关注公众号