def run(self, filename):
    with open(filename, 'r') as f:
        source = csv.reader(f, delimiter='\t')
        header = next(source)
        pywikibot.output("Header of the input table: " + ', '.join(header))
        # Build a namedtuple class from the header, then one record per row.
        Title = namedtuple('Title', header)
        titles = [Title._make(row) for row in source]
    if not titles:
        pywikibot.output("We were not able to extract the data to work on. Exiting.")
        return
    for row in titles:
        commons = "%s - Musei del cibo - %s - %s.jpg" % (row.nome, row.museo, row.inventario)
        description = u"""
{{Musei del cibo
| museo = %s
| inventario = %s
| nome = %s
| ambito = %s
| epoca = %s
| dimensioni = %s
| materia = %s
| descrizione = %s
| provenienza = %s
| note = %s
| bibliografia = %s
}}
""" % (row.museo, row.inventario, row.nome, row.ambito, row.epoca,
       row.dimensioni, row.materia, row.descrizione, row.provenienza,
       row.note, row.biblio)
        try:
            upload = UploadRobot(row.inventario + ".jpg", description=description,
                                 useFilename=commons, keepFilename=True,
                                 verifyDescription=False, ignoreWarning=False,
                                 aborts=True)
            upload.run()
        except Exception as e:
            pywikibot.output("ERROR: The upload could not be completed: %s" % e)
def _assert_correct_csv(self, actual_csv, expected_rows):
    """
    Asserts that CSV file ``actual_csv`` contains ``expected_rows``
    """
    reader = unicodecsv.reader(actual_csv.getvalue().splitlines(), encoding="utf-8")
    # preprocess expected - convert everything to strings
    expected_rows = [
        [str(item) for item in row]
        for row in expected_rows
    ]
    actual_rows = list(reader)
    self.assertEqual(actual_rows, expected_rows)
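A minimal usage sketch, assuming the helper lives on a unittest.TestCase and actual_csv is an in-memory buffer written with unicodecsv:

import io
import unicodecsv

buf = io.BytesIO()
writer = unicodecsv.writer(buf, encoding='utf-8')
writer.writerow([u'id', u'name'])
writer.writerow([1, u'café'])
# Integers in expected_rows are stringified by the helper before comparison:
# self._assert_correct_csv(buf, [['id', 'name'], [1, u'café']])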
def _load_punc_norm_map(self):
    """Load the map table for normalizing 'down' punctuation."""
    path = pkg_resources.resource_filename(__name__, 'data/puncnorm.csv')
    with open(path, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8', delimiter=str(','), quotechar=str('"'))
        next(reader)
        return {punc: norm for (punc, norm) in reader}
def _read_arpabet(self, arpabet):
    arpa_map = {}
    with open(arpabet, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        for arpa, ipa in reader:
            arpa_map[arpa] = ipa
    return arpa_map
def _load_g2p_map(self, code):
    """Load the code table for the specified language.

    Args:
        code (str): ISO 639-3 code plus "-" plus ISO 15924 code for the
            language/script to be loaded
    """
    g2p = defaultdict(list)
    gr_by_line = defaultdict(list)
    try:
        path = os.path.join('data', 'map', code + '.csv')
        path = pkg_resources.resource_filename(__name__, path)
    except IndexError:
        raise DatafileError('Add an appropriately-named mapping to the data/maps directory.')
    with open(path, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        next(reader)
        for (i, fields) in enumerate(reader):
            try:
                graph, phon = fields
            except ValueError:
                raise DatafileError('Map file is not well formed at line {}.'.format(i + 2))
            graph = unicodedata.normalize('NFC', graph)
            phon = unicodedata.normalize('NFC', phon)
            g2p[graph].append(phon)
            gr_by_line[graph].append(i)
    if self._one_to_many_gr_by_line_map(g2p):
        graph, lines = self._one_to_many_gr_by_line_map(gr_by_line)
        lines = [l + 2 for l in lines]
        raise MappingError('One-to-many G2P mapping for "{}" on lines {}'.format(graph, ', '.join(map(str, lines))).encode('utf-8'))
    return g2p
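_one_to_many_gr_by_line_map is not shown in this listing; a plausible reconstruction, assuming it returns the first entry that maps to more than one value (and a falsy value when the map is one-to-one), which is consistent with both call sites above:

def _one_to_many_gr_by_line_map(self, gr_map):
    # Assumed behavior: report the first grapheme with multiple entries,
    # so the caller can raise MappingError with the offending lines.
    for graph, values in gr_map.items():
        if len(values) > 1:
            return graph, values
    return None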
def _load_punc_norm_map(self):
    """Load the map table for normalizing 'down' punctuation."""
    path = os.path.join('data', 'puncnorm.csv')
    path = pkg_resources.resource_filename(__name__, path)
    with open(path, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8', delimiter=str(','), quotechar=str('"'))
        next(reader)
        return {punc: norm for (punc, norm) in reader}
def main(fn):
    ft = panphon.FeatureTable()
    xs = epitran.xsampa.XSampa()
    with open(fn, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        next(reader)
        phones = set()
        for orth, phon in reader:
            phones = phones.union(set(ft.segs_safe(phon)))
    print(len(phones))
    print(sorted(list(map(xs.ipa2xs, phones))))
def main():
    for csv_fn in glob.glob('*.csv'):
        txt = re.match('[A-Za-z-]+', csv_fn).group(0) + '.txt'
        with open(csv_fn, 'rb') as f, io.open(txt, 'w', encoding='utf-8') as g:
            reader = unicodecsv.reader(f, encoding='utf-8')
            next(reader)
            for fields in reader:
                if re.match(r'\s*%', fields[0]):
                    # Comment lines (starting with "%") are copied through.
                    print(','.join([x for x in fields if x]), file=g)
                else:
                    rule = build_rule(fields)
                    rule = re.sub('[ ]+', ' ', rule)
                    rule = re.sub('[ ]$', '', rule)
                    print(rule, file=g)
def main(fns, fnn):
    punc = set()
    for fn in fns:
        print(fn)
        with open(fn, 'rb') as f:
            reader = csv.reader(f, encoding='utf-8')
            for _, s in reader:
                if len(s) == 1 and unicodedata.category(s)[0] == u'P':
                    punc.add(s)
    with open(fnn, 'wb') as f:
        writer = csv.writer(f, encoding='utf-8')
        for mark in sorted(list(punc)):
            writer.writerow([mark])
def read_map(fn):
    with open(fn, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        next(reader)
        return [(a, b) for [a, b] in reader]
def _read_ipa2xs(self):
    path = os.path.join('data', self.ipa2xs_fn)
    path = pkg_resources.resource_filename(__name__, path)
    pairs = []
    with open(path, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        next(reader)
        for ipa, xs, _ in reader:
            pairs.append((ipa, xs.encode('utf-8'),))
    trie = marisa_trie.BytesTrie(pairs)
    return trie
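A hedged usage sketch of the resulting trie: marisa_trie.BytesTrie supports prefix queries, which is what makes it useful for greedy longest-match transliteration. The IPA query string below is illustrative only.

prefixes = trie.prefixes(u'tʃa')      # every stored key that prefixes the query
if prefixes:
    longest = max(prefixes, key=len)  # greedy longest-match
    xsampa = trie[longest][0].decode('utf-8')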
def csv_data(csv_path, skip_header=True):
    """Pass in the path to a CSV file, returns a CSV Reader object."""
    csv_file = open(csv_path, 'r')
    # Determine the CSV dialect from a sample of the file.
    dialect = unicodecsv.Sniffer().sniff(csv_file.read(1024))
    csv_file.seek(0)
    data = unicodecsv.reader(csv_file, dialect)
    if skip_header:
        next(data)
    return data
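Hypothetical usage, assuming a file data.csv with a header row; the Sniffer call above guesses the dialect from the first 1 KB of the file.

for row in csv_data('data.csv'):
    print(row)  # each row is a list of strings, header already skipped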
def _read_weights(self, weights_fn):
    weights_fn = pkg_resources.resource_filename(__name__, weights_fn)
    with open(weights_fn, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        next(reader)
        weights = [float(x) for x in next(reader)]
    return weights
def _read_weights(self, filename=os.path.join('data', 'feature_weights.csv')):
    filename = pkg_resources.resource_filename(__name__, filename)
    with open(filename, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        next(reader)
        weights = [float(x) for x in next(reader)]
    return weights
def write_ipa_all(ipa_bases, ipa_all, all_segments, sort_order):
    with open(ipa_bases, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        fieldnames = next(reader)
    with open(ipa_all, 'wb') as f:
        writer = csv.DictWriter(f, encoding='utf-8', fieldnames=fieldnames)
        # Write the header row by mapping each field name to itself.
        writer.writerow({k: k for k in fieldnames})
        all_segments_list = sort_all_segments(sort_order, all_segments)
        for segment in all_segments_list:
            fields = copy.copy(segment.features)
            fields['ipa'] = segment.form
            writer.writerow(fields)
def read_xsampa_table(self):
    filename = os.path.join('data', 'ipa-xsampa.csv')
    filename = pkg_resources.resource_filename(__name__, filename)
    with open(filename, 'rb') as f:
        xs2ipa = {x[1]: x[0] for x in csv.reader(f, encoding='utf-8')}
    # Sort X-SAMPA symbols longest-first so the regex prefers longest matches.
    xs = sorted(xs2ipa.keys(), key=len, reverse=True)
    xs_regex = re.compile('|'.join(map(re.escape, xs)))
    return xs_regex, xs2ipa
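Because the alternatives in xs_regex are sorted longest-first, a single sub() call performs greedy longest-match transliteration. A minimal sketch; the converter object and input string are hypothetical:

xs_regex, xs2ipa = converter.read_xsampa_table()

def xsampa_to_ipa(s):
    # Replace each matched X-SAMPA symbol with its IPA counterpart.
    return xs_regex.sub(lambda m: xs2ipa[m.group(0)], s)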
def _read_ipa_bases(self, fn):
    fn = pkg_resources.resource_filename(__name__, fn)
    with open(fn, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8', delimiter=str(','))
        names = next(reader)[1:]
        bases = {}
        for row in reader:
            seg, vals = row[0], row[1:]
            bases[seg] = set(zip(vals, names))
    return bases, names
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                            dialect=dialect, **kwargs)
    for row in csv_reader:
        # decode UTF-8 back to Unicode, cell by cell:
        try:
            yield [unicode(cell, 'utf-8') for cell in row]
        except UnicodeDecodeError:
            # Fall back to Latin-1 for rows that are not valid UTF-8.
            yield [unicode(cell, 'latin-1') for cell in row]
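utf_8_encoder is not shown above; in the classic recipe from the Python 2 csv documentation it is a short generator:

def utf_8_encoder(unicode_csv_data):
    # csv in Python 2 consumes byte strings, so encode each line as UTF-8.
    for line in unicode_csv_data:
        yield line.encode('utf-8')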
def itervoters(self):
    if self.voter_file_content:
        if isinstance(self.voter_file_content, unicode):
            content = self.voter_file_content.encode('utf-8')
        else:
            content = self.voter_file_content
        # now we have to handle non-universal-newline stuff
        # we do this in a simple way: replace all \r with \n
        # then, replace all double \n with single \n
        # this should leave us with only \n
        content = content.replace('\r', '\n').replace('\n\n', '\n')
        voter_stream = io.BytesIO(content)
    else:
        voter_stream = open(self.voter_file.path, "rU")
    # reader = unicode_csv_reader(voter_stream)
    reader = unicodecsv.reader(voter_stream, encoding='utf-8')
    for voter_fields in reader:
        # skip bad (empty) lines
        if len(voter_fields) < 1:
            continue
        return_dict = {'voter_id': voter_fields[0].strip()}
        if len(voter_fields) > 1:
            return_dict['email'] = voter_fields[1].strip()
        else:
            # assume single field means the email is the same field
            return_dict['email'] = voter_fields[0].strip()
        if len(voter_fields) > 2:
            return_dict['name'] = voter_fields[2].strip()
        else:
            return_dict['name'] = return_dict['email']
        yield return_dict
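Hypothetical usage, assuming election is an object exposing this method:

for voter in election.itervoters():
    print(voter['voter_id'], voter['email'], voter['name'])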