def test_dialect(self):
    """Parse with an ``excel`` dialect whose quoting is ``QUOTE_NONE``.

    The first input contains a lone double quote in its first data row.
    The parsed frame must equal the frame parsed from the quote-free input
    after every ``a`` cell has been replaced by a double quote followed
    by ``a``.
    """
    unquoted_dialect = csv.excel()
    unquoted_dialect.quoting = csv.QUOTE_NONE
    raw_with_quote = """\
label1,label2,label3
index1,"a,c,e
index2,b,d,f
"""
    actual = self.read_csv(StringIO(raw_with_quote), dialect=unquoted_dialect)

    raw_plain = '''\
label1,label2,label3
index1,a,c,e
index2,b,d,f
'''
    expected = self.read_csv(StringIO(raw_plain))
    expected.replace('a', '"a', inplace=True)
    tm.assert_frame_equal(actual, expected)
python类QUOTE_NONE的实例源码
test_parsers.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 33
收藏 0
点赞 0
评论 0
def build_gtf(self):
print "Writing new genes GTF file (may take 10 minutes for a 1GB input GTF file)..."
with open(self.out_gtf_fn, 'wb') as f:
writer = csv.writer(f, delimiter='\t', quoting=csv.QUOTE_NONE, quotechar='')
for row, is_comment, properties in self.gtf_reader_iter(self.in_gtf_fn):
if is_comment:
writer.writerow(row)
continue
remove = False
for key, value in properties.iteritems():
if key in self.attributes and value not in self.attributes[key]:
remove = True
if not remove:
writer.writerow(row)
print "...done\n"
def export_to_csv(request, variants):
    """Stream ``variants`` as a downloadable CSV or tab-separated text file.

    The format is taken from the ``export`` query parameter of ``request``:
    ``csv`` produces a comma-separated attachment, ``txt`` a tab-separated
    one written with ``csv.QUOTE_NONE``.

    Returns the populated ``HttpResponse``, or ``None`` when no export was
    requested or the requested format is not supported (previously an
    unsupported value raised ``UnboundLocalError``).
    """
    export = request.GET.get('export', '')
    if export == 'csv':
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename=export.csv'
        writer = csv.writer(response)
    elif export == 'txt':
        response = HttpResponse(content_type='text/plain')
        response['Content-Disposition'] = 'attachment; filename=export.txt'
        writer = csv.writer(response, delimiter='\t', quoting=csv.QUOTE_NONE)
    else:
        # Nothing to export: empty/missing parameter or an unknown format.
        return None

    # NOTE(review): the header names 63 columns while each data row below
    # carries 33 values; left as-is because consumers may depend on it.
    writer.writerow([
        'Individual', 'Index', 'Pos_index', 'Chr', 'Pos', 'Variant_id',
        'Ref', 'Alt', 'Qual', 'Filter', 'Info', 'Format', 'Genotype_col',
        'Genotype', 'Read_depth', 'Gene', 'Mutation_type', 'Vartype',
        'Genomes1k_maf', 'Dbsnp_maf', 'Esp_maf', 'Dbsnp_build', 'Sift',
        'Sift_pred', 'Polyphen2', 'Polyphen2_pred', 'Condel', 'Condel_pred',
        'DANN', 'CADD', 'Is_at_omim', 'Is_at_hgmd', 'Hgmd_entries', 'Effect',
        'Impact', 'Func_class', 'Codon_change', 'Aa_change', 'Aa_len',
        'Gene_name', 'Biotype', 'Gene_coding', 'Transcript_id', 'Exon_rank',
        'Genotype_number', 'Allele', 'Gene', 'Feature', 'Feature_type',
        'Consequence', 'Cdna_position', 'Cds_position', 'Protein_position',
        'Amino_acids', 'Codons', 'Existing_variation', 'Distance', 'Strand',
        'Symbol', 'Symbol_source', 'Sift', 'Polyphen', 'Condel',
    ])
    for variant in variants:
        writer.writerow([
            variant.individual, variant.index, variant.pos_index,
            variant.chr, variant.pos, variant.variant_id, variant.ref,
            variant.alt, variant.qual, variant.filter,
            # NOTE(review): pickle.loads is only safe if variant.info can
            # never hold attacker-controlled bytes -- confirm.
            pickle.loads(variant.info),
            variant.format, variant.genotype_col, variant.genotype,
            variant.read_depth, variant.gene, variant.mutation_type,
            variant.vartype, variant.genomes1k_maf, variant.dbsnp_maf,
            variant.esp_maf, variant.dbsnp_build, variant.sift,
            variant.sift_pred, variant.polyphen2, variant.polyphen2_pred,
            variant.condel, variant.condel_pred, variant.dann, variant.cadd,
            variant.is_at_omim, variant.is_at_hgmd, variant.hgmd_entries,
        ])
    return response
def load(cls, filepath_or_buffer):
    """
    Build an instance from space-separated word embeddings.

    The input is read without a header row; its first column becomes the
    index (named ``token``) and the remaining columns are renumbered from 0.

    Params
    ------
    filepath_or_buffer : str
        Path or buffer handed straight to ``pandas.read_csv()``.

    Returns
    -------
    A ``cls`` instance created with ``n_dimensions`` equal to the number of
    vector columns and ``word_vectors_`` set to the parsed frame.
    """
    frame = pd.read_csv(
        filepath_or_buffer,
        header=None,
        index_col=0,
        sep=' ',
        quoting=QUOTE_NONE,
    )
    n_dims = frame.shape[1]
    frame.columns = np.arange(n_dims)
    frame.index.name = 'token'
    instance = cls(n_dimensions=n_dims)
    instance.word_vectors_ = frame
    return instance
def _create_csv(self, filename, data):
    """Write one ``;``-separated row per entry of ``data`` into ``filename``.

    The header row ``self._header`` is written first.  Entries whose kind is
    a class declaration or class template contribute one row per item in
    their ``methods``; every other entry contributes a single row.  Rows are
    emitted through ``self.add_line`` and numbered consecutively from 0.
    Progress messages are printed unless ``self._parser.quiet`` is set.
    """
    csv_id = 0
    if not self._parser.quiet:
        print("Creating csv on file '%s'" % filename)
    class_kinds = [clang.cindex.CursorKind.CLASS_DECL, clang.cindex.CursorKind.CLASS_TEMPLATE]
    with open(filename, self._option_open_file) as csv_file:
        # quotechar=None instead of "": Python 3.11+ rejects an empty
        # quotechar, while None is accepted everywhere with QUOTE_NONE.
        result = csv.writer(csv_file, delimiter=';', quotechar=None, quoting=csv.QUOTE_NONE)
        result.writerow(self._header)
        for clang_obj in data:
            if clang_obj.kind in class_kinds:
                # class section
                for clang_obj_child in clang_obj.methods:
                    self.add_line(result, clang_obj, clang_obj_child, csv_id)
                    csv_id += 1
            else:
                # method or function section
                self.add_line(result, None, clang_obj, csv_id)
                csv_id += 1
    if not self._parser.quiet:
        print("%s row into %s" % (csv_id, filename))
def init_csv_file (csv_file_param):
    """
    Write the CSV header row to the given output.

    csv_file_param is either a file path (the file is created/truncated,
    the header is written and the file is closed again) or an object with
    a ``write`` method (normally stdout), which is written to and left open.
    The header uses the module-level ``fieldnames``, '^' as delimiter and
    the 'unix' dialect with quoting disabled.

    Raises IOError when the parameter is neither a string nor writable.
    """
    opened_here = isinstance (csv_file_param, str)
    if opened_here:
        # The parameter is a file-path
        csv_file = open (csv_file_param, 'w', newline = '')
    elif hasattr (csv_file_param, 'write'):
        # The parameter is already a file (normally, stdout)
        csv_file = csv_file_param
    else:
        # Unknown
        raise IOError ('[Error] Output file parameter "' + str(csv_file_param) + '" unkown')

    try:
        # Write the header
        fileWriter = csv.DictWriter (csv_file, delimiter='^',
                                     fieldnames = fieldnames,
                                     dialect = 'unix', quoting = csv.QUOTE_NONE)
        fileWriter.writeheader()
    finally:
        # Only close what this function opened; a caller-supplied stream
        # stays under the caller's control.
        if opened_here:
            csv_file.close()
def walk(self, filtr: Callable[[str], bool], processor: Callable[[Dict], bool]) -> None:
    """
    Visit every file below ``self._indir`` and copy the accepted rows.

    Each file whose name passes ``filtr`` is read as tab-separated text with
    quoting disabled.  Rows for which ``processor`` returns a true value are
    written to the writer obtained from ``self._create_writer``.
    :param filtr: file name tester
    :param processor: content row processor
    """
    for filedir, _, files in os.walk(self._indir):
        for file in files:
            if not filtr(file):
                continue
            source_path = os.path.join(filedir, file)
            print("Processing %s" % source_path)
            with open(source_path) as f:
                reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
                with self._create_writer(filedir, file, reader) as writer:
                    # filter() is lazy, so rows are still tested and written
                    # one at a time.
                    for kept_row in filter(processor, reader):
                        writer.writerow(kept_row)
def test_write_arg_valid(self):
    """Check accepted row arguments and that writer errors propagate."""

    class BadList:
        # Reports ten items but raises for any index above 2.
        def __len__(self):
            return 10

        def __getitem__(self, i):
            if i > 2:
                raise IOError

    class BadItem:
        # Raises when converted to a string.
        def __str__(self):
            raise IOError

    # PyPy gets a TypeError instead of a csv.Error for "not a sequence"
    with self.assertRaises((csv.Error, TypeError)):
        self._write_test(None, '')
    self._write_test((), '')
    self._write_test([None], '""')
    with self.assertRaises(csv.Error):
        self._write_test([None], None, quoting=csv.QUOTE_NONE)
    # Check that exceptions are passed up the chain
    with self.assertRaises(IOError):
        self._write_test(BadList(), '')
    with self.assertRaises(IOError):
        self._write_test([BadItem()], '')
def test_write_escape(self):
    """Exercise ``escapechar`` together with the different quoting modes."""
    backslash = '\\'
    self._write_test(['a', 1, 'p,q'], 'a,1,"p,q"', escapechar=backslash)
    # Without escapechar and with doublequote off, csv.Error is expected.
    with self.assertRaises(csv.Error):
        self._write_test(['a', 1, 'p,"q"'], 'a,1,"p,\\"q\\""',
                         escapechar=None, doublequote=False)
    # (row, expected output, extra options) -- same order as before.
    cases = [
        (['a', 1, 'p,"q"'], 'a,1,"p,\\"q\\""', dict(doublequote=False)),
        (['"'], '""""', dict(quoting=csv.QUOTE_MINIMAL)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_MINIMAL, doublequote=False)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_NONE)),
        (['a', 1, 'p,q'], 'a,1,p\\,q', dict(quoting=csv.QUOTE_NONE)),
    ]
    for row, expected, options in cases:
        self._write_test(row, expected, escapechar=backslash, **options)
def test_quoting(self):
    """Check which ``quoting``/``quotechar`` values a Dialect accepts."""

    class SemicolonDialect(csv.Dialect):
        delimiter = ";"
        escapechar = '\\'
        doublequote = False
        skipinitialspace = True
        lineterminator = '\r\n'
        quoting = csv.QUOTE_NONE

    # Valid as declared.
    SemicolonDialect()

    SemicolonDialect.quoting = None
    with self.assertRaises(csv.Error):
        SemicolonDialect()

    # QUOTE_ALL with a one-character quotechar is valid again.
    SemicolonDialect.doublequote = True
    SemicolonDialect.quoting = csv.QUOTE_ALL
    SemicolonDialect.quotechar = '"'
    SemicolonDialect()

    # A two-character string and a non-string quotechar are both rejected.
    for bad_quotechar in ("''", 4):
        SemicolonDialect.quotechar = bad_quotechar
        with self.assertRaises(csv.Error):
            SemicolonDialect()
def get_group_data(self):
    """
    Parse group(5) formatted files and return tuples of group data in the
    form (groupname, group password, group id and a list of member
    usernames).

    Lines whose name starts with "+" or "-" (NIS markers) are skipped.
    Lines that cannot be converted (missing fields, non-numeric gid) are
    logged as warnings and skipped instead of aborting the parse.
    """
    group_data = []
    # The with-block closes the file even if parsing raises.
    with open(self._group_file, "r") as group_file:
        reader = csv.DictReader(group_file, fieldnames=self.group_fields,
                                delimiter=":", quoting=csv.QUOTE_NONE)
        for current_line, row in enumerate(reader, 1):
            # Skip if we find the NIS marker
            if row["name"].startswith(("+", "-")):
                continue
            try:
                group_data.append((row["name"], row["passwd"],
                                   int(row["gid"]),
                                   row["members"].split(",")))
            except (AttributeError, TypeError, ValueError):
                # TypeError: a short line leaves gid as None and int(None)
                # raises TypeError, which used to escape uncaught.
                # logging.warning replaces logging.warn (removed in 3.13).
                logging.warning("group file %s is incorrectly formatted: "
                                "line %d." % (self._group_file, current_line))
    return group_data
def generate_nonlinear_model_csv(input_csv, model=None, mask=None, work_prefix=None, options=None, skip=0, stop_early=100000):
    """Build a nonlinear average from the scans listed in a CSV file.

    Each non-blank row of ``input_csv`` supplies a scan path (column 0) and
    a mask path (column 1).  ``model``/``mask`` optionally describe an
    initial model.  ``work_prefix`` is created when it does not exist yet.
    ``options`` defaults to a fresh empty dict on every call.

    Returns whatever ``generate_nonlinear_average`` returns.
    """
    if options is None:
        # Fresh dict per call instead of one shared mutable default.
        options = {}
    with open(input_csv, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quoting=csv.QUOTE_NONE)
        # csv.reader yields [] for blank lines; skip them rather than fail
        # with IndexError on row[0].
        internal_sample = [MriDataset(scan=row[0], mask=row[1])
                           for row in reader if row]

    internal_model = None
    if model is not None:
        internal_model = MriDataset(scan=model, mask=mask)

    if work_prefix is not None and not os.path.exists(work_prefix):
        os.makedirs(work_prefix)

    return generate_nonlinear_average(internal_sample, internal_model,
                                      prefix=work_prefix, options=options,
                                      skip=skip, stop_early=stop_early)
def generate_linear_model_csv(input_csv, model=None, mask=None, work_prefix=None, options=None):
    """Build a linear average from the scans listed in a CSV file.

    Each non-blank row of ``input_csv`` supplies a scan path (column 0) and
    a mask path (column 1).  ``model``/``mask`` optionally describe an
    initial model.  ``work_prefix`` is created when it does not exist yet.
    ``options`` defaults to a fresh empty dict on every call.

    Returns whatever ``generate_linear_average`` returns.
    """
    if options is None:
        # Fresh dict per call instead of one shared mutable default.
        options = {}
    with open(input_csv, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quoting=csv.QUOTE_NONE)
        # csv.reader yields [] for blank lines; skip them rather than fail
        # with IndexError on row[0].
        internal_sample = [MriDataset(scan=row[0], mask=row[1])
                           for row in reader if row]

    internal_model = None
    if model is not None:
        internal_model = MriDataset(scan=model, mask=mask)

    if work_prefix is not None and not os.path.exists(work_prefix):
        os.makedirs(work_prefix)

    return generate_linear_average(internal_sample, internal_model,
                                   prefix=work_prefix, options=options)
# kate: space-indent on; indent-width 4; indent-mode python;replace-tabs on;word-wrap-column 80;show-tabs on
def generate_ldd_model_csv(input_csv, model=None, mask=None, work_prefix=None, options=None):
    """Build an LDD average from the scans listed in a CSV file.

    Each non-blank row of ``input_csv`` supplies a scan path (column 0) and
    a mask path (column 1).  ``model``/``mask`` optionally describe an
    initial model.  ``work_prefix`` is created when it does not exist yet.
    ``options`` defaults to a fresh empty dict on every call.

    Returns whatever ``generate_ldd_average`` returns.
    """
    if options is None:
        # Fresh dict per call instead of one shared mutable default.
        options = {}
    with open(input_csv, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quoting=csv.QUOTE_NONE)
        # csv.reader yields [] for blank lines; skip them rather than fail
        # with IndexError on row[0].
        internal_sample = [MriDataset(scan=row[0], mask=row[1])
                           for row in reader if row]

    internal_model = None
    if model is not None:
        internal_model = MriDataset(scan=model, mask=mask)

    if work_prefix is not None and not os.path.exists(work_prefix):
        os.makedirs(work_prefix)

    return generate_ldd_average(internal_sample, internal_model,
                                prefix=work_prefix, options=options)
def test_write_arg_valid(self):
    """Check accepted row arguments and that writer errors propagate."""

    class BadList:
        # Reports ten items but raises for any index above 2.
        def __len__(self):
            return 10

        def __getitem__(self, i):
            if i > 2:
                raise IOError

    class BadItem:
        # Raises when converted to a string.
        def __str__(self):
            raise IOError

    self._write_error_test(csv.Error, None)
    self._write_test((), '')
    self._write_test([None], '""')
    self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE)
    # Check that exceptions are passed up the chain
    self._write_error_test(IOError, BadList())
    self._write_error_test(IOError, [BadItem()])
def test_write_escape(self):
    """Exercise ``escapechar`` together with the different quoting modes."""
    backslash = '\\'
    self._write_test(['a', 1, 'p,q'], 'a,1,"p,q"', escapechar=backslash)
    # Without escapechar and with doublequote off, csv.Error is expected.
    self._write_error_test(csv.Error, ['a', 1, 'p,"q"'],
                           escapechar=None, doublequote=False)
    # (row, expected output, extra options) -- same order as before.
    cases = [
        (['a', 1, 'p,"q"'], 'a,1,"p,\\"q\\""', dict(doublequote=False)),
        (['"'], '""""', dict(quoting=csv.QUOTE_MINIMAL)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_MINIMAL, doublequote=False)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_NONE)),
        (['a', 1, 'p,q'], 'a,1,p\\,q', dict(quoting=csv.QUOTE_NONE)),
    ]
    for row, expected, options in cases:
        self._write_test(row, expected, escapechar=backslash, **options)
def test_write_arg_valid(self):
    """Check accepted row arguments and that writer errors propagate."""

    class BadList:
        # Reports ten items but raises for any index above 2.
        def __len__(self):
            return 10

        def __getitem__(self, i):
            if i > 2:
                raise IOError

    class BadItem:
        # Raises when converted to a string.
        def __str__(self):
            raise IOError

    self._write_error_test(csv.Error, None)
    self._write_test((), '')
    self._write_test([None], '""')
    self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE)
    # Check that exceptions are passed up the chain
    self._write_error_test(IOError, BadList())
    self._write_error_test(IOError, [BadItem()])
def test_write_escape(self):
    """Exercise ``escapechar`` together with the different quoting modes."""
    backslash = '\\'
    self._write_test(['a', 1, 'p,q'], 'a,1,"p,q"', escapechar=backslash)
    # Without escapechar and with doublequote off, csv.Error is expected.
    self._write_error_test(csv.Error, ['a', 1, 'p,"q"'],
                           escapechar=None, doublequote=False)
    # (row, expected output, extra options) -- same order as before.
    cases = [
        (['a', 1, 'p,"q"'], 'a,1,"p,\\"q\\""', dict(doublequote=False)),
        (['"'], '""""', dict(quoting=csv.QUOTE_MINIMAL)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_MINIMAL, doublequote=False)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_NONE)),
        (['a', 1, 'p,q'], 'a,1,p\\,q', dict(quoting=csv.QUOTE_NONE)),
    ]
    for row, expected, options in cases:
        self._write_test(row, expected, escapechar=backslash, **options)
def parse(self):
    """Read the CSV in ``self.stream`` into ``self.profile`` and return it.

    The first row is handed to ``parse_header`` and every following row to
    ``parse_row``; derived profile data is computed before returning.
    An empty stream raises ``StopIteration`` when the header is fetched.
    """
    import csv
    reader = csv.reader(
        self.stream,
        delimiter=',',
        quotechar=None,
        escapechar=None,
        doublequote=False,
        skipinitialspace=True,
        lineterminator='\r\n',
        quoting=csv.QUOTE_NONE)
    # The next() builtin works on Python 2.6+ and 3; reader.next() exists
    # only on Python 2.
    header = next(reader)
    self.parse_header(header)
    for row in reader:
        self.parse_row(row)

    # compute derived data
    self.profile.validate()
    self.profile.find_cycles()
    self.profile.ratio(TIME_RATIO, SAMPLES)
    self.profile.call_ratios(SAMPLES2)
    self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)

    return self.profile
def read_vectors(fin, dtype='float64', delim=' '):
    """Return a list with tuples (word, word_vector).

    ``fin`` is iterated by ``csv.reader`` using ``delim`` and no quoting.
    If the first row has exactly two fields it is treated as a header whose
    second field is the vector length; otherwise the length is taken from
    the first row itself.  Each vector is a NumPy array of ``dtype``.
    """
    reader = csv.reader(fin, delimiter=delim, quoting=csv.QUOTE_NONE)
    word_vectors = []
    ncol = None
    for row in reader:
        if ncol is None:
            if len(row) == 2:
                # Header row: "<count> <dimensions>"; nothing to store.
                ncol = int(row[1])
                continue
            ncol = len(row) - 1
        word = row[0]
        # Python 2 yields byte strings, Python 3 yields text.  Decode only
        # bytes so the result is text on both (the former unicode() call
        # does not exist on Python 3).
        if isinstance(word, bytes):
            word = word.decode('utf-8', 'replace')
        word_vector = np.fromiter(
            [float(v) for v in row[1: ncol + 1]],
            dtype=dtype, count=ncol)
        word_vectors.append((word, word_vector))
    return word_vectors
def reader_gen(self,FILEPATH=os.getcwd()):
    """Return a ``csv.reader`` positioned after the header rows.

    ``self.filename`` is used as-is when it contains a '/', otherwise it is
    joined to ``FILEPATH`` (whose default is the working directory at the
    time this function was defined, not at call time).

    When ``self.AUTOSKIP_HEADER`` is True the leading rows whose first field
    starts with "@" are counted and the count is stored in
    ``self.SKIP_HEADER``, overriding any provided value.  The returned
    reader has ``self.SKIP_HEADER`` rows already consumed; its underlying
    file stays open for the caller to iterate.
    """
    import sys

    if '/' in self.filename:
        complete_path = self.filename
    else:
        complete_path = FILEPATH + '/' + self.filename

    # 'rU' was removed in Python 3.11; Python 3 text mode already applies
    # universal newlines, so plain 'r' is equivalent there.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'

    if self.AUTOSKIP_HEADER == True:
        ## this will over-write provided default
        skip_number = 0
        # Scan with a separate handle that is closed right after counting.
        with open(complete_path, mode) as probe:
            scanner = csv.reader(probe, delimiter=self.SEP_CHAR, quoting=csv.QUOTE_NONE)
            for row in scanner:
                # Stop at the first non-header row; a file consisting only
                # of header rows simply ends the loop.
                if not row or not row[0].startswith("@"):
                    break
                skip_number += 1
        self.SKIP_HEADER = skip_number

    reader = csv.reader(open(complete_path, mode), delimiter=self.SEP_CHAR, quoting=csv.QUOTE_NONE)
    for _ in range(self.SKIP_HEADER):
        # next() builtin: works on Python 2.6+ and 3.
        next(reader)
    return reader
def ref_dict_gen(self,FILEPATH=os.getcwd()):
    """Return a dict mapping reference names to lengths from the header.

    Leading rows whose first field starts with "@" are scanned; for each
    "@SQ" row the second field (minus its 3-character tag) is the name and
    the third field (minus its 3-character tag) is the integer length.
    Scanning stops at the first non-header row or at end of file.

    ``self.filename`` is used as-is when it contains a '/', otherwise it is
    joined to ``FILEPATH`` (whose default is the working directory at the
    time this function was defined, not at call time).
    """
    import sys

    if '/' in self.filename:
        complete_path = self.filename
    else:
        complete_path = FILEPATH + '/' + self.filename

    # 'rU' was removed in Python 3.11; Python 3 text mode already applies
    # universal newlines, so plain 'r' is equivalent there.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'

    ref_dict = dict()
    # The with-block closes the file once the header has been read.
    with open(complete_path, mode) as handle:
        reader = csv.reader(handle, delimiter=self.SEP_CHAR, quoting=csv.QUOTE_NONE)
        for row in reader:
            # A header-only file ends the loop normally instead of raising
            # StopIteration and losing the collected entries.
            if not row or not row[0].startswith("@"):
                break
            if row[0][1:3] == "SQ":
                ref_name = row[1][3:]
                ref_length = int(row[2][3:])
                ref_dict[ref_name] = ref_length
    return ref_dict
def test_write_arg_valid(self):
    """Check accepted row arguments and that writer errors propagate."""

    class BadList:
        # Reports ten items but raises for any index above 2.
        def __len__(self):
            return 10

        def __getitem__(self, i):
            if i > 2:
                raise IOError

    class BadItem:
        # Raises when converted to a string.
        def __str__(self):
            raise IOError

    with self.assertRaises(csv.Error):
        self._write_test(None, '')
    self._write_test((), '')
    self._write_test([None], '""')
    with self.assertRaises(csv.Error):
        self._write_test([None], None, quoting=csv.QUOTE_NONE)
    # Check that exceptions are passed up the chain
    with self.assertRaises(IOError):
        self._write_test(BadList(), '')
    with self.assertRaises(IOError):
        self._write_test([BadItem()], '')
def test_write_escape(self):
    """Exercise ``escapechar`` together with the different quoting modes."""
    backslash = '\\'
    self._write_test(['a', 1, 'p,q'], 'a,1,"p,q"', escapechar=backslash)
    # Without escapechar and with doublequote off, csv.Error is expected.
    with self.assertRaises(csv.Error):
        self._write_test(['a', 1, 'p,"q"'], 'a,1,"p,\\"q\\""',
                         escapechar=None, doublequote=False)
    # (row, expected output, extra options) -- same order as before.
    cases = [
        (['a', 1, 'p,"q"'], 'a,1,"p,\\"q\\""', dict(doublequote=False)),
        (['"'], '""""', dict(quoting=csv.QUOTE_MINIMAL)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_MINIMAL, doublequote=False)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_NONE)),
        (['a', 1, 'p,q'], 'a,1,p\\,q', dict(quoting=csv.QUOTE_NONE)),
    ]
    for row, expected, options in cases:
        self._write_test(row, expected, escapechar=backslash, **options)
def write_results_to_file(species, ids, probs):
    """Write one line per id to ``results/results.csv``.

    The first line is ``id`` followed by the comma-joined ``species``.  Each
    following line is the integer id immediately followed by the text that
    ``convert_list_of_ints_to_string`` returns for the matching ``probs``
    row.  The ``results`` directory is created when missing.
    """
    print(probs.shape)

    # Make a path for our results to be saved to
    out_dir = 'results'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    print('Writing results to file')
    with open('results/results.csv', 'w') as out_file:
        # Every line is written as a single pre-formatted field.
        line_writer = csv.writer(out_file, delimiter=' ', escapechar=' ', quoting=csv.QUOTE_NONE)
        line_writer.writerow(['id,' + ','.join(species)])
        for idx in range(ids.shape[0]):
            tail = convert_list_of_ints_to_string(probs[idx])
            line_writer.writerow([str(int(ids[idx])) + tail])
    print('Successfully wrote results to file')
def __parse_file(self):
    """Read the configured delimited file and return its rows.

    ``self.options`` supplies ``filename``, ``column_separator`` and
    ``quote_character``.  Quoting is disabled when no quote character is
    configured.  Every cell is stripped and converted with ``unicode``
    using ``errors='ignore'``.

    Raises IOError when no filename is configured; an assertion fails when
    a row has a different number of columns than the row before it.
    """
    filename = self.options['filename']
    if not filename:
        raise IOError
    sep = self.options['column_separator']
    quote = self.options['quote_character']

    # if sep is not a one character string, csv.reader will raise a TypeError
    reader_options = {'delimiter': str(sep)}
    if quote:
        reader_options['quotechar'] = str(quote)
    else:
        reader_options['quoting'] = csv.QUOTE_NONE

    values = []
    with open(filename, 'rU') as infile:
        # get each line from the file and separate it into columns based on sep
        for row in csv.reader(infile, **reader_options):
            # append all lines as-is case-wise
            # unicode(str, errors='ignore') causes all invalid characters to be stripped out
            cleaned = [unicode(value.strip(), errors='ignore') for value in row]
            values.append(cleaned)
            # ensure the number of columns in each row is the same as the previous row
            if len(values) > 1:
                assert len(values[-1]) == len(values[-2])
    return values
def parse(self):
    """Read the CSV in ``self.stream`` into ``self.profile`` and return it.

    The first row is handed to ``parse_header`` and every following row to
    ``parse_row``; derived profile data is computed before returning.
    An empty stream raises ``StopIteration`` when the header is fetched.
    """
    import csv
    reader = csv.reader(
        self.stream,
        delimiter=',',
        quotechar=None,
        escapechar=None,
        doublequote=False,
        skipinitialspace=True,
        lineterminator='\r\n',
        quoting=csv.QUOTE_NONE)
    # The next() builtin works on Python 2.6+ and 3; reader.next() exists
    # only on Python 2.
    header = next(reader)
    self.parse_header(header)
    for row in reader:
        self.parse_row(row)

    # compute derived data
    self.profile.validate()
    self.profile.find_cycles()
    self.profile.ratio(TIME_RATIO, SAMPLES)
    self.profile.call_ratios(SAMPLES2)
    self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)

    return self.profile
def test_write_arg_valid(self):
    """Check accepted row arguments and that writer errors propagate."""

    class BadList:
        # Reports ten items but raises for any index above 2.
        def __len__(self):
            return 10

        def __getitem__(self, i):
            if i > 2:
                raise IOError

    class BadItem:
        # Raises when converted to a string.
        def __str__(self):
            raise IOError

    # PyPy gets a TypeError instead of a csv.Error for "not a sequence"
    with self.assertRaises((csv.Error, TypeError)):
        self._write_test(None, '')
    self._write_test((), '')
    self._write_test([None], '""')
    with self.assertRaises(csv.Error):
        self._write_test([None], None, quoting=csv.QUOTE_NONE)
    # Check that exceptions are passed up the chain
    with self.assertRaises(IOError):
        self._write_test(BadList(), '')
    with self.assertRaises(IOError):
        self._write_test([BadItem()], '')
def test_write_escape(self):
    """Exercise ``escapechar`` together with the different quoting modes."""
    backslash = '\\'
    self._write_test(['a', 1, 'p,q'], 'a,1,"p,q"', escapechar=backslash)
    # Without escapechar and with doublequote off, csv.Error is expected.
    with self.assertRaises(csv.Error):
        self._write_test(['a', 1, 'p,"q"'], 'a,1,"p,\\"q\\""',
                         escapechar=None, doublequote=False)
    # (row, expected output, extra options) -- same order as before.
    cases = [
        (['a', 1, 'p,"q"'], 'a,1,"p,\\"q\\""', dict(doublequote=False)),
        (['"'], '""""', dict(quoting=csv.QUOTE_MINIMAL)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_MINIMAL, doublequote=False)),
        (['"'], '\\"', dict(quoting=csv.QUOTE_NONE)),
        (['a', 1, 'p,q'], 'a,1,p\\,q', dict(quoting=csv.QUOTE_NONE)),
    ]
    for row, expected, options in cases:
        self._write_test(row, expected, escapechar=backslash, **options)
def test_write_arg_valid(self):
    """Check accepted row arguments and that writer errors propagate."""

    class BadList:
        # Reports ten items but raises for any index above 2.
        def __len__(self):
            return 10

        def __getitem__(self, i):
            if i > 2:
                raise OSError

    class BadItem:
        # Raises when converted to a string.
        def __str__(self):
            raise OSError

    self._write_error_test(csv.Error, None)
    self._write_test((), '')
    self._write_test([None], '""')
    self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE)
    # Check that exceptions are passed up the chain
    self._write_error_test(OSError, BadList())
    self._write_error_test(OSError, [BadItem()])