def DoRenaming(options, deps):
    """Copy and rename files given in options.renaming_sources and update deps.

    Every entry of ``options.renaming_sources`` and
    ``options.renaming_destinations`` is expanded with
    ``build_utils.ParseGnList``; the flattened lists are paired positionally and
    each source is handed to ``CopyFile`` together with
    ``os.path.join(options.dest, <destination>)`` and ``deps``.

    The process exits with status -1 when the two lists differ in length or
    when any source is a directory.  All sources are checked before the first
    copy, so a rejected directory no longer leaves a partially copied output.
    """
    src_files = [path
                 for gn_list in options.renaming_sources
                 for path in build_utils.ParseGnList(gn_list)]
    dest_files = [path
                  for gn_list in options.renaming_destinations
                  for path in build_utils.ParseGnList(gn_list)]

    if len(src_files) != len(dest_files):
        print('Renaming source and destination files not match.')
        sys.exit(-1)

    # Validate everything up front: failing half-way through the copy loop
    # would leave some files renamed and others missing.
    for src in src_files:
        if os.path.isdir(src):
            print ('renaming diretory is not supported.')
            sys.exit(-1)

    # Both lists are already materialized, so the builtin zip is sufficient.
    for src, dest in zip(src_files, dest_files):
        CopyFile(src, os.path.join(options.dest, dest), deps)
# Example source code using Python's izip()
def pbkdf2_bin(data, salt, iterations=1000, keylen=24, hashfunc=None):
    """Return the binary PBKDF2 digest of `data` for the given `salt`.

    `data` is used as the HMAC key.  Each output block is produced with
    `iterations` rounds and the concatenated blocks are truncated to `keylen`
    bytes.  SHA-1 is used unless another hashlib `hashfunc` is supplied.
    """
    if not hashfunc:
        hashfunc = hashlib.sha1
    keyed_mac = hmac.new(data, None, hashfunc)

    def _prf(message, base=keyed_mac):
        # Copy the keyed HMAC so its state is reused for every message.
        digest = base.copy()
        digest.update(message)
        return map(ord, digest.digest())

    # Ceiling division: number of digest-sized blocks covering keylen bytes.
    block_count = -(-keylen // keyed_mac.digest_size)
    derived = []
    for block_index in xrange(1, block_count + 1):
        last = _prf(salt + _pack_int(block_index))
        acc = last
        for _ in xrange(iterations - 1):
            last = _prf(''.join(map(chr, last)))
            # XOR the running block with the newest round output (lazily).
            acc = starmap(xor, izip(acc, last))
        derived.extend(acc)
    return ''.join(map(chr, derived))[:keylen]
def encode_to_proto(self):
    """Build a HistogramProto from this object's statistics and buckets.

    min, max, num, sum and sum_squares are copied as floats.  A bucket is
    emitted only when it is the last bucket, has a positive count, or is
    directly followed by a bucket with a positive count.
    """
    proto = HistogramProto()
    for field in ('min', 'max', 'num', 'sum', 'sum_squares'):
        setattr(proto, field, float(getattr(self, field)))

    kept_limits = []
    kept_counts = []
    for idx, (limit, count) in enumerate(izip(self.bucket_limits, self.buckets)):
        is_last = idx == len(self.bucket_limits) - 1
        # The look-ahead is only evaluated for non-last, empty buckets.
        if is_last or count > 0.0 or self.buckets[idx + 1] > 0.0:
            kept_limits.append(float(limit))
            kept_counts.append(float(count))

    proto.bucket_limit.extend(kept_limits)
    proto.bucket.extend(kept_counts)
    return proto
def encode_to_proto(self):
    """Build a HistogramProto from this object's statistics and buckets.

    min, max, num, sum and sum_squares are copied as floats.  A bucket is
    emitted only when it is the last bucket, has a positive count, or is
    directly followed by a bucket with a positive count.
    """
    proto = HistogramProto()
    for field in ('min', 'max', 'num', 'sum', 'sum_squares'):
        setattr(proto, field, float(getattr(self, field)))

    kept_limits = []
    kept_counts = []
    for idx, (limit, count) in enumerate(izip(self.bucket_limits, self.buckets)):
        is_last = idx == len(self.bucket_limits) - 1
        # The look-ahead is only evaluated for non-last, empty buckets.
        if is_last or count > 0.0 or self.buckets[idx + 1] > 0.0:
            kept_limits.append(float(limit))
            kept_counts.append(float(count))

    proto.bucket_limit.extend(kept_limits)
    proto.bucket.extend(kept_counts)
    return proto
def constant_time_compare(val1, val2):
    """Return True if the two strings are equal, False otherwise.

    The time taken is independent of the number of characters that match.
    Do not use this function for anything other than comparison against
    targets of known length.

    This should be implemented in C in order to get it completely right.
    """
    if _builtin_constant_time_compare is not None:
        return _builtin_constant_time_compare(val1, val2)

    same_length = len(val1) == len(val2)
    # On a length mismatch val2 is compared with itself so the loop still
    # runs, and the accumulator starts non-zero to force a False result.
    diff = 0 if same_length else 1
    lhs = val1 if same_length else val2
    for a, b in izip(bytearray(lhs), bytearray(val2)):
        diff |= a ^ b
    return diff == 0
def constant_time_compare(val1, val2):
    """Return True if the two strings are equal, False otherwise.

    The time taken is independent of the number of characters that match.
    Do not use this function for anything other than comparison against
    targets of known length.

    This should be implemented in C in order to get it completely right.
    """
    if _builtin_constant_time_compare is not None:
        return _builtin_constant_time_compare(val1, val2)

    same_length = len(val1) == len(val2)
    # On a length mismatch val2 is compared with itself so the loop still
    # runs, and the accumulator starts non-zero to force a False result.
    diff = 0 if same_length else 1
    lhs = val1 if same_length else val2
    for a, b in izip(bytearray(lhs), bytearray(val2)):
        diff |= a ^ b
    return diff == 0
def constant_time_compare(val1, val2):
    """Return True if the two strings are equal, False otherwise.

    The time taken is independent of the number of characters that match.
    Do not use this function for anything other than comparison against
    targets of known length.

    This should be implemented in C in order to get it completely right.
    """
    if _builtin_constant_time_compare is not None:
        return _builtin_constant_time_compare(val1, val2)

    same_length = len(val1) == len(val2)
    # On a length mismatch val2 is compared with itself so the loop still
    # runs, and the accumulator starts non-zero to force a False result.
    diff = 0 if same_length else 1
    lhs = val1 if same_length else val2
    for a, b in izip(bytearray(lhs), bytearray(val2)):
        diff |= a ^ b
    return diff == 0
def constant_time_compare(val1, val2):
    """Return True if the two strings are equal, False otherwise.

    The time taken is independent of the number of characters that match.
    Do not use this function for anything other than comparison against
    targets of known length.

    This should be implemented in C in order to get it completely right.
    """
    if _builtin_constant_time_compare is not None:
        return _builtin_constant_time_compare(val1, val2)

    same_length = len(val1) == len(val2)
    # On a length mismatch val2 is compared with itself so the loop still
    # runs, and the accumulator starts non-zero to force a False result.
    diff = 0 if same_length else 1
    lhs = val1 if same_length else val2
    for a, b in izip(bytearray(lhs), bytearray(val2)):
        diff |= a ^ b
    return diff == 0
def entity_to_gmsh(self, e, dim, lc, gmshself=True):
    """Return the gmsh object for entity `e` of dimension `dim`, creating it on first use.

    `e` is located in ``self.entities[dim]``; an object already stored at the
    same index of ``self.gmsh_entities[dim]`` is returned unchanged.  Points
    (dim 0) are padded with ``3 - self.dim`` zeros and created with
    ``py4gmsh.Point(e, lc)``.  For higher dimensions the facets are created
    recursively and combined with ``_orientations(dim-1)`` into a
    ``FacetLoop[dim-1]``; when `gmshself` is true ``Entity[dim](loop)`` is
    created, cached and returned, otherwise nothing is cached for `e` and
    None is returned.
    """
    idx = self.entities[dim].index(e)
    cached = self.gmsh_entities[dim][idx]
    # Do not duplicate an entity that already exists in gmsh.
    if cached is not None:
        return cached

    if dim == 0:
        # Point: extend the coordinates with zeros up to three components.
        padding = tuple(0. for _ in range(3 - self.dim))
        point = py4gmsh.Point(e + padding, lc)
        self.gmsh_entities[0][idx] = point
        return point

    # dim > 0: recursively generate the facets, then the entity itself.
    gmsh_facets = [self.entity_to_gmsh(facet, dim - 1, lc)
                   for facet in _facets(e)]
    signs = _orientations(dim - 1)
    loop = FacetLoop[dim - 1]([sign + facet
                               for sign, facet in izip(signs, gmsh_facets)])
    if not gmshself:
        # Only the facets and the loop were requested; nothing was cached for e.
        return None
    entity = Entity[dim](loop)
    self.gmsh_entities[dim][idx] = entity
    return entity
def __setitem__(self, query_filter, value):
    """Add a new filter by setting it on every bound subquery.

    If setting the filter on any subquery raises, the subqueries that were
    already updated are restored to their previous state and the exception
    is propagated to the caller.

    Args:
      query_filter: a string of the form "property operand".
      value: the value that the given property is compared against.
    """
    previous = []
    for position, bound_query in enumerate(self.__bound_queries):
        previous.append(bound_query.get(query_filter, None))
        try:
            bound_query[query_filter] = value
        except:
            # Deliberately catch everything: undo the subqueries updated so
            # far, then re-raise the original exception unchanged.
            updated = self.__bound_queries[:position]
            for done_query, prior in zip(updated, previous):
                if prior is None:
                    del done_query[query_filter]
                else:
                    done_query[query_filter] = prior
            raise
def constant_time_compare(val1, val2):
    """Return True if the two strings are equal, False otherwise.

    The time taken is independent of the number of characters that match.
    Do not use this function for anything other than comparison against
    targets of known length.

    This should be implemented in C in order to get it completely right.
    """
    if _builtin_constant_time_compare is not None:
        return _builtin_constant_time_compare(val1, val2)

    same_length = len(val1) == len(val2)
    # On a length mismatch val2 is compared with itself so the loop still
    # runs, and the accumulator starts non-zero to force a False result.
    diff = 0 if same_length else 1
    lhs = val1 if same_length else val2
    for a, b in izip(bytearray(lhs), bytearray(val2)):
        diff |= a ^ b
    return diff == 0
def scan(self):
    """Parse the first line of the file into a dict of converted fields.

    The line is stripped and split on whitespace; the tokens are paired
    positionally with the (name, converter) entries of ``self.FIELDS`` and
    each converter is applied to its token.  Surplus tokens or surplus
    FIELDS entries are ignored.
    """
    with open(self.__filename) as handle:
        tokens = handle.readline().strip().split()
        parsed = {}
        for (name, convert), token in zip(self.FIELDS, tokens):
            parsed[name] = convert(token)
        return parsed
def pbkdf2_bin(data, salt, iterations=1000, keylen=24, hashfunc=None):
    """Return the binary PBKDF2 digest of `data` for the given `salt`.

    `data` is used as the HMAC key.  Each output block is produced with
    `iterations` rounds and the concatenated blocks are truncated to `keylen`
    bytes.  SHA-1 is used unless another hashlib `hashfunc` is supplied.
    """
    if not hashfunc:
        hashfunc = hashlib.sha1
    keyed_mac = hmac.new(data, None, hashfunc)

    def _prf(message, base=keyed_mac):
        # Copy the keyed HMAC so its state is reused for every message.
        digest = base.copy()
        digest.update(message)
        return map(ord, digest.digest())

    # Ceiling division: number of digest-sized blocks covering keylen bytes.
    block_count = -(-keylen // keyed_mac.digest_size)
    derived = []
    for block_index in xrange(1, block_count + 1):
        last = _prf(salt + _pack_int(block_index))
        acc = last
        for _ in xrange(iterations - 1):
            last = _prf(''.join(map(chr, last)))
            # XOR the running block with the newest round output (lazily).
            acc = starmap(xor, izip(acc, last))
        derived.extend(acc)
    return ''.join(map(chr, derived))[:keylen]
def write_genome_fasta(self, out_fasta_fn):
    """Write the reference FASTA to `out_fasta_fn`.

    With at most one genome the first input FASTA is copied as is through
    ``cr_utils.copy``.  With several genomes all input FASTA files are
    concatenated; every line is stripped and re-terminated with a newline,
    and each header line (starting with '>') gets '<genome_prefix>_'
    inserted right after the '>'.
    """
    if len(self.genomes) <= 1:
        cr_utils.copy(self.in_fasta_fns[0], out_fasta_fn)
        return

    with open(out_fasta_fn, 'w') as out_handle:
        sources = itertools.izip(self.genome_prefixes, self.in_fasta_fns)
        for prefix, fasta_path in sources:
            with open(fasta_path, 'r') as in_handle:
                for raw in in_handle:
                    record = raw.strip()
                    if record.startswith('>'):
                        # Namespace the sequence name with its genome.
                        record = '>' + prefix + '_' + record[1:]
                    out_handle.write(record + '\n')
def write_genome_gtf(self, out_gtf_fn):
    """Write the combined GTF for all genomes to `out_gtf_fn`.

    Rows come from ``self.gtf_reader_iter`` for each input GTF.  Comment rows
    are written unchanged.  For other rows, when there is more than one
    genome, the chromosome (column 0) and the ``transcript_id``, ``gene_id``
    and ``gene_name`` properties are prefixed with '<genome_prefix>_'; column
    8 is rebuilt with ``self.format_properties_dict``.

    A transcript that reappears on a different chromosome than its first
    occurrence is skipped.  After each input GTF a warning listing those
    transcripts is printed, but only when at least one was found.
    """
    multi_genome = len(self.genomes) > 1
    with open(out_gtf_fn, 'wb') as f:
        writer = csv.writer(f, delimiter='\t', quoting=csv.QUOTE_NONE, quotechar='')
        for genome_prefix, in_gtf_fn in itertools.izip(self.genome_prefixes, self.in_gtf_fns):
            if multi_genome:
                # Bind the prefix eagerly so the function does not depend on
                # the loop variable's later values.
                prefix_func = lambda s, prefix=genome_prefix: '%s_%s' % (prefix, s)
            else:
                prefix_func = lambda s: s

            transcript_to_chrom = {}
            cross_chrom_transcripts = set()
            for row, is_comment, properties in self.gtf_reader_iter(in_gtf_fn):
                if is_comment:
                    writer.writerow(row)
                    continue

                chrom = prefix_func(row[0])
                row[0] = chrom

                if 'transcript_id' in properties:
                    properties['transcript_id'] = prefix_func(properties['transcript_id'])
                    curr_tx = properties['transcript_id']
                    if transcript_to_chrom.get(curr_tx, chrom) != chrom:
                        # ignore recurrences of a transcript on different chromosomes - it will break the STAR index
                        cross_chrom_transcripts.add(curr_tx)
                        continue
                    transcript_to_chrom[curr_tx] = chrom
                if 'gene_id' in properties:
                    properties['gene_id'] = prefix_func(properties['gene_id'])
                if 'gene_name' in properties:
                    properties['gene_name'] = prefix_func(properties['gene_name'])

                row[8] = self.format_properties_dict(properties)
                writer.writerow(row)

            # Only warn when something was actually skipped; an unconditional
            # message would be a false alarm for clean annotations.
            if cross_chrom_transcripts:
                print("WARNING: The following transcripts appear on multiple chromosomes in the GTF:")
                print('\n'.join(sorted(cross_chrom_transcripts)) + '\n')
                print("This can indicate a problem with the reference or annotations. Only the first chromosome will be counted.")
def report(self):
    """Return the counts as a dict of ints keyed by strings.

    Keys "0" .. str(self.max_value) map to the leading entries of
    ``self.counts``; the key ">%d" % self.max_value maps to the last entry
    of ``self.counts``.
    """
    summary = {}
    for value, count in zip(range(self.max_value + 1), self.counts):
        summary[str(value)] = int(count)
    overflow_key = ">%d" % self.max_value
    summary[overflow_key] = int(self.counts[-1])
    return summary
def get_nonzero(self):
    """Return a list of (gene, barcode, value) tuples for the nonzero entries.

    The index pairs come from ``self.m.nonzero()``; the first index selects
    from ``self.genes``, the second from ``self.bcs``, and the value is read
    from ``self.m[i, j]``.
    """
    rows, cols = self.m.nonzero()
    triples = []
    for row, col in zip(rows, cols):
        triples.append((self.genes[row], self.bcs[col], self.m[row, col]))
    return triples
def load_genes_from_h5_group(group):
    """ Load just the genes from an h5 group.

    Gene ids are read from the attribute named by
    cr_constants.H5_GENE_IDS_ATTR.  Gene names are read from the attribute
    named by cr_constants.H5_GENE_NAMES_ATTR when the group has it; otherwise
    the ids double as names.  Returns a list of cr_constants.Gene built from
    each (id, name) pair with the remaining three fields set to None.
    """
    ids = list(getattr(group, cr_constants.H5_GENE_IDS_ATTR).read())
    names = ids
    if hasattr(group, cr_constants.H5_GENE_NAMES_ATTR):
        names = list(getattr(group, cr_constants.H5_GENE_NAMES_ATTR).read())
    assert len(ids) == len(names)

    genes = []
    for gene_id, gene_name in itertools.izip(ids, names):
        genes.append(cr_constants.Gene(gene_id, gene_name, None, None, None))
    return genes
def build_from_mol_counter(molecule_counter, subsample_rate=1.0,
                           subsample_result=None):
    """ Construct a GeneBCMatrices object from a MoleculeCounter.

    Args:
        molecule_counter: provides the barcode whitelist, barcode length,
            gem groups, reference columns and the molecule iterator.
        subsample_rate: passed through to molecule_counter.get_molecule_iter.
        subsample_result (dict): when not None, receives the key
            'mapped_reads' holding the sum of `reads` over all molecules
            that were iterated.

    Returns:
        The populated GeneBCMatrices.
    """
    # Rebuild every barcode sequence of the original matrices.
    whitelist = cr_utils.load_barcode_whitelist(molecule_counter.get_barcode_whitelist())
    # Fall back to the length of the first whitelist entry when the counter
    # reports no (falsy) barcode length.
    bc_length = molecule_counter.get_barcode_length() or len(whitelist[0])
    barcode_seqs = cr_utils.format_barcode_seqs(whitelist, molecule_counter.get_gem_groups())

    # Rebuild the Gene tuples from the molecule info reference columns.
    gene_ids = molecule_counter.get_ref_column('gene_ids')
    genome_ids = molecule_counter.get_ref_column('genome_ids')
    gene_names = molecule_counter.get_ref_column('gene_names')
    gene_tuples = []
    for gene_id, gene_name in itertools.izip(gene_ids, gene_names):
        gene_tuples.append(cr_constants.Gene(gene_id, gene_name, None, None, None))
    genes_by_genome = cr_utils.split_genes_by_genomes(gene_tuples, genome_ids)

    matrices = GeneBCMatrices(genome_ids, genes_by_genome, barcode_seqs)

    # Accumulate the read total while filling the matrices.
    total_reads = 0
    molecules = molecule_counter.get_molecule_iter(bc_length, subsample_rate=subsample_rate)
    for molecule in molecules:
        matrices.add(molecule.genome, molecule.gene_id, molecule.barcode)
        total_reads += molecule.reads

    if subsample_result is not None:
        subsample_result['mapped_reads'] = total_reads
    return matrices
def get_molecule_iter(self, barcode_length, subsample_rate=1.0):
    """ Return an iterator on Molecule tuples.

    Args:
        barcode_length: forwarded to self.decompress_barcode_seq.
        subsample_rate: value in [0, 1]; when below 1.0 the per-molecule read
            counts are replaced by np.random.binomial(reads, subsample_rate).

    Yields:
        Molecule(barcode, genome, gene_id, reads) for every row whose
        (possibly subsampled) read count is non-zero.

    Raises:
        AssertionError: if subsample_rate is outside [0, 1].
    """
    assert subsample_rate >= 0 and subsample_rate <= 1.0

    # Store the previous compressed barcode so we don't have to decompress every single row
    prev_compressed_bc = None
    prev_gem_group = None
    prev_bc = None

    # Load the molecule data
    mol_barcodes = self.get_column('barcode')
    mol_gem_groups = self.get_column('gem_group')
    mol_genome_ints = self.get_column('genome')
    mol_gene_ints = self.get_column('gene')
    mol_reads = self.get_column('reads')

    gene_ids = self.get_ref_column('gene_ids')
    genome_ids = self.get_ref_column('genome_ids')

    if subsample_rate < 1.0:
        mol_reads = np.random.binomial(mol_reads, subsample_rate)

    for compressed_bc, gem_group, genome_int, gene_int, reads in itertools.izip(mol_barcodes,
                                                                                mol_gem_groups,
                                                                                mol_genome_ints,
                                                                                mol_gene_ints,
                                                                                mol_reads):
        if reads == 0:
            continue

        # Decompress the cell barcode if necessary
        if compressed_bc == prev_compressed_bc and gem_group == prev_gem_group:
            bc = prev_bc
        else:
            bc = cr_utils.format_barcode_seq(self.decompress_barcode_seq(compressed_bc, barcode_length=barcode_length),
                                             gem_group)
            # Remember what was just decompressed; without this the cache
            # above never hits and every row is decompressed again.
            prev_compressed_bc = compressed_bc
            prev_gem_group = gem_group
            prev_bc = bc

        yield Molecule(barcode=bc,
                       genome=genome_ids[genome_int],
                       gene_id=gene_ids[gene_int],
                       reads=reads)