def set_representative_structure(self, seq_outdir=None, struct_outdir=None, pdb_file_type=None,
engine='needle', always_use_homology=False, rez_cutoff=0.0,
seq_ident_cutoff=0.5, allow_missing_on_termini=0.2,
allow_mutants=True, allow_deletions=False,
allow_insertions=False, allow_unresolved=True,
clean=True, force_rerun=False):
"""Set all representative structure for proteins from a structure in the structures attribute.
Each gene can have a combination of the following, which will be analyzed to set a representative structure.
* Homology model(s)
* Ranked PDBs
* BLASTed PDBs
If the ``always_use_homology`` flag is true, homology models are always set as representative when they exist.
If there are multiple homology models, we rank by the percent sequence coverage.
Args:
seq_outdir (str): Path to output directory of sequence alignment files, must be set if GEM-PRO directories
were not created initially
struct_outdir (str): Path to output directory of structure files, must be set if GEM-PRO directories
were not created initially
pdb_file_type (str): ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``, ``xml.gz``, ``mmtf``, ``mmtf.gz`` -
choose a file type for files downloaded from the PDB
engine (str): ``biopython`` or ``needle`` - which pairwise alignment program to use.
``needle`` is the standard EMBOSS tool to run pairwise alignments.
``biopython`` is Biopython's implementation of needle. Results can differ!
always_use_homology (bool): If homology models should always be set as the representative structure
rez_cutoff (float): Resolution cutoff, in Angstroms (only if experimental structure)
seq_ident_cutoff (float): Percent sequence identity cutoff, in decimal form
allow_missing_on_termini (float): Percentage of the total length of the reference sequence which will be ignored
when checking for modifications. Example: if 0.1, and reference sequence is 100 AA, then only residues
5 to 95 will be checked for modifications.
allow_mutants (bool): If mutations should be allowed or checked for
allow_deletions (bool): If deletions should be allowed or checked for
allow_insertions (bool): If insertions should be allowed or checked for
allow_unresolved (bool): If unresolved residues should be allowed or checked for
clean (bool): If structures should be cleaned
force_rerun (bool): If sequence to structure alignment should be rerun
"""
for g in tqdm(self.genes):
repstruct = g.protein.set_representative_structure(seq_outdir=seq_outdir,
struct_outdir=struct_outdir,
pdb_file_type=pdb_file_type,
engine=engine,
rez_cutoff=rez_cutoff,
seq_ident_cutoff=seq_ident_cutoff,
always_use_homology=always_use_homology,
allow_missing_on_termini=allow_missing_on_termini,
allow_mutants=allow_mutants,
allow_deletions=allow_deletions,
allow_insertions=allow_insertions,
allow_unresolved=allow_unresolved,
clean=clean,
force_rerun=force_rerun)
log.info('{}/{}: number of genes with a representative structure'.format(len(self.genes_with_a_representative_structure),
len(self.genes)))
log.info('See the "df_representative_structures" attribute for a summary dataframe.')
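# A minimal usage sketch, assuming `my_gempro` (a hypothetical name) is an
# already-mapped GEM-PRO pipeline object whose output directories were set up
# at initialization:
my_gempro.set_representative_structure(engine='needle',
                                       seq_ident_cutoff=0.5,
                                       allow_mutants=True)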
import mne

# `_handle_picks`, `_get_epochs_type`, `_pbar`, and `interpolate_bads` are
# helpers expected from the enclosing package's utils module.
def clean_by_interp(inst, picks=None, verbose='progressbar'):
"""Clean epochs/evoked by LOOCV.
Parameters
----------
inst : instance of mne.Evoked or mne.Epochs
The evoked or epochs object.
picks : ndarray, shape(n_channels,) | None
The channels to be considered for autoreject. If None, defaults
to data channels {'meg', 'eeg'}.
verbose : 'tqdm', 'tqdm_notebook', 'progressbar' or False
The verbosity of progress messages.
If `'progressbar'`, use `mne.utils.ProgressBar`.
If `'tqdm'`, use `tqdm.tqdm`.
If `'tqdm_notebook'`, use `tqdm.tqdm_notebook`.
If False, suppress all output messages.
Returns
-------
inst_clean : instance of mne.Evoked or mne.Epochs
Instance after interpolation of bad channels.
"""
inst_interp = inst.copy()
mesg = 'Creating augmented epochs'
picks = _handle_picks(info=inst_interp.info, picks=picks)
BaseEpochs = _get_epochs_type()
ch_names = [inst.info['ch_names'][p] for p in picks]
for pick, ch in _pbar(list(zip(picks, ch_names)), desc=mesg, verbose=verbose):
inst_clean = inst.copy()
inst_clean.info['bads'] = [ch]
interpolate_bads(inst_clean, picks=picks, reset_bads=True, mode='fast')
pick_interp = mne.pick_channels(inst_clean.info['ch_names'], [ch])[0]
if isinstance(inst, mne.Evoked):
inst_interp.data[pick] = inst_clean.data[pick_interp]
elif isinstance(inst, BaseEpochs):
inst_interp._data[:, pick] = inst_clean._data[:, pick_interp]
else:
raise ValueError('Unrecognized type for inst')
return inst_interp
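# A minimal usage sketch, assuming saved epochs exist at the hypothetical path
# 'sample-epo.fif':
import mne
epochs = mne.read_epochs('sample-epo.fif', preload=True)
epochs_clean = clean_by_interp(epochs, verbose='tqdm')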
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
from matplotlib.animation import FuncAnimation

def animate(self, X, y, useTqdm=0, filename=None, return_anim=True):
pos = self.getSteps(X,y)
y_mapping = {i:n for n,i in enumerate(set(y))}
last_iter = pos[len(pos)-1].reshape(-1, 2)
lims = np.max(last_iter,axis=0),np.min(last_iter,axis=0)
NCOLORS = len(y_mapping)
fig = plt.figure()
fig.set_tight_layout(True)
ax = fig.add_subplot(111)
jet = plt.get_cmap('jet')
cNorm = colors.Normalize(vmin=0, vmax=NCOLORS)
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
A,B = np.array(list(zip(*pos[0].reshape(-1, 2))))
dots_list = []
for i in range(NCOLORS):
colorVal = scalarMap.to_rgba(i)
a,b = A[y == i],B[y == i]
    dots, = ax.plot(a, b, 'o', color=colorVal)  # x=first column, y=second, matching update()
dots_list.append(dots)
def init():
ax.set_xlim([lims[0][0],lims[1][0]])
ax.set_ylim([lims[0][1],lims[1][1]])
return [i for i in dots_list]
def update(i):
for j in range(len(dots_list)):
a,b = np.array(list(zip(*pos[i].reshape(-1, 2))))
a,b = a[y == j],b[y == j]
dots_list[j].set_xdata(a)
dots_list[j].set_ydata(b)
return [i for i in dots_list]+[ax]
if useTqdm==0:
frames = np.arange(0, len(pos)-1)
elif useTqdm==1:
from tqdm import tqdm
frames = tqdm(np.arange(0, len(pos)-1))
elif useTqdm==2:
from tqdm import tqdm_notebook
frames = tqdm_notebook(np.arange(0, len(pos)-1))
anim = FuncAnimation(fig, update, frames=frames, init_func=init, interval=50)
if return_anim:
return anim
if filename is None:
plt.show()
else:
#anim.save(filename, fps=20, codec='libx264')
anim.save(filename, dpi=80, writer='imagemagick')
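# A minimal usage sketch, assuming `vis` (hypothetical) is a fitted instance of
# the enclosing class, run on scikit-learn's digits dataset:
from sklearn.datasets import load_digits
X, y = load_digits(return_X_y=True)
anim = vis.animate(X, y, useTqdm=1)  # returns a FuncAnimation by default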
import tqdm
import pandas as pd

# `ConceptTree`, `Mappings`, `path_converter`, and `path_join` are helpers
# expected from the enclosing package.
def create_tree_from_clinical(clinical_object, concept_tree=None):
"""
:param clinical_object:
:param concept_tree:
:return:
"""
if not concept_tree:
concept_tree = ConceptTree()
column_map_ids = clinical_object.ColumnMapping.ids
no_bar = len(column_map_ids) < 200
bar_format = '{l_bar}{bar} | {n_fmt}/{total_fmt} nodes ready, {rate_fmt}'
for var_id, variable in tqdm.tqdm_notebook(clinical_object.all_variables.items(),
bar_format=bar_format,
unit=' nodes',
leave=False,
dynamic_ncols=True,
disable=no_bar):
data_args = variable.column_map_data
# Don't need these, they're in the tree.
for k in [Mappings.cat_cd_s, Mappings.data_label_s]:
data_args.pop(k)
concept_path = path_converter(variable.concept_path, to_internal=True)
categories = {} if variable.is_numeric else variable.word_map_dict
if categories:
node_type = 'categorical'
else:
node_type = 'empty' if variable.is_empty else 'numeric'
# Store node type in `data` so it can be changed back after renaming OMIT
data_args.update({'ctype': node_type})
# Store column header of variable.
data_args.update({'dfh': variable.header})
# Add filename to SUBJ_ID and OMIT, this is a work around for unique path constraint.
if variable.data_label in {"SUBJ_ID", "OMIT"}:
concept_path = concept_path.replace("SUBJ ID", "SUBJ_ID")
node_type = 'codeleaf'
# Add categorical values to concept tree (if any)
for i, datafile_value in enumerate(categories):
oid = var_id.create_category(i + 1)
mapped = categories[datafile_value]
mapped = mapped if not pd.isnull(mapped) else ''
categorical_path = path_join(concept_path, mapped)
concept_tree.add_node(categorical_path, oid,
node_type='alpha',
data_args={Mappings.df_value_s: datafile_value})
concept_tree.add_node(concept_path, var_id,
node_type=node_type, data_args=data_args)
return concept_tree
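# A minimal usage sketch, assuming `clinical` (hypothetical) is a loaded
# clinical object exposing `ColumnMapping` and `all_variables`:
tree = create_tree_from_clinical(clinical)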
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm

def parallel_process(array, function, n_jobs=8, use_kwargs=False, front_num=3):
"""
A parallel version of the map function with a progress bar.
Args:
array (array-like): An array to iterate over.
        function (function): A Python function to apply to the elements of array
        n_jobs (int, default=8): The number of cores to use
use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of
keyword arguments to function
front_num (int, default=3): The number of iterations to run serially before kicking off the parallel job.
Useful for catching bugs
    Returns:
        [function(array[0]), function(array[1]), ...]. If a call raises an
        exception, the exception object is returned in its place.
"""
# We run the first few iterations serially to catch bugs.
front = []
if front_num > 0:
    front = [function(**a) if use_kwargs else function(a) for a in array[:front_num]]
# If n_jobs is 1, just run a list comprehension. This is useful for benchmarking and debugging.
if n_jobs == 1:
    return front + [function(**a) if use_kwargs else function(a) for a in tqdm(array[front_num:])]
#Assemble the workers
with ProcessPoolExecutor(max_workers=n_jobs) as pool:
#Pass the elements of array into function
if use_kwargs:
futures = [pool.submit(function, **a) for a in array[front_num:]]
else:
futures = [pool.submit(function, a) for a in array[front_num:]]
kwargs = {
'total': len(futures),
'unit': 'it',
'unit_scale': True,
'leave': True,
'smoothing': 0.1,
}
#Print out the progress as tasks complete
for f in tqdm(as_completed(futures), **kwargs):
pass
out = []
# Get the results from the futures; a raised exception becomes that element's result.
for future in tqdm(futures):
    try:
        out.append(future.result())
    except Exception as e:
        out.append(e)
return front + out
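# A minimal usage sketch: square roots of 0..9999 across 8 worker processes.
# On platforms that spawn workers (Windows, macOS), run this under an
# `if __name__ == '__main__':` guard.
import math
results = parallel_process(list(range(10000)), math.sqrt, n_jobs=8)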
def __iter__(self):
state = self.prepareState(self._endpoint, self._filters, **self._prepareStateParams)
entries = self._endpoint(sort=self._sort, n=self._n, **self._filters)
if self._progbar:
try:
get_ipython
inNotebook = True
except NameError:
inNotebook = False
if not inNotebook:
sys.stderr.write("Locating data...")
entries = list(entries)
if self._progbar and not inNotebook:
sys.stderr.write("\r")
if self._progbar:
try:
        get_ipython  # will fail faster and more reliably than tqdm_notebook
        entriesIterable = tqdm_notebook(entries, unit="entries")
    except (NameError, AttributeError, TypeError):
        entriesIterable = tqdm(entries, unit="entries")
else:
entriesIterable = entries
def iterate():
for entry in entriesIterable:
try:
data = self.parse(entry, state= state) if state is not None else self.parse(entry)
yield entry, data
except KeyboardInterrupt:
self._write('Interrupted while parsing "{}"'.format(entry.path))
break
        except GeneratorExit:
            raise
        except Exception:
            self._write('Error while parsing "{}":'.format(entry.path))
            self._write(traceback.format_exc())
# chain the operations together
# each function in self._chain is a generator which takes an iterator
# (remember that you call a generator to "activate" it: calling a generator returns an iterator)
# so end condition for the loop is that `iterate` refers to an iterator
iterate = iterate()
for do in self._chain:
iterate = do(iterate)
return iterate
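# A standalone sketch of the generator-chaining pattern described above: each
# stage takes an iterator and returns a new one, so the stages compose lazily.
def keep_even(it):
    for x in it:
        if x % 2 == 0:
            yield x

def double(it):
    for x in it:
        yield x * 2

stream = iter(range(5))
for stage in (keep_even, double):
    stream = stage(stream)
print(list(stream))  # [0, 4, 8]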