# Write and launch a shell script that computes QuASAR-Rep reproducibility
# scores for a pair of samples, plots a scatter of the scores, and splits
# them by chromosome.
def QuASAR_rep_wrapper(outdir,parameters,samplename1,samplename2,running_mode):
    script_comparison_file=outdir+'/scripts/QuASAR-Rep/'+samplename1+'.vs.'+samplename2+'/'+samplename1+'.vs.'+samplename2+'.QuASAR-Rep.sh'
    subp.check_output(['bash','-c','mkdir -p '+os.path.dirname(script_comparison_file)])
    script_comparison=open(script_comparison_file,'w')
    script_comparison.write("#!/bin/sh"+'\n')
    script_comparison.write('. '+bashrc_file+'\n')
    outpath=outdir+'/results/reproducibility/'+samplename1+'.vs.'+samplename2+'/QuASAR-Rep/'+samplename1+'.vs.'+samplename2+'.QuASAR-Rep.scores.txt'
    subp.check_output(['bash','-c','mkdir -p '+os.path.dirname(outpath)])
    quasar_data=outdir+'/data/forQuASAR'
    quasar_transform1=quasar_data+'/'+samplename1+'.quasar_transform'
    quasar_transform2=quasar_data+'/'+samplename2+'.quasar_transform'
    # score the two QuASAR-transformed samples against each other
    script_comparison.write('${mypython} '+os.path.dirname(os.path.dirname(os.path.abspath(os.path.dirname(os.path.realpath(__file__)))))+"/hifive/bin/find_quasar_replicate_score"+' '+quasar_transform1+' '+quasar_transform2+' '+outpath+'\n')
    script_comparison.write('${mypython} '+os.path.abspath(os.path.dirname(os.path.realpath(__file__)))+"/plot_quasar_scatter.py"+' '+quasar_transform1+' '+quasar_transform2+' '+outpath+'\n')
    # split the scores by chromosome
    script_comparison.write('${mypython} '+os.path.abspath(os.path.dirname(os.path.realpath(__file__)))+"/quasar_split_by_chromosomes.py"+' '+outpath+'\n')
    script_comparison.close()
    run_script(script_comparison_file,running_mode)
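# Note: the 'bash -c mkdir -p' calls above shell out just to create a
# directory tree. A minimal pure-Python sketch of the same pattern
# (ensure_parent_dir is illustrative, not part of the original module);
# on Python 3.2+, os.makedirs(..., exist_ok=True) mirrors 'mkdir -p':
def ensure_parent_dir(filepath):
    # create the parent directory of filepath if it does not already exist
    os.makedirs(os.path.dirname(filepath), exist_ok=True)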
def HiCSpector_wrapper(outdir,parameters,concise_analysis,samplename1,samplename2,chromo,running_mode,f1,f2,nodefile):
    script_comparison_file=outdir+'/scripts/HiC-spector/'+samplename1+'.'+samplename2+'/'+chromo+'.'+samplename1+'.'+samplename2+'.sh'
    subp.check_output(['bash','-c','mkdir -p '+os.path.dirname(script_comparison_file)])
    script_comparison=open(script_comparison_file,'w')
    script_comparison.write("#!/bin/sh"+'\n')
    script_comparison.write('. '+bashrc_file+'\n')
    if os.path.isfile(f1) and os.path.getsize(f1)>20:
        if os.path.isfile(f2) and os.path.getsize(f2)>20:
            outpath=outdir+'/results/reproducibility/'+samplename1+'.vs.'+samplename2+'/HiC-Spector/'+chromo+'.'+samplename1+'.vs.'+samplename2+'.scores.txt'
            subp.check_output(['bash','-c','mkdir -p '+os.path.dirname(outpath)])
            script_comparison.write("$mypython -W ignore "+os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/reproducibility_analysis/hic-spector_wrapper.py --m1 "+f1+" --m2 "+f2+" --out "+outpath+".printout --node_file "+nodefile+" --num_evec "+parameters['HiC-Spector']['n']+"\n")
            script_comparison.write("cat "+outpath+".printout | tail -n1 | cut -f2 | awk '{print \""+samplename1+"\\t"+samplename2+"\\t\"$3}' > "+outpath+'\n')
            script_comparison.write("rm "+outpath+".printout"+'\n')
            script_comparison.close()
            run_script(script_comparison_file,running_mode)
def GenomeDISCO_wrapper(outdir,parameters,concise_analysis,samplename1,samplename2,chromo,running_mode,f1,f2,nodefile):
    script_comparison_file=outdir+'/scripts/GenomeDISCO/'+samplename1+'.'+samplename2+'/'+chromo+'.'+samplename1+'.'+samplename2+'.sh'
    subp.check_output(['bash','-c','mkdir -p '+os.path.dirname(script_comparison_file)])
    script_comparison=open(script_comparison_file,'w')
    script_comparison.write("#!/bin/sh"+'\n')
    script_comparison.write('. '+bashrc_file+'\n')
    if os.path.isfile(f1) and os.path.getsize(f1)>20:
        if os.path.isfile(f2) and os.path.getsize(f2)>20:
            concise_analysis_text=''
            if concise_analysis:
                concise_analysis_text=' --concise_analysis'
            # get the sample that goes for subsampling
            subsampling=parameters['GenomeDISCO']['subsampling']
            if parameters['GenomeDISCO']['subsampling']!='NA' and parameters['GenomeDISCO']['subsampling']!='lowest':
                subsampling_sample=parameters['GenomeDISCO']['subsampling']
                subsampling=outdir+'/data/edges/'+subsampling_sample+'/'+subsampling_sample+'.'+chromo+'.gz'
            outpath=outdir+'/results/reproducibility/'+samplename1+'.vs.'+samplename2+'/GenomeDISCO/'
            subp.check_output(['bash','-c','mkdir -p '+outpath])
            script_comparison.write("$mypython -W ignore "+os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))+"/genomedisco/compute_reproducibility.py")+" --m1 "+f1+" --m2 "+f2+" --m1name "+samplename1+" --m2name "+samplename2+" --node_file "+nodefile+" --outdir "+outpath+" --outpref "+chromo+" --m_subsample "+subsampling+" --approximation 10000000 --norm "+parameters['GenomeDISCO']['norm']+" --method RandomWalks "+" --tmin "+parameters['GenomeDISCO']['tmin']+" --tmax "+parameters['GenomeDISCO']['tmax']+concise_analysis_text+'\n')
            script_comparison.close()
            run_script(script_comparison_file,running_mode)
def _GetMostRecentDockerImageFromGcloud(docker_image):
    """Get most recent <docker_image>:tag for this docker_image.

    Args:
      docker_image: (string) docker image on Google Cloud.

    Returns:
      docker_image:tag if at least one tag was found for docker_image.
      Otherwise, returns None.
    """
    tag = subprocess.check_output(
        ['gcloud', 'container', 'images', 'list-tags',
         docker_image, '--limit=1', '--format=value(tags[0])'])
    tag = tag.strip()
    if not tag:
        return None
    return '%s:%s' % (docker_image, tag)
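# On Python 3, check_output returns bytes, so formatting the stripped tag with
# '%s' would yield a "b'...'" literal in the result. A sketch of an explicitly
# decoded variant (the function name is illustrative, not part of the original
# module; same gcloud invocation as above):
def _GetMostRecentDockerImageFromGcloudDecoded(docker_image):
    tag = subprocess.check_output(
        ['gcloud', 'container', 'images', 'list-tags',
         docker_image, '--limit=1', '--format=value(tags[0])']).decode('utf-8').strip()
    return '%s:%s' % (docker_image, tag) if tag else None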
def _GetPodNames(pod_name_prefix, job_name=None):
    """Get pod names based on the pod_name_prefix and job_name.

    Args:
      pod_name_prefix: value of 'name-prefix' selector.
      job_name: value of 'job' selector. If None, pod names will be
        selected only based on 'name-prefix' selector.

    Returns:
      List of pod names.
    """
    pod_list_command = [
        _KUBECTL, 'get', 'pods', '-o', 'name', '-a',
        '-l', _GetJobSelector(pod_name_prefix, job_name)]
    logging.info('Command to get pod names: %s', ' '.join(pod_list_command))
    output = subprocess.check_output(pod_list_command, universal_newlines=True)
    pod_names = [name for name in output.strip().split('\n') if name]
    logging.info('Pod names: "%s"', ','.join(pod_names))
    return pod_names
def _PrintLogs(pod_name_prefix, job_name):
    """Prints pod logs.

    If a pod has been restarted, prints logs from previous run. Otherwise,
    prints the logs from current run. We print logs for pods selected
    based on pod_name_prefix and job_name.

    Args:
      pod_name_prefix: value of 'name-prefix' selector.
      job_name: value of 'job' selector.
    """
    for pod_name in _GetPodNames(pod_name_prefix, job_name):
        try:
            # Get previous logs.
            logs_command = [_KUBECTL, 'logs', '-p', pod_name]
            logging.info('Command to get logs: %s', ' '.join(logs_command))
            output = subprocess.check_output(logs_command, universal_newlines=True)
        except subprocess.CalledProcessError:
            # We couldn't get previous logs, so we will try to get current logs.
            logs_command = [_KUBECTL, 'logs', pod_name]
            logging.info('Command to get logs: %s', ' '.join(logs_command))
            output = subprocess.check_output(logs_command, universal_newlines=True)
        print('%s logs:' % pod_name)
        print(output)
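# Usage sketch (selector values are illustrative): 'kubectl logs -p' exits
# non-zero when a pod has no previous instantiation, so the except branch
# above falls back to the current run's logs.
#
#   _PrintLogs('worker', 'train')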
def __init__(self, fqArchiveUrl, filtersDir, outputPrefix, outputUrl, diskSize, diskType, logsPath, container, scriptUrl, tag, cores, mem, preemptible):
    super(PipelineStep, self).__init__()
    fqFileName = os.path.basename(fqArchiveUrl)
    fqInputs = "{fqArchive}:{fqFileName}".format(fqArchive=fqArchiveUrl, fqFileName=fqFileName)
    try:
        filtersDirContents = subprocess.check_output(["gsutil", "ls", filtersDir])
    except subprocess.CalledProcessError as e:
        print("ERROR: couldn't get a listing of filter files! -- {reason}".format(reason=e))
        exit(-1)
    # keep Bloom filter (.bf) files and their accompanying .txt files
    bfInputs = [x for x in filtersDirContents.split('\n') if re.match(r'^.*\.bf$', x) or re.match(r'^.*\.txt', x)]
    bfInputs.append(fqInputs)
    inputs = ",".join(["{url}:{filename}".format(url=x, filename=os.path.basename(x)) for x in bfInputs])
    outputs = "{outputPrefix}*:{outDir}".format(outputPrefix=outputPrefix, outDir=outputUrl)
    env = "INPUT_FILE={fqFileName},OUTPUT_PREFIX={outputPrefix},FILTERS_LIST={filtersList}".format(fqFileName=fqFileName, outputPrefix=outputPrefix, filtersList=','.join([os.path.basename(x) for x in bfInputs if re.match(r'^.*\.bf$', x)]))
    self._step = PipelineSchema("biobloomcategorizer",
                                self._pipelinesConfig,
                                logsPath,
                                container,
                                scriptUrl=scriptUrl,
                                cores=cores,
                                mem=mem,
                                diskSize=diskSize,
                                diskType=diskType,
                                inputs=inputs,
                                outputs=outputs,
                                env=env,
                                tag=tag,
                                preemptible=preemptible)
def getJobLogs(args, config):  # TODO: reimplement
    pipelineDbUtils = PipelineDbUtils(config)
    jobInfo = pipelineDbUtils.getJobInfo(select=["stdout_log", "stderr_log", "gcs_log_path"],
                                         where={"job_id": args.jobId})
    with open(os.devnull, 'w') as fnull:
        if args.stdout:
            try:
                stdoutLogFile = subprocess.check_output(
                    ["gsutil", "cat", os.path.join(jobInfo[0].gcs_log_path, jobInfo[0].stdout_log)], stderr=fnull)
            except subprocess.CalledProcessError as e:
                print("ERROR: couldn't get the stdout log : {reason}".format(reason=e))
                exit(-1)
            print("STDOUT:\n")
            print(stdoutLogFile)
            print("---------\n")
        if args.stderr:
            try:
                stderrLogFile = subprocess.check_output(
                    ["gsutil", "-q", "cat", os.path.join(jobInfo[0].gcs_log_path, jobInfo[0].stderr_log)],
                    stderr=fnull)
            except subprocess.CalledProcessError as e:
                print("ERROR: couldn't get the stderr log : {reason}".format(reason=e))
                exit(-1)
            print("STDERR:\n")
            print(stderrLogFile)
            print("---------\n")
    pipelineDbUtils.closeConnection()
def calculateDiskSize(inputFile=None, inputFileSize=None, analysisId=None, scalingFactor=None, roundToNearestGbInterval=None):
    if inputFile is not None:
        fileSize = int(subprocess.check_output(["gsutil", "du", inputFile]).split(' ')[0])
    elif inputFileSize is not None:
        fileSize = inputFileSize
    elif analysisId is not None:
        analysisDetail = DataUtils.getAnalysisDetail(analysisId)
        if len(analysisDetail["result_set"]["results"]) > 0:
            files = analysisDetail["result_set"]["results"][0]["files"]
            fileSize = sum([int(x["filesize"]) for x in files])
        else:
            print("ERROR: no files found for analysis ID {a}!".format(a=analysisId))
            exit(-1)
    if scalingFactor is not None:
        scalingFactor = int(scalingFactor)
    else:
        scalingFactor = 1
    if roundToNearestGbInterval is not None:
        roundTo = float(roundToNearestGbInterval) * 1000000000
    else:
        # default to 1-byte granularity so roundTo is always defined
        roundTo = 1
    return int(math.ceil(scalingFactor * fileSize / roundTo) * roundTo) / 1000000000
def calculateDiskSize(tokenFile=None, fileUuid=None, inputFiles=None, inputFileSize=None, scalingFactor=None, roundToNearestGbInterval=None):
    if inputFiles is not None:
        fileSize = 0
        for f in inputFiles:
            fileSize += int(subprocess.check_output(["gsutil", "du", f]).split(' ')[0])
    elif fileUuid is not None:
        fileSize = GDCDataUtils.getFilesize(fileUuid, tokenFile)
    elif inputFileSize is not None:
        fileSize = inputFileSize
    else:
        raise DataUtilsError("Couldn't determine disk size! Please provide a path to an existing file in GCS or a file uuid from the GDC.")
    if scalingFactor is not None:
        scalingFactor = int(scalingFactor)
    else:
        scalingFactor = 1
    if roundToNearestGbInterval is not None:
        roundTo = float(roundToNearestGbInterval) * 1000000000
    else:
        roundTo = 1
    return int(math.ceil(scalingFactor * fileSize / roundTo) * roundTo) / 1000000000
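# Worked example of the rounding arithmetic above (numbers are illustrative):
# a 3.2 GB input scaled by 2 and rounded up to the nearest 10 GB interval
# gives ceil(6.4e9 / 1e10) * 1e10 / 1e9 = 10 GB of requested disk.
assert calculateDiskSize(inputFileSize=3200000000, scalingFactor=2,
                         roundToNearestGbInterval=10) == 10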
def docker_version():
    out = subprocess.check_output(["docker", "-v"])
    mo = re.match(br"Docker version (\d+)\.(\d+)\.(\d+)", out)
    if mo:
        return tuple(map(int, mo.groups()))
    die("unable to parse a version number from the output of 'docker -v'")
def version():
    __version = '0.2.1'
    __tag = 'b'
    if path.exists('.git'):
        __tag = 'git'
        __build = subprocess.check_output('git rev-list HEAD --count'.split()).decode().strip()
    else:
        __build = __tag
    return '%s.%s.%s' % (__version, __tag, __build)
def get_cidr_from_iface(interface):
    '''
    Determines Network CIDR from interface.
    '''
    if not interface:
        return None
    apt_install('ohai')
    try:
        os_info = subprocess.check_output(['ohai', '-l', 'fatal'])
    except OSError:
        log('Unable to get operating system information')
        return None
    try:
        os_info_json = json.loads(os_info)
    except ValueError:
        log('Unable to determine network')
        return None
    device = os_info_json['network']['interfaces'].get(interface)
    if device is not None:
        if device.get('routes'):
            routes = device['routes']
            for net in routes:
                if 'scope' in net:
                    return net.get('destination')
        else:
            return None
    else:
        return None
def systemv_services_running():
    output = subprocess.check_output(
        ['service', '--status-all'],
        stderr=subprocess.STDOUT).decode('UTF-8')
    return [row.split()[-1] for row in output.split('\n') if '[ + ]' in row]
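# Illustrative check of the parsing above: 'service --status-all' prints rows
# like ' [ + ]  ssh' for running services, and the comprehension keeps the
# last whitespace-separated token of each '[ + ]' row (sample rows fabricated):
assert [r.split()[-1] for r in ' [ + ]  ssh\n [ - ]  ufw'.split('\n')
        if '[ + ]' in r] == ['ssh']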
def rsync(from_path, to_path, flags='-r', options=None):
    """Replicate the contents of a path"""
    options = options or ['--delete', '--executability']
    cmd = ['/usr/bin/rsync', flags]
    cmd.extend(options)
    cmd.append(from_path)
    cmd.append(to_path)
    log(" ".join(cmd))
    return subprocess.check_output(cmd).decode('UTF-8').strip()
def mount(device, mountpoint, options=None, persist=False, filesystem="ext3"):
    """Mount a filesystem at a particular mountpoint"""
    cmd_args = ['mount']
    if options is not None:
        cmd_args.extend(['-o', options])
    cmd_args.extend([device, mountpoint])
    try:
        subprocess.check_output(cmd_args)
    except subprocess.CalledProcessError as e:
        log('Error mounting {} at {}\n{}'.format(device, mountpoint, e.output))
        return False
    if persist:
        return fstab_add(device, mountpoint, filesystem, options=options)
    return True
def umount(mountpoint, persist=False):
    """Unmount a filesystem"""
    cmd_args = ['umount', mountpoint]
    try:
        subprocess.check_output(cmd_args)
    except subprocess.CalledProcessError as e:
        log('Error unmounting {}\n{}'.format(mountpoint, e.output))
        return False
    if persist:
        return fstab_remove(mountpoint)
    return True
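# Usage sketch for mount/umount (device, mountpoint, and options below are
# illustrative):
#
#   if mount('/dev/vdb', '/mnt/data', options='noatime', filesystem='ext4',
#            persist=True):
#       ...  # persist=True also records the mount in fstab via fstab_add()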
def list_nics(nic_type=None):
    """Return a list of nics of given type(s)"""
    if isinstance(nic_type, six.string_types):
        int_types = [nic_type]
    else:
        int_types = nic_type
    interfaces = []
    if nic_type:
        for int_type in int_types:
            cmd = ['ip', 'addr', 'show', 'label', int_type + '*']
            ip_output = subprocess.check_output(cmd).decode('UTF-8')
            ip_output = ip_output.split('\n')
            ip_output = (line for line in ip_output if line)
            for line in ip_output:
                if line.split()[1].startswith(int_type):
                    matched = re.search('.*: (' + int_type +
                                        r'[0-9]+\.[0-9]+)@.*', line)
                    if matched:
                        iface = matched.groups()[0]
                    else:
                        iface = line.split()[1].replace(":", "")
                    if iface not in interfaces:
                        interfaces.append(iface)
    else:
        cmd = ['ip', 'a']
        ip_output = subprocess.check_output(cmd).decode('UTF-8').split('\n')
        ip_output = (line.strip() for line in ip_output if line)
        key = re.compile(r'^[0-9]+:\s+(.+):')
        for line in ip_output:
            matched = re.search(key, line)
            if matched:
                iface = matched.group(1)
                iface = iface.partition("@")[0]
                if iface not in interfaces:
                    interfaces.append(iface)
    return interfaces
def get_nic_hwaddr(nic):
    """Return the Media Access Control (MAC) for a network interface."""
    cmd = ['ip', '-o', '-0', 'addr', 'show', nic]
    ip_output = subprocess.check_output(cmd).decode('UTF-8')
    hwaddr = ""
    words = ip_output.split()
    if 'link/ether' in words:
        hwaddr = words[words.index('link/ether') + 1]
    return hwaddr
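# Illustrative check of the parsing above: 'ip -o -0 addr show <nic>' places
# the MAC right after the 'link/ether' token (the line below is a fabricated
# example of that output format):
sample = '2: eth0    link/ether aa:bb:cc:dd:ee:ff brd ff:ff:ff:ff:ff:ff'
assert sample.split()[sample.split().index('link/ether') + 1] == 'aa:bb:cc:dd:ee:ff'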
def lsmod():
    """Shows what kernel modules are currently loaded"""
    return check_output(['lsmod'],
                        universal_newlines=True)