def __init__(self, fqArchiveUrl, filtersDir, outputPrefix, outputUrl,
             diskSize, diskType, logsPath, container, scriptUrl, tag,
             cores, mem, preemptible):
    """Build the "biobloomcategorizer" pipeline step.

    Lists ``filtersDir`` with ``gsutil ls``, stages every ``.bf`` / ``.txt``
    entry found there together with the FASTQ archive as step inputs, and
    stores the resulting ``PipelineSchema`` on ``self._step``.

    Args:
        fqArchiveUrl: URL of the FASTQ archive. Its basename is the name
            the archive is staged under and the value of ``INPUT_FILE``.
        filtersDir: Location handed to ``gsutil ls`` to discover the
            filter files.
        outputPrefix: Value of ``OUTPUT_PREFIX``; outputs are declared as
            ``<outputPrefix>*``.
        outputUrl: Destination the ``<outputPrefix>*`` outputs map to.
        diskSize, diskType, logsPath, container, scriptUrl, tag, cores,
        mem, preemptible: Forwarded unchanged to ``PipelineSchema``.

    Raises:
        SystemExit: If ``gsutil ls`` exits with a non-zero status.
    """
    # Function-scope import: only the failure path below needs it, and the
    # file's top-level import block is not visible from this method.
    import sys

    # NOTE(review): self._pipelinesConfig is read below but never assigned
    # here -- presumably the parent initialiser sets it; confirm.
    super(PipelineStep, self).__init__()

    fqFileName = os.path.basename(fqArchiveUrl)

    try:
        # universal_newlines=True makes the listing text (not bytes) on
        # Python 3 as well, so the split on '\n' below works on both.
        listing = subprocess.check_output(
            ["gsutil", "ls", filtersDir], universal_newlines=True)
    except subprocess.CalledProcessError as e:
        print("ERROR: couldn't get a listing of filter files! -- {reason}".format(reason=e))
        sys.exit(-1)

    entries = listing.split('\n')

    # Only the bloom filters themselves (*.bf) are named in FILTERS_LIST;
    # the *.txt entries are staged alongside them but not listed.
    filterUrls = [entry for entry in entries if entry.endswith('.bf')]

    # Both suffixes are anchored at the end of the name. The previous
    # '.txt' pattern had no '$' and also matched names such as 'x.txt.bak'.
    stagedUrls = [entry for entry in entries if entry.endswith(('.bf', '.txt'))]

    # Append the raw archive URL, not a pre-formatted "url:filename" pair:
    # every element is formatted as "url:filename" exactly once below.
    # Appending the already-formatted pair produced
    # "url:filename:filename:filename" for the FASTQ input.
    stagedUrls.append(fqArchiveUrl)

    inputs = ",".join(
        "{url}:{filename}".format(url=url, filename=os.path.basename(url))
        for url in stagedUrls)
    outputs = "{outputPrefix}*:{outDir}".format(outputPrefix=outputPrefix, outDir=outputUrl)

    # NOTE(review): FILTERS_LIST is itself comma-separated inside a
    # comma-separated KEY=VALUE string -- verify how the consumer of `env`
    # splits it when more than one filter is present.
    env = "INPUT_FILE={fqFileName},OUTPUT_PREFIX={outputPrefix},FILTERS_LIST={filtersList}".format(
        fqFileName=fqFileName,
        outputPrefix=outputPrefix,
        filtersList=','.join(os.path.basename(url) for url in filterUrls))

    self._step = PipelineSchema("biobloomcategorizer",
                                self._pipelinesConfig,
                                logsPath,
                                container,
                                scriptUrl=scriptUrl,
                                cores=cores,
                                mem=mem,
                                diskSize=diskSize,
                                diskType=diskType,
                                inputs=inputs,
                                outputs=outputs,
                                env=env,
                                tag=tag,
                                preemptible=preemptible)
评论列表
文章目录