Actions
Pipeline template template » History » Revision 7
« Previous |
Revision 7/25
(diff)
| Next »
Bryan Cosca, 05/21/2015 06:32 PM
Pipeline template template¶
Run-Command Template¶
"NAME":{
    "script_version":"29009a1c1f8a9653042c5853832881aca4141cf2",
    "repository":"arvados",
    "script":"run-command",
    "script_parameters":{
        "input":{
            "output_of":"OUTPUT_OF_PREVIOUS_JOB_NAME"
        },
        "reference":{
            "dataclass":"Collection",
            "default":"3514b8e5da0e8d109946bc809b20a78a+5698",
            "link_name":"human_g1k_v37 reference data",
            "title":"NAME Input Reference genome (FASTA)"
        },
        "command":[
            "java",
            "-Xmx60g",
            "-jar",
            "$(dir $(gatk3))/GenomeAnalysisTK.jar",
            "-T",
            "PrintReads",
            "-R",
            "$(glob $(dir $(reference))/*.fasta)",
            {
                "foreach":"iterator",
                "command":[
                    "-I",
                    "$(iterator)"
                ]
            },
            "-BQSR",
            "$(bqsr_table)",
            "-nct",
            "16",
            "-o",
            "$(outputname)"
        ],
        "outputname":{
            "value":{
                "list":"iterator",
                "index":"0",
                "command":"$(basename $(iterator)).bqsrCal.bam"
            }
        },
        "bqsr_table":{
            "value":{
                "list":"iterator",
                "index":"0",
                "command":"$(dir $(bqsr))/$(basename $(basename $(iterator))).recal_data.table"
            }
        },
        "input_dir":"$(dir $(input))",
        "task.foreach":[
            "iterator"
        ],
        "iterator":{
            "value":{
                "group":"input_dir",
                "regex":"(.*)\\.realigned.bam"
            }
        },
        "gatk3":{
            "dataclass":"Collection",
            "default":"2e98fdc8e90f4c48a0714b711767c9ce+76",
            "link_name":"Genome Analysis Toolkit 3.2-2",
            "title":"NAME Input Version of GATK3 jar"
        },
        "dbsnp":{
            "dataclass":"Collection",
            "default":"8ac324bfa3dfff1ff81ed34b433869b1+6712",
            "title":"Single Nucleotide Polymorphism database",
            "description":"NAME Input DBsnp"
        }
    },
    "runtime_constraints":{
        "max_tasks_per_node":1,
        "min_nodes":1,
        "docker_image":"bcosc/arv-base-java",
        "arvados_sdk_version":"29009a1c1f8a9653042c5853832881aca4141cf2"
    },
    "output_name":false
}
Crunch Script Template
#!/usr/bin/env python
import arvados
import subprocess
import os
import sys
import re
from arvados.collection import Collection as coll
import arvados_tools
import shutil
# Crunch script template body.  Placeholders ("inserttool", the TODO values)
# must be filled in before the script is usable.

# Presumably fans out one task per file of the 'input' parameter whose name
# matches the pattern -- confirm against arvados_tools.  Dots in the pattern
# are escaped so they only match a literal '.'.
arvados_tools.spawn_new_task_per_file(
    'input', r'.*realigned\.bqsrCal\.bam$', if_sequence=0, and_end_task=True)

this_job = arvados.current_job()
this_task = arvados.current_task()
tmpdir = this_task.tmpdir

# Path of this task's input file inside the mounted 'input' job parameter.
input_1 = this_task['parameters']['input_1']
input_1_path = os.path.join(arvados.get_job_param_mount("input"), input_1)
tmp_input_1_path = os.path.join(tmpdir, input_1)  # If we need to copy to tmpdir
shutil.copyfile(input_1_path, tmp_input_1_path)  # If we need to copy to tmpdir

# Look up the tools and reference data by job parameter name and filename
# pattern (one lookup per line; the original had these fused on one line).
samtools_path = arvados_tools.get_file_path('samtools', '^samtools$')
gatk_path = arvados_tools.get_file_path('gatk_jar', r'^GenomeAnalysisTK\.jar$')
reference_path = arvados_tools.get_file_path('reference', r'.*f(ast)?a(\.gz)?$')
dbsnp_path = arvados_tools.get_file_path('dbsnp', '^dbsnp.*vcf$')

# TEMPLATE: rename "inserttool" to the tool being run and fill in the values.
gatk_inserttool_args = []  # TODO: full command line as a list of strings
gatk_inserttool_output_name = None  # TODO: name of the output file
gatk_inserttool_output_path = None  # TODO: where the output file is written
# check_output requires the command as its first argument; this still fails
# until gatk_inserttool_args is populated.
gatk_inserttool_pipe = subprocess.check_output(gatk_inserttool_args)

samtools_inserttool_args = []  # TODO: full command line as a list of strings
Script Parameter Template¶
Latest arvados_sdk_version:¶
https://arvados.org/projects/arvados/repository/revisions/master/show/sdk/python
Updated by Bryan Cosca over 9 years ago · 7 revisions