Pipeline template template » History » Revision 4
Revision 3 (Bryan Cosca, 05/21/2015 05:25 PM) → Revision 4/25 (Bryan Cosca, 05/21/2015 06:22 PM)
h1. Pipeline template template

h2. Run-Command Template

<pre>
"PrintReads":{
    "script_version":"29009a1c1f8a9653042c5853832881aca4141cf2",
    "repository":"arvados",
    "script":"run-command",
    "script_parameters":{
        "input":{
            "output_of":"Indel-Realignment"
        },
        "reference":{
            "dataclass":"Collection",
            "default":"3514b8e5da0e8d109946bc809b20a78a+5698",
            "link_name":"human_g1k_v37 reference data",
            "title":"Base-Recalibration Input Reference genome (FASTA)"
        },
        "command":[
            "java",
            "-Xmx60g",
            "-jar",
            "$(dir $(gatk3))/GenomeAnalysisTK.jar",
            "-T",
            "PrintReads",
            "-R",
            "$(glob $(dir $(reference))/*.fasta)",
            {
                "foreach":"iterator",
                "command":[
                    "-I",
                    "$(iterator)"
                ]
            },
            "-BQSR",
            "$(bqsr_table)",
            "-nct",
            "16",
            "-o",
            "$(outputname)"
        ],
        "outputname":{
            "value":{
                "list":"iterator",
                "index":"0",
                "command":"$(basename $(iterator)).bqsrCal.bam"
            }
        },
        "bqsr_table":{
            "value":{
                "list":"iterator",
                "index":"0",
                "command":"$(dir $(bqsr))/$(basename $(basename $(iterator))).recal_data.table"
            }
        },
        "input_dir":"$(dir $(input))",
        "task.foreach":[
            "iterator"
        ],
        "iterator":{
            "value":{
                "group":"input_dir",
                "regex":"(.*)\\.realigned.bam"
            }
        },
        "bqsr":{
            "output_of":"Base-Recalibration"
        },
        "gatk3":{
            "dataclass":"Collection",
            "default":"2e98fdc8e90f4c48a0714b711767c9ce+76",
            "link_name":"Genome Analysis Toolkit 3.2-2",
            "title":"PrintReads Input Version of GATK3 jar"
        },
        "dbsnp":{
            "dataclass":"Collection",
            "default":"8ac324bfa3dfff1ff81ed34b433869b1+6712",
            "title":"Single Nucleotide Polymorphism database",
            "description":"Base-Recalibration Input DBsnp"
        }
    },
    "runtime_constraints":{
        "max_tasks_per_node":1,
        "min_nodes":1,
        "docker_image":"bcosc/arv-base-java",
        "arvados_sdk_version":"29009a1c1f8a9653042c5853832881aca4141cf2"
    },
    "output_name":false
}
</pre>

h2. Crunch Script Template

<pre>
#!/usr/bin/env python

import arvados
import subprocess
import os
import sys
import re
from arvados.collection import Collection as coll
import arvados_tools
import shutil

arvados_tools.spawn_new_task_per_file('input','.*realigned.bqsrCal.bam$',if_sequence=0, and_end_task=True)

this_job = arvados.current_job()
this_task = arvados.current_task()
tmpdir = arvados.current_task().tmpdir
input_1 = this_task['parameters']['input_1']
input_1_path = os.path.join(arvados.get_job_param_mount("input"),input_1)
tmp_input_1_path = os.path.join(tmpdir,input_1) # If we need to copy to tmpdir
shutil.copyfile(input_1_path,tmp_input_1_path) # If we need to copy to tmpdir
</pre>

<pre>
samtools_path = arvados_tools.get_file_path('samtools','^samtools$')
gatk_path = arvados_tools.get_file_path('gatk_jar','^GenomeAnalysisTK.jar$')
reference_path = arvados_tools.get_file_path('reference','.*f(ast)?a(.gz)?$')
dbsnp_path = arvados_tools.get_file_path('dbsnp','^dbsnp.*vcf$')
</pre>

<pre>
gatk_inserttool_args = []
gatk_inserttool_output_name
gatk_inserttool_output_path
gatk_inserttool_pipe = subprocess.check_output()
</pre>

<pre>
samtools_inserttool_args = []
</pre>

h2. Script Parameter Template

h2. Latest arvados_sdk_version: https://arvados.org/projects/arvados/repository/revisions/master/show/sdk/python