Pipeline template template » History » Version 8
Bryan Cosca, 05/21/2015 06:33 PM
1 | 1 | Bryan Cosca | h1. Pipeline template template |
---|---|---|---|
2 | |||
3 | h2. Run-Command Template |
||
4 | |||
5 | 4 | Bryan Cosca | <pre> |
6 | 7 | Bryan Cosca | "NAME":{ |
7 | 6 | Bryan Cosca | "script_version":"29009a1c1f8a9653042c5853832881aca4141cf2", |
8 | 4 | Bryan Cosca | "repository":"arvados", |
9 | "script":"run-command", |
||
10 | "script_parameters":{ |
||
11 | "input":{ |
||
12 | 7 | Bryan Cosca | "output_of":"OUTPUT_OF_PREVIOUS_JOB_NAME" |
13 | 4 | Bryan Cosca | }, |
14 | "reference":{ |
||
15 | "dataclass":"Collection", |
||
16 | "default":"3514b8e5da0e8d109946bc809b20a78a+5698", |
||
17 | "link_name":"human_g1k_v37 reference data", |
||
18 | 7 | Bryan Cosca | "title":"NAME Input Reference genome (FASTA)" |
19 | 4 | Bryan Cosca | }, |
20 | "command":[ |
||
21 | "java", |
||
22 | "-Xmx60g", |
||
23 | "-jar", |
||
24 | "$(dir $(gatk3))/GenomeAnalysisTK.jar", |
||
25 | "-T", |
||
26 | "PrintReads", |
||
27 | "-R", |
||
28 | "$(glob $(dir $(reference))/*.fasta)", |
||
29 | { |
||
30 | "foreach":"iterator", |
||
31 | "command":[ |
||
32 | "-I", |
||
33 | "$(iterator)" |
||
34 | ] |
||
35 | }, |
||
36 | "-BQSR", |
||
37 | "$(bqsr_table)", |
||
38 | "-nct", |
||
39 | "16", |
||
40 | "-o", |
||
41 | "$(outputname)" |
||
42 | ], |
||
43 | "outputname":{ |
||
44 | "value":{ |
||
45 | "list":"iterator", |
||
46 | "index":"0", |
||
47 | "command":"$(basename $(iterator)).bqsrCal.bam" |
||
48 | } |
||
49 | }, |
||
50 | "bqsr_table":{ |
||
51 | "value":{ |
||
52 | "list":"iterator", |
||
53 | "index":"0", |
||
54 | "command":"$(dir $(bqsr))/$(basename $(basename $(iterator))).recal_data.table" |
||
55 | } |
||
56 | }, |
||
57 | "input_dir":"$(dir $(input))", |
||
58 | "task.foreach":[ |
||
59 | "iterator" |
||
60 | ], |
||
61 | "iterator":{ |
||
62 | "value":{ |
||
63 | "group":"input_dir", |
||
64 | "regex":"(.*)\\.realigned.bam" |
||
65 | 1 | Bryan Cosca | } |
66 | 4 | Bryan Cosca | }, |
67 | "gatk3":{ |
||
68 | "dataclass":"Collection", |
||
69 | "default":"2e98fdc8e90f4c48a0714b711767c9ce+76", |
||
70 | 1 | Bryan Cosca | "link_name":"Genome Analysis Toolkit 3.2-2", |
71 | 7 | Bryan Cosca | "title":"NAME Input Version of GATK3 jar" |
72 | 4 | Bryan Cosca | }, |
73 | "dbsnp":{ |
||
74 | "dataclass":"Collection", |
||
75 | "default":"8ac324bfa3dfff1ff81ed34b433869b1+6712", |
||
76 | "title":"Single Nucleotide Polymorphism database", |
||
77 | 7 | Bryan Cosca | "description":"NAME Input DBsnp" |
78 | 4 | Bryan Cosca | } |
79 | }, |
||
80 | "runtime_constraints":{ |
||
81 | "max_tasks_per_node":1, |
||
82 | "min_nodes":1, |
||
83 | "docker_image":"bcosc/arv-base-java", |
||
84 | 6 | Bryan Cosca | "arvados_sdk_version":"29009a1c1f8a9653042c5853832881aca4141cf2" |
85 | 4 | Bryan Cosca | }, |
86 | "output_name":false |
||
87 | 6 | Bryan Cosca | }, |
88 | 4 | Bryan Cosca | </pre> |
89 | 1 | Bryan Cosca | |
90 | 8 | Bryan Cosca | h2. Crunch Script Pipeline Template |
91 | |||
92 | <pre> |
||
93 | "Indel-Realignment":{ |
||
94 | "script_version":"indel_realignment", |
||
95 | "repository":"bcosc/SwiftBiosciences", |
||
96 | "script":"indel_realigner.py", |
||
97 | "script_parameters":{ |
||
98 | "input":{ |
||
99 | "output_of":"Make-Bam-Calculate-Coverage" |
||
100 | }, |
||
101 | "samtools":{ |
||
102 | "required":true, |
||
103 | "dataclass":"Collection", |
||
104 | "default":"0b5dd5ad3fd555dbb9ef81a027b69dec+18147", |
||
105 | "title":"Run-Indel-Realignment Input Samtools Collection" |
||
106 | }, |
||
107 | "gatk_jar":{ |
||
108 | "required":true, |
||
109 | "dataclass":"Collection", |
||
110 | "default":"2e98fdc8e90f4c48a0714b711767c9ce+76", |
||
111 | "title":"Run-Indel-Realignment Input GATK Collection" |
||
112 | }, |
||
113 | "reference":{ |
||
114 | "required":true, |
||
115 | "dataclass":"Collection", |
||
116 | "default":"3514b8e5da0e8d109946bc809b20a78a+5698", |
||
117 | "title":"Run-Indel-Realignment Input Reference Collection" |
||
118 | } |
||
119 | }, |
||
120 | "runtime_constraints":{ |
||
121 | "max_tasks_per_node":1, |
||
122 | "min_nodes":1, |
||
123 | "docker_image":"bcosc/arv-base-java", |
||
124 | "arvados_sdk_version":"749b87143ebb0bdcbe2d49deee9c66f6de9f86dd" |
||
125 | }, |
||
126 | "output_name":false |
||
127 | }, |
||
128 | </pre> |
||
129 | |||
130 | 1 | Bryan Cosca | h2. Crunch Script Template |
131 | <pre> |
||
132 | #!/usr/bin/env python |
||
133 | |||
134 | import arvados |
||
135 | import subprocess |
||
136 | import os |
||
137 | import sys |
||
138 | import re |
||
139 | from arvados.collection import Collection as coll |
||
140 | import arvados_tools |
||
141 | import shutil |
||
142 | |||
143 | arvados_tools.spawn_new_task_per_file('input','.*realigned.bqsrCal.bam$',if_sequence=0, and_end_task=True) |
||
144 | |||
145 | this_job = arvados.current_job() |
||
146 | this_task = arvados.current_task() |
||
147 | tmpdir = arvados.current_task().tmpdir |
||
148 | |||
149 | input_1 = this_task['parameters']['input_1'] |
||
150 | input_1_path = os.path.join(arvados.get_job_param_mount("input"),input_1) |
||
151 | tmp_input_1_path = os.path.join(tmpdir,input_1) # If we need to copy to tmpdir |
||
152 | shutil.copyfile(input_1_path,tmp_input_1_path) # If we need to copy to tmpdir |
||
153 | </pre> |
||
154 | |||
155 | |||
156 | <pre> |
||
157 | samtools_path = arvados_tools.get_file_path('samtools','^samtools$') |
||
158 | gatk_path = arvados_tools.get_file_path('gatk_jar','^GenomeAnalysisTK.jar$') |
||
159 | reference_path = arvados_tools.get_file_path('reference','.*f(ast)?a(.gz)?$') |
||
160 | dbsnp_path = arvados_tools.get_file_path('dbsnp','^dbsnp.*vcf$') |
||
161 | </pre> |
||
162 | |||
163 | <pre> |
||
164 | gatk_inserttool_args = [] |
||
165 | gatk_inserttool_output_name |
||
166 | gatk_inserttool_output_path |
||
167 | gatk_inserttool_pipe = subprocess.check_output() |
||
168 | </pre> |
||
169 | |||
170 | <pre> |
||
171 | samtools_inserttool_args = [] |
||
172 | </pre> |
||
173 | |||
174 | h2. Script Parameter Template |
||
175 | |||
176 | |||
177 | 3 | Bryan Cosca | h2. Latest arvados_sdk_version: |
178 | 2 | Bryan Cosca | |
179 | https://arvados.org/projects/arvados/repository/revisions/master/show/sdk/python |