Project

General

Profile

Pipeline template template » History » Version 7

Bryan Cosca, 05/21/2015 06:32 PM

1 1 Bryan Cosca
h1. Pipeline template template
2
3
h2. Run-Command Template
4
5 4 Bryan Cosca
<pre>
6 7 Bryan Cosca
  "NAME":{
7 6 Bryan Cosca
   "script_version":"29009a1c1f8a9653042c5853832881aca4141cf2",
8 4 Bryan Cosca
   "repository":"arvados",
9
   "script":"run-command",
10
   "script_parameters":{
11
    "input":{
12 7 Bryan Cosca
     "output_of":"OUTPUT_OF_PREVIOUS_JOB_NAME"
13 4 Bryan Cosca
    },
14
    "reference":{
15
     "dataclass":"Collection",
16
     "default":"3514b8e5da0e8d109946bc809b20a78a+5698",
17
     "link_name":"human_g1k_v37 reference data",
18 7 Bryan Cosca
     "title":"NAME Input Reference genome (FASTA)"
19 4 Bryan Cosca
    },
20
    "command":[
21
     "java",
22
     "-Xmx60g",
23
     "-jar",
24
     "$(dir $(gatk3))/GenomeAnalysisTK.jar",
25
     "-T",
26
     "PrintReads",
27
     "-R",
28
     "$(glob $(dir $(reference))/*.fasta)",
29
     {
30
      "foreach":"iterator",
31
      "command":[
32
       "-I",
33
       "$(iterator)"
34
      ]
35
     },
36
     "-BQSR",
37
     "$(bqsr_table)",
38
     "-nct",
39
     "16",
40
     "-o",
41
     "$(outputname)"
42
    ],
43
    "outputname":{
44
     "value":{
45
      "list":"iterator",
46
      "index":"0",
47
      "command":"$(basename $(iterator)).bqsrCal.bam"
48
     }
49
    },
50
    "bqsr_table":{
51
     "value":{
52
      "list":"iterator",
53
      "index":"0",
54
      "command":"$(dir $(bqsr))/$(basename $(basename $(iterator))).recal_data.table"
55
     }
56
    },
57
    "input_dir":"$(dir $(input))",
58
    "task.foreach":[
59
     "iterator"
60
    ],
61
    "iterator":{
62
     "value":{
63
      "group":"input_dir",
64
      "regex":"(.*)\\.realigned\\.bam"
65 1 Bryan Cosca
     }
66 4 Bryan Cosca
    },
67
    "gatk3":{
68
     "dataclass":"Collection",
69
     "default":"2e98fdc8e90f4c48a0714b711767c9ce+76",
70 1 Bryan Cosca
     "link_name":"Genome Analysis Toolkit 3.2-2",
71 7 Bryan Cosca
     "title":"NAME Input Version of GATK3 jar"
72 4 Bryan Cosca
    },
73
    "dbsnp":{
74
     "dataclass":"Collection",
75
     "default":"8ac324bfa3dfff1ff81ed34b433869b1+6712",
76
     "title":"Single Nucleotide Polymorphism database",
77 7 Bryan Cosca
     "description":"NAME Input DBsnp"
78 4 Bryan Cosca
    }
79
   },
80
   "runtime_constraints":{
81
    "max_tasks_per_node":1,
82
    "min_nodes":1,
83
    "docker_image":"bcosc/arv-base-java",
84 6 Bryan Cosca
    "arvados_sdk_version":"29009a1c1f8a9653042c5853832881aca4141cf2" 
85 4 Bryan Cosca
   },
86
   "output_name":false
87 6 Bryan Cosca
  }
88 4 Bryan Cosca
</pre>
89 1 Bryan Cosca
90
h2. Crunch Script Template
91
<pre>
92
#!/usr/bin/env python
93
94
import arvados
95
import subprocess
96
import os
97
import sys
98
import re
99
from arvados.collection import Collection as coll
100
import arvados_tools
101
import shutil
102
103
arvados_tools.spawn_new_task_per_file('input','.*realigned.bqsrCal.bam$',if_sequence=0, and_end_task=True)
104
105
this_job = arvados.current_job()
106
this_task = arvados.current_task()
107
tmpdir = arvados.current_task().tmpdir
108
109
input_1 = this_task['parameters']['input_1']
110
input_1_path = os.path.join(arvados.get_job_param_mount("input"),input_1)
111
tmp_input_1_path = os.path.join(tmpdir,input_1) # If we need to copy to tmpdir
112
shutil.copyfile(input_1_path,tmp_input_1_path) # If we need to copy to tmpdir
113
</pre> 
114
115
116
<pre>
117
samtools_path = arvados_tools.get_file_path('samtools','^samtools$')
118
gatk_path = arvados_tools.get_file_path('gatk_jar','^GenomeAnalysisTK.jar$')
119
reference_path = arvados_tools.get_file_path('reference','.*f(ast)?a(.gz)?$')
120
dbsnp_path = arvados_tools.get_file_path('dbsnp','^dbsnp.*vcf$')
121
</pre>
122
123
<pre>
124
gatk_inserttool_args = []
125
gatk_inserttool_output_name
126
gatk_inserttool_output_path
127
gatk_inserttool_pipe = subprocess.check_output()
128
</pre>
129
130
<pre>
131
samtools_inserttool_args = []
132
</pre>
133
134
h2. Script Parameter Template
135
136
137 3 Bryan Cosca
h2. Latest arvados_sdk_version: 
138 2 Bryan Cosca
139
https://arvados.org/projects/arvados/repository/revisions/master/show/sdk/python