Project

General

Profile

Pipeline template template » History » Version 4

Bryan Cosca, 05/21/2015 06:22 PM

1 1 Bryan Cosca
h1. Pipeline template template
2
3
h2. Run-Command Template
4
5 4 Bryan Cosca
<pre>
6
  "PrintReads":{
7
   "script_version":"29009a1c1f8a9653042c5853832881aca4141cf2",
8
   "repository":"arvados",
9
   "script":"run-command",
10
   "script_parameters":{
11
    "input":{
12
     "output_of":"Indel-Realignment"
13
    },
14
    "reference":{
15
     "dataclass":"Collection",
16
     "default":"3514b8e5da0e8d109946bc809b20a78a+5698",
17
     "link_name":"human_g1k_v37 reference data",
18
     "title":"PrintReads Input Reference genome (FASTA)"
19
    },
20
    "command":[
21
     "java",
22
     "-Xmx60g",
23
     "-jar",
24
     "$(dir $(gatk3))/GenomeAnalysisTK.jar",
25
     "-T",
26
     "PrintReads",
27
     "-R",
28
     "$(glob $(dir $(reference))/*.fasta)",
29
     {
30
      "foreach":"iterator",
31
      "command":[
32
       "-I",
33
       "$(iterator)"
34
      ]
35
     },
36
     "-BQSR",
37
     "$(bqsr_table)",
38
     "-nct",
39
     "16",
40
     "-o",
41
     "$(outputname)"
42
    ],
43
    "outputname":{
44
     "value":{
45
      "list":"iterator",
46
      "index":"0",
47
      "command":"$(basename $(iterator)).bqsrCal.bam"
48
     }
49
    },
50
    "bqsr_table":{
51
     "value":{
52
      "list":"iterator",
53
      "index":"0",
54
      "command":"$(dir $(bqsr))/$(basename $(basename $(iterator))).recal_data.table"
55
     }
56
    },
57
    "input_dir":"$(dir $(input))",
58
    "task.foreach":[
59
     "iterator"
60
    ],
61
    "iterator":{
62
     "value":{
63
      "group":"input_dir",
64
      "regex":"(.*)\\.realigned.bam"
65
     }
66
    },
67
    "bqsr":{
68
     "output_of":"Base-Recalibration"
69
    },
70
    "gatk3":{
71
     "dataclass":"Collection",
72
     "default":"2e98fdc8e90f4c48a0714b711767c9ce+76",
73
     "link_name":"Genome Analysis Toolkit 3.2-2",
74
     "title":"PrintReads Input Version of GATK3 jar"
75
    },
76
    "dbsnp":{
77
     "dataclass":"Collection",
78
     "default":"8ac324bfa3dfff1ff81ed34b433869b1+6712",
79
     "title":"Single Nucleotide Polymorphism database",
80
     "description":"Base-Recalibration Input DBsnp"
81
    }
82
   },
83
   "runtime_constraints":{
84
    "max_tasks_per_node":1,
85
    "min_nodes":1,
86
    "docker_image":"bcosc/arv-base-java",
87
    "arvados_sdk_version":"29009a1c1f8a9653042c5853832881aca4141cf2"
88
   },
89
   "output_name":false
90
  }
91
</pre>
92 1 Bryan Cosca
93
h2. Crunch Script Template
94
<pre>
95
#!/usr/bin/env python
96
97
import arvados
98
import subprocess
99
import os
100
import sys
101
import re
102
from arvados.collection import Collection as coll
103
import arvados_tools
104
import shutil
105
106
arvados_tools.spawn_new_task_per_file('input','.*realigned.bqsrCal.bam$',if_sequence=0, and_end_task=True)
107
108
this_job = arvados.current_job()
109
this_task = arvados.current_task()
110
tmpdir = arvados.current_task().tmpdir
111
112
input_1 = this_task['parameters']['input_1']
113
input_1_path = os.path.join(arvados.get_job_param_mount("input"),input_1)
114
tmp_input_1_path = os.path.join(tmpdir,input_1) # If we need to copy to tmpdir
115
shutil.copyfile(input_1_path,tmp_input_1_path) # If we need to copy to tmpdir
116
</pre> 
117
118
119
<pre>
120
samtools_path = arvados_tools.get_file_path('samtools','^samtools$')
121
gatk_path = arvados_tools.get_file_path('gatk_jar','^GenomeAnalysisTK.jar$')
122
reference_path = arvados_tools.get_file_path('reference','.*f(ast)?a(.gz)?$')
123
dbsnp_path = arvados_tools.get_file_path('dbsnp','^dbsnp.*vcf$')
124
</pre>
125
126
<pre>
127
gatk_inserttool_args = []
128
gatk_inserttool_output_name = ''  # placeholder: fill in the output file name
129
gatk_inserttool_output_path = ''  # placeholder: fill in the output file path
130
gatk_inserttool_pipe = subprocess.check_output(gatk_inserttool_args)
131
</pre>
132
133
<pre>
134
samtools_inserttool_args = []
135
</pre>
136
137
h2. Script Parameter Template
138
139
140 3 Bryan Cosca
h2. Latest arvados_sdk_version: 
141 2 Bryan Cosca
142
https://arvados.org/projects/arvados/repository/revisions/master/show/sdk/python