Statistics
| Branch: | Tag: | Revision:

arvados / crunch_scripts / bwa-index @ master

History | View | Annotate | Download (1.26 KB)

1
#!/usr/bin/env python
2
# Copyright (C) The Arvados Authors. All rights reserved.
3
#
4
# SPDX-License-Identifier: Apache-2.0
5

    
6
import arvados
7
import arvados_bwa
8
import os
9
import re
10
import sys
11

    
12
# Fetch the job and task objects describing this crunch invocation.
this_job = arvados.current_job()
this_task = arvados.current_task()

# The job's 'input' script parameter names the collection to extract; only
# its 'reference' path is requested, with decompression turned off.
# NOTE(review): ref_dir is presumably a local directory path, since it is
# handed to os.listdir() further down -- confirm against the SDK.
input_collection = this_job['script_parameters']['input']
ref_dir = arvados.util.collection_extract(
    collection=input_collection,
    path='reference',
    decompress=False)
18

    
19
# Lazily yield the full path of every entry in ref_dir whose name ends in
# ".fasta" or ".fasta.gz".
fasta_pattern = re.compile(r'\.fasta(\.gz)?$')
ref_fasta_files = (
    os.path.join(ref_dir, entry)
    for entry in os.listdir(ref_dir)
    if fasta_pattern.search(entry)
)
22

    
23
# Build the reference index: run the bwa "index" command with
# "-a bwtsw" followed by each selected FASTA path.
index_args = ['-a', 'bwtsw']
index_args.extend(ref_fasta_files)
arvados_bwa.run('index', index_args)
26

    
27
# Relocate the index files found in ref_dir into a new, empty 'out'
# directory under the current task's tmpdir.
out_dir = os.path.join(arvados.current_task().tmpdir, 'out')

# Remove any previous copy first so that os.mkdir() starts from scratch.
arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
os.mkdir(out_dir)

# Only files carrying one of these suffixes are treated as bwa output.
index_suffix = re.compile(r'\.(amb|ann|bwt|pac|rbwt|rpac|rsa|sa)$')
for fname in os.listdir(ref_dir):
    if not index_suffix.search(fname):
        continue
    src_path = os.path.join(ref_dir, fname)
    # Report each file's name and size on stderr before moving it.
    sys.stderr.write("bwa output: %s (%d)\n" %
                     (fname, os.stat(src_path).st_size))
    os.rename(src_path, os.path.join(out_dir, fname))
37

    
38
# Write the contents of out_dir through a CollectionWriter, then record
# whatever finish() returns as this task's output.
out = arvados.CollectionWriter()
out.write_directory_tree(out_dir, max_manifest_depth=0)
finished = out.finish()
this_task.set_output(finished)