keep_block_to_file.py

Peter Amstutz, 04/10/2016 08:26 PM

 
1
#!/usr/bin/env python
2

    
3
import re
4
import sys
5
import arvados.collection
6
from arvados.keep import KeepLocator
7

    
8
# File-name pattern of a missing-blocks report, matched from the start of the
# argument: "DDDD-DD-DDTDD:DD:DD-DD:DD_" (D = digit), then a 5-5-15 character
# identifier separated by dashes, then "_missing.txt".  Group 1 captures the
# identifier, which is later handed to CollectionReader.
MISSING_REPORT_RE = re.compile(
    r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d-\d\d:\d\d_(.....-.....-...............)_missing\.txt")


def parse_collection_id(filename):
    """Return the collection identifier embedded in *filename*.

    Returns None when *filename* does not start with the expected
    "<timestamp>_<identifier>_missing.txt" pattern.
    """
    found = MISSING_REPORT_RE.match(filename)
    if found is None:
        return None
    return found.group(1)


def read_missing_blocks(lines):
    """Return the set of whitespace-stripped, non-empty entries in *lines*.

    *lines* is any iterable of strings, such as an open text file.
    """
    stripped_lines = (line.strip() for line in lines)
    return set(entry for entry in stripped_lines if entry)


def scanfiles(name, cur, collection, missingblocks):
    """Print one row for every missing block referenced under *cur*.

    name          -- path of *cur* relative to the collection root ("." for
                     the root itself).
    cur           -- an arvados.collection.ArvadosFile, or any other object
                     exposing items() that yields (child name, child) pairs;
                     such objects are walked recursively.
    collection    -- identifier printed in the first column of each row.
    missingblocks -- set of stripped locators to look for.

    Each hit is written to stdout as: "collection", "path", "locator".
    """
    if isinstance(cur, arvados.collection.ArvadosFile):
        for segment in cur.segments():
            # Compare in stripped form, the same form used for set membership.
            stripped = KeepLocator(segment.locator).stripped()
            if stripped in missingblocks:
                # Parenthesised single argument prints identically on
                # Python 2 and Python 3.
                print("\"%s\", \"%s\", \"%s\"" % (collection, name, stripped))
    else:
        for child_name, child in cur.items():
            scanfiles("%s/%s" % (name, child_name), child,
                      collection, missingblocks)


def main(argv):
    """Process each missing-blocks report named in *argv*.

    Returns 0 when every argument was processed, or 1 when at least one
    argument did not match the expected file-name pattern.  Such arguments
    are reported on stderr and skipped so the remaining ones still run.
    """
    status = 0
    for report_path in argv:
        collection = parse_collection_id(report_path)
        if collection is None:
            sys.stderr.write(
                "%s: name does not match <timestamp>_<collection>_missing.txt, skipping\n"
                % report_path)
            status = 1
            continue

        # The context manager closes the report as soon as it has been read.
        with open(report_path) as blocklist:
            missingblocks = read_missing_blocks(blocklist)

        scanfiles(".", arvados.collection.CollectionReader(collection),
                  collection, missingblocks)
    return status


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))