Project

General

Profile

Idea #13376 » get_switch_errors.py

Jiayong Li, 06/20/2018 08:35 PM

 
1
#!/usr/bin python
2

    
3
import gzip
4

    
5
def is_header(line):
6
    return line.startswith('#')
7

    
8
def get_GT(line):
9
    fields = line.strip().split('\t')
10
    FORMAT, sample = fields[8], fields[9]
11
    FORMAT_fields = FORMAT.split(':')
12
    sample_fields = sample.split(':')
13

    
14
    GT_index = FORMAT_fields.index('GT')
15
    GT = sample_fields[GT_index]
16
    return GT
17

    
18
def is_phased_het(line):
19
    GT = get_GT(line)
20

    
21
    if '|' in GT:
22
        GT_fields = GT.split('|')
23
        return GT_fields[0] != GT_fields[1]
24
    else:
25
        return False
26

    
27
def is_phase_switch(bline, cline):
28
    bGT = get_GT(bline)
29
    cGT = get_GT(cline)
30

    
31
    if '|' in bGT and '|' in cGT:
32
        bGT_fields = bGT.split('|')
33
        cGT_fields = cGT.split('|')
34
        if bGT_fields[0] != bGT_fields[1] and cGT_fields[0] != cGT_fields[1]:
35
            return bGT_fields[0] == cGT_fields[1] and bGT_fields[1] == cGT_fields[0]
36
        else:
37
            return False
38
    else:
39
        return False
40

    
41
def print_switch_errors(bfile, cfile):
42
    b = gzip.open(bfile) if bfile.endswith(".gz") else open(bfile)
43
    c = gzip.open(cfile) if cfile.endswith(".gz") else open(cfile)
44

    
45
    bline = b.readline()
46
    cline = c.readline()
47

    
48
    last_het_is_switch = False
49

    
50
    while not (bline == '' or cline == ''):
51
        if is_header(bline):
52
            bline = b.readline()
53
        elif is_header(cline):
54
            cline = c.readline()
55
        else:
56
            bfields = bline.strip().split('\t')
57
            cfields = cline.strip().split('\t')
58
            bCHROM, bPOS = bfields[0], int(bfields[1])
59
            cCHROM, cPOS = cfields[0], int(cfields[1])
60
            if bCHROM < cCHROM or (bCHROM == cCHROM and bPOS < cPOS):
61
                if is_phased_het(bline):
62
                    if last_het_is_switch:
63
                        print bline,
64
                        last_het_is_switch = False
65
                bline = b.readline()
66
            elif bCHROM == cCHROM and bPOS == cPOS:
67
                if is_phased_het(bline):
68
                    if last_het_is_switch:
69
                        if not is_phase_switch(bline, cline):
70
                            print bline,
71
                            last_het_is_switch = False
72
                    else:
73
                        if is_phase_switch(bline, cline):
74
                            print bline,
75
                            last_het_is_switch = True
76
                bline = b.readline()
77
                cline = c.readline()
78
            else:
79
                cline = c.readline()
80

    
81
    b.close()
82
    c.close()
83

    
84
def main():
85
    import argparse
86
    parser = argparse.ArgumentParser()
87
    parser.add_argument('bfile', metavar='BFILE', help='baseline file')
88
    parser.add_argument('cfile', metavar='CFILE', help='calls file')
89
    args = parser.parse_args()
90

    
91
    print_switch_errors(args.bfile, args.cfile)
92

    
93
if __name__ == '__main__':
94
    main()
(2-2/2)