1
|
#!/usr/bin python
|
2
|
|
3
|
import gzip
|
4
|
|
5
|
def is_header(line):
|
6
|
return line.startswith('#')
|
7
|
|
8
|
def get_GT(line):
|
9
|
fields = line.strip().split('\t')
|
10
|
FORMAT, sample = fields[8], fields[9]
|
11
|
FORMAT_fields = FORMAT.split(':')
|
12
|
sample_fields = sample.split(':')
|
13
|
|
14
|
GT_index = FORMAT_fields.index('GT')
|
15
|
GT = sample_fields[GT_index]
|
16
|
return GT
|
17
|
|
18
|
def is_phased_het(line):
|
19
|
GT = get_GT(line)
|
20
|
|
21
|
if '|' in GT:
|
22
|
GT_fields = GT.split('|')
|
23
|
return GT_fields[0] != GT_fields[1]
|
24
|
else:
|
25
|
return False
|
26
|
|
27
|
def is_phase_switch(bline, cline):
|
28
|
bGT = get_GT(bline)
|
29
|
cGT = get_GT(cline)
|
30
|
|
31
|
if '|' in bGT and '|' in cGT:
|
32
|
bGT_fields = bGT.split('|')
|
33
|
cGT_fields = cGT.split('|')
|
34
|
if bGT_fields[0] != bGT_fields[1] and cGT_fields[0] != cGT_fields[1]:
|
35
|
return bGT_fields[0] == cGT_fields[1] and bGT_fields[1] == cGT_fields[0]
|
36
|
else:
|
37
|
return False
|
38
|
else:
|
39
|
return False
|
40
|
|
41
|
def print_switch_errors(bfile, cfile):
|
42
|
b = gzip.open(bfile) if bfile.endswith(".gz") else open(bfile)
|
43
|
c = gzip.open(cfile) if cfile.endswith(".gz") else open(cfile)
|
44
|
|
45
|
bline = b.readline()
|
46
|
cline = c.readline()
|
47
|
|
48
|
last_het_is_switch = False
|
49
|
|
50
|
while not (bline == '' or cline == ''):
|
51
|
if is_header(bline):
|
52
|
bline = b.readline()
|
53
|
elif is_header(cline):
|
54
|
cline = c.readline()
|
55
|
else:
|
56
|
bfields = bline.strip().split('\t')
|
57
|
cfields = cline.strip().split('\t')
|
58
|
bCHROM, bPOS = bfields[0], int(bfields[1])
|
59
|
cCHROM, cPOS = cfields[0], int(cfields[1])
|
60
|
if bCHROM < cCHROM or (bCHROM == cCHROM and bPOS < cPOS):
|
61
|
if is_phased_het(bline):
|
62
|
if last_het_is_switch:
|
63
|
print bline,
|
64
|
last_het_is_switch = False
|
65
|
bline = b.readline()
|
66
|
elif bCHROM == cCHROM and bPOS == cPOS:
|
67
|
if is_phased_het(bline):
|
68
|
if last_het_is_switch:
|
69
|
if not is_phase_switch(bline, cline):
|
70
|
print bline,
|
71
|
last_het_is_switch = False
|
72
|
else:
|
73
|
if is_phase_switch(bline, cline):
|
74
|
print bline,
|
75
|
last_het_is_switch = True
|
76
|
bline = b.readline()
|
77
|
cline = c.readline()
|
78
|
else:
|
79
|
cline = c.readline()
|
80
|
|
81
|
b.close()
|
82
|
c.close()
|
83
|
|
84
|
def main():
|
85
|
import argparse
|
86
|
parser = argparse.ArgumentParser()
|
87
|
parser.add_argument('bfile', metavar='BFILE', help='baseline file')
|
88
|
parser.add_argument('cfile', metavar='CFILE', help='calls file')
|
89
|
args = parser.parse_args()
|
90
|
|
91
|
print_switch_errors(args.bfile, args.cfile)
|
92
|
|
93
|
if __name__ == '__main__':
|
94
|
main()
|