From d91ec5ed4e85fc34dc6942eca70dd869d75d1931 Mon Sep 17 00:00:00 2001 From: Sam Brightman Date: Tue, 14 Feb 2017 16:32:52 +0100 Subject: [PATCH] Allow partially-called genotypes to be considered called --- vcf/model.py | 4 ++-- vcf/test/test_vcf.py | 10 ++++++++++ vcf/test/uncalled_genotypes.vcf | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/vcf/model.py b/vcf/model.py index 375a3f8..34a4d17 100644 --- a/vcf/model.py +++ b/vcf/model.py @@ -26,7 +26,7 @@ def __init__(self, site, sample, data): if getattr(self.data, 'GT', None) is not None: self.gt_alleles = [(al if al != '.' else None) for al in allele_delimiter.split(self.data.GT)] self.ploidity = len(self.gt_alleles) - self.called = all([al != None for al in self.gt_alleles]) + self.called = any(al is not None for al in self.gt_alleles) self.gt_nums = self.data.GT if self.called else None else: #62 a call without a genotype is not defined as called or not @@ -65,7 +65,7 @@ def gt_bases(self): if self.called: # lookup and return the actual DNA alleles try: - return self.gt_phase_char().join(str(self.site.alleles[int(X)]) for X in self.gt_alleles) + return self.gt_phase_char().join(str(self.site.alleles[int(X)] if X is not None else '.') for X in self.gt_alleles) except: sys.stderr.write("Allele number not found in list of alleles\n") else: diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py index a09b0b9..deeff01 100644 --- a/vcf/test/test_vcf.py +++ b/vcf/test/test_vcf.py @@ -1639,22 +1639,32 @@ def test_read_uncalled(self): gt_nums = [s.gt_nums for s in var.samples] ploidity = [s.ploidity for s in var.samples] gt_alleles = [s.gt_alleles for s in var.samples] + gt_type = [s.gt_type for s in var.samples] if var.POS == 14370: self.assertEqual(['0|0', None, '1/1'], gt_nums) self.assertEqual(['G|G', None, 'A/A'], gt_bases) self.assertEqual([2,2,2], ploidity) self.assertEqual([['0','0'], [None,None], ['1','1']], gt_alleles) + self.assertEqual([0, None, 2], gt_type) elif var.POS == 17330: self.assertEqual([None, '0|1', '0/0'], gt_nums) self.assertEqual([None, 'T|A', 'T/T'], gt_bases) self.assertEqual([3,2,2], ploidity) self.assertEqual([[None,None,None], ['0','1'], ['0','0']], gt_alleles) + self.assertEqual([None, 1, 0], gt_type) elif var.POS == 1234567: self.assertEqual(['0/1', '0/2', None], gt_nums) self.assertEqual(['GTC/G', 'GTC/GTCT', None], gt_bases) self.assertEqual([2,2,1], ploidity) self.assertEqual([['0','1'], ['0','2'], [None]], gt_alleles) + self.assertEqual([1, 1, None], gt_type) + elif var.POS == 1234568: + self.assertEqual(['./1', '0/.', None], gt_nums) + self.assertEqual(['./G', 'GTC/.', None], gt_bases) + self.assertEqual([2,2,1], ploidity) + self.assertEqual([[None,'1'], ['0',None], [None]], gt_alleles) + self.assertEqual([1, 1, None], gt_type) reader._reader.close() diff --git a/vcf/test/uncalled_genotypes.vcf b/vcf/test/uncalled_genotypes.vcf index 2032097..794aea7 100644 --- a/vcf/test/uncalled_genotypes.vcf +++ b/vcf/test/uncalled_genotypes.vcf @@ -5,3 +5,4 @@ 20 14370 rs6054257 G A 29 PASS NS=3 GT 0|0 ./. 1/1 20 17330 . T A 3 q10 NS=3 GT ././. 0|1 0/0 20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3 GT 0/1 0/2 . +20 1234568 . GTC G,GTCT 50 PASS NS=3 GT ./1 0/. .