From 51652ca9d82955cdd588a31d6093aaa7a1aeffb3 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Mon, 8 Oct 2018 13:56:07 -0700 Subject: [PATCH] Fix affected_start/end with a MNP --- README.rst | 2 +- vcf/model.py | 16 +++++++++++----- vcf/test/test_vcf.py | 19 +++++++++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 67b5d1b..9041bf7 100644 --- a/README.rst +++ b/README.rst @@ -156,7 +156,7 @@ for the regions of interest:: Note that the start and end coordinates are in the zero-based, half-open coordinate system, similar to ``_Record.start`` and ``_Record.end``. The very -first base of a chromosome is index 0, and the the region includes bases up +first base of a chromosome is index 0, and the region includes bases up to, but not including the base at the end coordinate. For example:: >>> # fetch all records on chromosome 4 from base 11 through 20 diff --git a/vcf/model.py b/vcf/model.py index 34a4d17..9355628 100644 --- a/vcf/model.py +++ b/vcf/model.py @@ -205,14 +205,14 @@ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, def _set_start_and_end(self): - self.affected_start = self.affected_end = self.POS + self.affected_start = self.affected_end = self.end for alt in self.ALT: if alt is None: start, end = self._compute_coordinates_for_none_alt() elif alt.type == 'SNV': start, end = self._compute_coordinates_for_snp() elif alt.type == 'MNV': - start, end = self._compute_coordinates_for_indel() + start, end = self._compute_coordinates_for_indel(alt) else: start, end = self._compute_coordinates_for_sv() self.affected_start = min(self.affected_start, start) @@ -235,10 +235,16 @@ def _compute_coordinates_for_snp(self): return (start, end) - def _compute_coordinates_for_indel(self): + def _compute_coordinates_for_indel(self, alt): if len(self.REF) > 1: - start = self.POS - end = start + (len(self.REF) - 1) + # get the number of leading bases that are shared between ref and alt + n = 0 + for i in range(min(len(self.REF), len(alt.sequence))): + if self.REF[i] != alt.sequence[i]: + break; + n += 1 + start = self.POS + n - 1 + end = self.POS + (len(self.REF) - 1) else: start = end = self.POS return (start, end) diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py index f04c8b1..40ac398 100644 --- a/vcf/test/test_vcf.py +++ b/vcf/test/test_vcf.py @@ -1189,6 +1189,25 @@ def test_coordinates_for_breakend(self): None ) self.assert_has_expected_coordinates(record, (9, 12), (9, 12)) + + + def test_coordinates_for_mnp(self): + record = model._Record( + '1', + 10, + 'id13', + 'AAA', + [ + model._Substitution('AAT') + ], + None, + None, + {}, + None, + {}, + None + ) + self.assert_has_expected_coordinates(record, (9, 12), (11, 12)) class TestCall(unittest.TestCase):