Skip to content

Commit

Permalink
Merge pull request #39 from hammerlab/search_over_loci
Browse files Browse the repository at this point in the history
Search over loci
  • Loading branch information
iskandr committed Feb 19, 2015
2 parents 3bedf41 + e124676 commit 7077c8f
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 7 deletions.
3 changes: 2 additions & 1 deletion pyensembl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@
from .gtf import GTF
from .locus import Locus
from .reference_transcripts import ReferenceTranscripts
from .transcript import Transcript
from .search import find_nearest_locus
from .transcript import Transcript
27 changes: 23 additions & 4 deletions pyensembl/locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,27 @@ def can_overlap(self, contig, strand=None):
and
(strand is None or self.on_strand(strand)))

def distance_to_interval(self, start, end):
"""
Find the distance between intervals [start1, end1] and [start2, end2].
If the intervals overlap then the distance is 0.
"""
if self.start > end:
# interval is before this exon
return self.start - end
elif self.end < start:
# exon is before the interval
return start - self.end
else:
return 0

def distance_to_locus(self, other):
if not self.can_overlap(other.contig, other.strand):
# if two loci are on different contigs or strands,
# can't compute a distance between them
return float("inf")
return self.distance_to_interval(other.start, other.end)

def overlaps(self, contig, start, end, strand=None):
"""
Does this locus overlap with a given range of positions?
Expand All @@ -181,9 +202,7 @@ def overlaps(self, contig, start, end, strand=None):
return (
self.can_overlap(contig, strand)
and
end >= self.start
and
start <= self.end)
self.distance_to_interval(start, end) == 0)

def overlaps_locus(self, other_locus):
return self.overlaps(
Expand All @@ -205,4 +224,4 @@ def contains_locus(self, other_locus):
other_locus.contig,
other_locus.start,
other_locus.end,
other_locus.strand)
other_locus.strand)
20 changes: 20 additions & 0 deletions pyensembl/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
Helper functions for searching over collections of PyEnsembl objects
"""

def find_nearest_locus(start, end, loci):
"""
Finds nearest locus (object with method `distance_to_interval`) to the
interval defined by the given `start` and `end` positions.
Returns the distance to that locus, along with the locus object itself.
"""
best_distance = float("inf")
best_locus = None
for locus in loci:
distance = locus.distance_to_interval(start, end)

if best_distance > distance:
best_distance = distance
best_locus = locus

return best_distance, best_locus
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
if __name__ == '__main__':
setup(
name='pyensembl',
version="0.5.3",
version="0.5.4",
description="Python interface to ensembl reference genome metadata",
author="Alex Rubinsteyn",
author_email="alex {dot} rubinsteyn {at} mssm {dot} edu",
Expand Down
11 changes: 10 additions & 1 deletion test/test_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from nose.tools import assert_raises


def test_normalize_chromosome():
assert normalize_chromosome("X") == "X"
assert normalize_chromosome("chrX") == "X"
Expand Down Expand Up @@ -162,4 +161,14 @@ def test_range_offset():
with assert_raises(ValueError):
negative_locus.offset_range(9, 10)

def test_locus_distance():
locus_chr1_10_20_pos = Locus("1", 10, 20, "+")
locus_chr1_21_25_pos = Locus("1", 21, 25, "+")
locus_chr2_21_25_pos = Locus("2", 21, 25, "+")
locus_chr1_21_25_neg = Locus("1", 21, 25, "-")
assert locus_chr1_10_20_pos.distance_to_locus(locus_chr1_21_25_pos) == 1
assert locus_chr1_21_25_pos.distance_to_locus(locus_chr1_10_20_pos) == 1
inf = float("inf")
assert locus_chr1_10_20_pos.distance_to_locus(locus_chr2_21_25_pos) == inf
assert locus_chr1_10_20_pos.distance_to_locus(locus_chr1_21_25_neg) == inf

55 changes: 55 additions & 0 deletions test/test_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from test_common import test_ensembl_releases
from pyensembl import EnsemblRelease, find_nearest_locus
from nose.tools import eq_

@test_ensembl_releases
def test_find_nearest_BRAF_exon(ensembl):
braf = ensembl.genes_by_name("BRAF")[0]
braf_transcripts = braf.transcripts
for exon in braf_transcripts[0].exons:
# immediately before exon
result = find_nearest_locus(
start=exon.start-2,
end=exon.end-1,
loci=exons)
eq_(result, (1, exon))

# overlapping with exon
result = find_nearest_locus(
start=exon.start-2,
end=exon.start+1,
loci=exons)
eq_(result, (0, exon))

# immediately after exon
result = find_nearest_locus(
start=exon.end+1,
end=exon.end+2,
loci=exons)
eq_(result, (1, exon))

@test_ensembl_releases
def test_find_nearest_BRAF_transcript(ensembl):
braf = ensembl.genes_by_name("BRAF")[0]
braf_transcripts = braf.transcripts
for transcript in braf_transcripts:
# immediately before transcript
result = find_nearest_locus(
start=transcript.start-2,
end=transcript.end-1,
loci=braf_transcripts)
eq_(result, (1, transcript))

# overlapping with transcript
result = find_nearest_locus(
start=transcript.start-2,
end=transcript.start+1,
exons=braf_transcripts)
eq_(result, (0, transcript))

# immediately after transcript
result = find_nearest_locus(
start=transcript.end+1,
end=transcript.end+2,
exons=braf_transcripts)
eq_(result, (1, transcript))

0 comments on commit 7077c8f

Please sign in to comment.