From 98b61b24887115e799b5fe1e9a6e84de93b00db7 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Sun, 18 Sep 2016 23:37:31 -0400 Subject: [PATCH] Added protein coding biotype tests (#166) * added unit tests for Transcript.biotype and Gene.biotype * version bump --- pyensembl/__init__.py | 2 +- test/test_gene_objects.py | 32 ++++++++++++++++++++------------ test/test_transcript_objects.py | 22 ++++++++++++++-------- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/pyensembl/__init__.py b/pyensembl/__init__.py index 0c1fc3b..0980e2e 100644 --- a/pyensembl/__init__.py +++ b/pyensembl/__init__.py @@ -35,7 +35,7 @@ ) from .transcript import Transcript -__version__ = '1.0.0' +__version__ = '1.0.1' def cached_release(release, species="human"): """ diff --git a/test/test_gene_objects.py b/test/test_gene_objects.py index 2b68bd3..31bd759 100644 --- a/test/test_gene_objects.py +++ b/test/test_gene_objects.py @@ -1,23 +1,25 @@ from __future__ import absolute_import +from nose.tools import eq_ + from .common import test_ensembl_releases from .data import TP53_gene_id @test_ensembl_releases() -def test_TP53_gene_object_by_id(ensembl): +def test_TP53_gene_object_by_id(genome): # when we look up TP53 by its gene ID, we should get the # correct gene back - gene = ensembl.gene_by_id(TP53_gene_id) + gene = genome.gene_by_id(TP53_gene_id) assert gene.name == "TP53", \ "Incorrect gene name %s for gene ID %s in %s" % ( - gene.name, gene.id, ensembl) + gene.name, gene.id, genome) assert gene.contig == "17", \ "Incorrect gene contig %s for gene ID %s in %s" % ( - gene.contig, gene.id, ensembl) + gene.contig, gene.id, genome) @test_ensembl_releases() -def test_TP53_gene_object_by_name(ensembl): - genes = ensembl.genes_by_name("TP53") +def test_TP53_gene_object_by_name(genome): + genes = genome.genes_by_name("TP53") # we should only have one TP53 gene (there aren't any copies) assert len(genes) == 1, \ "Expected only one gene with name TP53, got %s" % (genes,) @@ -26,17 +28,23 @@ def test_TP53_gene_object_by_name(ensembl): "Expected gene to have ID %s, got %s" % (TP53_gene_id, genes[0].id) @test_ensembl_releases() -def test_equal_genes(ensembl): - gene1 = ensembl.genes_by_name("TP53")[0] +def test_equal_genes(genome): + gene1 = genome.genes_by_name("TP53")[0] # get an identical gene - gene2 = ensembl.gene_by_id(gene1.id) + gene2 = genome.gene_by_id(gene1.id) assert hash(gene1) == hash(gene2) assert gene1 == gene2 @test_ensembl_releases() -def test_not_equal_genes(release): - gene1 = release.genes_by_name("MUC1")[0] - gene2 = release.genes_by_name("BRCA1")[0] +def test_not_equal_genes(genome): + gene1 = genome.genes_by_name("MUC1")[0] + gene2 = genome.genes_by_name("BRCA1")[0] assert hash(gene1) != hash(gene2) assert gene1 != gene2 + +@test_ensembl_releases() +def test_BRCA1_protein_coding_biotype(genome): + gene = genome.genes_by_name("BRCA1")[0] + assert gene.is_protein_coding + eq_(gene.biotype, "protein_coding") diff --git a/test/test_transcript_objects.py b/test/test_transcript_objects.py index adc197f..b85676e 100644 --- a/test/test_transcript_objects.py +++ b/test/test_transcript_objects.py @@ -77,10 +77,10 @@ def test_transcript_exons(): # TODO: Add gene_id patching to gtf_parsing, add ensembl54 to the list # below @test_ensembl_releases(75, 77) -def test_sequence_parts(ensembl): +def test_sequence_parts(genome): # Ensure that the UTRs and coding sequence can be # combined to make the full transcript. - transcript = ensembl.transcript_by_id(FOXP3_001_transcript_id) + transcript = genome.transcript_by_id(FOXP3_001_transcript_id) # The combined lengths of the upstream untranslated region, # coding sequence, and downstream untranslated region @@ -149,17 +149,17 @@ def test_transcript_cds_CTNNIP1_004(): eq_(cds, CTNNBIP1_004_CDS) @test_ensembl_releases() -def test_equal_transcripts(ensembl): - t1 = ensembl.transcripts_by_name("TP53-001")[0] +def test_equal_transcripts(genome): + t1 = genome.transcripts_by_name("TP53-001")[0] # get an identical gene - t2 = ensembl.transcript_by_id(t1.id) + t2 = genome.transcript_by_id(t1.id) eq_(t1, t2) eq_(hash(t1), hash(t2)) @test_ensembl_releases() -def test_not_equal_transcripts(release): - t1 = release.transcripts_by_name("MUC1-001")[0] - t2 = release.transcripts_by_name("BRCA1-001")[0] +def test_not_equal_transcripts(genome): + t1 = genome.transcripts_by_name("MUC1-001")[0] + t2 = genome.transcripts_by_name("BRCA1-001")[0] assert_not_equal(t1, t2) def test_protein_id(): @@ -174,3 +174,9 @@ def test_transcript_gene_should_match_parent_gene(): gene = ensembl77.gene_by_id(TP53_gene_id) for transcript in gene.transcripts: eq_(transcript.gene, gene) + +@test_ensembl_releases() +def test_BRCA1_001_has_protein_coding_biotype(genome): + transcript = genome.transcripts_by_name("BRCA1-001")[0] + assert transcript.is_protein_coding + eq_(transcript.biotype, "protein_coding")