Skip to content

Commit

Permalink
Merge pull request #989 from broadinstitute/benb/other_cleanup
Browse files Browse the repository at this point in the history
Final Cleanup!
  • Loading branch information
jklugherz authored Nov 25, 2024
2 parents 2909ec0 + a5ebfa1 commit fddba67
Show file tree
Hide file tree
Showing 106 changed files with 329 additions and 595 deletions.
6 changes: 2 additions & 4 deletions v03_pipeline/lib/annotations/fields_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@
from v03_pipeline.lib.paths import valid_reference_dataset_path
from v03_pipeline.lib.reference_datasets.reference_dataset import ReferenceDataset
from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
from v03_pipeline.lib.test.mocked_reference_dataset_test_case import (
TEST_GNOMAD_NONCODING_CONSTRAINT_38_HT,
TEST_SCREEN_38_HT,
)
from v03_pipeline.lib.vep import run_vep
from v03_pipeline.var.test.vep.mock_vep_data import MOCK_37_VEP_DATA, MOCK_38_VEP_DATA

Expand All @@ -24,6 +20,8 @@
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = (
'v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz'
)
# Repository-relative `.ht` paths under the test data directory for the GRCh38
# gnomAD non-coding constraint and SCREEN reference datasets (version 1.0).
TEST_GNOMAD_NONCODING_CONSTRAINT_38_HT = 'v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_non_coding_constraint/1.0.ht'
TEST_SCREEN_38_HT = 'v03_pipeline/var/test/reference_datasets/GRCh38/screen/1.0.ht'


class FieldsTest(MockedDatarootTestCase):
Expand Down
13 changes: 6 additions & 7 deletions v03_pipeline/lib/reference_datasets/dbnsfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ def predictor_parse(field: hl.StringExpression) -> hl.StringExpression:


# adapted from download_and_create_reference_datasets/v02/hail_scripts/write_dbnsfp_ht.py
def get_ht(raw_dataset_path: str, reference_genome: ReferenceGenome) -> hl.Table:
def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
types = TYPES[reference_genome]
rename = RENAME[reference_genome]

with download_zip_file(raw_dataset_path) as unzipped_dir:
with download_zip_file(path) as unzipped_dir:
ht = hl.import_table(
f'{unzipped_dir}/dbNSFP*_variant.chr*.gz',
types=types,
Expand All @@ -78,8 +78,7 @@ def get_ht(raw_dataset_path: str, reference_genome: ReferenceGenome) -> hl.Table
return key_by_locus_alleles(ht, reference_genome)


def custom_select(dataset_type: DatasetType, dbnsfp_ht: hl.Table) -> hl.Table:
    """Return the dbNSFP table, narrowed for the MITO dataset type.

    Any dataset type other than ``DatasetType.MITO`` gets ``dbnsfp_ht`` back
    unchanged. For MITO, only the ``SIFT_score`` and
    ``MutationTaster_pred_id`` fields are selected.
    """
    return (
        dbnsfp_ht
        if dataset_type != DatasetType.MITO
        else dbnsfp_ht.select(
            dbnsfp_ht.SIFT_score,
            dbnsfp_ht.MutationTaster_pred_id,
        )
    )
def select(_: ReferenceGenome, dataset_type: DatasetType, ht: hl.Table) -> hl.Table:
    """Return ``ht``, narrowed for the MITO dataset type.

    The first positional argument (a reference genome) is accepted but not
    used. For ``DatasetType.MITO`` only the ``SIFT_score`` and
    ``MutationTaster_pred_id`` fields are selected; every other dataset type
    gets ``ht`` back unchanged.
    """
    return (
        ht.select(ht.SIFT_score, ht.MutationTaster_pred_id)
        if dataset_type == DatasetType.MITO
        else ht
    )
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/eigen.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import hail as hl


def get_ht(raw_dataset_path: str, *_) -> hl.Table:
ht = hl.read_table(raw_dataset_path)
def get_ht(path: str, *_) -> hl.Table:
    """Read the Hail table at ``path`` and keep a single ``Eigen_phred`` field.

    The field is taken from ``info['Eigen-phred']`` of the table that was
    read. Any extra positional arguments are accepted and ignored.
    """
    eigen_ht = hl.read_table(path)
    eigen_phred = eigen_ht.info['Eigen-phred']
    return eigen_ht.select(Eigen_phred=eigen_phred)
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/exac.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
}


def get_ht(raw_dataset_path: str, reference_genome: ReferenceGenome) -> hl.Table:
ht = vcf_to_ht(raw_dataset_path, reference_genome, split_multi=True)
def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
    """Load the VCF at ``path`` as a table and select the fields in ``SELECT``.

    The VCF is converted with ``vcf_to_ht(..., split_multi=True)``. Each key
    of the module-level ``SELECT`` mapping becomes an output field whose value
    is ``parse_nested_field(ht, <mapped value>)``.
    """
    ht = vcf_to_ht(path, reference_genome, split_multi=True)
    selected_fields = {
        field_name: parse_nested_field(ht, source)
        for field_name, source in SELECT.items()
    }
    return ht.select(**selected_fields)
2 changes: 1 addition & 1 deletion v03_pipeline/lib/reference_datasets/exac_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class ExacTest(unittest.TestCase):
def test_exac(self):
with patch.object(
ReferenceDataset,
'raw_dataset_path',
'path',
return_value=EXAC_PATH,
):
ht = ReferenceDataset.exac.get_ht(ReferenceGenome.GRCh38)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@


def get_ht(
raw_dataset_path: str,
path: str,
reference_genome: ReferenceGenome,
) -> hl.Table:
ht = hl.read_table(raw_dataset_path)
ht = hl.read_table(path)
filtered_contig = 'chr1' if reference_genome == ReferenceGenome.GRCh38 else '1'
ht = hl.filter_intervals(
ht,
Expand Down
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/gnomad_exomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ def af_popmax_expression(
return ht.grpmax['gnomad'].AF


def get_ht(raw_dataset_path: str, reference_genome: ReferenceGenome) -> hl.Table:
def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
return _get_ht(
raw_dataset_path,
path,
reference_genome,
af_popmax_expression,
)
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/gnomad_exomes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class GnomadTest(unittest.TestCase):
def test_gnomad_exomes_37(self):
with patch.object(
ReferenceDataset,
'raw_dataset_path',
'path',
return_value=GNOMAD_EXOMES_37_PATH,
):
ht = ReferenceDataset.gnomad_exomes.get_ht(ReferenceGenome.GRCh37)
Expand Down Expand Up @@ -42,7 +42,7 @@ def test_gnomad_exomes_37(self):
def test_gnomad_exomes_38(self):
with patch.object(
ReferenceDataset,
'raw_dataset_path',
'path',
return_value=GNOMAD_EXOMES_38_PATH,
):
ht = ReferenceDataset.gnomad_exomes.get_ht(ReferenceGenome.GRCh38)
Expand Down
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/gnomad_genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ def af_popmax_expression(
return ht.grpmax.AF


def get_ht(raw_dataset_path: str, reference_genome: ReferenceGenome) -> hl.Table:
def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
return _get_ht(
raw_dataset_path,
path,
reference_genome,
af_popmax_expression,
)
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/gnomad_genomes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class GnomadTest(unittest.TestCase):
def test_gnomad_genomes_37(self):
with patch.object(
ReferenceDataset,
'raw_dataset_path',
'path',
return_value=GNOMAD_GENOMES_37_PATH,
):
ht = ReferenceDataset.gnomad_genomes.get_ht(ReferenceGenome.GRCh37)
Expand Down Expand Up @@ -42,7 +42,7 @@ def test_gnomad_genomes_37(self):
def test_gnomad_genomes_38(self):
with patch.object(
ReferenceDataset,
'raw_dataset_path',
'path',
return_value=GNOMAD_GENOMES_38_PATH,
):
ht = ReferenceDataset.gnomad_genomes.get_ht(ReferenceGenome.GRCh38)
Expand Down
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/gnomad_mito.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import hail as hl


def get_ht(raw_dataset_path: str, *_) -> hl.Table:
ht = hl.read_table(raw_dataset_path)
def get_ht(path: str, *_) -> hl.Table:
ht = hl.read_table(path)
ht = ht.select(
AN=hl.int32(ht.AN),
AC_hom=hl.int32(ht.AC_hom),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
)


def get_ht(raw_dataset_path: str, reference_genome: ReferenceGenome) -> hl.Table:
def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
ht = hl.import_table(
raw_dataset_path,
path,
types={
'start': hl.tint32,
'end': hl.tint32,
Expand Down
6 changes: 3 additions & 3 deletions v03_pipeline/lib/reference_datasets/gnomad_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from v03_pipeline.lib.model import ReferenceGenome


def get_ht(raw_dataset_path: str, reference_genome: ReferenceGenome) -> hl.Table:
def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
if reference_genome == ReferenceGenome.GRCh37:
return hl.read_matrix_table(raw_dataset_path).rows()
return hl.read_table(raw_dataset_path)
return hl.read_matrix_table(path).rows()
return hl.read_table(path)
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/gnomad_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ def hemi_field(reference_genome: ReferenceGenome) -> str:


def get_ht(
raw_dataset_path: str,
path: str,
reference_genome: ReferenceGenome,
af_popmax_expression: Callable,
) -> hl.Table:
ht = hl.read_table(raw_dataset_path)
ht = hl.read_table(path)
global_idx = hl.eval(ht.globals.freq_index_dict[global_idx_field(reference_genome)])
ht = ht.select(
AF=hl.float32(ht.freq[global_idx].AF),
Expand Down
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/hgmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from v03_pipeline.lib.model import ReferenceGenome


def get_ht(raw_dataset_path: str, reference_genome: ReferenceGenome) -> hl.Table:
def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
mt = hl.import_vcf(
raw_dataset_path,
path,
reference_genome=reference_genome.value,
force=True,
skip_invalid_loci=True,
Expand Down
2 changes: 1 addition & 1 deletion v03_pipeline/lib/reference_datasets/hgmd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class HGMDTest(unittest.TestCase):
def test_hgmd_38(self):
with patch.object(
ReferenceDataset,
'raw_dataset_path',
'path',
return_value=TEST_HGMD_VCF,
):
ht = ReferenceDataset.hgmd.get_ht(ReferenceGenome.GRCh38)
Expand Down
6 changes: 6 additions & 0 deletions v03_pipeline/lib/reference_datasets/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ def filter_mito_contigs(


def filter_contigs(ht, reference_genome: ReferenceGenome):
    """Keep only rows whose contig is in ``reference_genome.standard_contigs``.

    When ``ht`` exposes an ``interval`` attribute, the contig is read from
    ``ht.interval.start.contig``; otherwise it is read from
    ``ht.locus.contig``.
    """
    has_interval = hasattr(ht, 'interval')
    standard_contigs = hl.set(reference_genome.standard_contigs)
    contig = ht.interval.start.contig if has_interval else ht.locus.contig
    return ht.filter(standard_contigs.contains(contig))
Expand Down
4 changes: 2 additions & 2 deletions v03_pipeline/lib/reference_datasets/mitomap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@


# adapted from download_and_create_reference_datasets/v02/mito/write_mito_mitomap_ht.py
def get_ht(raw_dataset_path: str, reference_genome: ReferenceGenome) -> hl.Table:
def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
ht = hl.import_table(
raw_dataset_path,
path,
delimiter=',',
quote='"',
types={'Position': hl.tint32},
Expand Down
2 changes: 1 addition & 1 deletion v03_pipeline/lib/reference_datasets/mitomap_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class MitomapTest(unittest.TestCase):
def test_mitomap(self):
with patch.object(
ReferenceDataset,
'raw_dataset_path',
'path',
return_value=TEST_MITOMAP_CSV,
):
ht = ReferenceDataset.mitomap.get_ht(ReferenceGenome.GRCh38)
Expand Down
Loading

0 comments on commit fddba67

Please sign in to comment.