From 96dce614073ece6c7d8c4ad5b6d32ea65efa9b6e Mon Sep 17 00:00:00 2001
From: Benjamin Blankenmeister
Date: Wed, 27 Nov 2024 21:03:22 -0500
Subject: [PATCH] Filter invalid alleles

---
 v03_pipeline/lib/reference_datasets/hmtvar.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/v03_pipeline/lib/reference_datasets/hmtvar.py b/v03_pipeline/lib/reference_datasets/hmtvar.py
index bd9e963ab..c657ee903 100644
--- a/v03_pipeline/lib/reference_datasets/hmtvar.py
+++ b/v03_pipeline/lib/reference_datasets/hmtvar.py
@@ -1,6 +1,7 @@
 import hail as hl
 import requests
 
+from v03_pipeline.lib.model.dataset_type import DatasetType
 from v03_pipeline.lib.model.definitions import ReferenceGenome
 
 
@@ -21,4 +22,9 @@ def get_ht(
         score=ht.disease_score,
     )
     ht = ht.key_by('locus', 'alleles')
+    ht = ht.filter(
+        ~DatasetType.SNV_INDEL.invalid_allele_types.contains(
+            hl.numeric_allele_type(ht.alleles[0], ht.alleles[1]),
+        ),
+    )
     return ht.group_by(*ht.key).aggregate(score=hl.agg.max(ht.score))