Skip to content

Commit

Permalink
Add validation for duplicate variants
Browse files (browse the repository at this point in the history)
  • Loading branch information
bpblanken committed Nov 27, 2024
1 parent ab13f4c commit 6eb7331
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions v03_pipeline/lib/reference_datasets/reference_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

import hail as hl

from v03_pipeline.lib.misc.validation import (
validate_allele_type,
validate_no_duplicate_variants,
)
from v03_pipeline.lib.model import AccessControl, DatasetType, Env, ReferenceGenome
from v03_pipeline.lib.reference_datasets import clinvar, dbnsfp
from v03_pipeline.lib.reference_datasets.misc import (
Expand Down Expand Up @@ -115,6 +119,11 @@ def get_ht(
if enum_selects:
ht = ht.transmute(**enum_selects)
ht = filter_contigs(ht, reference_genome)
# Reference Datasets are DatasetType agnostic, but the validation
# methods below (in theory) also support SV/GCNV and so take a
# DatasetType argument. SNV_INDEL is passed as a proxy for non-SV/GCNV.
validate_allele_type(ht, DatasetType.SNV_INDEL)
validate_no_duplicate_variants(ht, reference_genome, DatasetType.SNV_INDEL)
# NB: we do not filter with "filter" here.
# ReferenceDatasets are DatasetType agnostic, and that
# filter is only used at annotation time.
Expand Down

0 comments on commit 6eb7331

Please sign in to comment.