Skip to content

Commit

Permalink
Handle duplicates
Browse files (browse the repository at this point in the history)
  • Loading branch information
bpblanken committed Nov 27, 2024
1 parent ee1a157 commit 680779d
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 5 deletions.
6 changes: 4 additions & 2 deletions v03_pipeline/lib/reference_datasets/dbnsfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@ def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
**{k: predictor_parse(ht[k]) for k in PREDICTOR_FIELDS},
)
ht = ht.rename(rename)

return key_by_locus_alleles(ht, reference_genome)
ht = key_by_locus_alleles(ht, reference_genome)
return ht.group_by(*ht.key).aggregate(
**{f: hl.agg.max(ht[f]) for f in ht.row_key},
)


def select(_: ReferenceGenome, dataset_type: DatasetType, ht: hl.Table) -> hl.Table:
Expand Down
3 changes: 2 additions & 1 deletion v03_pipeline/lib/reference_datasets/eigen.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@

def get_ht(path: str, *_) -> hl.Table:
ht = hl.read_table(path)
return ht.select(Eigen_phred=ht.info['Eigen-phred'])
ht = ht.select(Eigen_phred=ht.info['Eigen-phred'])
return ht.group_by(*ht.key).aggregate(Eigen_phred=hl.agg.max(ht.Eigen_phred))
3 changes: 2 additions & 1 deletion v03_pipeline/lib/reference_datasets/hmtvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ def get_ht(
alleles=hl.array([ht.ref_rCRS, ht.alt]),
score=ht.disease_score,
)
return ht.key_by('locus', 'alleles')
ht = ht.key_by('locus', 'alleles')
return ht.group_by(*ht.key).aggregate(score=hl.agg.max(ht.score))
3 changes: 2 additions & 1 deletion v03_pipeline/lib/reference_datasets/mitimpact.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ def get_ht(
alleles=[ht.Ref, ht.Alt],
score=hl.parse_float32(ht.APOGEE2_score),
)
return ht.key_by('locus', 'alleles')
ht = ht.key_by('locus', 'alleles')
return ht.group_by(*ht.key).aggregate(score=hl.agg.max(ht.score))

0 comments on commit 680779d

Please sign in to comment.