Skip to content

Commit 84c2664

Browse files
committed
Deduplicate BAMs with the same sample IDs
1 parent bbe0d36 commit 84c2664

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

bin/cmo_fillout

+4 -1
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,16 @@ genomePath = cmo.util.genomes[args.genome]['fasta']
3737

3838
### Extract sample IDs from BAMs unless user provided a GBCMS precomputed fillout
3939
bamString = []
40+
dedupBams = dict() # To deduplicate BAMs with the same sample IDs
4041
if args.fillout is None:
4142
for bam in args.bams:
4243
sam = pysam.AlignmentFile(bam, "rb" )
4344
# In MSK DMP BAMs, the more appropriate sample ID is in "ID" not "SM" - use the shorter one
4445
sample_id = sam.header['RG'][0]['ID'] if len(sam.header['RG'][0]['ID']) < len(sam.header['RG'][0]['SM']) else sam.header['RG'][0]['SM']
4546
sam.close()
46-
bamString.append('--bam '+sample_id+':'+bam)
47+
if sample_id not in dedupBams:
48+
bamString.append('--bam '+sample_id+':'+bam)
49+
dedupBams[sample_id] = 1
4750
bamString = string.join(bamString)
4851

4952
### Check if MAF has right genome

0 commit comments

Comments
 (0)