Skip to content

Commit 7c1d64d

Browse files
committed
peakcaller to output more precise pval and qval
1 parent a6c4fe4 commit 7c1d64d

File tree

3 files changed

+47
-34
lines changed

3 files changed

+47
-34
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
__pycache__/
33
*.py[cod]
44
*$py.class
5-
5+
.vscode/
66
# C extensions
77
*.so
88

CLAM/peakcaller.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ def call_gene_peak(bam_dict, gene, unique_only=False, with_control=False, binsiz
375375
## "narrowPeak" format from
376376
## https://genome.ucsc.edu/FAQ/FAQformat.html#format12
377377
## chr start end name 1000 strand signalValue pVal qVal peak
378-
narrowPeak_formatter = "%s\t%i\t%i\t%s\t1000\t%s\t%s\t%.3e\t%.3e\t.\n"
378+
narrowPeak_formatter = "%s\t%i\t%i\t%s\t1000\t%s\t%s\t%s\t%s\t.\n"
379379
BED = ''
380380
if len(fold_change)==1:
381381
lb = np.log(fold_change[0]) if with_control else fold_change[0]

CLAM/permutation_peakcaller.py

+45-32
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
```
1414
Author:
1515
Zijun Zhang <[email protected]>
16-
17-
Tested under python 2.7.3
16+
Wankun Deng <[email protected]>
17+
Tested under python 3.7.6
1818
"""
1919

2020
from . import config
@@ -59,13 +59,14 @@ def parser(args):
5959

6060
# call peaks
6161
unibam_file=args.in_bam[0]
62-
multibam_file=args.in_bam[1]
62+
multibam_file=args.in_bam[1] if len(args.in_bam)>=2 else None
63+
6364
if nthread>1:
6465
pool = Pool(processes=args.nthread)
65-
assert len(args.in_bam)==2
66+
# assert len(args.in_bam)==2
6667
tid_to_qval_compact = pool.map(
6768
_child_get_permutation_fdr,
68-
[ (unibam_file, multibam_file, child_gene_list[i], gene_annot, args.qval_cutoff, max_iter, ~(args.unstranded=='unstranded'), 'fdr', random_state)
69+
[ (unibam_file, multibam_file, child_gene_list[i], gene_annot, args.qval_cutoff, max_iter, ~(args.lib_type=='unstranded'), 'fdr', random_state)
6970
for i in range(args.nthread)
7071
])
7172

@@ -75,8 +76,8 @@ def parser(args):
7576
unique_tid_to_qval, combined_tid_to_qval = unpack_tid_to_qval(tid_to_qval_compact)
7677
else:
7778
unique_tid_to_qval, combined_tid_to_qval = _child_get_permutation_fdr(
78-
(unibam_file, multibam_file, gene_list, gene_annot, args.qval_cutoff, max_iter, ~(args.unstranded=='unstranded'), 'fdr', random_state)
79-
)
79+
(unibam_file, multibam_file, gene_list, gene_annot, args.qval_cutoff, max_iter, ~(
80+
args.lib_type == 'unstranded'), 'fdr', random_state))
8081

8182
#pickle.dump(unique_tid_to_qval, open(tmp_dir+'/unique_to_qval.pdata','wb'), -1)
8283
#pickle.dump(combined_tid_to_qval, open(tmp_dir+'/combined_to_qval.pdata','wb'), -1)
@@ -93,7 +94,7 @@ def parser(args):
9394

9495

9596
unique_peaks=merge_peaks(unique_tid_to_qval, merge_size, args.qval_cutoff)
96-
combined_peaks=merge_peaks(combined_tid_to_qval, merge_size, args.qval_cutoff)
97+
combined_peaks=merge_peaks(combined_tid_to_qval, merge_size, args.qval_cutoff) if multibam_file is not None else None
9798

9899
# write peak-calling results to file.
99100
narrowPeak_formatter = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t.\t%.3e\t.\n"
@@ -109,20 +110,23 @@ def parser(args):
109110
_, signal_qval, gene_name = peak
110111
signal, qval = signal_qval
111112
f.write( narrowPeak_formatter % (chr, start, end, gene_name, 'unique', strand, signal, qval) )
112-
for peak in combined_peaks:
113-
if args.extend is None:
114-
wt_loc=peak[0]
115-
else:
116-
wt_loc=extend_peak_region(peak[0], args.extend)
117-
#f.write(wt_loc + '\t' + '\t'.join([str(x) for x in peak[1]]) + '\t' + peak[2] + '\tcombined\n')
118-
chr, start, end, strand = wt_loc.split('\t')
119-
_, signal_qval, gene_name = peak
120-
signal, qval = signal_qval
121-
f.write( narrowPeak_formatter % (chr, start, end, gene_name, 'combined', strand, signal, qval) )
122-
if args.unstranded:
123-
cmd = ''' sort -k1,1 -k2,2n %s/all_permutation_peaks.bed |awk '{OFS="\t"; print $1,$2,$3,$4":"$7":"$9,$5,$6}'| bedtools merge -d -1 -i stdin -c 4,5,6 -o collapse,collapse,distinct > %s''' % (output_dir, os.path.join(output_dir,'narrow_peak.permutation.bed') )
113+
if combined_peaks is not None:
114+
for peak in combined_peaks:
115+
if args.extend is None:
116+
wt_loc=peak[0]
117+
else:
118+
wt_loc=extend_peak_region(peak[0], args.extend)
119+
#f.write(wt_loc + '\t' + '\t'.join([str(x) for x in peak[1]]) + '\t' + peak[2] + '\tcombined\n')
120+
chr, start, end, strand = wt_loc.split('\t')
121+
_, signal_qval, gene_name = peak
122+
signal, qval = signal_qval
123+
f.write( narrowPeak_formatter % (chr, start, end, gene_name, 'combined', strand, signal, qval) )
124+
if args.lib_type=='unstranded':
125+
cmd = ''' sort -k1,1 -k2,2n %s/all_permutation_peaks.bed |awk '{OFS="\t"; print $1,$2,$3,$4":"$7":"$9,$5,$6}'| \
126+
bedtools merge -d -1 -i stdin -c 4,5,6 -o collapse,collapse,distinct > %s''' % (output_dir, os.path.join(output_dir,'narrow_peak.permutation.bed') )
124127
else:
125-
cmd = ''' sort -k1,1 -k2,2n %s/all_permutation_peaks.bed |awk '{OFS="\t"; print $1,$2,$3,$4":"$7":"$9,$5,$6}'| bedtools merge -s -d -1 -i stdin -c 4,5,6 -o collapse,collapse,distinct > %s''' % (output_dir, os.path.join(output_dir,'narrow_peak.permutation.bed') )
128+
cmd = ''' sort -k1,1 -k2,2n %s/all_permutation_peaks.bed |awk '{OFS="\t"; print $1,$2,$3,$4":"$7":"$9,$5,$6}'| \
129+
bedtools merge -s -d -1 -i stdin -c 4,5,6 -o collapse,collapse,distinct > %s''' % (output_dir, os.path.join(output_dir,'narrow_peak.permutation.bed') )
126130
os.system( cmd )
127131
logger.info('end')
128132
return
@@ -145,11 +149,17 @@ def unpack_tid_to_qval(compact):
145149
combined_tid_to_qval=defaultdict(list)
146150
for item in compact:
147151
unique, combined = item[0], item[1]
148-
for tid in combined:
149-
if len(unique[tid])>0:
150-
unique_tid_to_qval[tid]=unique[tid]
151-
if len(combined[tid])>1:
152-
combined_tid_to_qval[tid]=combined[tid]
152+
if combined is None:
153+
combined_tid_to_qval=None
154+
for tid in unique:
155+
if len(unique[tid]) > 0:
156+
unique_tid_to_qval[tid] = unique[tid]
157+
else:
158+
for tid in combined:
159+
if len(unique[tid])>0:
160+
unique_tid_to_qval[tid]=unique[tid]
161+
if len(combined[tid])>1:
162+
combined_tid_to_qval[tid]=combined[tid]
153163
return unique_tid_to_qval,combined_tid_to_qval
154164

155165

@@ -162,10 +172,11 @@ def _child_get_permutation_fdr(args):
162172
random.seed(seed)
163173

164174
unique_tid_to_qval=defaultdict(list)
165-
combined_tid_to_qval=defaultdict(list)
175+
combined_tid_to_qval = defaultdict(
176+
list) if multibam_file is not None else None
166177

167178
unibam=pysam.Samfile(unibam_file, 'rb')
168-
multibam=pysam.Samfile(multibam_file, 'rb')
179+
multibam=pysam.Samfile(multibam_file, 'rb') if multibam_file is not None else None
169180

170181
pid = os.getpid()
171182
tot = len(child_gene_list)
@@ -177,15 +188,17 @@ def _child_get_permutation_fdr(args):
177188
gene = gene_annot[gene_name]
178189
chr, start, end, strand, tid = gene[0:5]
179190
unique_reads = read_tid_frag_from_bam(gene, unibam, is_stranded, True)
180-
multi_reads = read_tid_frag_from_bam(gene, multibam, is_stranded, False)
191+
multi_reads = read_tid_frag_from_bam(gene, multibam, is_stranded, False) if multibam_file is not None else None
181192

182193
this_unique_to_qval = do_permutation(gene, unique_reads, max_iter, pval_cutoff, correction_method)
183-
this_combined_to_qval = do_permutation(gene, unique_reads+multi_reads, max_iter, pval_cutoff, correction_method)
194+
this_combined_to_qval = do_permutation(gene, unique_reads+multi_reads, max_iter, pval_cutoff, correction_method) if multibam_file is not None else None
184195

185196
unique_tid_to_qval[tid].extend(this_unique_to_qval)
186-
combined_tid_to_qval[tid].extend(this_combined_to_qval)
197+
if multibam_file is not None:
198+
combined_tid_to_qval[tid].extend(this_combined_to_qval)
187199
unibam.close()
188-
multibam.close()
200+
if multibam_file is not None:
201+
multibam.close()
189202
return unique_tid_to_qval, combined_tid_to_qval
190203

191204

0 commit comments

Comments
 (0)