13
13
```
14
14
Author:
15
15
16
-
17
- Tested under python 2.7.3
16
+
17
+ Tested under python 3.7.6
18
18
"""
19
19
20
20
from . import config
@@ -59,13 +59,14 @@ def parser(args):
59
59
60
60
# call peaks
61
61
unibam_file = args .in_bam [0 ]
62
- multibam_file = args .in_bam [1 ]
62
+ multibam_file = args .in_bam [1 ] if len (args .in_bam )>= 2 else None
63
+
63
64
if nthread > 1 :
64
65
pool = Pool (processes = args .nthread )
65
- assert len (args .in_bam )== 2
66
+ # assert len(args.in_bam)==2
66
67
tid_to_qval_compact = pool .map (
67
68
_child_get_permutation_fdr ,
68
- [ (unibam_file , multibam_file , child_gene_list [i ], gene_annot , args .qval_cutoff , max_iter , ~ (args .unstranded == 'unstranded' ), 'fdr' , random_state )
69
+ [ (unibam_file , multibam_file , child_gene_list [i ], gene_annot , args .qval_cutoff , max_iter , ~ (args .lib_type == 'unstranded' ), 'fdr' , random_state )
69
70
for i in range (args .nthread )
70
71
])
71
72
@@ -75,8 +76,8 @@ def parser(args):
75
76
unique_tid_to_qval , combined_tid_to_qval = unpack_tid_to_qval (tid_to_qval_compact )
76
77
else :
77
78
unique_tid_to_qval , combined_tid_to_qval = _child_get_permutation_fdr (
78
- (unibam_file , multibam_file , gene_list , gene_annot , args .qval_cutoff , max_iter , ~ (args . unstranded == 'unstranded' ), 'fdr' , random_state )
79
- )
79
+ (unibam_file , multibam_file , gene_list , gene_annot , args .qval_cutoff , max_iter , ~ (
80
+ args . lib_type == 'unstranded' ), 'fdr' , random_state ) )
80
81
81
82
#pickle.dump(unique_tid_to_qval, open(tmp_dir+'/unique_to_qval.pdata','wb'), -1)
82
83
#pickle.dump(combined_tid_to_qval, open(tmp_dir+'/combined_to_qval.pdata','wb'), -1)
@@ -93,7 +94,7 @@ def parser(args):
93
94
94
95
95
96
unique_peaks = merge_peaks (unique_tid_to_qval , merge_size , args .qval_cutoff )
96
- combined_peaks = merge_peaks (combined_tid_to_qval , merge_size , args .qval_cutoff )
97
+ combined_peaks = merge_peaks (combined_tid_to_qval , merge_size , args .qval_cutoff ) if multibam_file is not None else None
97
98
98
99
# write peak-calling results to file.
99
100
narrowPeak_formatter = "%s\t %s\t %s\t %s\t %s\t %s\t %s\t .\t %.3e\t .\n "
@@ -109,20 +110,23 @@ def parser(args):
109
110
_ , signal_qval , gene_name = peak
110
111
signal , qval = signal_qval
111
112
f .write ( narrowPeak_formatter % (chr , start , end , gene_name , 'unique' , strand , signal , qval ) )
112
- for peak in combined_peaks :
113
- if args .extend is None :
114
- wt_loc = peak [0 ]
115
- else :
116
- wt_loc = extend_peak_region (peak [0 ], args .extend )
117
- #f.write(wt_loc + '\t' + '\t'.join([str(x) for x in peak[1]]) + '\t' + peak[2] + '\tcombined\n')
118
- chr , start , end , strand = wt_loc .split ('\t ' )
119
- _ , signal_qval , gene_name = peak
120
- signal , qval = signal_qval
121
- f .write ( narrowPeak_formatter % (chr , start , end , gene_name , 'combined' , strand , signal , qval ) )
122
- if args .unstranded :
123
- cmd = ''' sort -k1,1 -k2,2n %s/all_permutation_peaks.bed |awk '{OFS="\t "; print $1,$2,$3,$4":"$7":"$9,$5,$6}'| bedtools merge -d -1 -i stdin -c 4,5,6 -o collapse,collapse,distinct > %s''' % (output_dir , os .path .join (output_dir ,'narrow_peak.permutation.bed' ) )
113
+ if combined_peaks is not None :
114
+ for peak in combined_peaks :
115
+ if args .extend is None :
116
+ wt_loc = peak [0 ]
117
+ else :
118
+ wt_loc = extend_peak_region (peak [0 ], args .extend )
119
+ #f.write(wt_loc + '\t' + '\t'.join([str(x) for x in peak[1]]) + '\t' + peak[2] + '\tcombined\n')
120
+ chr , start , end , strand = wt_loc .split ('\t ' )
121
+ _ , signal_qval , gene_name = peak
122
+ signal , qval = signal_qval
123
+ f .write ( narrowPeak_formatter % (chr , start , end , gene_name , 'combined' , strand , signal , qval ) )
124
+ if args .lib_type == 'unstranded' :
125
+ cmd = ''' sort -k1,1 -k2,2n %s/all_permutation_peaks.bed |awk '{OFS="\t "; print $1,$2,$3,$4":"$7":"$9,$5,$6}'| \
126
+ bedtools merge -d -1 -i stdin -c 4,5,6 -o collapse,collapse,distinct > %s''' % (output_dir , os .path .join (output_dir ,'narrow_peak.permutation.bed' ) )
124
127
else :
125
- cmd = ''' sort -k1,1 -k2,2n %s/all_permutation_peaks.bed |awk '{OFS="\t "; print $1,$2,$3,$4":"$7":"$9,$5,$6}'| bedtools merge -s -d -1 -i stdin -c 4,5,6 -o collapse,collapse,distinct > %s''' % (output_dir , os .path .join (output_dir ,'narrow_peak.permutation.bed' ) )
128
+ cmd = ''' sort -k1,1 -k2,2n %s/all_permutation_peaks.bed |awk '{OFS="\t "; print $1,$2,$3,$4":"$7":"$9,$5,$6}'| \
129
+ bedtools merge -s -d -1 -i stdin -c 4,5,6 -o collapse,collapse,distinct > %s''' % (output_dir , os .path .join (output_dir ,'narrow_peak.permutation.bed' ) )
126
130
os .system ( cmd )
127
131
logger .info ('end' )
128
132
return
@@ -145,11 +149,17 @@ def unpack_tid_to_qval(compact):
145
149
combined_tid_to_qval = defaultdict (list )
146
150
for item in compact :
147
151
unique , combined = item [0 ], item [1 ]
148
- for tid in combined :
149
- if len (unique [tid ])> 0 :
150
- unique_tid_to_qval [tid ]= unique [tid ]
151
- if len (combined [tid ])> 1 :
152
- combined_tid_to_qval [tid ]= combined [tid ]
152
+ if combined is None :
153
+ combined_tid_to_qval = None
154
+ for tid in unique :
155
+ if len (unique [tid ]) > 0 :
156
+ unique_tid_to_qval [tid ] = unique [tid ]
157
+ else :
158
+ for tid in combined :
159
+ if len (unique [tid ])> 0 :
160
+ unique_tid_to_qval [tid ]= unique [tid ]
161
+ if len (combined [tid ])> 1 :
162
+ combined_tid_to_qval [tid ]= combined [tid ]
153
163
return unique_tid_to_qval ,combined_tid_to_qval
154
164
155
165
@@ -162,10 +172,11 @@ def _child_get_permutation_fdr(args):
162
172
random .seed (seed )
163
173
164
174
unique_tid_to_qval = defaultdict (list )
165
- combined_tid_to_qval = defaultdict (list )
175
+ combined_tid_to_qval = defaultdict (
176
+ list ) if multibam_file is not None else None
166
177
167
178
unibam = pysam .Samfile (unibam_file , 'rb' )
168
- multibam = pysam .Samfile (multibam_file , 'rb' )
179
+ multibam = pysam .Samfile (multibam_file , 'rb' ) if multibam_file is not None else None
169
180
170
181
pid = os .getpid ()
171
182
tot = len (child_gene_list )
@@ -177,15 +188,17 @@ def _child_get_permutation_fdr(args):
177
188
gene = gene_annot [gene_name ]
178
189
chr , start , end , strand , tid = gene [0 :5 ]
179
190
unique_reads = read_tid_frag_from_bam (gene , unibam , is_stranded , True )
180
- multi_reads = read_tid_frag_from_bam (gene , multibam , is_stranded , False )
191
+ multi_reads = read_tid_frag_from_bam (gene , multibam , is_stranded , False ) if multibam_file is not None else None
181
192
182
193
this_unique_to_qval = do_permutation (gene , unique_reads , max_iter , pval_cutoff , correction_method )
183
- this_combined_to_qval = do_permutation (gene , unique_reads + multi_reads , max_iter , pval_cutoff , correction_method )
194
+ this_combined_to_qval = do_permutation (gene , unique_reads + multi_reads , max_iter , pval_cutoff , correction_method ) if multibam_file is not None else None
184
195
185
196
unique_tid_to_qval [tid ].extend (this_unique_to_qval )
186
- combined_tid_to_qval [tid ].extend (this_combined_to_qval )
197
+ if multibam_file is not None :
198
+ combined_tid_to_qval [tid ].extend (this_combined_to_qval )
187
199
unibam .close ()
188
- multibam .close ()
200
+ if multibam_file is not None :
201
+ multibam .close ()
189
202
return unique_tid_to_qval , combined_tid_to_qval
190
203
191
204
0 commit comments