-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdepair.py
executable file
·101 lines (86 loc) · 3.8 KB
/
depair.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python
'''Separate interleaved paired-end reads into two files
DEinterleave PAIRed-end reads (DEPAIR) accepts an interleaved FASTA or FASTQ
file containing interleaved paired end reads and outputs two FASTA or FASTQ
files, one containing the forward reads (R1) and the other containing the
reverse reads (R2).
Usage: depair.py [options]
-fasta FASTA input file
-fastq FASTQ input file
-out output file name prefix (_R1.<type> and _R2.<type> are appended)
--version, -v prints version and exits
Copyright:
depair.py Split interleaved reads into two files
Copyright (C) 2016 William Brazelton, Alex Hyer
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
# Version string printed by the --version/-v command-line flag.
__version__ = '0.10'
import argparse
import datetime
import os
import sys
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from bioinformatic_tools import qualityCheck
def deinterleave(in_file, out_file, file_type):
    '''Split an interleaved paired-end read file into R1 and R2 files.

    Records are read from in_file in order and written alternately: the
    1st, 3rd, 5th, ... records go to '<out_file>_R1.<file_type>' and the
    2nd, 4th, 6th, ... records go to '<out_file>_R2.<file_type>'. If the
    input holds an odd number of records, the last one lands in the R1
    file without a mate.

    Args:
        in_file: path of the interleaved input file.
        out_file: output name prefix; '_R1.' / '_R2.' plus file_type are
            appended to build the two output file names.
        file_type: format name passed to SeqIO.parse and SeqIO.write and
            also used as the output file extension (the script passes
            'fasta' or 'fastq').
    '''
    forward_file_name = out_file + '_R1.' + file_type
    reverse_file_name = out_file + '_R2.' + file_type

    # The input is opened first so that a missing input file fails before
    # any output file is created. Outputs are opened with 'w' (not 'a') so
    # that re-running with the same prefix replaces earlier results instead
    # of silently appending duplicate reads to them.
    with open(in_file, 'r') as in_handle, \
            open(forward_file_name, 'w') as out_handle_forward, \
            open(reverse_file_name, 'w') as out_handle_reverse:
        # Index 0 -> forward (R1), index 1 -> reverse (R2).
        out_handles = (out_handle_forward, out_handle_reverse)
        records = SeqIO.parse(in_handle, file_type)
        for position, seq_record in enumerate(records):
            SeqIO.write(seq_record, out_handles[position % 2], file_type)

    # Called only after every handle is closed so the output files are
    # fully flushed to disk. NOTE(review): qualityCheck comes from
    # bioinformatic_tools; what exactly it verifies is not visible here.
    qualityCheck(out_file, file_type, in_file, forward_file_name,
                 reverse_file_name)
def build_parser():
    '''Build the command-line argument parser for this script.

    The parser accepts -fasta and -fastq (mutually exclusive input files),
    -out (output file name prefix) and --version/-v. No argument is marked
    as required here; missing arguments are reported by the script body so
    that running with no arguments can print the module documentation.

    Returns:
        The configured argparse.ArgumentParser.
    '''
    # Adjacent literals are joined with explicit trailing spaces; the
    # previous concatenation dropped them ("containinginterleaved",
    # "intotwo") in the --help output.
    parser = argparse.ArgumentParser(
        description='Separates a FASTA or FASTQ file containing '
                    'interleaved paired-end reads into two separate files')
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument('-fasta',
                       default=None,
                       help='FASTA file to seperate')
    group.add_argument('-fastq',
                       default=None,
                       help='FASTQ file to seperate')
    parser.add_argument('-out',
                        default=None,
                        help='name of output file, R1 and R2 added to name')
    parser.add_argument('--version', '-v',
                        help='prints version and exit',
                        action='store_true')
    return parser


if __name__ == '__main__':
    args = build_parser().parse_args()

    if args.version:
        print(__version__)
        sys.exit(0)
    elif args.out is None and args.fasta is None and args.fastq is None:
        # No arguments at all: show the module docstring as usage text.
        print(__doc__)
        sys.exit(0)
    elif args.out is None:
        print('An output file name must be specified.')
        sys.exit(1)
    elif args.fasta is not None:
        deinterleave(args.fasta, args.out, 'fasta')
    elif args.fastq is not None:
        deinterleave(args.fastq, args.out, 'fastq')
    else:
        # -out was given without -fasta or -fastq.
        print('You must specify an input file.')
        sys.exit(1)