-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathseparate_short_read_contigs.py
52 lines (49 loc) · 1.34 KB
/
separate_short_read_contigs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def _split_on_gaps(sequence, gap):
    '''
    Split one scaffold sequence into contigs.

    sequence is the concatenated sequence text of a single FASTA record and
    gap is a string of N's whose length is the minimum run that separates two
    contigs. Returns the list of non-empty contigs with leading and trailing
    N's removed.
    '''
    # Splitting on exactly len(gap) N's handles longer runs as well: the
    # remainder of a longer run is always shorter than the gap and ends up at
    # the start of the next piece (or as a piece made only of N's), where
    # strip('N') removes it. Runs shorter than the gap that sit inside a
    # contig are left untouched.
    pieces = (piece.strip('N') for piece in sequence.split(gap))
    return [piece for piece in pieces if piece]


def main(filename, min_gap=10):
    '''
    Read a FASTA file and split every scaffold in it into contigs.

    A scaffold is cut wherever min_gap or more consecutive "N" characters
    occur; the run may span several lines of the same record. N's at the
    start or end of a contig are removed and contigs that would be empty are
    skipped. Header lines (starting with ">") and blank lines are not part of
    any contig, and a header always ends the contig before it.

    filename: path of the FASTA file to read.
    min_gap: minimum number of consecutive N's that separates two contigs
        (default 10).

    Returns a list with the sequences of all contigs, in file order.
    Raises ValueError if min_gap is smaller than 1.
    '''
    if min_gap < 1:
        raise ValueError('min_gap must be at least 1')
    gap = 'N' * min_gap
    contigs = []
    record = []  # sequence lines of the record currently being read
    with open(filename, 'r') as handle:
        for line in handle:
            line = line.rstrip('\n')
            if not line:
                continue
            if line.startswith('>'):
                # A new header closes the record collected so far.
                contigs.extend(_split_on_gaps(''.join(record), gap))
                record = []
            else:
                record.append(line)
    # The last record has no header after it, so it is flushed here;
    # otherwise the final contig of the file would be lost.
    contigs.extend(_split_on_gaps(''.join(record), gap))
    return contigs
all_contigs1 = main('illumina_MPG_2013.fasta')
print(len(all_contigs1))
# Store all contigs in a FASTA file so they are easier to use later.
# Each contig gets its 1-based position in the list as its header.
# The with-block closes (and flushes) the file even if a write fails.
with open('illumina_MPG_2013_contig.fasta', 'w') as contig_file:
    for number, contig in enumerate(all_contigs1, 1):
        contig_file.write(">" + str(number) + '\n' + contig + '\n')