-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfasta-get-faa.py
executable file
·50 lines (39 loc) · 1.55 KB
/
fasta-get-faa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#! /usr/bin/env python
"""
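Extract sequences from a FASTA file according to names listed in a second file.
Only the text before the first tab on each line of the names file is used.
This version does not compare the full FASTA header: each record id is split on
the underscore ("_") and reduced to "c_" plus its second field (for example,
"c_42_7" becomes "c_42") before it is looked up in the list of names.
Matching records are appended to <file.faa>.select.fa.
usage:
python fasta-get-faa.py file.faa names.txt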
Copyright:
fasta-get-faa.py Extract FASTA sequences based on entry headers
Copyright (C) 2022 William Brazelton
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import sys
# Prefix put back in front of the second underscore-delimited field of a
# record id when building the key that is looked up in the names file.
CONTIG_PREFIX = 'c_'


def read_names(namesfilename):
    """Return the set of names listed in *namesfilename*.

    Only the text before the first tab of each line is kept; the line
    terminator is removed and blank entries are skipped.  A set is returned
    so that the per-record membership test does not scan every name.
    """
    names = set()
    with open(namesfilename) as namesfile:
        for line in namesfile:
            name = line.split('\t')[0].rstrip('\r\n')
            if name:
                names.add(name)
    return names


def contig_key(record_id):
    """Return the lookup key for a FASTA record id, or None.

    The id is split on "_" and the key is CONTIG_PREFIX plus the second
    field, so "c_42_7" gives "c_42".  None is returned when the id contains
    no underscore and therefore has no second field.
    """
    fields = record_id.split('_')
    if len(fields) < 2:
        return None
    return CONTIG_PREFIX + fields[1]


def main(argv):
    """Append the selected records of a FASTA file to <fasta>.select.fa.

    argv[1] is the FASTA file and argv[2] the names file; extra arguments
    are ignored.  Exits with a usage message when either is missing.  The
    key of every usable record is printed to standard output, and records
    whose key appears in the names file are written to the output file.
    """
    if len(argv) < 3:
        sys.exit('usage: python fasta-get-faa.py file.faa names.txt')
    fastafilename = argv[1]
    namesfilename = argv[2]
    outfilename = fastafilename + '.select.fa'

    names = read_names(namesfilename)

    # Imported here so a usage error is reported without needing Biopython.
    from Bio import SeqIO

    skipped = 0
    # NOTE: append mode is kept from the original script; running it twice
    # on the same input adds the selected records to the output a second time.
    with open(outfilename, 'a') as outfile:
        for fasta in SeqIO.parse(fastafilename, 'fasta'):
            header = contig_key(fasta.id)
            if header is None:
                # No underscore in the id: no key can be built, so the record
                # cannot match any name.
                skipped += 1
                continue
            print(header)
            if header in names:
                SeqIO.write(fasta, outfile, 'fasta')

    if skipped:
        sys.stderr.write(
            'warning: skipped %d record(s) whose id contains no underscore\n'
            % skipped
        )


if __name__ == '__main__':
    main(sys.argv)