1
1
from os import path
2
2
import sys
3
+ from typing import Dict , List
4
+
5
+ from Bio .Align import MultipleSeqAlignment
3
6
4
7
from .base import Alignment
5
8
@@ -10,34 +13,42 @@ class AlignmentRecoding(Alignment):
10
13
def __init__(self, args) -> None:
    """Set up the command by forwarding the processed ``args`` to the parent class."""
    # process_args turns the raw arguments into the keyword arguments
    # expected by the parent initialiser.
    init_kwargs = self.process_args(args)
    super().__init__(**init_kwargs)
12
15
13
def run(self) -> None:
    """
    Recode the alignment and print each sequence as ``>id`` plus sequence.

    The recoding scheme is looked up from the first element of
    ``self.code``; the recoded symbols of every record are joined into a
    single string before printing.
    """
    alignment, _, is_protein = self.get_alignment_and_format()
    table = self.read_recoding_table(self.code[0])
    recoded = self.recode_alignment(alignment, table, is_protein)

    for seq_id, symbols in recoded.items():
        sequence = "".join(symbols)
        print(f">{seq_id}\n{sequence}")
24
27
25
def recode_alignment(
    self,
    alignment: MultipleSeqAlignment,
    recoding_table: Dict[str, str],
    is_protein: bool,
) -> Dict[str, List[str]]:
    """
    Recode every record of ``alignment`` character by character.

    Characters reported by ``self.get_gap_chars()`` are copied through
    untouched. Any other character is upper-cased and looked up in
    ``recoding_table``; when the table has no entry, the original
    character is kept as-is.

    ``is_protein`` is accepted but not consulted by this method.

    Returns a dictionary keyed by record id whose values are the lists of
    recoded characters.
    """
    gap_chars = self.get_gap_chars()
    recoded_by_id = {}

    for record in alignment:
        symbols = []
        for residue in record.seq:
            if residue in gap_chars:
                # gap/ambiguity characters are never recoded
                symbols.append(residue)
            else:
                symbols.append(recoding_table.get(residue.upper(), residue))
        recoded_by_id[record.id] = symbols

    return recoded_by_id
39
47
40
def read_recoding_table(
    self,
    recoding: str
) -> Dict[str, str]:
    """
    Load a recoding table mapping original characters to recoded symbols.

    ``recoding`` is either the name of a bundled scheme (for example
    ``"Dayhoff-6"``) or the path to a custom table file. Each usable line
    of the file holds at least two whitespace-separated fields: the
    recoded symbol followed by the original character. Both fields are
    upper-cased and the original character becomes the dictionary key.
    Lines with fewer than two fields (such as blank lines) are skipped.

    Prints a message and exits via ``sys.exit()`` when ``recoding`` is
    ``None`` or when the table file cannot be found.
    """
    if recoding is None:
        print("Please specify a recoding table")
        sys.exit()

    recoding_paths = {
        "RY-nucleotide": "../../recoding_tables/RY-nucleotide.txt",
        "SandR-6": "../../recoding_tables/S_and_R-6.txt",
        "KGB-6": "../../recoding_tables/KGB-6.txt",
        "Dayhoff-6": "../../recoding_tables/Dayhoff-6.txt",
        "Dayhoff-9": "../../recoding_tables/Dayhoff-9.txt",
        "Dayhoff-12": "../../recoding_tables/Dayhoff-12.txt",
        "Dayhoff-15": "../../recoding_tables/Dayhoff-15.txt",
        "Dayhoff-18": "../../recoding_tables/Dayhoff-18.txt",
    }
    pathing = recoding_paths.get(recoding, str(recoding))

    if recoding in recoding_paths:
        # Bundled tables are stored relative to this module.
        # NOTE(review): relies on the module-level ``here`` defined outside
        # this block -- presumably the directory of this file; confirm.
        table_path = path.join(here, pathing)
    else:
        # A custom table is opened exactly as the user supplied it, so a
        # relative path resolves against the current working directory
        # rather than the package directory.
        table_path = pathing

    recoding_table = {}
    try:
        with open(table_path) as code:
            for line in code:
                parts = line.split()
                if len(parts) < 2:
                    # blank or incomplete line: nothing to map
                    continue
                recoding_table[parts[1].upper()] = parts[0].upper()
    except FileNotFoundError:
        print(f"Recoding table file '{pathing}' not found.")
        sys.exit()

    return recoding_table
79
84
0 commit comments