Skip to content

Commit 119a2e8

Browse files
author
Naozumi Hiranuma
committed
adding some code
1 parent 968ff9c commit 119a2e8

38 files changed

+10024
-0
lines changed

data/analysis_list.npy

5.55 KB
Binary file not shown.

data/readme.txt

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ver1: 4000 datasets
2+
ver2: expansion to 800 datasets
3+
ver3: trimmed homology to refinement sets
4+
ver4: trimmed post-CASP13 released targets

data/test_proteins2.npy

5.98 KB
Binary file not shown.

data/test_proteins3.npy

5.96 KB
Binary file not shown.

data/test_proteins4.npy

5.93 KB
Binary file not shown.

data/train_proteins2.npy

145 KB
Binary file not shown.

data/train_proteins3.npy

144 KB
Binary file not shown.

data/train_proteins4.npy

141 KB
Binary file not shown.

data/valid_proteins2.npy

5.98 KB
Binary file not shown.

data/valid_proteins3.npy

5.98 KB
Binary file not shown.

data/valid_proteins4.npy

5.81 KB
Binary file not shown.

deepAccNet/__init__.py

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from .dataProcessingUtils import *
2+
from .pyprotein import *
3+
from .featurize import *
4+
from .resnet import *
5+
from .model import *
6+
from .model2 import *
7+
from .utils import *
8+
from .dataset import *
274 Bytes
Binary file not shown.
709 Bytes
Binary file not shown.
Binary file not shown.
5.93 KB
Binary file not shown.
16.4 KB
Binary file not shown.
4.51 KB
Binary file not shown.
7.74 KB
Binary file not shown.
3.85 KB
Binary file not shown.
2.97 KB
Binary file not shown.
2.89 KB
Binary file not shown.

deepAccNet/conversion.py

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import numpy as np
2+
3+
# Three-letter amino-acid code (3LAA) to tip-atom name conversion.
4+
dict_3LAA_to_tip = {"ALA":"CB", "CYS":"SG", "ASP":"CG", "ASN":"CG", "GLU":"CD",
5+
"GLN":"CD", "PHE":"CZ", "HIS":"NE2", "ILE":"CD1", "GLY":"CA",
6+
"LEU":"CG", "MET":"SD", "ARG":"CZ", "LYS":"NZ", "PRO":"CG",
7+
"VAL":"CB", "TYR":"OH", "TRP":"CH2", "SER":"OG", "THR":"OG1"}
8+
9+
# One-letter amino-acid code (1LAA) to integer index conversion.
10+
aas = "ACDEFGHIKLMNPQRSTVWY-"
11+
# Map every character of `aas` to its position in that string.
dict_1LAA_to_num = {symbol: index for index, symbol in enumerate(aas)}
12+
13+
# Three-letter (3LAA) to one-letter (1LAA) amino-acid code conversion.
14+
dict_3LAA_to_1LAA = {"ALA":"A", "CYS":"C", "ASP":"D", "ASN":"N", "GLU":"E",
15+
"GLN":"Q", "PHE":"F", "HIS":"H", "ILE":"I", "GLY":"G",
16+
"LEU":"L", "MET":"M", "ARG":"R", "LYS":"K", "PRO":"P",
17+
"VAL":"V", "TYR":"Y", "TRP":"W", "SER":"S", "THR":"T"}

deepAccNet/data/blosum62.txt

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
2+
A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
3+
R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
4+
N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
5+
D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
6+
C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
7+
Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
8+
E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
9+
G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
10+
H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
11+
I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
12+
L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
13+
K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
14+
M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
15+
F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
16+
P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
17+
S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
18+
T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
19+
W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
20+
Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
21+
V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
22+
B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
23+
Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
24+
X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
25+
* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1

deepAccNet/data/coarse-group20.txt

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
ALA_CA CAbb
2+
ALA_CB CH3
3+
ALA_C CObb
4+
ALA_N Nbb
5+
ALA_O OCbb
6+
ARG_CA CAbb
7+
ARG_CB CH2
8+
ARG_C CObb
9+
ARG_N Nbb
10+
ARG_O OCbb
11+
ASN_CA CAbb
12+
ASN_CB CH2
13+
ASN_C CObb
14+
ASN_N Nbb
15+
ASN_O OCbb
16+
ASP_CA CAbb
17+
ASP_CB CH2
18+
ASP_C CObb
19+
ASP_N Nbb
20+
ASP_O OCbb
21+
CYS_CA CAbb
22+
CYS_CB CH2
23+
CYS_C CObb
24+
CYS_N Nbb
25+
CYS_O OCbb
26+
GLN_CA CAbb
27+
GLN_CB CH2
28+
GLN_C CObb
29+
GLN_N Nbb
30+
GLN_O OCbb
31+
GLU_CA CAbb
32+
GLU_CB CH2
33+
GLU_C CObb
34+
GLU_N Nbb
35+
GLU_O OCbb
36+
GLY_CA CAbb
37+
GLY_C CObb
38+
GLY_N Nbb
39+
GLY_O OCbb
40+
HIS_CA CAbb
41+
HIS_CB CH2
42+
HIS_C CObb
43+
HIS_N Nbb
44+
HIS_O OCbb
45+
ILE_CA CAbb
46+
ILE_CB CH1
47+
ILE_C CObb
48+
ILE_N Nbb
49+
ILE_O OCbb
50+
LEU_CA CAbb
51+
LEU_CB CH2
52+
LEU_C CObb
53+
LEU_N Nbb
54+
LEU_O OCbb
55+
LYS_CA CAbb
56+
LYS_CB CH2
57+
LYS_C CObb
58+
LYS_N Nbb
59+
LYS_O OCbb
60+
MET_CA CAbb
61+
MET_CB CH2
62+
MET_C CObb
63+
MET_N Nbb
64+
MET_O OCbb
65+
PHE_CA CAbb
66+
PHE_CB CH2
67+
PHE_C CObb
68+
PHE_N Nbb
69+
PHE_O OCbb
70+
PRO_CA CAbb
71+
PRO_CB CH2
72+
PRO_C CObb
73+
PRO_N Npro
74+
PRO_O OCbb
75+
SER_CA CAbb
76+
SER_CB CH2
77+
SER_C CObb
78+
SER_N Nbb
79+
SER_O OCbb
80+
THR_CA CAbb
81+
THR_CB CH1
82+
THR_C CObb
83+
THR_N Nbb
84+
THR_O OCbb
85+
TRP_CA CAbb
86+
TRP_CB CH2
87+
TRP_C CObb
88+
TRP_N Nbb
89+
TRP_O OCbb
90+
TYR_CA CAbb
91+
TYR_CB CH2
92+
TYR_C CObb
93+
TYR_N Nbb
94+
TYR_O OCbb
95+
VAL_CA CAbb
96+
VAL_CB CH1
97+
VAL_C CObb
98+
VAL_N Nbb
99+
VAL_O OCbb

deepAccNet/data/groups20.txt

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
ALA_CA CAbb
2+
ALA_CB CH3
3+
ALA_C CObb
4+
ALA_N Nbb
5+
ALA_O OCbb
6+
ARG_CA CAbb
7+
ARG_CB CH2
8+
ARG_C CObb
9+
ARG_CD CH2
10+
ARG_CG CH2
11+
ARG_CZ aroC
12+
ARG_NE Narg
13+
ARG_NH1 Narg
14+
ARG_NH2 Narg
15+
ARG_N Nbb
16+
ARG_O OCbb
17+
ASN_CA CAbb
18+
ASN_CB CH2
19+
ASN_C CObb
20+
ASN_CG CNH2
21+
ASN_ND2 NH2O
22+
ASN_N Nbb
23+
ASN_OD1 ONH2
24+
ASN_O OCbb
25+
ASP_CA CAbb
26+
ASP_CB CH2
27+
ASP_C CObb
28+
ASP_CG COO
29+
ASP_N Nbb
30+
ASP_OD1 OOC
31+
ASP_OD2 OOC
32+
ASP_O OCbb
33+
CYS_CA CAbb
34+
CYS_CB CH2
35+
CYS_C CObb
36+
CYS_N Nbb
37+
CYS_O OCbb
38+
CYS_SG S
39+
GLN_CA CAbb
40+
GLN_CB CH2
41+
GLN_C CObb
42+
GLN_CD CNH2
43+
GLN_CG CH2
44+
GLN_NE2 NH2O
45+
GLN_N Nbb
46+
GLN_OE1 ONH2
47+
GLN_O OCbb
48+
GLU_CA CAbb
49+
GLU_CB CH2
50+
GLU_C CObb
51+
GLU_CD COO
52+
GLU_CG CH2
53+
GLU_N Nbb
54+
GLU_OE1 OOC
55+
GLU_OE2 OOC
56+
GLU_O OCbb
57+
GLY_CA CAbb
58+
GLY_C CObb
59+
GLY_N Nbb
60+
GLY_O OCbb
61+
HIS_CA CAbb
62+
HIS_CB CH2
63+
HIS_C CObb
64+
HIS_CD2 aroC
65+
HIS_CE1 aroC
66+
HIS_CG aroC
67+
HIS_ND1 Nhis
68+
HIS_NE2 Ntrp
69+
HIS_N Nbb
70+
HIS_O OCbb
71+
ILE_CA CAbb
72+
ILE_CB CH1
73+
ILE_C CObb
74+
ILE_CD1 CH3
75+
ILE_CG1 CH2
76+
ILE_CG2 CH3
77+
ILE_N Nbb
78+
ILE_O OCbb
79+
LEU_CA CAbb
80+
LEU_CB CH2
81+
LEU_C CObb
82+
LEU_CD1 CH3
83+
LEU_CD2 CH3
84+
LEU_CG CH1
85+
LEU_N Nbb
86+
LEU_O OCbb
87+
LYS_CA CAbb
88+
LYS_CB CH2
89+
LYS_C CObb
90+
LYS_CD CH2
91+
LYS_CE CH2
92+
LYS_CG CH2
93+
LYS_N Nbb
94+
LYS_NZ Nlys
95+
LYS_O OCbb
96+
MET_CA CAbb
97+
MET_CB CH2
98+
MET_C CObb
99+
MET_CE CH3
100+
MET_CG CH2
101+
MET_N Nbb
102+
MET_O OCbb
103+
MET_SD S
104+
PHE_CA CAbb
105+
PHE_CB CH2
106+
PHE_C CObb
107+
PHE_CD1 aroC
108+
PHE_CD2 aroC
109+
PHE_CE1 aroC
110+
PHE_CE2 aroC
111+
PHE_CG aroC
112+
PHE_CZ aroC
113+
PHE_N Nbb
114+
PHE_O OCbb
115+
PRO_CA CAbb
116+
PRO_CB CH2
117+
PRO_C CObb
118+
PRO_CD CH2
119+
PRO_CG CH2
120+
PRO_N Npro
121+
PRO_O OCbb
122+
SER_CA CAbb
123+
SER_CB CH2
124+
SER_C CObb
125+
SER_N Nbb
126+
SER_OG OH
127+
SER_O OCbb
128+
THR_CA CAbb
129+
THR_CB CH1
130+
THR_C CObb
131+
THR_CG2 CH3
132+
THR_N Nbb
133+
THR_OG1 OH
134+
THR_O OCbb
135+
TRP_CA CAbb
136+
TRP_CB CH2
137+
TRP_C CObb
138+
TRP_CD1 aroC
139+
TRP_CD2 aroC
140+
TRP_CE2 aroC
141+
TRP_CE3 aroC
142+
TRP_CG aroC
143+
TRP_CH2 aroC
144+
TRP_CZ2 aroC
145+
TRP_CZ3 aroC
146+
TRP_NE1 Ntrp
147+
TRP_N Nbb
148+
TRP_O OCbb
149+
TYR_CA CAbb
150+
TYR_CB CH2
151+
TYR_C CObb
152+
TYR_CD1 aroC
153+
TYR_CD2 aroC
154+
TYR_CE1 aroC
155+
TYR_CE2 aroC
156+
TYR_CG aroC
157+
TYR_CZ aroC
158+
TYR_N Nbb
159+
TYR_OH OH
160+
TYR_O OCbb
161+
VAL_CA CAbb
162+
VAL_CB CH1
163+
VAL_C CObb
164+
VAL_CG1 CH3
165+
VAL_CG2 CH3
166+
VAL_N Nbb
167+
VAL_O OCbb
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
residue,steric_parameter,polarizability,volume,hydrophobicity,isoelectric_pt,helix_prob,sheet_prob
2+
ALA,1.28,0.05,1.00,0.31,6.11,0.42,0.23
3+
GLY,0.00,0.00,0.00,0.00,6.07,0.13,0.15
4+
VAL,3.67,0.14,3.00,1.22,6.02,0.27,0.49
5+
LEU,2.59,0.19,4.00,1.70,6.04,0.39,0.31
6+
ILE,4.19,0.19,4.00,1.80,6.04,0.30,0.45
7+
PHE,2.94,0.29,5.89,1.79,5.67,0.30,0.38
8+
TYR,2.94,0.30,6.47,0.96,5.66,0.25,0.41
9+
TRP,3.21,0.41,8.08,2.25,5.94,0.32,0.42
10+
THR,3.03,0.11,2.60,0.26,5.60,0.21,0.36
11+
SER,1.31,0.06,1.60,-0.04,5.70,0.20,0.28
12+
ARG,2.34,0.29,6.13,-1.01,10.74,0.36,0.25
13+
LYS,1.89,0.22,4.77,-0.99,9.99,0.32,0.27
14+
HIS,2.99,0.23,4.66,0.13,7.69,0.27,0.30
15+
ASP,1.60,0.11,2.78,-0.77,2.95,0.25,0.20
16+
GLU,1.56,0.15,3.78,-0.64,3.09,0.42,0.21
17+
ASN,1.60,0.13,2.95,-0.60,6.52,0.21,0.22
18+
GLN,1.56,0.18,3.95,-0.22,5.65,0.36,0.25
19+
MET,2.35,0.22,4.43,1.23,5.71,0.38,0.32
20+
PRO,2.67,0.00,2.72,0.72,6.80,0.13,0.34
21+
CYS,1.77,0.13,2.43,1.54,6.35,0.17,0.41

0 commit comments

Comments
 (0)