Skip to content

Commit 2726188

Browse files
author
Naozumi Hiranuma
committed
speed improvement thanks to bcov 2
1 parent 723bbf5 commit 2726188

File tree

2 files changed

+92
-92
lines changed

2 files changed

+92
-92
lines changed

DeepAccNet-SILENT.py

mode changed: 100755 → 100644
+21-71
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import time
1212
import pandas as pd
1313
import os
14-
import glob
1514

1615
from pyrosetta import *
1716
from pyrosetta.rosetta import *
@@ -76,14 +75,6 @@ def main():
7675
action="store_true",
7776
default=False,
7877
help="Run with bert features. Use extractBert.py to generate them. (Default: False)")
79-
80-
parser.add_argument("--features_only",
81-
action="store_true",
82-
help="Just dump features")
83-
84-
parser.add_argument("--prediction_only",
85-
action="store_true",
86-
help="Assumes stored features")
8778

8879
args = parser.parse_args()
8980

@@ -108,21 +99,6 @@ def main():
10899
return -1
109100

110101
if args.verbose: print("using", modelpath)
111-
112-
feature_folder = args.outfile + "_features/"
113-
114-
if ( args.features_only ):
115-
if ( not os.path.exists(feature_folder) ):
116-
os.mkdir(feature_folder)
117-
118-
if ( args.prediction_only ):
119-
if ( not os.path.exists(feature_folder)):
120-
print("--prediction_only: Features have not been generated. Run with --features_only first or remove this flag.")
121-
return -1
122-
123-
if ( args.features_only and args.prediction_only ):
124-
print("You can't specify both --features_only and --prediction_only at the same time.")
125-
return -1
126102

127103
##############################
128104
# Importing larger libraries #
@@ -131,75 +107,52 @@ def main():
131107
sys.path.insert(0, script_dir)
132108
import deepAccNet as dan
133109

134-
if ( not args.features_only ):
135-
model = dan.DeepAccNet(twobody_size = 49 if args.bert else 33)
136-
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
137-
checkpoint = torch.load(join(modelpath, "best.pkl"), map_location=device)
138-
model.load_state_dict(checkpoint["model_state_dict"])
139-
model.to(device)
140-
model.eval()
110+
model = dan.DeepAccNet(twobody_size = 49 if args.bert else 33)
111+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
112+
checkpoint = torch.load(join(modelpath, "best.pkl"), map_location=device)
113+
model.load_state_dict(checkpoint["model_state_dict"])
114+
model.to(device)
115+
model.eval()
141116

142117
#############################
143118
# Parse through silent file #
144119
#############################
145120

146-
# loading the silent like this allows us to get names without loading poses
147-
sfd_in = rosetta.core.io.silent.SilentFileData(rosetta.core.io.silent.SilentFileOptions())
148-
sfd_in.read_file(args.infile)
149-
names = sfd_in.tags()
121+
silent_files = utility.vector1_utility_file_FileName()
122+
for silent_file in basic.options.get_file_vector_option("in:file:silent"):
123+
silent_files.append(utility.file.FileName(args.infile))
124+
input_stream = core.import_pose.pose_stream.SilentFilePoseInputStream(args.infile)
150125

151126
# Open with append
152127
if not isfile(args.outfile) or args.reprocess:
153128
outfile = open(args.outfile, "w")
154129
if args.binder:
155-
outfile.write("global_lddt interface_lddt binder_lddt description\n")
130+
outfile.write("name, global_lddt, interface_lddt, binder_lddt\n")
156131
else:
157-
outfile.write("global_lddt description\n")
132+
outfile.write("name, global_lddt\n")
158133
done = []
159134
else:
160135
outfile = open(args.outfile, "a")
161-
done = pd.read_csv(args.outfile, sep="\s+")["description"].values
162-
136+
done = pd.read_csv(args.outfile)["name"].values
163137

164138
if args.savehidden != "" and not isdir(args.savehidden):
165139
os.mkdir(args.savehidden)
166140

167141
with torch.no_grad():
168142
# Parse through poses
169143
pose = core.pose.Pose()
170-
for name in names:
171-
144+
while input_stream.has_another_pose():
145+
146+
input_stream.fill_pose(pose)
147+
name = core.pose.tag_from_pose(pose)
172148
if name in done:
173149
print(name, "is already done.")
174150
continue
175-
176-
177151
print("Working on", name)
178152
per_sample_result = [name]
179-
feature_file = feature_folder + name
180-
181-
182153

183154
# This is where featurization happens
184-
if ( args.prediction_only ):
185-
try:
186-
features = np.load(feature_file + ".npz")
187-
except:
188-
print("Unable to load features for " + name)
189-
continue
190-
else:
191-
if ( args.features_only and os.path.exists( feature_file + ".npz" )):
192-
print(name, "is already done.")
193-
continue
194-
195-
sfd_in.get_structure(name).fill_pose(pose)
196-
features = dan.process_from_pose(pose)
197-
features['blen'] = np.array(pose.conformation().chain_end(1) - pose.conformation().chain_begin(1) + 1)
198-
199-
if ( args.features_only ):
200-
np.savez(feature_file, **features)
201-
continue
202-
155+
features = dan.process_from_pose(pose)
203156

204157
# This is where prediction happens
205158
# For the whole
@@ -226,7 +179,7 @@ def main():
226179
if args.binder:
227180

228181
# Binder length
229-
blen = features['blen']
182+
blen = pose.conformation().chain_end(1) - pose.conformation().chain_begin(1) + 1
230183
plen = estogram.shape[-1]
231184
if blen==plen:
232185
continue
@@ -257,15 +210,12 @@ def main():
257210
# Write the result
258211
if args.binder:
259212
r = per_sample_result
260-
outfile.write("%5f %5f %5f %s\n"%(r[1], r[2], r[3], r[0]))
213+
outfile.write("%s, %5f, %5f, %5f\n"%(r[0], r[1], r[2], r[3]))
261214
else:
262215
r = per_sample_result
263-
outfile.write("%5f %s\n"%(r[1], r[0]))
216+
outfile.write("%s, %5f\n"%(r[0], r[1]))
264217
outfile.flush()
265218
os.fsync(outfile.fileno())
266-
267-
if ( args.prediction_only ):
268-
os.remove(feature_file + ".npz")
269219

270220
outfile.close()
271221

scripts/DeepAccNet-SILENT-old.py → DeepAccNet-SILENT_v2.py (renamed)

+71-21
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import time
1212
import pandas as pd
1313
import os
14+
import glob
1415

1516
from pyrosetta import *
1617
from pyrosetta.rosetta import *
@@ -75,6 +76,14 @@ def main():
7576
action="store_true",
7677
default=False,
7778
help="Run with bert features. Use extractBert.py to generate them. (Default: False)")
79+
80+
parser.add_argument("--features_only",
81+
action="store_true",
82+
help="Just dump features")
83+
84+
parser.add_argument("--prediction_only",
85+
action="store_true",
86+
help="Assumes stored features")
7887

7988
args = parser.parse_args()
8089

@@ -99,6 +108,21 @@ def main():
99108
return -1
100109

101110
if args.verbose: print("using", modelpath)
111+
112+
feature_folder = args.outfile + "_features/"
113+
114+
if ( args.features_only ):
115+
if ( not os.path.exists(feature_folder) ):
116+
os.mkdir(feature_folder)
117+
118+
if ( args.prediction_only ):
119+
if ( not os.path.exists(feature_folder)):
120+
print("--prediction_only: Features have not been generated. Run with --features_only first or remove this flag.")
121+
return -1
122+
123+
if ( args.features_only and args.prediction_only ):
124+
print("You can't specify both --features_only and --prediction_only at the same time.")
125+
return -1
102126

103127
##############################
104128
# Importing larger libraries #
@@ -107,52 +131,75 @@ def main():
107131
sys.path.insert(0, script_dir)
108132
import deepAccNet as dan
109133

110-
model = dan.DeepAccNet(twobody_size = 49 if args.bert else 33)
111-
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
112-
checkpoint = torch.load(join(modelpath, "best.pkl"), map_location=device)
113-
model.load_state_dict(checkpoint["model_state_dict"])
114-
model.to(device)
115-
model.eval()
134+
if ( not args.features_only ):
135+
model = dan.DeepAccNet(twobody_size = 49 if args.bert else 33)
136+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
137+
checkpoint = torch.load(join(modelpath, "best.pkl"), map_location=device)
138+
model.load_state_dict(checkpoint["model_state_dict"])
139+
model.to(device)
140+
model.eval()
116141

117142
#############################
118143
# Parse through silent file #
119144
#############################
120145

121-
silent_files = utility.vector1_utility_file_FileName()
122-
for silent_file in basic.options.get_file_vector_option("in:file:silent"):
123-
silent_files.append(utility.file.FileName(args.infile))
124-
input_stream = core.import_pose.pose_stream.SilentFilePoseInputStream(args.infile)
146+
# loading the silent like this allows us to get names without loading poses
147+
sfd_in = rosetta.core.io.silent.SilentFileData(rosetta.core.io.silent.SilentFileOptions())
148+
sfd_in.read_file(args.infile)
149+
names = sfd_in.tags()
125150

126151
# Open with append
127152
if not isfile(args.outfile) or args.reprocess:
128153
outfile = open(args.outfile, "w")
129154
if args.binder:
130-
outfile.write("name, global_lddt, interface_lddt, binder_lddt\n")
155+
outfile.write("global_lddt interface_lddt binder_lddt description\n")
131156
else:
132-
outfile.write("name, global_lddt\n")
157+
outfile.write("global_lddt description\n")
133158
done = []
134159
else:
135160
outfile = open(args.outfile, "a")
136-
done = pd.read_csv(args.outfile)["name"].values
161+
done = pd.read_csv(args.outfile, sep="\s+")["description"].values
162+
137163

138164
if args.savehidden != "" and not isdir(args.savehidden):
139165
os.mkdir(args.savehidden)
140166

141167
with torch.no_grad():
142168
# Parse through poses
143169
pose = core.pose.Pose()
144-
while input_stream.has_another_pose():
145-
146-
input_stream.fill_pose(pose)
147-
name = core.pose.tag_from_pose(pose)
170+
for name in names:
171+
148172
if name in done:
149173
print(name, "is already done.")
150174
continue
175+
176+
151177
print("Working on", name)
152178
per_sample_result = [name]
179+
feature_file = feature_folder + name
180+
181+
153182

154183
# This is where featurization happens
155-
features = dan.process_from_pose(pose)
184+
if ( args.prediction_only ):
185+
try:
186+
features = np.load(feature_file + ".npz")
187+
except:
188+
print("Unable to load features for " + name)
189+
continue
190+
else:
191+
if ( args.features_only and os.path.exists( feature_file + ".npz" )):
192+
print(name, "is already done.")
193+
continue
194+
195+
sfd_in.get_structure(name).fill_pose(pose)
196+
features = dan.process_from_pose(pose)
197+
features['blen'] = np.array(pose.conformation().chain_end(1) - pose.conformation().chain_begin(1) + 1)
198+
199+
if ( args.features_only ):
200+
np.savez(feature_file, **features)
201+
continue
202+
156203

157204
# This is where prediction happens
158205
# For the whole
@@ -179,7 +226,7 @@ def main():
179226
if args.binder:
180227

181228
# Binder length
182-
blen = pose.conformation().chain_end(1) - pose.conformation().chain_begin(1) + 1
229+
blen = features['blen']
183230
plen = estogram.shape[-1]
184231
if blen==plen:
185232
continue
@@ -210,12 +257,15 @@ def main():
210257
# Write the result
211258
if args.binder:
212259
r = per_sample_result
213-
outfile.write("%s, %5f, %5f, %5f\n"%(r[0], r[1], r[2], r[3]))
260+
outfile.write("%5f %5f %5f %s\n"%(r[1], r[2], r[3], r[0]))
214261
else:
215262
r = per_sample_result
216-
outfile.write("%s, %5f\n"%(r[0], r[1]))
263+
outfile.write("%5f %s\n"%(r[1], r[0]))
217264
outfile.flush()
218265
os.fsync(outfile.fileno())
266+
267+
if ( args.prediction_only ):
268+
os.remove(feature_file + ".npz")
219269

220270
outfile.close()
221271

0 commit comments

Comments (0)