-
Notifications
You must be signed in to change notification settings - Fork 3
/
parca.py
196 lines (183 loc) · 7.34 KB
/
parca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import argparse
import json
import os
import pickle
import shutil
import time
from ecoli.composites.ecoli_configs import CONFIG_DIR_PATH
from ecoli.experiments.ecoli_master_sim import SimConfig
from reconstruction.ecoli.knowledge_base_raw import KnowledgeBaseEcoli
from reconstruction.ecoli.fit_sim_data_1 import fitSimData_1
from validation.ecoli.validation_data_raw import ValidationDataRawEcoli
from validation.ecoli.validation_data import ValidationDataEcoli
from wholecell.utils import constants
import wholecell.utils.filepath as fp
def run_parca(config):
    """Run the ParCa pipeline and pickle its four products to disk.

    The products are written, in this order, into the directory returned by
    ``fp.makedirs(config["outdir"], constants.KB_DIR)``:

    1. raw data (``KnowledgeBaseEcoli``)
    2. sim data (result of ``fitSimData_1``)
    3. raw validation data (``ValidationDataRawEcoli``)
    4. validation data (``ValidationDataEcoli`` initialized from 3 and 1)

    Args:
        config: Mapping of ParCa options. The keys read here are
            ``outdir``, ``operons``, ``remove_rrna_operons``,
            ``remove_rrff``, ``stable_rrna``, ``new_genes``, ``cpus``,
            ``debug_parca``, ``load_intermediate``, ``save_intermediates``,
            ``intermediates_directory``,
            ``variable_elongation_transcription``,
            ``variable_elongation_translation``, ``ribosome_fitting`` and
            ``rnapoly_fitting``.

    Raises:
        KeyError: If ``config`` lacks one of the keys listed above.
    """

    def _pickle_to(path, obj):
        # Serialize one pipeline product to ``path`` in binary mode.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    # Create the output directory, then derive every output path up front.
    kb_dir = fp.makedirs(config["outdir"], constants.KB_DIR)
    raw_data_path, sim_data_path, raw_validation_path, validation_path = (
        os.path.join(kb_dir, filename)
        for filename in (
            constants.SERIALIZED_RAW_DATA,
            constants.SERIALIZED_SIM_DATA_FILENAME,
            constants.SERIALIZED_RAW_VALIDATION_DATA,
            constants.SERIALIZED_VALIDATION_DATA,
        )
    )

    # Stage 1: raw knowledge base.
    print(f"{time.ctime()}: Instantiating raw_data with operons={config['operons']}")
    raw_data = KnowledgeBaseEcoli(
        operons_on=config["operons"],
        remove_rrna_operons=config["remove_rrna_operons"],
        remove_rrff=config["remove_rrff"],
        stable_rrna=config["stable_rrna"],
        new_genes_option=config["new_genes"],
    )
    print(f"{time.ctime()}: Saving raw_data")
    _pickle_to(raw_data_path, raw_data)

    # Stage 2: fitted sim_data. The config stores "fitting enabled" flags
    # while fitSimData_1 takes "disable" flags, hence the negations.
    print(f"{time.ctime()}: Instantiating sim_data with operons={config['operons']}")
    sim_data = fitSimData_1(
        raw_data=raw_data,
        cpus=config["cpus"],
        debug=config["debug_parca"],
        load_intermediate=config["load_intermediate"],
        save_intermediates=config["save_intermediates"],
        intermediates_directory=config["intermediates_directory"],
        variable_elongation_transcription=config["variable_elongation_transcription"],
        variable_elongation_translation=config["variable_elongation_translation"],
        disable_ribosome_capacity_fitting=(not config["ribosome_fitting"]),
        disable_rnapoly_capacity_fitting=(not config["rnapoly_fitting"]),
    )
    print(f"{time.ctime()}: Saving sim_data")
    _pickle_to(sim_data_path, sim_data)

    # Stage 3: raw validation data.
    print(f"{time.ctime()}: Instantiating raw_validation_data")
    raw_validation_data = ValidationDataRawEcoli()
    print(f"{time.ctime()}: Saving raw_validation_data")
    _pickle_to(raw_validation_path, raw_validation_data)

    # Stage 4: validation data built from the raw validation and raw data.
    print(f"{time.ctime()}: Instantiating validation_data")
    validation_data = ValidationDataEcoli()
    validation_data.initialize(raw_validation_data, raw_data)
    print(f"{time.ctime()}: Saving validation_data")
    _pickle_to(validation_path, validation_data)
# Destinations of the ``store_true`` CLI flags. They are parsed with
# ``default=None`` (see _build_parser) and fall back to False in main() when
# neither the command line nor the config file supplies a value.
_STORE_TRUE_DESTS = (
    "remove_rrna_operons",
    "remove_rrff",
    "debug_parca",
    "save_intermediates",
)


def _build_parser(default_config):
    """Build the ParCa command-line parser.

    Every option except ``--config`` parses to ``None`` when it is absent
    from the command line. This lets the caller distinguish "not given" from
    an explicit value and override the JSON config only with options the
    user actually passed. The ``store_true`` flags therefore set
    ``default=None`` explicitly instead of argparse's implicit ``False``.

    Args:
        default_config: Path used as the default for ``--config`` and quoted
            in its help text.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="run_parca")
    parser.add_argument(
        "--config",
        action="store",
        default=default_config,
        help=(
            "Path to configuration file for the simulation. "
            "All key-value pairs in this file will be applied on top "
            f"of the options defined in {default_config}."
        ),
    )
    parser.add_argument(
        "-c",
        "--cpus",
        type=int,
        help="The number of CPU processes to use. Default = 1.",
    )
    parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        help="Directory to hold ParCa output kb folder. "
        "Default = reconstruction/sim_data",
    )
    parser.add_argument(
        "--operons",
        action=argparse.BooleanOptionalAction,
        help="Turn operons on (polycistronic).",
    )
    parser.add_argument(
        "--ribosome-fitting",
        action=argparse.BooleanOptionalAction,
        help="Fit ribosome expression to protein synthesis demands.",
    )
    parser.add_argument(
        "--rnapoly-fitting",
        action=argparse.BooleanOptionalAction,
        help="Fit RNA polymerase expression to protein synthesis demands.",
    )
    parser.add_argument(
        "--remove-rrna-operons",
        action="store_true",
        default=None,
        help="Remove the seven rRNA operons. Does not have any effect if"
        " --no-operons specified.",
    )
    parser.add_argument(
        "--remove-rrff",
        action="store_true",
        default=None,
        help="Remove the rrfF gene. If operons are enabled,"
        " removes the rrfF gene from the rrnD operon.",
    )
    parser.add_argument(
        "--debug-parca",
        action="store_true",
        default=None,
        help="Make Parca calculate only one arbitrarily-chosen transcription"
        " factor condition when adjusting gene expression levels, leaving"
        " the other TFs at their input levels for faster Parca debugging."
        " DO NOT USE THIS FOR A MEANINGFUL SIMULATION.",
    )
    parser.add_argument(
        "--load-intermediate",
        type=str,
        help="The function in the parca to load (skips functions that would"
        " have run before the function). Must run with --save-intermediates"
        " first.",
    )
    parser.add_argument(
        "--save-intermediates",
        action="store_true",
        default=None,
        help="If set, saves sim_data and cell_specs at intermediate"
        " function calls in the parca.",
    )
    parser.add_argument(
        "--intermediates-directory",
        type=str,
        help="Directory to save or load intermediate sim_data and cell_specs"
        " results from if --load-intermediate or --save-intermediates"
        " are set.",
    )
    parser.add_argument(
        "--variable-elongation-transcription",
        action=argparse.BooleanOptionalAction,
        help="Use a different elongation rate for different transcripts"
        " (currently increases rates for rRNA). Usually set this"
        " consistently between runParca and runSim.",
    )
    parser.add_argument(
        "--variable-elongation-translation",
        action=argparse.BooleanOptionalAction,
        help="Use a different elongation rate for different polypeptides"
        " (currently increases rates for ribosomal proteins)."
        " Usually set this consistently between runParca and runSim.",
    )
    return parser


def main():
    """Command-line entry point: resolve the ParCa options and run or skip ParCa.

    Options are layered in increasing precedence: the default JSON config,
    the JSON file given by ``--config``, then any option explicitly passed on
    the command line. If the merged config has a non-null ``sim_data_path``,
    ParCa is skipped and that file is copied into ``<outdir>/kb``; otherwise
    ``run_parca`` is called with the merged ``parca_options``.
    """
    default_config = os.path.join(CONFIG_DIR_PATH, "default.json")
    args = _build_parser(default_config).parse_args()

    # Start from the defaults, then apply the user-selected config on top.
    # merge_config_dicts is used for its in-place effect on the first dict.
    with open(default_config, "r") as f:
        config = json.load(f)
    if args.config is not None:
        with open(args.config, "r") as f:
            SimConfig.merge_config_dicts(config, json.load(f))

    # ParCa options are defined under `parca_options` key in config JSON
    # Merge these with CLI arguments, which take precedence. Only options
    # actually given on the command line are non-None, so flags that were
    # not passed no longer clobber values set in the config file.
    cli_options = {k: v for k, v in vars(args).items() if v is not None}
    cli_options.pop("config", None)
    parca_options = config.pop("parca_options")
    SimConfig.merge_config_dicts(parca_options, cli_options)
    # Preserve the historical default of False for switch-style flags that
    # neither the config file nor the command line mentions.
    for dest in _STORE_TRUE_DESTS:
        parca_options.setdefault(dest, False)

    # If config defines a sim_data_path, skip ParCa
    if config["sim_data_path"] is not None:
        out_kb = os.path.join(parca_options["outdir"], "kb")
        os.makedirs(out_kb, exist_ok=True)
        print(f"{time.ctime()}: Skipping ParCa. Using {config['sim_data_path']}")
        shutil.copy(config["sim_data_path"], out_kb)
    else:
        run_parca(parca_options)
# Run the ParCa command-line entry point only when this file is executed as
# a script, not when it is imported as a module.
if __name__ == "__main__":
    main()