-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
381 lines (300 loc) · 14.6 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
does_config_dir=$(DOES_PROJECT_DIR)/doe-suite-config
does_results_dir=$(DOES_PROJECT_DIR)/doe-suite-results
# default output path for super etl
DOES_SUPERETL_OUT?=$(DOES_PROJECT_DIR)/doe-suite-results-super
out?=$(DOES_SUPERETL_OUT)
DOES_CLOUD?=aws
cloud?=$(DOES_CLOUD) # env variable with default (aws)
test_delay?=0
DOES_CLOUD_STATE?=terminate
state?=$(DOES_CLOUD_STATE) # env variable with default (terminate)
epoch=$(shell date +%s)
# if id=last -> need to find the last suite id (based on suite name)
_suite_id= $(shell [ $(id) = last ] && (cd $(does_results_dir) && find . -type d -regex './$(suite)_[0-9]+' | sort | tail -n 1 | grep -o '[0-9]*$$') || echo $(id))
suite_id= $(shell [ $(id) = new ] && echo $(epoch) || echo $(_suite_id))
ansible_inventory=$(does_results_dir)/$(suite)_$(suite_id)/.inventory
# add prefix if defined for playbook run cmd
ifdef expfilter
myexpfilter=expfilter=$(expfilter)
endif
# TODO [nku] would it be possible if suite not defined and required to show available suites and take number input?
ifdef suite
mysuite=--suite $(suite)
endif
ifdef id
myid=--id $(id)
endif
ifdef pipelines
mypipelines=--pipelines $(pipelines)
endif
ifdef custom-suite-id # custom-suite-id="<suite>=<id> <suite>=<id>"
mycustomsuiteid=--suite_id $(custom-suite-id)
endif
# on `make` and `make help` list all targets with information
help:
@echo 'Running Experiments'
@echo ' make run suite=<SUITE> id=new - run the experiments in the suite'
@echo ' make run suite=<SUITE> id=<ID> - continue with the experiments in the suite with <ID> (often id=last)'
@echo ' make run suite=<SUITE> id=<ID> cloud=<CLOUD> - run suite on non-default cloud ([aws], euler)'
@echo ' make run suite=<SUITE> id=<ID> expfilter=<REGEX> - run only subset of experiments in suite where name matches the <REGEX> (suite must be valid)'
@echo ' make run-keep suite=<SUITE> id=new - does not terminate instances at the end, otherwise works the same as run target'
@echo 'Clean'
@echo ' make clean - terminate running cloud instances belonging to the project and local cleanup'
@echo ' make clean-result - delete all inclomplete results in doe-suite-results'
@echo 'Running ETL Locally'
@echo ' make etl suite=<SUITE> id=<ID> - run the etl pipeline of the suite (locally) to process results (often id=last)'
@echo ' make etl-design suite=<SUITE> id=<ID> - same as `make etl ...` but uses the pipeline from the suite design instead of results'
@echo ' make etl-all - run etl pipelines of all results'
@echo ' make etl-super config=<CONFIG> out=<PATH> - run the super etl to combine results of multiple suites (for <CONFIG> e.g., demo_plots)'
@echo ' make etl-super ... pipelines="<P1> <P2>" - run only a subset of pipelines in the super etl'
@echo 'Clean ETL'
@echo ' make etl-clean suite=<SUITE> id=<ID> - delete etl results from specific suite (can be regenerated with make etl ...)'
@echo ' make etl-clean-all - delete etl results from all suites (can be regenerated with make etl-all)'
@echo 'Gather Information'
@echo ' make info - list available suite designs'
@echo ' make status suite=<SUITE> id=<ID> - show the status of a specific suite run (often id=last)'
@echo 'Design of Experiment Suites'
@echo ' make design suite=<SUITE> - list all the run commands defined by the suite'
@echo ' make design-validate suite=<SUITE> - validate suite design and show with default values'
@echo 'Setting up a Suite'
@echo ' make new - initialize doe-suite-config from a template'
@echo 'Running Tests'
@echo ' make test - running all suites (seq) and comparing results to expected (on aws)'
@echo ' make euler-test cloud=euler - running all single instance suites on euler and compare results to expected'
@echo ' make etl-test-all - re-run all etl pipelines and compare results to current state (useful after update of etl step)'
#################################
# ___ ___ _____ _ _ ___
# / __| __|_ _| | | | _ \
# \__ \ _| | | | |_| | _/
# |___/___| |_| \___/|_|
#
#################################
# https://patorjk.com/software/taag/#p=display&h=2&v=2&f=Small&t=SETUP
# initialize a doe-suite-config from cookiecutter template (use defaults + ask user for inputs)
new:
@if [ ! -f $(does_config_dir)/pyproject.toml ]; then \
poetry -C doespy install && \
poetry -C doespy run cookiecutter cookiecutter-doe-suite-config -o $(DOES_PROJECT_DIR); \
fi
# depending on`cloud` variable, check if connection can be established
cloud-check:
@if [ $(cloud) = aws ]; then aws sts get-caller-identity > /dev/null || aws configure; fi
@if [ $(cloud) = euler ]; then ping -c 1 -W 1 login.euler.ethz.ch > /dev/null || (echo "\nCannot reach login.euler.ethz.ch, are you connected to the ETHZ network, e.g., via VPN?" && exit 1); fi
# install dependenicies to ensure that suites can be run + etl
install: new
@cd $(does_config_dir) && \
poetry install && \
poetry run ansible-galaxy install -r $(PWD)/requirements-collections.yml
install-silent: new
@cd $(does_config_dir) && \
poetry install > /dev/null && \
poetry run ansible-galaxy install -r $(PWD)/requirements-collections.yml > /dev/null
#################################
# ___ _ _ _ _
# | _ \ | | | \| |
# | / |_| | .` |
# |_|_\\___/|_|\_|
#
#################################
# https://patorjk.com/software/taag/#p=display&h=2&v=2&f=Small&t=RUN
# run a all experiments in a suite
.PHONY: run
run: install cloud-check
@cd $(does_config_dir) && \
ANSIBLE_CONFIG=$(PWD)/ansible.cfg \
ANSIBLE_INVENTORY=$(ansible_inventory) \
poetry run ansible-playbook $(PWD)/src/experiment-suite.yml -e "suite=$(suite) id=$(id) epoch=$(epoch) cloud=$(cloud) $(myexpfilter)"
.PHONY: run
run-keep: install cloud-check
@cd $(does_config_dir) && \
ANSIBLE_CONFIG=$(PWD)/ansible.cfg \
ANSIBLE_INVENTORY=$(ansible_inventory) \
poetry run ansible-playbook $(PWD)/src/experiment-suite.yml -e "suite=$(suite) id=$(id) epoch=$(epoch) cloud=$(cloud) $(myexpfilter) awsclean=False"
# TODO [nku] integrate them into run target when they are available
#
#run-keep: state=keep
#run-keep: run
#
#run-stop: state=stop
#run-stop: run
#
#run-terminate: state=terminate
#run-terminate: run
#################################
# ___ _____ _
# | __|_ _| |
# | _| | | | |__
# |___| |_| |____|
#
#################################
# https://patorjk.com/software/taag/#p=display&h=2&v=2&f=Small&t=ETL
# run etl pipelines from a specific suite run
.PHONY: etl
etl: install
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/etl/etl.py --suite $(suite) --id $(id)
# can be used for remote debugging with e.g., vs code
etl-debug: install
@cd $(does_config_dir) && \
poetry run python -m debugpy --listen 5678 --wait-for-client $(PWD)/doespy/doespy/etl/etl.py --suite $(suite) --id $(id)
# instead of using the etl pipeline defined in the results folder, it uses the pipeline from the design
# useful for developing an etl pipeline
etl-design: install
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/etl/etl.py --suite $(suite) --id $(id) --load_from_design
# run etl pipelines for all available results
etl-all: install
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/etl/etl.py --all
# run the etl pipelines defined in the doe-suite-config/super_etl/$(config)
# write the etl results into $(out)
# e.g., make etl-super config=demo_plots out=/home/kuenico/dev/doe-suite/tmp
etl-super: install
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/etl/super_etl.py --config $(config) --output_path $(out) $(mypipelines) $(mycustomsuiteid)
etl-super-debug: install
@cd $(does_config_dir) && \
poetry run python -m debugpy --listen 5678 --wait-for-client $(PWD)/doespy/doespy/etl/super_etl.py --config $(config) --output_path $(out) $(mypipelines)
# delete etl results for a specific `suite` and `id` (can be regenerated with `make etl suite=<SUITE> id=<ID>`)
etl-clean: install
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/etl/etl_clean.py --suite $(suite) --id $(id)
# delete all etl results (can be regenerated with `make etl-all`)
etl-clean-all: install
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/etl/etl_clean.py --all
# reruns all etl pipelines of all suite runs and compares the results
# -> can be used to check that a change in an ETL step did not break another earlier suite
etl-test-all: install
@cd $(does_config_dir) && \
poetry run pytest $(PWD)/doespy -q -k 'test_etl_pipeline' -s
#################################
# ___ _ ___ _ _ _
# / __| | | __| /_\ | \| |
# | (__| |__| _| / _ \| .` |
# \___|____|___/_/ \_\_|\_|
#
#################################
# https://patorjk.com/software/taag/#p=display&h=2&v=2&f=Small&t=CLEAN
# delete surplus python files: https://gist.github.com/lumengxi/0ae4645124cd4066f676
clean-local-py:
@find . -name '*.pyc' -exec rm -f {} +
@find . -name '*.pyo' -exec rm -f {} +
@find . -name '*~' -exec rm -f {} +
@find . -name '__pycache__' -exec rm -fr {} +
@find . -name '.pytest_cache' -exec rm -fr {} +
# delete all incomplete results
clean-result: install
@echo -n "Are you sure to delete all the incomplete results in $(does_results_dir)? [y/N] " && read ans && [ $${ans:-N} = y ]
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/result_clean.py --incomplete
# only keep one suite run per suite (the last complete)
clean-result-full: install
@echo -n "Are you sure to delete all the results in $(does_results_dir) except for the ones with the highest id? [y/N] " && read ans && [ $${ans:-N} = y ]
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/result_clean.py --keeplast
clean-cloud: install
@cd $(does_config_dir) && \
ANSIBLE_CONFIG=$(PWD)/ansible.cfg \
poetry run ansible-playbook $(PWD)/src/clear.yml -e "cloud=$(cloud)"
.PHONY: confirm
confirm:
@echo -n 'Are you sure? [y/N] ' && read ans && [ $${ans:-N} = y ]
clean-docker:
@echo "Terminating all doe-suite related docker containers..."
@if $(MAKE) confirm ; then $(MAKE) clean-cloud cloud=docker; else echo "skipping clean-docker"; fi
clean-aws:
@echo "Terminating all doe-suite related aws ec2 instances (+vpc)..."
@if $(MAKE) confirm ; then $(MAKE) clean-cloud cloud=aws; else echo "skipping clean-aws"; fi
clean: clean-local-py clean-docker clean-aws
#################################
# ___ _ _ ___ ___
# |_ _| \| | __/ _ \
# | || .` | _| (_) |
# |___|_|\_|_| \___/
#
#################################
# https://patorjk.com/software/taag/#p=display&h=2&v=2&f=Small&t=INFO
# list infos about the doe-suite-config (config + designs)
info: install-silent
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/info.py
# show status info of a suite (how much progress)
status: install-silent
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/status.py $(mysuite) $(myid)
#################################
# _____ ___ ___ _____
# |_ _| __/ __|_ _|
# | | | _|\__ \ | |
# |_| |___|___/ |_|
#
#################################
# https://patorjk.com/software/taag/#p=display&h=2&v=2&f=Small&t=TEST
# compares the result directory from a suite run, with the expected result directory (ignoring some run-specific aspects)
rescomp: install
@cd $(does_config_dir) && \
poetry run pytest $(PWD)/doespy -q -k 'test_does_results' -s --suite $(suite) --id $(id)
# Uses GNU parallel to execute the tests:
testid?=$(epoch)
does_test_run_dir=$(DOES_PROJECT_DIR)/doe-suite-tests/$@_$(testid)
PARGS = --progress --jobs 4 --delay 8s --joblog $(does_test_run_dir)/progress.log --results $(does_test_run_dir)/
PARALLEL = mkdir -p /$(does_test_run_dir) && parallel $(PARGS)
E2ETEST = echo "Running doe-suite e2e test: \n cloud= $${TEST_CLOUD}\n suites= $${TEST_SUITES}\n results= $(does_test_run_dir) \n"; $(PARALLEL) make test-{} cloud=$${TEST_CLOUD} ::: $${TEST_SUITES} ; cat $(does_test_run_dir)/progress.log
docker-minimal-test:
@TEST_SUITES="example01-minimal example02-single"; TEST_CLOUD="docker" ;\
$(E2ETEST)
docker-mini-test:
@TEST_SUITES="example01-minimal example02-single example05-complex"; TEST_CLOUD="docker" ;\
$(E2ETEST)
docker-test:
@TEST_SUITES="example01-minimal example02-single example03-format example04-multi example06-vars example07-etl example08-superetl example05-complex"; TEST_CLOUD="docker" ;\
$(E2ETEST)
aws-mini-test:
@TEST_SUITES="example01-minimal example02-single example05-complex"; TEST_CLOUD="aws" ;\
$(E2ETEST)
aws-test:
@TEST_SUITES="example01-minimal example02-single example03-format example04-multi example05-complex example06-vars example07-etl example08-superetl"; TEST_CLOUD="aws" ;\
$(E2ETEST)
euler-mini-test:
@TEST_SUITES="example01-minimal example02-single"; TEST_CLOUD="euler" ;\
$(E2ETEST)
euler-test:
@TEST_SUITES="example01-minimal example02-single example03-format example06-vars example07-etl example08-superetl"; TEST_CLOUD="euler" ;\
$(E2ETEST)
# runs the suite and then compares the results
test-%:
@$(MAKE) run suite=$* id=new
@$(MAKE) rescomp suite=$* id=last
# convert a results dir to the expected results dir
convert-to-expected:
@echo -n "Are you sure to make the results in $(does_results_dir)/$(suite)_$(id) to the expected results of the suite? [y/N] " && read ans && [ $${ans:-N} = y ]
cd $(does_results_dir)/$(suite)_$(id) && \
find . -type f -exec sed -i 's/$(id)/$$expected/g' {} +
mv $(does_results_dir)/$(suite)_$(id) $(does_results_dir)/$(suite)_\$$expected
#################################
# ___ ___ ___ ___ ___ _ _
# | \| __/ __|_ _/ __| \| |
# | |) | _|\__ \| | (_ | .` |
# |___/|___|___/___\___|_|\_|
#
#################################
design: install-silent
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/design/validate_extend.py --suite $(suite) --ignore-undefined-vars
design-validate: install-silent
@cd $(does_config_dir) && \
poetry run python $(PWD)/doespy/doespy/design/validate_extend.py --suite $(suite) --ignore-undefined-vars --only-validate
#################################
# ___ ___ ___ ___
# | \ / _ \ / __/ __|
# | |) | (_) | (__\__ \
# |___/ \___/ \___|___/
#
#################################
docs-build: install
@cd doespy && \
poetry install && \
poetry run make html -C ../docs
docs: docs-build
@open docs/build/html/index.html