-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreproduce-logreg.py
executable file
·48 lines (39 loc) · 1.6 KB
/
reproduce-logreg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python
from multiprocessing import Lock
import argparse
from utils import read_json_log, read_telecom_churn, run_pool,\
evaluate_logreg_parameters, log_json
if __name__ == "__main__":
    # Command-line interface: two mandatory log paths plus two integer
    # tuning knobs that are handed to run_pool through ``args``.
    parser = argparse.ArgumentParser(
        description='Reproduce recorded parameters')
    for required_flag in ('--input-log', '--output-log'):
        parser.add_argument(required_flag, required=True)
    for int_flag, fallback in (('--processes', 1), ('--chunksize', 10)):
        parser.add_argument(int_flag, type=int, default=fallback)
    args = parser.parse_args()

    # Lock acquired by ``evaluator`` around every write to the output log.
    log_lock = Lock()

    # Data set pieces passed as keyword arguments to
    # evaluate_logreg_parameters for every experiment.
    X_train, X_val, y_train, y_val, folds = read_telecom_churn()

    # Previously recorded experiments; iterated row by row by ``generator``
    # (presumably a pandas DataFrame, since ``iterrows`` is used -- confirm
    # against utils.read_json_log).
    input_log = read_json_log(args.input_log)
def generator():
    """Yield one experiment description per row of the recorded input log.

    Iterates the module-level ``input_log`` with ``iterrows()`` and, for
    each row, yields a ``(name, experiment_id, parameters)`` tuple.
    ``parameters`` is a dict built from the row entries whose label starts
    with ``param_``, keyed by the label with that leading prefix removed.
    """
    prefix = 'param_'
    for _, row in input_log.iterrows():
        # Strip only the leading prefix.  A plain ``str.replace`` would
        # also delete later occurrences inside the name (for example
        # "param_my_param_x" would become "my_x"), so re-adding the prefix
        # afterwards would no longer reproduce the recorded column name.
        # Every label kept by the filter starts with the prefix, so slicing
        # by its length is safe.
        parameters = row.filter(regex='^param_')\
            .rename(lambda label: label[len(prefix):])\
            .to_dict()
        yield (row['name'],
               row['experiment_id'],
               parameters)
def evaluator(experiment):
    """Re-evaluate one recorded experiment and log the outcome.

    ``experiment`` is a ``(name, experiment_id, parameters)`` triple.  The
    parameters are passed to ``evaluate_logreg_parameters`` together with
    the module-level data (``X_train``, ``X_val``, ``y_train``, ``y_val``,
    ``folds``).  The resulting metrics, with ``success`` set to ``True``,
    are merged into a record that also carries the name, the experiment id
    and each parameter under a ``param_``-prefixed key.  The record is then
    handed to ``log_json`` for ``args.output_log`` while ``log_lock`` is
    held.  Nothing is returned.
    """
    run_name, run_id, params = experiment

    # Identification first, then the parameters under their prefixed keys.
    record = {'name': run_name, 'experiment_id': run_id}
    for key, value in params.items():
        record['param_' + key] = value

    scores = evaluate_logreg_parameters(
        params,
        X_train=X_train,
        X_val=X_val,
        y_train=y_train,
        y_val=y_val,
        folds=folds,
    )
    # Reaching this line means the evaluation call did not raise.
    scores['success'] = True
    record.update(scores)

    # Only one caller at a time may write to the output log.
    with log_lock:
        log_json(args.output_log, record)
if __name__ == "__main__":
    # Keep the actual run behind the script guard: ``args`` and the loaded
    # data only exist when the file is executed directly, so an unguarded
    # call would fail on import.
    # Every experiment produced by ``generator`` is handed to ``evaluator``
    # through ``run_pool``; ``args`` is passed along as well (presumably
    # for --processes / --chunksize -- confirm in utils.run_pool).
    run_pool(generator(), args, evaluator)