train_models.py
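"""Train GBDT ensembles for the uncertainty experiments.

Usage (a sketch inferred from the argument handling below; exact dataset
availability depends on the local gbdt_uncertainty setup):

    python train_models.py <mode> [tuning]

<mode> is one of "regression", "classification", "regression_rf" or
"classification_rf". [tuning] is required for the first two modes:
1 to tune hyperparameters before training, otherwise previously saved
parameter files from results/params/ are reused. The results/models/
directory is created before training, so trained ensembles are expected
to be written there.
"""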
import sys
import json
import os

from gbdt_uncertainty.data import load_regression_dataset
from gbdt_uncertainty.training import (
    tune_parameters_regression, generate_ensemble_regression,
    tune_parameters_classification, generate_ensemble_classification,
    generate_rf_ensemble_regression, generate_rf_ensemble_classification)

mode = sys.argv[1]


def create_dir(name):
    # Create the output directory if it does not exist yet.
    if not os.path.exists(name):
        os.makedirs(name)

if mode == "regression":
    try:
        tuning = int(sys.argv[2])
    except (IndexError, ValueError):
        print("Tuning parameter is required: 1 if tuning is needed")
        sys.exit(1)
    datasets = ["bostonHousing", "concrete", "energy", "kin8nm",
                "naval-propulsion-plant", "power-plant", "protein-tertiary-structure",
                "wine-quality-red", "yacht", "YearPredictionMSD"]
    algorithms = ['sgb-fixed', 'sglb-fixed']
    # For the -fixed variants the sample rate is not tuned: 0.5 is used for SGB and 1.0 for SGLB.
    for name in datasets:
        print("dataset =", name)
        if tuning == 1:
            create_dir("results/params")
            # Tune hyperparameters
            print("tuning hyperparameters...")
            X, y, index_train, index_test, n_splits = load_regression_dataset(name)
            for alg in algorithms:
                print(alg)
                params = tune_parameters_regression(X, y, index_train,
                                                    index_test, n_splits, alg=alg)
                with open("results/params/" + name + "_" + alg + '.json', 'w') as fp:
                    json.dump(params, fp)
        # Train models with the saved hyperparameters
        print("training models...")
        create_dir("results/models")
        for alg in algorithms:
            print(alg)
            X, y, index_train, index_test, n_splits = load_regression_dataset(name)
            with open("results/params/" + name + "_" + alg + '.json', 'r') as fp:
                params = json.load(fp)
            generate_ensemble_regression(name, X, y, index_train, index_test,
                                         n_splits, params, alg=alg)
        print()
if mode == "classification":
    try:
        tuning = int(sys.argv[2])
    except (IndexError, ValueError):
        print("Tuning parameter is required: 1 if tuning is needed")
        sys.exit(1)
    datasets = ["adult", "amazon", "click", "internet",
                "appetency", "churn", "upselling", "kick"]
    algorithms = ['sgb-fixed', 'sglb-fixed']  # choose from ['sgb-fixed', 'sglb-fixed', 'sgb', 'sglb']
    # For the -fixed variants the sample rate is not tuned: 0.5 is used for SGB and 1.0 for SGLB.
    for name in datasets:
        print("dataset =", name)
        if tuning == 1:
            create_dir("results/params")
            # Tune hyperparameters
            print("tuning hyperparameters...")
            for alg in algorithms:
                print(alg)
                params = tune_parameters_classification(name, alg=alg)
                with open("results/params/" + name + "_" + alg + '.json', 'w') as fp:
                    json.dump(params, fp)
        # Train all models with the saved hyperparameters
        print("training models...")
        create_dir("results/models")
        for alg in algorithms:
            print(alg)
            with open("results/params/" + name + "_" + alg + '.json', 'r') as fp:
                params = json.load(fp)
            generate_ensemble_classification(name, params, alg=alg)
        print()
if mode == "regression_rf":
    datasets = ["bostonHousing", "concrete", "energy", "kin8nm",
                "naval-propulsion-plant", "power-plant", "protein-tertiary-structure",
                "wine-quality-red", "yacht", "YearPredictionMSD"]
    create_dir("results/models")
    for name in datasets:
        print("dataset =", name)
        # Training all models
        print("training models...")
        X, y, index_train, index_test, n_splits = load_regression_dataset(name)
        generate_rf_ensemble_regression(name, X, y, index_train, index_test, n_splits)

if mode == "classification_rf":
    datasets = ["adult", "amazon", "click", "internet",
                "appetency", "churn", "upselling", "kick"]
    create_dir("results/models")
    for name in datasets:
        print("dataset =", name)
        # Training all models
        print("training models...")
        generate_rf_ensemble_classification(name)
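
# Example invocations, based only on the sys.argv handling above (paths and
# datasets are whatever the local gbdt_uncertainty installation provides):
#   python train_models.py regression 1        # tune hyperparameters, then train sgb-fixed/sglb-fixed ensembles
#   python train_models.py classification 0    # skip tuning and reuse results/params/*.json
#   python train_models.py regression_rf       # train random-forest-style ensembles (no tuning step)
#   python train_models.py classification_rf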