-
Notifications
You must be signed in to change notification settings - Fork 3
/
model_selector.py
106 lines (86 loc) · 2.88 KB
/
model_selector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#! /usr/bin/python3
import numpy as np
import matplotlib
matplotlib.use("agg")
# import matplotlib.pyplot as plt # noqa: E402
from scipy import optimize as opt # noqa: E402
import error_functions as erf # noqa: E402
import models as mod # noqa: E402
# Error metric used throughout this module: compare_models and train both
# hand it to loss_fun as `error_fun`, where it is applied to the array of
# deviations (predictions - y). Keep exactly one assignment active; the
# commented-out lines are presumably the other metrics offered by
# error_functions -- confirm against that module before switching.
# ERROR_FUNCTION = erf.rms
ERROR_FUNCTION = erf.mae
# ERROR_FUNCTION = erf.maxd
# ERROR_FUNCTION = erf.smr
def loss_fun(p, x, y, eval_fun, error_fun):
    """
    Score a parameter set against observed data.

    The model is evaluated as eval_fun(p, x), the observed values y are
    subtracted from the result, and the residuals are reduced by
    error_fun. Whatever error_fun returns is passed back unchanged.

    p: model parameters, forwarded as the first argument of eval_fun
    x: inputs, forwarded as the second argument of eval_fun
    y: observed values, subtracted from the model output
    eval_fun: callable (p, x) -> predictions
    error_fun: callable (deviations) -> error
    """
    return error_fun(eval_fun(p, x) - y)
def split_data(x=None, y=None, extrapolate=False):
    """
    Split paired data into a training part and a testing part.

    The training part holds int(y.size * .7) points; the rest go to the
    testing part.

    x, y: arrays supporting `.size`, slicing and integer-array indexing
        (NumPy arrays). Both are required despite the None defaults.
    extrapolate: if True, the leading points are used for training and
        the trailing points for testing. If False, the training points
        are drawn at random without replacement (via np.random.choice,
        so the global NumPy seed controls the draw) and both parts keep
        their original ordering.

    Returns the tuple (x_train, y_train, x_test, y_test).
    Raises ValueError if x or y is not supplied.
    """
    if x is None or y is None:
        raise ValueError("split_data requires both x and y")

    n_train = int(y.size * .7)

    if extrapolate:
        # Extrapolation: train on the head, test on the tail.
        return x[:n_train], y[:n_train], x[n_train:], y[n_train:]

    # Interpolation: train on a random subset, test on the remainder.
    # np.arange replaces the former cumsum-of-ones construction, which
    # relied on the `np.int` alias that current NumPy no longer provides.
    i_data = np.arange(x.size)
    i_train = np.sort(
        np.random.choice(i_data, size=n_train, replace=False))
    # setdiff1d returns sorted unique values, so the testing indices
    # come out in ascending order as well.
    i_test = np.setdiff1d(i_data, i_train)
    return x[i_train], y[i_train], x[i_test], y[i_test]
def compare_models(x_train, y_train, x_test, y_test):
    """
    Fit every candidate model to the training data and score each one.

    Each model in mod.all_models is fitted with train(), using the
    model's own n_iter_default as the number of restarts. The fitted
    parameters are then scored with ERROR_FUNCTION, first on the
    training data and then on the testing data.

    Returns (models, training_errors, testing_errors); the two error
    lists are in the same order as the models.
    """
    candidates = mod.all_models
    train_scores = []
    test_scores = []

    for candidate in candidates:
        fit = train(
            candidate, x_train, y_train, n_iter=candidate.n_iter_default)
        fitted_params = fit.x

        train_score = loss_fun(
            fitted_params, x_train, y_train,
            candidate.evaluate, ERROR_FUNCTION)
        train_scores.append(train_score)

        test_score = loss_fun(
            fitted_params, x_test, y_test,
            candidate.evaluate, ERROR_FUNCTION)
        test_scores.append(test_score)

    return candidates, train_scores, test_scores
def train(model, x_train, y_train, n_iter=3):
    """
    Fit a model to the training data, keeping the best of several tries.

    Each of the n_iter tries asks the model for a fresh initial guess
    and minimizes loss_fun (scored with ERROR_FUNCTION) over the model
    parameters with the Nelder-Mead method.

    model: object providing evaluate(p, x) and initial_guess(x=..., y=...)
    x_train, y_train: the data to fit
    n_iter: number of independent optimization runs

    Returns the scipy OptimizeResult with the lowest final loss; its `x`
    attribute holds the fitted parameters. Returns None only when
    n_iter is less than 1, since no optimization is run in that case.
    """
    # The arguments that will get passed to the error function,
    # in addition to the model parameters of the current iteration.
    # They do not change between tries, so build them once.
    error_fun_args = (x_train, y_train, model.evaluate, ERROR_FUNCTION)

    best_res = None
    best_loss = np.inf
    for _ in range(n_iter):
        p_initial = model.initial_guess(x=x_train, y=y_train)
        # Confusingly the `x0` argument is not a request for the x values of
        # the data points or for the 0th element of a list. It is for the
        # initial guess for the parameter values.
        res = opt.minimize(
            fun=loss_fun,
            x0=p_initial,
            method="Nelder-Mead",
            args=error_fun_args,
        )
        # `res.fun` is the objective value at `res.x`, so there is no need
        # to evaluate the loss a second time.
        loss = res.fun
        # Always keep the first result, so that a model whose every fit is
        # poor (huge or NaN loss) still yields a result instead of None.
        if best_res is None or loss < best_loss:
            best_loss = loss
            best_res = res
    return best_res
def test():
    """
    Check whether the code is doing what it should.

    Builds a small synthetic one-dimensional data set, splits it into
    training and testing parts, runs compare_models on it and prints
    the errors obtained for each model.
    """
    # NOTE(review): assumes the models in `models` accept 1-D float
    # arrays for x and y -- confirm against that module.
    x = np.linspace(0.0, 10.0, 50)
    y = 1.0 + 2.0 * x + 0.5 * np.sin(3.0 * x)

    # extrapolate=True gives a deterministic head/tail split, so the
    # check does not depend on the random number generator.
    x_train, y_train, x_test, y_test = split_data(
        x=x, y=y, extrapolate=True)

    models, training_errors, testing_errors = compare_models(
        x_train, y_train, x_test, y_test)

    assert len(training_errors) == len(testing_errors)
    for model, e_train, e_test in zip(
            models, training_errors, testing_errors):
        print(model, "training error:", e_train, "testing error:", e_test)


if __name__ == "__main__":
    test()