-
Notifications
You must be signed in to change notification settings - Fork 0
/
ML_Train.py
42 lines (36 loc) · 1.42 KB
/
ML_Train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# evaluate multioutput regression model with k-fold cross-validation
from numpy import absolute
from numpy import mean
from numpy import std
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
# create datasets
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=2, random_state=1, noise=0.5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
print(X_train.shape, y_train.shape)
# define model
model = LinearRegression()
# define the evaluation procedure
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate the model and collect the scores
n_scores = cross_val_score(
model, X_train, y_train, cv=cv, n_jobs=-1,
#scoring='neg_mean_absolute_error'
)
# force the scores to be positive
#n_scores = absolute(n_scores)
# summarize performance
print(n_scores.mean())
print('MAE: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
# Re-train the model
model.fit(X_train, y_train)
# Evaluate the model on test dataset
print(model.score(X_test, y_test))
# make a prediction
row = [0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303, -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249]
yhat = model.predict([row])
# summarize prediction
print(yhat[0])