
Commit

Release
cerlymarco committed Jul 21, 2022
1 parent 6c6a581 commit 6b76c61
Showing 4 changed files with 32 additions and 12 deletions.
lineartree/_classes.py: 16 changes (9 additions, 7 deletions)
@@ -152,7 +152,7 @@ def _parallel_binning_fit(split_feat, _self, X, y,
weights=weights[mask], **largs_right)
wloss_right = loss_right * (weights[mask].sum() / weights.sum())

- total_loss = wloss_left + wloss_right
+ total_loss = round(wloss_left + wloss_right, 5)

# store if best
if total_loss < loss:
@@ -214,15 +214,16 @@ class _LinearTree(BaseEstimator):
"""
def __init__(self, base_estimator, *, criterion, max_depth,
min_samples_split, min_samples_leaf, max_bins,
- categorical_features, split_features,
- linear_features, n_jobs):
+ min_impurity_decrease, categorical_features,
+ split_features, linear_features, n_jobs):

self.base_estimator = base_estimator
self.criterion = criterion
self.max_depth = max_depth
self.min_samples_split = min_samples_split
self.min_samples_leaf = min_samples_leaf
self.max_bins = max_bins
+ self.min_impurity_decrease = min_impurity_decrease
self.categorical_features = categorical_features
self.split_features = split_features
self.linear_features = linear_features
@@ -295,7 +296,7 @@ def _split(self, X, y, bins,

# select best results
_id_best = np.argmin(_losses)
- if _losses[_id_best] < loss:
+ if loss - _losses[_id_best] > self.min_impurity_decrease:
split_t = split_t[_id_best]
split_col = split_col[_id_best]
left_node = left_node[_id_best]
@@ -362,6 +363,7 @@ def _grow(self, X, y, weights=None):
loss = CRITERIA[self.criterion](
model, X[:, self._linear_features], y,
weights=weights, **largs)
+ loss = round(loss, 5)

self._nodes[''] = Node(
id=0,
@@ -651,8 +653,8 @@ def summary(self, feature_names=None, only_leaves=False, max_depth=None):

summary[N.id] = {
'col': feature_names[Cl.threshold[-1][0]],
- 'th': round(Cl.threshold[-1][-1], 4),
- 'loss': round(Cl.w_loss + Cr.w_loss, 4),
+ 'th': round(Cl.threshold[-1][-1], 5),
+ 'loss': round(Cl.w_loss + Cr.w_loss, 5),
'samples': Cl.n_samples + Cr.n_samples,
'children': (Cl.id, Cr.id),
'models': (Cl.model, Cr.model)
@@ -664,7 +666,7 @@ def summary(self, feature_names=None, only_leaves=False, max_depth=None):
continue

summary[L.id] = {
- 'loss': round(L.loss, 4),
+ 'loss': round(L.loss, 5),
'samples': L.n_samples,
'models': L.model
}
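The behavioural change in `_classes.py` is the gate in `_split`: a candidate split is now accepted only when the parent loss minus the best candidate loss is strictly greater than `min_impurity_decrease`, with losses rounded to 5 decimals. A minimal standalone sketch of that gating idea, using hypothetical names (`accept_best_split`, `parent_loss`, `candidate_losses`) rather than the class internals:

```python
import numpy as np

def accept_best_split(parent_loss, candidate_losses, min_impurity_decrease=0.0):
    """Return the index of the best candidate split, or None if the
    improvement over the parent loss is not large enough.

    Mirrors the gating idea of the commit: keep a split only if
    parent_loss - best_candidate_loss > min_impurity_decrease.
    (Hypothetical helper, not part of lineartree's API.)
    """
    candidate_losses = np.asarray(candidate_losses, dtype=float)
    best = int(np.argmin(candidate_losses))
    # The commit rounds losses to 5 decimals before comparing, which avoids
    # splits driven by floating-point noise; the same is done here.
    improvement = round(parent_loss, 5) - round(candidate_losses[best], 5)
    return best if improvement > min_impurity_decrease else None

# Example: parent loss 1.0, best candidate 0.97 (improvement 0.03).
print(accept_best_split(1.0, [0.99, 0.97, 0.98], min_impurity_decrease=0.05))  # None
print(accept_best_split(1.0, [0.99, 0.97, 0.98], min_impurity_decrease=0.01))  # 1
```

With the default `min_impurity_decrease=0.0`, any strictly positive improvement passes, which reproduces the previous `_losses[_id_best] < loss` behaviour (up to the new rounding).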
lineartree/lineartree.py: 18 changes (14 additions, 4 deletions)
@@ -59,6 +59,10 @@ class LinearTreeRegressor(_LinearTree, RegressorMixin):
``max_bins`` bins. Must be lower than 120 and larger than 10.
A higher value implies a higher training time.
+ min_impurity_decrease : float, default=0.0
+ A node will be split if this split induces a decrease of the impurity
+ greater than or equal to this value.
categorical_features : int or array-like of int, default=None
Indicates the categorical features.
All categorical indices must be in `[0, n_features)`.
@@ -119,15 +123,16 @@ class LinearTreeRegressor(_LinearTree, RegressorMixin):
"""
def __init__(self, base_estimator, *, criterion='mse', max_depth=5,
min_samples_split=6, min_samples_leaf=0.1, max_bins=25,
- categorical_features=None, split_features=None,
- linear_features=None, n_jobs=None):
+ min_impurity_decrease=0.0, categorical_features=None,
+ split_features=None, linear_features=None, n_jobs=None):

self.base_estimator = base_estimator
self.criterion = criterion
self.max_depth = max_depth
self.min_samples_split = min_samples_split
self.min_samples_leaf = min_samples_leaf
self.max_bins = max_bins
+ self.min_impurity_decrease = min_impurity_decrease
self.categorical_features = categorical_features
self.split_features = split_features
self.linear_features = linear_features
@@ -281,6 +286,10 @@ class LinearTreeClassifier(_LinearTree, ClassifierMixin):
``max_bins`` bins. Must be lower than 120 and larger than 10.
A higher value implies a higher training time.
+ min_impurity_decrease : float, default=0.0
+ A node will be split if this split induces a decrease of the impurity
+ greater than or equal to this value.
categorical_features : int or array-like of int, default=None
Indicates the categorical features.
All categorical indices must be in `[0, n_features)`.
@@ -341,15 +350,16 @@ class LinearTreeClassifier(_LinearTree, ClassifierMixin):
"""
def __init__(self, base_estimator, *, criterion='hamming', max_depth=5,
min_samples_split=6, min_samples_leaf=0.1, max_bins=25,
- categorical_features=None, split_features=None,
- linear_features=None, n_jobs=None):
+ min_impurity_decrease=0.0, categorical_features=None,
+ split_features=None, linear_features=None, n_jobs=None):

self.base_estimator = base_estimator
self.criterion = criterion
self.max_depth = max_depth
self.min_samples_split = min_samples_split
self.min_samples_leaf = min_samples_leaf
self.max_bins = max_bins
+ self.min_impurity_decrease = min_impurity_decrease
self.categorical_features = categorical_features
self.split_features = split_features
self.linear_features = linear_features
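From the user's side, `min_impurity_decrease` is just another constructor keyword. A usage sketch for `LinearTreeRegressor`, assuming lineartree 0.3.4+ and scikit-learn are installed; the synthetic data and the value 0.01 are illustrative, not from the commit:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from lineartree import LinearTreeRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(500, 4))
y = X[:, 0] * np.where(X[:, 1] > 0, 2.0, -1.0) + rng.normal(scale=0.1, size=500)

# min_impurity_decrease=0.01 discards splits whose weighted loss improvement
# over the parent node is 0.01 or less.
reg = LinearTreeRegressor(base_estimator=LinearRegression(),
                          max_depth=5,
                          min_impurity_decrease=0.01)
reg.fit(X, y)
print(reg.predict(X[:5]))
print(len(reg.summary(only_leaves=True)), "leaves")
```

Raising `min_impurity_decrease` tends to yield fewer, larger leaves, since weak splits near the bottom of the tree no longer clear the threshold.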
notebooks/README.md: 8 changes (8 additions, 0 deletions)
@@ -43,6 +43,10 @@ class lineartree.LinearTreeRegressor(base_estimator, *, criterion = 'mse', max_d

The maximum number of bins to use to search the optimal split in each feature. Features with a small number of unique values may use less than ``max_bins`` bins. Must be lower than 120 and larger than 10.
A higher value implies a higher training time.

- ```min_impurity_decrease : float, default=0.0```

A node will be split if this split induces a decrease of the impurity greater than or equal to this value.

- ```categorical_features : int or array-like of int, default=None```

@@ -279,6 +283,10 @@ class lineartree.LinearTreeClassifier(base_estimator, *, criterion = 'hamming',

The maximum number of bins to use to search the optimal split in each feature. Features with a small number of unique values may use less than ``max_bins`` bins. Must be lower than 120 and larger than 10.
A higher value implies a higher training time.

- ```min_impurity_decrease : float, default=0.0```

A node will be split if this split induces a decrease of the impurity greater than or equal to this value.

- ```categorical_features : int or array-like of int, default=None```

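The README documents the same parameter for the classifier. A comparable sketch for `LinearTreeClassifier`, again with illustrative data and a hypothetical threshold of 0.02:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from lineartree import LinearTreeClassifier

rng = np.random.RandomState(42)
X = rng.normal(size=(400, 3))
y = (X[:, 0] + np.where(X[:, 1] > 0, 1.5, -1.5) * X[:, 2] > 0).astype(int)

# A node is split only if the split lowers the (hamming) loss by more than
# min_impurity_decrease, so larger values keep the tree shallower.
clf = LinearTreeClassifier(base_estimator=LogisticRegression(),
                           criterion='hamming',
                           min_impurity_decrease=0.02)
clf.fit(X, y)
print(clf.predict(X[:5]))
```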
setup.py: 2 changes (1 addition, 1 deletion)
@@ -3,7 +3,7 @@

HERE = pathlib.Path(__file__).parent

- VERSION = '0.3.3'
+ VERSION = '0.3.4'
PACKAGE_NAME = 'linear-tree'
AUTHOR = 'Marco Cerliani'
AUTHOR_EMAIL = '[email protected]'