diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index 5c94569734678..d771b8e3da9de 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -2481,9 +2481,15 @@ class labels (multi-output problem). dict, list, None, - ], + ] } _parameter_constraints.pop("splitter") + _parameter_constraints.pop("max_samples") + _parameter_constraints["max_samples"] = [ + None, + Interval(RealNotInt, 0.0, None, closed="right"), + Interval(Integral, 1, None, closed="left"), + ] def __init__( self, @@ -2509,7 +2515,9 @@ def __init__( max_samples=None, max_bins=None, store_leaf_values=False, - monotonic_cst=None + monotonic_cst=None, + stratify=False, + honest_prior="ignore" ): self.target_tree_kwargs = { "criterion": criterion, @@ -2528,7 +2536,9 @@ def __init__( super().__init__( estimator=HonestDecisionTree( target_tree_class=target_tree_class, - target_tree_kwargs=self.target_tree_kwargs + target_tree_kwargs=self.target_tree_kwargs, + stratify=stratify, + honest_prior=honest_prior ), n_estimators=n_estimators, estimator_params=( @@ -2572,6 +2582,8 @@ def __init__( self.monotonic_cst = monotonic_cst self.ccp_alpha = ccp_alpha self.target_tree_class = target_tree_class + self.stratify = stratify + self.honest_prior = honest_prior class RandomForestRegressor(ForestRegressor): diff --git a/sklearn/tree/_honest_tree.py b/sklearn/tree/_honest_tree.py index b0b5ddcde3839..a7a3d59d7b00b 100644 --- a/sklearn/tree/_honest_tree.py +++ b/sklearn/tree/_honest_tree.py @@ -24,9 +24,9 @@ class HonestDecisionTree(BaseDecisionTree): _parameter_constraints: dict = { **BaseDecisionTree._parameter_constraints, - "target_tree_class": [BaseDecisionTree], + "target_tree_class": "no_validation", "target_tree_kwargs": [dict], - "honest_fraction": [Interval(RealNotInt, 0.0, 1.0, closed="neither")], + "honest_fraction": [Interval(RealNotInt, 0.0, 1.0, closed="both")], "honest_prior": [StrOptions({"empirical", "uniform", "ignore"})], "stratify": ["boolean"], } @@ -221,7 +221,7 @@ def fit( # fingers crossed sklearn.utils.validation.check_is_fitted doesn't # change its behavior - print(f"n_classes = {target_bta.n_classes}") + #print(f"n_classes = {target_bta.n_classes}") self.tree_ = HonestTree( self.target_tree.n_features_in_, target_bta.n_classes, @@ -231,8 +231,8 @@ def fit( self.honesty.resize_tree(self.tree_, self.honesty.get_node_count()) self.tree_.node_count = self.honesty.get_node_count() - print(f"dishonest node count = {self.target_tree.tree_.node_count}") - print(f"honest node count = {self.tree_.node_count}") + #print(f"dishonest node count = {self.target_tree.tree_.node_count}") + #print(f"honest node count = {self.tree_.node_count}") criterion = BaseDecisionTree._create_criterion( self.target_tree, @@ -250,8 +250,8 @@ def fit( for i in range(self.honesty.get_node_count()): start, end = self.honesty.get_node_range(i) - print(f"setting sample range for node {i}: ({start}, {end})") - print(f"node {i} is leaf: {self.honesty.is_leaf(i)}") + #print(f"setting sample range for node {i}: ({start}, {end})") + #print(f"node {i} is leaf: {self.honesty.is_leaf(i)}") self.honesty.set_sample_pointers(criterion, start, end) if missing_values_in_feature_mask is not None: