Merge branch 'scikit-learn:main' into fork
adam2392 authored Mar 30, 2023
2 parents 73a2176 + c3bfe86 commit f8d4697
Showing 13 changed files with 236 additions and 33 deletions.
6 changes: 3 additions & 3 deletions doc/modules/classes.rst
@@ -659,7 +659,7 @@ Plotting

.. autosummary::
:toctree: generated/
-   :template: class.rst
+   :template: display_only_from_estimator.rst

inspection.DecisionBoundaryDisplay
inspection.PartialDependenceDisplay
@@ -1122,7 +1122,7 @@ See the :ref:`visualizations` section of the user guide for further details.

.. autosummary::
:toctree: generated/
-   :template: class.rst
+   :template: display_all_class_methods.rst

metrics.ConfusionMatrixDisplay
metrics.DetCurveDisplay
@@ -1242,7 +1242,7 @@ Visualization

.. autosummary::
:toctree: generated/
-   :template: class.rst
+   :template: display_only_from_estimator.rst

model_selection.LearningCurveDisplay

14 changes: 14 additions & 0 deletions doc/templates/display_all_class_methods.rst
@@ -0,0 +1,14 @@
:mod:`{{module}}`.{{objname}}
{{ underline }}==============

.. currentmodule:: {{ module }}

.. autoclass:: {{ objname }}

.. include:: {{module}}.{{objname}}.examples
.. include:: {{module}}.{{objname}}.from_estimator.examples
.. include:: {{module}}.{{objname}}.from_predictions.examples

.. raw:: html

<div class="clearer"></div>
13 changes: 13 additions & 0 deletions doc/templates/display_only_from_estimator.rst
@@ -0,0 +1,13 @@
:mod:`{{module}}`.{{objname}}
{{ underline }}==============

.. currentmodule:: {{ module }}

.. autoclass:: {{ objname }}

.. include:: {{module}}.{{objname}}.examples
.. include:: {{module}}.{{objname}}.from_estimator.examples

.. raw:: html

<div class="clearer"></div>
6 changes: 6 additions & 0 deletions doc/whats_new/v1.3.rst
@@ -321,6 +321,12 @@ Changelog
curves.
:pr:`24668` by :user:`dberenbaum`.

+- |Enhancement| :meth:`metrics.RocCurveDisplay.from_estimator` and
+  :meth:`metrics.RocCurveDisplay.from_predictions` now accept two new keywords,
+  `plot_chance_level` and `chance_level_kw`, to plot the baseline chance
+  level. The chance level line is exposed in the `chance_level_` attribute.
+  :pr:`25987` by :user:`Yao Xiao <Charlie-XIAO>`.

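For context, a minimal sketch of the new keywords in use; `clf`, `X_test`, and
`y_test` are placeholders for a fitted binary classifier and held-out data, not
names from the commit:

    from sklearn.metrics import RocCurveDisplay

    display = RocCurveDisplay.from_estimator(
        clf,
        X_test,
        y_test,
        plot_chance_level=True,  # draw the diagonal chance-level baseline
        chance_level_kw={"linestyle": ":", "linewidth": 2},  # style the baseline
    )
    print(display.chance_level_)  # the baseline Line2D exposed by the new attribute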
- |Fix| :func:`log_loss` raises a warning if the values of the parameter `y_pred` are
  not normalized, instead of actually normalizing them in the metric. Starting from
  1.5 this will raise an error. :pr:`25299` by :user:`Omar Salman <OmarManzoor>`.
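A small sketch of the warning this entry describes; the rows of `y_pred` below
deliberately sum to 0.9, and the exact warning text may differ:

    from sklearn.metrics import log_loss

    y_true = [0, 1, 1]
    y_pred = [[0.3, 0.6], [0.2, 0.7], [0.6, 0.3]]  # rows sum to 0.9, not 1.0
    loss = log_loss(y_true, y_pred)  # 1.3 warns instead of silently renormalizing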
10 changes: 5 additions & 5 deletions examples/inspection/plot_partial_dependence.py
@@ -198,7 +198,7 @@
# features and individual conditional expectation (ICE).
#
# Multi-layer perceptron
-# """"""""""""""""""""""
+# ~~~~~~~~~~~~~~~~~~~~~~
#
# Let's fit a :class:`~sklearn.neural_network.MLPRegressor` and compute
# single-variable partial dependence plots.
@@ -278,7 +278,7 @@

# %%
# Gradient boosting
-# """""""""""""""""
+# ~~~~~~~~~~~~~~~~~
#
# Let's now fit a :class:`~sklearn.ensemble.HistGradientBoostingRegressor` and
# compute the partial dependence on the same features. We also use the
@@ -330,7 +330,7 @@

# %%
# Analysis of the plots
-# """""""""""""""""""""
+# ~~~~~~~~~~~~~~~~~~~~~
#
# We will first look at the PDPs for the numerical features. For both models, the
# general trend of the PDP of the temperature is that the number of bike rentals is
@@ -352,7 +352,7 @@
# synthetic samples if features are correlated.
#
# ICE vs. PDP
-# """""""""""
+# ~~~~~~~~~~~
# PDP is an average of the marginal effects of the features. We are averaging the
# response of all samples of the provided set. Thus, some effects could be hidden. In
# this regard, it is possible to plot each individual response. This representation is
@@ -521,7 +521,7 @@

# %%
# 3D representation
-# """""""""""""""""
+# ~~~~~~~~~~~~~~~~~
#
# Let's make the same partial dependence plot for the 2 features interaction,
# this time in 3 dimensions.
12 changes: 7 additions & 5 deletions examples/miscellaneous/plot_outlier_detection_bench.py
@@ -172,12 +172,12 @@ def compute_prediction(X, model_name):
pos_label = 0  # means 0 belongs to the positive class
rows = math.ceil(len(datasets_name) / cols)

-fig, axs = plt.subplots(rows, cols, figsize=(10, rows * 3))
+fig, axs = plt.subplots(rows, cols, figsize=(10, rows * 3), sharex=True, sharey=True)

for i, dataset_name in enumerate(datasets_name):
(X, y) = preprocess_dataset(dataset_name=dataset_name)

-    for model_name in models_name:
+    for model_idx, model_name in enumerate(models_name):
y_pred = compute_prediction(X, model_name=model_name)
display = RocCurveDisplay.from_predictions(
y,
@@ -186,10 +186,12 @@ def compute_prediction(X, model_name):
name=model_name,
linewidth=linewidth,
ax=axs[i // cols, i % cols],
+            plot_chance_level=(model_idx == len(models_name) - 1),
+            chance_level_kw={
+                "linewidth": linewidth,
+                "linestyle": ":",
+            },
)
-    axs[i // cols, i % cols].plot([0, 1], [0, 1], linewidth=linewidth, linestyle=":")
axs[i // cols, i % cols].set_title(dataset_name)
axs[i // cols, i % cols].set_xlabel("False Positive Rate")
axs[i // cols, i % cols].set_ylabel("True Positive Rate")
plt.tight_layout(pad=2.0) # spacing between subplots
plt.show()
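The pattern introduced above, enabling `plot_chance_level` only for the last curve
drawn on an axes, can be reproduced in isolation. A sketch with made-up scores;
none of these names come from the example file:

    import matplotlib.pyplot as plt
    from sklearn.metrics import RocCurveDisplay

    y_true = [0, 0, 1, 1]
    scores = {"model_a": [0.1, 0.4, 0.35, 0.8], "model_b": [0.2, 0.3, 0.6, 0.7]}

    fig, ax = plt.subplots()
    for idx, (name, y_score) in enumerate(scores.items()):
        RocCurveDisplay.from_predictions(
            y_true,
            y_score,
            name=name,
            ax=ax,
            plot_chance_level=(idx == len(scores) - 1),  # chance line drawn once
        )
    plt.show()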
10 changes: 5 additions & 5 deletions examples/model_selection/plot_roc.py
@@ -125,8 +125,8 @@
y_score[:, class_id],
name=f"{class_of_interest} vs the rest",
color="darkorange",
+    plot_chance_level=True,
)
-plt.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
plt.axis("square")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
@@ -161,8 +161,8 @@
y_score.ravel(),
name="micro-average OvR",
color="darkorange",
+    plot_chance_level=True,
)
-plt.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
plt.axis("square")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
@@ -281,9 +281,9 @@
name=f"ROC curve for {target_names[class_id]}",
color=color,
ax=ax,
+        plot_chance_level=(class_id == 2),
)

-plt.plot([0, 1], [0, 1], "k--", label="ROC curve for chance level (AUC = 0.5)")
plt.axis("square")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
@@ -364,8 +364,8 @@
y_score[ab_mask, idx_b],
ax=ax,
name=f"{label_b} as positive class",
+    plot_chance_level=True,
)
-plt.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
plt.axis("square")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
@@ -413,7 +413,7 @@
linestyle=":",
linewidth=4,
)
-plt.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
+plt.plot([0, 1], [0, 1], "k--", label="Chance level (AUC = 0.5)")
plt.axis("square")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
5 changes: 3 additions & 2 deletions examples/model_selection/plot_roc_crossval.py
@@ -70,7 +70,8 @@
from sklearn.metrics import RocCurveDisplay
from sklearn.model_selection import StratifiedKFold

-cv = StratifiedKFold(n_splits=6)
+n_splits = 6
+cv = StratifiedKFold(n_splits=n_splits)
classifier = svm.SVC(kernel="linear", probability=True, random_state=random_state)

tprs = []
@@ -88,12 +89,12 @@
alpha=0.3,
lw=1,
ax=ax,
+        plot_chance_level=(fold == n_splits - 1),
)
interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
interp_tpr[0] = 0.0
tprs.append(interp_tpr)
aucs.append(viz.roc_auc)
-ax.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
44 changes: 34 additions & 10 deletions sklearn/datasets/_samples_generator.py
@@ -855,6 +855,18 @@ def make_moons(n_samples=100, *, shuffle=True, noise=None, random_state=None):
return X, y


+@validate_params(
+    {
+        "n_samples": [Interval(Integral, 1, None, closed="left"), "array-like"],
+        "n_features": [Interval(Integral, 1, None, closed="left")],
+        "centers": [Interval(Integral, 1, None, closed="left"), "array-like", None],
+        "cluster_std": [Interval(Real, 0, None, closed="left"), "array-like"],
+        "center_box": [tuple],
+        "shuffle": ["boolean"],
+        "random_state": ["random_state"],
+        "return_centers": ["boolean"],
+    }
+)
def make_blobs(
n_samples=100,
n_features=2,
@@ -884,7 +896,7 @@ def make_blobs(
n_features : int, default=2
The number of features for each sample.
-    centers : int or ndarray of shape (n_centers, n_features), default=None
+    centers : int or array-like of shape (n_centers, n_features), default=None
The number of centers to generate, or the fixed center locations.
If n_samples is an int and centers is None, 3 centers are generated.
If n_samples is array-like, centers must be
@@ -967,22 +979,19 @@
        centers = generator.uniform(
            center_box[0], center_box[1], size=(n_centers, n_features)
        )
-        try:
-            assert len(centers) == n_centers
-        except TypeError as e:
+        if not isinstance(centers, Iterable):
            raise ValueError(
                "Parameter `centers` must be array-like. Got {!r} instead".format(
                    centers
                )
-            ) from e
-        except AssertionError as e:
+            )
+        if len(centers) != n_centers:
            raise ValueError(
                "Length of `n_samples` not consistent with number of "
                f"centers. Got n_samples = {n_samples} and centers = {centers}"
-            ) from e
-        else:
-            centers = check_array(centers)
-            n_features = centers.shape[1]
+            )
+        centers = check_array(centers)
+        n_features = centers.shape[1]

# stds: if cluster_std is given as list, it must be consistent
# with the n_centers
@@ -1627,6 +1636,14 @@ def make_sparse_spd_matrix(
return prec


+@validate_params(
+    {
+        "n_samples": [Interval(Integral, 1, None, closed="left")],
+        "noise": [Interval(Real, 0, None, closed="left")],
+        "random_state": ["random_state"],
+        "hole": ["boolean"],
+    }
+)
def make_swiss_roll(n_samples=100, *, noise=0.0, random_state=None, hole=False):
"""Generate a swiss roll dataset.
@@ -1692,6 +1709,13 @@ def make_swiss_roll(n_samples=100, *, noise=0.0, random_state=None, hole=False):
return X, t


+@validate_params(
+    {
+        "n_samples": [Interval(Integral, 1, None, closed="left")],
+        "noise": [Interval(Real, 0, None, closed="left")],
+        "random_state": ["random_state"],
+    }
+)
def make_s_curve(n_samples=100, *, noise=0.0, random_state=None):
"""Generate an S curve dataset.
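Taken together, the new `@validate_params` decorators and the rewritten `centers`
checks give these generators uniform error handling. A sketch of both failure
modes; the error messages are paraphrased, and the `InvalidParameterError` raised
by the decorator subclasses `ValueError`, so catching `ValueError` covers both:

    from sklearn.datasets import make_blobs

    # Valid call: per-cluster sample counts, one center per cluster.
    X, y = make_blobs(n_samples=[10, 20], centers=[[0, 0], [5, 5]], random_state=0)

    try:
        # len(n_samples) is 2 but three centers are given: the new length check.
        make_blobs(n_samples=[10, 20], centers=[[0, 0], [5, 5], [9, 9]])
    except ValueError as exc:
        print(exc)

    try:
        # Rejected by @validate_params: n_samples must be an int >= 1 or array-like.
        make_blobs(n_samples=0)
    except ValueError as exc:
        print(exc)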