Merge branch 'scikit-learn:main' into submodulev3

neurodata · Aug 24, 2023 · 4424f98 · 4424f98
2 parents a4a7122 + a9611d0
commit 4424f98
Show file tree

Hide file tree

Showing 46 changed files with 1,041 additions and 557 deletions.
diff --git a/doc/install.rst b/doc/install.rst
@@ -69,42 +69,65 @@ Then run:
 
 .. raw:: html
 
-       <div class="highlight"><pre><code
-        ><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="">python3 -m venv sklearn-venv</span
-        ><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="">python -m venv sklearn-venv</span
-        ><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="">python -m venv sklearn-venv</span
-        ><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="">source sklearn-venv/bin/activate</span
-        ><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="">source sklearn-venv/bin/activate</span
-        ><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="">sklearn-venv\Scripts\activate</span
-        ><span class="sk-expandable" data-packager="pip" data-venv="">pip install -U scikit-learn</span
-        ><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no">pip install -U scikit-learn</span
-        ><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no">pip install -U scikit-learn</span
-        ><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no">pip3 install -U scikit-learn</span
-        ><span class="sk-expandable" data-packager="conda">conda create -n sklearn-env -c conda-forge scikit-learn</span
-        ><span class="sk-expandable" data-packager="conda">conda activate sklearn-env</span
-       ></code></pre></div>
+  <div class="highlight">
+    <pre class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no"
+    ><span class="prompt1">pip3 install -U scikit-learn</span></pre>
+
+    <pre class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no"
+    ><span class="prompt1">pip install -U scikit-learn</span></pre>
+
+    <pre class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no"
+    ><span class="prompt1">pip install -U scikit-learn</span></pre>
+
+    <pre class="sk-expandable" data-packager="pip" data-os="linux" data-venv=""
+    ><span class="prompt1">python3 -m venv sklearn-venv</span>
+  <span class="prompt1">source sklearn-venv/bin/activate</span>
+  <span class="prompt1">pip3 install -U scikit-learn</span></pre>
+
+    <pre class="sk-expandable" data-packager="pip" data-os="windows" data-venv=""
+    ><span class="prompt1">python -m venv sklearn-venv</span>
+  <span class="prompt1">sklearn-venv\Scripts\activate</span>
+  <span class="prompt1">pip install -U scikit-learn</span></pre>
+
+    <pre class="sk-expandable" data-packager="pip" data-os="mac" data-venv=""
+    ><span class="prompt1">python -m venv sklearn-venv</span>
+  <span class="prompt1">source sklearn-venv/bin/activate</span>
+  <span class="prompt1">pip install -U scikit-learn</span></pre>
+
+    <pre class="sk-expandable" data-packager="conda"
+    ><span class="prompt1">conda create -n sklearn-env -c conda-forge scikit-learn</span>
+  <span class="prompt1">conda activate sklearn-env</span></pre>
+  </div>
 
 In order to check your installation you can use
 
 .. raw:: html
 
-   <div class="highlight"><pre><code
-      ><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no">python3 -m pip show scikit-learn  # to see which version and where scikit-learn is installed</span
-      ><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no">python3 -m pip freeze  # to see all packages installed in the active virtualenv</span
-      ><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no">python3 -c "import sklearn; sklearn.show_versions()"</span
-      ><span class="sk-expandable" data-packager="pip" data-venv="">python -m pip show scikit-learn  # to see which version and where scikit-learn is installed</span
-      ><span class="sk-expandable" data-packager="pip" data-venv="">python -m pip freeze  # to see all packages installed in the active virtualenv</span
-      ><span class="sk-expandable" data-packager="pip" data-venv="">python -c "import sklearn; sklearn.show_versions()"</span
-      ><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no">python -m pip show scikit-learn  # to see which version and where scikit-learn is installed</span
-      ><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no">python -m pip freeze  # to see all packages installed in the active virtualenv</span
-      ><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no">python -c "import sklearn; sklearn.show_versions()"</span
-      ><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no">python -m pip show scikit-learn  # to see which version and where scikit-learn is installed</span
-      ><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no">python -m pip freeze  # to see all packages installed in the active virtualenv</span
-      ><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no">python -c "import sklearn; sklearn.show_versions()"</span
-      ><span class="sk-expandable" data-packager="conda">conda list scikit-learn  # to see which scikit-learn version is installed</span
-      ><span class="sk-expandable" data-packager="conda">conda list  # to see all packages installed in the active conda environment</span
-      ><span class="sk-expandable" data-packager="conda">python -c "import sklearn; sklearn.show_versions()"</span
-      ></code></pre></div>
+  <div class="highlight">
+    <pre class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no"
+    ><span class="prompt1">python3 -m pip show scikit-learn  # to see which version and where scikit-learn is installed</span>
+  <span class="prompt1">python3 -m pip freeze  # to see all packages installed in the active virtualenv</span>
+  <span class="prompt1">python3 -c "import sklearn; sklearn.show_versions()"</span></pre>
+
+    <pre class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no"
+    ><span class="prompt1">python -m pip show scikit-learn  # to see which version and where scikit-learn is installed</span>
+  <span class="prompt1">python -m pip freeze  # to see all packages installed in the active virtualenv</span>
+  <span class="prompt1">python -c "import sklearn; sklearn.show_versions()"</span></pre>
+
+    <pre class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no"
+    ><span class="prompt1">python -m pip show scikit-learn  # to see which version and where scikit-learn is installed</span>
+  <span class="prompt1">python -m pip freeze  # to see all packages installed in the active virtualenv</span>
+  <span class="prompt1">python -c "import sklearn; sklearn.show_versions()"</span></pre>
+
+    <pre class="sk-expandable" data-packager="pip" data-venv=""
+    ><span class="prompt1">python -m pip show scikit-learn  # to see which version and where scikit-learn is installed</span>
+  <span class="prompt1">python -m pip freeze  # to see all packages installed in the active virtualenv</span>
+  <span class="prompt1">python -c "import sklearn; sklearn.show_versions()"</span></pre>
+
+    <pre class="sk-expandable" data-packager="conda"
+    ><span class="prompt1">conda list scikit-learn  # to see which scikit-learn version is installed</span>
+  <span class="prompt1">conda list  # to see all packages installed in the active conda environment</span>
+  <span class="prompt1">python -c "import sklearn; sklearn.show_versions()"</span></pre>
   </div>
 
 Note that in order to avoid potential conflicts with other packages it is

diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
@@ -96,6 +96,7 @@ Estimators
 - :class:`decomposition.PCA` (with `svd_solver="full"`,
   `svd_solver="randomized"` and `power_iteration_normalizer="QR"`)
 - :class:`discriminant_analysis.LinearDiscriminantAnalysis` (with `solver="svd"`)
+- :class:`preprocessing.MaxAbsScaler`
 - :class:`preprocessing.MinMaxScaler`
 
 Tools

diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst
@@ -951,16 +951,17 @@ The objective for the optimization becomes
 
 Where :math:`[P]` represents the Iverson bracket which evaluates to :math:`0`
 if :math:`P` is false, otherwise it evaluates to :math:`1`. We currently provide four choices
-for the regularization term :math:`r(W)` via the `penalty` argument:
+for the regularization term :math:`r(W)` via the `penalty` argument, where :math:`m`
+is the number of features:
 
 +----------------+----------------------------------------------------------------------------------+
 | penalty        | :math:`r(W)`                                                                     |
 +================+==================================================================================+
 | `None`         | :math:`0`                                                                        |
 +----------------+----------------------------------------------------------------------------------+
-| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^n\sum_{j=1}^{K}|W_{i,j}|`                        |
+| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^m\sum_{j=1}^{K}|W_{i,j}|`                        |
 +----------------+----------------------------------------------------------------------------------+
-| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^n\sum_{j=1}^{K} W_{i,j}^2`   |
+| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^{K} W_{i,j}^2`   |
 +----------------+----------------------------------------------------------------------------------+
 | `ElasticNet`   | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}`                           |
 +----------------+----------------------------------------------------------------------------------+

diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst
@@ -10,9 +10,10 @@ The ``sklearn.preprocessing`` package provides several common
 utility functions and transformer classes to change raw feature vectors
 into a representation that is more suitable for the downstream estimators.
 
-In general, learning algorithms benefit from standardization of the data set. If
-some outliers are present in the set, robust scalers or transformers are more
-appropriate. The behaviors of the different scalers, transformers, and
+In general, many learning algorithms such as linear models benefit from standardization of the data set
+(see :ref:`sphx_glr_auto_examples_preprocessing_plot_scaling_importance.py`).
+If some outliers are present in the set, robust scalers or other transformers can
+be more appropriate. The behaviors of the different scalers, transformers, and
 normalizers on a dataset containing marginal outliers are highlighted in
 :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.
 

diff --git a/doc/themes/scikit-learn-modern/static/css/theme.css b/doc/themes/scikit-learn-modern/static/css/theme.css
@@ -1204,25 +1204,33 @@ div.install > input:checked + label {
 .sk-expandable {
   display: none;
 }
-
-div.highlight span.sk-expandable:before {
-  content: "$ ";
+.sk-expandable + .copybtn {
+  display: none;
 }
 
 /* Show hidden content when the checkbox is checked */
 /* for conda */
 #quickstart-conda:checked  ~* [data-packager="conda"] {
   display: block;
 }
+#quickstart-conda:checked  ~* [data-packager="conda"] + .copybtn {
+  display: block;
+}
 
 #quickstart-conda:checked ~ #quickstart-venv ~ label[for="quickstart-venv"] {
   display: none;
 }
+#quickstart-conda:checked ~ #quickstart-venv ~ label[for="quickstart-venv"] + .copybtn {
+  display: none;
+}
 
 /* for pip */
 #quickstart-pip:checked ~* [data-packager="pip"] {
   display: block;
 }
+#quickstart-pip:checked ~* [data-packager="pip"] + .copybtn {
+  display: block;
+}
 
 #quickstart-pip:checked ~ label[for="quickstart-venv"]:before  {
   content: "Use pip virtualenv";
@@ -1231,20 +1239,37 @@ div.highlight span.sk-expandable:before {
 #quickstart-win:not(:checked) ~* [data-os="windows"] {
   display: none;
 }
+#quickstart-win:not(:checked) ~* [data-os="windows"] + .copybtn {
+  display: none;
+}
+
 #quickstart-lin:not(:checked) ~* [data-os="linux"] {
   display: none;
 }
+#quickstart-lin:not(:checked) ~* [data-os="linux"] + .copybtn {
+  display: none;
+}
+
 #quickstart-mac:not(:checked) ~* [data-os="mac"] {
   display: none;
 }
+#quickstart-mac:not(:checked) ~* [data-os="mac"] + .copybtn {
+  display: none;
+}
 
 #quickstart-venv:not(:checked) ~* [data-venv=""] {
   display: none;
 }
+#quickstart-venv:not(:checked) ~* [data-venv=""] + .copybtn {
+  display: none;
+}
 
 #quickstart-venv:checked ~* [data-venv="no"] {
   display: none;
 }
+#quickstart-venv:checked ~* [data-venv="no"] + .copybtn {
+  display: none;
+}
 
 /* Algorithm cheat-sheet */
 

diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst
@@ -465,7 +465,7 @@ Linear kernel
 
     >>> svc = svm.SVC(kernel='linear')
 
-.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png
    :target: ../../auto_examples/svm/plot_svm_kernels.html
 
 Polynomial kernel
@@ -477,7 +477,7 @@ Polynomial kernel
     ...               degree=3)
     >>> # degree: polynomial degree
 
-.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png
    :target: ../../auto_examples/svm/plot_svm_kernels.html
 
 RBF kernel (Radial Basis Function)
@@ -489,7 +489,17 @@ RBF kernel (Radial Basis Function)
     >>> # gamma: inverse of size of
     >>> # radial kernel
 
-.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_004.png
+   :target: ../../auto_examples/svm/plot_svm_kernels.html
+
+Sigmoid kernel
+^^^^^^^^^^^^^^
+
+::
+
+    >>> svc = svm.SVC(kernel='sigmoid')
+
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_005.png
    :target: ../../auto_examples/svm/plot_svm_kernels.html
 
 

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
@@ -115,7 +115,7 @@ Changelog
   `full` and `randomized` solvers (with QR power iterations). See
   :ref:`array_api` for more details.
   :pr:`26315` and :pr:`27098` by :user:`Mateusz Sokół <mtsokol>`,
-  :user:`Olivier Grisel <ogrisel>` and :user:` Edoardo Abati <EdAbati>`.
+  :user:`Olivier Grisel <ogrisel>` and :user:`Edoardo Abati <EdAbati>`.
 
 - |Enhancement| :func:`decomposition.non_negative_factorization`, :class:`decomposition.NMF`,
   and :class:`decomposition.MiniBatchNMF` now support :class:`scipy.sparse.sparray`
@@ -212,11 +212,11 @@ Changelog
   when `sparse_output=True` and the output is configured to be pandas.
   :pr:`26931` by `Thomas Fan`_.
 
-- |MajorFeature| :class:`preprocessing.MinMaxScaler` now
+- |MajorFeature| :class:`preprocessing.MinMaxScaler` and :class:`preprocessing.MaxAbsScaler` now
   support the `Array API <https://data-apis.org/array-api/latest/>`_. Array API
   support is considered experimental and might evolve without being subject to
   our usual rolling deprecation cycle policy. See
-  :ref:`array_api` for more details. :pr:`26243` by `Tim Head`_.
+  :ref:`array_api` for more details. :pr:`26243` by `Tim Head`_ and :pr:`27110` by :user:`Edoardo Abati <EdAbati>`.
 
 :mod:`sklearn.tree`
 ...................
@@ -251,6 +251,12 @@ Changelog
   and outputs distances in the same dtype as the provided data.
   :pr:`27006` by :user:`Meekail Zain <micky774>`
 
+- |Enhancement| Improve the rendering of the plot obtained with the
+  :class:`metrics.PrecisionRecallDisplay` and :class:`metrics.RocCurveDisplay`
+  classes. The x- and y-axis limits are set to [0, 1] and the aspect ratio between
+  both axes is set to be 1 to get a square plot.
+  :pr:`26366` by :user:`Mojdeh Rastgoo <mrastgoo>`.
+
 :mod:`sklearn.utils`
 ....................
 

diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py
@@ -3,7 +3,7 @@
 Classifier comparison
 =====================
 
-A comparison of a several classifiers in scikit-learn on synthetic datasets.
+A comparison of several classifiers in scikit-learn on synthetic datasets.
 The point of this example is to illustrate the nature of decision boundaries
 of different classifiers.
 This should be taken with a grain of salt, as the intuition conveyed by

diff --git a/examples/cluster/plot_cluster_comparison.py b/examples/cluster/plot_cluster_comparison.py
@@ -35,17 +35,19 @@
 from sklearn.neighbors import kneighbors_graph
 from sklearn.preprocessing import StandardScaler
 
-np.random.seed(0)
-
 # ============
 # Generate datasets. We choose the size big enough to see the scalability
 # of the algorithms, but not too big to avoid too long running times
 # ============
 n_samples = 500
-noisy_circles = datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05)
-noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05)
-blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
-no_structure = np.random.rand(n_samples, 2), None
+seed = 30
+noisy_circles = datasets.make_circles(
+    n_samples=n_samples, factor=0.5, noise=0.05, random_state=seed
+)
+noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05, random_state=seed)
+blobs = datasets.make_blobs(n_samples=n_samples, random_state=seed)
+rng = np.random.RandomState(seed)
+no_structure = rng.rand(n_samples, 2), None
 
 # Anisotropically distributed data
 random_state = 170
@@ -82,6 +84,7 @@
     "allow_single_cluster": True,
     "hdbscan_min_cluster_size": 15,
     "hdbscan_min_samples": 3,
+    "random_state": 42,
 }
 
 datasets = [
@@ -154,14 +157,19 @@
     # Create cluster objects
     # ============
     ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
-    two_means = cluster.MiniBatchKMeans(n_clusters=params["n_clusters"], n_init="auto")
+    two_means = cluster.MiniBatchKMeans(
+        n_clusters=params["n_clusters"],
+        n_init="auto",
+        random_state=params["random_state"],
+    )
     ward = cluster.AgglomerativeClustering(
         n_clusters=params["n_clusters"], linkage="ward", connectivity=connectivity
     )
     spectral = cluster.SpectralClustering(
         n_clusters=params["n_clusters"],
         eigen_solver="arpack",
         affinity="nearest_neighbors",
+        random_state=params["random_state"],
     )
     dbscan = cluster.DBSCAN(eps=params["eps"])
     hdbscan = cluster.HDBSCAN(
@@ -175,7 +183,9 @@
         min_cluster_size=params["min_cluster_size"],
     )
     affinity_propagation = cluster.AffinityPropagation(
-        damping=params["damping"], preference=params["preference"], random_state=0
+        damping=params["damping"],
+        preference=params["preference"],
+        random_state=params["random_state"],
     )
     average_linkage = cluster.AgglomerativeClustering(
         linkage="average",
@@ -185,7 +195,9 @@
     )
     birch = cluster.Birch(n_clusters=params["n_clusters"])
     gmm = mixture.GaussianMixture(
-        n_components=params["n_clusters"], covariance_type="full"
+        n_components=params["n_clusters"],
+        covariance_type="full",
+        random_state=params["random_state"],
     )
 
     clustering_algorithms = (

diff --git a/examples/ensemble/plot_feature_transformation.py b/examples/ensemble/plot_feature_transformation.py
@@ -145,7 +145,7 @@ def gbdt_apply(X, model):
 
 from sklearn.metrics import RocCurveDisplay
 
-fig, ax = plt.subplots()
+_, ax = plt.subplots()
 
 models = [
     ("RT embedding -> LR", rt_model),
@@ -163,7 +163,7 @@ def gbdt_apply(X, model):
 _ = ax.set_title("ROC curve")
 
 # %%
-fig, ax = plt.subplots()
+_, ax = plt.subplots()
 for name, pipeline in models:
     model_displays[name].plot(ax=ax)