Skip to content

Commit

Permalink
Merge branch 'scikit-learn:main' into submodulev3
Browse files Browse the repository at this point in the history
  • Loading branch information
adam2392 authored Aug 24, 2023
2 parents a4a7122 + a9611d0 commit 4424f98
Show file tree
Hide file tree
Showing 46 changed files with 1,041 additions and 557 deletions.
85 changes: 54 additions & 31 deletions doc/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,42 +69,65 @@ Then run:

.. raw:: html

<div class="highlight"><pre><code
><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="">python3 -m venv sklearn-venv</span
><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="">python -m venv sklearn-venv</span
><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="">python -m venv sklearn-venv</span
><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="">source sklearn-venv/bin/activate</span
><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="">source sklearn-venv/bin/activate</span
><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="">sklearn-venv\Scripts\activate</span
><span class="sk-expandable" data-packager="pip" data-venv="">pip install -U scikit-learn</span
><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no">pip install -U scikit-learn</span
><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no">pip install -U scikit-learn</span
><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no">pip3 install -U scikit-learn</span
><span class="sk-expandable" data-packager="conda">conda create -n sklearn-env -c conda-forge scikit-learn</span
><span class="sk-expandable" data-packager="conda">conda activate sklearn-env</span
></code></pre></div>
<div class="highlight">
<pre class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no"
><span class="prompt1">pip3 install -U scikit-learn</span></pre>

<pre class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no"
><span class="prompt1">pip install -U scikit-learn</span></pre>

<pre class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no"
><span class="prompt1">pip install -U scikit-learn</span></pre>

<pre class="sk-expandable" data-packager="pip" data-os="linux" data-venv=""
><span class="prompt1">python3 -m venv sklearn-venv</span>
<span class="prompt1">source sklearn-venv/bin/activate</span>
<span class="prompt1">pip3 install -U scikit-learn</span></pre>

<pre class="sk-expandable" data-packager="pip" data-os="windows" data-venv=""
><span class="prompt1">python -m venv sklearn-venv</span>
<span class="prompt1">sklearn-venv\Scripts\activate</span>
<span class="prompt1">pip install -U scikit-learn</span></pre>

<pre class="sk-expandable" data-packager="pip" data-os="mac" data-venv=""
><span class="prompt1">python -m venv sklearn-venv</span>
<span class="prompt1">source sklearn-venv/bin/activate</span>
<span class="prompt1">pip install -U scikit-learn</span></pre>

<pre class="sk-expandable" data-packager="conda"
><span class="prompt1">conda create -n sklearn-env -c conda-forge scikit-learn</span>
<span class="prompt1">conda activate sklearn-env</span></pre>
</div>

In order to check your installation you can use

.. raw:: html

<div class="highlight"><pre><code
><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no">python3 -m pip show scikit-learn # to see which version and where scikit-learn is installed</span
><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no">python3 -m pip freeze # to see all packages installed in the active virtualenv</span
><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no">python3 -c "import sklearn; sklearn.show_versions()"</span
><span class="sk-expandable" data-packager="pip" data-venv="">python -m pip show scikit-learn # to see which version and where scikit-learn is installed</span
><span class="sk-expandable" data-packager="pip" data-venv="">python -m pip freeze # to see all packages installed in the active virtualenv</span
><span class="sk-expandable" data-packager="pip" data-venv="">python -c "import sklearn; sklearn.show_versions()"</span
><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no">python -m pip show scikit-learn # to see which version and where scikit-learn is installed</span
><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no">python -m pip freeze # to see all packages installed in the active virtualenv</span
><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no">python -c "import sklearn; sklearn.show_versions()"</span
><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no">python -m pip show scikit-learn # to see which version and where scikit-learn is installed</span
><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no">python -m pip freeze # to see all packages installed in the active virtualenv</span
><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no">python -c "import sklearn; sklearn.show_versions()"</span
><span class="sk-expandable" data-packager="conda">conda list scikit-learn # to see which scikit-learn version is installed</span
><span class="sk-expandable" data-packager="conda">conda list # to see all packages installed in the active conda environment</span
><span class="sk-expandable" data-packager="conda">python -c "import sklearn; sklearn.show_versions()"</span
></code></pre></div>
<div class="highlight">
<pre class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no"
><span class="prompt1">python3 -m pip show scikit-learn # to see which version and where scikit-learn is installed</span>
<span class="prompt1">python3 -m pip freeze # to see all packages installed in the active virtualenv</span>
<span class="prompt1">python3 -c "import sklearn; sklearn.show_versions()"</span></pre>

<pre class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no"
><span class="prompt1">python -m pip show scikit-learn # to see which version and where scikit-learn is installed</span>
<span class="prompt1">python -m pip freeze # to see all packages installed in the active virtualenv</span>
<span class="prompt1">python -c "import sklearn; sklearn.show_versions()"</span></pre>

<pre class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no"
><span class="prompt1">python -m pip show scikit-learn # to see which version and where scikit-learn is installed</span>
<span class="prompt1">python -m pip freeze # to see all packages installed in the active virtualenv</span>
<span class="prompt1">python -c "import sklearn; sklearn.show_versions()"</span></pre>

<pre class="sk-expandable" data-packager="pip" data-venv=""
><span class="prompt1">python -m pip show scikit-learn # to see which version and where scikit-learn is installed</span>
<span class="prompt1">python -m pip freeze # to see all packages installed in the active virtualenv</span>
<span class="prompt1">python -c "import sklearn; sklearn.show_versions()"</span></pre>

<pre class="sk-expandable" data-packager="conda"
><span class="prompt1">conda list scikit-learn # to see which scikit-learn version is installed</span>
<span class="prompt1">conda list # to see all packages installed in the active conda environment</span>
<span class="prompt1">python -c "import sklearn; sklearn.show_versions()"</span></pre>
</div>

Note that in order to avoid potential conflicts with other packages it is
Expand Down
1 change: 1 addition & 0 deletions doc/modules/array_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ Estimators
- :class:`decomposition.PCA` (with `svd_solver="full"`,
`svd_solver="randomized"` and `power_iteration_normalizer="QR"`)
- :class:`discriminant_analysis.LinearDiscriminantAnalysis` (with `solver="svd"`)
- :class:`preprocessing.MaxAbsScaler`
- :class:`preprocessing.MinMaxScaler`

Tools
Expand Down
7 changes: 4 additions & 3 deletions doc/modules/linear_model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -951,16 +951,17 @@ The objective for the optimization becomes

Where :math:`[P]` represents the Iverson bracket which evaluates to :math:`0`
if :math:`P` is false, otherwise it evaluates to :math:`1`. We currently provide four choices
for the regularization term :math:`r(W)` via the `penalty` argument:
for the regularization term :math:`r(W)` via the `penalty` argument, where :math:`m`
is the number of features:

+----------------+----------------------------------------------------------------------------------+
| penalty | :math:`r(W)` |
+================+==================================================================================+
| `None` | :math:`0` |
+----------------+----------------------------------------------------------------------------------+
| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^n\sum_{j=1}^{K}|W_{i,j}|` |
| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^m\sum_{j=1}^{K}|W_{i,j}|` |
+----------------+----------------------------------------------------------------------------------+
| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^n\sum_{j=1}^{K} W_{i,j}^2` |
| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^{K} W_{i,j}^2` |
+----------------+----------------------------------------------------------------------------------+
| `ElasticNet` | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}` |
+----------------+----------------------------------------------------------------------------------+
Expand Down
7 changes: 4 additions & 3 deletions doc/modules/preprocessing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ The ``sklearn.preprocessing`` package provides several common
utility functions and transformer classes to change raw feature vectors
into a representation that is more suitable for the downstream estimators.

In general, learning algorithms benefit from standardization of the data set. If
some outliers are present in the set, robust scalers or transformers are more
appropriate. The behaviors of the different scalers, transformers, and
In general, many learning algorithms such as linear models benefit from standardization of the data set
(see :ref:`sphx_glr_auto_examples_preprocessing_plot_scaling_importance.py`).
If some outliers are present in the set, robust scalers or other transformers can
be more appropriate. The behaviors of the different scalers, transformers, and
normalizers on a dataset containing marginal outliers are highlighted in
:ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.

Expand Down
31 changes: 28 additions & 3 deletions doc/themes/scikit-learn-modern/static/css/theme.css
Original file line number Diff line number Diff line change
Expand Up @@ -1204,25 +1204,33 @@ div.install > input:checked + label {
.sk-expandable {
display: none;
}

div.highlight span.sk-expandable:before {
content: "$ ";
.sk-expandable + .copybtn {
display: none;
}

/* Show hidden content when the checkbox is checked */
/* for conda */
#quickstart-conda:checked ~* [data-packager="conda"] {
display: block;
}
#quickstart-conda:checked ~* [data-packager="conda"] + .copybtn {
display: block;
}

#quickstart-conda:checked ~ #quickstart-venv ~ label[for="quickstart-venv"] {
display: none;
}
#quickstart-conda:checked ~ #quickstart-venv ~ label[for="quickstart-venv"] + .copybtn {
display: none;
}

/* for pip */
#quickstart-pip:checked ~* [data-packager="pip"] {
display: block;
}
#quickstart-pip:checked ~* [data-packager="pip"] + .copybtn {
display: block;
}

#quickstart-pip:checked ~ label[for="quickstart-venv"]:before {
content: "Use pip virtualenv";
Expand All @@ -1231,20 +1239,37 @@ div.highlight span.sk-expandable:before {
#quickstart-win:not(:checked) ~* [data-os="windows"] {
display: none;
}
#quickstart-win:not(:checked) ~* [data-os="windows"] + .copybtn {
display: none;
}

#quickstart-lin:not(:checked) ~* [data-os="linux"] {
display: none;
}
#quickstart-lin:not(:checked) ~* [data-os="linux"] + .copybtn {
display: none;
}

#quickstart-mac:not(:checked) ~* [data-os="mac"] {
display: none;
}
#quickstart-mac:not(:checked) ~* [data-os="mac"] + .copybtn {
display: none;
}

#quickstart-venv:not(:checked) ~* [data-venv=""] {
display: none;
}
#quickstart-venv:not(:checked) ~* [data-venv=""] + .copybtn {
display: none;
}

#quickstart-venv:checked ~* [data-venv="no"] {
display: none;
}
#quickstart-venv:checked ~* [data-venv="no"] + .copybtn {
display: none;
}

/* Algorithm cheat-sheet */

Expand Down
16 changes: 13 additions & 3 deletions doc/tutorial/statistical_inference/supervised_learning.rst
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ Linear kernel

>>> svc = svm.SVC(kernel='linear')

.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png
.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png
:target: ../../auto_examples/svm/plot_svm_kernels.html

Polynomial kernel
Expand All @@ -477,7 +477,7 @@ Polynomial kernel
... degree=3)
>>> # degree: polynomial degree

.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png
.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png
:target: ../../auto_examples/svm/plot_svm_kernels.html

RBF kernel (Radial Basis Function)
Expand All @@ -489,7 +489,17 @@ RBF kernel (Radial Basis Function)
>>> # gamma: inverse of size of
>>> # radial kernel

.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png
.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_004.png
:target: ../../auto_examples/svm/plot_svm_kernels.html

Sigmoid kernel
^^^^^^^^^^^^^^

::

>>> svc = svm.SVC(kernel='sigmoid')

.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_005.png
:target: ../../auto_examples/svm/plot_svm_kernels.html


Expand Down
12 changes: 9 additions & 3 deletions doc/whats_new/v1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ Changelog
`full` and `randomized` solvers (with QR power iterations). See
:ref:`array_api` for more details.
:pr:`26315` and :pr:`27098` by :user:`Mateusz Sokół <mtsokol>`,
:user:`Olivier Grisel <ogrisel>` and :user:` Edoardo Abati <EdAbati>`.
:user:`Olivier Grisel <ogrisel>` and :user:`Edoardo Abati <EdAbati>`.

- |Enhancement| :func:`decomposition.non_negative_factorization`, :class:`decomposition.NMF`,
and :class:`decomposition.MiniBatchNMF` now support :class:`scipy.sparse.sparray`
Expand Down Expand Up @@ -212,11 +212,11 @@ Changelog
when `sparse_output=True` and the output is configured to be pandas.
:pr:`26931` by `Thomas Fan`_.

- |MajorFeature| :class:`preprocessing.MinMaxScaler` now
- |MajorFeature| :class:`preprocessing.MinMaxScaler` and :class:`preprocessing.MaxAbsScaler` now
supports the `Array API <https://data-apis.org/array-api/latest/>`_. Array API
support is considered experimental and might evolve without being subject to
our usual rolling deprecation cycle policy. See
:ref:`array_api` for more details. :pr:`26243` by `Tim Head`_.
:ref:`array_api` for more details. :pr:`26243` by `Tim Head`_ and :pr:`27110` by :user:`Edoardo Abati <EdAbati>`.

:mod:`sklearn.tree`
...................
Expand Down Expand Up @@ -251,6 +251,12 @@ Changelog
and outputs distances in the same dtype as the provided data.
:pr:`27006` by :user:`Meekail Zain <micky774>`.

- |Enhancement| Improve the rendering of the plot obtained with the
:class:`metrics.PrecisionRecallDisplay` and :class:`metrics.RocCurveDisplay`
classes. The x- and y-axis limits are set to [0, 1] and the aspect ratio between
both axes is set to be 1 to get a square plot.
:pr:`26366` by :user:`Mojdeh Rastgoo <mrastgoo>`.

:mod:`sklearn.utils`
....................

Expand Down
2 changes: 1 addition & 1 deletion examples/classification/plot_classifier_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Classifier comparison
=====================
A comparison of a several classifiers in scikit-learn on synthetic datasets.
A comparison of several classifiers in scikit-learn on synthetic datasets.
The point of this example is to illustrate the nature of decision boundaries
of different classifiers.
This should be taken with a grain of salt, as the intuition conveyed by
Expand Down
30 changes: 21 additions & 9 deletions examples/cluster/plot_cluster_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,19 @@
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler

np.random.seed(0)

# ============
# Generate datasets. We choose the size big enough to see the scalability
# of the algorithms, but not too big to avoid too long running times
# ============
n_samples = 500
noisy_circles = datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05)
noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05)
blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
no_structure = np.random.rand(n_samples, 2), None
seed = 30
noisy_circles = datasets.make_circles(
n_samples=n_samples, factor=0.5, noise=0.05, random_state=seed
)
noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05, random_state=seed)
blobs = datasets.make_blobs(n_samples=n_samples, random_state=seed)
rng = np.random.RandomState(seed)
no_structure = rng.rand(n_samples, 2), None

# Anisotropically distributed data
random_state = 170
Expand Down Expand Up @@ -82,6 +84,7 @@
"allow_single_cluster": True,
"hdbscan_min_cluster_size": 15,
"hdbscan_min_samples": 3,
"random_state": 42,
}

datasets = [
Expand Down Expand Up @@ -154,14 +157,19 @@
# Create cluster objects
# ============
ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
two_means = cluster.MiniBatchKMeans(n_clusters=params["n_clusters"], n_init="auto")
two_means = cluster.MiniBatchKMeans(
n_clusters=params["n_clusters"],
n_init="auto",
random_state=params["random_state"],
)
ward = cluster.AgglomerativeClustering(
n_clusters=params["n_clusters"], linkage="ward", connectivity=connectivity
)
spectral = cluster.SpectralClustering(
n_clusters=params["n_clusters"],
eigen_solver="arpack",
affinity="nearest_neighbors",
random_state=params["random_state"],
)
dbscan = cluster.DBSCAN(eps=params["eps"])
hdbscan = cluster.HDBSCAN(
Expand All @@ -175,7 +183,9 @@
min_cluster_size=params["min_cluster_size"],
)
affinity_propagation = cluster.AffinityPropagation(
damping=params["damping"], preference=params["preference"], random_state=0
damping=params["damping"],
preference=params["preference"],
random_state=params["random_state"],
)
average_linkage = cluster.AgglomerativeClustering(
linkage="average",
Expand All @@ -185,7 +195,9 @@
)
birch = cluster.Birch(n_clusters=params["n_clusters"])
gmm = mixture.GaussianMixture(
n_components=params["n_clusters"], covariance_type="full"
n_components=params["n_clusters"],
covariance_type="full",
random_state=params["random_state"],
)

clustering_algorithms = (
Expand Down
4 changes: 2 additions & 2 deletions examples/ensemble/plot_feature_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def gbdt_apply(X, model):

from sklearn.metrics import RocCurveDisplay

fig, ax = plt.subplots()
_, ax = plt.subplots()

models = [
("RT embedding -> LR", rt_model),
Expand All @@ -163,7 +163,7 @@ def gbdt_apply(X, model):
_ = ax.set_title("ROC curve")

# %%
fig, ax = plt.subplots()
_, ax = plt.subplots()
for name, pipeline in models:
model_displays[name].plot(ax=ax)

Expand Down
Loading

0 comments on commit 4424f98

Please sign in to comment.