Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] More docs and experiment with API implementation #31

Merged
merged 9 commits into from
May 2, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
'n_node_names', 'n_tapers', 'n_signals', 'n_step', 'n_freqs',
'epochs', 'freqs', 'times', 'arrays', 'lists', 'func', 'n_nodes',
'n_estimated_nodes', 'n_samples', 'n_channels', 'Renderer',
'n_ytimes', 'n_ychannels', 'n_events', 'n_components',
'n_ytimes', 'n_ychannels', 'n_events', 'n_components', 'n_classes',
}
numpydoc_xref_aliases = {
# Python
Expand Down
14 changes: 6 additions & 8 deletions examples/iclabel_automatic_artifact_correction_ica.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,14 +203,12 @@
# See :footcite:`iclabel2019` for full details.

ic_labels = label_components(raw, ica)
print(np.round(ic_labels, 2))

# Afterwards, we can hard threshold the probability values to assign
# each component to be kept or not (i.e. it is part of brain signal).
# The first component was visually an artifact, which was captured
# for certain.
not_brain_index = np.argmax(ic_labels, axis=1) != 0
exclude_idx = np.argwhere(not_brain_index).squeeze()
print(ic_labels)

# We can extract the labels of each component and exclude
# non-brain classified components.
labels = ic_labels["labels"]
exclude_idx = np.argwhere(labels != "brain").squeeze()
print(f"Excluding these ICA components: {exclude_idx}")

# %%
Expand Down
2 changes: 1 addition & 1 deletion mne_icalabel/iclabel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
This is a python implementation of the EEGLAB plugin 'ICLabel'."""

from .features import get_iclabel_features # noqa: F401
from .label_components import label_components # noqa: F401
from .label_components import iclabel_label_components # noqa: F401
from .network import ICLabelNet, run_iclabel # noqa: F401
9 changes: 9 additions & 0 deletions mne_icalabel/iclabel/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Lookup table from the integer class index to its lowercase string label.
# The position of each name in the tuple is the integer key it is stored under.
ICLABEL_NUMERICAL_TO_STRING = dict(
    enumerate(
        (
            "brain",
            "muscle artifact",
            "eye blink",
            "heart beat",
            "line noise",
            "channel noise",
            "other",
        )
    )
)
15 changes: 8 additions & 7 deletions mne_icalabel/iclabel/label_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .network import run_iclabel


def label_components(inst: Union[BaseRaw, BaseEpochs], ica: ICA):
def iclabel_label_components(inst: Union[BaseRaw, BaseEpochs], ica: ICA):
"""Label the provided ICA components with the ICLabel neural network.

This network uses 3 features:
Expand All @@ -19,7 +19,7 @@ def label_components(inst: Union[BaseRaw, BaseEpochs], ica: ICA):
- Autocorrelation, based on the ICA decomposition and the provided
instance.

For more information, see :footcite:`iclabel2019`
For more information, see :footcite:`iclabel2019`.

Parameters
----------
Expand All @@ -32,14 +32,15 @@ def label_components(inst: Union[BaseRaw, BaseEpochs], ica: ICA):

Returns
-------
labels : numpy.ndarray of shape (n_components,)
The estimated corresponding numerical labels for each independent
component.
labels_pred_proba : numpy.ndarray of shape (n_components, n_classes)
The estimated corresponding predicted probabilities of output classes
for each independent component. Columns are ordered with 'Brain',
'Muscle', 'Eye', 'Heart', 'Line Noise', 'Channel Noise', and 'Other'.

References
----------
.. footbibliography::
"""
features = get_iclabel_features(inst, ica)
labels = run_iclabel(*features)
return labels
labels_pred_proba = run_iclabel(*features)
return labels_pred_proba
7 changes: 6 additions & 1 deletion mne_icalabel/iclabel/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,10 @@ def run_iclabel(images: ArrayLike, psds: ArrayLike, autocorr: ArrayLike):

Returns
-------
labels : np.ndarray of shape (n_components)
labels : np.ndarray of shape (n_components, n_classes)
The predicted numerical probability values for all labels in ICLabel output.
Columns are ordered with 'Brain', 'Muscle', 'Eye', 'Heart',
'Line Noise', 'Channel Noise', and 'Other'.
"""
ica_network_file = files("mne_icalabel.iclabel").joinpath("assets/iclabelNet.pt")

Expand All @@ -237,4 +239,7 @@ def run_iclabel(images: ArrayLike, psds: ArrayLike, autocorr: ArrayLike):
# Format input and get labels
labels = iclabel_net(*_format_input_for_torch(*_format_input(images, psds, autocorr)))
labels = labels.detach().numpy()

# outputs are:
# ordered as in https://github.com/sccn/ICLabel/blob/e8abc99e0c371ff49eff115cf7955fafc7f7969a/iclabel.m#L60-L62
return labels
4 changes: 2 additions & 2 deletions mne_icalabel/iclabel/tests/test_label_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from mne.io import read_raw
from mne.preprocessing import ICA

from mne_icalabel.iclabel import label_components
from mne_icalabel.iclabel import iclabel_label_components

directory = sample.data_path() / "MEG" / "sample"
raw = read_raw(directory / "sample_audvis_raw.fif", preload=False)
Expand All @@ -20,5 +20,5 @@
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_label_components():
"""Simple test to check that label_components runs without raising."""
labels = label_components(raw, ica)
labels = iclabel_label_components(raw, ica)
assert labels is not None
40 changes: 35 additions & 5 deletions mne_icalabel/label_components.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from typing import Union

import numpy as np
from mne import BaseEpochs
from mne.io import BaseRaw
from mne.preprocessing import ICA
from mne.utils import _validate_type
from mne.utils.check import _check_option

from .iclabel import label_components as label_components_iclabel
from .iclabel import iclabel_label_components
from .iclabel.config import ICLABEL_NUMERICAL_TO_STRING
from .utils import _validate_inst_and_ica

methods = {
"iclabel": label_components_iclabel,
"iclabel": iclabel_label_components,
}


Expand All @@ -30,10 +32,38 @@ def label_components(inst: Union[BaseRaw, BaseEpochs], ica: ICA, method: str):

Returns
-------
labels : np.ndarray of shape (n_components,) or (n_components, n_class)
The estimated numerical labels of each ICA component.
component_dict : dict
A dictionary with the following output:
- 'y_pred_proba' : np.ndarray of shape (n_components, n_classes)
Estimated corresponding predicted probabilities of output classes
for each independent component.
- 'y_pred' : list of shape (n_components,)
The corresponding numerical label of the class with the highest
predicted probability.
- 'labels': list of shape (n_components,)
The corresponding string label of each class in 'y_pred'.

Notes
-----
For ICLabel model, the output classes are ordered:
- 'Brain'
- 'Muscle'
- 'Eye'
- 'Heart'
- 'Line Noise'
- 'Channel Noise'
- 'Other'
"""
_validate_type(method, str, "method")
_check_option("method", method, methods)
_validate_inst_and_ica(inst, ica)
return methods[method](inst, ica)
labels_pred_proba = methods[method](inst, ica)
labels_pred = np.argmax(labels_pred_proba, axis=1)
labels = [ICLABEL_NUMERICAL_TO_STRING[label] for label in labels_pred]

component_dict = {
"y_pred_proba": labels_pred_proba,
"y_pred": labels_pred,
"labels": labels,
}
return component_dict
Comment on lines +63 to +68
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason for nesting the output in a dictionary? Can't we directly have return y_pred_proba, y_pred, labels (in this order or another)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No real preference.

I only did this to make it super explicit and limit the number of return variables / make it a one-liner to convert to a DataFrame. Would you prefer a 3-tuple return instead?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No strong preference either, but you can't convert this to a DataFrame that easily either since labels_pred_proba is a 2D array.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, fair. Hmm, now that I think about it more, I do in general prefer a single return value, but I'm also unsure whether this is optimal here... I'll leave this open for now and we can always change it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea of simple conversion to DataFrame for the user (of course not on our end, let's not have pandas as a dependency just for that). With that in mind, I see 2 individual DataFrames in the proposed output:

idx labels y_pred
0 Eye 0.89
1 Brain 0.67
... ... ...

and:

idx Brain Muscle Eye Heart Line Noise Channel Noise Other
0 0.2 0.89 0.1 0.05 0.03 0.07 0.08
1 0.03 0.67 0.06 0.04 0.1 0.3 0.09
... ... ... ... ... ... ... ...
n_comp 0.3 0.23 0.43 0.22 0.11 0.08 0.7

So how about 2 outputs: return dict(labels=labels, y_pred=labels_pred), {value: labels_pred_proba[:, k] for k, value in ICLABEL_NUMERICAL_TO_STRING.items()}.

IMO, for what needs to be stored in the mne ICA instance, we do not need the y_pred, but only the labels and the second dict.

With this solution, you can get the 'raw' numpy array of probabilities with mne_icalabel.iclabel.iclabel_label_components or dictionaries ready to be transformed to DataFrames with mne_icalabel.label_components.

WDYT?