Skip to content

Commit

Permalink
more stable yeo johnson roundtrips (#494)
Browse files Browse the repository at this point in the history
* more stable yeo johnson roundtrips

* revert unrelated change

* default allclose tolerance

* changelog

* comment on change

* changelog: fix indentation
  • Loading branch information
mathause authored Aug 13, 2024
1 parent 845b1e1 commit ee2916c
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 7 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ Yeo-Johnson power transformer
`#455 <https://github.com/MESMER-group/mesmer/pull/455>`_)
- adjust the first guess to assume the data is normally distributed (
`#429 <https://github.com/MESMER-group/mesmer/pull/429>`_)
- make (back-) transformations more stable by using `np.expm1` and `np.log1p`
(`#494 <https://github.com/MESMER-group/mesmer/pull/494>`_)
- rewrite power transformer to work with xarray, and refactor from a class structure to functions (
`#442 <https://github.com/MESMER-group/mesmer/pull/442>`_, and
`#474 <https://github.com/MESMER-group/mesmer/pull/474>`_)
Expand Down
12 changes: 7 additions & 5 deletions mesmer/stats/_power_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,11 @@ def _yeo_johnson_transform_np(data, lambdas):

# assign values for the four cases
transformed[sel_a] = np.log1p(data[sel_a])
transformed[sel_b] = (np.power(data[sel_b] + 1, lambdas[sel_b]) - 1) / lambdas[
transformed[sel_b] = (np.expm1(np.log1p(data[sel_b]) * lambdas[sel_b])) / lambdas[
sel_b
]
transformed[sel_c] = -(np.power(-data[sel_c] + 1, 2 - lambdas[sel_c]) - 1) / (

transformed[sel_c] = -(np.expm1(np.log1p(-data[sel_c]) * (2 - lambdas[sel_c]))) / (
2 - lambdas[sel_c]
)
transformed[sel_d] = -np.log1p(-data[sel_d])
Expand Down Expand Up @@ -114,10 +115,11 @@ def _yeo_johnson_inverse_transform_np(data, lambdas):
# assign values for the four cases
transformed[pos_a] = np.exp(data[pos_a]) - 1
transformed[pos_b] = (
np.power(data[pos_b] * lambdas[pos_b] + 1, 1 / lambdas[pos_b]) - 1
np.exp(np.log1p(data[pos_b] * lambdas[pos_b]) / lambdas[pos_b]) - 1
)
transformed[pos_c] = 1 - np.power(
(lambdas[pos_c] - 2) * data[pos_c] + 1, 1 / (2 - lambdas[pos_c])

transformed[pos_c] = 1 - np.exp(
np.log1p(-(2 - lambdas[pos_c]) * data[pos_c]) / (2 - lambdas[pos_c])
)
transformed[pos_d] = 1 - np.exp(-data[pos_d])

Expand Down
22 changes: 20 additions & 2 deletions tests/unit/test_power_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ def test_yeo_johnson_transform_np_sklearn():
pt_sklearn.lambdas_ = np.array([2.0])
expected = pt_sklearn.transform(monthly_residuals.reshape(-1, 1))

np.testing.assert_equal(result, expected.reshape(-1))
# only approximately the same due to #494
np.testing.assert_allclose(result, expected.reshape(-1))


def test_transform_roundtrip():
Expand All @@ -137,6 +138,22 @@ def test_transform_roundtrip():
np.testing.assert_allclose(result, monthly_residuals, atol=1e-7)


@pytest.mark.parametrize("value", (-2, -1, 0, 1, 2))
@pytest.mark.parametrize("lmbda", (0, 1, 2))
@pytest.mark.parametrize("lambda_delta", (-4e-16, 0, 4e-16))
def test_transform_roundtrip_special_cases(value, lmbda, lambda_delta):
    # Round-trip the Yeo-Johnson transform for lambdas exactly at, or a few
    # ulp away from, the branch boundary points (0, 1, 2).

    data = np.array([value], dtype=float)
    lmbdas = np.array([lmbda + lambda_delta], dtype=float)

    roundtripped = _yeo_johnson_inverse_transform_np(
        _yeo_johnson_transform_np(data, lmbdas), lmbdas
    )

    np.testing.assert_allclose(roundtripped, data)


def test_yeo_johnson_inverse_transform_np_sklearn():
# test if our inverse power transform is the same as sklearn's for constant lambda
np.random.seed(0)
Expand All @@ -151,7 +168,8 @@ def test_yeo_johnson_inverse_transform_np_sklearn():
pt_sklearn.lambdas_ = np.array([2.0])
expected = pt_sklearn.inverse_transform(monthly_residuals.reshape(-1, 1))

np.testing.assert_equal(result, expected.reshape(-1))
# only approximately the same due to #494
np.testing.assert_allclose(result, expected.reshape(-1))


def test_yeo_johnson_optimize_lambda_sklearn():
Expand Down

0 comments on commit ee2916c

Please sign in to comment.