diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index fcd124b9..8e583391 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -90,6 +90,8 @@ Yeo-Johnson power transformer
   `#455 `_)
 - adjust the first guess to assume the data is normally distributed (
   `#429 `_)
+- make (back-) transformations more stable by using `np.expm1` and `np.log1p`
+  (`#494 `_)
 - rewrite power transformer to work with xarray, and refactor from a class
   structure to functions (
   `#442 `_, and `#474 `_)
diff --git a/mesmer/stats/_power_transformer.py b/mesmer/stats/_power_transformer.py
index 0f74c146..5bc40c98 100644
--- a/mesmer/stats/_power_transformer.py
+++ b/mesmer/stats/_power_transformer.py
@@ -69,10 +69,11 @@ def _yeo_johnson_transform_np(data, lambdas):
 
     # assign values for the four cases
     transformed[sel_a] = np.log1p(data[sel_a])
-    transformed[sel_b] = (np.power(data[sel_b] + 1, lambdas[sel_b]) - 1) / lambdas[
+    transformed[sel_b] = (np.expm1(np.log1p(data[sel_b]) * lambdas[sel_b])) / lambdas[
         sel_b
     ]
-    transformed[sel_c] = -(np.power(-data[sel_c] + 1, 2 - lambdas[sel_c]) - 1) / (
+
+    transformed[sel_c] = -(np.expm1(np.log1p(-data[sel_c]) * (2 - lambdas[sel_c]))) / (
         2 - lambdas[sel_c]
     )
     transformed[sel_d] = -np.log1p(-data[sel_d])
@@ -114,10 +115,11 @@ def _yeo_johnson_inverse_transform_np(data, lambdas):
 
     # assign values for the four cases
     transformed[pos_a] = np.exp(data[pos_a]) - 1
     transformed[pos_b] = (
-        np.power(data[pos_b] * lambdas[pos_b] + 1, 1 / lambdas[pos_b]) - 1
+        np.exp(np.log1p(data[pos_b] * lambdas[pos_b]) / lambdas[pos_b]) - 1
     )
-    transformed[pos_c] = 1 - np.power(
-        (lambdas[pos_c] - 2) * data[pos_c] + 1, 1 / (2 - lambdas[pos_c])
+
+    transformed[pos_c] = 1 - np.exp(
+        np.log1p(-(2 - lambdas[pos_c]) * data[pos_c]) / (2 - lambdas[pos_c])
     )
     transformed[pos_d] = 1 - np.exp(-data[pos_d])
diff --git a/tests/unit/test_power_transformer.py b/tests/unit/test_power_transformer.py
index 0b2e3e31..18f6611f 100644
--- a/tests/unit/test_power_transformer.py
+++ b/tests/unit/test_power_transformer.py
@@ -119,7 +119,8 @@ def test_yeo_johnson_transform_np_sklearn():
     pt_sklearn.lambdas_ = np.array([2.0])
     expected = pt_sklearn.transform(monthly_residuals.reshape(-1, 1))
 
-    np.testing.assert_equal(result, expected.reshape(-1))
+    # only approximately the same due to #494
+    np.testing.assert_allclose(result, expected.reshape(-1))
 
 
 def test_transform_roundtrip():
@@ -137,6 +138,22 @@ def test_transform_roundtrip():
     np.testing.assert_allclose(result, monthly_residuals, atol=1e-7)
 
 
+@pytest.mark.parametrize("value", (-2, -1, 0, 1, 2))
+@pytest.mark.parametrize("lmbda", (0, 1, 2))
+@pytest.mark.parametrize("lambda_delta", (-4e-16, 0, 4e-16))
+def test_transform_roundtrip_special_cases(value, lmbda, lambda_delta):
+    # test yeo_johnson for lambdas close to the boundary points
+
+    x = np.array([value], dtype=float)
+
+    lambdas = np.array([lmbda + lambda_delta], dtype=float)
+
+    transformed = _yeo_johnson_transform_np(x, lambdas)
+    result = _yeo_johnson_inverse_transform_np(transformed, lambdas)
+
+    np.testing.assert_allclose(result, x)
+
+
 def test_yeo_johnson_inverse_transform_np_sklearn():
     # test if our inverse power transform is the same as sklearn's for constant lambda
     np.random.seed(0)
@@ -151,7 +168,8 @@ def test_yeo_johnson_inverse_transform_np_sklearn():
     pt_sklearn.lambdas_ = np.array([2.0])
     expected = pt_sklearn.inverse_transform(monthly_residuals.reshape(-1, 1))
 
-    np.testing.assert_equal(result, expected.reshape(-1))
+    # only approximately the same due to #494
+    np.testing.assert_allclose(result, expected.reshape(-1))
 
 
 def test_yeo_johnson_optimize_lambda_sklearn():
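
Note (addendum, not part of the patch): the expm1/log1p rewrite matters because the
direct formula `(x + 1)**lambda - 1` cancels catastrophically when `lambda` is within
a few ulps of the case boundaries (0 and 2), which is exactly the regime the new
`test_transform_roundtrip_special_cases` probes. A minimal standalone sketch of the
effect, assuming only NumPy (the function names below are illustrative, not from the
codebase):

    import numpy as np

    def yj_pos_naive(x, lmbda):
        # direct formula for the x >= 0, lambda != 0 branch
        return (np.power(x + 1, lmbda) - 1) / lmbda

    def yj_pos_stable(x, lmbda):
        # formulation used in the patch: expm1(lambda * log1p(x)) / lambda
        return np.expm1(np.log1p(x) * lmbda) / lmbda

    x, lmbda = 2.0, 4e-16  # lambda one step away from the lambda == 0 branch

    # naive: (1 + ~4.4e-16) - 1 keeps only the last bit of the exponentiation,
    # giving roughly 1.1102 instead of the limit log1p(2) ~= 1.0986 (about 1% off)
    print(yj_pos_naive(x, lmbda))
    print(yj_pos_stable(x, lmbda))  # ~1.0986, matching np.log1p(2.0)

Since sklearn's PowerTransformer still uses the power-based formula, results agree
with it only to within floating-point tolerance, hence the switch from
`assert_equal` to `assert_allclose` in the tests above.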