add power line (#179)

david26694 · Jun 14, 2024 · 67aa3d0 · 67aa3d0
1 parent 3859f46
commit 67aa3d0
Show file tree

Hide file tree

Showing 4 changed files with 85 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -170,7 +170,7 @@ The library offers the following classes:
 
 * Regarding power analysis:
     * `PowerAnalysis`: to run power analysis on any experiment design, using simulation
-    * `NormalPowerAnalysis`: to run power analysis on any experiment design using the central limit theorem for the distribution of the estimator
+    * `NormalPowerAnalysis`: to run power analysis on any experiment design using the central limit theorem for the distribution of the estimator. It can be used to compute the minimum detectable effect (MDE) for a given power level.
     * `ConstantPerturbator`: to artificially perturb treated group with constant perturbations
     * `BinaryPerturbator`: to artificially perturb treated group for binary outcomes
     * `RelativePositivePerturbator`: to artificially perturb treated group with relative positive perturbations

diff --git a/cluster_experiments/power_analysis.py b/cluster_experiments/power_analysis.py
@@ -680,15 +680,42 @@ def _normal_power_calculation(
 
         raise ValueError(f"{self.analysis.hypothesis} is not a valid HypothesisEntries")
 
-    def mde(
+    def _normal_mde_calculation(
+        self, alpha: float, std_error: float, power: float
+    ) -> float:
+        """
+        Computes the minimum detectable effect under a normal approximation.
+
+        The quantiles are picked according to self.analysis.hypothesis.
+        Args:
+            alpha: Significance level.
+            std_error: Standard error of the analysis.
+            power: Power of the analysis.
+        """
+        hypothesis = HypothesisEntries(self.analysis.hypothesis)
+        if hypothesis == HypothesisEntries.LESS:
+            alpha_level, power_level = alpha, 1 - power
+        elif hypothesis == HypothesisEntries.GREATER:
+            alpha_level, power_level = 1 - alpha, power
+        elif hypothesis == HypothesisEntries.TWO_SIDED:
+            # we are neglecting norm_cdf_left
+            alpha_level, power_level = 1 - alpha / 2, power
+        else:
+            raise ValueError(
+                f"{self.analysis.hypothesis} is not a valid HypothesisEntries"
+            )
+
+        return float(norm.ppf(alpha_level) + norm.ppf(power_level)) * std_error
+
+    def mde_power_line(
         self,
         df: pd.DataFrame,
         pre_experiment_df: Optional[pd.DataFrame] = None,
         verbose: bool = False,
-        power: float = 0.8,
+        powers: Iterable[float] = (),
         n_simulations: Optional[int] = None,
         alpha: Optional[float] = None,
-    ) -> float:
+    ) -> Dict[float, float]:
         """
         Returns the minimum detectable effect of the analysis.
 
@@ -707,23 +734,41 @@ def mde(
             verbose=verbose,
             n_simulations=n_simulations,
         )
-
-        if HypothesisEntries(self.analysis.hypothesis) == HypothesisEntries.LESS:
-            z_alpha = norm.ppf(alpha)
-            z_beta = norm.ppf(1 - power)
-        elif HypothesisEntries(self.analysis.hypothesis) == HypothesisEntries.GREATER:
-            z_alpha = norm.ppf(1 - alpha)
-            z_beta = norm.ppf(power)
-        elif HypothesisEntries(self.analysis.hypothesis) == HypothesisEntries.TWO_SIDED:
-            # we are neglecting norm_cdf_left
-            z_alpha = norm.ppf(1 - alpha / 2)
-            z_beta = norm.ppf(power)
-        else:
-            raise ValueError(
-                f"{self.analysis.hypothesis} is not a valid HypothesisEntries"
+        return {
+            power: self._normal_mde_calculation(
+                alpha=alpha, std_error=std_error, power=power
             )
+            for power in powers
+        }
 
-        return float(z_alpha + z_beta) * std_error
+    def mde(
+        self,
+        df: pd.DataFrame,
+        pre_experiment_df: Optional[pd.DataFrame] = None,
+        verbose: bool = False,
+        power: float = 0.8,
+        n_simulations: Optional[int] = None,
+        alpha: Optional[float] = None,
+    ) -> float:
+        """
+        Returns the minimum detectable effect for one power via mde_power_line.
+        Args:
+            df: Dataframe with outcome and treatment variables.
+            pre_experiment_df: Dataframe with pre-experiment data.
+            verbose: Whether to show progress bar.
+            power: Power of the analysis.
+            n_simulations: Number of simulations to run.
+            alpha: Significance level.
+        """
+        mde_by_power = self.mde_power_line(
+            df=df,
+            pre_experiment_df=pre_experiment_df,
+            verbose=verbose,
+            powers=[power],
+            n_simulations=n_simulations,
+            alpha=alpha,
+        )
+        return mde_by_power[power]
 
     def _get_average_standard_error(
         self,

diff --git a/setup.py b/setup.py
@@ -47,7 +47,7 @@
 
 setup(
     name="cluster_experiments",
-    version="0.16.0",
+    version="0.17.0",
     packages=find_packages(),
     extras_require={
         "dev": dev_packages,

diff --git a/tests/power_analysis/test_normal_power_analysis.py b/tests/power_analysis/test_normal_power_analysis.py
@@ -328,3 +328,23 @@ def test_power_mde(df, hypothesis):
 
     # then
     assert abs(mde - 0.1) < 0.03
+
+
+def test_mde_power_line(df):
+    """Checks that the MDE grows with the requested power."""
+    # given
+    config = {
+        "splitter": "non_clustered",
+        "analysis": "ols",
+        "n_simulations": 5,
+        "hypothesis": "two-sided",
+        "seed": 20240922,
+    }
+    pw_normal = NormalPowerAnalysis.from_dict(config)
+
+    # when
+    mdes = pw_normal.mde_power_line(df, powers=[0.9, 0.8, 0.7])
+
+    # then: a higher power requires a larger detectable effect
+    assert mdes[0.9] > mdes[0.8]
+    assert mdes[0.8] > mdes[0.7]