Skip to content

Commit

Permalink
add power line (#179)
Browse files Browse the repository at this point in the history
  • Loading branch information
david26694 authored Jun 14, 2024
1 parent 3859f46 commit 67aa3d0
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 20 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ The library offers the following classes:

* Regarding power analysis:
* `PowerAnalysis`: to run power analysis on any experiment design, using simulation
* `NormalPowerAnalysis`: to run power analysis on any experiment design using the central limit theorem for the distribution of the estimator
* `NormalPowerAnalysis`: to run power analysis on any experiment design using the central limit theorem for the distribution of the estimator. It can be used to compute the minimum detectable effect (MDE) for a given power level.
* `ConstantPerturbator`: to artificially perturb treated group with constant perturbations
* `BinaryPerturbator`: to artificially perturb treated group for binary outcomes
* `RelativePositivePerturbator`: to artificially perturb treated group with relative positive perturbations
Expand Down
81 changes: 63 additions & 18 deletions cluster_experiments/power_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,15 +680,42 @@ def _normal_power_calculation(

raise ValueError(f"{self.analysis.hypothesis} is not a valid HypothesisEntries")

def mde(
def _normal_mde_calculation(
    self, alpha: float, std_error: float, power: float
) -> float:
    """
    Computes the minimum detectable effect from normal quantiles.

    The result is (z_alpha + z_beta) * std_error, where the two quantiles
    are chosen according to the hypothesis configured on self.analysis.

    Args:
        alpha: Significance level.
        std_error: Standard error of the analysis.
        power: Power of the analysis.

    Returns:
        The sum of the two normal quantiles scaled by std_error.

    Raises:
        ValueError: if the hypothesis is none of LESS, GREATER or TWO_SIDED.
    """
    # Resolve the hypothesis once instead of re-building it per comparison.
    hypothesis = HypothesisEntries(self.analysis.hypothesis)

    if hypothesis == HypothesisEntries.LESS:
        # Both quantiles are taken from the lower tail.
        alpha_prob, power_prob = alpha, 1 - power
    elif hypothesis == HypothesisEntries.GREATER:
        alpha_prob, power_prob = 1 - alpha, power
    elif hypothesis == HypothesisEntries.TWO_SIDED:
        # we are neglecting norm_cdf_left
        # NOTE(review): presumably the opposite-tail term used in
        # _normal_power_calculation -- confirm against that method.
        alpha_prob, power_prob = 1 - alpha / 2, power
    else:
        raise ValueError(
            f"{self.analysis.hypothesis} is not a valid HypothesisEntries"
        )

    quantile_sum = norm.ppf(alpha_prob) + norm.ppf(power_prob)
    return float(quantile_sum) * std_error

def mde_power_line(
self,
df: pd.DataFrame,
pre_experiment_df: Optional[pd.DataFrame] = None,
verbose: bool = False,
power: float = 0.8,
powers: Iterable[float] = (),
n_simulations: Optional[int] = None,
alpha: Optional[float] = None,
) -> float:
) -> Dict[float, float]:
"""
Returns the minimum detectable effect of the analysis.
Expand All @@ -707,23 +734,41 @@ def mde(
verbose=verbose,
n_simulations=n_simulations,
)

if HypothesisEntries(self.analysis.hypothesis) == HypothesisEntries.LESS:
z_alpha = norm.ppf(alpha)
z_beta = norm.ppf(1 - power)
elif HypothesisEntries(self.analysis.hypothesis) == HypothesisEntries.GREATER:
z_alpha = norm.ppf(1 - alpha)
z_beta = norm.ppf(power)
elif HypothesisEntries(self.analysis.hypothesis) == HypothesisEntries.TWO_SIDED:
# we are neglecting norm_cdf_left
z_alpha = norm.ppf(1 - alpha / 2)
z_beta = norm.ppf(power)
else:
raise ValueError(
f"{self.analysis.hypothesis} is not a valid HypothesisEntries"
return {
power: self._normal_mde_calculation(
alpha=alpha, std_error=std_error, power=power
)
for power in powers
}

return float(z_alpha + z_beta) * std_error
def mde(
    self,
    df: pd.DataFrame,
    pre_experiment_df: Optional[pd.DataFrame] = None,
    verbose: bool = False,
    power: float = 0.8,
    n_simulations: Optional[int] = None,
    alpha: Optional[float] = None,
) -> float:
    """
    Returns the minimum detectable effect for a single power level.

    Delegates to mde_power_line with a one-element list of powers and
    picks the entry for the requested power out of the returned mapping.

    Args:
        df: Dataframe with outcome and treatment variables.
        pre_experiment_df: Dataframe with pre-experiment data.
        verbose: Whether to show progress bar.
        power: Power of the analysis.
        n_simulations: Number of simulations to run (forwarded as given).
        alpha: Significance level (forwarded as given, including None).

    Returns:
        The minimum detectable effect reported for the requested power.
    """
    mde_by_power = self.mde_power_line(
        df=df,
        pre_experiment_df=pre_experiment_df,
        verbose=verbose,
        powers=[power],
        n_simulations=n_simulations,
        alpha=alpha,
    )
    return mde_by_power[power]

def _get_average_standard_error(
self,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

setup(
name="cluster_experiments",
version="0.16.0",
version="0.17.0",
packages=find_packages(),
extras_require={
"dev": dev_packages,
Expand Down
20 changes: 20 additions & 0 deletions tests/power_analysis/test_normal_power_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,23 @@ def test_power_mde(df, hypothesis):

# then
assert abs(mde - 0.1) < 0.03


def test_mde_power_line(df):
    """The MDE reported by mde_power_line grows strictly with power."""
    # given
    config = {
        "splitter": "non_clustered",
        "analysis": "ols",
        "n_simulations": 5,
        "hypothesis": "two-sided",
        "seed": 20240922,
    }
    analysis = NormalPowerAnalysis.from_dict(config)

    # when
    mde_by_power = analysis.mde_power_line(df, powers=[0.9, 0.8, 0.7])

    # then: a higher power level requires a larger detectable effect
    assert mde_by_power[0.9] > mde_by_power[0.8] > mde_by_power[0.7]

0 comments on commit 67aa3d0

Please sign in to comment.