From 03a2eb06a3b0311ed12524f00b39894b4c04d04d Mon Sep 17 00:00:00 2001 From: ludovico-lanni <102026076+ludovico-lanni@users.noreply.github.com> Date: Mon, 27 May 2024 09:40:38 +0200 Subject: [PATCH] [94] feat: create exact power calculation class (#169) * relaxed precommit python config for black * Refactor power analysis module adding abstract class interface * call super in check inputs * add simulation parameter back to parent class * refactor power classes to prepare for AA power computation * draft move of loops interface to parent class * add normalpoweranalysis and tests * add tests * fix tests * add mlm in test * add more tests * bump to 0150 * add failing test * add normal power * add alternative power computation * add comment * add ludos correction --------- Co-authored-by: David --- .pre-commit-config.yaml | 2 +- README.md | 30 +- cluster_experiments/__init__.py | 3 +- cluster_experiments/experiment_analysis.py | 80 ++++- cluster_experiments/power_analysis.py | 337 +++++++++++++++++- cluster_experiments/power_config.py | 8 +- docs/normal_power.ipynb | 298 ++++++++++++++++ mkdocs.yml | 1 + setup.py | 2 +- tests/power_analysis/conftest.py | 43 ++- .../test_normal_power_analysis.py | 253 +++++++++++++ tests/test_docs.py | 2 + 12 files changed, 1037 insertions(+), 22 deletions(-) create mode 100644 docs/normal_power.ipynb create mode 100644 tests/power_analysis/test_normal_power_analysis.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 38c63d2..5999072 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: rev: 22.12.0 hooks: - id: black - language_version: python3.8 + language_version: python3 - repo: https://github.com/charliermarsh/ruff-pre-commit rev: 'v0.0.261' hooks: diff --git a/README.md b/README.md index 48a2740..9f2fe36 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ https://codecov.io/gh/david26694/cluster-experiments/branch/main/graph/badge.svg ![License](https://img.shields.io/github/license/david26694/cluster-experiments) [![Pypi version](https://img.shields.io/pypi/pyversions/cluster-experiments.svg)](https://pypi.python.org/pypi/cluster-experiments) -A library to run simulation-based power analysis, including clustered data. Also useful to design and analyse clustered and switchback experiments. +A library to run simulation-based power analysis, including cluster-randomized trial data. Also useful to design and analyse cluster-randomized and switchback experiments. @@ -50,6 +50,19 @@ power = pw.power_analysis(df, average_effect=0.1) # You may also get the power curve by running the power analysis with different average effects power_line = pw.power_line(df, average_effects=[0, 0.1, 0.2]) + +# A faster method can be used to run the power analysis, using the approximation of +# the central limit theorem, which is stable with less simulations +from cluster_experiments import NormalPowerAnalysis +npw = NormalPowerAnalysis.from_dict( + { + "analysis": "ols_non_clustered", + "splitter": "non_clustered", + "n_simulations": 5, + } +) +power_line_normal = npw.power_line(df, average_effects=[0, 0.1, 0.2]) + ``` ### Switchback @@ -93,7 +106,7 @@ print(f"{power = }") ### Long example -This is a comprehensive example of how to use this library. There are simpler ways to run this power analysis above but this shows all the building blocks of the library. +This is a more comprehensive example of how to use this library. There are simpler ways to run this power analysis above but this shows all the building blocks of the library. ```python title="Switchback - using classes" from datetime import date @@ -102,7 +115,7 @@ import numpy as np import pandas as pd from cluster_experiments.experiment_analysis import GeeExperimentAnalysis from cluster_experiments.perturbator import ConstantPerturbator -from cluster_experiments.power_analysis import PowerAnalysis +from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis from cluster_experiments.random_splitter import ClusteredSplitter # Create fake data @@ -138,6 +151,14 @@ pw = PowerAnalysis( # Keep in mind that the average effect is the absolute effect added, this is not relative! power = pw.power_analysis(df, average_effect=0.1) print(f"{power = }") + +# You can also use normal power analysis, that uses central limit theorem to estimate power, and it should be stable in less simulations +npw = NormalPowerAnalysis( + splitter=sw, analysis=analysis, n_simulations=50, seed=123 +) +power = npw.power_analysis(df, average_effect=0.1) +print(f"{power = }") + ``` ## Features @@ -145,7 +166,8 @@ print(f"{power = }") The library offers the following classes: * Regarding power analysis: - * `PowerAnalysis`: to run power analysis on a clustered/switchback design + * `PowerAnalysis`: to run power analysis on any experiment design, using simulation + * `NormalPowerAnalysis`: to run power analysis on any experiment design using the central limit theorem for the distribution of the estimator * `ConstantPerturbator`: to artificially perturb treated group with constant perturbations * `BinaryPerturbator`: to artificially perturb treated group for binary outcomes * `RelativePositivePerturbator`: to artificially perturb treated group with relative positive perturbations diff --git a/cluster_experiments/__init__.py b/cluster_experiments/__init__.py index 28738fd..be214e5 100644 --- a/cluster_experiments/__init__.py +++ b/cluster_experiments/__init__.py @@ -19,7 +19,7 @@ SegmentedBetaRelativePerturbator, UniformPerturbator, ) -from cluster_experiments.power_analysis import PowerAnalysis +from cluster_experiments.power_analysis import NormalPowerAnalysis, PowerAnalysis from cluster_experiments.power_config import PowerConfig from cluster_experiments.random_splitter import ( BalancedClusteredSplitter, @@ -48,6 +48,7 @@ "BetaRelativePerturbator", "SegmentedBetaRelativePerturbator", "PowerAnalysis", + "NormalPowerAnalysis", "PowerConfig", "EmptyRegressor", "TargetAggregation", diff --git a/cluster_experiments/experiment_analysis.py b/cluster_experiments/experiment_analysis.py index 0acbf09..737eb23 100644 --- a/cluster_experiments/experiment_analysis.py +++ b/cluster_experiments/experiment_analysis.py @@ -82,6 +82,19 @@ def analysis_point_estimate( """ raise NotImplementedError("Point estimate not implemented for this analysis") + def analysis_standard_error( + self, + df: pd.DataFrame, + verbose: bool = False, + ) -> float: + """ + Returns the standard error of the analysis. Expects treatment to be 0-1 variable + Arguments: + df: dataframe containing the data to analyze + verbose (Optional): bool, prints the regression summary if True + """ + raise NotImplementedError("Standard error not implemented for this analysis") + def _data_checks(self, df: pd.DataFrame) -> None: """Checks that the data is correct""" if df[self.target_col].isnull().any(): @@ -116,6 +129,17 @@ def get_point_estimate(self, df: pd.DataFrame) -> float: self._data_checks(df=df) return self.analysis_point_estimate(df) + def get_standard_error(self, df: pd.DataFrame) -> float: + """Returns the standard error of the analysis + + Arguments: + df: dataframe containing the data to analyze + """ + df = df.copy() + df = self._create_binary_treatment(df) + self._data_checks(df=df) + return self.analysis_standard_error(df) + def pvalue_based_on_hypothesis( self, model_result ) -> float: # todo add typehint statsmodels result @@ -234,6 +258,15 @@ def analysis_point_estimate(self, df: pd.DataFrame, verbose: bool = False) -> fl results_gee = self.fit_gee(df) return results_gee.params[self.treatment_col] + def analysis_standard_error(self, df: pd.DataFrame, verbose: bool = False) -> float: + """Returns the standard error of the analysis + Arguments: + df: dataframe containing the data to analyze + verbose (Optional): bool, prints the regression summary if True + """ + results_gee = self.fit_gee(df) + return results_gee.bse[self.treatment_col] + class ClusteredOLSAnalysis(ExperimentAnalysis): """ @@ -287,16 +320,20 @@ def __init__( self.formula = f"{self.target_col} ~ {' + '.join(self.regressors)}" self.cov_type = "cluster" + def fit_ols_clustered(self, df: pd.DataFrame): + """Returns the fitted OLS model""" + return sm.OLS.from_formula(self.formula, data=df,).fit( + cov_type=self.cov_type, + cov_kwds={"groups": self._get_cluster_column(df)}, + ) + def analysis_pvalue(self, df: pd.DataFrame, verbose: bool = False) -> float: """Returns the p-value of the analysis Arguments: df: dataframe containing the data to analyze verbose (Optional): bool, prints the regression summary if True """ - results_ols = sm.OLS.from_formula(self.formula, data=df,).fit( - cov_type=self.cov_type, - cov_kwds={"groups": self._get_cluster_column(df)}, - ) + results_ols = self.fit_ols_clustered(df) if verbose: print(results_ols.summary()) @@ -309,13 +346,18 @@ def analysis_point_estimate(self, df: pd.DataFrame, verbose: bool = False) -> fl df: dataframe containing the data to analyze verbose (Optional): bool, prints the regression summary if True """ - # Keep in mind that the point estimate of the OLS is the same as the ClusteredOLS - results_ols = sm.OLS.from_formula( - self.formula, - data=df, - ).fit() + results_ols = self.fit_ols_clustered(df) return results_ols.params[self.treatment_col] + def analysis_standard_error(self, df: pd.DataFrame, verbose: bool = False) -> float: + """Returns the standard error of the analysis + Arguments: + df: dataframe containing the data to analyze + verbose (Optional): bool, prints the regression summary if True + """ + results_ols = self.fit_ols_clustered(df) + return results_ols.bse[self.treatment_col] + class TTestClusteredAnalysis(ExperimentAnalysis): """ @@ -557,7 +599,7 @@ def __init__( self.formula = f"{self.target_col} ~ {' + '.join(self.regressors)}" self.hypothesis = hypothesis - def fit_ols(self, df: pd.DataFrame) -> sm.GEE: + def fit_ols(self, df: pd.DataFrame): """Returns the fitted OLS model""" return sm.OLS.from_formula(self.formula, data=df).fit() @@ -583,6 +625,15 @@ def analysis_point_estimate(self, df: pd.DataFrame, verbose: bool = False) -> fl results_ols = self.fit_ols(df=df) return results_ols.params[self.treatment_col] + def analysis_standard_error(self, df: pd.DataFrame, verbose: bool = False) -> float: + """Returns the standard error of the analysis + Arguments: + df: dataframe containing the data to analyze + verbose (Optional): bool, prints the regression summary if True + """ + results_ols = self.fit_ols(df=df) + return results_ols.bse[self.treatment_col] + @classmethod def from_config(cls, config): """Creates an OLSAnalysis object from a PowerConfig object""" @@ -680,3 +731,12 @@ def analysis_point_estimate(self, df: pd.DataFrame, verbose: bool = False) -> fl """ results_mlm = self.fit_mlm(df) return results_mlm.params[self.treatment_col] + + def analysis_standard_error(self, df: pd.DataFrame, verbose: bool = False) -> float: + """Returns the standard error of the analysis + Arguments: + df: dataframe containing the data to analyze + verbose (Optional): bool, prints the regression summary if True + """ + results_mlm = self.fit_mlm(df) + return results_mlm.bse[self.treatment_col] diff --git a/cluster_experiments/power_analysis.py b/cluster_experiments/power_analysis.py index 8bcc8d7..3e5702f 100644 --- a/cluster_experiments/power_analysis.py +++ b/cluster_experiments/power_analysis.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd +from scipy.stats import norm from sklearn.base import BaseEstimator from tqdm import tqdm @@ -18,7 +19,7 @@ splitter_mapping, ) from cluster_experiments.random_splitter import RandomSplitter, RepeatedSampler -from cluster_experiments.utils import _get_mapping_key +from cluster_experiments.utils import HypothesisEntries, _get_mapping_key class PowerAnalysis: @@ -524,3 +525,337 @@ def check_inputs(self): self.check_target_col() self.check_treatment() self.check_clusters() + + +class NormalPowerAnalysis: + """ + Class used to run Power analysis, using the central limit theorem to estimate power based on standard errors of the estimator, + and the fact that the coefficients of a regression are normally distributed. + It does so by running simulations. In each simulation: + 1. Assign treatment to dataframe randomly + 2. Add pre-experiment data if needed + 3. Get standard error from analysis + + Finally it returns the power of the analysis by counting how many times the effect was detected. + + Args: + splitter: RandomSplitter class to randomly assign treatment to dataframe. + analysis: ExperimentAnalysis class to use for analysis. + cupac_model: Sklearn estimator class to add pre-experiment data to dataframe. If None, no pre-experiment data will be added. + target_col: Name of the column with the outcome variable. + treatment_col: Name of the column with the treatment variable. + treatment: value of treatment_col considered to be treatment (not control) + control: value of treatment_col considered to be control (not treatment) + n_simulations: Number of simulations to run. + alpha: Significance level. + features_cupac_model: Covariates to be used in cupac model + seed: Optional. Seed to use for the splitter. + + Usage: + ```python + from datetime import date + + import numpy as np + import pandas as pd + from cluster_experiments.experiment_analysis import GeeExperimentAnalysis + from cluster_experiments.power_analysis import NormalPowerAnalysis + from cluster_experiments.random_splitter import ClusteredSplitter + + N = 1_000 + users = [f"User {i}" for i in range(1000)] + clusters = [f"Cluster {i}" for i in range(100)] + dates = [f"{date(2022, 1, i):%Y-%m-%d}" for i in range(1, 32)] + df = pd.DataFrame( + { + "cluster": np.random.choice(clusters, size=N), + "target": np.random.normal(0, 1, size=N), + "user": np.random.choice(users, size=N), + "date": np.random.choice(dates, size=N), + } + ) + + experiment_dates = [f"{date(2022, 1, i):%Y-%m-%d}" for i in range(15, 32)] + sw = ClusteredSplitter( + cluster_cols=["cluster", "date"], + ) + + analysis = GeeExperimentAnalysis( + cluster_cols=["cluster", "date"], + ) + + pw = NormalPowerAnalysis( + splitter=sw, analysis=analysis, n_simulations=50 + ) + + power = pw.power_analysis(df, average_effect=0.1) + print(f"{power = }") + ``` + """ + + def __init__( + self, + splitter: RandomSplitter, + analysis: ExperimentAnalysis, + cupac_model: Optional[BaseEstimator] = None, + target_col: str = "target", + treatment_col: str = "treatment", + treatment: str = "B", + control: str = "A", + n_simulations: int = 100, + alpha: float = 0.05, + features_cupac_model: Optional[List[str]] = None, + seed: Optional[int] = None, + hypothesis: str = "two-sided", + ): + self.splitter = splitter + self.analysis = analysis + self.n_simulations = n_simulations + self.target_col = target_col + self.treatment = treatment + self.control = control + self.treatment_col = treatment_col + self.alpha = alpha + self.hypothesis = hypothesis + + self.cupac_handler = CupacHandler( + cupac_model=cupac_model, + target_col=target_col, + features_cupac_model=features_cupac_model, + ) + if seed is not None: + random.seed(seed) # seed for splitter + np.random.seed(seed) # numpy seed + # may need to seed other stochasticity sources if added + + self.check_inputs() + + def _split(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Split dataframe. + Args: + df: Dataframe with outcome variable + """ + treatment_df = self.splitter.assign_treatment_df(df) + self.log_nulls(treatment_df) + treatment_df = treatment_df.query( + f"{self.treatment_col}.notnull()", engine="python" + ).query( + f"{self.treatment_col}.isin(['{self.treatment}', '{self.control}'])", + engine="python", + ) + return treatment_df + + def _get_standard_error( + self, + df: pd.DataFrame, + n_simulations: int, + verbose: bool, + ) -> Generator[float, None, None]: + for _ in tqdm(range(n_simulations), disable=not verbose): + split_df = self._split(df) + yield self.analysis.get_standard_error(split_df) + + def _normal_power_calculation( + self, alpha: float, std_error: float, average_effect: float + ) -> float: + """Returns the power of the analysis using the normal distribution. + Arguments: + alpha: significance level + std_error: standard error of the analysis + average_effect: effect size of the analysis + """ + if HypothesisEntries(self.analysis.hypothesis) == HypothesisEntries.LESS: + z_alpha = norm.ppf(alpha) + return float(norm.cdf(z_alpha - average_effect / std_error)) + + if HypothesisEntries(self.analysis.hypothesis) == HypothesisEntries.GREATER: + z_alpha = norm.ppf(1 - alpha) + return 1 - float(norm.cdf(z_alpha - average_effect / std_error)) + + if HypothesisEntries(self.analysis.hypothesis) == HypothesisEntries.TWO_SIDED: + z_alpha = norm.ppf(1 - alpha / 2) + norm_cdf_right = norm.cdf(z_alpha - average_effect / std_error) + norm_cdf_left = norm.cdf(-z_alpha - average_effect / std_error) + return float(norm_cdf_left + (1 - norm_cdf_right)) + + raise ValueError(f"{self.analysis.hypothesis} is not a valid HypothesisEntries") + + def power_line( + self, + df: pd.DataFrame, + pre_experiment_df: Optional[pd.DataFrame] = None, + verbose: bool = False, + average_effects: Iterable[float] = (), + n_simulations: Optional[int] = None, + alpha: Optional[float] = None, + ) -> Dict[float, float]: + """ + Run power analysis by simulation, using standard errors from the analysis. + Args: + df: Dataframe with outcome and treatment variables. + pre_experiment_df: Dataframe with pre-experiment data. + verbose: Whether to show progress bar. + average_effects: Average effects to test. + n_simulations: Number of simulations to run. + alpha: Significance level. + """ + n_simulations = self.n_simulations if n_simulations is None else n_simulations + alpha = self.alpha if alpha is None else alpha + + df = df.copy() + df = self.cupac_handler.add_covariates(df, pre_experiment_df) + + std_errors = list(self._get_standard_error(df, n_simulations, verbose)) + std_error_mean = float(np.mean(std_errors)) + + return { + effect: self._normal_power_calculation( + alpha=alpha, std_error=std_error_mean, average_effect=effect + ) + for effect in average_effects + } + + def power_analysis( + self, + df: pd.DataFrame, + pre_experiment_df: Optional[pd.DataFrame] = None, + verbose: bool = False, + average_effect: float = 0.0, + n_simulations: Optional[int] = None, + alpha: Optional[float] = None, + ) -> float: + """ + Run power analysis by simulation, using standard errors from the analysis. + Args: + df: Dataframe with outcome and treatment variables. + pre_experiment_df: Dataframe with pre-experiment data. + verbose: Whether to show progress bar. + average_effect: Average effect of treatment. + n_simulations: Number of simulations to run. + alpha: Significance level. + """ + return self.power_line( + df=df, + pre_experiment_df=pre_experiment_df, + verbose=verbose, + average_effects=[average_effect], + n_simulations=n_simulations, + alpha=alpha, + )[average_effect] + + def log_nulls(self, df: pd.DataFrame) -> None: + """Warns about dropping nulls in treatment column""" + n_nulls = len(df.query(f"{self.treatment_col}.isnull()", engine="python")) + if n_nulls > 0: + logging.warning( + f"There are {n_nulls} null values in treatment, dropping them" + ) + + @classmethod + def from_dict(cls, config_dict: dict) -> "NormalPowerAnalysis": + """Constructs PowerAnalysis from dictionary""" + config = PowerConfig(**config_dict) + return cls.from_config(config) + + @classmethod + def from_config(cls, config: PowerConfig) -> "NormalPowerAnalysis": + """Constructs PowerAnalysis from PowerConfig""" + splitter_cls = _get_mapping_key(splitter_mapping, config.splitter) + analysis_cls = _get_mapping_key(analysis_mapping, config.analysis) + cupac_cls = _get_mapping_key(cupac_model_mapping, config.cupac_model) + return cls( + splitter=splitter_cls.from_config(config), + analysis=analysis_cls.from_config(config), + cupac_model=cupac_cls.from_config(config), + target_col=config.target_col, + treatment_col=config.treatment_col, + treatment=config.treatment, + n_simulations=config.n_simulations, + alpha=config.alpha, + seed=config.seed, + ) + + def check_treatment_col(self): + """Checks consistency of treatment column""" + assert ( + self.analysis.treatment_col == self.treatment_col + ), f"treatment_col in analysis ({self.analysis.treatment_col}) must be the same as treatment_col in PowerAnalysis ({self.treatment_col})" + + assert ( + self.analysis.treatment_col == self.splitter.treatment_col + ), f"treatment_col in analysis ({self.analysis.treatment_col}) must be the same as treatment_col in splitter ({self.splitter.treatment_col})" + + def check_target_col(self): + assert ( + self.analysis.target_col == self.target_col + ), f"target_col in analysis ({self.analysis.target_col}) must be the same as target_col in PowerAnalysis ({self.target_col})" + + def check_treatment(self): + assert ( + self.analysis.treatment == self.treatment + ), f"treatment in analysis ({self.analysis.treatment}) must be the same as treatment in PowerAnalysis ({self.treatment})" + + assert ( + self.analysis.treatment in self.splitter.treatments + ), f"treatment in analysis ({self.analysis.treatment}) must be in treatments in splitter ({self.splitter.treatments})" + + assert ( + self.control in self.splitter.treatments + ), f"control in power analysis ({self.control}) must be in treatments in splitter ({self.splitter.treatments})" + + def check_covariates(self): + if hasattr(self.analysis, "covariates"): + cupac_in_covariates = ( + self.cupac_handler.cupac_outcome_name in self.analysis.covariates + ) + + assert cupac_in_covariates or not self.cupac_handler.is_cupac, ( + f"covariates in analysis must contain {self.cupac_handler.cupac_outcome_name} if cupac_model is not None. " + f"If you want to use cupac_model, you must add the cupac outcome to the covariates of the analysis " + f"You may want to do covariates=['{self.cupac_handler.cupac_outcome_name}'] in your analysis method or your config" + ) + + if hasattr(self.splitter, "cluster_cols"): + if set(self.analysis.covariates).intersection( + set(self.splitter.cluster_cols) + ): + logging.warning( + f"covariates in analysis ({self.analysis.covariates}) are also cluster_cols in splitter ({self.splitter.cluster_cols}). " + f"Be specially careful when using switchback splitters, since the time splitter column is being overriden" + ) + + def check_clusters(self): + has_analysis_clusters = hasattr(self.analysis, "cluster_cols") + has_splitter_clusters = hasattr(self.splitter, "cluster_cols") + not_cluster_cols_cond = not has_analysis_clusters or not has_splitter_clusters + assert ( + not_cluster_cols_cond + or self.analysis.cluster_cols == self.splitter.cluster_cols + ), f"cluster_cols in analysis ({self.analysis.cluster_cols}) must be the same as cluster_cols in splitter ({self.splitter.cluster_cols})" + + assert ( + has_splitter_clusters + or not has_analysis_clusters + or not self.analysis.cluster_cols + or isinstance(self.splitter, RepeatedSampler) + ), "analysis has cluster_cols but splitter does not." + + assert ( + has_analysis_clusters + or not has_splitter_clusters + or not self.splitter.cluster_cols + ), "splitter has cluster_cols but analysis does not." + + has_time_col = hasattr(self.splitter, "time_col") + assert not ( + has_time_col + and has_splitter_clusters + and self.splitter.time_col not in self.splitter.cluster_cols + ), "in switchback splitters, time_col must be in cluster_cols" + + def check_inputs(self): + self.check_covariates() + self.check_treatment_col() + self.check_target_col() + self.check_treatment() + self.check_clusters() diff --git a/cluster_experiments/power_config.py b/cluster_experiments/power_config.py index 0839e92..86218ce 100644 --- a/cluster_experiments/power_config.py +++ b/cluster_experiments/power_config.py @@ -45,7 +45,7 @@ class PowerConfig: Arguments: splitter: Splitter object to use - perturbator: Perturbator object to use + perturbator: Perturbator object to use, defaults to "" for normal power analysis analysis: ExperimentAnalysis object to use washover: Washover object to use, defaults to "" cupac_model: CUPAC model to use @@ -78,7 +78,7 @@ class PowerConfig: ```python from cluster_experiments.power_config import PowerConfig - from cluster_experiments.power_analysis import PowerAnalysis + from cluster_experiments.power_analysis import PowerAnalysis, NormalPowerAnalysis p = PowerConfig( analysis="gee", @@ -89,13 +89,15 @@ class PowerConfig: alpha=0.05, ) power_analysis = PowerAnalysis.from_config(p) + + normal_power_analysis = NormalPowerAnalysis.from_config(p) ``` """ # mappings - perturbator: str splitter: str analysis: str + perturbator: str = "" washover: str = "" # Needed diff --git a/docs/normal_power.ipynb b/docs/normal_power.ipynb new file mode 100644 index 0000000..2d7cbe8 --- /dev/null +++ b/docs/normal_power.ipynb @@ -0,0 +1,298 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook shows how NormalPowerAnalysis and PowerAnalysis calculators give similar powers for a switchback experiment" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date\n", + "\n", + "import numpy as np\n", + "from cluster_experiments import PowerAnalysis, ConstantPerturbator, BalancedClusteredSplitter, ExperimentAnalysis, ClusteredOLSAnalysis, NormalPowerAnalysis\n", + "import pandas as pd\n", + "\n", + "\n", + "\n", + "# Create fake data\n", + "N = 10_000\n", + "clusters = [f\"Cluster {i}\" for i in range(10)]\n", + "dates = [f\"{date(2022, 1, i):%Y-%m-%d}\" for i in range(1, 15)]\n", + "df = pd.DataFrame(\n", + " {\n", + " \"cluster\": np.random.choice(clusters, size=N),\n", + " \"date\": np.random.choice(dates, size=N),\n", + " }\n", + ").assign(\n", + " # Target is a linear combination of cluster and day of week, plus some noise\n", + " cluster_id=lambda df: df[\"cluster\"].astype(\"category\").cat.codes,\n", + " day_of_week=lambda df: pd.to_datetime(df[\"date\"]).dt.dayofweek,\n", + " target=lambda df: df[\"cluster_id\"] + df[\"day_of_week\"] + np.random.normal(size=N),\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
clusterdatecluster_idday_of_weektarget
0Cluster 52022-01-14549.204485
1Cluster 72022-01-03705.532955
2Cluster 92022-01-10908.551213
3Cluster 02022-01-12021.910662
4Cluster 52022-01-085510.463841
\n", + "
" + ], + "text/plain": [ + " cluster date cluster_id day_of_week target\n", + "0 Cluster 5 2022-01-14 5 4 9.204485\n", + "1 Cluster 7 2022-01-03 7 0 5.532955\n", + "2 Cluster 9 2022-01-10 9 0 8.551213\n", + "3 Cluster 0 2022-01-12 0 2 1.910662\n", + "4 Cluster 5 2022-01-08 5 5 10.463841" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some clusters have a higher average outcome than others" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "cluster_cols = [\"cluster\", \"date\"]\n", + "\n", + "splitter = BalancedClusteredSplitter(\n", + " cluster_cols=cluster_cols,\n", + ")\n", + "\n", + "perturbator = ConstantPerturbator()\n", + "\n", + "analysis = ClusteredOLSAnalysis(\n", + " cluster_cols=cluster_cols,\n", + ")\n", + "\n", + "alpha = 0.05\n", + "n_simulations = 100\n", + "n_simulations_normal = 10\n", + "\n", + "# Simulated power analysis, we use clustered splitter and ols clustered analysis\n", + "pw_simulated = PowerAnalysis(\n", + " splitter=splitter,\n", + " perturbator=perturbator,\n", + " alpha=alpha,\n", + " n_simulations=n_simulations,\n", + " analysis=analysis,\n", + ")\n", + "\n", + "# Normal power analysis, uses Central limit theorem to estimate power, and needs less simulations\n", + "pw_normal = NormalPowerAnalysis(\n", + " splitter=splitter,\n", + " alpha=alpha,\n", + " n_simulations=n_simulations_normal,\n", + " analysis=analysis,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# power line for simulated\n", + "effects = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75]\n", + "pw_simulated_line = pw_simulated.power_line(df, average_effects=effects)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0.25: 0.07100559768004645,\n", + " 0.5: 0.13627190622233026,\n", + " 0.75: 0.24794151532294106,\n", + " 1: 0.3986365782510814,\n", + " 1.25: 0.5669227113637416,\n", + " 1.5: 0.7238074797118612,\n", + " 1.75: 0.84610562511809}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# power line for normal\n", + "pw_normal_line = pw_normal.power_line(df, average_effects=effects)\n", + "pw_normal_line" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEWCAYAAABmE+CbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABEBklEQVR4nO3dd3hU1dbA4d9KIQmEXg2hSpGSECA0EUQBxUYRpUkTFK/16kVsVxG5FhS9KuqnIiIWqsiFiCIoSpUSmiBBBKkJvSS0hLT1/XEGHEJCQplMynqfZx5O2eecNQmZNXvvc/YWVcUYY4zx8XYAxhhj8gZLCMYYYwBLCMYYY1wsIRhjjAEsIRhjjHGxhGCMMQawhGBMniQi1UVERcTvMs7RRkQ2X8m4TMFmCcHkaSKyQ0QSReSEiOwXkQkiEuztuPIDVV2sqnW9HYfJPywhmPzgDlUNBpoAkcDz3ghCRHy9cV1jcoslBJNvqGocMAdoCCAinUVko4jEi8gCEann2n6viHx75jgR2SIiX7ut7xaRCNfyNSLyo4gcEZHNItLDrdwEEflQRL4XkZPADRljcl1rk4gcF5FtIvKA2752IhIrIkNF5ICI7BWRe9323yYia0XkmCumEZm9bxG5W0RWZ9j2LxGZ5Vq+VURiXDHEiciT7td3O+Zp1/7jrvfaPkc/eFN4qKq97JVnX8AOoINruQqwEfgPUAc4CXQE/IGngK1AEaAmEI/zhScE2AnEus5REzjq2lcM2A3cC/gBjYFDQH1X2QlAAtDaVT4wk/huA64GBLgeOAU0ce1rB6QCI10x3uraX9ptf5jr3OHAfqCra191QF1xBQBHgHpu110LdHct7wXauJZLZ7j+mfdd1/VeQ9zOf7W3f7/2ylsvqyGY/GCmiMQDS4CFwKtAT+A7Vf1RVVOAN4Eg4FpV3QYcByKAtsBcYI+IXIPzob1YVdOB24EdqvqZqqaq6lrgG+But2vPUtWlqpquqkkZA1PV71T1L3UsBOYBbdyKpAAjVTVFVb8HTuB8OKOqC1R1g+vc64HJrvgyXuM0MBXoCyAiDXA+0Ge7XaO+iJRQ1aOquiaTn2EaTmKpLyL+qrpDVf/K/MdtCitLCCY/6KqqpVS1mqo+pKqJ/P3NHwDXB/xuoLJr00Kcb8htXcsLcD5sr3etA1QDWrianOJdSeceoJLbtXdfKDARuUVElruanOJxagHl3IocVtVUt/VTQLDr2BYi8ouIHBSRBOAfGY519znQR0QE6AdMcyUKgO6u6+4UkYUi0irjwaq6FXgcGAEcEJEpIhJyofdmCh9LCCa/2oPzgQ6A64OyChDn2nQmIbRxLS/k/ISwG1joSjZnXsGq+qDbdbIcDlhEAnBqFG8CFVW1FPA9TvNRTkwCooAqqloS+CirY1V1OZDsej99gC/d9kWrahegAjATmJbFOSap6nU4PzcFXs9hnKaQsIRg8qtpwG0i0l5E/IGhwGngV9f+hTidwEGqGgssBjoBZXHa38FpcqkjIv1ExN/1anamczoHiuA0wxwEUkXkFuCmi3gPxYEjqpokIs1xPugv5AvgfSBFVZcAiEgREblHREq6ms6OAekZDxSRuiJyoyuJJQGJmZUzhZslBJMvqepmnDb193A6gu/AuT012bX/T5z2+sWu9WPANmCpqqa5th3H+QDvhVPj2IfzrTkghzEcBx7DSU5HcT7Qoy7ibTwEjBSR48Bwsvhm7+ZLnDusvsqwvR+wQ0SO4TQ73ZPJsQHAKJyf1T6c2sSzFxGrKQRE1SbIMSY/EJEg4ADOXURbvB2PKXishmBM/vEgEG3JwHjKJY+TYozJPSKyA6fDuat3IzEFmTUZGWOMAazJyBhjjEu+azIqV66cVq9e3dthGGNMvrJ69epDqlr+QmXyXUKoXr06q1at8nYYxhiTr4jIzuzKWJORMcYYwBKCMcYYF0sIxhhjgHzYh5CZlJQUYmNjSUo6b3Rik88FBgYSGhqKv7+/t0MxpsArEAkhNjaW4sWLU716dZxBL01BoKocPnyY2NhYatSo4e1wjCnwCkSTUVJSEmXLlrVkUMCICGXLlrWanzHrp8HbDWFEKeff9dmNg3hpCkQNAbBkUEDZ79UUeuunwbePQUqis56w21kHCO+R9XGXoEDUEIwxpsCaP/LvZHBGSqKz/QqzhHCFvPLKKzRo0IDw8HAiIiJYsWIFAPfddx8xMTFX5BrVq1fn0KFDFyzz6quvXvR5J0yYwCOPPHKpYRljPEXVqRFkJiH2il+uwDQZXYyZa+MYPXcze+ITCSkVxLCb69K1ceXsD8zCsmXLmD17NmvWrCEgIIBDhw6RnJwMwLhx465U2Dny6quv8txzz+XqNa+0tLQ0fH19vR2GMd51dAd8/1TW+0uGXvFLFroawsy1cTw7YwNx8YkoEBefyLMzNjBzbVy2x2Zl7969lCtXjoAAZ6KtcuXKERLizF/erl27s0NtBAcHM2zYMBo0aECHDh1YuXIl7dq1o2bNmkRFORNtZfy2fvvtt7NgwYLzrtm1a1eaNm1KgwYNGDt2LADPPPMMiYmJREREcM89zqRZX331Fc2bNyciIoIHHniAtLQ0AD777DPq1KlD8+bNWbp0aabva8SIEfTr149WrVpRu3ZtPvnkE8C5+2fYsGE0bNiQsLAwpk6dCsDDDz989n1069aNQYMGATB+/Hj+/e9/XzCe4OBghg4dSqNGjVi2bNlF/w6MKTBSk2HRm/BBS9ixBBreBf5B55bxD4L2w6/4pQtcDeGlbzcSs+dYlvvX7oonOe3cqWQTU9J4avp6Jq/clekx9UNK8OIdDbI850033cTIkSOpU6cOHTp0oGfPnlx//fXnlTt58iQ33ngjo0ePplu3bjz//PP8+OOPxMTEMGDAADp37pzDd+l8yJYpU4bExESaNWtG9+7dGTVqFO+//z7r1q0DYNOmTUydOpWlS5fi7+/PQw89xMSJE+nYsSMvvvgiq1evpmTJktxwww00btw40+usX7+e5cuXc/LkSRo3bsxtt93GsmXLWLduHb/99huHDh2iWbNmtG3bljZt2rB48WI6d+5MXFwce/fuBWDx4sX06tUry3j69+/PyZMnadGiBW+99VaOfwbGFDjbF8N3/4JDf0K9O6DTKKcmsH6a02eQEOustx9+xTuUoQAmhOxkTAbZbc+J4OBgVq9ezeLFi/nll1/o2bMno0aNYuDAgeeUK1KkCJ06dQIgLCyMgIAA/P39CQsLY8eOHRd1zTFjxvC///0PgN27d7NlyxbKli17Tpn58+ezevVqmjVrBkBiYiIVKlRgxYoVtGvXjvLlnYEPe/bsyZ9//pnpdbp06UJQUBBBQUHccMMNrFy5kiVLltC7d298fX2pWLEi119/PdHR0bRp04Z33nmHmJgY6tevz9GjR9m7dy/Lli1jzJgxfP7555nGA+Dr60v37t0v6mdgTIFx4iDMex7WT4FS1aDP11DnprO7Z6a1ZvTpMexJSiQkMIhhaXU9MlNSgUsIF/omD9B61M/ExSeet71yqSCmPtDqkq/r6+tLu3btaNeuHWFhYXz++efnJQR/f/+zt1H6+PicbWLy8fEhNTUVAD8/P9LT/05Omd2Dv2DBAn766SeWLVtG0aJFadeuXablVJUBAwbw2muvnbN95syZOX5fGW/7vNBtoJUrVyY+Pp4ffviBtm3bcuTIEaZNm0ZwcDDFixfPMh5wnki2fgNT6KSnw+rPYP5LkHwK2gyFNk9CkaJni5xp5k5McZpXzzRzA5fV95mZQteHMOzmugT5n/vBE+Tvy7Cb617yOTdv3syWLX9Pc7tu3TqqVat2SeeqXr0669atIz09nd27d7Ny5crzyiQkJFC6dGmKFi3KH3/8wfLly8/u8/f3JyUlBYD27dszffp0Dhw4AMCRI0fYuXMnLVq0YOHChRw+fJiUlBS+/vrrLOOZNWsWSUlJHD58mAULFtCsWTPatGnD1KlTSUtL4+DBgyxatIjmzZsD0LJlS955552zTUhvvvkmbdq0uWA8xhRKe3+DTzs4TUSVwuHBpU5TkFsyAHhj7h9nk8EZiSlpjJ67+YqHVOBqCNk5k1Gv5F1GJ06c4NFHHyU+Ph4/Pz9q1ap1tqP3YrVu3ZoaNWpQv3596tWrR5MmTc4r06lTJz766CPq1atH3bp1admy5dl9Q4YMITw8nCZNmjBx4kRefvllbrrpJtLT0/H39+eDDz6gZcuWjBgxglatWlGqVCkiIiKyjCc8PJwbbriBQ4cO8cILLxASEkK3bt1YtmwZjRo1QkR44403qFSpEgBt2rRh3rx51KpVi2rVqnHkyJGzCaF+/fqZxnOpydOYfCnpGPzyCqwcC0XLQrexTn9Ahtp3alo6Ub/tYU985k/q78mkpeNy5bs5lSMjIzXjBDmbNm2iXr16Xoqo4BoxYgTBwcE8+eSTXo3Dfr+mQFCFjf+DH56FE/shchC0fwGCSp9TLC1difotjvfmb2XboZP4+Qip6ed/TlcuFcTSZ27M8eVFZLWqRl6ojEdrCCLSCXgX8AXGqeqoDPurAp8DpVxlnlHV7z0ZkzHG5LrDf8H3w+Cv+U7zUK9JENr0nCJp6cq3v+1hzPwtbDt0kmsqFeejvk1JPJ3KczN/P6fZ6HKbubPisYQgIr7AB0BHIBaIFpEoVXV/bPd5YJqqfigi9YHvgeqeislcnBEjRng7BGPyt9TTsOQdWPwW+BaBTq9Ds/vA9++P3rR0Zfb6Pbw7fwvbDp5JBE24qX4lfHycZiTxkSvazJ0VT9YQmgNbVXUbgIhMAboA7glBgRKu5ZLAHg/GY4wxueevX+C7oXDkL2hwJ9z8KpS46uzuM4lgzPwt/HXwJHUrFufDe5pwc4O/E8EZXRtX9kgCyMiTCaEy4D4IRyzQIkOZEcA8EXkUKAZ0yOxEIjIEGAJQtWrVKx6oMcZcMcf3wdzn4PdvoHQN6DsDarU/uzstXfluw17GzN/C1gMnqFuxOP93TxM6ZZIIcpu37zLqDUxQ1bdEpBXwpYg0VNVznhJT1bHAWHA6lb0QpzHGXFh6GkR/Cj//B1KT4Ppn4LonwD/Q2e2WCLYcOEGdisF80KcJtzT0fiI4w5MJIQ6o4rYe6trmbjDQCUBVl4lIIFAOOODBuIwx5sqKWwOzn4C966BmO7j1LShXC3ASwfe/7+Xdn5xEULtCMO/3acytDa/KM4ngDE8+mBYN1BaRGiJSBOgFRGUoswtoDyAi9YBA4KAHY/IYEWHo0KFn1998881c75R1H0jPGJMLEuOdfoJPboTje6H7p9BvJpSr5dQI1u+l07uLeGTSWhR4r3djfni8LbeHh+S5ZAAerCGoaqqIPALMxbmldLyqbhSRkcAqVY0ChgKfiMgTOB3MAzU3HozwwEBRAQEBzJgxg2effZZy5cpd9PGpqan4+Xm7Be/KKWjvx5hzqMKGr2Huv+HUIWg+BG78NwSWJD1d+cFVI9i8/zhXly/Gu70iuD08BN88mATcefQv1vVMwfcZtg13W44BWnsyhvN4aDo6Pz8/hgwZwttvv80rr7xyzr4dO3YwaNAgDh06RPny5fnss8+oWrUqAwcOJDAwkLVr19K6dWuOHDlCUFAQa9eu5cCBA4wfP54vvviCZcuW0aJFCyZMmADAgw8+SHR0NImJidx111289NJLF4ytevXq9OjRgzlz5hAUFMSkSZOoVatWpnFVrlyZWrVqsW3bNhISEihbtiy//PILbdu2pW3btnz66aeEhITw6KOP8vvvv5OSksKIESPo0qULEyZMYMaMGZw4cYK0tDQWLlx4yT9PY/KsQ1uc4Sa2L4KQJnDPNAhpTHq6MnfDXt6dv4U/9h2nZj5KBGcUvK9wc56BfRuy3h8bDWmnz92WkgizHoHVn2d+TKUwuGVU5vvcPPzww4SHh/PUU+dOavHoo48yYMAABgwYwPjx43nsscfODjAXGxvLr7/+iq+vLwMHDuTo0aMsW7aMqKgoOnfuzNKlSxk3bhzNmjVj3bp1RERE8Morr1CmTBnS0tJo374969evJzw8/IKxlSxZkg0bNvDFF1/w+OOPM3v27Czjqlu3LjExMWzfvp0mTZqwePFiWrRowe7du6lduzbPPfccN954I+PHjyc+Pp7mzZvToYNzg9iaNWtYv349ZcqUyfbnZUy+kpLoPE+w9F3wC4Jb34TIQaTjw7zf9/LOT65EUK4Y7/SM4I5G+ScRnFHoBrc7Lxlkt/0ilChRgv79+zNmzJhzti9btow+ffoA0K9fP5YsWXJ23913333OKJ933HEHIkJYWBgVK1YkLCwMHx8fGjRocHaI7GnTptGkSRMaN27Mxo0bczRFZ+/evc/+e2YCmqziatOmDYsWLWLRokU8++yzLFmyhOjo6LPDVs+bN49Ro0YRERFxdqTVXbucuSQ6duxoycAUPFt+gv9rCYtGQ/2u8Eg02uw+fog5yG3vLeEfX63hdGo6b/dsxI//up6ujSvnu2QABbGGkN03+bcbZj5HackqcO93l335xx9/nCZNmnDvvffmqHyxYsXOWXcfEvvM8pn11NRUtm/fzptvvkl0dDSlS5dm4MCBmQ59nZH7sNUXGsIaoG3btnz44Yfs2bOHkSNHMnr0aBYsWHB2kDpV5ZtvvqFu3XMfnV+xYsV578eYfO3YHvjhGYiZBWVrQ/8otEZb5sXs592flhCz9xg1yhXjvz0a0blRCH6++fs7dv6O/lK0H+7R6ejKlClDjx49+PTTT89uu/baa5kyZQoAEydOPPvBeimOHTtGsWLFKFmyJPv372fOnDk5Ou7MNJdTp06lVatWF4yrefPm/Prrr/j4+BAYGEhERAQff/wxbdu2BeDmm2/mvffe40z//9q1ay/5/RiTJ6WlwrIP4P1m8OdcuPF59B9LmJdYl9vfW8IDX67mVHIqb93diB+faMudTULzfTKAglhDyM6ZjmMPTkc3dOhQ3n///bPr7733Hvfeey+jR48+23l7qRo1akTjxo255pprqFKlCq1b56xP/ujRo4SHhxMQEMDkyZMvGFdAQABVqlQ5O6x2mzZtmDx5MmFhYQC88MILPP7444SHh5Oenk6NGjWYPXv2Jb8nY/KU3dHOMwX7N0Ctjuito/lpX1He+TCajXuOUa1sUd68uxFdI/J/jSAjG/66EKhevTqrVq26pNth8wL7/ZpcceqIM3PZ6s+h+FVop9eYT0ve+XkLv8cdo2qZojx6Yy26Na6cLxOB14e/NsaYPE8VfpsM816AxKNoy4dYGDKYt37ey4a41VQpE8Qbd4XTrXFl/PNhIrgYlhAKgTN3JxljMjjwh/NMwc6laGhzohs8zyurffltwR9OIugeTrcmBT8RnFFgEoKqZnv3jMl/8luTpsknkk/Bojfg1/fQIsFsbvYyz2yPYN2sY4SWDuL17mHc2SS00CSCMwpEQggMDOTw4cOULVvWkkIBoqocPnyYwMBAb4diCpLNPzizlyXsYm+NO3n62N0sWqxULpXCqDudRFDEr3AlgjMKREIIDQ0lNjaWgwfz5bh45gICAwMJDQ31dhgmP8o4Zlmrh2HHEvhjNidL1uLVUq8zcVMVKpcK5NVutbiraeFNBGcUiITg7+9PjRo1vB2GMSavWD+N1FmP4pfmemgzYTf6wzOo+DGx2L2M3H8D5UsG80q3WtzdtEqhTwRnFIiEYIwx7k7NGU7RtHOf4Bdgf1pxPky9gxe71uLuyFAC/HwzP0EhZQnBGFPgBCbuy3R7BZ94fhnWzhJBFqyeZIwpWJJPkqT+me7ak17WksEFWEIwxhQch/+CcR0IlGRS9NwP/lNahHFF+nopsPzBowlBRDqJyGYR2Soiz2Sy/20RWed6/Ski8Z6MxxhTgG3+AcbewOmjcfRPfpZhqQ8Qm16OdBVi08sxXIcQcdsQb0eZp3msD0FEfIEPgI5ALBAtIlGuWdIAUNUn3Mo/CjT2VDzGmAIqPR0Wvg4LR3Gw+DV0O/Qgdes2oHXDSvT8qSN74hMJKRXEsJvr0rVxZW9Hm6d5slO5ObBVVbcBiMgUoAuQ1WwuvYEXPRiPMaagSTwKM4bAlnlsrHA7d+66iw7h1XinZwT+vj7cHVnF2xHmK55MCJUB95loYoEWmRUUkWpADeDnLPYPAYYAVK1a9cpGaYzJn/b9DlPvQRPi+KHaMB7cHMFdTavwevfwfDlbWV6QVzqVewHTVTUts52qOlZVI1U1snz58rkcmjEmz1n/NYzrgKae5uOa7/Hg5sb0b1WdNywZXBZP1hDiAPf6WqhrW2Z6AQ97MBZjTEGQluIMU73iQ7TqtYwIeJLPNyTxwPU1eabTNTaW2WXyZEKIBmqLSA2cRNAL6JOxkIhcA5QGlnkwFmNMfnd8P3w9EHb9SlrzB3nscDe+23CIoR3r8MiNtSwZXAEeSwiqmioijwBzAV9gvKpuFJGRwCpVjXIV7QVMURvn2BiTld0rYVp/SIwnuesnPLC2Or9sPsjzt9XjvjY1vR1dgeHRoStU9Xvg+wzbhmdYH+HJGIwx+ZgqrPoU5jwDJUM5NWAug+cksnz7QV7tFkafFnaTyZVkYxkZY/KmlESY/S/4bRLUvomEW/+PgZP/ZH1sAv/t0YhujW1Y9CvNEoIxJu85uhOm9oV966Hdsxxu+k/6jV/FlgPH+aBPYzo1vMrbERZIlhCMMXnL1vnwzWDnCeTeU9l/VTvu+WQFu4+c4pP+kbSrW8HbERZYlhCMMXmDKiz5L8z/D1SoDz2/ZLdcxT0fLePwidN8Pqg5LWuW9XaUBZolBGOM9yUdg5kPwh+zoeFd0HkM2xKUvuOWceJ0Kl/e14ImVUt7O8oCzxKCMca7Dvzh9Bcc2QY3vwYtH+SP/cfpO24lqsrkIS1pEFLS21EWCpYQjDHeEzMLZj4E/kEwIAqqX8f62Hj6j19JgJ8PE+9rSa0Kxb0dZaFhCcEYk/vSUuHnkbD0XQhtBj2+gBIhRO84wr2fRVOqqD+T7mtJ1bJFvR1poWIJwRiTu04egumDYPtCiBwMnV4DvwAWbznI/V+sIqRkEF/d14KQUkHejrTQsYRgjMk9cWtgaj84eRC6fACNnSktf4rZz0MT11CzfDG+HNyC8sUDvBxo4WQJwRiTO9Z8Cd8NheAKMHguhDgTJH772x6emLqOBiEl+HxQc0oVLeLlQAsvSwjGGM9KPQ1znoLVE6BmO+g+Hoo5zxNMi97N0zPW06xaGT4dGEnxQH+vhlrYWUIwxnhOQhxM6wdxq+G6J+DGF8DHF4DPf93Bi1EbaVO7HGP7RRJUxNfLwRpLCMYYz9i+2Jm/IDUJenwJ9Tuf3fV/C7byxg+b6Vi/Iu/3aUyAnyWDvMASgjHmylKFZR/Aj8Oh7NXQ8ysoX9e1S3lr3p+8/8tWOjcK4a0ejfD3zSsz+RpLCMaYK+f0CYh6FDbOgGtuh64fQmAJwEkG/5m9ifFLt9OrWRVe6RZm8x/nMZYQjDFXxuG/YMo9cGgzdBgBrR8H17SWaenK8zM3MHnlbu5tXZ3ht9e3KS/zII/W1USkk4hsFpGtIvJMFmV6iEiMiGwUkUmejMcY4yGb58DYdnBiP/T9xulAdn3gp6al869p65i8cjcP33C1JYM8zGM1BBHxBT4AOgKxQLSIRKlqjFuZ2sCzQGtVPSoiNtC5MflJehosGAWL3oCrGjn9BaX+ntbydGoaj01ey9yN+xl2c10evqGWF4M12fFkk1FzYKuqbgMQkSlAFyDGrcz9wAeqehRAVQ94MB5jzJV06gjMGAJbf4SIe+C2t5xB6lwSk9N44KvVLPrzIC/eUZ97W9fwYrAmJzyZECoDu93WY4EWGcrUARCRpYAvMEJVf8h4IhEZAgwBqFrVJtU2xuv2bXD6C47tgdv+C5GDzjYRAZw4ncqgCdFE7zjC693D6NnM/m7zA293KvsBtYF2QCiwSETCVDXevZCqjgXGAkRGRmoux2iMcffbVPj2nxBUCu6dA1WanbM7/lQyAz6L5ve4BN7pGUGXiMreidNcNE8mhDigitt6qGubu1hghaqmANtF5E+cBBHtwbiMMZciLQXm/htWfgzVWsPdE5xxidwcOnGavuNWsO3gST68pwk3NajknVjNJfHkXUbRQG0RqSEiRYBeQFSGMjNxageISDmcJqRtHozJGHMpju+Dz+9wkkHLh6H/rPOSwb6EJHp8vIwdh0/y6cBISwb5kMdqCKqaKiKPAHNx+gfGq+pGERkJrFLVKNe+m0QkBkgDhqnqYU/FZIy5BLtWwLT+cPoYdP8Uwu46r8juI6foM245R0+m8MWgFjSvUcYLgZrLJar5q0k+MjJSV61a5e0wjCmY1k+D+SMhIRZKhkL162DD11CyCvSaCBUbnHfI1gMn6DtuBYkpaXwxqDmNqpTK/bhNtkRktapGXqiMtzuVjTF5xfpp8O1jkJLorCfsht8mQ8UwGDjb6UTOYNPeY/QdtwIRmDKkJfWuKpG7MZsryhKCMcYxf+TfycBd0tFMk8G63fEMGL+SokV8mXhfC2qWD/Z8jMajLCEYYxwJsVlsz3hzIKzYdphBE6IpGxzAxPtaUKVMUQ8HZ3KDjTtrjHEElc58e8nQc1YX/nmQAZ+tpFLJQKY90MqSQQFiNQRjCrv0dFjwGiQeAfEBTf97n38QtB9+dnXuxn08OmkttSoE8+Xg5pQNDvBCwMZTrIZgTGGWkgjfDHIGp4voC10+cO4oQpx/7xgD4T0AmLUujocmrqFB5RJMvr+lJYMCyGoIxhRWx/fDlN4QtwY6vASt/+mMRxTR57yiU1bu4tn/baBFjTKMG9CM4AD76CiI7LdqTGG073eY1NNpJur5JdS7I8uiny7Zzn9mx9Cubnk+6tuUQH+b/7igsoRgTGHz51yYPggCijuD04VEZFn0/Z+38Oa8P+nUoBLv9o4gwM+SQUFmCcGYwkIVln8I8/4NlcKg9xQoEXJOkZlr4xg9dzN74hMpFuDHidOpdGtcmdF3hePna12OBZ0lBGMKg7QU+H4YrP4Mrrkd7hwLRYqdU2Tm2jienbGBxJQ0wJnTwNdHaFurnCWDQsJ+y8YUdInxMPEuJxm0fhx6fHleMgAYPXfz2WRwRlq68uaPf+ZOnMbrrIZgTEF2ZJvTeXxku3NLaeO+WRbdE5/JsBUX2G4KHksIxhRUO5bC1L6AQv+ZzsilWUhKSSPAz4ek1PTz9oWUCsrkCFMQWZORMQXRuknwRRcoWgbum3/BZHAqOZX7v1hFUmo6/r5yzr4gf1+G3VzX09GaPMJqCMYUJOnp8PN/YMl/oUZb6PFF1mMUAceSUhg8IZrVO4/y5t2N8PORs3cZhZQKYtjNdena2OZELiw8mhBEpBPwLs6MaeNUdVSG/QOB0fw91/L7qjrOkzEZU2Aln4L/PQCboqDJALjtLfD1z7L40ZPJDPhsJTF7jvFe7ybcFn4VgCWAQsxjCUFEfIEPgI5ALBAtIlGqGpOh6FRVfcRTcRhTKBzfB5N7wZ51cNMr0OphZxiKLBw4nkS/cSvZfvgkH/drSvt6FXMvVpNnZZsQXB/sG1X1mos8d3Ngq6puc51nCtAFyJgQjDGXY+96JxkkxkPvyVD3lgsW3xOfyD3jVrAvIYnPBjajda1yuROnyfOy7VRW1TRgs4hUvchzVwZ2u63HurZl1F1E1ovIdBGpktmJRGSIiKwSkVUHDx68yDCMKcD++B7Gd3KWB8/NNhnsPHySuz9axqHjp/lycHNLBuYcOb3LqDSwUUTmi0jUmdcVuP63QHVVDQd+BD7PrJCqjlXVSFWNLF++/BW4rDH5nCosHQNT+kD5unD/z85wFBewZf9x7v5oGaeSU5k8pCWR1cvkUrAmv8hpH8ILl3DuOMD9G38of3ceA6Cqh91WxwFvXMJ1jClcUpPh+6Gw5guo3wW6fgRFLjxr2e9xCfQfvxJfH2HKkFbUrVQ8l4I1+UmOEoKqLhSRakBtVf1JRIri3Dl0IdFAbRGpgZMIegHnDLQuIlep6l7Xamdg00VFb0xhc+oITOsPOxZD22HQ7jnwuXBFf/XOowz8bCXFA/yYeH9LapQ7f9gKYyCHCUFE7geGAGWAq3H6Aj4C2md1jKqmisgjwFyc5DFeVTeKyEhglapGAY+JSGcgFTgCDLyM92JMwXb4L5jUA+J3QbePoVGvbA9Z9tdhBn8eTYXiAXx1XwtCS9v8xyZroqrZFxJZh3PX0ApVbezatkFVL9xo6QGRkZG6atWq3L6sMd61fbEzDIWPL/ScCNVaZXvIL38c4B9fraZqmaJMvK8FFUoE5kKgJq8SkdWqGnmhMjntVD6tqsluJ/YDss8kxpjLt+ZL+LIrBFd0hqHIQTKYs2EvQ75cRe2KwUx9oJUlA5MjOe1UXigizwFBItIReAjnDiFjjKekp8NPL8KvY6DmDXD3BAgqle1hM9bE8uTXv9G4amk+u7cZJQKzflrZGHc5rSE8AxwENgAPAN8Dz3sqKGMKveSTMK2fkwwiB8M903OUDCau2MnQr3+jZc2yfDGouSUDc1FyWkO4AfhKVT/xZDDGGCAhznnyeP/vcMsb0HzIBYehOGPc4m28/N0mbrymAv93TxMC/W3+Y3NxcpoQ+gMfisgRYDGwCFiiqkc9FpkxhdGetTC5N5w+Ab2nQp2bsj1EVRkzfytv//Qnt4Vdxds9IyjiZyPbm4uX0+cQBgCISAhwF86gdSE5Pd4YkwMxUTBjCBQr5wxDUbFBtoeoKqPm/MHHi7bRvUkor3cPs/mPzSXL6XMIfYE2QBhwCHgfp6ZgjLlcqrDkbZj/ElSOdAaoC66Q7WHp6cqLURv5cvlO+rWsxkudG+Djk33TkjFZyek3/HeAv3AeRvtFVXd4KiBjCpXUZJj9OKybCA27O/Me+2c/ZWVqWjpPf7OBb9bE8kDbmjxzyzVIDvoZjLmQnDYZlRORBkBb4BURqQ1sVtV+Ho3OmILs5GHnTqKdS+H6Z6DdMznqPE5OTeeJqev4bsNenuhQh8fa17JkYK6InDYZlQCqAtWA6kBJ4PzZuI0xOXNoC0y8G47tgTvHQfjdOTosKSWNhyau4ec/DvDvW+txf9uaHg7UFCY5bTJa4vZ6X1VjPReSMQXctgXOAHU+/jDgW6jaIkeHnTydyv1frGLZtsO80q0h97So5tk4TaGT0yajcAARCfZsOMYUcKsnwHdDoWxt6DMVSufsQz0hMYVBE6JZu+sob93diDubhHo2TlMo5bTJqCHwJc5opyIiB4EBqvq7J4MzpsBIT4Mfh8Oy96FWB7jrMwgskaNDj5xMpv/4FWzed5wP+jThlrCrPBysKaxy2mQ0FviXqv4CICLtXNuu9UxYxhQgp0/AN/fBn3Og+QNw86vgm7M/vQPHkuj76Qp2Hj7F2H6R3HBN9rejGnOpcpoQip1JBgCqukBEbJYNY7KTEAuTesGBGLj1TWh+f44PjT16ir7jVnDg+Gk+u7cZ115t8x8bz8ppQtgmIi/gNBsB9AW2eSYkYwqI2NUwpTekJMI905ymohzafugk93yynOOnU/lycAuaVivtwUCNceT0GfdBQHlgBvANUM61zRiTmY3/gwm3gl8ADJ53Ucngz/3H6fHxMpJS05l8f0tLBibXXLCGICKBwD+AWjhDXw9V1ZScnlxEOgHv4kyhOU5VR2VRrjswHWimqjYdmslf1k+D+SOd5qGSoVC5KcTMhCotoNckZ2yiHNoQm0D/8Svw9/Vh6pCW1K5Y3HNxG5NBdk1GnwMpOOMW3QLUAx7PyYlFxBdnELyOQCwQLSJRqhqToVxx4J/AiouK3Ji8YP00+PYxp1kIIGG386rSEvrPAv+cz1S2ascR7v0smhJB/ky6vwXVylo3ncld2TUZ1VfVvqr6Mc4op20v4tzNga2qus01/eYUoEsm5f4DvA4kXcS5jckb5o/8Oxm4OxZ3Uclg6dZD9Pt0JeWKB/D1P1pZMjBekV1CONs8pKqpF3nuysBut/VY17azRKQJUEVVv7vQiURkiIisEpFVBw8evMgwjPGghCwe2s9qeybmb9rPvROiqVqmKFMfaElIqewHtzPGE7JLCI1E5JjrdRwIP7MsIscu58Ii4gP8FxiaXVlVHauqkaoaWb58+cu5rDFXTno6BGTxcFnJnD1J/N36vTzw5WquqVScKUNaUqF4zmsVxlxpF+xDUNXLmYMvDqjith7q2nZGcaAhsMA1UmMlIEpEOlvHssnzTh6G/w2B0wkgvqBpf+/zD4L2w7M9xderdvP0N+tpWq00nw5sZvMfG6/z5Ixn0UBtEamBkwh6AX3O7FTVBJzbVwEQkQXAk5YMTJ63azlMHwQnD8Ltb0OR4HPvMmo/HMJ7XPAUXy7bwQuzNnJdrXKM7d+UokVs8kHjfR77X6iqqSLyCDAX57bT8aq6UURGAqtUNcpT1zbGI1Th1/fgpxFQqgoM/hFCIpx92SQAdx8v/IvX5vxBh3oVeb9PYwL9L6cibsyV49GvJar6PfB9hm2Z1qVVtZ0nYzHmsiQehZkPwebvod4dzsxmgSUv6hSqyts/bWHM/C3cHn4Vb/eMwN/mPzZ5iNVTjclO3Gr4eiAc2wudXocWD+RoZjN3qsor321i3JLt3N00lFHdw/G1+Y9NHmMJwZisqMLKsTD331D8Khg0F0KbXvRp0tOV52f9zqQVuxh4bXWG314fH0sGJg+yhGBMZpISIOpRiJkFdTpB1w+haJmLPk1qWjpPTV/PjLVxPNjuap66ua7Nf2zyLEsIxmS0dz18PQCO7oSOI6HVo+Bz8W39yanpPDZ5LT9s3MeTN9XhkRtreyBYY64cSwjGnKHqTHE552koWhYGfgfVWl3SqZJS0vjHV6tZsPkgL9xen8HX1biysRrjAZYQjAFnVrPZj8OGr+HqG+HOTy5qlFJ3J06nct/n0azYfoTX7gyjd/OqVzZWYzzEEoIx+2OcJqLDW+GG56HN0ItqIpq5No7RczezJz6RSiUDKeIrxMYn8XaPCLo2rpz9CYzJIywhmMJt3SSY/S8IKO4MV13jYgb0dZLBszM2kJjiDF2xN8EZtPfe1tUtGZh8x56KMYVT8imY+TDMfBBCI+EfSy46GQCMnrv5bDJwN2/j/isRpTG5ymoIpvA5tAWm9YcDm6DtU9DuGfC5+OEjDh4/TVx8JnMhAHuy2G5MXmYJwRQuG6bDt/905jruO/2i5joG5yGzX/86zKSVOy9YC7A5DUx+ZAnBFA4pSTD3WVg1Hqq2gu6fQsmct/EfOnGa6atjmbxyFzsPn6JUUX8GXlud8iUCeOfHLec0GwX5+zLs5rqeeBfGeJQlBFPwHdkG0wbAvvXQ+p9w4wvgm/3cA6rKsr8OM3HlLuZt3EdKmtK8ehme6FCHTg0rnR2ltGLxwLN3GYWUCmLYzXWtQ9nkS5YQTMEWMwtmPQLiA72nQt1O2R5y5GQy01fvZvLK3Ww/dJKSQf70a1md3s2rULti8fPKd21c2RKAKRAsIZiCKTUZfhwOKz6Eyk3h7glQKusHxFSV5duOMGnlLub+vo/ktHQiq5Xm0RtrcWvYVTZngSkULCGYgid+lzNcddxqaPkQdHgJ/IpkWvToyWS+WRPLpJW72HbwJCUC/ejToip9WlSlTia1AWMKMo8mBBHpBLyLM2PaOFUdlWH/P4CHgTTgBDBEVWM8GZMp4DbPgf/9AzQdenwB9bucV0RVWbndqQ3M2eDUBppULcWbdzfitrCrCCpitQFTOHksIYiIL/AB0BGIBaJFJCrDB/4kVf3IVb4z8F8g+0ZeYzJKS3HmNf51DFQKhx6fQ5ma5xSJP5XMN2vimLxyF1sPnKB4gB+9mlehT4uqXFOphJcCNybv8GQNoTmwVVW3AYjIFKALcDYhqOoxt/LFAPVgPKagSohzJr3fvRwiB8PNr4J/IODUBlbvPMqkFbv4bsNeTqemE1GlFG/cFc7t4VfZ5PbGuPHkX0NlYLfbeizQImMhEXkY+BdQBLgxsxOJyBBgCEDVqjZypHGz9SeYMQRSTzvPFoTdBUDCqRRmrHWeG/hz/wmCA/y4OzKUPs2rUT/EagPGZMbrX49U9QPgAxHpAzwPDMikzFhgLEBkZKTVIgykp8GC12DRm1ChPvT4HC1bizWu2sDs9Xs4nZpOo9CSjLozjDsahVAswOv/3Y3J0zz5FxIHVHFbD3Vty8oU4EMPxmMKiuP74ZvBsGMxNO7LsRtfZebvR5n01WL+2HecYkV86d40lD7Nq9KwcklvR2tMvuHJhBAN1BaRGjiJoBfQx72AiNRW1S2u1duALRhzIdsXwfTB6Onj7GrzJu8fac63b/xKUko6DSuX4NVuYXSOCCHYagPGXDSP/dWoaqqIPALMxbntdLyqbhSRkcAqVY0CHhGRDkAKcJRMmouMASA9HRa/hS54lWNFq/F00eH88GMZihbZS7fGlenTvBphoVYbMOZyiGr+apKPjIzUVatWeTsMk4v0xEFOTB5E8bhFRKVfxzPJg6h+VQX6tKhKl4gQigdmPy6RMYWdiKxW1cgLlbF6tcmzTpxO5defv6XJyqEUTz/GcB3C6bC+TG5ZjfDQkoiIt0M0pkCxhGDynN/jEpi0fAdlf/uIf8oU9vtWZPm1X/Hk9e0pYbUBYzzGEoLxmoyT07epXY4/9h1nZ2wc7wR8xA0+azha/VZCen5I5aBS3g7XmALPEoLxiswmp5+2Kpb2wTuZVGoMxZIPwc2jKd38frCmIWNyhSUE4xVv/PBHhsnplXt9f+C51Mn4B4dAv7nOsNXGmFxjCcHkuq0HTrAnIYnOPkt4ym8aIXKI0xQhSJL5Ma0pHR/4BoJKeztMYwodH28HYAoPVeXL5Tu5/b3FdPVdwij/cYT6HMJHIEiSSVZflhZpY8nAGC+xhGByxcHjpxk0IZoXZv5O8xpleSX4G4pK8jllikgaTxWZ6qUIjTHWZGQ87seY/TzzzXpOnE5l5G216OczF9m1P9OyRRP35XJ0xpgzLCEYjzmVnMp/Zm9i8spd1K9UnG/bHyIkugcc3QF+gZCadP5BJUNzPU5jjMMSgvGIdbvjeWLqOnYcPsmIpon0T3gXn3krnKGq+34Dp47At49BSuLfB/kHQfvh3gvamELOEoK5olLT0vngl78Y8/MWwoOPsbLud5TfGAXFysMd70JEX/B1+283fyQkxDo1g/bDIbyH94I3ppCzhGCumJ2HT/L41HVs2bWHjyr/Qof46UisQJsn4brHIaD4uQeE97AEYEweYgnBXDZV5etVsbz87Xru8vmFySVnEHj4MIT3hBtfgFJVsj+JMcbrLCGYy3LkZDLPzdhA4qa5fFd0ClVSd0LFa+Hml+1JY2PyGUsI5pIt/PMgH077loeSP6NtkfVo8RrQ8Uuod4eNP2RMPmQJwVy0pJQ03ov6lcpr/8tEvwUQGAw3vIo0ux/8ing7PGPMJfJoQhCRTsC7OFNojlPVURn2/wu4D0gFDgKDVHWnJ2Myl2fjzv0snzSSB5OmE+SXgja7H78bnoGiZbwdmjHmMnksIYiIL/AB0BGIBaJFJEpVY9yKrQUiVfWUiDwIvAH09FRM5tKlpaWxYPr/US/mbQbLYQ5V6Uhw11FQrpa3QzPGXCGerCE0B7aq6jYAEZkCdAHOJgRV/cWt/HKgrwfjMZfo4MZfSJj5FO1T/mRnQG2O3/kp5a65wdthGWOuME8mhMrAbrf1WKDFBcoPBuZktkNEhgBDAKpWrXql4jPZObKNPdOfJmTPPNK0DCsbv0Kzzg8iPr7ejswY4wF5olNZRPoCkcD1me1X1bHAWIDIyEjNxdAKp8SjnJ7/Or6rPqGk+jI5uB/X9RtB80rlvB2ZMcaDPJkQ4gD3J5JCXdvOISIdgH8D16vqaQ/GY7KTlgLRn5Ly82v4JyfwdVo7Tlz7NANuaoGfr42UbkxB58mEEA3UFpEaOImgF9DHvYCINAY+Bjqp6gEPxmIuRBU2f0/6vBfwOfIXK9IbMqHYCB7p042IKqW8HZ0xJpd4LCGoaqqIPALMxbntdLyqbhSRkcAqVY0CRgPBwNfiPMi0S1U7eyomk4k962Duv2HnEmJ9QhmePIzKkZ0Zc3t9ihbJEy2Kxphc4tG/eFX9Hvg+w7bhbssdPHl9cwHH9sD8/6C/Tea0fylGpQ1ijs9NvNK3CR3qV/R2dMYYL7CvgIXN6RPw6xhYOgbVNGYX78FzBzvS7JrqzO4eTvniAd6O0BjjJZYQCov0NFg3CX5+GU7sY0/ordwXdyvb4svxfNf63NOiKmLjDxlTqFlCKAz++gXmPQ/7fyctJJL/Kz+ctzaVIqxySb7rFcHV5YO9HaExJg+whFCQHdwM816ALXOhVFX+uv49Bq6sTFx8Eo/cUIt/dqiNv91OaoxxsYRQEJ08BAteg1WfQZFipLV/ifdO3siYebupXFqY9kArIqvbYHTGmHNZQihIUpJg5cew6E1IPgmRg9gR9ij/jNrNb7G7uatpKC/eUZ/igf7ejtQYkwdZQigIVGHjDPhpBMTvgto3ox1HMml7EC9/sokifj783z1NuDXsKm9HaozJwywh5Dfrp8H8kZAQCyVDIaIv/DUfYldCxTDoP4tDFVrx9PT1zP/jL66rVY43725EpZKB3o7cGJPHWULIT9ZPg28fg5REZz1hNyx8DQJKQOf3IaIP8zcf4ul3FnEsKZXht9dn4LXV8fGx20mNMdmzhJCfzH/p72TgLqA4pxr25pVZMUxcsYtrKhVn4n0tqVupeO7HaIzJtywh5HVpKbB9EcTMcpqJMqHH9nD7mCVsP3ySIW1rMvSmOgT42ZwFxpiLYwkhL0o9DdsWOkngj9mQFA9FgsG/KKScOq/4Hi1LYkoaEwe34NpaNmeBMebSWELIK1KSnM7hmCjYPAdOJzh9A3Vvhfpd4OobiZ7zOQ1XP0+QJJ897JQWYUqJgfzwYFtKFrXbSY0xl84Sgjcln4KtPzo1gT/nQvIJCCwF9e5wkkDN68Hv78Hm/hlTi8iU+3jKbxohcpg9WpY3UnuwOuVahloyMMZcJksIue30cdgyz0kCW350moCKloWwu5wkUL0N+Dof7qlp6WzcHc+ybYdZvu0we+KTiOI6opKvO+eUEp/kjXdijClgLCHkhqQE2PyDkwT+mg+pSRBcESL6OEmg6rXg60dqWjoxe4+x7C8nAUTvOMqJ06kA1KoQTLEivpxMTjvv9CGlgnL7HRljCiCPJgQR6QS8izNj2jhVHZVhf1vgHSAc6KWq0z0ZT646dcTpC4iZBdt+gbRkKB4CTQc6SaBKC9LwIWbPMZYt3cnybUeI3n6E464EULN8MbpEhNCyZlla1CxDheKBzFwbx7MzNpCY8ndSCPL3ZdjNdb30Jo0xBYnHEoKI+AIfAB2BWCBaRKJUNcat2C5gIPCkp+LIVScPwR/fOUlg+0JIT4WSVaH5EKjflbSQJmzad8KpAfyyhpXuCaBcMe5wJYCWNcpQocT5TxZ3bVwZgNFzN7MnPpGQUkEMu7nu2e3GGHM5PFlDaA5sVdVtACIyBegCnE0IqrrDtS/dg3F41vH98Me3ThLYsQQ0HUrXgFaPkFavC5uoyfLtR1j+82FWbP+J40lOAqhRrhi3NwqhZc0ytKxZloqZJIDMdG1c2RKAMcYjPJkQKgO73dZjgRaXciIRGQIMAahaterlR3a5ju2BTa4ksPNXQKFsbbT1v9hWoT0LEiqxbNsRVi49yLGkvQBUL1uU28KuotXVZWlRo6yNLWSMyXPyRaeyqo4FxgJERkaqV4KI3+U8I7ApCnavcOKqUJ+DTZ9gif+1zNlfipVLj5KQeBQ4SrWyRbmloSsB1CzDVSWt49cYk7d5MiHEAVXc1kNd2/KPI9ucJBAzC/asASCpbAM2Xv0ws05HErUnmPhdKUAyVcuc4OYGFc/WAOzOH2NMfuPJhBAN1BaRGjiJoBfQx4PXuzIObYWYmU4S2LcegIMlGrC4zH18djScDXHlIA6qlAmiY72yTifw1WWpbAnAGJPPeSwhqGqqiDwCzMW57XS8qm4UkZHAKlWNEpFmwP+A0sAdIvKSqjbwVExZOvAHxMxCY2YiB5w+778C6zNTBvC/pCbEHihPaOkgWtYvy4CaZWlZswyhpYvmepjGGONJouqdJvlLFRkZqatWrbqoY6KjPqbKmtFU0IMckPLsbvIkzZq3RjfOJHnDTALit5KOsI5riEppxg9pzfAtFep8+3fdBVSljCUAY0z+JSKrVTXyQmXyRafy5YiO+vjvAeEEKnGQCqufhjWQjg+r0uoxJ/1e1hW7jjq1atGyZlkGWwIwxhRCBT4hVFkz+pzRQQF8BI5qMP+tM5GwOlczpGZZqpQJQsRmFjPGFF4FPiFU0IOQyed8SU7yn3va5Xo8xhiTV/l4OwBPOyDls9huE8kYY4y7Ap8QdjcZRqIWOWdbohZhd5NhXorIGGPypgKfEJp1foDfm77MPsqTrsI+yvN705dp1vkBb4dmjDF5SqG47dQYYwq7nNx2WuBrCMYYY3LGEoIxxhjAEoIxxhgXSwjGGGMASwjGGGNc8t1dRiJyENiZYXM54JAXwrkYeT3GvB4f5P0Y83p8YDFeCXk9Psg8xmqqmvmTui75LiFkRkRWZXc7lbfl9RjzenyQ92PM6/GBxXgl5PX44NJjtCYjY4wxgCUEY4wxLgUlIYz1dgA5kNdjzOvxQd6PMa/HBxbjlZDX44NLjLFA9CEYY4y5fAWlhmCMMeYyWUIwxhgD5LOEICKdRGSziGwVkWcy2f8vEYkRkfUiMl9EquWl+NzKdRcRFZFcv3UtJzGKSA/Xz3GjiEzKazGKSFUR+UVE1rp+17fmcnzjReSAiPyexX4RkTGu+NeLSJM8Ft89rrg2iMivItIoN+PLSYxu5ZqJSKqI3JVbsbmum218ItJORNa5/k4W5mZ8rutn93suKSLfishvrhjvzfakqpovXoAv8BdQEygC/AbUz1DmBqCoa/lBYGpeis9VrjiwCFgORObBn2FtYC1Q2rVeIQ/GOBZ40LVcH9iRyzG2BZoAv2ex/1ZgDs7krS2BFXksvmvdfr+35HZ8OYnR7f/Cz8D3wF15KT6gFBADVHWt5+rfSQ5jfA543bVcHjgCFLnQOfNTDaE5sFVVt6lqMjAF6OJeQFV/UdVTrtXlQGheis/lP8DrQFIuxnZGTmK8H/hAVY8CqOqBPBijAiVcyyWBPbkYH6q6COePKytdgC/UsRwoJSJX5U502cenqr+e+f2S+38nZ2LI7mcI8CjwDZDb/wdzEl8fYIaq7nKVz4sxKlBcRAQIdpVNvdA581NCqAzsdluPdW3LymCcb2m5Jdv4XE0HVVT1u1yMy11OfoZ1gDoislRElotIp1yLzpGTGEcAfUUkFufb46O5E1qOXez/VW/K7b+THBGRykA34ENvx5KFOkBpEVkgIqtFpL+3A8rE+0A9nC9MG4B/qmr6hQ7wy42ocpuI9AUigeu9HcsZIuID/BcY6OVQsuOH02zUDueb4yIRCVPVeG8GlUFvYIKqviUirYAvRaRhdv/ZzblE5AachHCdt2PJxDvA06qa7nzBzXP8gKZAeyAIWCYiy1X1T++GdY6bgXXAjcDVwI8islhVj2V1QH5KCHFAFbf1UNe2c4hIB+DfwPWqejqXYoPs4ysONAQWuP6DVwKiRKSzqubWnKA5+RnG4rQppwDbReRPnAQRnTsh5ijGwUAnAFVdJiKBOIN55Xq1PQs5+r/qTSISDowDblHVw96OJxORwBTX30o54FYRSVXVmV6N6m+xwGFVPQmcFJFFQCMgLyWEe4FR6nQibBWR7cA1wMqsDshPTUbRQG0RqSEiRYBeQJR7ARFpDHwMdPZCm94F41PVBFUtp6rVVbU6TtttbiaDbGN0mYlTO0BEyuFUjbflsRh34XwzQ0TqAYHAwVyMMTtRQH/X3UYtgQRV3evtoM4QkarADKBfHvtGe5aq1nD7W5kOPJSHkgHALOA6EfETkaJAC2CTl2PKyP3vpCJQl2z+lvNNDUFVU0XkEWAuzt0H41V1o4iMBFapahQwGqfz5GvXN4tdqto5D8XnVTmMcS5wk4jEAGnAsNz8BpnDGIcCn4jIEzgdZwNd34JyhYhMxkma5Vz9GC8C/q74P8Lp17gV2AqcwvmmlmtyEN9woCzwf66/k1TN5dE7cxCjV2UXn6puEpEfgPVAOjBOVS94C21ux4hzA8sEEdmAc8fb06p6wWG7begKY4wxQP5qMjLGGONBlhCMMcYAlhCMMca4WEIwxhgDWEIwxhjjYgnB5Hsi0lWc0WOv8XYsl0pE2rhGpFwnIkEiMtq1PvoSzvWcJ2I0BZ/ddmryPRGZCoQAP6vqi1fgfL6qmnb5kV3UNT8ClqjqV671BKDMpcQhIidUNfhKx2gKPqshmHxNRIJxxuIZjPNU85n5FL52K9NORGa7lm8SkWUiskZEvnYdj4jsEJHXRWQNcLeI3C8i0a6x5L9xPY2KiFztGvRvg4i8LCIn3K4zzHXMehF5KYt4z7u+iNwH9AD+IyITRSQK5wHL1SLSU0TKu2KIdr1an3nvIvKZK5b14syzMQoIctU0Jl7xH7gp2HJj3G572ctTL+Ae4FPX8q84A4754Ty2X8y1/UOgL86YOIvctj8NDHct7wCecjtvWbfll4FHXcuzgd6u5X8AJ1zLN+HM0yA4X7RmA20zxHqh60/Abcz/M+d1LU8CrnMtVwU2uZZfB95xK1c647H2stfFvPLN0BXGZKE38K5reQrOh/Vq17ACd4jIdOA24Cmc0W/rA0tdQzYUAZa5nWuq23JDEXkZZyKUYJyhNABaAV1dy5OAN13LN7lea13rwTiDAi5yO2fLbK6flQ5Affl71M8SrppNB1y1IgD9e44DYy6JJQSTb4lIGZyhfcNERHHGPlIRGYaTHB7BmRRklaoeF+cT9UdV7Z3FKU+6LU8AuqrqbyIyENeAfxcKB3hNVT/OpsyFrp8VH6Clqp4zqZLkzWGhTT5mfQgmP7sL+FJVq6kzMmYVYDvQBliIM73g/TjJAZwRZluLSC0AESkmInWyOHdxYK+I+OM0S52xHOjuWu7ltn0uMMitT6KyiFTIcM6Lub67ebhNAiQiEa7FH4GH3baXdi2muOI25qJYQjD5WW/gfxm2fYPTbJSG045/i+tfVPUgzgRFk0VkPU5zTVa3qr4ArACWAn+4bX8c+Jfr+FpAguvc83CakJa5RpecjpNUzrrI67t7DIh0dRzH4PRdgNO3UVpEfheR33DmFAenL2O9dSqbi2W3nRpzEVx3GyWqqopIL5zkk9nc2cbkO9aHYMzFaQq87+qPiAcGeTccY64cqyEYY4wBrA/BGGOMiyUEY4wxgCUEY4wxLpYQjDHGAJYQjDHGuPw/VKdbl3ld42AAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pd.DataFrame(\n", + " {\n", + " \"Average effect\": effects,\n", + " \"Simulated power\": pw_simulated_line.values(),\n", + " \"Normal power\": pw_normal_line.values(),\n", + " }\n", + ").plot(\n", + " x=\"Average effect\",\n", + " y=[\"Simulated power\", \"Normal power\"],\n", + " title=\"Power analysis\",\n", + " xlabel=\"Average effect\",\n", + " ylabel=\"Power\",\n", + " marker=\"o\",\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.6" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "29c447d2129f0d56b23b7ba3abc571cfa9d42454e0e2bba301a881797dc4c0e2" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/mkdocs.yml b/mkdocs.yml index c48ddfe..989a21c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,6 +20,7 @@ nav: - Paired T test: paired_ttest.ipynb - Different hypotheses tests: analysis_with_different_hypotheses.ipynb - Washover: washover_example.ipynb + - Normal Power: normal_power.ipynb - API: - Experiment analysis: api/experiment_analysis.md - Perturbators: api/perturbator.md diff --git a/setup.py b/setup.py index dc8a811..5ca1386 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ setup( name="cluster_experiments", - version="0.14.1", + version="0.15.0", packages=find_packages(), extras_require={ "dev": dev_packages, diff --git a/tests/power_analysis/conftest.py b/tests/power_analysis/conftest.py index 8444f14..53af046 100644 --- a/tests/power_analysis/conftest.py +++ b/tests/power_analysis/conftest.py @@ -1,10 +1,15 @@ from datetime import date, timedelta import numpy as np +import pandas as pd import pytest from cluster_experiments.cupac import TargetAggregation -from cluster_experiments.experiment_analysis import GeeExperimentAnalysis +from cluster_experiments.experiment_analysis import ( + ClusteredOLSAnalysis, + GeeExperimentAnalysis, + MLMExperimentAnalysis, +) from cluster_experiments.perturbator import ConstantPerturbator from cluster_experiments.power_analysis import PowerAnalysis from cluster_experiments.random_splitter import ( @@ -36,6 +41,27 @@ def df(clusters, dates): return generate_random_data(clusters, dates, N) +@pytest.fixture +def correlated_df(): + _n_rows = 10_000 + _clusters = [f"Cluster {i}" for i in range(10)] + _dates = [f"{date(2022, 1, i):%Y-%m-%d}" for i in range(1, 15)] + df = pd.DataFrame( + { + "cluster": np.random.choice(_clusters, size=_n_rows), + "date": np.random.choice(_dates, size=_n_rows), + } + ).assign( + # Target is a linear combination of cluster and day of week, plus some noise + cluster_id=lambda df: df["cluster"].astype("category").cat.codes, + day_of_week=lambda df: pd.to_datetime(df["date"]).dt.dayofweek, + target=lambda df: df["cluster_id"] + + df["day_of_week"] + + np.random.normal(size=_n_rows), + ) + return df + + @pytest.fixture def df_feats(clusters, dates): df = generate_random_data(clusters, dates, N) @@ -61,6 +87,20 @@ def analysis_gee_vainilla(): ) +@pytest.fixture +def analysis_clusterd_ols(): + return ClusteredOLSAnalysis( + cluster_cols=["cluster", "date"], + ) + + +@pytest.fixture +def analysis_mlm(): + return MLMExperimentAnalysis( + cluster_cols=["cluster", "date"], + ) + + @pytest.fixture def analysis_gee(): return GeeExperimentAnalysis( @@ -102,6 +142,7 @@ def switchback_power_analysis(perturbator, analysis_gee_vainilla): splitter=sw, analysis=analysis_gee_vainilla, n_simulations=3, + seed=123, ) diff --git a/tests/power_analysis/test_normal_power_analysis.py b/tests/power_analysis/test_normal_power_analysis.py new file mode 100644 index 0000000..630c25b --- /dev/null +++ b/tests/power_analysis/test_normal_power_analysis.py @@ -0,0 +1,253 @@ +import pytest + +from cluster_experiments.experiment_analysis import ClusteredOLSAnalysis, OLSAnalysis +from cluster_experiments.perturbator import ConstantPerturbator +from cluster_experiments.power_analysis import NormalPowerAnalysis, PowerAnalysis +from cluster_experiments.random_splitter import ClusteredSplitter, NonClusteredSplitter + + +def test_aa_power_analysis(df, analysis_gee_vainilla): + # given + sw = ClusteredSplitter( + cluster_cols=["cluster", "date"], + ) + + pw = NormalPowerAnalysis( + splitter=sw, + analysis=analysis_gee_vainilla, + n_simulations=3, + seed=20240922, + ) + # when + power = pw.power_analysis(df) + # then + assert abs(power - 0.05) < 0.01 + + +def test_normal_power_sorted(df, analysis_mlm): + # given + sw = ClusteredSplitter( + cluster_cols=["cluster", "date"], + ) + + pw = NormalPowerAnalysis( + splitter=sw, + analysis=analysis_mlm, + n_simulations=1, + seed=20240922, + ) + + # when + power = pw.power_line(df, average_effects=[0.05, 0.1, 0.2]) + # then + assert power[0.05] < power[0.1] + assert power[0.1] < power[0.2] + + +def test_left_power_analysis(df): + # given + sw = NonClusteredSplitter() + + pw = NormalPowerAnalysis( + splitter=sw, + analysis=OLSAnalysis(), + n_simulations=3, + seed=20240922, + ) + + pw_left = NormalPowerAnalysis( + splitter=sw, + analysis=OLSAnalysis( + hypothesis="greater", + ), + n_simulations=3, + seed=20240922, + ) + + # when + power = pw.power_line(df, average_effects=[0.05, 0.1, 0.2]) + power_left = pw_left.power_line(df, average_effects=[0.05, 0.1, 0.2]) + + # then + assert power[0.05] < power_left[0.05] + assert power[0.1] < power_left[0.1] + assert power[0.2] < power_left[0.2] + + +def test_right_power_analysis(df): + # given + sw = NonClusteredSplitter() + + pw = NormalPowerAnalysis( + splitter=sw, + analysis=OLSAnalysis(), + n_simulations=3, + seed=20240922, + ) + + pw_right = NormalPowerAnalysis( + splitter=sw, + analysis=OLSAnalysis( + hypothesis="less", + ), + n_simulations=3, + seed=20240922, + ) + + # when + power = pw.power_line(df, average_effects=[0.05, 0.1, 0.2]) + power_right = pw_right.power_line(df, average_effects=[0.05, 0.1, 0.2]) + + # then + assert power[0.05] > power_right[0.05] + assert power[0.1] > power_right[0.1] + assert power[0.2] > power_right[0.2] + + +@pytest.mark.parametrize( + "ols, splitter, effect", + [ + (OLSAnalysis(), NonClusteredSplitter(), 0.1), + (OLSAnalysis(), NonClusteredSplitter(), 0.2), + (OLSAnalysis(hypothesis="greater"), NonClusteredSplitter(), 0.1), + (OLSAnalysis(hypothesis="less"), NonClusteredSplitter(), 0.1), + ( + ClusteredOLSAnalysis( + cluster_cols=["cluster", "date"], + ), + ClusteredSplitter(cluster_cols=["cluster", "date"]), + 0.1, + ), + ( + ClusteredOLSAnalysis( + cluster_cols=["cluster", "date"], + ), + ClusteredSplitter(cluster_cols=["cluster", "date"]), + 0.2, + ), + ], +) +def test_power_sim_compare(df, ols, splitter, effect): + # given + perturbator = ConstantPerturbator() + + pw = PowerAnalysis( + splitter=splitter, + analysis=ols, + perturbator=perturbator, + n_simulations=200, + seed=20240922, + ) + + pw_normal = NormalPowerAnalysis( + splitter=splitter, + analysis=ols, + n_simulations=5, + seed=20240922, + ) + + # when + power = pw.power_line(df, average_effects=[effect]) + power_normal = pw_normal.power_line(df, average_effects=[effect]) + + # then + assert abs(power[effect] - power_normal[effect]) < 0.05 + + +@pytest.mark.parametrize( + "ols, splitter, effect", + [ + ( + ClusteredOLSAnalysis( + cluster_cols=["cluster", "date"], + ), + ClusteredSplitter(cluster_cols=["cluster", "date"]), + 0.2, + ), + ( + ClusteredOLSAnalysis( + cluster_cols=["cluster", "date"], + ), + ClusteredSplitter(cluster_cols=["cluster", "date"]), + 0.5, + ), + ( + # using a covariate + ClusteredOLSAnalysis( + cluster_cols=["cluster", "date"], covariates=["cluster_id"] + ), + ClusteredSplitter(cluster_cols=["cluster", "date"]), + 0.5, + ), + ], +) +def test_power_sim_compare_cluster(correlated_df, ols, splitter, effect): + # given + perturbator = ConstantPerturbator() + + pw = PowerAnalysis( + splitter=splitter, + analysis=ols, + perturbator=perturbator, + n_simulations=200, + seed=20240922, + ) + + pw_normal = NormalPowerAnalysis( + splitter=splitter, + analysis=ols, + n_simulations=5, + seed=20240922, + ) + + # when + power = pw.power_line(correlated_df, average_effects=[effect]) + power_normal = pw_normal.power_line(correlated_df, average_effects=[effect]) + + # then + assert abs(power[effect] - power_normal[effect]) < 0.05 + + +def test_from_config(df): + # given + pw_normal = NormalPowerAnalysis.from_dict( + { + "splitter": "non_clustered", + "analysis": "ols", + "n_simulations": 5, + "seed": 20240922, + } + ) + + pw_normal_default = NormalPowerAnalysis( + splitter=NonClusteredSplitter(), + analysis=OLSAnalysis(), + n_simulations=5, + seed=20240922, + ) + + # when + power_normal = pw_normal.power_line(df, average_effects=[0.1]) + power_normal_default = pw_normal_default.power_line(df, average_effects=[0.1]) + + # then + assert abs(power_normal[0.1] - power_normal_default[0.1]) < 0.03 + + +def test_get_standard_error_hypothesis_wrong_input(): + # Check if the ValueError is raised when the hypothesis is not valid + with pytest.raises(ValueError) as excinfo: + NormalPowerAnalysis( + splitter=NonClusteredSplitter(), + analysis=OLSAnalysis( + hypothesis="greaters", + ), + n_simulations=3, + seed=20240922, + )._normal_power_calculation( + alpha=0.05, + std_error=0.1, + average_effect=0.1, + ) + # Check if the error message is as expected + assert "'greaters' is not a valid HypothesisEntries" in str(excinfo.value) diff --git a/tests/test_docs.py b/tests/test_docs.py index eb016a4..fbc8927 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -18,6 +18,7 @@ MLMExperimentAnalysis, NonClusteredSplitter, NormalPerturbator, + NormalPowerAnalysis, OLSAnalysis, PairedTTestClusteredAnalysis, Perturbator, @@ -48,6 +49,7 @@ OLSAnalysis, Perturbator, PowerAnalysis, + NormalPowerAnalysis, PowerConfig, RandomSplitter, StratifiedClusteredSplitter,