
Increase docstring coverage and add doctests #232

Merged
merged 23 commits on Sep 15, 2023
Changes from 16 commits
Commits (23)
6bee429
Issue 129: increase docstring coverage, now at 86%
Aug 18, 2023
809277e
Examples for pymc experiments and most models
jpreszler Aug 22, 2023
fd92613
increased interrogate threshold, just need examples in skl experiemen…
jpreszler Aug 22, 2023
8a9800f
pymc models and experiments done and docs checked
jpreszler Aug 23, 2023
110505a
Finished docstrings and checked documentation results
jpreszler Aug 23, 2023
adb94c5
small fixes related to pr comments
jpreszler Aug 24, 2023
5c2870e
doctest on pymc_models good except rng caused score differences
jpreszler Aug 31, 2023
2220ed0
doctest good on pymc experiments except for summaries
jpreszler Aug 31, 2023
5d6c8eb
doctest clean and added to github actions
jpreszler Sep 1, 2023
e392335
Added IV examples after rebasing
jpreszler Sep 4, 2023
76d970a
add statsmodels to dependencies
jpreszler Sep 5, 2023
b5e6dc6
increase draws and decrease precision on summaries
jpreszler Sep 5, 2023
2359721
more precision reduction
jpreszler Sep 6, 2023
7ea2c11
remove unneeded ellipsis options
jpreszler Sep 6, 2023
11aa925
fix formula rendering
jpreszler Sep 9, 2023
53f0fd0
fixed link and removed awkward `round` call
jpreszler Sep 10, 2023
df5ae62
fix minor formatting error
drbenvincent Sep 11, 2023
8cc283e
remove redundancies, clean up some wording
jpreszler Sep 12, 2023
8a28509
fix test failure and doc anomalies
jpreszler Sep 13, 2023
e2760bc
reduce precision on reg. discont.
jpreszler Sep 13, 2023
34d795d
add make doctest and instructions to contributing
jpreszler Sep 14, 2023
f37d3b3
turn down precision on PrePostFit doctest
jpreszler Sep 14, 2023
c80d78e
skip doctests that write simulated data to csv files
jpreszler Sep 14, 2023
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
@@ -34,6 +34,10 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Run doctests
run: |
pip install -e .[test]
pytest --doctest-modules causalpy/
- name: Run tests
run: |
pip install -e .[test]
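For reference, the new CI step relies on pytest's doctest collection: `pytest --doctest-modules causalpy/` imports every module under `causalpy/`, runs any `>>>` examples found in docstrings, and compares the printed output with the expected text. A minimal sketch of the docstring format this picks up (the `double` function below is a hypothetical illustration, not part of CausalPy):

def double(x: float) -> float:
    """Return twice the input.

    Examples
    --------
    >>> double(2.5)
    5.0
    """
    return 2 * x

If the printed value drifts from the expected text (for example because of random number generation), the doctest fails, which is why several later commits reduce the precision shown in summaries.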
11 changes: 8 additions & 3 deletions causalpy/custom_exceptions.py
@@ -1,21 +1,26 @@
"""
Custom Exceptions for CausalPy.
"""


class BadIndexException(Exception):
"""Custom exception used when we have a mismatch in types between the dataframe
index and an event, typically a treatment or intervention."""

def __init__(self, message):
def __init__(self, message: str):
self.message = message


class FormulaException(Exception):
"""Exception raised given when there is some error in a user-provided model
formula"""

def __init__(self, message):
def __init__(self, message: str):
self.message = message


class DataException(Exception):
"""Exception raised given when there is some error in user-provided dataframe"""

def __init__(self, message):
def __init__(self, message: str):
self.message = message
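A minimal usage sketch for these exception classes; the `validate_formula` helper below is hypothetical and not part of the diff, it only illustrates raising a CausalPy-specific error instead of a bare `Exception`:

from causalpy.custom_exceptions import FormulaException

def validate_formula(formula: str) -> str:
    """Hypothetical check: a formula needs a '~' separating outcome and predictors."""
    if "~" not in formula:
        raise FormulaException("Formula must contain '~' between outcome and predictors.")
    return formula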
3 changes: 3 additions & 0 deletions causalpy/data/datasets.py
@@ -1,3 +1,6 @@
"""
Functions to load example datasets
"""
import pathlib

import pandas as pd
100 changes: 90 additions & 10 deletions causalpy/data/simulate_data.py
@@ -1,3 +1,6 @@
"""
Functions that generate data sets used in examples
"""
import numpy as np
import pandas as pd
from scipy.stats import dirichlet, gamma, norm, uniform
@@ -11,6 +14,18 @@
def _smoothed_gaussian_random_walk(
gaussian_random_walk_mu, gaussian_random_walk_sigma, N, lowess_kwargs
):
"""
Generates Gaussian random walk data and applies LOWESS

:param gaussian_random_walk_mu:
Mean of the random walk
:param gaussian_random_walk_sigma:
Standard deviation of the random walk
:param N:
Length of the random walk
:param lowess_kwargs:
Keyword argument dictionary passed to statsmodels lowess
"""
x = np.arange(N)
y = norm(gaussian_random_walk_mu, gaussian_random_walk_sigma).rvs(N).cumsum()
filtered = lowess(y, x, **lowess_kwargs)
@@ -26,12 +41,25 @@ def generate_synthetic_control_data(
lowess_kwargs=default_lowess_kwargs,
):
"""
Example:
>> import pathlib
>> df, weightings_true = generate_synthetic_control_data(
treatment_time=treatment_time
)
>> df.to_csv(pathlib.Path.cwd() / 'synthetic_control.csv', index=False)
Generates data for synthetic control example.

:param N:
Number of data points
:param treatment_time:
Index where treatment begins in the generated data frame
:param grw_mu:
Mean of Gaussian Random Walk
:param grw_sigma:
Standard deviation of Gaussian Random Walk
:param lowess_kwargs:
Keyword argument dictionary passed to statsmodels lowess

Example
--------
>>> from causalpy.data.simulate_data import generate_synthetic_control_data
>>> df, weightings_true = generate_synthetic_control_data(
... treatment_time=70
... )
"""

# 1. Generate non-treated variables
@@ -70,6 +98,21 @@ def generate_synthetic_control_data(
def generate_time_series_data(
N=100, treatment_time=70, beta_temp=-1, beta_linear=0.5, beta_intercept=3
):
"""
Generates interrupted time series example data

:param N:
Length of the time series
:param treatment_time:
Index of when treatment begins
:param beta_temp:
The temperature coefficient
:param beta_linear:
The linear coefficient
:param beta_intercept:
The intercept

"""
x = np.arange(0, 100, 1)
df = pd.DataFrame(
{
@@ -99,6 +142,9 @@ def generate_time_series_data(


def generate_time_series_data_seasonal(treatment_time):
"""
Generates 10 years of monthly data with seasonality
"""
dates = pd.date_range(
start=pd.to_datetime("2010-01-01"), end=pd.to_datetime("2020-01-01"), freq="M"
)
@@ -146,6 +192,14 @@ def generate_time_series_data_simple(treatment_time, slope=0.0):


def generate_did():
"""
Generate Difference in Differences data

Example
--------
>>> from causalpy.data.simulate_data import generate_did
>>> df = generate_did()
"""
# true parameters
control_intercept = 1
treat_intercept_delta = 0.25
@@ -157,6 +211,7 @@ def generate_did():
def outcome(
t, control_intercept, treat_intercept_delta, trend, Δ, group, post_treatment
):
"""Compute the outcome of each unit"""
return (
control_intercept
+ (treat_intercept_delta * group)
@@ -191,16 +246,22 @@ def generate_regression_discontinuity_data(
N=100, true_causal_impact=0.5, true_treatment_threshold=0.0
):
"""
Example use:
>> import pathlib
>> df = generate_regression_discontinuity_data(true_treatment_threshold=0.5)
>> df.to_csv(pathlib.Path.cwd() / 'regression_discontinuity.csv', index=False)
Generate regression discontinuity example data

Example
--------
>>> import pathlib
>>> from causalpy.data.simulate_data import generate_regression_discontinuity_data
>>> df = generate_regression_discontinuity_data(true_treatment_threshold=0.5)
>>> df.to_csv(pathlib.Path.cwd() / 'regression_discontinuity.csv', index=False)
"""

def is_treated(x):
"""Check if x was treated"""
return np.greater_equal(x, true_treatment_threshold)

def impact(x):
"""Assign true_causal_impact to all treaated entries"""
y = np.zeros(len(x))
y[is_treated(x)] = true_causal_impact
return y
@@ -214,6 +275,21 @@ def impact(x):
def generate_ancova_data(
N=200, pre_treatment_means=np.array([10, 12]), treatment_effect=2, sigma=1
):
"""
Generate ANCOVA example data

Example
--------
>>> import pathlib
>>> from causalpy.data.simulate_data import generate_ancova_data
>>> df = generate_ancova_data(
... N=200,
... pre_treatment_means=np.array([10, 12]),
... treatment_effect=2,
... sigma=1
... )
>>> df.to_csv(pathlib.Path.cwd() / 'ancova_data.csv', index=False)
"""
group = np.random.choice(2, size=N)
pre = np.random.normal(loc=pre_treatment_means[group])
post = pre + treatment_effect * group + np.random.normal(size=N) * sigma
@@ -233,6 +309,10 @@ def generate_geolift_data():
causal_impact = 0.2

def create_series(n=52, amplitude=1, length_scale=2):
"""
Returns numpy tile with generated seasonality data repeated over
multiple years
"""
return np.tile(
generate_seasonality(n=n, amplitude=amplitude, length_scale=2) + 3, n_years
)
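Most generators above gained doctests, but `generate_time_series_data` documents its parameters without one. A usage sketch, under the assumption (the hunk is truncated) that the function returns the pandas DataFrame it builds:

from causalpy.data.simulate_data import generate_time_series_data

# Assumed return value: the DataFrame constructed in the function body.
df = generate_time_series_data(N=100, treatment_time=70, beta_temp=-1)
print(df.shape)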
21 changes: 20 additions & 1 deletion causalpy/plot_utils.py
@@ -1,3 +1,7 @@
"""
Plotting utility functions.
"""

from typing import Any, Dict, Optional, Tuple, Union

import arviz as az
@@ -17,7 +21,22 @@ def plot_xY(
hdi_prob: float = 0.94,
label: Union[str, None] = None,
) -> Tuple[Line2D, PolyCollection]:
"""Utility function to plot HDI intervals."""
"""
Utility function to plot HDI intervals.

:param x:
Pandas datetime index or numpy array of x-axis values
:param y:
Xarray data array of y-axis data
:param ax:
Matplotlib ax object
:param plot_hdi_kwargs:
Dictionary of keyword arguments passed to ax.plot()
:param hdi_prob:
The size of the HDI, default is 0.94
:param label:
The plot label
"""

if plot_hdi_kwargs is None:
plot_hdi_kwargs = {}
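A usage sketch for `plot_xY`, assuming the positional order implied by the docstring (x values, a posterior DataArray with `chain` and `draw` dimensions, then the matplotlib Axes); the dimension name `obs_ind` and the fake posterior draws are illustrative assumptions, not taken from the diff:

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

from causalpy.plot_utils import plot_xY

x = np.arange(50)
# Fake posterior draws shaped (chain, draw, observation) so an HDI band
# can be computed for each x position.
y_draws = xr.DataArray(
    np.random.normal(loc=np.sin(x / 5), scale=0.1, size=(2, 200, 50)),
    dims=["chain", "draw", "obs_ind"],
)
fig, ax = plt.subplots()
h_line, h_patch = plot_xY(x, y_draws, ax, hdi_prob=0.9, label="posterior mean")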