Skip to content

Commit

Permalink
Finished docstrings and checked documentation results
Browse files Browse the repository at this point in the history
  • Loading branch information
jpreszler committed Aug 23, 2023
1 parent 49a0144 commit f8c1262
Show file tree
Hide file tree
Showing 3 changed files with 182 additions and 8 deletions.
1 change: 1 addition & 0 deletions causalpy/pymc_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
Models are intended to be used from inside an experiment
class (see pymc_experiments.py). This is why the examples require some extra
manipulation of the input data, often to ensure `y` has the correct shape.
"""
from typing import Any, Dict, Optional

Expand Down
162 changes: 156 additions & 6 deletions causalpy/skl_experiments.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
"""
Experiments for Scikit-Learn models
- ExperimentalDesign: base class for skl experiments
- PrePostFit: base class for synthetic control and interrupted time series
- SyntheticControl
- InterruptedTimeSeries
- DifferenceInDifferences
- RegressionDiscontinuity
"""
import warnings
from typing import Optional
Expand Down Expand Up @@ -27,8 +34,33 @@ def __init__(self, model=None, **kwargs):


class PrePostFit(ExperimentalDesign):
"""A class to analyse quasi-experiments where parameter estimation is based on just
the pre-intervention data."""
"""
A class to analyse quasi-experiments where parameter estimation is based on just
the pre-intervention data.
:param data:
A pandas data frame
:param treatment_time:
The index or time value of when treatment begins
:param formula:
A statistical model formula
:param model:
An sklearn model object
Example
--------
>>> from sklearn.linear_model import LinearRegression
>>> import causalpy as cp
>>> df = cp.load_data("sc")
>>> treatment_time = 70
>>> result = cp.skl_experiments.PrePostFit(
... df,
... treatment_time,
... formula="actual ~ 0 + a + b + c + d + e + f + g",
... model = cp.skl_models.WeightedProportion()
... )
"""

def __init__(
self,
Expand Down Expand Up @@ -144,7 +176,16 @@ def plot(self, counterfactual_label="Counterfactual", **kwargs):
return (fig, ax)

def get_coeffs(self):
"""Returns model coefficients"""
"""
Returns model coefficients
Example
--------
>>> result.get_coeffs()
array([3.97370896e-01, 1.53881980e-01, 4.48747123e-01, 1.04639857e-16,
0.00000000e+00, 0.00000000e+00, 2.92931287e-16])
"""
return np.squeeze(self.model.coef_)

def plot_coeffs(self):
Expand All @@ -161,13 +202,68 @@ def plot_coeffs(self):


class InterruptedTimeSeries(PrePostFit):
"""Interrupted time series analysis"""
"""
Interrupted time series analysis, a wrapper around the PrePostFit class
:param data:
A pandas data frame
:param treatment_time:
The index or time value of when treatment begins
:param formula:
A statistical model formula
:param model:
An sklearn model object
Example
--------
>>> from sklearn.linear_model import LinearRegression
>>> import pandas as pd
>>> import causalpy as cp
>>> df = (
... cp.load_data("its")
... .assign(date=lambda x: pd.to_datetime(x["date"]))
... .set_index("date")
... )
>>> treatment_time = pd.to_datetime("2017-01-01")
>>> result = cp.skl_experiments.InterruptedTimeSeries(
... df,
... treatment_time,
... formula="y ~ 1 + t + C(month)",
... model = LinearRegression()
... )
"""

expt_type = "Interrupted Time Series"


class SyntheticControl(PrePostFit):
"""A wrapper around the PrePostFit class"""
"""
Synthetic control analysis, a wrapper around the PrePostFit class
:param data:
A pandas data frame
:param treatment_time:
The index or time value of when treatment begins
:param formula:
A statistical model formula
:param model:
An sklearn model object
Example
--------
>>> from sklearn.linear_model import LinearRegression
>>> import causalpy as cp
>>> df = cp.load_data("sc")
>>> treatment_time = 70
>>> result = cp.skl_experiments.SyntheticControl(
... df,
... treatment_time,
... formula="actual ~ 0 + a + b + c + d + e + f + g",
... model = cp.skl_models.WeightedProportion()
... )
"""

def plot(self, plot_predictors=False, **kwargs):
"""Plot the results"""
Expand All @@ -187,6 +283,32 @@ class DifferenceInDifferences(ExperimentalDesign):
There is no pre/post intervention data distinction for DiD, we fit all the data
available.
:param data:
A pandas data frame
:param formula:
A statistical model formula
:param time_variable_name:
Name of the data column for the time variable
:param group_variable_name:
Name of the data column for the group variable
:param model:
An sklearn model object
Example
--------
>>> import causalpy as cp
>>> from sklearn.linear_model import LinearRegression
>>> df = cp.load_data("did")
>>> seed = 42
>>> result = cp.skl_experiments.DifferenceInDifferences(
... df,
... formula="y ~ 1 + group*post_treatment",
... time_variable_name="t",
... group_variable_name="group",
... treated=1,
... untreated=0,
... model=LinearRegression(),
... )
"""

def __init__(
Expand Down Expand Up @@ -373,6 +495,17 @@ class RegressionDiscontinuity(ExperimentalDesign):
:param bandwidth:
Data outside of the bandwidth (relative to the discontinuity) is not used to fit
the model.
Example
--------
>>> import causalpy as cp
>>> from sklearn.linear_model import LinearRegression
>>> data = cp.load_data("rd")
>>> result = cp.skl_experiments.RegressionDiscontinuity(
... data,
... formula="y ~ 1 + x + treated",
... model=LinearRegression(),
... treatment_threshold=0.5,
... )
"""

def __init__(
Expand Down Expand Up @@ -503,7 +636,24 @@ def plot(self):
return (fig, ax)

def summary(self):
"""Print text output summarising the results"""
"""
Print text output summarising the results
Example
--------
>>> result.summary()
Difference in Differences experiment
Formula: y ~ 1 + x + treated
Running variable: x
Threshold on running variable: 0.5
Results:
Discontinuity at threshold = 0.19
Model coefficients:
Intercept 0.0
treated[T.True] 0.19034196317793994
x 1.229600855360073
"""
print("Difference in Differences experiment")
print(f"Formula: {self.formula}")
print(f"Running variable: {self.running_variable_name}")
Expand Down
27 changes: 25 additions & 2 deletions causalpy/skl_models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""
Scikit-Learn Models
Includes:
1. Weighted Proportion
- Weighted Proportion
"""
from functools import partial

Expand All @@ -21,6 +21,29 @@ class WeightedProportion(LinearModel, RegressorMixin):
Inspiration taken from this blog post
https://towardsdatascience.com/understanding-synthetic-control-methods-dd9a291885a1
Example
--------
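>>> import numpy as np
>>> from causalpy.skl_models import WeightedProportion
>>> rng = np.random.default_rng(seed=42)
>>> X = rng.normal(loc=0, scale=1, size=(20,2))
>>> y = rng.normal(loc=0, scale=1, size=(20,))
>>> wp = WeightedProportion()
>>> wp.fit(X, y)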
WeightedProportion()
>>> wp.coef_
array([[0.36719946, 0.63280054]])
>>> X_new = rng.normal(loc=0, scale=1, size=(10,2))
>>> wp.predict(X_new)
array([[-0.8298643 ],
[ 0.43072465],
[ 0.76319257],
[-0.42062812],
[ 0.1939908 ],
[-1.18557609],
[-0.0230188 ],
[ 0.48923816],
[-0.05656294],
[ 0.0339618 ]])
"""

def loss(self, W, X, y):
Expand Down

0 comments on commit f8c1262

Please sign in to comment.