Finished docstrings and checked documentation results

pymc-labs · Aug 23, 2023 · f8c1262 · f8c1262
1 parent 49a0144
commit f8c1262
Show file tree

Hide file tree

Showing 3 changed files with 182 additions and 8 deletions.
diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py
@@ -7,6 +7,7 @@
 Models are intended to be used from inside an experiment
 class (see pymc_experiments.py). This is why the examples require some extra
 manipulation of input data, often to ensure `y` has the correct shape.
+
 """
 from typing import Any, Dict, Optional
 

diff --git a/causalpy/skl_experiments.py b/causalpy/skl_experiments.py
@@ -1,5 +1,12 @@
 """
 Experiments for Scikit-Learn models
+
+- ExperimentalDesign: base class for skl experiments
+- PrePostFit: base class for synthetic control and interrupted time series
+- SyntheticControl
+- InterruptedTimeSeries
+- DifferenceInDifferences
+- RegressionDiscontinuity
 """
 import warnings
 from typing import Optional
@@ -27,8 +34,33 @@ def __init__(self, model=None, **kwargs):
 
 
 class PrePostFit(ExperimentalDesign):
-    """A class to analyse quasi-experiments where parameter estimation is based on just
-    the pre-intervention data."""
+    """
+    A class to analyse quasi-experiments where parameter estimation is based on just
+    the pre-intervention data.
+
+    :param data:
+        A pandas data frame
+    :param treatment_time:
+        The index or time value of when treatment begins
+    :param formula:
+        A statistical model formula
+    :param model:
+        An sklearn model object
+
+    Example
+    --------
+    >>> from sklearn.linear_model import LinearRegression
+    >>> import causalpy as cp
+    >>> df = cp.load_data("sc")
+    >>> treatment_time = 70
+    >>> result = cp.skl_experiments.PrePostFit(
+    ...     df,
+    ...     treatment_time,
+    ...     formula="actual ~ 0 + a + b + c + d + e + f + g",
+    ...     model = cp.skl_models.WeightedProportion()
+    ... )
+
+    """
 
     def __init__(
         self,
@@ -144,7 +176,16 @@ def plot(self, counterfactual_label="Counterfactual", **kwargs):
         return (fig, ax)
 
     def get_coeffs(self):
-        """Returns model coefficients"""
+        """
+        Returns model coefficients
+
+        Example
+        --------
+        >>> result.get_coeffs()
+        array([3.97370896e-01, 1.53881980e-01, 4.48747123e-01, 1.04639857e-16,
+        0.00000000e+00, 0.00000000e+00, 2.92931287e-16])
+
+        """
         return np.squeeze(self.model.coef_)
 
     def plot_coeffs(self):
@@ -161,13 +202,68 @@ def plot_coeffs(self):
 
 
 class InterruptedTimeSeries(PrePostFit):
-    """Interrupted time series analysis"""
+    """
+    Interrupted time series analysis, a wrapper around the PrePostFit class
+
+    :param data:
+        A pandas data frame
+    :param treatment_time:
+        The index or time value of when treatment begins
+    :param formula:
+        A statistical model formula
+    :param model:
+        An sklearn model object
+
+    Example
+    --------
+    >>> from sklearn.linear_model import LinearRegression
+    >>> import pandas as pd
+    >>> import causalpy as cp
+    >>> df = (
+    ...     cp.load_data("its")
+    ...     .assign(date=lambda x: pd.to_datetime(x["date"]))
+    ...     .set_index("date")
+    ... )
+    >>> treatment_time = pd.to_datetime("2017-01-01")
+    >>> result = cp.skl_experiments.InterruptedTimeSeries(
+    ...     df,
+    ...     treatment_time,
+    ...     formula="y ~ 1 + t + C(month)",
+    ...     model = LinearRegression()
+    ... )
+
+    """
 
     expt_type = "Interrupted Time Series"
 
 
 class SyntheticControl(PrePostFit):
-    """A wrapper around the PrePostFit class"""
+    """
+    A wrapper around the PrePostFit class
+
+    :param data:
+        A pandas data frame
+    :param treatment_time:
+        The index or time value of when treatment begins
+    :param formula:
+        A statistical model formula
+    :param model:
+        An sklearn model object
+
+    Example
+    --------
+    >>> from sklearn.linear_model import LinearRegression
+    >>> import causalpy as cp
+    >>> df = cp.load_data("sc")
+    >>> treatment_time = 70
+    >>> result = cp.skl_experiments.SyntheticControl(
+    ...     df,
+    ...     treatment_time,
+    ...     formula="actual ~ 0 + a + b + c + d + e + f + g",
+    ...     model = cp.skl_models.WeightedProportion()
+    ... )
+
+    """
 
     def plot(self, plot_predictors=False, **kwargs):
         """Plot the results"""
@@ -187,6 +283,32 @@ class DifferenceInDifferences(ExperimentalDesign):
 
         There is no pre/post intervention data distinction for DiD, we fit all the data
         available.
+
+    :param data:
+        A pandas data frame
+    :param formula:
+        A statistical model formula
+    :param time_variable_name:
+        Name of the data column for the time variable
+    :param group_variable_name:
+        Name of the data column for the group variable
+    :param model:
+        An sklearn model object
+
+    Example
+    --------
+    >>> df = cp.load_data("did")
+    >>> seed = 42
+    >>> result = cp.skl_experiments.DifferenceInDifferences(
+    ...     df,
+    ...     formula="y ~ 1 + group*post_treatment",
+    ...     time_variable_name="t",
+    ...     group_variable_name="group",
+    ...     treated=1,
+    ...     untreated=0,
+    ...     model=LinearRegression(),
+    ... )
+
     """
 
     def __init__(
@@ -373,6 +495,17 @@ class RegressionDiscontinuity(ExperimentalDesign):
     :param bandwidth:
         Data outside of the bandwidth (relative to the discontinuity) is not used to fit
         the model.
+
+    Example
+    --------
+    >>> data = cp.load_data("rd")
+    >>> result = cp.skl_experiments.RegressionDiscontinuity(
+    ...     data,
+    ...     formula="y ~ 1 + x + treated",
+    ...     model=LinearRegression(),
+    ...     treatment_threshold=0.5,
+    ... )
+
     """
 
     def __init__(
@@ -503,7 +636,24 @@ def plot(self):
         return (fig, ax)
 
     def summary(self):
-        """Print text output summarising the results"""
+        """
+        Print text output summarising the results
+
+        Example
+        --------
+        >>> result.summary()
+        Difference in Differences experiment
+        Formula: y ~ 1 + x + treated
+        Running variable: x
+        Threshold on running variable: 0.5
+        Results:
+        Discontinuity at threshold = 0.19
+        Model coefficients:
+                Intercept		0.0
+                treated[T.True]		0.19034196317793994
+                x		1.229600855360073
+
+        """
         print("Difference in Differences experiment")
         print(f"Formula: {self.formula}")
         print(f"Running variable: {self.running_variable_name}")

diff --git a/causalpy/skl_models.py b/causalpy/skl_models.py
@@ -1,8 +1,8 @@
 """
 Scikit-Learn Models
 
-Includes:
-1. Weighted Proportion
+- Weighted Proportion
+
 """
 from functools import partial
 
@@ -21,6 +21,29 @@ class WeightedProportion(LinearModel, RegressorMixin):
 
     Inspiration taken from this blog post
     https://towardsdatascience.com/understanding-synthetic-control-methods-dd9a291885a1
+
+    Example
+    --------
+    >>> rng = np.random.default_rng(seed=42)
+    >>> X = rng.normal(loc=0, scale=1, size=(20,2))
+    >>> y = rng.normal(loc=0, scale=1, size=(20,))
+    >>> wp = WeightedProportion()
+    >>> wp.fit(X, y)
+    WeightedProportion()
+    >>> wp.coef_
+    array([[0.36719946, 0.63280054]])
+    >>> X_new = rng.normal(loc=0, scale=1, size=(10,2))
+    >>> wp.predict(X_new)
+    array([[-0.8298643 ],
+       [ 0.43072465],
+       [ 0.76319257],
+       [-0.42062812],
+       [ 0.1939908 ],
+       [-1.18557609],
+       [-0.0230188 ],
+       [ 0.48923816],
+       [-0.05656294],
+       [ 0.0339618 ]])
+
     """
 
     def loss(self, W, X, y):