Skip to content

Commit

Permalink
Merge branch 'main' into summary
Browse files Browse the repository at this point in the history
  • Loading branch information
drbenvincent committed Jun 19, 2024
2 parents 22f5008 + 4af4af6 commit 3207e03
Show file tree
Hide file tree
Showing 19 changed files with 6,683 additions and 29 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/rtd-link-preview.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: Read the Docs Pull Request Preview
on:
pull_request_target:
types:
- opened

permissions:
pull-requests: write

jobs:
documentation-links:
runs-on: ubuntu-latest
steps:
- uses: readthedocs/actions/preview@v1
with:
project-slug: "causalpy"
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ _build
build/
dist/
docs/_build/
docs/build/
docs/jupyter_execute/
*.vscode
.coverage
*.jupyterlab-workspace
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ repos:
exclude_types: [svg]
- id: check-yaml
- id: check-added-large-files
exclude: &exclude_pattern 'iv_weak_instruments.ipynb'
args: ["--maxkb=1500"]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.8
rev: v0.4.9
hooks:
# Run the linter
- id: ruff
Expand Down
4 changes: 2 additions & 2 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ version: 2

# Set the version of Python and other tools you might need
build:
os: ubuntu-20.04
os: ubuntu-lts-latest
tools:
python: "3.10"
python: "3.11"
# You can also specify other tool versions:
# nodejs: "16"
# rust: "1.55"
Expand Down
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ lint:
check_lint:
ruff check .
ruff format --diff --check .
nbqa black --check .
nbqa ruff .
interrogate .

doctest:
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pip install git+https://github.com/pymc-labs/CausalPy.git

```python
import causalpy as cp

import matplotlib.pyplot as plt

# Import and process data
df = (
Expand All @@ -57,6 +57,8 @@ fig, ax = result.plot();

# Get a results summary
result.summary()

plt.show()
```

## Roadmap
Expand Down
1 change: 1 addition & 0 deletions causalpy/data/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"geolift1": {"filename": "geolift1.csv"},
"risk": {"filename": "AJR2001.csv"},
"nhefs": {"filename": "nhefs.csv"},
"schoolReturns": {"filename": "schoolingReturns.csv"},
}


Expand Down
3,011 changes: 3,011 additions & 0 deletions causalpy/data/schoolingReturns.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion causalpy/pymc_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -1450,7 +1450,7 @@ def __init__(
"mus": [self.ols_beta_first_params, self.ols_beta_second_params],
"sigmas": [1, 1],
"eta": 2,
"lkj_sd": 2,
"lkj_sd": 1,
}
self.priors = priors
self.model.fit(
Expand Down
54 changes: 41 additions & 13 deletions causalpy/pymc_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,8 @@ class InstrumentalVariableRegression(ModelBuilder):
... "mus": [[-2,4], [0.5, 3]],
... "sigmas": [1, 1],
... "eta": 2,
... "lkj_sd": 2,
... })
... "lkj_sd": 1,
... }, None)
Inference data...
"""

Expand Down Expand Up @@ -340,7 +340,7 @@ def build_model(self, X, Z, y, t, coords, priors):
sigma=priors["sigmas"][1],
dims="covariates",
)
sd_dist = pm.HalfCauchy.dist(beta=priors["lkj_sd"], shape=2)
sd_dist = pm.Exponential.dist(priors["lkj_sd"], shape=2)
chol, corr, sigmas = pm.LKJCholeskyCov(
name="chol_cov",
eta=priors["eta"],
Expand All @@ -366,24 +366,52 @@ def build_model(self, X, Z, y, t, coords, priors):
shape=(X.shape[0], 2),
)

def fit(self, X, Z, y, t, coords, priors):
"""Draw samples from posterior, prior predictive, and posterior predictive
distributions.
def sample_predictive_distribution(self, ppc_sampler="jax"):
    """Sample predictive distributions for the IV model and add them to ``self.idata``.

    Sampling the multivariate normal likelihood of the IV model can be
    slow unless the sampling function is compiled with JAX, which is why
    this step is separate and the backend is selectable.

    Parameters
    ----------
    ppc_sampler : {"jax", "pymc", None}, default "jax"
        * ``"jax"``: sample only the posterior predictive distribution,
          passing ``compile_kwargs={"mode": "JAX"}`` to PyMC.
        * ``"pymc"``: sample both the prior predictive and the posterior
          predictive distributions with PyMC's default compilation.
        * ``None``: do nothing.

    Raises
    ------
    ValueError
        If ``ppc_sampler`` is not one of the values listed above. An
        unrecognised value (for example ``"JAX"``) would otherwise be
        skipped silently, leaving ``self.idata`` without predictive samples.

    Notes
    -----
    Reads ``self.idata``, so the posterior must already have been sampled
    before this method is called with ``"jax"`` or ``"pymc"``.
    """
    if ppc_sampler is None:
        # Caller explicitly opted out of predictive sampling.
        return
    if ppc_sampler not in ("jax", "pymc"):
        raise ValueError(
            f"ppc_sampler must be 'jax', 'pymc' or None, got {ppc_sampler!r}"
        )

    # Reuse the seed given for posterior sampling so that the predictive
    # draws are reproducible as well.
    random_seed = self.sample_kwargs.get("random_seed", None)

    with self:
        if ppc_sampler == "pymc":
            self.idata.extend(pm.sample_prior_predictive(random_seed=random_seed))
            self.idata.extend(
                pm.sample_posterior_predictive(
                    self.idata,
                    random_seed=random_seed,
                )
            )
        else:  # "jax"
            self.idata.extend(
                pm.sample_posterior_predictive(
                    self.idata,
                    random_seed=random_seed,
                    compile_kwargs={"mode": "JAX"},
                )
            )

def fit(self, X, Z, y, t, coords, priors, ppc_sampler=None):
"""Draw samples from posterior distribution and potentially
from the prior and posterior predictive distributions. The
fit call can take values for the
ppc_sampler = ['jax', 'pymc', None]
We default to None, so the user can determine if they wish
to spend time sampling the posterior predictive distribution
independently.
"""

# Ensure random_seed is used in sample_prior_predictive() and
# sample_posterior_predictive() if provided in sample_kwargs.
random_seed = self.sample_kwargs.get("random_seed", None)
# Use JAX for ppc sampling of multivariate likelihood

self.build_model(X, Z, y, t, coords, priors)
with self:
self.idata = pm.sample(**self.sample_kwargs)
self.idata.extend(pm.sample_prior_predictive(random_seed=random_seed))
self.idata.extend(
pm.sample_posterior_predictive(
self.idata, progressbar=False, random_seed=random_seed
)
)
self.sample_predictive_distribution(ppc_sampler=ppc_sampler)
return self.idata


Expand Down
1 change: 1 addition & 0 deletions causalpy/tests/test_integration_pymc_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ def test_iv_reg():
sample_kwargs=sample_kwargs
),
)
result.model.sample_predictive_distribution(ppc_sampler="pymc")
assert isinstance(df, pd.DataFrame)
assert isinstance(data, pd.DataFrame)
assert isinstance(instruments_data, pd.DataFrame)
Expand Down
16 changes: 15 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,18 @@
"sphinx.ext.autodoc",
"sphinx.ext.intersphinx",
"sphinx.ext.mathjax",
"sphinx.ext.viewcode",
"sphinx_autodoc_typehints",
"sphinx_copybutton",
]

nb_execution_mode = "off"

# configure copy button to avoid copying sphinx or console characters
copybutton_exclude = ".linenos, .gp"
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: "
copybutton_prompt_is_regexp = True

source_suffix = {
".rst": "restructuredtext",
".ipynb": "myst-nb",
Expand All @@ -72,8 +79,15 @@

# -- intersphinx config -------------------------------------------------------
intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"examples": ("https://www.pymc.io/projects/examples/en/latest/", None),
"mpl": ("https://matplotlib.org/stable", None),
"numpy": ("https://numpy.org/doc/stable/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
"pymc": ("https://www.pymc.io/projects/docs/en/stable/", None),
"python": ("https://docs.python.org/3", None),
"scikit-learn": ("https://scikit-learn.org/stable/", None),
"scipy": ("https://docs.scipy.org/doc/scipy/", None),
"xarray": ("https://docs.xarray.dev/en/stable/", None),
}

# MyST options for working with markdown files.
Expand Down
1 change: 1 addition & 0 deletions docs/source/examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Instrumental Variables Regression
:titlesonly:

notebooks/iv_pymc.ipynb
notebooks/iv_weak_instruments.ipynb

Inverse Propensity Score Weighting
=================================
Expand Down
9 changes: 7 additions & 2 deletions docs/source/glossary.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ Glossary
Endogenous Variable
An endogenous variable is a variable in a regression equation such that the variable is correlated with the error term of the equation i.e. correlated with the outcome variable (in the system). This is a problem for OLS regression estimation techniques because endogeneity violates the assumptions of the Gauss-Markov theorem.

Local Average Treatment effect
LATE
Also known as the complier average causal effect (CACE), this is the effect of a treatment for subjects who comply with the experimental treatment assigned to their sample group. It is the quantity we're estimating in IV designs.

Non-equivalent group designs
NEGD
A quasi-experimental design where units are assigned to conditions non-randomly, and not according to a running variable (see Regression discontinuity design). This can be problematic when assigning causal influence of the treatment - differences in outcomes between groups could be due to the treatment or due to differences in the group attributes themselves.
Expand All @@ -62,6 +66,9 @@ Glossary
Pretest-posttest design
A quasi-experimental design where the treatment effect is estimated by comparing an outcome measure before and after treatment.

Propensity scores
An estimate of the probability of adopting a treatment status. Used in re-weighting schemes to balance observational data.

Quasi-experiment
An empirical comparison used to estimate the effects of a treatment where units are not assigned to conditions at random.

Expand Down Expand Up @@ -101,8 +108,6 @@ Glossary
2SLS
An estimation technique for estimating the parameters of an IV regression. It takes its name from the fact that it uses two OLS regressions - a first and second stage.

Propensity scores
An estimate of the probability of adopting a treatment status. Used in re-weighting schemes to balance observational data.


References
Expand Down
11 changes: 10 additions & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,18 @@ A Python package focussing on causal inference for quasi-experiments. The packag
Installation
------------

To get the latest release:
To get the latest release you can use pip:

.. code-block:: sh
pip install CausalPy
or conda:

.. code-block:: sh
conda install causalpy -c conda-forge
Alternatively, if you want the very latest version of the package you can install from GitHub:

.. code-block:: sh
Expand All @@ -31,6 +37,7 @@ Quickstart
.. code-block:: python
import causalpy as cp
import matplotlib.pyplot as plt
# Import and process data
Expand All @@ -55,6 +62,8 @@ Quickstart
# Get a results summary
result.summary()
plt.show()
Videos
------
Expand Down
3,556 changes: 3,556 additions & 0 deletions docs/source/notebooks/iv_weak_instruments.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/source/quasi_dags.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"One nice feature of this set up is that we can evaluate the claim of __strong ignorability__ because it implies that $T \\perp\\!\\!\\!\\perp X | PS(X)$ and this ensures the covariate profiles are balanced across the treatment branches conditional on the propensity score. This is a testable implication of the postulated design! Balance plots and measures are ways in which to evaluate if the offset achieved by your propensity score has worked. It is crucial that PS serve as a balancing score, if the measure cannot serve as a balancing score the collision effect can add to the confounding bias rather than remove it. "
"One nice feature of this set up is that we can evaluate the claim of __strong ignorability__ because it implies that $Z \\perp\\!\\!\\!\\perp X | PS(X)$ and this ensures the covariate profiles are balanced across the treatment branches conditional on the propensity score. This is a testable implication of the postulated design! Balance plots and measures are ways in which to evaluate if the offset achieved by your propensity score has worked. It is crucial that PS serve as a balancing score, if the measure cannot serve as a balancing score the collision effect can add to the confounding bias rather than remove it. "
]
},
{
Expand Down
9 changes: 9 additions & 0 deletions docs/source/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@ @article{acemoglu2001colonial
year={2001}
}

@incollection{card1995returns,
author={Card, David},
title={Using Geographical Variation in College Proximity to Estimate the Return to Schooling},
editor={Christofides, L.N. and Grant, E.K. and Swidinsky, R.},
booktitle={Aspects of Labour Market Behaviour: Essays in Honour of John Vanderkamp},
year={1995},
publisher={University of Toronto Press}
}

@incollection{forde2024nonparam,
author = {Forde, Nathaniel},
title = {Bayesian Non-parametric Causal Inference},
Expand Down
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ dependencies = [
"scipy",
"seaborn>=0.11.2",
"statsmodels",
"xarray>=v2022.11.0",
"xarray>=v2022.11.0"
]

# List additional groups of dependencies here (e.g. development dependencies). Users
Expand All @@ -54,17 +54,17 @@ docs = [
"ipykernel",
"daft",
"linkify-it-py",
"myst-nb<=1.0.0",
"myst-nb!=1.1.0",
"pathlib",
"sphinx",
"sphinx-autodoc-typehints",
"sphinx_autodoc_defaultargs",
"sphinx-design",
"sphinx-copybutton",
"sphinx-rtd-theme",
"statsmodels",
"sphinxcontrib-bibtex",
]
lint = ["interrogate", "nbqa", "pre-commit", "ruff"]
lint = ["interrogate", "pre-commit", "ruff"]
test = ["pytest", "pytest-cov"]

[metadata]
Expand Down

0 comments on commit 3207e03

Please sign in to comment.