Skip to content

Commit

Permalink
Addition of topological order based methods (#129)
Browse files Browse the repository at this point in the history
* Addition of topological order based methods
* Add SCORE, DAS, NoGAM examples
* Fix poetry lock file

---------

Signed-off-by: francescomontagna <[email protected]>
Co-authored-by: Adam Li <[email protected]>
  • Loading branch information
francescomontagna and adam2392 authored Jun 28, 2023
1 parent dd7cbbb commit ade4e48
Show file tree
Hide file tree
Showing 21 changed files with 2,925 additions and 39 deletions.
14 changes: 14 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,19 @@ Constraint-based structure learning
PC
FCI


Order-based structure learning
===================================

.. currentmodule:: dodiscover.toporder
.. autosummary::
:toctree: generated/

CAM
SCORE
DAS
NoGAM

Comparing causal discovery algorithms
=====================================

Expand All @@ -59,6 +72,7 @@ Comparing causal discovery algorithms

confusion_matrix_networks
structure_hamming_dist
toporder_divergence


Typing
Expand Down
118 changes: 83 additions & 35 deletions doc/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@
% Try to keep this list in alphabetical order based on citing name
# Journal papers and conference proceedings
@article{Buhlmann2013,
author = {B{\"u}hlmann, Peter and Peters, Jonas and Ernest, Jan},
year = {2014},
month = {12},
pages = {2526--2556},
title = {{CAM}: Causal Additive Models, high-dimensional order search and penalized regression},
volume = {42},
number = {6},
journal = {The Annals of Statistics},
doi = {10.1214/14-AOS1260}
}

@article{Colombo2012,
author = {Diego Colombo and Marloes H. Maathuis and Markus Kalisch and Thomas S. Richardson},
title = {{Learning high-dimensional directed acyclic graphs with latent and selection variables}},
Expand All @@ -16,6 +27,7 @@ @article{Colombo2012
url = {https://doi.org/10.1214/11-AOS940}
}


@inproceedings{correa2020calculus,
title = {A calculus for stochastic interventions: Causal effect identification and surrogate experiments},
author = {Correa, Juan and Bareinboim, Elias},
Expand Down Expand Up @@ -43,6 +55,15 @@ @article{Kocaoglu2019characterization
year = {2019}
}

@article{Li2017,
author = {Li, Yingzhen and Turner, Richard},
journal = {International Conference on Learning Representations (ICLR)},
year = {2017},
month = {05},
pages = {},
title = {Gradient Estimators for Implicit Models}
}

@article{Meek1995,
author = {Meek, Christopher},
year = {1995},
Expand All @@ -53,6 +74,22 @@ @article{Meek1995
journal = {Proceedings of Eleventh Conference on Uncertainty in Artificial Intelligence, Montreal, QU}
}

@inproceedings{Montagna2023a,
title={Scalable Causal Discovery with Score Matching},
author={Francesco Montagna and Nicoletta Noceti and Lorenzo Rosasco and Kun Zhang and Francesco Locatello},
booktitle={2nd Conference on Causal Learning and Reasoning},
year={2023},
url={https://openreview.net/forum?id=6VvoDjLBPQV}
}

@inproceedings{Montagna2023b,
title={Causal Discovery with Score Matching on Additive Models with Arbitrary Noise},
author={Francesco Montagna and Nicoletta Noceti and Lorenzo Rosasco and Kun Zhang and Francesco Locatello},
booktitle={2nd Conference on Causal Learning and Reasoning},
year={2023},
url={https://openreview.net/forum?id=rVO0Bx90deu}
}

@book{Neapolitan2003,
author = {Neapolitan, Richard},
year = {2003},
Expand All @@ -64,6 +101,19 @@ @book{Neapolitan2003
doi = {10.1145/1327942.1327961}
}

@inproceedings{rolland2022,
title = {Score Matching Enables Causal Discovery of Nonlinear Additive Noise Models},
author = {Rolland, Paul and Cevher, Volkan and Kleindessner, Matth{\"a}us and Russell, Chris and Janzing, Dominik and Sch{\"o}lkopf, Bernhard and Locatello, Francesco},
booktitle = {Proceedings of the 39th International Conference on Machine Learning},
pages = {18741--18753},
year = {2022},
editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
volume = {162},
series = {Proceedings of Machine Learning Research},
month = {17--23 Jul},
publisher = {PMLR}
}

@article{uhler2013geometry,
title = {Geometry of the faithfulness assumption in causal inference},
author = {Uhler, Caroline and Raskutti, Garvesh and B{\"u}hlmann, Peter and Yu, Bin},
Expand Down Expand Up @@ -157,31 +207,30 @@ @article{cressieread1984


@article{frenzel_partial_2007,
title = {Partial {Mutual} {Information} for {Coupling} {Analysis} of {Multivariate} {Time} {Series}},
volume = {99},
doi = {10.1103/PhysRevLett.99.204101},
journal = {Physical review letters},
author = {Frenzel, Stefan and Pompe, Bernd},
month = dec,
year = {2007},
pages = {204101},
file = {Full Text PDF:/Users/adam2392/Zotero/storage/8ICFXVZG/Frenzel and Pompe - 2007 - Partial Mutual Information for Coupling Analysis o.pdf:application/pdf}
title = {Partial {Mutual} {Information} for {Coupling} {Analysis} of {Multivariate} {Time} {Series}},
volume = {99},
doi = {10.1103/PhysRevLett.99.204101},
journal = {Physical review letters},
author = {Frenzel, Stefan and Pompe, Bernd},
month = dec,
year = {2007},
pages = {204101},
file = {Full Text PDF:/Users/adam2392/Zotero/storage/8ICFXVZG/Frenzel and Pompe - 2007 - Partial Mutual Information for Coupling Analysis o.pdf:application/pdf}
}

@article{kraskov_estimating_2004,
title = {Estimating mutual information},
volume = {69},
url = {https://link.aps.org/doi/10.1103/PhysRevE.69.066138},
doi = {10.1103/PhysRevE.69.066138},
number = {6},
urldate = {2023-01-27},
journal = {Physical Review E},
author = {Kraskov, Alexander and Stögbauer, Harald and Grassberger, Peter},
month = jun,
year = {2004},
note = {Publisher: American Physical Society},
pages = {066138},
file = {APS Snapshot:/Users/adam2392/Zotero/storage/GRW23BYU/PhysRevE.69.html:text/html;Full Text PDF:/Users/adam2392/Zotero/storage/NJT9QCVA/Kraskov et al. - 2004 - Estimating mutual information.pdf:application/pdf}
title = {Estimating mutual information},
volume = {69},
url = {https://link.aps.org/doi/10.1103/PhysRevE.69.066138},
doi = {10.1103/PhysRevE.69.066138},
number = {6},
urldate = {2023-01-27},
journal = {Physical Review E},
author = {Kraskov, Alexander and Stögbauer, Harald and Grassberger, Peter},
month = jun,
year = {2004},
note = {Publisher: American Physical Society},
pages = {066138}
}

@article{Lopez2016revisiting,
Expand Down Expand Up @@ -219,9 +268,7 @@ @inproceedings{Runge2018cmi
volume = {84},
series = {Proceedings of Machine Learning Research},
month = {09--11 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v84/runge18a/runge18a.pdf},
url = {https://proceedings.mlr.press/v84/runge18a.html}
publisher = {PMLR}
}

@article{Sen2017model,
Expand All @@ -232,6 +279,7 @@ @article{Sen2017model
year = {2017}
}


@inproceedings{Yu2020Bregman,
title = {Measuring the Discrepancy between Conditional Distributions: Methods, Properties and Applications},
author = {Yu, Shujian and Shaker, Ammar and Alesiani, Francesco and Principe, Jose},
Expand Down Expand Up @@ -264,15 +312,15 @@ @inproceedings{Zhang2011
% Example refs
@article{sachsdataset2005,
author = {Karen Sachs and Omar Perez and Dana Pe'er and Douglas A. Lauffenburger and Garry P. Nolan },
title = {Causal Protein-Signaling Networks Derived from Multiparameter Single-Cell Data},
journal = {Science},
volume = {308},
number = {5721},
pages = {523-529},
year = {2005},
doi = {10.1126/science.1105809},
url = {https://www.science.org/doi/abs/10.1126/science.1105809},
eprint = {https://www.science.org/doi/pdf/10.1126/science.1105809},
author = {Karen Sachs and Omar Perez and Dana Pe'er and Douglas A. Lauffenburger and Garry P. Nolan },
title = {Causal Protein-Signaling Networks Derived from Multiparameter Single-Cell Data},
journal = {Science},
volume = {308},
number = {5721},
pages = {523-529},
year = {2005},
doi = {10.1126/science.1105809},
url = {https://www.science.org/doi/abs/10.1126/science.1105809},
eprint = {https://www.science.org/doi/pdf/10.1126/science.1105809}
}

1 change: 1 addition & 0 deletions doc/whats_new/v0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Changelog
- |Feature| Add pre-commit hooks for linting, type-checking, and code formatting, by `Jaron Lee`_ (:pr:`117`)
- |Feature| Adds parallelization via joblib to the skeleton learners, by `Adam Li`_ (:pr:`127`)
- |Feature| Add a suite of general categorical data CI tests, by `Adam Li`_ (:pr:`128`)
- |Feature| Implement CAM, SCORE, DAS, NoGAM algorithms in ``dodiscover.toporder`` submodule (:pr:`129`)

Code and Documentation Contributors
-----------------------------------
Expand Down
1 change: 1 addition & 0 deletions dodiscover/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from . import cd # noqa: F401
from . import ci # noqa: F401
from . import metrics # noqa: F401
from . import toporder  # noqa: F401
from ._protocol import EquivalenceClass, Graph
from ._version import __version__ # noqa: F401
from .constraint import FCI, PC
Expand Down
44 changes: 41 additions & 3 deletions dodiscover/metrics.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from typing import Optional
from typing import List, Optional

import networkx as nx
import numpy as np
from numpy.typing import NDArray
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelBinarizer

from dodiscover.typing import NetworkxGraph

from ._protocol import Graph
from .typing import NetworkxGraph


def confusion_matrix_networks(
Expand Down Expand Up @@ -146,3 +145,42 @@ def structure_hamming_dist(
diff = diff + diff.T
diff[diff > 1] = 1 # Ignoring the double edges.
return np.sum(diff) / 2


def toporder_divergence(true_graph: NetworkxGraph, order: List[int]) -> int:
    """Compute topological ordering divergence.

    Topological order divergence counts the false negatives, i.e. the edges of
    the ground truth graph that can never be recovered once the nodes are
    constrained to follow the given topological order.

    If the topological ordering is compatible with the ground truth graph,
    the divergence is equal to 0. In the worst case of a completely reversed
    ordering, the divergence is equal to P, the number of edges (positives)
    in the ground truth graph.

    Note that the divergence defines a lower bound for the Structural Hamming
    Distance.

    Parameters
    ----------
    true_graph : NetworkxGraph
        Input ground truth directed acyclic graph.
    order : List[int]
        A topological ordering on the nodes of the graph. Each entry is the
        row/column index of a node in the adjacency matrix of ``true_graph``
        (built with the default node list, i.e. the iteration order of
        ``true_graph.nodes``), and every index must appear exactly once.

    Returns
    -------
    err : int
        Number of edges of the ground truth graph not admitted by the given
        order.

    Raises
    ------
    ValueError
        If ``true_graph`` is not a directed acyclic graph, if the length of
        ``order`` does not match the number of nodes, or if ``order`` is not
        a permutation of the node indices.
    """
    if not nx.is_directed_acyclic_graph(true_graph):
        raise ValueError("The input graph must be directed and acyclic.")

    # Convert the graph to a 0/1 adjacency matrix. ``weight=None`` makes every
    # edge count as 1 even when the graph carries a ``weight`` edge attribute;
    # otherwise the result would be a sum of weights, not a number of edges.
    A = nx.to_numpy_array(true_graph, weight=None)

    if len(order) != A.shape[0] or A.shape[0] != A.shape[1]:
        raise ValueError("The dimensions of the graph and the order list do not match.")

    # Duplicated or out-of-range indices would silently yield a wrong count.
    if sorted(order) != list(range(len(order))):
        raise ValueError("The order list must contain each node index exactly once.")

    false_negatives_from_order = 0
    for position, node in enumerate(order):
        # A[i, j] != 0 encodes the edge i -> j. An edge into ``node`` coming
        # from any node placed later in the order contradicts the order.
        false_negatives_from_order += A[order[position + 1 :], node].sum()

    # ``to_numpy_array`` yields floats by default; honour the ``int`` contract.
    return int(false_negatives_from_order)
4 changes: 4 additions & 0 deletions dodiscover/toporder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .cam import CAM
from .das import DAS
from .nogam import NoGAM
from .score import SCORE
Loading

0 comments on commit ade4e48

Please sign in to comment.