Addition of topological order based methods (#129)

* Addition of topological order based methods * Add SCORE, DAS, NoGAM examples * Fix poetry lock file --------- Signed-off-by: francescomontagna <[email protected]> Co-authored-by: Adam Li <[email protected]>
py-why · Jun 28, 2023 · ade4e48 · ade4e48
1 parent dd7cbbb
commit ade4e48
Show file tree

Hide file tree

Showing 21 changed files with 2,925 additions and 39 deletions.
diff --git a/doc/api.rst b/doc/api.rst
@@ -50,6 +50,19 @@ Constraint-based structure learning
    PC
    FCI
 
+
+Order-based structure learning
+===================================
+
+.. currentmodule:: dodiscover.toporder
+.. autosummary::
+   :toctree: generated/
+
+   CAM
+   SCORE
+   DAS
+   NoGAM
+
 Comparing causal discovery algorithms
 =====================================
 
@@ -59,6 +72,7 @@ Comparing causal discovery algorithms
 
    confusion_matrix_networks
    structure_hamming_dist
+   toporder_divergence
 
 
 Typing

diff --git a/doc/references.bib b/doc/references.bib
@@ -2,6 +2,17 @@
 % Try to keep this list in alphabetical order based on citing name
 
 # Journal papers and conference proceedings
+@article{Buhlmann2013,
+  author  = {Bühlmann, Peter and Peters, Jonas and Ernest, Jan},
+  year    = {2014},
+  month   = {12},
+  pages   = {2526--2556},
+  title   = {CAM: Causal Additive Models, high-dimensional order search and penalized regression},
+  volume  = {42},
+  journal = {The Annals of Statistics},
+  doi     = {10.1214/14-AOS1260}
+}
+
 @article{Colombo2012,
   author    = {Diego Colombo and Marloes H. Maathuis and Markus Kalisch and Thomas S. Richardson},
   title     = {{Learning high-dimensional directed acyclic graphs with latent and selection variables}},
@@ -16,6 +27,7 @@ @article{Colombo2012
   url       = {https://doi.org/10.1214/11-AOS940}
 }
 
+
 @inproceedings{correa2020calculus,
   title     = {A calculus for stochastic interventions: Causal effect identification and surrogate experiments},
   author    = {Correa, Juan and Bareinboim, Elias},
@@ -43,6 +55,15 @@ @article{Kocaoglu2019characterization
   year    = {2019}
 }
 
+@article{Li2017,
+  author  = {Li, Yingzhen and Turner, Richard},
+  journal = {International Conference on Learning Representations (ICLR)},
+  year    = {2017},
+  month   = {05},
+  pages   = {},
+  title   = {Gradient Estimators for Implicit Models}
+}
+
 @article{Meek1995,
   author  = {Meek, Christopher},
   year    = {1995},
@@ -53,6 +74,22 @@ @article{Meek1995
   journal = {Proceedings of Eleventh Conference on Uncertainty in Artificial Intelligence, Montreal, QU}
 }
 
+@inproceedings{Montagna2023a,
+title={Scalable Causal Discovery with Score Matching},
+author={Francesco Montagna and Nicoletta Noceti and Lorenzo Rosasco and Kun Zhang and Francesco Locatello},
+booktitle={2nd Conference on Causal Learning and Reasoning},
+year={2023},
+url={https://openreview.net/forum?id=6VvoDjLBPQV}
+}
+
+@inproceedings{Montagna2023b,
+title={Causal Discovery with Score Matching on Additive Models with Arbitrary Noise},
+author={Francesco Montagna and Nicoletta Noceti and Lorenzo Rosasco and Kun Zhang and Francesco Locatello},
+booktitle={2nd Conference on Causal Learning and Reasoning},
+year={2023},
+url={https://openreview.net/forum?id=rVO0Bx90deu}
+}
+
 @book{Neapolitan2003,
   author    = {Neapolitan, Richard},
   year      = {2003},
@@ -64,6 +101,19 @@ @book{Neapolitan2003
   doi       = {10.1145/1327942.1327961}
 }
 
+@inproceedings{rolland2022,
+  title     = {Score Matching Enables Causal Discovery of Nonlinear Additive Noise Models},
+  author    = {Rolland, Paul and Cevher, Volkan and Kleindessner, Matth{\"a}us and Russell, Chris and Janzing, Dominik and Sch{\"o}lkopf, Bernhard and Locatello, Francesco},
+  booktitle = {Proceedings of the 39th International Conference on Machine Learning},
+  pages     = {18741--18753},
+  year      = {2022},
+  editor    = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
+  volume    = {162},
+  series    = {Proceedings of Machine Learning Research},
+  month     = {17--23 Jul},
+  publisher = {PMLR}
+}
+
 @article{uhler2013geometry,
   title     = {Geometry of the faithfulness assumption in causal inference},
   author    = {Uhler, Caroline and Raskutti, Garvesh and B{\"u}hlmann, Peter and Yu, Bin},
@@ -157,31 +207,30 @@ @article{cressieread1984
 
 
 @article{frenzel_partial_2007,
-  title    = {Partial {Mutual} {Information} for {Coupling} {Analysis} of {Multivariate} {Time} {Series}},
-  volume   = {99},
-  doi      = {10.1103/PhysRevLett.99.204101},
-  journal  = {Physical review letters},
-  author   = {Frenzel, Stefan and Pompe, Bernd},
-  month    = dec,
-  year     = {2007},
-  pages    = {204101},
-  file     = {Full Text PDF:/Users/adam2392/Zotero/storage/8ICFXVZG/Frenzel and Pompe - 2007 - Partial Mutual Information for Coupling Analysis o.pdf:application/pdf}
+  title   = {Partial {Mutual} {Information} for {Coupling} {Analysis} of {Multivariate} {Time} {Series}},
+  volume  = {99},
+  doi     = {10.1103/PhysRevLett.99.204101},
+  journal = {Physical review letters},
+  author  = {Frenzel, Stefan and Pompe, Bernd},
+  month   = dec,
+  year    = {2007},
+  pages   = {204101},
+  file    = {Full Text PDF:/Users/adam2392/Zotero/storage/8ICFXVZG/Frenzel and Pompe - 2007 - Partial Mutual Information for Coupling Analysis o.pdf:application/pdf}
 }
 
 @article{kraskov_estimating_2004,
-  title    = {Estimating mutual information},
-  volume   = {69},
-  url      = {https://link.aps.org/doi/10.1103/PhysRevE.69.066138},
-  doi      = {10.1103/PhysRevE.69.066138},
-  number   = {6},
-  urldate  = {2023-01-27},
-  journal  = {Physical Review E},
-  author   = {Kraskov, Alexander and Stögbauer, Harald and Grassberger, Peter},
-  month    = jun,
-  year     = {2004},
-  note     = {Publisher: American Physical Society},
-  pages    = {066138},
-  file     = {APS Snapshot:/Users/adam2392/Zotero/storage/GRW23BYU/PhysRevE.69.html:text/html;Full Text PDF:/Users/adam2392/Zotero/storage/NJT9QCVA/Kraskov et al. - 2004 - Estimating mutual information.pdf:application/pdf}
+  title   = {Estimating mutual information},
+  volume  = {69},
+  url     = {https://link.aps.org/doi/10.1103/PhysRevE.69.066138},
+  doi     = {10.1103/PhysRevE.69.066138},
+  number  = {6},
+  urldate = {2023-01-27},
+  journal = {Physical Review E},
+  author  = {Kraskov, Alexander and Stögbauer, Harald and Grassberger, Peter},
+  month   = jun,
+  year    = {2004},
+  note    = {Publisher: American Physical Society},
+  pages   = {066138}
 }
 
 @article{Lopez2016revisiting,
@@ -219,9 +268,7 @@ @inproceedings{Runge2018cmi
   volume    = {84},
   series    = {Proceedings of Machine Learning Research},
   month     = {09--11 Apr},
-  publisher = {PMLR},
-  pdf       = {http://proceedings.mlr.press/v84/runge18a/runge18a.pdf},
-  url       = {https://proceedings.mlr.press/v84/runge18a.html}
+  publisher = {PMLR}
 }
 
 @article{Sen2017model,
@@ -232,6 +279,7 @@ @article{Sen2017model
   year    = {2017}
 }
 
+
 @inproceedings{Yu2020Bregman,
   title     = {Measuring the Discrepancy between Conditional Distributions: Methods, Properties and Applications},
   author    = {Yu, Shujian and Shaker, Ammar and Alesiani, Francesco and Principe, Jose},
@@ -264,15 +312,15 @@ @inproceedings{Zhang2011
 % Example refs
 
 @article{sachsdataset2005,
-  author   = {Karen Sachs  and Omar Perez  and Dana Pe'er  and Douglas A. Lauffenburger  and Garry P. Nolan },
-  title    = {Causal Protein-Signaling Networks Derived from Multiparameter Single-Cell Data},
-  journal  = {Science},
-  volume   = {308},
-  number   = {5721},
-  pages    = {523-529},
-  year     = {2005},
-  doi      = {10.1126/science.1105809},
-  url      = {https://www.science.org/doi/abs/10.1126/science.1105809},
-  eprint   = {https://www.science.org/doi/pdf/10.1126/science.1105809},
+  author  = {Karen Sachs  and Omar Perez  and Dana Pe'er  and Douglas A. Lauffenburger  and Garry P. Nolan },
+  title   = {Causal Protein-Signaling Networks Derived from Multiparameter Single-Cell Data},
+  journal = {Science},
+  volume  = {308},
+  number  = {5721},
+  pages   = {523-529},
+  year    = {2005},
+  doi     = {10.1126/science.1105809},
+  url     = {https://www.science.org/doi/abs/10.1126/science.1105809},
+  eprint  = {https://www.science.org/doi/pdf/10.1126/science.1105809}
 }
 
diff --git a/doc/whats_new/v0.1.rst b/doc/whats_new/v0.1.rst
@@ -48,6 +48,7 @@ Changelog
 - |Feature| Add pre-commit hooks for linting, type-checking, and code formatting, by `Jaron Lee`_ (:pr:`117`)
 - |Feature| Adds parallelization via joblib to the skeleton learners, by `Adam Li`_ (:pr:`127`)
 - |Feature| Add a suite of general categorical data CI tests, by `Adam Li`_ (:pr:`128`)
+- |Feature| Implement CAM, SCORE, DAS, NoGAM algorithms in ``dodiscover.toporder`` submodule (:pr:`129`)
 
 Code and Documentation Contributors
 -----------------------------------

diff --git a/dodiscover/__init__.py b/dodiscover/__init__.py
@@ -1,6 +1,7 @@
 from . import cd  # noqa: F401
 from . import ci  # noqa: F401
 from . import metrics  # noqa: F401
+from . import toporder
 from ._protocol import EquivalenceClass, Graph
 from ._version import __version__  # noqa: F401
 from .constraint import FCI, PC

diff --git a/dodiscover/metrics.py b/dodiscover/metrics.py
@@ -1,14 +1,13 @@
-from typing import Optional
+from typing import List, Optional
 
 import networkx as nx
 import numpy as np
 from numpy.typing import NDArray
 from sklearn.metrics import confusion_matrix
 from sklearn.preprocessing import LabelBinarizer
 
-from dodiscover.typing import NetworkxGraph
-
 from ._protocol import Graph
+from .typing import NetworkxGraph
 
 
 def confusion_matrix_networks(
@@ -146,3 +145,42 @@ def structure_hamming_dist(
         diff = diff + diff.T
         diff[diff > 1] = 1  # Ignoring the double edges.
         return np.sum(diff) / 2
+
+
+def toporder_divergence(true_graph: NetworkxGraph, order: List[int]) -> int:
+    """Compute topological ordering divergence.
+
+    Topological order divergence is used to compute the number of false negatives,
+    i.e. missing edges, associated to a topological order of the nodes of a
+    graph with respect to the ground truth structure.
+    If the topological ordering is compatible with the graph ground truth,
+    the divergence is equal to 0. In the worst case of completely reversed
+    ordering, toporder_divergence is equals to P, the number of edges (positives)
+    in the ground truth graph.
+    Note that the divergence defines a lower bound for the Structural Hamming Distance.
+
+    Parameters
+    ----------
+    true_graph : NetworkxGraph
+        Input groundtruth directed acyclic graph.
+    order : List[int]
+        A topological ordering on the nodes of the graph.
+
+    Returns
+    -------
+    err : int
+        Sum of the number of edges of A not admitted by the given order.
+    """
+    if not nx.is_directed_acyclic_graph(true_graph):
+        raise ValueError("The input graph must be directed and acyclic.")
+
+    # convert graphs to adjacency matrix in numpy array format
+    A = nx.to_numpy_array(true_graph)
+
+    if len(order) != A.shape[0] or A.shape[0] != A.shape[1]:
+        raise ValueError("The dimensions of the graph and the order list do not match.")
+
+    false_negatives_from_order = 0
+    for i in range(len(order)):
+        false_negatives_from_order += A[order[i + 1 :], order[i]].sum()
+    return false_negatives_from_order
diff --git a/dodiscover/toporder/__init__.py b/dodiscover/toporder/__init__.py
@@ -0,0 +1,4 @@
+from .cam import CAM
+from .das import DAS
+from .nogam import NoGAM
+from .score import SCORE