Merge branch 'conditional_independency_tests' of https://github.com/y…

…0-causal-inference/eliater into conditional_independency_tests
y0-causal-inference · Oct 26, 2023 · 425b6a3 · 425b6a3
2 parents 563cd0a + a2bf0a9
commit 425b6a3
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 26 deletions.
diff --git a/src/eliater/frontdoor_backdoor/multiple_mediators_with_multiple_confounders_nuisances.py b/src/eliater/frontdoor_backdoor/multiple_mediators_with_multiple_confounders_nuisances.py
@@ -56,7 +56,7 @@ def generate(
     # latent node between X and Z1
     # u1 = generator.normal(loc=40.0, scale=10.0, size=num_samples)
     # latent node between Y and Z2
-    u2 = generator.normal(loc=50.0, scale=5.0, size=num_samples)
+    #u2 = generator.normal(loc=50.0, scale=5.0, size=num_samples)
 
     beta0_z1 = 50  # 1.5
     # beta_u1_to_z1 = 0.3
@@ -69,12 +69,12 @@ def generate(
 
     beta0_z2 = 3
     beta_z1_to_z2 = 0.3
-    beta_u2_to_z2 = 0.7
+    #beta_u2_to_z2 = 0.7
 
     if Z2 in treatments:
         z2 = np.full(num_samples, treatments[Z2])
     else:
-        loc_z2 = beta0_z2 + z1 * beta_z1_to_z2 + u2 * beta_u2_to_z2
+        loc_z2 = beta0_z2 + z1 * beta_z1_to_z2 #+ u2 * beta_u2_to_z2
         z2 = generator.normal(loc=loc_z2, scale=4.0, size=num_samples)
 
     beta0_z3 = 4
@@ -117,12 +117,12 @@ def generate(
     beta0_y = 1.8
     beta_z3_to_y = 0.5
     beta_m2_to_y = 0.7
-    beta_u2_to_y = 0.8
+    #beta_u2_to_y = 0.8
     if Y in treatments:
         y = np.full(num_samples, treatments[Y])
     else:
         y = generator.normal(
-            loc=beta0_y + z3 * beta_z3_to_y + m2 * beta_m2_to_y + u2 * beta_u2_to_y,
+            loc=beta0_y + z3 * beta_z3_to_y + m2 * beta_m2_to_y, #+ u2 * beta_u2_to_y,
             scale=10.0,
             size=num_samples,
         )

diff --git a/src/eliater/repair.py b/src/eliater/repair.py
@@ -19,7 +19,9 @@
 
 .. code-block:: python
 
+    from y0.graph import NxMixedGraph
     from eliater.repair import conditional_independence_test_summary
+    import pandas as pd
 
     graph = NxMixedGraph.from_str_adj(
         directed={
@@ -35,16 +37,15 @@
     )
 
     # Get the data
-    import pandas as pd
     data = pd.read_csv(
     "https://raw.githubusercontent.com/y0-causal-inference/eliater/conditional_independency_tests/src/data/sachs_discretized_2bin.csv",
     index_col = False
     )
 
     conditional_independence_test_summary(graph, data, verbose=True)
 
-The results show that out of 35 cases,
-the conditional independence between P38 and PIP2, given PKC, fails with a p-value of 0.00425.
+The results show that out of 35 cases, 1 failed. The failed test is
+the conditional independence between P38 and PIP2, given PKC, with a p-value of 0.00425.
 
 This module relies on statistical tests, and statistical tests always have chances
 of producing false negatives, i.e., a pair of variables that are conditionally

diff --git a/src/eliater/sample_size_vs_pvalue.py b/src/eliater/sample_size_vs_pvalue.py
@@ -1,8 +1,10 @@
-"""This module shows the relationship between p-value and sample size.
+"""This module shows the relationship between p-value and sample size when testing conditional independencies.
 
 p-values decrease as the number of data points used in the conditional independency test
 increases, i.e., the larger the data, more conditional independences implied by the network
-will be considered as dependent. Hence, chances of false negatives increases.
+will be considered as dependent. Hence, the chance of false negatives increases. This module
+illustrates this. The content of this module relies on chapter 4 of this reference:
+https://livebook.manning.com/book/causal-ai/welcome/v-4/.
 
 Here is an example that illustrates this point. In the provided graph, R2 is independent of
 Z1 given R1. In addition, M1 is independent of R2 given R1. The data has been generated based
@@ -12,25 +14,22 @@
 .. code-block:: python
 
     from y0.graph import NxMixedGraph
-    from y0.dsl import Variable, X, Y
-    M1 = Variable("M1")
-    M2 = Variable("M2")
     from eliater.frontdoor_backdoor.multiple_mediators_with_multiple_confounders_nuisances import generate
-    from eliater.sample_size_vs_pvalue import estimate_p_val
+    from eliater.sample_size_vs_pvalue import generate_plot_expected_p_value_vs_num_data_points
 
     graph = NxMixedGraph.from_edges(
         directed=[
-            (Z1, X),
-            (X, M1),
-            (M1, M2),
-            (M2, Y),
-            (Z1, Z2),
-            (Z2, Z3),
-            (Z3, Y),
-            (M1, R1),
-            (R1, R2),
-            (R2, R3),
-            (Y, R3),
+            ('Z1', 'X'),
+            ('X', 'M1'),
+            ('M1', 'M2'),
+            ('M2', 'Y'),
+            ('Z1', 'Z2'),
+            ('Z2', 'Z3'),
+            ('Z3', 'Y'),
+            ('M1', 'R1'),
+            ('R1', 'R2'),
+            ('R2', 'R3'),
+            ('Y', 'R3'),
         ],
     )
 
@@ -70,7 +69,7 @@
                                                   boot_size=1000
                                                   )
 
-This plot shows that the expected p-value will again decrease as number of data points increases. For number
+This plot shows that the expected p-value decreases as the number of data points increases. For number
 of data points greater than 500, the test is more likely to reject the null hypothesis, and for number
 of data points greater than 900, the test always rejects the null hypothesis, i.e., the data will
 no longer support that R2 is independent of M1 given R1, where it should be.