From 9f8ea526526e88ae28c3837c0157a35d8e87a9ea Mon Sep 17 00:00:00 2001
From: Ricardo Vieira
Date: Fri, 30 Jun 2023 11:20:40 +0200
Subject: [PATCH 1/2] Derive probability for broadcast operation

---
 .github/workflows/tests.yml |   1 +
 mypy.ini                    |   1 +
 pymc/logprob/__init__.py    |   1 +
 pymc/logprob/shape.py       | 121 ++++++++++++++++++++++++++++++++++++
 tests/logprob/test_shape.py |  50 +++++++++++++++
 5 files changed, 174 insertions(+)
 create mode 100644 pymc/logprob/shape.py
 create mode 100644 tests/logprob/test_shape.py

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index d72431194e..e5e2b2fac9 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -112,6 +112,7 @@ jobs:
           tests/logprob/test_mixture.py
           tests/logprob/test_rewriting.py
           tests/logprob/test_scan.py
+          tests/logprob/test_shape.py
           tests/logprob/test_tensor.py
           tests/logprob/test_transforms.py
           tests/logprob/test_utils.py
diff --git a/mypy.ini b/mypy.ini
index 360e64c2e5..e420cc7697 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -10,3 +10,4 @@ disallow_untyped_defs = False
 disallow_untyped_decorators = False
 ignore_missing_imports = True
 warn_unused_ignores = False
+disable_error_code = annotation-unchecked
diff --git a/pymc/logprob/__init__.py b/pymc/logprob/__init__.py
index 0ddea90b6f..ad001546c5 100644
--- a/pymc/logprob/__init__.py
+++ b/pymc/logprob/__init__.py
@@ -50,6 +50,7 @@
 import pymc.logprob.checks
 import pymc.logprob.mixture
 import pymc.logprob.scan
+import pymc.logprob.shape
 import pymc.logprob.tensor
 import pymc.logprob.transforms
diff --git a/pymc/logprob/shape.py b/pymc/logprob/shape.py
new file mode 100644
index 0000000000..ed20cab9ae
--- /dev/null
+++ b/pymc/logprob/shape.py
@@ -0,0 +1,121 @@
+# Copyright 2023 The PyMC Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
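+
+# This module makes broadcasting measurable: a graph rewrite replaces
+# `BroadcastTo` Ops applied to measurable variables with `MeasurableBroadcast`,
+# whose logp "unbroadcasts" the value back to the base variable's shape,
+# defers to the base variable's logp, and assigns zero probability to values
+# that could not have originated via broadcasting.
+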
+from typing import Optional
+
+import numpy as np
+import pytensor.tensor as pt
+
+from pytensor.graph import node_rewriter
+from pytensor.tensor.extra_ops import BroadcastTo
+
+from pymc.logprob.abstract import MeasurableVariable, _logprob, _logprob_helper
+from pymc.logprob.rewriting import PreserveRVMappings, measurable_ir_rewrites_db
+
+
+class MeasurableBroadcast(BroadcastTo):
+    pass
+
+
+MeasurableVariable.register(MeasurableBroadcast)
+
+
+measurable_broadcast = MeasurableBroadcast()
+
+
+@_logprob.register(MeasurableBroadcast)
+def broadcast_logprob(op, values, rv, *shape, **kwargs):
+    """Log-probability expression for a (statically) broadcasted RV.
+
+    The log-probability is the same as that of the base RV, as if no broadcasting had happened:
+
+    ``logp(broadcast_to(normal(size=(3, 1)), (2, 3, 4)), zeros((2, 3, 4))) == logp(normal(size=(3, 1)), zeros((3, 1)))``
+
+    And the probability is zero (``-np.inf`` in log space) if the value could not have originated via broadcasting:
+
+    ``logp(broadcast_to(normal(size=(1,)), (3,)), [1, 2, 3]) == [-np.inf]``
+
+    """
+    [value] = values
+
+    n_new_dims = len(shape) - rv.ndim
+    assert n_new_dims >= 0
+
+    # Enumerate broadcasted dims
+    expanded_dims = tuple(range(n_new_dims))
+    broadcast_dims = tuple(
+        i + n_new_dims
+        for i, (v_bcast, rv_bcast) in enumerate(
+            zip(value.broadcastable[n_new_dims:], rv.broadcastable)
+        )
+        if (not v_bcast) and rv_bcast
+    )
+
+    # "Unbroadcast" the value via indexing.
+    # All entries along a broadcasted dimension should be identical, so we simply select the first of each.
+    indices = []
+    for i in range(value.ndim):
+        # Remove expanded dims
+        if i in expanded_dims:
+            indices.append(0)
+        # Keep first entry of broadcasted (but not expanded) dims
+        elif i in broadcast_dims:
+            indices.append(slice(0, 1))
+        else:
+            indices.append(slice(None))
+
+    unbroadcast_value = value[tuple(indices)]
+    logp = _logprob_helper(rv, unbroadcast_value)
+
+    # Check that the entries along broadcasted dimensions were indeed identical,
+    # by comparing the value with a re-broadcasted version of itself
+    valid_value = pt.broadcast_to(unbroadcast_value, shape)
+    # Note: This could fail due to float-precision issues.
+    # If that proves to be a problem we should switch to `pt.allclose`
+    check = pt.all(pt.eq(value, valid_value))
+    logp = pt.switch(check, logp, -np.inf)
+
+    # Reintroduce the expanded dims in the returned logp
+    if n_new_dims > 0:
+        logp = pt.shape_padleft(logp, n_new_dims)
+
+    return logp
+
+
+@node_rewriter([BroadcastTo])
+def find_measurable_broadcast(fgraph, node):
+    r"""Finds `BroadcastTo`\s for which a `logprob` can be computed."""
+
+    if isinstance(node.op, MeasurableBroadcast):
+        return None  # pragma: no cover
+
+    rv_map_feature: Optional[PreserveRVMappings] = getattr(fgraph, "preserve_rv_mappings", None)
+
+    if rv_map_feature is None:
+        return None  # pragma: no cover
+
+    base_rv, *shape = node.inputs
+
+    if not rv_map_feature.request_measurable([base_rv]):
+        return None
+
+    new_rv = measurable_broadcast.make_node(base_rv, *shape).default_output()
+
+    return [new_rv]
+
+
+measurable_ir_rewrites_db.register(
+    "find_measurable_broadcast",
+    find_measurable_broadcast,
+    "basic",
+    "shape",
+)
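
The unbroadcast-and-check logic above is easiest to see in plain NumPy. Below is
a minimal sketch of the same idea (illustrative only, not part of the patch;
`broadcast_logprob` operates on symbolic PyTensor graphs rather than eagerly on
arrays):

    import numpy as np

    # A base RV of shape (3, 1) whose value was broadcast to (2, 3, 4)
    value = np.broadcast_to(np.arange(3).reshape(3, 1), (2, 3, 4))

    # "Unbroadcast" via indexing: drop the single expanded leading dim and keep
    # the first entry of the broadcasted trailing dim, recovering shape (3, 1)
    unbroadcast = value[0, :, 0:1]
    assert unbroadcast.shape == (3, 1)

    # The value is a valid broadcast only if re-broadcasting reproduces it
    # exactly; otherwise `broadcast_logprob` returns -inf
    assert np.all(value == np.broadcast_to(unbroadcast, (2, 3, 4)))
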
diff --git a/tests/logprob/test_shape.py b/tests/logprob/test_shape.py
new file mode 100644
index 0000000000..6b9dbed766
--- /dev/null
+++ b/tests/logprob/test_shape.py
@@ -0,0 +1,50 @@
+# Copyright 2023 The PyMC Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import pytensor
+import pytensor.tensor as pt
+import scipy.stats as st
+
+from pymc import logp
+
+
+def test_measurable_broadcast():
+    b_shape = pt.vector("b_shape", shape=(3,), dtype=int)
+
+    x = pt.random.normal(size=(3, 1))
+    bcast_x = pt.broadcast_to(x, shape=b_shape)
+    bcast_x.name = "bcast_x"
+
+    bcast_x_value = bcast_x.clone()
+    logp_bcast_x = logp(bcast_x, bcast_x_value)
+    logp_fn = pytensor.function([b_shape, bcast_x_value], logp_bcast_x, on_unused_input="ignore")
+
+    # assert_allclose also asserts shapes match (if neither is scalar)
+    np.testing.assert_allclose(
+        logp_fn([1, 3, 1], np.zeros((1, 3, 1))),
+        st.norm.logpdf(np.zeros((1, 3, 1))),
+    )
+    np.testing.assert_allclose(
+        logp_fn([1, 3, 5], np.zeros((1, 3, 5))),
+        st.norm.logpdf(np.zeros((1, 3, 1))),
+    )
+    np.testing.assert_allclose(
+        logp_fn([2, 3, 5], np.broadcast_to(np.arange(3).reshape(1, 3, 1), (2, 3, 5))),
+        st.norm.logpdf(np.arange(3).reshape(1, 3, 1)),
+    )
+    # Invalid broadcast value
+    np.testing.assert_array_equal(
+        logp_fn([1, 3, 5], np.arange(3 * 5).reshape(1, 3, 5)),
+        np.full(shape=(1, 3, 1), fill_value=-np.inf),
+    )
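
At the API level, patch 1 makes `pymc.logp` derivable through `broadcast_to`.
A short usage sketch of the resulting behavior (illustrative only, not part of
the patch; assumes the patch is applied, variable names are arbitrary):

    import numpy as np
    import pytensor.tensor as pt

    from pymc import logp

    x = pt.random.normal(size=(3, 1))
    bcast_x = pt.broadcast_to(x, (2, 3, 4))

    # One logp entry per *base* entry, with the expanded dim padded on the left:
    # the result has shape (1, 3, 1) and matches logp(x, zeros((3, 1)))
    print(logp(bcast_x, np.zeros((2, 3, 4))).eval().shape)  # (1, 3, 1)

    # A value that cannot be a broadcast of the base RV gets probability zero
    invalid = np.arange(2 * 3 * 4).reshape(2, 3, 4)
    print(logp(bcast_x, invalid).eval())  # (1, 3, 1) array of -inf
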
From 042c9f3c0c1a107f4b5e86ca3f5244c65d9d3a34 Mon Sep 17 00:00:00 2001
From: Ricardo Vieira
Date: Fri, 30 Jun 2023 11:27:16 +0200
Subject: [PATCH 2/2] Derive probability for transforms with implicit broadcasting

A warning is issued, as this graph is unlikely to be what most users intend.

---
 pymc/logprob/transforms.py       | 69 ++++++++++++++++++++++----------
 tests/logprob/test_transforms.py | 32 ++++++++++++---
 2 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py
index 5f751b5bf1..2621ad8d9b 100644
--- a/pymc/logprob/transforms.py
+++ b/pymc/logprob/transforms.py
@@ -35,6 +35,7 @@
 # SOFTWARE.
 
 import abc
+import warnings
 
 from copy import copy
 from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
@@ -111,6 +112,7 @@
     cleanup_ir_rewrites_db,
     measurable_ir_rewrites_db,
 )
+from pymc.logprob.shape import measurable_broadcast
 from pymc.logprob.utils import check_potential_measurability
@@ -564,10 +566,13 @@ def find_measurable_transforms(fgraph: FunctionGraph, node: Node) -> Optional[Li
     scalar_op = node.op.scalar_op
     measurable_input_idx = 0
+    measurable_input_broadcast = (
+        measurable_input.type.broadcastable != node.default_output().type.broadcastable
+    )
 
     transform_inputs: Tuple[TensorVariable, ...] = (measurable_input,)
     transform: RVTransform
 
-    transform_dict = {
+    unary_transforms_dict = {
         Exp: ExpTransform(),
         Log: LogTransform(),
         Abs: AbsTransform(),
@@ -581,29 +586,49 @@ def find_measurable_transforms(fgraph: FunctionGraph, node: Node) -> Optional[Li
         Erfc: ErfcTransform(),
         Erfcx: ErfcxTransform(),
     }
-    transform = transform_dict.get(type(scalar_op), None)
-    if isinstance(scalar_op, Pow):
-        # We only allow for the base to be measurable
-        if measurable_input_idx != 0:
-            return None
-        try:
-            (power,) = other_inputs
-            power = pt.get_underlying_scalar_constant_value(power).item()
-        # Power needs to be a constant
-        except NotScalarConstantError:
+    transform = unary_transforms_dict.get(type(scalar_op), None)
+    if transform is None:
+        if isinstance(scalar_op, Pow):
+            # We only allow for the base to be measurable
+            if measurable_input_idx != 0:
+                return None
+            try:
+                (power,) = other_inputs
+                base_power = pt.get_underlying_scalar_constant_value(power).item()
+            # Power needs to be a constant
+            except NotScalarConstantError:
                 return None
-        transform_inputs = (measurable_input, power)
-        transform = PowerTransform(power=power)
-    elif isinstance(scalar_op, Add):
-        transform_inputs = (measurable_input, pt.add(*other_inputs))
-        transform = LocTransform(
-            transform_args_fn=lambda *inputs: inputs[-1],
-        )
-    elif transform is None:
-        transform_inputs = (measurable_input, pt.mul(*other_inputs))
-        transform = ScaleTransform(
-            transform_args_fn=lambda *inputs: inputs[-1],
+            transform_inputs = (measurable_input, power)
+            transform = PowerTransform(power=base_power)
+        elif isinstance(scalar_op, Add):
+            transform_inputs = (measurable_input, pt.add(*other_inputs))
+            transform = LocTransform(
+                transform_args_fn=lambda *inputs: inputs[-1],
+            )
+        elif isinstance(scalar_op, Mul):
+            transform_inputs = (measurable_input, pt.mul(*other_inputs))
+            transform = ScaleTransform(
+                transform_args_fn=lambda *inputs: inputs[-1],
+            )
+        else:
+            raise TypeError(
+                f"Scalar Op not supported: {scalar_op}. Rewrite should not have been triggered"
+            )  # pragma: no cover
+
+    if measurable_input_broadcast:
+        # This rewrite logic only supports broadcasting for transforms with two inputs, where the first is measurable.
+        # This covers all current cases; update if other cases are supported in the future.
+        if len(transform_inputs) != 2 or measurable_input_idx != 0:
+            return None
+        warnings.warn(
+            "MeasurableTransform with implicit broadcasting detected. This corresponds to a potentially degenerate probability graph.\n"
+            "If you did not intend this, make sure the base measurable variable is created with all the dimensions from the start.\n"
+            "Otherwise, an explicit `broadcast_to` operation can be used to silence this warning.\n",
+            UserWarning,
         )
+        measurable_input, other_input = transform_inputs
+        measurable_input = measurable_broadcast(measurable_input, other_input.shape)
+        transform_inputs = (measurable_input, other_input)
 
     transform_op = MeasurableTransform(
         scalar_op=scalar_op,
diff --git a/tests/logprob/test_transforms.py b/tests/logprob/test_transforms.py
index 9960dff948..91644fc0e8 100644
--- a/tests/logprob/test_transforms.py
+++ b/tests/logprob/test_transforms.py
@@ -807,16 +807,36 @@ def test_discrete_rv_multinary_transform_fails():
         conditional_logp({y_rv: y_rv.clone()})
 
 
-@pytest.mark.xfail(reason="Check not implemented yet")
-def test_invalid_broadcasted_transform_rv_fails():
+@pytest.mark.filterwarnings("error")  # Fail if unexpected warning is issued
+@pytest.mark.parametrize("implicit_broadcast", (True, False))
+def test_broadcasted_transform_rv(implicit_broadcast):
     loc = pt.vector("loc")
-    y_rv = loc + pt.random.normal(0, 1, size=1, name="base_rv")
+    base_rv = pt.random.normal(0, 1, size=1, name="base_rv")
+    if implicit_broadcast:
+        y_rv = loc + base_rv
+    else:
+        y_rv = loc + pt.broadcast_to(base_rv, shape=loc.shape)
     y_rv.name = "y"
     y_vv = y_rv.clone()
 
-    # This logp derivation should fail or count only once the values that are broadcasted
-    logprob = logp(y_rv, y_vv)
-    assert logprob.eval({y_vv: [0, 0, 0, 0], loc: [0, 0, 0, 0]}).shape == ()
+    if implicit_broadcast:
+        with pytest.warns(UserWarning, match="implicit broadcasting detected"):
+            logprob = logp(y_rv, y_vv)
+    else:
+        logprob = logp(y_rv, y_vv)
+    logprob_fn = pytensor.function([loc, y_vv], logprob)
+
+    # All values must have the same offset from `loc`
+    np.testing.assert_allclose(
+        logprob_fn([1, 1, 1, 1], [0, 0, 0, 0]), sp.stats.norm.logpdf([0], loc=1)
+    )
+    np.testing.assert_allclose(
+        logprob_fn([1, 2, 3, 4], [0, 1, 2, 3]), sp.stats.norm.logpdf([0], loc=1)
+    )
+
+    # Otherwise probability is 0
+    np.testing.assert_array_equal(logprob_fn([1, 1, 1, 1], [0, 0, 0, 1]), [-np.inf])
+    np.testing.assert_array_equal(logprob_fn([1, 2, 3, 4], [0, 0, 0, 0]), [-np.inf])
 
 
 @pytest.mark.parametrize("numerator", (1.0, 2.0))
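
Patch 2's behavior at the API level, for completeness (illustrative only, not
part of the patch; assumes both patches are applied, variable names are
arbitrary):

    import pytensor
    import pytensor.tensor as pt

    from pymc import logp

    loc = pt.vector("loc")
    base_rv = pt.random.normal(0, 1, size=1)

    # `loc` implicitly stretches the size-1 RV, so deriving the logp emits a
    # UserWarning ("MeasurableTransform with implicit broadcasting detected ...")
    # but still returns a valid graph
    y = loc + base_rv
    y_value = y.clone()
    logp_y = logp(y, y_value)
    logp_fn = pytensor.function([loc, y_value], logp_y)

    # All entries must share a single draw of the base RV: a constant offset
    # from `loc` gets a finite logp, anything else gets -inf
    print(logp_fn([1.0, 2.0, 3.0], [0.0, 1.0, 2.0]))  # finite (offset -1 everywhere)
    print(logp_fn([1.0, 2.0, 3.0], [0.0, 0.0, 0.0]))  # [-inf] (offsets differ)
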