Add executed_with_root_target condition to handle partial runs / fa…

…ilures
dagster-io · Oct 16, 2024 · 7ad38c3 · 7ad38c3
1 parent da8082f
commit 7ad38c3
Show file tree

Hide file tree

Showing 7 changed files with 172 additions and 7 deletions.
diff --git a/...modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py b/...modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py
@@ -676,7 +676,7 @@ def A() -> None: ...
         assert record["numRequested"] == 0
 
         # all nodes in the tree
-        assert len(record["evaluationNodes"]) == 32
+        assert len(record["evaluationNodes"]) == 35
 
         rootNode = record["evaluationNodes"][0]
         assert rootNode["uniqueId"] == record["rootUniqueId"]

diff --git a/python_modules/dagster/dagster/_core/asset_graph_view/asset_graph_view.py b/python_modules/dagster/dagster/_core/asset_graph_view/asset_graph_view.py
@@ -42,6 +42,7 @@
     from dagster._core.definitions.partition import PartitionsDefinition
     from dagster._core.instance import DagsterInstance
     from dagster._core.storage.asset_check_execution_record import AssetCheckExecutionResolvedStatus
+    from dagster._core.storage.dagster_run import RunRecord
     from dagster._utils.caching_instance_queryer import CachingInstanceQueryer
 
 
@@ -475,6 +476,86 @@ def compute_missing_subset(self, *, key: EntityKey, from_subset: EntitySubset) -
             ),
         )
 
+    def _expensively_filter_entity_subset(
+        self, subset: EntitySubset, filter_fn: Callable[[Optional[str]], bool]
+    ) -> EntitySubset:
+        if subset.is_partitioned:
+            return subset.compute_intersection_with_partition_keys(
+                {pk for pk in subset.expensively_compute_partition_keys() if filter_fn(pk)}
+            )
+        else:
+            return (
+                subset
+                if not subset.is_empty and filter_fn(None)
+                else self.get_empty_subset(key=subset.key)
+            )
+
+    def _run_record_targets_entity(self, run_record: "RunRecord", target_key: EntityKey) -> bool:
+        asset_selection = run_record.dagster_run.asset_selection or set()
+        check_selection = run_record.dagster_run.asset_check_selection or set()
+        return target_key in (asset_selection | check_selection)
+
+    def _compute_latest_check_run_executed_with_target(
+        self, partition_key: Optional[str], query_key: AssetCheckKey, target_key: EntityKey
+    ) -> bool:
+        from dagster._core.storage.asset_check_execution_record import AssetCheckExecutionRecord
+        from dagster._core.storage.dagster_run import RunRecord
+
+        check.invariant(partition_key is None, "Partitioned checks not supported")
+        check_record = AssetCheckExecutionRecord.blocking_get(self, query_key)
+        if check_record and check_record.event:
+            run_record = RunRecord.blocking_get(self, check_record.event.run_id)
+            return bool(run_record) and self._run_record_targets_entity(run_record, target_key)
+        else:
+            return False
+
+    def _compute_latest_asset_run_executed_with_target(
+        self, partition_key: Optional[str], query_key: AssetKey, target_key: EntityKey
+    ) -> bool:
+        from dagster._core.storage.dagster_run import RunRecord
+        from dagster._core.storage.event_log.base import AssetRecord
+
+        asset_record = AssetRecord.blocking_get(self, query_key)
+        if (
+            asset_record
+            and asset_record.asset_entry.last_materialization
+            and asset_record.asset_entry.last_materialization.asset_materialization
+            and asset_record.asset_entry.last_materialization.asset_materialization.partition
+            == partition_key
+        ):
+            run_record = RunRecord.blocking_get(
+                self, asset_record.asset_entry.last_materialization.run_id
+            )
+            return bool(run_record) and self._run_record_targets_entity(run_record, target_key)
+        else:
+            return False
+
+    def compute_latest_run_executed_with_subset(
+        self, from_subset: EntitySubset, target: EntityKey
+    ) -> EntitySubset:
+        """Computes the subset of from_subset for which the latest run also targeted
+        the provided target EntityKey.
+        """
+        return _dispatch(
+            key=from_subset.key,
+            check_method=lambda k: self._expensively_filter_entity_subset(
+                from_subset,
+                filter_fn=functools.partial(
+                    self._compute_latest_check_run_executed_with_target,
+                    query_key=k,
+                    target_key=target,
+                ),
+            ),
+            asset_method=lambda k: self._expensively_filter_entity_subset(
+                from_subset,
+                filter_fn=functools.partial(
+                    self._compute_latest_asset_run_executed_with_target,
+                    query_key=k,
+                    target_key=target,
+                ),
+            ),
+        )
+
     def _compute_updated_since_cursor_subset(
         self, key: AssetKey, cursor: Optional[int]
     ) -> EntitySubset[AssetKey]:

diff --git a/..._modules/dagster/dagster/_core/definitions/declarative_automation/automation_condition.py b/..._modules/dagster/dagster/_core/definitions/declarative_automation/automation_condition.py
@@ -441,6 +441,18 @@ def newly_updated() -> "BuiltinAutomationCondition":
 
         return NewlyUpdatedCondition()
 
+    @experimental
+    @staticmethod
+    def executed_with_root_target() -> "BuiltinAutomationCondition":
+        """Returns an AutomationCondition that is true if the latest run that updated the target also executed
+        with the root key that the global condition is applied to.
+        """
+        from dagster._core.definitions.declarative_automation.operands import (
+            LatestRunExecutedWithRootTargetCondition,
+        )
+
+        return LatestRunExecutedWithRootTargetCondition()
+
     @public
     @experimental
     @staticmethod
@@ -496,7 +508,11 @@ def any_deps_updated() -> "DepsAutomationCondition":
         """
         with disable_dagster_warnings():
             return AutomationCondition.any_deps_match(
-                AutomationCondition.newly_updated() | AutomationCondition.will_be_requested()
+                (
+                    AutomationCondition.newly_updated()
+                    & ~AutomationCondition.executed_with_root_target()
+                ).with_label("newly_updated_without_root")
+                | AutomationCondition.will_be_requested()
             ).with_label("any_deps_updated")
 
     @experimental

diff --git a/python_modules/dagster/dagster/_core/definitions/declarative_automation/operands/__init__.py b/python_modules/dagster/dagster/_core/definitions/declarative_automation/operands/__init__.py
@@ -6,6 +6,7 @@
     ExecutionFailedAutomationCondition as ExecutionFailedAutomationCondition,
     InitialEvaluationCondition as InitialEvaluationCondition,
     InLatestTimeWindowCondition as InLatestTimeWindowCondition,
+    LatestRunExecutedWithRootTargetCondition as LatestRunExecutedWithRootTargetCondition,
     MissingAutomationCondition as MissingAutomationCondition,
     NewlyRequestedCondition as NewlyRequestedCondition,
     NewlyUpdatedCondition as NewlyUpdatedCondition,

diff --git a/python_modules/dagster/dagster/_core/definitions/declarative_automation/operands/operands.py b/python_modules/dagster/dagster/_core/definitions/declarative_automation/operands/operands.py
@@ -140,6 +140,19 @@ def compute_subset(self, context: AutomationContext) -> EntitySubset:
         return context.previous_requested_subset or context.get_empty_subset()
 
 
+@whitelist_for_serdes
+@record
+class LatestRunExecutedWithRootTargetCondition(SubsetAutomationCondition):
+    @property
+    def name(self) -> str:
+        return "executed_with_root_target"
+
+    def compute_subset(self, context: AutomationContext) -> EntitySubset:
+        return context.asset_graph_view.compute_latest_run_executed_with_subset(
+            from_subset=context.candidate_subset, target=context.root_context.key
+        )
+
+
 @whitelist_for_serdes
 @record
 class NewlyUpdatedCondition(SubsetAutomationCondition):

diff --git a/.../declarative_automation_tests/automation_condition_tests/builtins/test_eager_condition.py b/.../declarative_automation_tests/automation_condition_tests/builtins/test_eager_condition.py
@@ -1,3 +1,4 @@
+import pytest
 from dagster import (
     AssetDep,
     AssetKey,
@@ -9,6 +10,7 @@
     DimensionPartitionMapping,
     MultiPartitionMapping,
     MultiPartitionsDefinition,
+    Output,
     StaticPartitionsDefinition,
     TimeWindowPartitionMapping,
     asset,
@@ -240,3 +242,55 @@ def foo_check() -> ...: ...
     # don't keep requesting
     result = evaluate_automation_conditions(defs=defs, instance=instance, cursor=result.cursor)
     assert result.total_requested == 0
+
+
+@pytest.mark.parametrize("b_result", ["skip", "fail", "materialize"])
+def test_eager_partial_run(b_result: str) -> None:
+    @asset
+    def root() -> None: ...
+
+    @asset(deps=[root], automation_condition=AutomationCondition.eager())
+    def A() -> None: ...
+
+    @asset(deps=[A], output_required=False, automation_condition=AutomationCondition.eager())
+    def B():
+        if b_result == "skip":
+            pass
+        elif b_result == "materialize":
+            yield Output(1)
+        else:
+            return 1 / 0
+
+    @asset(deps=[B], automation_condition=AutomationCondition.eager())
+    def C() -> None: ...
+
+    defs = Definitions(assets=[root, A, B, C])
+    instance = DagsterInstance.ephemeral()
+
+    # nothing updated yet
+    result = evaluate_automation_conditions(defs=defs, instance=instance)
+    assert result.total_requested == 0
+
+    # now root updated, so request a, b, and c
+    instance.report_runless_asset_event(AssetMaterialization("root"))
+    result = evaluate_automation_conditions(defs=defs, instance=instance, cursor=result.cursor)
+    assert result.total_requested == 3
+
+    # don't keep requesting
+    result = evaluate_automation_conditions(defs=defs, instance=instance, cursor=result.cursor)
+    assert result.total_requested == 0
+
+    # now simulate the above run, B / C will not be materialized
+    defs.get_implicit_global_asset_job_def().execute_in_process(
+        instance=instance, asset_selection=[A.key, B.key, C.key], raise_on_error=False
+    )
+    result = evaluate_automation_conditions(defs=defs, instance=instance, cursor=result.cursor)
+    # A gets materialized, but this shouldn't kick off B and C
+    assert result.total_requested == 0
+
+    # A gets materialized on its own, do kick off B and C
+    defs.get_implicit_global_asset_job_def().execute_in_process(
+        instance=instance, asset_selection=[A.key]
+    )
+    result = evaluate_automation_conditions(defs=defs, instance=instance, cursor=result.cursor)
+    assert result.total_requested == 2
diff --git a/...rative_automation_tests/automation_condition_tests/fundamentals/test_result_value_hash.py b/...rative_automation_tests/automation_condition_tests/fundamentals/test_result_value_hash.py
@@ -38,11 +38,11 @@
         ("dd74c7cfe19d869931ea4aad9ee10127", SC.on_cron("0 * * * *"), two_parents, False),
         ("861f8e40d4624d49c4ebdd034c8e1e84", SC.on_cron("0 * * * *"), two_parents_daily, False),
         # same as above
-        ("dfb268e321e2e7aa7b0e2e71fa674e06", SC.eager(), one_parent, False),
-        ("781252e1a53db1ecd5938b0da61dba0b", SC.eager(), one_parent, True),
-        ("293186887409aac2fe99b09bd633c64b", SC.eager(), one_parent_daily, False),
-        ("c92d9d5181b4d0a6c7ab5d1c6e26962a", SC.eager(), two_parents, False),
-        ("911bcc4f8904ec6dae85f6aaf78f5ee5", SC.eager(), two_parents_daily, False),
+        ("9b4c0a55b5fbea860f72ecc9d7a27e0e", SC.eager(), one_parent, False),
+        ("6925b7770eb2122048e4f50aa0a3303f", SC.eager(), one_parent, True),
+        ("45c2493882264dd06bee6c9abffabf3e", SC.eager(), one_parent_daily, False),
+        ("81132ce4972e8ac8d8be1a0e468637c0", SC.eager(), two_parents, False),
+        ("724d21fef8f7404f1e26b40ddeab711d", SC.eager(), two_parents_daily, False),
         # missing condition is invariant to changes other than partitions def changes
         ("6d7809c4949e3d812d7eddfb1b60d529", SC.missing(), one_parent, False),
         ("6d7809c4949e3d812d7eddfb1b60d529", SC.missing(), one_parent, True),