From 0f13e806179b2c25c7c6930be33b289b8242e984 Mon Sep 17 00:00:00 2001
From: Frank Sachsenheim
Date: Mon, 20 May 2024 22:44:21 +0200
Subject: [PATCH] TagNode.merge_text_nodes: Also drops empty text nodes

Optimizations are applied as well.
---
 _delb/nodes.py         | 15 +++++++++++++--
 tests/test_tag_node.py | 10 ++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/_delb/nodes.py b/_delb/nodes.py
index 79c134e..d3e47bd 100644
--- a/_delb/nodes.py
+++ b/_delb/nodes.py
@@ -2258,9 +2258,20 @@ def location_path(self) -> str:
     def merge_text_nodes(self):
         """
         Merges all consecutive text nodes in the subtree into one.
+        Text nodes without content are dropped.
         """
-        for node in self.iterate_descendants(is_text_node):
-            node._merge_appended_text_nodes()
+        with _wrapper_cache:
+            empty_nodes: list[TextNode] = []
+
+            for node in self.iterate_descendants():
+                if not isinstance(node, TextNode):
+                    continue
+                node._merge_appended_text_nodes()
+                if not node.content:
+                    empty_nodes.append(node)
+
+            for node in empty_nodes:
+                node.detach()
 
     @property
     def namespace(self) -> Optional[str]:
diff --git a/tests/test_tag_node.py b/tests/test_tag_node.py
index 96cff44..bb88dd7 100644
--- a/tests/test_tag_node.py
+++ b/tests/test_tag_node.py
@@ -460,6 +460,16 @@ def test_make_node_in_context_with_namespace():
     assert node._etree_obj.tag == "{https://name.space}foo"
 
 
+@pytest.mark.parametrize(
+    ("child_nodes", "expected_count"),
+    (([""], 0), ([" "], 1), ([" ", " "], 1), (["", "", tag("child"), "", ""], 1)),
+)
+def test_merge_text_nodes(child_nodes, expected_count):
+    node = new_tag_node("node", children=child_nodes)
+    node.merge_text_nodes()
+    assert len(node) == expected_count
+
+
 def test_names(sample_document):
     root = sample_document.root
 