Fixes rendering of XML declarations

delb-xml · Dec 30, 2023 · cf8c8ae
1 parent 3d2a9dc
commit cf8c8ae
Show file tree

Hide file tree

Showing 7 changed files with 40 additions and 42 deletions.
diff --git a/delb/__init__.py b/delb/__init__.py
@@ -228,7 +228,7 @@ class Document(metaclass=DocumentMeta):
 
     >>> document = Document("<root/>")
     >>> str(document)
-    "<?xml version='1.0' encoding='UTF-8'?><root/>"
+    '<?xml version="1.0" encoding="UTF-8"?><root/>'
 
     :param source: Anything that the configured loaders can make sense of to return a
                    parsed document tree.
@@ -488,16 +488,19 @@ def save(
             )
 
     def __serialize(self, serializer, encoding, indentation):
-        declaration = f"<?xml version='1.0' encoding='{encoding.upper()}'?>"
+        declaration = f'<?xml version="1.0" encoding="{encoding.upper()}"?>'
         if indentation:
             declaration += "\n"
         serializer.buffer.write(declaration)
         for node in self.head_nodes:
             serializer.serialize_node(node)
         with altered_default_filters():
             serializer.serialize_root(self.root)
-        for node in self.tail_nodes:
-            serializer.serialize_node(node)
+        if self.tail_nodes:
+            if indentation:
+                serializer.buffer.write("\n")
+            for node in self.tail_nodes:
+                serializer.serialize_node(node)
 
     def write(
         self,

diff --git a/tests/test_comment_and_pi_nodes.py b/tests/test_comment_and_pi_nodes.py
@@ -69,7 +69,7 @@ def test_comment_node():
     with altered_default_filters():
         assert (
             str(root.document)
-            == "<?xml version='1.0' encoding='UTF-8'?><!--before--><root/><!--after-->"
+            == '<?xml version="1.0" encoding="UTF-8"?><!--before--><root/><!--after-->'
         )
 
 

diff --git a/tests/test_document.py b/tests/test_document.py
@@ -120,7 +120,7 @@ class DocumentSubclass(Document):
 def test_set_root():
     document = Document("<root><node/></root>")
     document.root = document.root[0].detach()
-    assert str(document) == "<?xml version='1.0' encoding='UTF-8'?><node/>"
+    assert str(document) == '<?xml version="1.0" encoding="UTF-8"?><node/>'
 
     document_2 = Document("<root><replacement/>parts</root>")
     with pytest.raises(ValueError, match="detached node"):
@@ -140,8 +140,8 @@ def test_root_siblings():
 
     assert len(head_nodes) == len(tail_nodes) == 2
 
-    assert (
-        str(document) == "<?xml version='1.0' encoding='UTF-8'?><?Blood Fire?>"
+    assert str(document) == (
+        '<?xml version="1.0" encoding="UTF-8"?><?Blood Fire?>'
         "<!-- I Roy --><root/><!-- Prince Jazzbo --><?over out?>"
     )
 

diff --git a/tests/test_nodes.py b/tests/test_nodes.py
@@ -86,9 +86,8 @@ def test_insert_issue_in_a_more_complex_situation():
     document = Document("<root><foo><div1><bar><div2/></bar> </div1></foo></root>")
     for node in document.root.css_select("bar,foo"):
         node.detach(retain_child_nodes=True)
-    assert (
-        str(document) == "<?xml version='1.0' encoding='UTF-8'?>"
-        "<root><div1><div2/> </div1></root>"
+    assert str(document) == (
+        '<?xml version="1.0" encoding="UTF-8"?>' "<root><div1><div2/> </div1></root>"
     )
 
 

diff --git a/tests/test_serialization.py b/tests/test_serialization.py
@@ -14,7 +14,7 @@
 )
 from _delb.nodes import DETACHED
 
-from tests.utils import assert_documents_are_semantical_equal, count_pis
+from tests.utils import assert_documents_are_semantical_equal
 
 
 @pytest.mark.parametrize(
@@ -57,7 +57,7 @@ def test_align_attributes(indentation, out):
             "  ",
             '<root><a>hi</a><b x="foo"><c/></b></root>',
             """\
-             <?xml version='1.0' encoding='UTF-8'?>
+             <?xml version="1.0" encoding="UTF-8"?>
              <root>
                <a>hi</a>
                <b x="foo">
@@ -91,8 +91,8 @@ def test_significant_whitespace_is_saved(result_file):
     root[2].append_children("world!")
 
     document.save(result_file)
-    assert (
-        result_file.read_text() == "<?xml version='1.0' encoding='UTF-8'?>"
+    assert result_file.read_text() == (
+        '<?xml version="1.0" encoding="UTF-8"?>'
         "<text><hi>Hello</hi> <hi>world!</hi></text>"
     )
 
@@ -106,7 +106,7 @@ def test_significant_whitespace_is_saved(result_file):
     )
 
     assert result_file.read_text().splitlines() == [
-        "<?xml version='1.0' encoding='UTF-8'?>",
+        '<?xml version="1.0" encoding="UTF-8"?>',
         "<text>",
         "  <hi>Hello</hi>",
         "   ",  # FIXME?
@@ -154,30 +154,33 @@ def test_single_nodes(declarations, node_constructor, args, out):
 
 
 def test_that_root_siblings_are_preserved(files_path, result_file):
-    Document(files_path / "root_siblings.xml").clone().save(result_file)
-    assert count_pis(result_file) == {
-        '<?another-target ["it", "could", "be", "anything"]?>': 1,
-        '<?target some="processing" instructions="here"?>': 2,
-    }
+    origin_path = files_path / "root_siblings.xml"
+    Document(origin_path).save(result_file, indentation="  ")
 
-    assert result_file.read_text() == (
-        "<?xml version='1.0' encoding='UTF-8'?>"
-        '<?target some="processing" instructions="here"?>'
-        '<?another-target ["it", "could", "be", "anything"]?>'
-        "<!-- a comment -->"
-        '<?target some="processing" instructions="here"?>'
-        "<root/>"
-        "<!-- end -->"
+    assert (
+        origin_path.read_text()
+        == result_file.read_text()
+        == (
+            '<?xml version="1.0" encoding="UTF-8"?>\n'
+            '<?target some="processing" instructions="here"?>\n'
+            '<?another-target ["it", "could", "be", "anything"]?>\n'
+            "<!-- a comment -->\n"
+            '<?target some="processing" instructions="here"?>\n'
+            "<root/>\n"
+            "<!-- end -->\n"
+        )
     )
 
 
 def test_transparency(files_path, result_file):
+    parser_options = ParserOptions(collapse_whitespace=False)
     for file in (x for x in files_path.glob("[!tei_]*.xml")):
-        Document(file, parser_options=ParserOptions(collapse_whitespace=False)).save(
-            result_file
-        )
+        origin = Document(file, parser_options=parser_options)
+        origin.save(result_file)
+        _copy = Document(file, parser_options=parser_options)
         assert_documents_are_semantical_equal(file, result_file)
-        assert count_pis(file) == count_pis(result_file)
+        assert origin.head_nodes == _copy.head_nodes
+        assert origin.tail_nodes == _copy.tail_nodes
 
 
 @pytest.mark.parametrize(
@@ -279,5 +282,5 @@ def test_text_width(files_path, indentation, text_width, out):
 
 def test_xml_declaration(files_path):
     assert str(Document(files_path / "tei_marx_manifestws_1848.TEI-P5.xml")).startswith(
-        "<?xml version='1.0' encoding='UTF-8'?>"
+        '<?xml version="1.0" encoding="UTF-8"?>'
     )
diff --git a/tests/test_tag_node.py b/tests/test_tag_node.py
@@ -131,7 +131,7 @@ def test_detach_and_document_property():
     assert node.parent is None
     assert node.document is None
     assert root.document is document
-    assert str(document) == "<?xml version='1.0' encoding='UTF-8'?><root/>"
+    assert str(document) == '<?xml version="1.0" encoding="UTF-8"?><root/>'
 
 
 def test_detach_node_with_tail_1():

diff --git a/tests/utils.py b/tests/utils.py
@@ -1,6 +1,5 @@
 import re
 import sys
-from collections import Counter
 
 from xmldiff import main as xmldiff
 
@@ -47,12 +46,6 @@ def assert_nodes_are_in_document_order(*nodes):
         raise AssertionError
 
 
-def count_pis(source):
-    with source.open("rt") as f:
-        pi_strings = find_processing_instructions(f.read())
-    return Counter(x[0] for x in pi_strings if not x[0].startswith("<?xml"))
-
-
 @altered_default_filters()
 def index_path(node):
     result = []