From 48b1ddf47a3fca24b2d8d93e29f0e064f11eddfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thibault=20Cl=C3=A9rice?= Date: Sat, 24 Aug 2024 17:56:39 +0200 Subject: [PATCH] Parents are parenting --- dapitains/metadata/xml_parser.py | 11 +++++------ tests/catalog/example-sub-collection.xml | 1 + tests/test_catalog.py | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dapitains/metadata/xml_parser.py b/dapitains/metadata/xml_parser.py index 946312e..c33c8a4 100644 --- a/dapitains/metadata/xml_parser.py +++ b/dapitains/metadata/xml_parser.py @@ -15,7 +15,7 @@ class Catalog: objects: Dict[str, Collection] = field(default_factory=dict) -def parse_metadata(xml: ET.Element) -> Dict[str, Any]: +def parse_metadata(xml: ET.Element) -> Tuple[Dict[str, Any], List[str]]: """ Parse Metadata :param xml: Collection/Resource tag @@ -50,9 +50,8 @@ def parse_metadata(xml: ET.Element) -> Dict[str, Any]: parents = [] for node in xml.xpath("./parent/text()"): parents.append(str(node)) - obj["parents"] = parents - return obj + return obj, parents def parse_collection(xml: ET.Element, basedir: str, tree: Catalog) -> Collection: @@ -62,8 +61,10 @@ def parse_collection(xml: ET.Element, basedir: str, tree: Catalog) -> Collection :param basedir: Directory used to resolve filepath, that are relative to the main object :param tree: Catalog that is updated with objects. """ - obj = parse_metadata(xml) + obj, parents = parse_metadata(xml) obj = Collection(**obj, resource=xml.tag == "resource") + for parent in parents: + tree.relationships.append((parent, obj.identifier)) tree.objects[obj.identifier] = obj if xml.attrib.get("filepath") and obj.resource: obj.filepath = os.path.normpath(os.path.join(basedir, xml.attrib["filepath"])) @@ -74,8 +75,6 @@ def parse_collection(xml: ET.Element, basedir: str, tree: Catalog) -> Collection else: _, child = ingest_catalog(os.path.join(basedir, member.attrib["filepath"]), tree) tree.relationships.append((obj.identifier, child.identifier)) - for parent in child.parents: - tree.relationships.append((parent, child.identifier)) return obj diff --git a/tests/catalog/example-sub-collection.xml b/tests/catalog/example-sub-collection.xml index e311d53..f5798c4 100644 --- a/tests/catalog/example-sub-collection.xml +++ b/tests/catalog/example-sub-collection.xml @@ -9,6 +9,7 @@ Historical Document A document about historical events. + https://foo.bar/default World War II en diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 9776303..c376624 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -60,5 +60,6 @@ def test_ingestion(): assert sorted(tree.relationships) == [ ('https://example.org/collection1', 'https://example.org/resource1'), ('https://foo.bar/default', 'https://example.org/collection1'), + ('https://foo.bar/default', 'https://example.org/resource1',), ('https://foo.bar/default', 'https://foo.bar/text') ]