Skip to content

Commit

Permalink
[WIP] Navigation function is a go
Browse files Browse the repository at this point in the history
  • Loading branch information
PonteIneptique committed Aug 25, 2024
1 parent 0e22570 commit 2867cd0
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 56 deletions.
15 changes: 11 additions & 4 deletions dapitains/app/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,24 @@ def recurse(items, current_path):
def get_nav(
refs: List[Dict[str, Any]],
paths: Dict[str, List[int]],
start_or_ref: Optional[str],
end: Optional[str],
start_or_ref: Optional[str] = None,
end: Optional[str] = None,
down: Optional[int] = 1
) -> Tuple[List[Dict[str, Any]], Optional[Dict[str, Any]], Optional[Dict[str, Any]]]:
""" Given a references set and a path set, provide the CitableUnit from start to end at down level.
"""

paths_index = list(paths.keys())
start_index, end_index = None, None
if start_or_ref:
start_index = paths_index.index(start_or_ref)
if end:
end_index = paths_index.index(end) + 1
if start_or_ref:
start_index = paths_index.index(start_or_ref)
if not end:
for index, reference in enumerate(paths_index[start_index+1:]):
if len(paths[start_or_ref]) == len(paths[reference]):
end_index = index + start_index + 1

paths = dict(list(paths.items())[start_index:end_index])

Expand Down
35 changes: 25 additions & 10 deletions dapitains/tei/citeStructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,15 @@ class CitableUnit:
node: Optional[saxonlib.PyXdmNode] = None
dublinCore: Dict[str, List[str]] = field(default_factory=lambda: defaultdict(list))
extension: Dict[str, List[str]] = field(default_factory=lambda: defaultdict(list))
level: int = 1
parent: Optional[str] = None

def json(self):
out = {
"citeType": self.citeType,
"ref": self.ref
"ref": self.ref,
"level": self.level,
"parent": self.parent
}
if self.children:
out["members"] = [
Expand Down Expand Up @@ -189,26 +193,30 @@ def _dispatch(
child_xpath: str,
structure: CitableStructure,
xpath_processor: saxonlib.PyXPathProcessor,
unit: CitableUnit):
unit: CitableUnit,
level: int):
# target = self.generate_xpath(child.ref)
if len(structure.children) == 1:
self.find_refs(
root=xpath_processor.evaluate_single(child_xpath),
structure=structure.children[0],
unit=unit
unit=unit,
level=level
)
else:
self.find_refs_from_branches(
root=xpath_processor.evaluate_single(child_xpath),
structure=structure.children,
unit=unit
unit=unit,
level=level
)

def find_refs(
self,
root: saxonlib.PyXdmNode,
structure: CitableStructure = None,
unit: Optional[CitableUnit] = None
unit: Optional[CitableUnit] = None,
level: int = 1
) -> List[CitableUnit]:
xpath_proc = get_xpath_proc(elem=root)
prefix = (unit.ref + structure.delim) if unit else ""
Expand All @@ -218,7 +226,9 @@ def find_refs(
for value in xpath_proc.evaluate(f"{xpath_prefix}{structure.xpath}"):
child = CitableUnit(
citeType=structure.citeType,
ref=f"{prefix}{value.string_value}"
ref=f"{prefix}{value.string_value}",
parent=unit.ref if unit else None,
level=level
)

if structure.metadata:
Expand All @@ -238,15 +248,17 @@ def find_refs(
child_xpath=self.generate_xpath(child.ref),
structure=structure,
xpath_processor=xpath_proc,
unit=child
unit=child,
level=level+1
)
return units

def find_refs_from_branches(
self,
root: saxonlib.PyXdmNode,
structure: List[CitableStructure],
unit: Optional[CitableUnit] = None
unit: Optional[CitableUnit] = None,
level: int = 1
) -> List[CitableUnit]:
xpath_proc = get_xpath_proc(elem=root)
prefix = (unit.ref) if unit else "" # ToDo: Reinject delim
Expand Down Expand Up @@ -281,7 +293,9 @@ def compare_nodes_by_doc_order(node1, node2):
for elem in unsorted:
child_unit = CitableUnit(
citeType=elem.struct.citeType,
ref=elem.citation
ref=elem.citation,
level=level,
parent=unit.ref if unit else None
)

if unit:
Expand All @@ -294,7 +308,8 @@ def compare_nodes_by_doc_order(node1, node2):
child_xpath=self.generate_xpath(child_unit.ref),
structure=elem.struct,
xpath_processor=xpath_proc,
unit=child_unit
unit=child_unit,
level=level+1
)
return units

51 changes: 26 additions & 25 deletions tests/test_citeStructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,20 @@ def test_parsing():
assert parser.generate_xpath("Luke") == "//body/div[@n='Luke']"

assert [root.json() for root in parser.find_refs(root=TEI, structure=parser.units)] == [
{'citeType': 'book', 'ref': 'Luke', 'members': [
{'citeType': 'chapter', 'ref': 'Luke 1', 'members': [
{'citeType': 'verse', 'ref': 'Luke 1:1'},
{'citeType': 'verse', 'ref': 'Luke 1:2'},
{'citeType': 'bloup', 'ref': 'Luke 1#1'}
]}
{'citeType': 'book', 'ref': 'Luke', 'parent': None, 'level': 1, 'members': [
{'citeType': 'chapter', 'ref': 'Luke 1', 'parent': 'Luke', 'level': 2, 'members': [
{'citeType': 'verse', 'ref': 'Luke 1:1', 'parent': 'Luke 1', 'level': 3},
{'citeType': 'verse', 'ref': 'Luke 1:2', 'parent': 'Luke 1', 'level': 3},
{'citeType': 'bloup', 'ref': 'Luke 1#1', 'parent': 'Luke 1', 'level': 3}
]
}
]},
{'citeType': 'book', 'ref': 'Mark', 'members': [
{'citeType': 'chapter', 'ref': 'Mark 1', 'members': [
{'citeType': 'verse', 'ref': 'Mark 1:1'},
{'citeType': 'verse', 'ref': 'Mark 1:2'},
{'citeType': 'bloup', 'ref': 'Mark 1#1'},
{'citeType': 'verse', 'ref': 'Mark 1:3'}
{'citeType': 'book', 'ref': 'Mark', 'parent': None, 'level': 1, 'members': [
{'citeType': 'chapter', 'ref': 'Mark 1', 'parent': 'Mark', 'level': 2, 'members': [
{'citeType': 'verse', 'ref': 'Mark 1:1', 'parent': 'Mark 1', 'level': 3},
{'citeType': 'verse', 'ref': 'Mark 1:2', 'parent': 'Mark 1', 'level': 3},
{'citeType': 'bloup', 'ref': 'Mark 1#1', 'parent': 'Mark 1', 'level': 3},
{'citeType': 'verse', 'ref': 'Mark 1:3', 'parent': 'Mark 1', 'level': 3}
]}
]}
]
Expand All @@ -84,17 +85,17 @@ def test_cite_data():
refs = parser.find_refs(root=TEI, structure=parser.units)
refs = [ref.json() for ref in refs]
assert refs == [
{'citeType': 'book', 'ref': '1', 'dublinCore': {
{'citeType': 'book', 'ref': '1', 'parent': None, 'level': 1, 'dublinCore': {
'http://purl.org/dc/terms/title': ['Introduction', 'Introduction'],
'http://purl.org/dc/terms/creator': ['John Doe']}},
{'citeType': 'book', 'ref': '2', 'dublinCore': {'http://purl.org/dc/terms/title': ["Background", 'Contexte']}},
{'citeType': 'book', 'ref': '3', 'dublinCore': {
{'citeType': 'book', 'ref': '2', 'parent': None, 'level': 1, 'dublinCore': {'http://purl.org/dc/terms/title': ["Background", 'Contexte']}},
{'citeType': 'book', 'ref': '3', 'parent': None, 'level': 1, 'dublinCore': {
'http://purl.org/dc/terms/title': ['Methodology', 'Méthodologie'],
'http://purl.org/dc/terms/creator': ['Albert Einstein']}},
{'citeType': 'book', 'ref': '4', 'dublinCore': {
{'citeType': 'book', 'ref': '4', 'parent': None, 'level': 1, 'dublinCore': {
'http://purl.org/dc/terms/title': ['Results', 'Résultats'],
'http://purl.org/dc/terms/creator': ['Isaac Newton']}},
{'citeType': 'book', 'ref': '5', 'dublinCore': {
{'citeType': 'book', 'ref': '5', 'parent': None, 'level': 1, 'dublinCore': {
'http://purl.org/dc/terms/title': ['Conclusion', 'Conclusion'],
'http://purl.org/dc/terms/creator': ['Marie Curie']
}}]
Expand All @@ -108,24 +109,24 @@ def test_advanced_cite_data():
refs = parser.find_refs(root=TEI, structure=parser.units)
refs = [ref.json() for ref in refs]
assert refs == [
{'citeType': 'part', 'ref': 'part-1', 'members': [
{'citeType': 'book', 'ref': 'part-1.1', 'dublinCore': {
{'citeType': 'part', 'ref': 'part-1', 'parent': None, 'level': 1, 'members': [
{'citeType': 'book', 'ref': 'part-1.1', 'parent': 'part-1', 'level': 2, 'dublinCore': {
'http://purl.org/dc/terms/title': ['Introduction', 'Introduction'],
'http://purl.org/dc/terms/creator': ['John Doe']}},
{'citeType': 'book', 'ref': 'part-1.2', 'dublinCore': {
{'citeType': 'book', 'ref': 'part-1.2', 'parent': 'part-1', 'level': 2, 'dublinCore': {
'http://purl.org/dc/terms/title': ["Background", 'Contexte']
}}
], 'extension': {"http://foo.bar/part": ["1"]}},
{'citeType': 'part', 'ref': 'part-2', 'members': [
{'citeType': 'book', 'ref': 'part-2.3', 'dublinCore': {
{'citeType': 'part', 'ref': 'part-2', 'parent': None, 'level': 1, 'members': [
{'citeType': 'book', 'ref': 'part-2.3', 'parent': 'part-2', 'level': 2, 'dublinCore': {
'http://purl.org/dc/terms/title': ['Methodology', 'Méthodologie'],
'http://purl.org/dc/terms/creator': ['Albert Einstein']}},
{'citeType': 'book', 'ref': 'part-2.4', 'dublinCore': {
{'citeType': 'book', 'ref': 'part-2.4', 'parent': 'part-2', 'level': 2, 'dublinCore': {
'http://purl.org/dc/terms/title': ['Results', 'Résultats'],
'http://purl.org/dc/terms/creator': ['Isaac Newton']}}
], 'extension': {"http://foo.bar/part": ["2"]}},
{'citeType': 'part', 'ref': 'part-3', 'members': [
{'citeType': 'book', 'ref': 'part-3.5', 'dublinCore': {
{'citeType': 'part', 'ref': 'part-3', 'parent': None, 'level': 1, 'members': [
{'citeType': 'book', 'ref': 'part-3.5', 'parent': 'part-3', 'level': 2, 'dublinCore': {
'http://purl.org/dc/terms/title': ['Conclusion', 'Conclusion'],
'http://purl.org/dc/terms/creator': ['Marie Curie']
}}
Expand Down
78 changes: 61 additions & 17 deletions tests/test_db_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,12 @@ def test_simple_path():
"Mark 1:3": [1, 0, 3]
}
}
assert strip_members(get_member_by_path(refs[None], paths[None]["Luke"])) == {'citeType': 'book', 'ref': 'Luke'}
assert get_member_by_path(refs[None], paths[None]["Mark 1:3"]) == {'citeType': 'verse', 'ref': 'Mark 1:3'}
assert strip_members(
get_member_by_path(refs[None], paths[None]["Luke"])
) == {'citeType': 'book', 'ref': 'Luke', "level": 1, "parent": None}, "Check that members are stripped"
assert get_member_by_path(
refs[None], paths[None]["Mark 1:3"]
) == {'citeType': 'verse', 'ref': 'Mark 1:3', "level": 3, "parent": "Mark 1"}


def test_navigation():
Expand All @@ -59,19 +63,59 @@ def test_navigation():
for tree, obj in doc.citeStructure.items()
}
paths = {tree: generate_paths(ref) for tree, ref in refs.items()}

assert get_nav(refs[None], paths[None], start_or_ref=None, end=None, down=1) == ([
{'citeType': 'book', 'ref': 'Luke'},
{'citeType': 'book', 'ref': 'Mark'}
], None, None)
assert get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Luke 1#1", down=0) == ([
{'citeType': 'verse', 'ref': 'Luke 1:1'},
{'citeType': 'verse', 'ref': 'Luke 1:2'},
{'citeType': 'bloup', 'ref': 'Luke 1#1'}
], {'citeType': 'verse', 'ref': 'Luke 1:1'}, {'citeType': 'bloup', 'ref': 'Luke 1#1'})
assert get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Mark 1:2", down=0) == ([
{'citeType': 'verse', 'ref': 'Luke 1:1'},
{'citeType': 'verse', 'ref': 'Luke 1:2'},
{'citeType': 'bloup', 'ref': 'Luke 1#1'},
{'citeType': 'verse', 'ref': 'Mark 1:1'},
{'citeType': 'verse', 'ref': 'Mark 1:2'}
], {'citeType': 'verse', 'ref': 'Luke 1:1'}, {'citeType': 'verse', 'ref': 'Mark 1:2'})
{'citeType': 'book', 'ref': 'Luke', "level": 1, "parent": None},
{'citeType': 'book', 'ref': 'Mark', "level": 1, "parent": None}
], None, None), "Check that base function works"

assert get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Luke 1#1", down=0) == (
[
{'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"},
{'citeType': 'verse', 'ref': 'Luke 1:2', "level": 3, "parent": "Luke 1"},
{'citeType': 'bloup', 'ref': 'Luke 1#1', "level": 3, "parent": "Luke 1"}
],
{'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"},
{'citeType': 'bloup', 'ref': 'Luke 1#1', "level": 3, "parent": "Luke 1"}
), "Check that ?start/end works"

assert get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Mark 1:2", down=0) == (
[
{'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"},
{'citeType': 'verse', 'ref': 'Luke 1:2', "level": 3, "parent": "Luke 1"},
{'citeType': 'bloup', 'ref': 'Luke 1#1', "level": 3, "parent": "Luke 1"},
{'citeType': 'verse', 'ref': 'Mark 1:1', "level": 3, "parent": "Mark 1"},
{'citeType': 'verse', 'ref': 'Mark 1:2', "level": 3, "parent": "Mark 1"}
],
{'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"},
{'citeType': 'verse', 'ref': 'Mark 1:2', "level": 3, "parent": "Mark 1"}
), "Check that ?start/end works across parents"

assert get_nav(refs[None], paths[None], start_or_ref="Luke 1", down=1) == (
[
{'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"},
{'citeType': 'verse', 'ref': 'Luke 1:2', "level": 3, "parent": "Luke 1"},
{'citeType': 'bloup', 'ref': 'Luke 1#1', "level": 3, "parent": "Luke 1"}
],
{'citeType': 'chapter', 'ref': 'Luke 1', "level": 2, "parent": "Luke"},
None
), "Check that ?ref works"

assert get_nav(refs[None], paths[None], start_or_ref="Luke", down=1) == (
[
{'citeType': 'chapter', 'ref': 'Luke 1', "level": 2, "parent": "Luke"},
],
{'citeType': 'book', 'ref': 'Luke', "level": 1, "parent": None},
None
), "Check that ?ref works"

assert get_nav(refs[None], paths[None], start_or_ref=None, end=None, down=2) == (
[
{'citeType': 'book', 'ref': 'Luke', "level": 1, "parent": None},
{'citeType': 'chapter', 'ref': 'Luke 1', "level": 2, "parent": "Luke"},
{'citeType': 'book', 'ref': 'Mark', "level": 1, "parent": None},
{'citeType': 'chapter', 'ref': 'Mark 1', "level": 2, "parent": "Mark"}
],
None,
None
), "Check that down=2 works"

0 comments on commit 2867cd0

Please sign in to comment.