Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

getting bnodes and skolemisation right (aka fix/issue-42) #43

Merged
merged 8 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ test-coverage-with-graphdb: ## runs the standard test-suite for all available i
check: ## performs linting on the python code
@poetry run black --check --diff .
@poetry run isort --check --diff .
@poetry run flake8 . --exclude ${FLAKE8_EXCLUDE} --ignore=E501,E201,E202,W503
@poetry run flake8 . --exclude ${FLAKE8_EXCLUDE}

lint-fix: ## fixes code according to the lint suggestions
@poetry run black .
Expand Down
2 changes: 1 addition & 1 deletion pyrdfstore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
:platform: Unix, Windows
:synopsis: A library for creating and interacting with RDF stores

.. moduleauthor:: "Open Science Team of the Flanders Marine Institute, VLIZ vzw" <[email protected]>
.. moduleauthor:: "Open Science Team VLIZ vzw" <[email protected]>
"""

from .build import create_rdf_store
Expand Down
3 changes: 2 additions & 1 deletion pyrdfstore/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

def create_rdf_store(*store_info) -> RDFStore:
"""Creates an rdf_store based on the passed non-None arguments.
0 of those arguments, will yield a MemoryRDFStore, 1-2 will be passed as read_uri resp write_uri to URIRDFStore
0 of those arguments, will yield a MemoryRDFStore,
1-2 will be passed as read_uri resp write_uri to URIRDFStore
Anything beyond is unacceptable
"""
store_info = [
Expand Down
80 changes: 57 additions & 23 deletions pyrdfstore/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,34 @@ def timestamp():
return datetime.now(UTC_tz)


def reparse(g: Graph, format="nt"):
    """Rebuild a graph by serializing it and parsing the output again.

    Hack/workaround for https://github.com/RDFLib/rdflib/issues/2760:
    the passed graph is dumped to an intermediate format that is known
    to work (so not jsonld!) and that dump is parsed into a new Graph.

    :param g: the graph to reparse
    :param format: the intermediate format to use for the round-trip
    :return: whatever Graph.parse() returns for the freshly parsed copy
    """
    fresh = Graph()
    dumped = g.serialize(format=format)
    return fresh.parse(data=dumped, format=format)


class RDFStore(ABC):
"""This interface describes the basic contract for having read, write operations versus a
managed set of named-graphs so that the lastmod timestamp on each of these is being tracked properly
so the 'age' of these can be compared easily to decide on required or oportune updates
"""This interface describes the basic contract for having read,
write operations versus a managed set of named-graphs so that
the lastmod timestamp on each of these is being tracked properly
so the 'age' of these can be compared easily to decide on required
or opportune updates
"""

@abstractmethod
def select(self, sparql: str, named_graph: Optional[str]) -> Result:
"""executes a sparql select query, possibly narrowed to the named_grap it represents
"""executes a sparql select query, possibly narrowed to
the named_graph it represents

:param sparql: the query-statement to execute
:type sparql: str
:param named_graph: the uri describing the named_graph into which the select should be narrowed
:param named_graph: the uri describing the named_graph into which
the select should be narrowed
:type named_graph: str
:return: the result of the query
:rtype: Result
Expand All @@ -43,25 +58,32 @@ def select(self, sparql: str, named_graph: Optional[str]) -> Result:

@abstractmethod
def insert(self, graph: Graph, named_graph: Optional[str] = None) -> None:
"""inserts the triples from the passed graph into the suggested named_graph
"""inserts the triples from the passed graph into
the suggested named_graph

:param graph: the graph of triples to insert
:type graph: Graph
:param named_graph: the uri describing the named_graph into which the graph should be inserted
:param named_graph: the uri describing the named_graph into which
the graph should be inserted
:type named_graph: str
:rtype: None
"""
pass

def verify_max_age(self, named_graph: str, age_minutes: int) -> bool:
"""verifies that a certain graph is not aged older than a certain amount of minutes
(as this just uses self.lastmod_ts() implementations should just get that right and simply inherit this)

:param named_graph: the uri describing the named_graph to check the age of
"""verifies that a certain graph is not aged older than a certain
amount of minutes
Note: as this just uses self.lastmod_ts() from implementations
those should just get that method right and can simply inherit
this one.

:param named_graph: the uri describing the named_graph to check
the age of
:type named_graph: str
:param age_minutes: the max acceptable age in minutes
:type age_minutes: int
:return: True if the graph has aged less than the passed number of minutes in the argument, else False
:return: True if the graph has aged less than the passed number of
minutes in the argument, else False
:rtype: bool
"""
named_graph_lastmod = self.lastmod_ts(named_graph)
Expand All @@ -76,11 +98,13 @@ def verify_max_age(self, named_graph: str, age_minutes: int) -> bool:
@abstractmethod
def lastmod_ts(self, named_graph: str) -> datetime:
"""returns the update timestamp of the specified graph
Note: the implementations should make the stored and returned datetime object
Note: the implementations should make sure the stored and returned
datetime objects are
1. timezone - aware and
2. placed in the UTC_tz

:param named_graph: the uri describing the named_graph to get the lastmod timestamp of
:param named_graph: the uri describing the named_graph to get
the lastmod timestamp of
:type named_graph: str
:return: the time of last modification (insert or drop)
:rtype: datetime
Expand All @@ -92,7 +116,8 @@ def lastmod_ts(self, named_graph: str) -> datetime:
def named_graphs(self) -> Iterable[str]:
"""returns the known & managed named-graphs in the store

:return: the list of named-graphs, known and managed (possibly already deleted) in this store
:return: the list of named-graphs, known and managed
(possibly already deleted) in this store
:rtype: List[str]
"""
pass
Expand All @@ -115,7 +140,8 @@ def drop_graph(self, named_graph: str) -> None:
def forget_graph(self, named_graph: str) -> None:
"""forgets about the named_graph being under control
This functions independently of the drop_graph method.
So any client of this service is expected to decide when (or not) to combine both
So any client of this service is expected to decide when (or not)
to combine both

Note: dropping any unknown graph should just work without complaints
Note: forgetting a graph removes any trail of its 'update'
Expand All @@ -130,12 +156,13 @@ def forget_graph(self, named_graph: str) -> None:


class URIRDFStore(RDFStore):
"""This class is used to connect to a SPARQL endpoint and execute SPARQL queries
"""This class is used to connect to a SPARQL endpoint and execute
SPARQL queries

:param read_uri: The URI of the SPARQL endpoint to read from
:type read_uri: str
:param write_uri: The URI of the SPARQL endpoint to write to.
If not provided, the store can only be read from, not updated.
If not provided, the store can only be read from, not updated.
:type write_uri: Optional[str]
"""

Expand Down Expand Up @@ -169,6 +196,7 @@ def select(self, sparql: str, named_graph: Optional[str] = None) -> Result:
return result

def insert(self, graph: Graph, named_graph: Optional[str] = NIL_NS):
graph = reparse(graph)
assert (
self.allows_update
), "data can not be inserted into a store if no write_uri is provided"
Expand All @@ -182,11 +210,14 @@ def insert(self, graph: Graph, named_graph: Optional[str] = NIL_NS):
def _update_registry_lastmod(
self, named_graph: str, lastmod: datetime = None
) -> Iterable[str]:
"""Consults and changes the admin-graph of lastmod entries per named_graph.
"""Consults and changes the admin-graph of lastmod entries
per named_graph.

:param named_graph: the named_graph to be handled, required, can be None to return the list of all available names
:param named_graph: the named_graph to be handled, required,
can be None to return the list of all available names
:type named_graph: str (or None)
:param lastmod: the new lastmod timestamp for this named_graph, if None (or not provided) this will 'forget' the named_graph
:param lastmod: the new lastmod timestamp for this named_graph,
if None (or not provided) this will 'forget' the named_graph
:type lastmod: datetime
:return: the list of named_graphs in management
:rtype: Iterable[str]
Expand Down Expand Up @@ -232,7 +263,8 @@ def lastmod_ts(self, named_graph: str) -> datetime:
lastmod: Literal = adm_graph.value(
URIRef(named_graph), SCHEMA_DATEMODIFIED
)
# above is None if nothing found, else convert the literal to actual .value (datetime)
# above is None if nothing found,
# else convert the literal to actual .value (datetime)
return lastmod.value if lastmod is not None else None

def drop_graph(self, named_graph: str) -> None:
Expand All @@ -251,7 +283,8 @@ def forget_graph(self, named_graph: str) -> None:


class MemoryRDFStore(RDFStore):
# check if rdflib.Dataset could not help out here, such would allign more logically and elegantly?
# check if rdflib.Dataset could not help out here,
# such would align more logically and elegantly?
def __init__(self):
self._all: Graph = Graph(**g_cfg_kwargs)
self._named_graphs = dict()
Expand All @@ -266,6 +299,7 @@ def select(self, sparql: str, named_graph: Optional[str] = None) -> Result:
return target.query(sparql)

def insert(self, graph: Graph, named_graph: Optional[str] = None):
graph = reparse(graph)
named_graph_graph = None
if named_graph is not None:
if named_graph not in self._named_graphs:
Expand Down
61 changes: 57 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,67 @@
from typing import Iterable, Optional

import pytest
from rdflib import BNode, Graph, URIRef
from rdflib import BNode, Graph, Namespace, URIRef
from util4tests import enable_test_logging, log

from pyrdfstore import RDFStore, create_rdf_store

# folder holding the input files for the tests, located next to this file
TEST_INPUT_FOLDER = Path(__file__).parent / "./input"
# namespace object (and one term from it) made available to the tests
# NOTE(review): the DCMI terms namespace is usually written as
# "http://purl.org/dc/terms/" (no trailing '#') -- confirm whether the
# '#' here is intentional before relying on DCT_ABSTRACT matching real data
DCT: Namespace = Namespace("http://purl.org/dc/terms/#")
DCT_ABSTRACT: URIRef = DCT.abstract
# catch-all query, used as the default check in assert_file_ingest
SELECT_ALL_SPO = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }"


enable_test_logging() # note that this includes loading .env into os.getenv


def format_from_extension(fpath: Path) -> str:
    """Derive the rdflib parser format name from a file's extension.

    The lookup is case-insensitive, so ``.TTL`` and ``.ttl`` are treated
    the same.

    :param fpath: path of the file to inspect (only its suffix is used)
    :type fpath: Path
    :return: the format name matching the suffix
        ("turtle" for .ttl, "json-ld" for .jsonld)
    :rtype: str
    :raises KeyError: if the suffix is not one of the supported extensions
    """
    suffix_to_format = {".ttl": "turtle", ".jsonld": "json-ld"}
    suffix = fpath.suffix.lower()
    if suffix not in suffix_to_format:
        # keep the KeyError type callers would have seen from the plain
        # dict lookup, but say which file/extension caused it
        raise KeyError(
            f"no known rdf format for extension {suffix!r} of {fpath}; "
            f"supported extensions are {sorted(suffix_to_format)}"
        )
    return suffix_to_format[suffix]


def assert_file_ingest(
    rdf_store: RDFStore,
    fpath: Path,
    sparql_test: Optional[str] = None,
    expected_count: Optional[int] = None,
):
    """Ingest a file into a store and assert a query returns the expected count.

    The file is parsed into a Graph, inserted into a named graph derived
    from the file name (``urn:test:<stem>``) after first dropping that
    named graph, and then checked with a select query.

    :param rdf_store: the store to test the ingest on
    :type rdf_store: RDFStore
    :param fpath: the file to ingest, its suffix decides the parse format
    :type fpath: Path
    :param sparql_test: (optional) select query used for the check;
        if None, all triples are selected and the expected count is the
        number of triples parsed from the file (any passed
        expected_count is then ignored)
    :type sparql_test: Optional[str]
    :param expected_count: number of rows sparql_test is expected to
        yield; required whenever sparql_test is provided
    :type expected_count: Optional[int]
    :return: tuple of (parsed graph, named-graph uri, query result)
    """
    assert fpath.exists(), (
        f"can not test insertion of non-existent file {fpath=}"
    )
    # a custom query without a count to compare to can never succeed
    # (len(result) == None is always False), so fail early and clearly,
    # before anything in the store gets dropped or inserted
    assert sparql_test is None or expected_count is not None, (
        "a custom sparql_test requires an explicit expected_count "
        f"(got {sparql_test=} with {expected_count=})"
    )

    ns = f"urn:test:{fpath.stem}"

    rdf_store_type = type(rdf_store).__name__
    log.debug(f"{rdf_store_type} :: testing ingest of {fpath=} into {ns=}")

    # clear it to avoid effects from previous tests
    log.debug(f"{rdf_store_type} :: dropping {ns=} to set clear base")
    rdf_store.drop_graph(ns)

    # read file into graph
    fg = Graph().parse(str(fpath), format=format_from_extension(fpath))
    num_triples = len(fg)
    log.debug(f"{rdf_store_type} :: inserting {num_triples=} into {ns=}")
    rdf_store.insert(fg, ns)

    # then verify
    if sparql_test is None:
        # default test is to just retrieve all triples we inserted
        sparql_test = SELECT_ALL_SPO
        expected_count = num_triples

    result = rdf_store.select(sparql_test, ns)
    assert len(result) == expected_count, (
        f"{rdf_store_type} :: "
        f"test after insert of {fpath=} into {ns=} "
        f"did not yield {expected_count=}"
    )

    return fg, ns, result


@pytest.fixture(scope="session")
def quicktest() -> bool:
"""bool setting indicating to skip lengthy tests
Expand Down Expand Up @@ -51,7 +101,8 @@ def _uri_rdf_store() -> RDFStore:
@pytest.fixture()
def rdf_stores(_mem_rdf_store, _uri_rdf_store) -> Iterable[RDFStore]:
"""trimmed list of available stores to be tested
result should contain at least memory_rdf_store, and (if available) also include uri_rdf_store
result should contain at least memory_rdf_store, and (if available)
also include uri_rdf_store
"""
stores = tuple(
store
Expand Down Expand Up @@ -81,14 +132,16 @@ def make_sample_graph(
bnode_subjects: Optional[bool] = False,
) -> Graph:
"""makes a small graph for testing purposes
the graph is build up of triples that follow the pattern {base}{part}-{item}
the graph is built up of triples that follow the
pattern {base}{part}-{item}
where:
- base is optionally provided as argument
- item is iterated from the required items argument
- part is built in iterated over ("subject", "predicate", "object")

:param items: list of 'items' to be inserted in the uri
:type items: Iterable, note that all members will simply be str()-ified into the uri building
:type items: Iterable, note that all members of it will simply be
str()-ified into the uri building
:param base: (optional) baseuri to apply into the pattern
:type base: str
:param bnode_subjects: indicating that the subject
Expand Down
17 changes: 17 additions & 0 deletions tests/input/issue-42.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[
{
"@id": "_:b0",
"@type": "https://schema.org/Person",
"https://schema.org/givenName": "Me"
},
{
"@id": "_:b1",
"@type": "https://schema.org/Person",
"https://schema.org/givenName": "You"
},
{
"@id": "_:x9",
"@type": "https://schema.org/Person",
"https://schema.org/givenName": "Them"
}
]
14 changes: 14 additions & 0 deletions tests/input/issue-42.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@prefix schema: <https://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

_:b1
a schema:Person;
schema:givenName "Me"^^xsd:string .

_:b2
a schema:Person ;
schema:givenName "You"^^xsd:string .

_:x9
a schema:Person ;
schema:givenName "Them"^^xsd:string .
Loading
Loading