Skip to content

Commit

Permalink
Hotfix: Issues with EMMO Fixes #518 (#519)
Browse files Browse the repository at this point in the history
* Issues with EMMO Fixes #518

* Fixed some more tests

* minor changes
  • Loading branch information
urbanmatthias authored Sep 10, 2020
1 parent 1ee04df commit 2824724
Show file tree
Hide file tree
Showing 16 changed files with 440 additions and 92 deletions.
17 changes: 12 additions & 5 deletions osp/core/ontology/attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
logger = logging.getLogger(__name__)


BLACKLIST = {rdflib.OWL.bottomDataProperty, rdflib.OWL.topDataProperty}


class OntologyAttribute(OntologyEntity):
def __init__(self, namespace, name, iri_suffix):
super().__init__(namespace, name, iri_suffix)
Expand All @@ -30,12 +33,14 @@ def datatype(self):
Returns:
URIRef: IRI of the datatype
"""
blacklist = [rdflib.RDFS.Literal]
superclasses = self.superclasses
datatypes = set()
for superclass in superclasses:
triple = (superclass.iri, rdflib.RDFS.range, None)
for _, _, o in self.namespace._graph.triples(triple):
datatypes.add(o)
if o not in blacklist:
datatypes.add(o)
if len(datatypes) == 1:
return datatypes.pop()
if len(datatypes) == 0:
Expand Down Expand Up @@ -64,17 +69,19 @@ def convert_to_basic_type(self, value):
return convert_from(value, self.datatype)

def _direct_superclasses(self):
return self._directly_connected(rdflib.RDFS.subPropertyOf)
return self._directly_connected(rdflib.RDFS.subPropertyOf,
blacklist=BLACKLIST)

def _direct_subclasses(self):
return self._directly_connected(rdflib.RDFS.subPropertyOf,
inverse=True)
inverse=True, blacklist=BLACKLIST)

def _superclasses(self):
yield self
yield from self._transitive_hull(rdflib.RDFS.subPropertyOf)
yield from self._transitive_hull(rdflib.RDFS.subPropertyOf,
blacklist=BLACKLIST)

def _subclasses(self):
yield self
yield from self._transitive_hull(rdflib.RDFS.subPropertyOf,
inverse=True)
inverse=True, blacklist=BLACKLIST)
7 changes: 6 additions & 1 deletion osp/core/ontology/docs/cuba.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,9 @@ cuba:Class a owl:Class ;
rdfs:isDefinedBy "The root of the ontology." .



cuba:Class rdfs:subClassOf owl:Thing .
cuba:relationship rdfs:subPropertyOf owl:topObjectProperty .
cuba:attribute rdfs:subPropertyOf owl:topDataProperty .
owl:Thing rdfs:subClassOf cuba:Class .
owl:topObjectProperty rdfs:subPropertyOf cuba:relationship .
owl:topDataProperty rdfs:subPropertyOf cuba:attribute .
82 changes: 59 additions & 23 deletions osp/core/ontology/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,55 +126,91 @@ def _superclasses(self):
def _subclasses(self):
pass

def _transitive_hull(self, predicate_iri, inverse=False):
def _transitive_hull(self, predicate_iri, inverse=False, blacklist=()):
"""Get all the entities connected with the given predicate.
Args:
predicate_iri (URIRef): The IRI of the predicate
inverse (bool, optional): Use the inverse instead.
Defaults to False.
blacklist (collection): A collection of IRIs not to return.
Yields:
OntologyEntity: The connected entities
"""
result = {self.iri}
visited = {self.iri}
frontier = {self.iri}
while frontier:
current = frontier.pop()
triple = (current, predicate_iri, None)
if inverse:
triple = (None, predicate_iri, current)
for x in self.namespace._graph.triples(triple):
o = x[0 if inverse else 2]
if o not in result and not isinstance(o, rdflib.BNode) \
and not str(o).startswith((str(rdflib.RDF),
str(rdflib.RDFS),
str(rdflib.OWL))):
frontier.add(o)
result.add(o)
yield self.namespace._namespace_registry.from_iri(o)
yield from self._directly_connected(predicate_iri=predicate_iri,
inverse=inverse,
blacklist=blacklist,
_frontier=frontier,
_visited=visited,
_iri=current)

def _special_cases(self, triple):
    """Replace triple patterns whose answers ontologies usually omit.

    Ontologies rarely state explicitly that every class is a subclass of
    owl:Thing (or a superclass of owl:Nothing), and likewise for the top
    and bottom object/data properties. Querying for those statements
    directly would miss most entities, so such patterns are replaced by
    an rdf:type pattern that selects every entity of the matching kind.

    Args:
        triple (Tuple[rdflib.term]): A triple pattern to possibly replace.

    Returns:
        triple (Tuple[rdflib.term]): Possibly replaced triple.
    """
    owl, rdf, rdfs = rdflib.OWL, rdflib.RDF, rdflib.RDFS
    # (hierarchy predicate, top entity, bottom entity, rdf:type of members)
    hierarchies = (
        (rdfs.subClassOf, owl.Thing, owl.Nothing, owl.Class),
        (rdfs.subPropertyOf, owl.topObjectProperty,
         owl.bottomObjectProperty, owl.ObjectProperty),
        # The OWL term for data properties is owl:DatatypeProperty;
        # owl:DataProperty does not exist and would never match.
        (rdfs.subPropertyOf, owl.topDataProperty,
         owl.bottomDataProperty, owl.DatatypeProperty),
    )
    for predicate, top, bottom, entity_type in hierarchies:
        # "everything below top" and "everything above bottom" both
        # amount to "every entity of this kind".
        if triple in ((None, predicate, top), (bottom, predicate, None)):
            return (None, rdf.type, entity_type)
    return triple

def _directly_connected(self, predicate_iri, inverse=False, blacklist=(),
_frontier=None, _visited=None, _iri=None):
"""Get all the entities directly connected with the given predicate.
Args:
predicate_iri (URIRef): The IRI of the predicate
inverse (bool, optional): Use the inverse instead.
Defaults to False.
blacklist (collection): A collection of IRIs not to return.
Others: Helper for _transitive_hull method.
Yields:
OntologyEntity: The connected entities
"""
triple = (self.iri, predicate_iri, None)
triple = (_iri or self.iri, predicate_iri, None)
if inverse:
triple = (None, predicate_iri, self.iri)
triple = (None, predicate_iri, _iri or self.iri)

if predicate_iri in [rdflib.RDFS.subClassOf,
rdflib.RDFS.subPropertyOf]:
triple = self._special_cases(triple)
for x in self.namespace._graph.triples(triple):
o = x[0 if inverse else 2]
if not isinstance(o, rdflib.BNode) \
and not str(o).startswith((str(rdflib.RDF),
str(rdflib.RDFS),
str(rdflib.OWL))):
yield self.namespace._namespace_registry.from_iri(o)
o = x[0 if triple[0] is None else 2]
if _visited and o in _visited:
continue
if not isinstance(o, rdflib.BNode):
if _visited is not None:
_visited.add(o)
if _frontier is not None:
_frontier.add(o)
if o not in blacklist:
yield self.namespace._namespace_registry.from_iri(o)

def __hash__(self):
return hash(self.iri)
5 changes: 3 additions & 2 deletions osp/core/ontology/namespace_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ def load(self, path):

def _load_cuba(self):
"""Load the cuba namespace"""
path_cuba = os.path.join(os.path.dirname(__file__), "docs", "cuba.ttl")
self._graph.parse(path_cuba, format="ttl")
for x in ["cuba"]: #, "rdf", "rdfs", "owl"]:
path = os.path.join(os.path.dirname(__file__), "docs", x + ".ttl")
self._graph.parse(path, format="ttl")
self._graph.bind("cuba",
rdflib.URIRef("http://www.osp-core.com/cuba#"))
self.update_namespaces()
76 changes: 55 additions & 21 deletions osp/core/ontology/oclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

logger = logging.getLogger(__name__)

BLACKLIST = {rdflib.OWL.Nothing, rdflib.OWL.Thing,
rdflib.OWL.NamedIndividual}


class OntologyClass(OntologyEntity):
def __init__(self, namespace, name, iri_suffix):
Expand All @@ -20,7 +23,10 @@ def attributes(self):
"""
attributes = dict()
for superclass in self.superclasses:
attributes.update(self._get_attributes(superclass.iri))
for attr, v in self._get_attributes(superclass.iri).items():
x = attributes.get(attr, (None, None, None))
x = (x[0] or v[0], x[1] or v[1], x[2] or v[2])
attributes[attr] = x
return attributes

@property
Expand All @@ -43,28 +49,51 @@ def _get_attributes(self, iri):
"""
graph = self._namespace._graph
attributes = dict()

blacklist = [rdflib.OWL.topDataProperty, rdflib.OWL.bottomDataProperty]
# Case 1: domain of Datatype
triple = (None, rdflib.RDFS.domain, iri)
for a_iri, _, _ in self.namespace._graph.triples(triple):
triple = (a_iri, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)
if triple in graph \
and not isinstance(a_iri, rdflib.BNode):
a = self.namespace._namespace_registry.from_iri(a_iri)
attributes[a] = self._get_default(a_iri, iri)
if triple not in graph or isinstance(a_iri, rdflib.BNode) \
or a_iri in blacklist:
continue
a = self.namespace._namespace_registry.from_iri(a_iri)
default = self._get_default(a_iri, iri)
triple = (a_iri, rdflib.RDF.type, rdflib.OWL.FunctionalProperty)
obligatory = default is None and triple in graph
attributes[a] = (self._get_default(a_iri, iri), obligatory, None)

# Case 2: restrictions
triple = (iri, rdflib.RDFS.subClassOf, None)
for _, _, o in self.namespace._graph.triples(triple):
if (o, rdflib.RDF.type, rdflib.OWL.Restriction) in graph:
a_iri = graph.value(o, rdflib.OWL.onProperty)
triple = (a_iri, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)
if triple in graph \
and not isinstance(a_iri, rdflib.BNode):
a = self.namespace._namespace_registry.from_iri(a_iri)
attributes[a] = self._get_default(a_iri, iri)
if (o, rdflib.RDF.type, rdflib.OWL.Restriction) not in graph:
continue
a_iri = graph.value(o, rdflib.OWL.onProperty)
triple = (a_iri, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)
if triple not in graph or isinstance(a_iri, rdflib.BNode):
continue
a = self.namespace._namespace_registry.from_iri(a_iri)
default = self._get_default(a_iri, iri)
dt, obligatory = self._get_datatype_for_restriction(o)
obligatory = default is None and obligatory
attributes[a] = (self._get_default(a_iri, iri), obligatory, dt)

# TODO more cases
return attributes

def _get_datatype_for_restriction(self, r):
    """Get the datatype and obligatoriness stated by an OWL restriction.

    Args:
        r (rdflib.term.Identifier): The node of the owl:Restriction.

    Returns:
        Tuple[rdflib.term.Identifier, bool]: The datatype given by
            owl:someValuesFrom (preferred) or owl:allValuesFrom, or None
            if neither is present, and whether the restriction requires
            a value (owl:someValuesFrom is present, or owl:cardinality /
            owl:minCardinality is non-zero).
    """
    g = self.namespace._graph

    dt = g.value(r, rdflib.OWL.someValuesFrom)
    # An existential restriction demands at least one value.
    obligatory = dt is not None
    if dt is None:
        dt = g.value(r, rdflib.OWL.allValuesFrom)

    if not obligatory:
        for predicate in (rdflib.OWL.cardinality,
                          rdflib.OWL.minCardinality):
            bound = g.value(r, predicate)
            # The bound must be looked up in the graph and converted to
            # a Python value: an rdflib Literal never compares equal to
            # a plain int, so comparing it (or a tuple) with 0 directly
            # would always report the attribute as obligatory.
            if bound is not None and bound.toPython() != 0:
                obligatory = True
                break
    return dt, obligatory

def _get_default(self, attribute_iri, superclass_iri):
"""Get the default of the attribute with the given iri.
Expand Down Expand Up @@ -96,7 +125,7 @@ def _get_attributes_values(self, kwargs, _force):
"""
kwargs = dict(kwargs)
attributes = dict()
for attribute, default in self.attributes.items():
for attribute, (default, obligatory, dt) in self.attributes.items():
if attribute.argname in kwargs:
attributes[attribute] = kwargs[attribute.argname]
del kwargs[attribute.argname]
Expand All @@ -112,32 +141,37 @@ def _get_attributes_values(self, kwargs, _force):
f"to be ALL_CAPS. You can use the yaml2camelcase "
f"commandline tool to transform entity names to CamelCase."
)
else:
elif not _force and obligatory:
raise TypeError("Missing keyword argument: %s" % attribute)
elif default is not None:
attributes[attribute] = default

# Check validity of arguments
if not _force:
if kwargs:
raise TypeError("Unexpected keyword arguments: %s"
% kwargs.keys())
missing = [k.argname for k, v in attributes.items() if v is None]
if missing:
raise TypeError("Missing keyword arguments: %s" % missing)
return attributes

def _direct_superclasses(self):
return self._directly_connected(rdflib.RDFS.subClassOf)
return self._directly_connected(rdflib.RDFS.subClassOf,
blacklist=BLACKLIST)

def _direct_subclasses(self):
return self._directly_connected(rdflib.RDFS.subClassOf, inverse=True)
return self._directly_connected(rdflib.RDFS.subClassOf,
inverse=True, blacklist=BLACKLIST)

def _superclasses(self):
yield self
yield from self._transitive_hull(rdflib.RDFS.subClassOf)
yield from self._transitive_hull(
rdflib.RDFS.subClassOf,
blacklist=BLACKLIST)

def _subclasses(self):
yield self
yield from self._transitive_hull(rdflib.RDFS.subClassOf, inverse=True)
yield from self._transitive_hull(
rdflib.RDFS.subClassOf, inverse=True,
blacklist=BLACKLIST)

def __call__(self, uid=None, session=None, _force=False, **kwargs):
"""Create a Cuds object from this ontology class.
Expand Down
9 changes: 5 additions & 4 deletions osp/core/ontology/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,18 +247,20 @@ def _parse_rdf(self, **kwargs):
self.graph.parse(rdf_file, format=file_format)
default_rels = dict()
reference_styles = dict()
namespace_iris = set()
for namespace, iri in namespaces.items():
if not (
iri.endswith("#") or iri.endswith("/")
):
iri += "#"
namespace_iris.add(iri)
logger.info(f"You can now use `from osp.core.namespaces import "
f"{namespace}`.")
self.graph.bind(namespace, rdflib.URIRef(iri))
default_rels[iri] = default_rel
reference_styles[iri] = reference_style

self._check_namespaces()
self._check_namespaces(namespace_iris)
self._add_cuba_triples(active_rels)
self._add_default_rel_triples(default_rels)
self._add_reference_style_triples(reference_styles)
Expand Down Expand Up @@ -313,9 +315,8 @@ def _add_reference_style_triples(self, reference_styles):
rdflib.Literal(True)
))

def _check_namespaces(self):
namespaces = set(x for _, x in self.graph.namespaces()
if not x.startswith("http://www.w3.org/"))
def _check_namespaces(self, namespace_iris):
namespaces = set(namespace_iris)
for s, p, o in self.graph:
pop = None
for ns in namespaces:
Expand Down
Loading

0 comments on commit 2824724

Please sign in to comment.