Skip to content

Commit

Permalink
Merge pull request #109 from callahantiff/owlnets_assumption_bug
Browse files Browse the repository at this point in the history
🦉 + 🥅 +  🐞 - OWL-NETS Assumption Bug
  • Loading branch information
callahantiff authored Sep 2, 2021
2 parents da61d2c + 5937914 commit 053b062
Show file tree
Hide file tree
Showing 6 changed files with 246 additions and 77 deletions.
175 changes: 121 additions & 54 deletions notebooks/OWLNETS_Example_Application.ipynb

Large diffs are not rendered by default.

53 changes: 32 additions & 21 deletions pkt_kg/owlnets.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
import os.path
import pickle
import ray # type: ignore
import re
# import re

from collections import ChainMap # type: ignore
from random import sample, shuffle
from rdflib import BNode, Graph, Literal, Namespace, URIRef # type: ignore
from rdflib.namespace import RDF, RDFS, OWL # type: ignore
from statistics import mode, StatisticsError
from tqdm import tqdm # type: ignore
from typing import Any, Dict, IO, List, Optional, Set, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Tuple, Union

from pkt_kg.utils import *

Expand Down Expand Up @@ -66,6 +66,12 @@ class OwlNets(object):
filename: A string containing the filename for the full knowledge graph (e.g. "/hpo_owlnets").
kg_construct_approach: A string containing the type of construction approach used to build the knowledge graph.
owl_tools: A string pointing to the location of the owl tools library.
top_level: A list of ontology namespaces that should not appear in any subject or object in the clean graph (
default list: ['ISO', 'SUMO', 'BFO']).
support: A list of ontology namespaces that should not appear in any subject, object, or relation in the clean
graph (default list: ['IAO', 'SWO', 'OBI', 'UBPROP']).
relations: A list of ontology namespaces that should not appear in any subject or object in the clean graph (
default list ['RO']).
Raises:
TypeError: If graph is not an rdflib.graph object.
Expand All @@ -75,16 +81,18 @@ class OwlNets(object):
"""

def __init__(self, graph: Union[Graph, List, str], write_location: str, filename: str,
kg_construct_approach: Optional[str] = None, owl_tools: str = './pkt_kg/libs/owltools') -> None:
kg_construct_approach: Optional[str] = None, owl_tools: str = './pkt_kg/libs/owltools',
top_level: Optional[List] = None, support: Optional[List] = None,
relations: Optional[List] = None) -> None:

self.owl_tools = owl_tools
self.kg_construct_approach = kg_construct_approach
self.write_location = write_location
self.res_dir = os.path.relpath('/'.join(self.write_location.split('/')[:-1]))
self.filename = filename
self.top_level: List = ['ISO', 'SUMO', 'BFO'] # can only appear as predicates
self.relations: List = ['RO'] # can only appear as predicates
self.support: List = ['IAO', 'SWO', 'OBI', 'UBPROP'] # can never appear in OWL-NETS triples
self.top_level: List = ['ISO', 'SUMO', 'BFO'] if top_level is None else top_level # can only be in predicates
self.support: List = ['IAO', 'SWO', 'OBI', 'UBPROP'] if support is None else support # never in triples
self.relations: List = ['RO'] if relations is None else relations # can only appear as relations

# VERIFY INPUT GRAPH
if not isinstance(graph, Graph) and not isinstance(graph, List) and not isinstance(graph, str):
Expand All @@ -107,7 +115,7 @@ def gets_owlnets_dict(self) -> Dict:

return self.owl_nets_dict

def gets_owlnets_graph(self) -> Dict:
def gets_owlnets_graph(self) -> Graph:
    """Accessor for the graph object stored on this instance.

    Returns:
        graph: The object currently held in self.graph (annotated as an RDFLib Graph).
    """

    owlnets_graph = self.graph

    return owlnets_graph
Expand Down Expand Up @@ -151,7 +159,7 @@ def removes_edges_with_owl_semantics(self, verbose: bool = True) -> Graph:
keep, filtered = set(), set(); exclude = self.top_level + self.relations + self.support
pbar = tqdm(total=len(self.graph)) if verbose else None
for x in self.graph:
if verbose: pbar.update(1)
if verbose: pbar.update()
if isinstance(x[0], URIRef) and isinstance(x[1], URIRef) and isinstance(x[2], URIRef):
# handle top-level, relation, and support ontologies (top/rel can only be rel; remove support onts)
subj = not any(i for i in exclude if str(x[0]).split('/')[-1].startswith(i + '_'))
Expand All @@ -163,15 +171,16 @@ def removes_edges_with_owl_semantics(self, verbose: bool = True) -> Graph:
o = [i for i in list(self.graph.triples((x[2], RDF.type, None)))
if (OWL.Class in i[2] or OWL.NamedIndividual in i[2]) and '#' not in str(x[2])]
p = [i for i in list(self.graph.triples((x[1], RDF.type, None)))
if i[2] != OWL.AnnotationProperty]
if i[2] != OWL.AnnotationProperty and i[2] != OWL.DatatypeProperty]
if len(s) > 0 and len(o) > 0 and len(p) > 0:
if OWL.ObjectProperty in [x[2] for x in p]: keep.add(x)
else: filtered |= {x}
if len(s) > 0 and len(o) > 0 and len(p) == 0:
elif len(s) > 0 and len(o) > 0 and len(p) == 0:
if RDFS.subClassOf in x[1]: keep.add(x)
elif RDF.type in x[1]: keep.add(x)
else: filtered |= {x}
elif x[1] == RDFS.subClassOf and str(OWL) not in str(x[2]): keep.add(x)
elif x[1] == RDFS.subClassOf and (str(OWL) not in str(x[2]) and 'ObsoleteClass' not in str(x[2])):
keep.add(x)
else: filtered |= {x}
else: filtered |= {x}
else: filtered |= {x}
Expand Down Expand Up @@ -204,7 +213,9 @@ def cleans_decoded_graph(self, verbose: bool = True) -> Graph:
obj = not any(i for i in exclude if str(x[2]).split('/')[-1].startswith(i + '_'))
rel = not any(i for i in self.support if str(x[1]).split('/')[-1].startswith(i + '_'))
if subj and obj and rel:
if str(OWL) not in str(x[0]) and str(OWL) not in str(x[2]): keep_predicates.add(x)
if str(OWL) not in str(x[0]) and str(OWL) not in str(x[2]):
if ('XMLSchema' not in str(x[0])) and ('XMLSchema' not in str(x[2])):
keep_predicates.add(x)
else: filtered_triples |= {x}
else: filtered_triples |= {x}
else: filtered_triples |= {x}
Expand Down Expand Up @@ -250,11 +261,11 @@ def finds_uri(self, n1: Union[BNode, URIRef], n2: Optional[URIRef], node_list: O
node: A RDFLib URIRef object.
"""

n = list(self.graph.objects(n1, None)) if node_list is None else node_list
n = list(self.graph.objects(n1)) if node_list is None else node_list
n = [x for x in n if x != n2 and (isinstance(x, BNode) or OWL.Class in set(self.graph.objects(x, RDF.type)))]
n1 = n.pop(0)
if n1 != n2 and OWL.Class in list(self.graph.objects(n1, RDF.type)): return n1
else: n += [x for x in set(self.graph.objects(n1, None)) if x not in n]; return self.finds_uri(n1, n2, n)
else: n += [x for x in set(self.graph.objects(n1)) if x not in n]; return self.finds_uri(n1, n2, n)

def reconciles_axioms(self, src: Union[BNode, URIRef], tgt: Union[BNode, URIRef]) -> Tuple:
"""Method takes two RDFLib objects (both are either a URIRef or a BNode) and performs two steps: (1) if
Expand All @@ -270,12 +281,12 @@ def reconciles_axioms(self, src: Union[BNode, URIRef], tgt: Union[BNode, URIRef]
"""

if isinstance(src, BNode) and isinstance(tgt, BNode):
org_tgt, tgt = tgt, self.finds_uri(tgt, None, None)
org_src, src = src, src if isinstance(src, URIRef) else self.finds_uri(src, tgt, None)
org_tgt, tgt = tgt, self.finds_uri(tgt, None)
org_src, src = src, src if isinstance(src, URIRef) else self.finds_uri(src, tgt)
bnodes = [org_src, org_tgt]
else:
org_src, src = src, src if isinstance(src, URIRef) else self.finds_uri(src, tgt, None)
org_tgt, tgt = tgt, tgt if isinstance(tgt, URIRef) else self.finds_uri(tgt, src, None)
org_src, src = src, src if isinstance(src, URIRef) else self.finds_uri(src, tgt)
org_tgt, tgt = tgt, tgt if isinstance(tgt, URIRef) else self.finds_uri(tgt, src)
bnodes = [org_src] if isinstance(org_src, BNode) and not isinstance(org_tgt, BNode) else [org_tgt]
master, matches = set(), set()
while len(bnodes) > 0:
Expand Down Expand Up @@ -609,7 +620,7 @@ def cleans_owl_encoded_entities(self, node_list: List, verbose: bool = True) ->
if not neg and not comp:
node, org = (node_info[0], node) if isinstance(node, BNode) else (node, node)
cleaned_entities |= {org}; cleaned_classes: Set = set()
bnodes = set(x for x in self.graph.objects(org, None) if isinstance(x, BNode))
bnodes = set(x for x in self.graph.objects(org) if isinstance(x, BNode))
for element in (bnodes if len(bnodes) > 0 else node_info[1].keys()):
edges = node_info[1][element]
while edges:
Expand Down Expand Up @@ -661,7 +672,7 @@ def makes_graph_connected(self, graph: Graph, common_ancestor: Union[URIRef, str
for x in tqdm(nodes):
ancs = gets_entity_ancestors(graph, [x], RDFS.subClassOf)
if len(ancs) == 0:
nbhd = set(graph.objects(x, None))
nbhd = set(graph.objects(x))
ancs = [x for y in [gets_entity_ancestors(graph, [i], RDFS.subClassOf) for i in nbhd] for x in y]
if len(ancs) == 0: ancs = [x]
else:
Expand Down Expand Up @@ -732,7 +743,7 @@ def write_out_results(self, graph: Union[Set, Graph], kg_const: Optional[str] =
f_name = '/' + f_name + '.nt' if not f_name.startswith('/') else f_name + '.nt'
# write graph to n-triples file
if isinstance(graph, Graph): graph.serialize(destination=self.write_location + f_name, format='nt')
else: appends_to_existing_file(graph, self.write_location + f_name, ' ')
else: appends_to_existing_file(graph, self.write_location + f_name)
# write out owl_nets dictionary
with open(self.write_location + f_name.strip('.nt') + '_decoding_dict.pkl', 'wb') as out:
pickle.dump(self.owl_nets_dict, out)
Expand Down
2 changes: 1 addition & 1 deletion pkt_kg/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
'connected_components', 'removes_self_loops', 'derives_graph_statistics', 'splits_knowledge_graph',
'adds_namespace_to_bnodes', 'removes_namespace_from_bnodes', 'updates_pkt_namespace_identifiers',
'finds_node_type', 'updates_graph_namespace', 'maps_ids_to_integers', 'n3', 'appends_to_existing_file',
'deduplicates_file', 'merges_files', 'convert_to_networkx', 'sublist_creator']
'deduplicates_file', 'merges_files', 'convert_to_networkx', 'sublist_creator', 'gets_ontology_definitions']
20 changes: 20 additions & 0 deletions pkt_kg/utils/kg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* gets_object_properties
* gets_ontology_class_dbxrefs
* gets_ontology_class_synonyms
* gets_ontology_definitions
* merges_ontologies
* ontology_file_formatter
* adds_annotation_assertions
Expand Down Expand Up @@ -81,6 +82,25 @@ def gets_ontology_classes(graph: Graph) -> Set:
return class_list


def gets_ontology_definitions(graph: Graph) -> Dict:
    """Collects every definition annotation (obo:IAO_0000115) asserted in a knowledge graph and returns them as a
    dictionary keyed by the entity being defined.

    Args:
        graph: An rdflib Graph object.

    Returns:
        obj_defs: A dictionary where keys are the subjects of the definition triples (e.g. URIRefs) and values are
            the objects of those triples (e.g. Literal definitions). For example:
                {rdflib.term.URIRef('http://purl.obolibrary.org/obo/OBI_0001648'):
                    rdflib.term.Literal('A B cell epitope qualitative binding to antibody assay that uses an antibody
                    cross-blocking assay.', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),
                ...}
    """

    obj_defs: Dict = {}
    for subj, _, definition in graph.triples((None, obo.IAO_0000115, None)):
        # when a subject has several definition triples, the one iterated last is the one retained
        obj_defs[subj] = definition

    return obj_defs


def gets_deprecated_ontology_classes(graph: Graph) -> Set:
"""Queries a knowledge graph and returns a list of all deprecated owl:Class objects in the graph.
Expand Down
16 changes: 15 additions & 1 deletion tests/test_kg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def test_gets_object_properties(self):
return None

def test_gets_ontology_class_synonyms(self):
"""Tests the gets_ontology_class_synonyms method."""
"""Tests the gets_ontology_class_synonyms method."""

# read in ontology
graph = Graph().parse(self.good_ontology_file_location)
Expand All @@ -378,6 +378,20 @@ def test_gets_ontology_class_synonyms(self):

return None

def test_gets_ontology_definitions(self):
    """Verifies that gets_ontology_definitions returns a dictionary of the expected size."""

    # parse the ontology file used as the test fixture
    ontology = Graph().parse(self.good_ontology_file_location)

    # retrieve the definitions from the parsed graph
    definitions = gets_ontology_definitions(ontology)

    # the result must be a dictionary containing 2152 entries
    self.assertIsInstance(definitions, Dict)
    self.assertEqual(2152, len(definitions))

    return None

def test_gets_ontology_class_dbxrefs(self):
"""Tests the gets_ontology_class_synonyms method."""

Expand Down
57 changes: 57 additions & 0 deletions tests/test_owlnets.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,63 @@ def test_initialization_owltools(self):

return None

def test_initialization_support(self):
    """Checks how the support attribute is set at initialization, with and without a user-provided list."""

    # arguments common to both constructions
    shared_args = dict(kg_construct_approach='subclass',
                       graph=self.graph,
                       write_location=self.write_location,
                       filename=self.kg_filename)

    # support omitted -> the default list is used
    default_nets = OwlNets(**shared_args)
    self.assertEqual(default_nets.support, ['IAO', 'SWO', 'OBI', 'UBPROP'])

    # support provided -> the provided list is stored
    custom_nets = OwlNets(support=['IAO'], **shared_args)
    self.assertEqual(custom_nets.support, ['IAO'])

    return None

def test_initialization_top_level(self):
    """Checks how the top_level attribute is set at initialization, with and without a user-provided list."""

    # arguments common to both constructions
    shared_args = dict(kg_construct_approach='subclass',
                       graph=self.graph,
                       write_location=self.write_location,
                       filename=self.kg_filename)

    # top_level omitted -> the default list is used
    default_nets = OwlNets(**shared_args)
    self.assertEqual(default_nets.top_level, ['ISO', 'SUMO', 'BFO'])

    # top_level provided -> the provided list is stored
    custom_nets = OwlNets(top_level=['BFO'], **shared_args)
    self.assertEqual(custom_nets.top_level, ['BFO'])

    return None

def test_initialization_relations(self):
    """Tests the class initialization state for the relations parameter.

    The user-provided case deliberately passes a list that differs from the default (['RO']); otherwise the
    assertion would also succeed if the constructor ignored the relations argument entirely.
    """

    # when no list is passed
    owl_nets = OwlNets(kg_construct_approach='subclass',
                       graph=self.graph,
                       write_location=self.write_location,
                       filename=self.kg_filename)
    self.assertEqual(owl_nets.relations, ['RO'])

    # when an argument is passed (must not equal the default list, so the override is actually verified)
    custom_relations = ['RO', 'BSPO']
    owl_nets = OwlNets(kg_construct_approach='subclass',
                       graph=self.graph,
                       write_location=self.write_location,
                       filename=self.kg_filename,
                       relations=custom_relations)
    self.assertEqual(owl_nets.relations, ['RO', 'BSPO'])

    return None

def test_initialization_state_graph(self):
"""Tests the class initialization state for graphs."""

Expand Down

0 comments on commit 053b062

Please sign in to comment.