Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add Atomic Graph #262

Draft
wants to merge 15 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
language: python
python:
- "3.6" # oldest rdflib supported by libgit2
- "3.7" # debian buster (stable) as of 2019-12
#- "3.6" # oldest rdflib supported by libgit2
#- "3.7" # debian buster (stable) as of 2019-12
- "3.8-dev" # 3.8 development branch
- "nightly"

Expand Down Expand Up @@ -45,6 +45,7 @@ script:
- coverage run -a --source=quit tests/test_helpers.py
- coverage run -a --source=quit tests/test_namespace.py
- coverage run -a --source=quit tests/test_provenance.py
- coverage run -a --source=quit tests/merges/test_merge_methods.py

before_deploy:
- mkdir dist
Expand Down
116 changes: 106 additions & 10 deletions quit/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
from pygit2 import GIT_MERGE_ANALYSIS_NORMAL
from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT

import rdflib
from rdflib import Graph, ConjunctiveGraph, BNode, Literal, URIRef
import rdflib.plugins.parsers.ntriples as ntriples

import re

from quit.conf import Feature, QuitGraphConfiguration
Expand Down Expand Up @@ -189,7 +192,12 @@ def instance(self, reference, force=False):
for blob in self.getFilesForCommit(commit):
try:
(name, oid) = blob
(f, context) = self.getFileReferenceAndContext(blob, commit)
result = self.getFileReferenceAndContext(blob, commit)
try:
(f, context, nameMap) = result
except ValueError:
print(result)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the error actually handled?


internal_identifier = context.identifier + '-' + str(oid)

if force or not self.config.hasFeature(Feature.Persistence):
Expand Down Expand Up @@ -330,13 +338,15 @@ def changeset(self, commit):
blob = (entity.name, entity.oid)

try:
f, context = self.getFileReferenceAndContext(blob, commit)
f, context, nameMap = self.getFileReferenceAndContext(blob, commit)
except KeyError:
graph = Graph(identifier=graphUri)
graph.parse(data=entity.content, format='nt')
parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph))
source = rdflib.parser.create_input_source(data=entity.content)
parserGraph.parse(source.getCharacterStream())

self._blobs.set(
blob, (FileReference(entity.name, entity.content), graph)
blob, (FileReference(entity.name, entity.content), graph, {})
)

private_uri = QUIT["graph-{}".format(entity.oid)]
Expand Down Expand Up @@ -413,17 +423,74 @@ def getFileReferenceAndContext(self, blob, commit):
content = commit.node(path=name).content
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if blob in self._blobs:
  return self._blobs.get(blob)
else
  ...
  return quitWorkingData

graphUri = self._graphconfigs.get(commit.id).getgraphuriforfile(name)
graph = Graph(identifier=URIRef(graphUri))
graph.parse(data=content, format='nt')
quitWorkingData = (FileReference(name, content), graph)
parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph))
source = rdflib.parser.create_input_source(data=content)
parserGraph.parse(source.getCharacterStream())
nameMap = {v: k for k, v in parserGraph._bnode_ids.items()}
quitWorkingData = (FileReference(name, content), graph, nameMap)
self._blobs.set(blob, quitWorkingData)
return quitWorkingData
return self._blobs.get(blob)

def _replaceLabledBlankNodes(self, parsedQuery, parent_commit_ref):
"""Replaces blanknodes in parsedQuery with Blanknodes that have the same label in the graph.nt
E.g. We have a Graph with the content: '_:a <urn:pred> _:b'
A BNode('a') found in parsedQuery would be replaced by the blanknode _:a found in the graph.nt.
That way, updates can pass Blanknodes as instances and do not have to work on string representations.
"""
def replaceBlankNode(parsedQuery, nameMap):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very very big: three for loops and four ifs, try to split in multiple functions and maybe use list comprehension for more conciseness.

nameMap = {v: k for k, v in nameMap.items()}
for update in parsedQuery:
for graphURI in update['quads']:
new_triples = []
for triple in update['quads'][graphURI]:
new_triple_subj = None
new_triple_obj = None
if isinstance(triple[0], rdflib.BNode):
bNode_key = triple[0].n3()
bNode_key = bNode_key[2:]
if bNode_key in nameMap:
new_triple_subj = nameMap[bNode_key]
else:
new_triple_subj = triple[0]
nameMap[bNode_key] = triple[0]
else:
new_triple_subj = triple[0]
if isinstance(triple[2], rdflib.BNode):
bNode_key = triple[2].n3()
bNode_key = bNode_key[2:]
if bNode_key in nameMap:
new_triple_obj = nameMap[bNode_key]
else:
new_triple_obj = triple[2]
nameMap[bNode_key] = triple[2]
else:
new_triple_obj = triple[2]
new_triples.append((new_triple_subj, triple[1], new_triple_obj))
update['quads'][graphURI] = new_triples

if parent_commit_ref == None:
return {}
parent_commit = self.repository.revision(parent_commit_ref)
blobs = self.getFilesForCommit(parent_commit)
for blob in blobs:
(name, oid) = blob
if(name == "graph.nt"):
file_reference, context, nameMap = self.getFileReferenceAndContext(
blob, parent_commit)
replaceBlankNode(parsedQuery, nameMap)
return nameMap
return {}

def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=None,
default_graph=[], named_graph=[]):
"""Apply an update query on the graph and the git repository."""
graph, commitid = self.instance(parent_commit_ref)
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
nameMap = self._replaceLabledBlankNodes(parsedQuery, parent_commit_ref)
resultingChanges, exception = graph.update(parsedQuery)
self._replaceExplicitNamedBlankNodesInChanges(resultingChanges, nameMap)
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
if exception:
# TODO need to revert or invalidate the graph at this point.
pass
Expand All @@ -432,6 +499,7 @@ def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=N
named_graph=named_graph)
if exception:
raise exception
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
return oid

def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=None,
Expand Down Expand Up @@ -494,7 +562,7 @@ def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=Non

# Update Cache and add new contexts to store
blob = fileReference.path, index.stash[fileReference.path][0]
self._blobs.set(blob, (fileReference, graph.store.get_context(identifier)))
self._blobs.set(blob, (fileReference, graph.store.get_context(identifier), {}))
blobs_new.add(blob)
if graphconfig.mode == 'configuration':
index.add('config.ttl', new_config.graphconf.serialize(format='turtle').decode())
Expand Down Expand Up @@ -541,12 +609,40 @@ def _build_message(self, message, query, result, default_graph, named_graph, **k
out.append('{}: "{}"'.format(k, v.replace('"', "\\\"")))
return "\n".join(out)

def _replaceExplicitNamedBlankNodesInChanges(self, changes, nameMap):
"""Any changes applied to the update query by _replaceLabledBlankNodes have to be reverted for git deltas.
Otherwise the serialization results in Blanknodes being represented as random hashes instead of their original labels.
"""
def lookUpBNode(bNode, nameMap):
if(bNode in nameMap):
return rdflib.BNode(nameMap[bNode])
return bNode

def replaceBNodesByName(triple, nameMap):
new_subject = triple[0]
new_object = triple[2]
if(isinstance(new_subject, BNode)):
new_subject = lookUpBNode(new_subject, nameMap)
if(isinstance(new_object, BNode)):
new_object = lookUpBNode(new_object, nameMap)
return (new_subject, triple[1], new_object)

if len(nameMap) == 0:
return
for change in changes:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also too deep, difficult to understand.

for context in change['delta']:
for payload in change['delta'][context]:
if(isinstance(payload[1], list)):
for i in range(0, len(payload[1])):
payload[1][i] = replaceBNodesByName(payload[1][i], nameMap)

def _applyKnownGraphs(self, delta, blobs, parent_commit, index):
blobs_new = set()
for blob in blobs:
(fileName, oid) = blob
try:
file_reference, context = self.getFileReferenceAndContext(blob, parent_commit)
file_reference, context, nameMap = self.getFileReferenceAndContext(
blob, parent_commit)
for entry in delta:
changeset = entry['delta'].get(context.identifier, None)

Expand All @@ -558,7 +654,7 @@ def _applyKnownGraphs(self, delta, blobs, parent_commit, index):

self._blobs.remove(blob)
blob = fileName, index.stash[file_reference.path][0]
self._blobs.set(blob, (file_reference, context))
self._blobs.set(blob, (file_reference, context, nameMap))
blobs_new.add(blob)
except KeyError:
pass
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error actually handled?

Expand All @@ -580,7 +676,7 @@ def _applyUnknownGraphs(self, delta, known_blobs):
n = [
int(m.group(1)) for b in known_blobs for m in [reg.search(b)] if m
] + [0]
fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n)+1)
fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n) + 1)

new_contexts[identifier] = FileReference(fileName, '')

Expand Down
14 changes: 14 additions & 0 deletions quit/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from rdflib import Graph, ConjunctiveGraph, URIRef
from rdflib.graph import ModificationException
from rdflib.graph import Path
from atomicgraphs.comp_graph import ComparableGraph


class RewriteGraph(Graph):
Expand Down Expand Up @@ -117,6 +118,19 @@ def __repr__(self):
len((c for c in self.graphs() if c not in self.store.contexts()))
)

#def update(self, update_object):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dead code

# comp_graphA = ComparableGraph(self.store)
# comp_graphB = ComparableGraph(self.store)
# answer = comp_graphB.update(update_object)
# diff_tupel = comp_graphA.diff(comp_graphB)
# for removeGraph in diff_tupel[0]:
# for triple in removeGraph:
# self.remove(triple)
# for additionalGraph in diff_tupel[1]:
# for triple in additionalGraph:
# self.add(additionalGraph)
# return answer

def _graph(self, c):
if c is None:
return None
Expand Down
Loading