WIP: add changes to make quitstore working with eccenca DataPlatform #240

Draft
wants to merge 7 commits into master
Changes from 2 commits
6 changes: 5 additions & 1 deletion quit/application.py
@@ -209,6 +209,7 @@ def parseArgs(args):
configfile_default = "config.ttl"
oauthclientid_default = None
oauthclientsecret_default = None
feature_default = Feature.Unknown

if 'QUIT_PORT' in os.environ:
port_default = os.environ['QUIT_PORT']
@@ -234,6 +235,9 @@ def parseArgs(args):
if 'QUIT_OAUTH_SECRET' in os.environ:
oauthclientsecret_default = os.environ['QUIT_OAUTH_SECRET']

if 'FEATURE' in os.environ:
feature_default = os.environ['FEATURE']

parser = argparse.ArgumentParser()
parser.add_argument('-b', '--basepath', type=str, default=basepath_default, help=basepathhelp)
parser.add_argument(
@@ -248,7 +252,7 @@ def parseArgs(args):
parser.add_argument('--flask-debug', action='store_true')
parser.add_argument('--defaultgraph-union', action='store_true')
parser.add_argument('-f', '--features', nargs='*', action=FeaturesAction,
default=Feature.Unknown,
default=feature_default,
Member Author:
The features should be set using the command line arguments instead of environment variables, if this is possible.

Is it possible to set the environment variables using Docker? I only found the Git way to set this environment variable.

Member Author:
@seebi could you please try whether you can use the --feature command-line option instead of the environment variables in the orchestration (https://github.com/AKSW/QuitStore#command-line-options)?

Member:
Yes, I can override it in our compose file.
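
As an aside on this thread: the diff sets feature_default to the raw string from the FEATURE environment variable, while the command-line default is the Feature.Unknown enum member. Below is a minimal, hypothetical sketch (the helper and the member names are assumptions, not QuitStore code) of keeping both paths type-consistent by mapping the environment string onto the flag enum before it becomes the argparse default:

```python
import os
from enum import IntFlag


class Feature(IntFlag):
    """Stand-in for quit's Feature flags; member names are illustrative only."""
    Unknown = 0
    Provenance = 1
    Persistence = 2


def feature_default_from_env(var="FEATURE"):
    """Translate a space-separated FEATURE string into Feature flags."""
    default = Feature.Unknown
    for name in os.environ.get(var, "").split():
        # Expects exact member names, e.g. "Provenance"; real code might want
        # case-insensitive matching or error reporting here.
        default |= Feature[name]
    return default


os.environ.setdefault("FEATURE", "Provenance Persistence")
print(feature_default_from_env())  # Feature.Provenance|Persistence (repr varies by Python version)
```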

help=featurehelp)
parser.add_argument('-p', '--port', default=port_default, type=int)
parser.add_argument('--host', default='::', type=str)
2 changes: 1 addition & 1 deletion quit/conf.py
@@ -110,7 +110,7 @@ def __initstoreconfig(self, namespace, upstream, targetdir, configfile):
return

def hasFeature(self, flags):
return flags == (self.features & flags)
return flags == (self.features and flags)
Member Author:
This has to be an & because the features are set as binary flags.

Yes, this is only related to the changes for FEATURES.
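
To make the point above concrete, here is a small self-contained illustration (assuming Feature is a flag-style enum, which is an assumption for this sketch): with binary flags, the boolean and simply returns its right-hand operand when both sides are truthy, so a feature that was never enabled would still test as enabled, while the bitwise & masks correctly.

```python
from enum import IntFlag


class Feature(IntFlag):
    Unknown = 0
    Provenance = 1
    Persistence = 2


features = Feature.Persistence  # only Persistence is enabled

# Bitwise AND masks out the requested flag: correct result.
print(Feature.Provenance == (features & Feature.Provenance))    # False

# Boolean 'and' returns Feature.Provenance because both operands are truthy,
# so the check would wrongly report Provenance as enabled.
print(Feature.Provenance == (features and Feature.Provenance))  # True
```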


def getBindings(self):
q = """SELECT DISTINCT ?prefix ?namespace WHERE {{
63 changes: 52 additions & 11 deletions quit/core.py
@@ -415,14 +415,41 @@ def getFileReferenceAndContext(self, blob, commit):
return self._blobs.get(blob)

def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=None,
default_graph=[], named_graph=[]):
default_graph=[], named_graph=[], queryType=None, comment=None):
"""Apply an update query on the graph and the git repository."""
graph, commitid = self.instance(parent_commit_ref)
resultingChanges, exception = graph.update(parsedQuery)
if exception:
# TODO need to revert or invalidate the graph at this point.
pass
oid = self.commit(graph, resultingChanges, 'New Commit from QuitStore', parent_commit_ref,

graphuri = None

if comment is not None:
queryType = comment
elif len(resultingChanges) > 1:
queryType = 'Edit resource in'
for entry in resultingChanges:
if "delta" in entry:
for x in entry["delta"]:
graphuri = str(x)
if queryType == 'Modify':
ls = entry["delta"][x]
if len(ls) == 1 and "removals" in ls[0]:
queryType = 'Remove resource in'
elif len(ls) == 1 and "additions" in ls[0]:
queryType = 'Add resource in'

if queryType is not None and graphuri is not None:
if queryType == 'InsertData' or queryType == 'Load':
message = 'Insert data into Graph <' + graphuri + '>'
elif queryType == 'DeleteData' or queryType == 'DeleteWhere':
message = 'Delete data from Graph <' + graphuri + '>'
else:
message = queryType + ' Graph <' + graphuri + '>'
else:
message = 'New Commit from QuitStore'
oid = self.commit(graph, resultingChanges, message, parent_commit_ref,
Member Author:
Yes, 'New Commit from QuitStore' is a placeholder. It should be a message given by the client, not one generated from the query, since the query is already represented in the commit message.

Member Author:
See #237

Thanks for the tip! Yes, I understand; I just want to show the possibility during the demonstration =)

target_ref, query=query, default_graph=default_graph,
named_graph=named_graph)
if exception:
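
Restating the message derivation from this hunk as a standalone sketch (simplified; it mirrors the diff above rather than the final API): the commit message is chosen from the detected query type and the affected graph URI, and the old placeholder is only used when neither could be determined.

```python
def build_commit_message(query_type, graphuri):
    """Simplified restatement of the message selection in applyQueryOnCommit."""
    if query_type is None or graphuri is None:
        return 'New Commit from QuitStore'
    if query_type in ('InsertData', 'Load'):
        return 'Insert data into Graph <' + graphuri + '>'
    if query_type in ('DeleteData', 'DeleteWhere'):
        return 'Delete data from Graph <' + graphuri + '>'
    return query_type + ' Graph <' + graphuri + '>'


print(build_commit_message('InsertData', 'http://example.org/graph'))
# Insert data into Graph <http://example.org/graph>
print(build_commit_message(None, None))
# New Commit from QuitStore
```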
@@ -474,7 +501,7 @@ def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=None
graphconfig = self._graphconfigs.get(parent_commit_id)
known_files = graphconfig.getfiles().keys()

blobs_new = self._applyKnownGraphs(delta, blobs, parent_commit, index)
blobs_new = self._applyKnownGraphs(delta, blobs, parent_commit, index, graphconfig)
new_contexts = self._applyUnknownGraphs(delta, known_files)
new_config = copy(graphconfig)

@@ -536,25 +563,39 @@ def _build_message(self, message, query, result, default_graph, named_graph, **kwargs):
out.append('{}: "{}"'.format(k, v.replace('"', "\\\"")))
return "\n".join(out)

def _applyKnownGraphs(self, delta, blobs, parent_commit, index):
def _applyKnownGraphs(self, delta, blobs, parent_commit, index, graphconfig):
blobs_new = set()
for blob in blobs:
(fileName, oid) = blob
type = None

try:
file_reference, context = self.getFileReferenceAndContext(blob, parent_commit)
for entry in delta:

changeset = entry['delta'].get(context.identifier, None)

if changeset:
applyChangeset(file_reference, changeset, context.identifier)
del(entry['delta'][context.identifier])

index.add(file_reference.path, file_reference.content)
type = entry['type']
if type == 'DROP':
index.remove(file_reference.path)
index.remove(file_reference.path + '.graph')
graphconfig.removegraph(context.identifier)
del (entry['delta'][context.identifier])
else:
applyChangeset(file_reference, changeset, context.identifier)
del (entry['delta'][context.identifier])

self._blobs.remove(blob)
blob = fileName, index.stash[file_reference.path][0]
self._blobs.set(blob, (file_reference, context))
blobs_new.add(blob)

if type == 'DROP':
pass
else:
index.add(file_reference.path, file_reference.content)
blob = fileName, index.stash[file_reference.path][0]
self._blobs.set(blob, (file_reference, context))
blobs_new.add(blob)

except KeyError:
pass
return blobs_new
3 changes: 2 additions & 1 deletion quit/git.py
@@ -658,7 +658,8 @@ def commit(self, message, author_name, author_email, **kwargs):
commiter = pygit2.Signature(commiter_name, commiter_email)

# Sort index items
items = sorted(self.stash.items(), key=lambda x: (x[1][0], x[0]))
#items = sorted(self.stash.items(), key=lambda x: (x[1][0], x[0]))
items = list(self.stash.items())
Member Author:
@huwenxin Is it unimportant to sort the index? Could we maybe even just use self.stash.items() directly in the for loop below instead of the items variable?

During testing I found that self.stash.items() was sometimes None, so sorted() would raise an error. It's also possible to use self.stash.items() instead of items below. By the way, I will list the changes/features I've made and send them to you later, along with a newer version of the code.
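
For illustration, a hedged sketch of keeping the deterministic ordering while tolerating the empty/None stash described above; the key layout is taken from the commented-out sort line, and the helper itself is an assumption, not the agreed fix:

```python
def sorted_index_items(stash):
    """Sort index entries by (oid, path), tolerating a None or empty stash."""
    items = stash.items() if stash else []
    # Each entry is assumed to look like (path, (oid, mode)), matching the
    # original key lambda x: (x[1][0], x[0]).
    return sorted(items, key=lambda entry: (entry[1][0], entry[0]))


# Example with a plain dict standing in for self.stash:
print(sorted_index_items({"b.nt": ("2222", 0o100644), "a.nt": ("1111", 0o100644)}))
print(sorted_index_items(None))  # [] instead of an error
```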


# Create tree
tree = IndexTree(self)
91 changes: 85 additions & 6 deletions quit/helpers.py
@@ -1,12 +1,20 @@
#!/usr/bin/env python3
import cgi
import logging
import os
from pprint import pprint
from xml.dom.minidom import parse

import uwsgi
from pyparsing import ParseException
from rdflib import Graph
from werkzeug.wsgi import make_chunk_iter

from quit.exceptions import UnSupportedQuery, SparqlProtocolError, NonAbsoluteBaseError
from rdflib.term import URIRef
from rdflib.term import URIRef, Variable
from rdflib.plugins.sparql.parserutils import CompValue, plist
from rdflib.plugins.sparql.parser import parseQuery, parseUpdate
from quit.tools.algebra import translateQuery, translateUpdate
from rdflib.plugins.sparql.parser import parseQuery, parseUpdate, Query
from quit.tools.algebra import translateQuery, translateUpdate, pprintAlgebra
from rdflib.plugins.serializers.nt import _nt_row as _nt
from rdflib.plugins.sparql import parser, algebra
from rdflib.plugins import sparql
@@ -153,6 +161,7 @@ def configure_query_dataset(parsed_query, default_graphs, named_graphs):
default_graphs: a list of uri strings for default graphs
named_graphs: a list of uri strings for named graphs
"""

if not isinstance(default_graphs, list) or not isinstance(named_graphs, list):
return parsed_query

@@ -167,7 +176,8 @@ def configure_query_dataset(parsed_query, default_graphs, named_graphs):
for uri in default_graphs:
parsed_query[1]['datasetClause'].append(CompValue('DatasetClause', default=URIRef(uri)))
for uri in named_graphs:
parsed_query[1]['datasetClause'].append(CompValue('DatasetClause', named=URIRef(uri)))
if uri not in default_graphs:
parsed_query[1]['datasetClause'].append(CompValue('DatasetClause', named=URIRef(uri)))

return parsed_query

@@ -210,8 +220,11 @@ def parse_query_type(query, base=None, default_graph=[], named_graph=[]):
"""Parse a query and add default and named graph uri if possible."""
try:
parsed_query = parseQuery(query)
parsed_query = parse_named_graph_query(parsed_query)
parsed_query = configure_query_dataset(parsed_query, default_graph, named_graph)
translated_query = translateQuery(parsed_query, base=base)


except ParseException:
raise UnSupportedQuery()
except SparqlProtocolError as e:
@@ -287,6 +300,7 @@ def parse_sparql_request(request):
default_graph = []
named_graph = []
accept_header = None
comment = None

if request.method == "GET":
default_graph = request.args.getlist('default-graph-uri')
@@ -296,6 +310,7 @@ def parse_sparql_request(request):
elif request.method == "POST":
if 'Content-Type' in request.headers:
content_mimetype, options = parse_options_header(request.headers['Content-Type'])

if content_mimetype == "application/x-www-form-urlencoded":
if 'query' in request.form:
default_graph = request.form.getlist('default-graph-uri')
@@ -317,5 +332,69 @@ def parse_sparql_request(request):
named_graph = request.args.getlist('using-named-graph-uri')
query = request.data.decode("utf-8")
type = 'update'

return query, type, default_graph, named_graph
elif content_mimetype == "application/rdf+xml":
default_graph = request.args.getlist('default-graph-uri')
named_graph = request.args.getlist('named-graph-uri')
graph = request.args.get('graph')
data = request.data.decode("utf-8")
g = Graph()
g.parse(data=data, format='application/rdf+xml')
query = 'INSERT DATA { GRAPH <' + graph + '> { ' + g.serialize(format="nt").decode("utf-8") + ' } }'
type = 'update'
elif request.method == "PUT":
if 'Content-Type' in request.headers:
content_mimetype, options = parse_options_header(request.headers['Content-Type'])
default_graph = request.args.getlist('default-graph-uri')
named_graph = request.args.getlist('named-graph-uri')
graph = request.args.get('graph')
data = request.input_stream.read()
g = Graph()
if content_mimetype is not None:
g.parse(data=data, format=content_mimetype)
else:
g.parse(data=data, format='application/rdf+xml')
query = 'WITH <' + graph + '> DELETE { ?s ?p ?o . } INSERT { ' + g.serialize(format="nt").decode("utf-8") + ' } WHERE { ?s ?p ?o .}'
type = 'update'
comment = 'Replace'

return query, type, default_graph, named_graph, comment
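
For context, a standalone sketch of what the new application/rdf+xml branch builds: the posted document is parsed with rdflib and re-serialized as N-Triples inside an INSERT DATA update targeting the requested graph. The payload and graph URI below are made up, and depending on the rdflib version serialize() returns bytes and needs the .decode('utf-8') seen in the diff:

```python
from rdflib import Graph

rdfxml_payload = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:ex="http://example.org/">
  <rdf:Description rdf:about="http://example.org/subject">
    <ex:predicate>object</ex:predicate>
  </rdf:Description>
</rdf:RDF>"""

graph_uri = 'http://example.org/graph'  # normally request.args.get('graph')

g = Graph()
g.parse(data=rdfxml_payload, format='application/rdf+xml')

ntriples = g.serialize(format='nt')
if isinstance(ntriples, bytes):  # older rdflib returns bytes here
    ntriples = ntriples.decode('utf-8')

query = 'INSERT DATA { GRAPH <' + graph_uri + '> { ' + ntriples + ' } }'
print(query)
```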


def parse_named_graph_query(query):

datasetClause = query[1].datasetClause

if datasetClause is not None:
default_list = []
named_list = []
for d in datasetClause:
if d.default:
default_list.append(d.default)

for d in datasetClause:
if d.named:
if d.named in default_list:
query[1].datasetClause.remove(d)
else:
named_list.append(d.named)

if len(named_list) > 0:
q = "SELECT * WHERE{ FILTER ( ?"
for t in query[1].where.part:
try:
term = t.term
except ParseException:
raise UnSupportedQuery()
q = q + term + " IN (<" + '>,<'.join(named_list) + ">))}"

parsedFilter = Query.parseString(q, parseAll=True)[1].where.part[0]
query[1].where.part.append(parsedFilter)
else:
if 'graph' in query[1].where.part[0]:
pass
else:
graphValue = query[1].where
whereValue = CompValue('GroupGraphPatternSub', part=[CompValue('GraphGraphPattern', term=Variable('selfDefinedGraphVariable'), graph=graphValue)])
query[1].where = whereValue

return query
4 changes: 3 additions & 1 deletion quit/tools/algebra.py
@@ -747,6 +747,8 @@ def translateQuery(q, base=None, initNs=None):
q[1], visitPost=functools.partial(translatePName, prologue=prologue))

P, PV = translate(q[1])
if Variable('selfDefinedGraphVariable') in PV:
PV.remove(Variable('selfDefinedGraphVariable'))
datasetClause = q[1].datasetClause
if q[1].name == 'ConstructQuery':

@@ -777,7 +779,7 @@ def pp(p, ind=" "):
return
print("%s(" % (p.name, ))
for k in p:
print("%s%s =" % (ind, k,), end=' ')
#print("%s%s =" % (ind, k,), end=' ')
pp(p[k], ind + " ")
print("%s)" % ind)

6 changes: 3 additions & 3 deletions quit/tools/evaluate.py
@@ -483,8 +483,8 @@ def evalQuery(graph, query, initBindings, base=None):
# TODO re-enable original behaviour if FROM NAMED works with named graphs
# https://github.com/AKSW/QuitStore/issues/144
elif d.named:
raise FromNamedError
# g = d.named
# ctx.load(g, default=False)
# raise FromNamedError
g = d.named
ctx.load(g, default=False)

return evalPart(ctx, main)
16 changes: 15 additions & 1 deletion quit/tools/update.py
@@ -109,12 +109,23 @@ def evalDrop(ctx, u):
"""
http://www.w3.org/TR/sparql11-update/#drop
"""

res = {}
res["type"] = "DROP"
res["delta"] = {}

if ctx.dataset.store.graph_aware:
for g in _graphAll(ctx, u.graphiri):
_append(res["delta"], u.graphiri, 'removals', g)
ctx.dataset.store.remove_graph(g)
graph = ctx.dataset.get_context(u.graphiri)
graph -= g
else:
_append(res["delta"], u.graphiri, 'removals', list(u.triples))
evalClear(ctx, u)

return res


def evalInsertData(ctx, u):
"""
@@ -390,7 +401,9 @@ def evalUpdate(graph, update, initBindings=None, actionLog=False):
elif u.name == 'Clear':
evalClear(ctx, u)
elif u.name == 'Drop':
evalDrop(ctx, u)
result = evalDrop(ctx, u)
if result:
res.append(result)
elif u.name == 'Create':
evalCreate(ctx, u)
elif u.name == 'Add':
@@ -422,4 +435,5 @@ def evalUpdate(graph, update, initBindings=None, actionLog=False):
except Exception:
if not u.silent:
raise

return res, None
2 changes: 1 addition & 1 deletion quit/web/modules/application.py
@@ -18,7 +18,7 @@ def login():
else:
state = session["state"]
logger.debug("request url: {}".format(request.url))
redirect_uri = request.url
redirect_uri = 'http://docker.local/quitstore/login'
authorizeEndpoint = "https://github.com/login/oauth/authorize"
tokenEndpoint = "https://github.com/login/oauth/access_token"
