diff --git a/quit/application.py b/quit/application.py
index cde40437..fbc433c9 100644
--- a/quit/application.py
+++ b/quit/application.py
@@ -1,7 +1,7 @@
import argparse
import sys
import os
-from quit.conf import Feature, QuitConfiguration
+from quit.conf import Feature, QuitStoreConfiguration
from quit.exceptions import InvalidConfigurationError
import rdflib.plugins.sparql
from rdflib.plugins.sparql.algebra import SequencePath
@@ -98,11 +98,10 @@ def sequencePathCompareGt(self, other):
'quit.plugins.serializers.results.htmlresults', 'HTMLResultSerializer')
try:
- config = QuitConfiguration(
+ config = QuitStoreConfiguration(
configfile=args.configfile,
targetdir=args.targetdir,
- repository=args.repourl,
- configmode=args.configmode,
+ upstream=args.repourl,
features=args.features,
namespace=args.namespace,
)
@@ -111,14 +110,9 @@ def sequencePathCompareGt(self, other):
sys.exit('Exiting quit')
# since repo is handled, we can add graphs to config
- config.initgraphconfig()
- logger.info('QuitStore successfully running.')
- logger.info('Known graphs: ' + str(config.getgraphs()))
- logger.info('Known files: ' + str(config.getfiles()))
+ logger.info('QuitStore Configuration initialized.')
logger.debug('Path of Gitrepo: ' + config.getRepoPath())
- logger.debug('Config mode: ' + str(config.getConfigMode()))
- logger.debug('All RDF files found in Gitepo:' + str(config.getgraphsfromdir()))
return {'config': config}
diff --git a/quit/conf.py b/quit/conf.py
index 6b7f9468..277a3d34 100644
--- a/quit/conf.py
+++ b/quit/conf.py
@@ -1,8 +1,9 @@
import logging
import os
+from pygit2 import Repository
from os import walk
-from os.path import join, isfile
+from os.path import join, isfile, relpath
from quit.exceptions import MissingConfigurationError, InvalidConfigurationError
from quit.exceptions import UnknownConfigurationError
from quit.helpers import isAbsoluteUri
@@ -25,14 +26,16 @@ class Feature:
class QuitConfiguration:
- """A class that keeps track of the relation between named graphs and files."""
+ quit = Namespace('http://quit.aksw.org/vocab/')
+
+class QuitStoreConfiguration(QuitConfiguration):
+ """A class that provides information about settings, filesystem and git."""
def __init__(
self,
- configmode=None,
configfile='config.ttl',
features=None,
- repository=None,
+ upstream=None,
targetdir=None,
namespace=None
):
@@ -48,33 +51,24 @@ def __init__(
self.features = features
self.configchanged = False
self.sysconf = Graph()
- self.graphconf = None
- self.origin = None
- self.graphs = {}
- self.files = {}
+ self.upstream = None
self.namespace = None
- self.quit = Namespace('http://quit.aksw.org/vocab/')
self.nsMngrSysconf = NamespaceManager(self.sysconf)
self.nsMngrSysconf.bind('', 'http://quit.aksw.org/vocab/', override=False)
- self.nsMngrGraphconf = NamespaceManager(self.sysconf)
- self.nsMngrGraphconf.bind('', 'http://quit.aksw.org/vocab/', override=False)
try:
self.__initstoreconfig(
namespace=namespace,
- repository=repository,
+ upstream=upstream,
targetdir=targetdir,
- configfile=configfile,
- configmode=configmode
+ configfile=configfile
)
except InvalidConfigurationError as e:
logger.error(e)
raise e
- return
-
- def __initstoreconfig(self, namespace, repository, targetdir, configfile, configmode):
+ def __initstoreconfig(self, namespace, upstream, targetdir, configfile):
"""Initialize store settings."""
if isAbsoluteUri(namespace):
self.namespace = namespace
@@ -93,214 +87,246 @@ def __initstoreconfig(self, namespace, repository, targetdir, configfile, config
except PermissionError:
raise InvalidConfigurationError(
"Configuration file could not be parsed. Permission denied. {}".format(
- configfile
- )
- )
+ configfile))
except Exception as e:
- raise UnknownConfigurationError(
- "UnknownConfigurationError: {}".format(e)
- )
+ raise UnknownConfigurationError("UnknownConfigurationError: {}".format(e))
self.configfile = configfile
else:
if not targetdir:
raise InvalidConfigurationError('No target directory for git repo given')
- if configmode:
- self.setConfigMode(configmode)
-
if targetdir:
self.setRepoPath(targetdir)
- if repository:
- self.setGitOrigin(repository)
+        if upstream:
+            # setUpstream is the setter defined on this class for the remote URI
+            self.setUpstream(upstream)
return
- def initgraphconfig(self):
- """Initialize graph settings.
+ def hasFeature(self, flags):
+ return flags == (self.features & flags)
- Public method to initalize graph settings. This method will be run only once.
+ def getBindings(self):
+ ns = Namespace('http://quit.aksw.org/vocab/')
+ q = """SELECT DISTINCT ?prefix ?namespace WHERE {{
+ {{
+ ?ns a <{binding}> ;
+ <{predicate_prefix}> ?prefix ;
+ <{predicate_namespace}> ?namespace .
+ }}
+ }}""".format(
+ binding=ns['Binding'], predicate_prefix=ns['prefix'],
+ predicate_namespace=ns['namespace']
+ )
+
+ result = self.sysconf.query(q)
+ return [(row['prefix'], row['namespace']) for row in result]
+
+ def getDefaultBranch(self):
+ """Get the default branch on the Git repository from configuration.
+
+ Returns:
+ A string containing the branch name.
"""
- if self.graphconf is None:
- self.__initgraphconfig()
-
- def __initgraphconfig(self, repository=None, targetdir=None):
- """Initialize graph settings."""
- self.graphconf = Graph()
- configmode = self.getConfigMode()
- logger.debug("Graph Config mode is: {}".format(configmode))
-
- if configmode == 'localconfig':
- self.__initgraphsfromconf(self.configfile)
- elif configmode == 'repoconfig':
- remConfigFile = join(self.getRepoPath(), 'config.ttl')
- self.__initgraphsfromconf(remConfigFile)
- elif configmode == 'graphfiles':
- self.__initgraphsfromdir(self.getRepoPath())
- else:
- raise InvalidConfigurationError('This mode is not supported.', self.configmode)
- return
+ nsQuit = 'http://quit.aksw.org/vocab/'
+ storeuri = URIRef('http://my.quit.conf/store')
+ property = URIRef(nsQuit + 'defaultBranch')
- def __initgraphsfromdir(self, repodir):
- """Init a repository by analyzing all existing files."""
- graphs = self.getgraphsfromdir(repodir)
- repopath = self.getRepoPath()
-
- for file, format in graphs.items():
- absgraphfile = os.path.join(repopath, file + '.graph')
- graphuri = self.__readGraphIriFile(absgraphfile)
-
- if graphuri and format == 'nquads':
- self.addgraph(file=file, graphuri=graphuri, format=format)
- elif graphuri is None and format == 'nquads':
- tmpgraph = ConjunctiveGraph(identifier='default')
-
- try:
- tmpgraph.parse(source=os.path.join(repopath, file), format=format)
- except Exception:
- logger.error(
- "Could not parse graphfile {}. File skipped.".format(file)
- )
- continue
-
- namedgraphs = tmpgraph.contexts()
- founduris = []
-
- for graph in namedgraphs:
- if not isinstance(graph, BNode) and str(graph.identifier) != 'default':
- graphuri = graph.identifier
- founduris.append(graphuri)
-
- if len(founduris) == 1:
- self.addgraph(file=file, graphuri=graphuri, format=format)
- elif len(founduris) > 1:
- logger.info("No named graph found. {} skipped.".format(file))
-
- elif len(founduris) < 1:
- logger.info(
- "More than one named graphs found. Can't decide. {} skipped.".format(
- file
- )
- )
-
- elif format == 'nt':
- if graphuri:
- self.addgraph(file=file, graphuri=graphuri, format=format)
- else:
- logger.warning('No *.graph file found. ' + file + ' skipped.')
+ for s, p, o in self.sysconf.triples((None, property, None)):
+ return str(o)
- try:
- self.__setgraphsfromconf()
- except InvalidConfigurationError as e:
- raise e
+ return "master"
- def __initgraphsfromconf(self, configfile):
- """Init graphs with setting from config.ttl."""
- if not isfile(configfile):
- raise MissingConfigurationError("Configfile is missing {}".format(configfile))
+ def getGlobalFile(self):
+ """Get the graph file which should be used for unassigned graphs.
- try:
- self.graphconf.parse(configfile, format='turtle')
- except Exception as e:
- raise InvalidConfigurationError(
- "Configfile could not be parsed {} {}".format(configfile, e)
- )
+ Returns
+ The filename of the graph file where unassigned graphs should be stored.
- # Get Graphs
- self.__setgraphsfromconf()
+ """
+ nsQuit = 'http://quit.aksw.org/vocab/'
+ storeuri = URIRef('http://my.quit.conf/store')
+ property = URIRef(nsQuit + 'globalFile')
- def __readGraphIriFile(self, graphfile):
- """Search for a graph uri in graph file and return it.
+ for s, p, o in self.sysconf.triples((None, property, None)):
+ return str(o)
- Args:
- graphfile: String containing the path of a graph file
+ def getRepoPath(self):
+ """Get the path of Git repository from configuration.
Returns:
- graphuri: String with the graph URI
+ A string containig the path of the git repo.
"""
- try:
- with open(graphfile, 'r') as f:
- graphuri = f.readline().strip()
- except FileNotFoundError:
- logger.debug("File not found {}".format(graphfile))
- return
+ nsQuit = 'http://quit.aksw.org/vocab/'
+ storeuri = URIRef('http://my.quit.conf/store')
+ property = URIRef(nsQuit + 'pathOfGitRepo')
+
+ for s, p, o in self.sysconf.triples((None, property, None)):
+ return str(o)
+
+    def getUpstream(self):
+        """Get the URI of the Git remote from configuration.
+
+        The subject is left unbound, as in getRepoPath, so that a value written
+        by setUpstream (which stores it on quit:Store) is found as well.
+
+        Returns:
+            A string containing the upstream URI, or None if none is configured.
+        """
+        for _, _, upstream in self.sysconf.triples((None, self.quit.upstream, None)):
+            return str(upstream)
+
+        return None
+
+    def setUpstream(self, origin):
+        """Store the URI of the Git remote, replacing any previous value.
+
+        Args:
+            origin: The URI of the upstream repository.
+        """
+        # clear the same predicate that is written below, otherwise repeated
+        # calls would leave several quit:upstream values in the graph
+        self.sysconf.remove((None, self.quit.upstream, None))
+        self.sysconf.add((self.quit.Store, self.quit.upstream, Literal(origin)))
+
+    def setRepoPath(self, path):
+        """Store the path of the Git repository, replacing any previous value."""
+        repo_path = Literal(path)
+        self.sysconf.remove((None, self.quit.pathOfGitRepo, None))
+        self.sysconf.add((self.quit.Store, self.quit.pathOfGitRepo, repo_path))
+
+
+class QuitGraphConfiguration(QuitConfiguration):
+ """A class that keeps track of the relation between named graphs and files."""
+
+ def __init__(self, repository):
+ """Set up an empty graph configuration for a repository.
+
+ Nothing is read from the repository here; the attributes are only
+ initialized. initgraphconfig() fills them for a given revision.
+
+ Args
+ ----
+ repository: Object used to look up revisions and blobs; presumably a
+ pygit2 Repository -- TODO confirm against the caller.
+ """
+ logger = logging.getLogger('quit.conf.QuitConfiguration')
+ logger.debug('Initializing configuration object.')
+
+ self.repository = repository
+ self.configfile = None
+ # 'graphfiles' or 'configuration', set by initgraphconfig()
+ self.mode = None
+ self.graphconf = None
+ # maps graph URI -> file name
+ self.graphs = {}
+ # maps file name -> dictionary with 'oid' and, for RDF files, 'serialization' and 'graphs'
+ self.files = {}
+
+    def initgraphconfig(self, rev):
+        """Initialize graph settings for a revision.
+
+        The configuration graph itself is only created on the first call.
+        Afterwards the blobs of the revision decide which mode is used:
+        'graphfiles' if '.graph' files are present, 'configuration' if exactly
+        one 'config.ttl' is present.
+
+        Args
+        ----
+        rev: The revision whose blobs are analyzed.
+
+        Raises
+        ------
+        InvalidConfigurationError: If the revision contains neither graph files
+            nor a configuration file, both of them, or more than one
+            configuration file.
+
+        """
+        if self.graphconf is None:
+            self.graphconf = Graph()
+            self.nsMngrGraphconf = NamespaceManager(self.graphconf)
+            self.nsMngrGraphconf.bind('', 'http://quit.aksw.org/vocab/', override=False)
+
+        grphfile_count, conf_file_count, configured, blobs = self.get_blobs_from_repository(rev)
+
+        if grphfile_count == 0 and conf_file_count == 0:
+            raise InvalidConfigurationError(
+                "Did not find graphfiles or a QuitStore configuration file.")
+        elif grphfile_count > 0 and conf_file_count > 0:
+            raise InvalidConfigurationError(
+                "Conflict. Found graphfiles and QuitStore configuration file.")
+        elif grphfile_count > 0:
+            self.mode = 'graphfiles'
+            self.__init_graph_conf_with_blobs(configured, rev, blobs)
+        elif conf_file_count == 1:
+            self.mode = 'configuration'
+            self.__init_graph_conf_from_configuration(blobs['config.ttl'], blobs)
+        else:
+            raise InvalidConfigurationError(
+                "Conflict. Found more than one QuitStore configuration file.")
+
+    def __init_graph_conf_with_blobs(self, files, rev, known_blobs):
+        """Init graph configuration if graphfile contains a valid IRI.
+
+        Args
+        ----
+        files: Names of RDF files for which a '.graph' blob exists.
+        rev: The revision that is configured (not used in this method).
+        known_blobs: Dictionary that maps an RDF file name to (oid, format)
+            and a '.graph' file name to its oid.
+
+        """
+        for filename in files:
+            format = known_blobs[filename][1]
+            oid = known_blobs[filename][0]
+            graphFileId = known_blobs[filename + '.graph']
+            content = self.__get_uri_from_graphfile_blob(graphFileId)
+
+            # the helper returns None when no URI could be read; that value
+            # must not be passed on to URIRef, so the file is skipped instead
+            if not content:
+                continue
+
+            graphuri = URIRef(content)
+            self.graphs[graphuri] = filename
+            self.files[filename] = {
+                'serialization': format, 'graphs': [graphuri], 'oid': oid}
+            self.files[filename + '.graph'] = {'oid': graphFileId}
+
+ def __init_graph_conf_from_configuration(self, configfileId, known_blobs):
+ """Init graphs with setting from config.ttl."""
try:
- urlparse(graphuri)
- logger.debug("Graph URI {} found in {}".format(graphuri, graphfile))
- except Exception:
- graphuri = None
- logger.debug("No graph URI found in {}".format(graphfile))
+ configfile = self.repository.get(configfileId)
+ except Exception as e:
+ raise InvalidConfigurationError(
+ "Blob for configfile with id {} not found in repository {}".format(configfileId, e))
+
+ content = configfile.read_raw()
- return graphuri
+ try:
+ self.graphconf.parse(data=content, format='turtle')
+ except Exception as e:
+ raise InvalidConfigurationError(
+ "Configfile could not be parsed {} {}".format(configfileId, e)
+ )
+ self.files['config.ttl'] = {'oid': configfileId}
- def __setgraphsfromconf(self):
- """Set all URIs and file paths of graphs that are configured in config.ttl."""
nsQuit = 'http://quit.aksw.org/vocab/'
- query = 'SELECT DISTINCT ?graphuri ?filename WHERE { '
+ query = 'SELECT DISTINCT ?graphuri ?filename ?format WHERE { '
query += ' ?graph a <' + nsQuit + 'Graph> . '
query += ' ?graph <' + nsQuit + 'graphUri> ?graphuri . '
query += ' ?graph <' + nsQuit + 'graphFile> ?filename . '
+ query += ' OPTIONAL { ?graph <' + nsQuit + 'hasFormat> ?format .} '
query += '}'
result = self.graphconf.query(query)
- repopath = self.getRepoPath()
-
for row in result:
filename = str(row['filename'])
- format = guess_format(filename)
+ if row['format'] is None:
+ format = guess_format(filename)
+ else:
+ format = str(row['format'])
if format not in ['nt', 'nquads']:
break
+ if filename not in known_blobs.keys():
+ break
- graphuri = str(row['graphuri'])
+ graphuri = URIRef(str(row['graphuri']))
- graphFile = join(repopath, filename)
+            # we store which named graph is serialized in which file
+            self.graphs[graphuri] = filename
+            # known_blobs holds (oid, format) for RDF files; only the oid is
+            # stored here, like in __init_graph_conf_with_blobs
+            self.files[filename] = {
+                'serialization': format, 'graphs': [graphuri], 'oid': known_blobs[filename][0]}
- if isfile(graphFile):
- # everything is fine
- pass
- else:
- try:
- open(graphFile, 'a+').close()
- except PermissionError:
- raise InvalidConfigurationError(
- "Permission denied. Can't create file {} in repo {}".format(
- graphFile,
- self.getRepoPath()
- )
- )
- except FileNotFoundError:
- raise InvalidConfigurationError(
- "File not found. Can't create file {} in repo {}".format(
- graphFile,
- self.getRepoPath()
- )
- )
- except Exception as e:
- raise UnknownConfigurationError(
- "Can't create file {} in repo {}. Error: {}".format(
- graphFile,
- self.getRepoPath(),
- e
- )
- )
+ def __get_uri_from_graphfile_blob(self, oid):
+ """Search for a graph uri in graph file and return it.
- graphuri = URIRef(graphuri)
+ Args
+ ----
+ oid: String oid of a graph file
- # we store which named graph is serialized in which file
- self.graphs[graphuri] = filename
- # and furthermore we assume that one file can contain data of more
- # than one named graph and so we store for each file a set of graphs
- if filename in self.files:
- self.files[filename]['graphs'].append(graphuri)
- else:
- self.files[filename] = {
- 'serialization': format,
- 'graphs': [graphuri]
- }
+ Returns
+ -------
+ graphuri: String with the graph URI
- return
+ """
+        try:
+            blob = self.repository.get(oid)
+        except ValueError:
+            # '{ }' would be read as a field named ' ' and raise a KeyError
+            logger.debug("Object with OID {} not found in repository.".format(oid))
+            return
+
+        # NOTE(review): pygit2's Repository.get returns its default (None) for
+        # an unknown id instead of raising -- confirm for the version in use
+        if blob is None:
+            logger.debug("Object with OID {} not found in repository.".format(oid))
+            return
+
+        content = blob.read_raw().decode().strip()
+
+        try:
+            urlparse(content)
+        except Exception:
+            logger.debug("No graph URI found in blob with OID {}.".format(oid))
+            return
+
+        return content
def addgraph(self, graphuri, file, format=None):
self.graphconf.add((self.quit[quote(graphuri)], RDF.type, self.quit.Graph))
@@ -316,130 +342,69 @@ def removegraph(self, graphuri):
return
- def getConfigMode(self):
- """Get the mode how Quit-Store detects RDF files and named graphs.
-
- Returns:
- A string containig the mode.
- """
- nsQuit = 'http://quit.aksw.org/vocab/'
- property = URIRef(nsQuit + 'configMode')
-
- for s, p, o in self.sysconf.triples((None, property, None)):
- return str(o)
-
- return 'graphfiles'
-
- def getRepoPath(self):
- """Get the path of Git repository from configuration.
-
- Returns:
- A string containig the path of the git repo.
- """
- nsQuit = 'http://quit.aksw.org/vocab/'
- storeuri = URIRef('http://my.quit.conf/store')
- property = URIRef(nsQuit + 'pathOfGitRepo')
-
- for s, p, o in self.sysconf.triples((None, property, None)):
- return str(o)
-
- def getDefaultBranch(self):
- """Get the default branch on the Git repository from configuration.
-
- Returns:
- A string containing the branch name.
- """
- nsQuit = 'http://quit.aksw.org/vocab/'
- storeuri = URIRef('http://my.quit.conf/store')
- property = URIRef(nsQuit + 'defaultBranch')
-
- for s, p, o in self.sysconf.triples((None, property, None)):
- return str(o)
-
- return "master"
-
- def getGlobalFile(self):
- """Get the graph file which should be used for unassigned graphs.
-
- Returns:
- The filename of the graph file where unassigned graphs should be stored.
- """
- nsQuit = 'http://quit.aksw.org/vocab/'
- storeuri = URIRef('http://my.quit.conf/store')
- property = URIRef(nsQuit + 'globalFile')
-
- for s, p, o in self.sysconf.triples((None, property, None)):
- return str(o)
-
- def getOrigin(self):
- """Get the URI of Git remote from configuration."""
- nsQuit = 'http://quit.aksw.org/vocab/'
- storeuri = URIRef('http://my.quit.conf/store')
- property = URIRef(nsQuit + 'origin')
-
- for s, p, o in self.sysconf.triples((storeuri, property, None)):
- return str(o)
-
def getgraphs(self):
"""Get all graphs known to conf.
- Returns:
+ Returns
+ -------
A list containig all graph uris as string,
- """
- graphs = []
- for graph in self.graphs:
- graphs.append(graph)
- return graphs
+ """
+ return self.graphs
def getfiles(self):
"""Get all files known to conf.
- Returns:
+ Returns
+ -------
A list containig all files as string,
- """
- files = []
- for file in self.files:
- files.append(file)
- return files
+ """
+ return self.files
def getfileforgraphuri(self, graphuri):
"""Get the file for a given graph uri.
- Args:
+ Args
+ ----
graphuri: A String of the named graph
- Returns:
+ Returns
+ -------
A string of the path to the file asociated with named graph
+
"""
if isinstance(graphuri, str):
graphuri = URIRef(graphuri)
- for uri, filename in self.graphs.items():
- if uri == graphuri:
- return filename
+
+ if graphuri in self.graphs.keys():
+ return self.graphs[graphuri]
return
def getgraphurifilemap(self):
"""Get the dictionary of graphuris and their files.
- Returns:
- A dictionary of graphuris and information about their files.
- """
+ Returns
+ -------
+ A dictionary of graphuris and information about their representation in repository.
+ """
return self.graphs
def getserializationoffile(self, file):
"""Get the file for a given graph uri.
- Args:
+ Args
+ ----
file: A String of a file path
- Returns:
+ Returns
+ -------
A string containing the RDF serialization of file
+
"""
- if file in self.files:
+ if file in self.files.keys():
return self.files[file]['serialization']
return
@@ -447,72 +412,65 @@ def getserializationoffile(self, file):
def getgraphuriforfile(self, file):
"""Get the file for a given graph uri.
- Args:
+ Args
+ ----
file: A String of a file path
- Returns:
+ Returns
+ -------
A set containing strings of graph uris asociated to that file
+
"""
if file in self.files:
return self.files[file]['graphs']
return []
- def getgraphsfromdir(self, path=None):
- """Get the files that are part of the repository (tracked or not).
-
- Returns:
- A list of filepathes.
- """
- if path is None:
- path = self.getRepoPath()
-
- exclude = set(['.git'])
-
- graphfiles = {}
- for dirpath, dirs, files in walk(path):
- dirs[:] = [d for d in dirs if d not in exclude]
- for filename in files:
-
- format = guess_format(join(dirpath, filename))
- if format is not None:
- graphfiles[filename] = format
-
- return graphfiles
+    def get_blobs_from_repository(self, rev):
"""Analyze all blobs of a revision.
- def hasFeature(self, flags):
- return flags == (self.features & flags)
-
- def setConfigMode(self, mode):
- self.sysconf.remove((None, self.quit.configMode, None))
- self.sysconf.add((self.quit.Store, self.quit.configMode, Literal(mode)))
-
- return
-
- def setGitOrigin(self, origin):
- self.sysconf.remove((None, self.quit.origin, None))
- self.sysconf.add((self.quit.Store, self.quit.origin, Literal(origin)))
-
- return
+        Only the entries of the top-level tree of the revision are inspected.
+
+        Args
+        ----
+        rev: The revision to analyze.
+
+        Returns
+        -------
+        A 4-tuple (int, int, list, dictionary)
+        int: number of '.graph' files found.
+        int: number of 'config.ttl' files found.
+        list: names of files for which a matching '.graph' file exists.
+        dict: maps the name of each collected blob to its oid; nquads and nt
+            files map to a tuple (oid, format) instead.
- def setRepoPath(self, path):
- self.sysconf.remove((None, self.quit.pathOfGitRepo, None))
- self.sysconf.add((self.quit.Store, self.quit.pathOfGitRepo, Literal(path)))
-
- return
-
- def getBindings(self):
- ns = Namespace('http://quit.aksw.org/vocab/')
- q = """SELECT DISTINCT ?prefix ?namespace WHERE {{
- {{
- ?ns a <{binding}> ;
- <{predicate_prefix}> ?prefix ;
- <{predicate_namespace}> ?namespace .
- }}
- }}""".format(
- binding=ns['Binding'], predicate_prefix=ns['prefix'],
- predicate_namespace=ns['namespace']
- )
-
- result = self.sysconf.query(q)
- return [(row['prefix'], row['namespace']) for row in result]
+
+        """
+        commit = self.repository.revparse_single(rev)
+        config_files_count = 0
+        graph_files_count = 0
+        relevant_blobs = {}
+
+        # Collect graph files, rdf files and config files
+        for entry in commit.tree:
+            if entry.type != 'blob':
+                continue
+
+            format = guess_format(entry.name)
+            if format is None and entry.name.endswith('.graph'):
+                graph_files_count += 1
+                relevant_blobs[entry.name] = str(entry.id)
+            elif format is not None and format in ['nquads', 'nt']:
+                relevant_blobs[entry.name] = (str(entry.id), format)
+            elif format is not None and entry.name == 'config.ttl':
+                config_files_count += 1
+                relevant_blobs[entry.name] = str(entry.id)
+
+        # collect pairs of rdf files and graph files
+        graphfiles_configured = [
+            filename for filename in relevant_blobs
+            if filename + '.graph' in relevant_blobs
+        ]
+
+        return graph_files_count, config_files_count, graphfiles_configured, relevant_blobs
diff --git a/quit/core.py b/quit/core.py
index 69951b91..de7c72b9 100644
--- a/quit/core.py
+++ b/quit/core.py
@@ -2,20 +2,24 @@
import logging
+from copy import copy
+
from pygit2 import GIT_MERGE_ANALYSIS_UP_TO_DATE
from pygit2 import GIT_MERGE_ANALYSIS_FASTFORWARD
from pygit2 import GIT_MERGE_ANALYSIS_NORMAL
from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT
from rdflib import Graph, ConjunctiveGraph, BNode, Literal
-from rdflib.plugins.serializers.nquads import _nq_row as _nq
-from quit.conf import Feature
+from quit.conf import Feature, QuitGraphConfiguration
+from quit.helpers import applyChangeset
from quit.namespace import RDFS, FOAF, XSD, PROV, QUIT, is_a
from quit.graphs import RewriteGraph, InMemoryAggregatedGraph
from quit.utils import graphdiff, git_timestamp
from quit.cache import Cache, FileReference
+from urllib.parse import quote_plus
+
import subprocess
logger = logging.getLogger('quit.core')
@@ -98,6 +102,7 @@ def __init__(self, config, repository, store):
self.store = store
self._commits = Cache()
self._blobs = Cache()
+ self._graphconfigs = Cache()
def _exists(self, cid):
uri = QUIT['commit-' + cid]
@@ -290,7 +295,10 @@ def changeset(self, commit):
g.addN((s, p, o, op_uri) for s, p, o in triples)
# Entities
- map = self.config.getgraphurifilemap()
+ if commit.id not in self._graphconfigs:
+ self.updateGraphConfig(commit.id)
+
+ map = self._graphconfigs.get(commit.id).getgraphurifilemap()
for entity in commit.node().entries(recursive=True):
# todo check if file was changed
@@ -299,7 +307,7 @@ def changeset(self, commit):
if entity.name not in map.values():
continue
- graphUris = self.config.getgraphuriforfile(entity.name)
+ graphUris = self._graphconfigs.get(commit.id).getgraphuriforfile(entity.name)
graphsFromConfig = set((Graph(identifier=i) for i in graphUris))
blob = (entity.name, entity.oid)
@@ -357,10 +365,14 @@ def getFilesForCommit(self, commit):
On Cache miss this method also updates the commits cache.
"""
- uriFileMap = self.config.getgraphurifilemap()
if commit.id not in self._commits:
+ if commit.id not in self._graphconfigs:
+ self.updateGraphConfig(commit.id)
+
+ uriFileMap = self._graphconfigs.get(commit.id).getgraphurifilemap()
blobs = set()
+
for entity in commit.node().entries(recursive=True):
if entity.is_file:
if entity.name not in uriFileMap.values():
@@ -372,25 +384,27 @@ def getFilesForCommit(self, commit):
return self._commits.get(commit.id)
def getFileReferenceAndContext(self, blob, commit):
- """Get the FielReference and Context for a given blob (name, oid) of a commit.
+ """Get the FileReference and Context for a given blob (name, oid) of a commit.
- On Cache miss this method also updates teh commits cache.
+ On Cache miss this method also updates the commits cache.
"""
- uriFileMap = self.config.getgraphurifilemap()
+ if commit.id not in self._graphconfigs:
+ self.updateGraphConfig(commit.id)
+
+ uriFileMap = self._graphconfigs.get(commit.id).getgraphurifilemap()
if blob not in self._blobs:
(name, oid) = blob
content = commit.node(path=name).content
# content = self.repository._repository[oid].data
- graphUris = self.config.getgraphuriforfile(name)
+ graphUris = self._graphconfigs.get(commit.id).getgraphuriforfile(name)
graphsFromConfig = set((Graph(identifier=i) for i in graphUris))
tmp = ConjunctiveGraph()
tmp.parse(data=content, format='nquads')
contexts = set((context for context in tmp.contexts(None)
if context.identifier in uriFileMap)) | graphsFromConfig
quitWorkingData = (FileReference(name, content), contexts)
- self._blobs.set(
- blob, quitWorkingData)
+ self._blobs.set(blob, quitWorkingData)
return quitWorkingData
return self._blobs.get(blob)
@@ -428,19 +442,77 @@ def build_message(message, kwargs):
out.append('{}: "{}"'.format(k, v.replace('"', "\\\"")))
return "\n".join(out)
- def _apply(f, changeset, identifier):
- """Update the FileReference (graph uri) of a file with help of the changeset."""
- for (op, triples) in changeset:
- if op == 'additions':
- for triple in triples:
- # the internal _nq serializer appends '\n'
- line = _nq(triple, identifier).rstrip()
- f.add(line)
- elif op == 'removals':
- for triple in triples:
- # the internal _nq serializer appends '\n'
- line = _nq(triple, identifier).rstrip()
- f.remove(line)
+ def prepare_commit():
+ # Collects the named graphs that the updates in `delta` touch or remove and
+ # looks up the file of each one in `graphconfig`; both names come from the
+ # enclosing scope.
+ # NOTE(review): nothing is returned or stored yet; commit_objects, graphs,
+ # blob and fr are assigned but not used in the visible code.
+ removed = set()
+ touched = set()
+ commit_objects = {}
+
+ # find all named graphs that occur in update
+ for update in delta:
+ if update['type'] in ['ADD', 'CLEAR', 'CREATE', 'COPY']:
+ touched.add(update['graph'])
+ # NOTE(review): this compares the whole update with a string, and 'COPY'
+ # is already matched by the branch above, so this branch is never taken;
+ # presumably update['type'] == 'COPY' was intended -- confirm
+ elif update == 'COPY':
+ touched.add(update['src_graph'])
+ touched.add(update['dst_graph'])
+ elif update['type'] in ['DROP', 'MOVE']:
+ removed.add(update['src_graph'])
+ touched.add(update['dst_graph'])
+ else:
+ for identifier, changeset in update['delta'].items():
+ touched.add(identifier)
+
+ file_map = graphconfig.files
+ graphs = graphconfig.graphs
+
+ for identifier in set(touched | removed):
+ file_name = graphconfig.getfileforgraphuri(str(identifier))
+ if file_name:
+ # graph is already stored in a file: build its (name, oid) pair
+ file_oid = file_map[file_name]['oid']
+ blob = (file_name, file_oid)
+ else: # unknown named graph
+ # an empty file reference named after the URL-quoted graph identifier
+ fr = FileReference(quote_plus(str(identifier)), '')
+
+ # commit_objects[identifier] = {
+ # }
+
+ def _applyKnownGraphs(delta, blobs):
+ blobs_new = set()
+ for blob in blobs:
+ (fileName, oid) = blob
+ try:
+ file_reference, contexts = self.getFileReferenceAndContext(blob, parent_commit)
+ for context in contexts:
+ for entry in delta:
+ changeset = entry.get(context.identifier, None)
+
+ if changeset:
+ applyChangeset(file_reference, changeset, context.identifier)
+ del(entry[context.identifier])
+
+ index.add(file_reference.path, file_reference.content)
+
+ self._blobs.remove(blob)
+ blob = fileName, index.stash[file_reference.path][0]
+ self._blobs.set(blob, (file_reference, contexts))
+ blobs_new.add(blob)
+ except KeyError:
+ pass
+ return blobs_new
+
+ def _applyUnknownGraphs(delta):
+ new_contexts = {}
+ for entry in delta:
+ for identifier, changeset in entry.items():
+ if isinstance(identifier, BNode) or str(identifier) == 'default':
+ continue # TODO default graph
+
+ fileName = quote_plus(identifier + '.nq')
+ if identifier not in new_contexts.keys():
+ new_contexts[identifier] = FileReference(fileName, '')
+
+ fileReference = new_contexts[identifier]
+ applyChangeset(fileReference, changeset, identifier)
+ return new_contexts
if not delta:
return
@@ -460,40 +532,72 @@ def _apply(f, changeset, identifier):
pass
index = self.repository.index(parent_commit_id)
- for blob in blobs:
- (fileName, oid) = blob
- try:
- file_reference, contexts = self.getFileReferenceAndContext(blob, parent_commit)
- for context in contexts:
- for entry in delta:
- changeset = entry.get(context.identifier, None)
-
- if changeset:
- _apply(file_reference, changeset, context.identifier)
- del(entry[context.identifier])
+ if parent_commit_id not in self._graphconfigs:
+ self.updateGraphConfig(parent_commit_id)
- index.add(file_reference.path, file_reference.content)
+ graphconfig = self._graphconfigs.get(parent_commit_id)
- self._blobs.remove(blob)
- blob = fileName, index.stash[file_reference.path][0]
- self._blobs.set(blob, (file_reference, contexts))
- blobs_new.add(blob)
- except KeyError:
- pass
-
- unassigned = set()
- f_name = self.config.getGlobalFile() or 'unassigned.nq'
- f_new = FileReference(f_name, "")
- for entry in delta:
- for identifier, changeset in entry.items():
- unassigned.add(graph.store.get_context(identifier))
- _apply(f_new, changeset, graph.store.identifier)
-
- index.add(f_new.path, f_new.content)
-
- blob = f_name, index.stash[f_new.path][0]
- self._blobs.set(blob, (f_new, unassigned))
- blobs_new.add(blob)
+        try:
+            # parent_commit is the name used for the parent throughout this method
+            known_blobs = self.getFilesForCommit(parent_commit)
+        except KeyError:
+            known_blobs = []
+
+ known_blobs_dict = {}
+ for (blob_name, blob_oid) in known_blobs:
+ known_blobs_dict[blob_name] = blob_oid
+
+ # blobs_new = _applyKnownGraphs(delta, blobs)
+ # new_contexts = _applyUnknownGraphs(delta)
+ new_config = copy(graphconfig)
+ prepare_commit()
+
+ # new_file_references = {}
+ # blobs_new = set
+ # for update in delta:
+ # if update['type'] in ['ADD', 'CLEAR', 'CREATE', 'DROP', 'MOVE', 'COPY']:
+ # continue # TODO
+ # else:
+ # for identifier, changeset in update['delta'].items():
+ # if identifier in known_blobs_dict.keys():
+ # file_name = new_config.getfileforgraphuri(identifier)
+ # id = known_blobs_dict[file_name]
+ # blob = (file_name, id)
+ # file_reference, contexts = self.getFileReferenceAndContext(blob, commit)
+ # applyChangeset(file_reference, changeset, identifier)
+ # index.add(file_reference.path, file_reference.content)
+ # self._blobs.remove(blob)
+ # new_blob = file_name, index.stash[file_reference.path][0]
+ # self._blobs.set(new_blob, (file_reference, contexts))
+ # else:
+ # if isinstance(identifier, BNode) or str(identifier) == 'default':
+ # continue # TODO default graph
+ #
+ # file_name = quote_plus(identifier + '.nq')
+ # if identifier not in new_file_references.keys():
+ # new_file_references[identifier] = FileReference(file_name, '')
+ #
+ # file_reference = new_file_references[identifier]
+ # applyChangeset(file_reference, changeset, identifier)
+ # index.add(file_reference.path, file_reference.content)
+
+
+        # NOTE(review): new_file_references is only assigned in the commented-out
+        # code of this section -- confirm it is defined before this loop runs
+        for identifier, fileReference in new_file_references.items():
+            # Add new blobs to repo
+            index.add(fileReference.path, fileReference.content)
+            if graphconfig.mode == 'graphfiles':
+                index.add(fileReference.path + '.graph', identifier)
+
+            # Update config
+            new_config.addgraph(identifier, fileReference.path, 'nquads')
+            # Update Cache and add new contexts to store
+            blob = fileReference.path, index.stash[fileReference.path][0]
+            tmpRepo = ConjunctiveGraph()
+            tmpRepo.parse(format='nquads', data=fileReference.content)
+            # the cache entry holds a set of context graphs; set(graph) would
+            # iterate over the graph and collect its triples instead
+            self._blobs.set(blob, (fileReference, {tmpRepo.get_context(identifier)}))
+            blobs_new.add(blob)
+        if graphconfig.mode == 'configuration':
+            index.add('config.ttl', new_config.graphconf.serialize(format='turtle').decode())
message = build_message(message, kwargs)
author = self.repository._repository.default_signature
@@ -528,3 +632,9 @@ def garbagecollection(self):
except Exception as e:
logger.debug('Git garbage collection failed to spawn.')
logger.debug(e)
+
+    def updateGraphConfig(self, commitId):
+        """Initialize the graph configuration of a commit and register it.
+
+        A QuitGraphConfiguration is created on the underlying repository
+        object, initialized for `commitId` and stored in `self._graphconfigs`
+        under that commit id.
+        """
+        configuration = QuitGraphConfiguration(self.repository._repository)
+        configuration.initgraphconfig(commitId)
+        self._graphconfigs.set(commitId, configuration)
diff --git a/quit/helpers.py b/quit/helpers.py
index c466bb23..1e18a5f4 100644
--- a/quit/helpers.py
+++ b/quit/helpers.py
@@ -7,6 +7,7 @@
from rdflib.plugins.sparql.parserutils import CompValue, plist
from rdflib.plugins.sparql.parser import parseQuery, parseUpdate
from quit.tools.algebra import translateQuery, translateUpdate
+from rdflib.plugins.serializers.nquads import _nq_row as _nq
from rdflib.plugins.sparql import parser, algebra
from rdflib.plugins import sparql
from uritools import urisplit
@@ -106,6 +107,23 @@ def evalUpdate(self, querystring, graph):
return
+def applyChangeset(f, changeset, identifier):
+    """Apply a changeset to the FileReference of a graph.
+
+    `changeset` is an iterable of `(op, triples)` pairs. Every triple of an
+    'additions' entry is serialized as one N-Quads line and passed to
+    `f.add`, every triple of a 'removals' entry is passed to `f.remove`.
+    Any other operation (e.g. 'CREATE') does not touch `f`.
+
+    `identifier` is the graph the triples belong to; it is handed to the
+    N-Quads serializer together with each triple.
+    """
+    handlers = {'additions': f.add, 'removals': f.remove}
+    for op, triples in changeset:
+        handler = handlers.get(op)
+        if handler is None:
+            # e.g. 'CREATE': there are no lines to add to or remove from the file
+            continue
+        for triple in triples:
+            # the internal _nq serializer appends '\n'
+            handler(_nq(triple, identifier).rstrip())
+
+
def isAbsoluteUri(uri):
"""Check if a URI is a absolute URI and uses 'http(s)' at protocol part.
diff --git a/quit/tools/update.py b/quit/tools/update.py
index c181615b..bb0251f7 100644
--- a/quit/tools/update.py
+++ b/quit/tools/update.py
@@ -16,7 +16,8 @@
from itertools import tee
from quit.exceptions import UnSupportedQuery
-def _append(dct, identifier, action, items):
+
+def _appendChangesets(dct, identifier, action, items):
if items:
if not isinstance(identifier, Node):
identifier = URIRef(identifier)
@@ -51,32 +52,30 @@ def evalLoad(ctx, u):
"""
http://www.w3.org/TR/sparql11-update/#load
"""
- res = {}
- res["type_"] = "LOAD"
- res["graph"] = u.iri
-
if u.graphiri:
ctx.load(u.iri, default=False, publicID=u.graphiri)
else:
ctx.load(u.iri, default=True)
- return res
-
def evalCreate(ctx, u):
"""
http://www.w3.org/TR/sparql11-update/#create
"""
- g = ctx.datset.get_context(u.graphiri)
+ g = ctx.dataset.get_context(u.graphiri)
if len(g) > 0:
raise Exception("Graph %s already exists." % g.identifier)
- raise Exception("Create not implemented!")
+ # raise Exception("Create not implemented!")
def evalClear(ctx, u):
"""
http://www.w3.org/TR/sparql11-update/#clear
"""
+ res = {}
+ res['type'] = "CLEAR"
+ res["graph"] = u.graphiri
+
for g in _graphAll(ctx, u.graphiri):
g.remove((None, None, None))
@@ -98,14 +97,14 @@ def evalInsertData(ctx, u):
"""
res = {}
- res["type_"] = "INSERT"
- res["delta"] = {}
+ res['type'] = "INSERT"
+ res['delta'] = {}
# add triples
g = ctx.graph
filled = list(filter(lambda triple: triple not in g, u.triples))
if filled:
- _append(res["delta"], 'default', 'additions', filled)
+ _appendChangesets(res['delta'], 'default', 'additions', filled)
g += filled
# add quads
@@ -114,7 +113,7 @@ def evalInsertData(ctx, u):
cg = ctx.dataset.get_context(g)
filledq = list(filter(lambda triple: triple not in cg, u.quads[g]))
if filledq:
- _append(res["delta"], cg.identifier, 'additions', filledq)
+ _appendChangesets(res['delta'], cg.identifier, 'additions', filledq)
cg += filledq
return res
@@ -125,14 +124,14 @@ def evalDeleteData(ctx, u):
http://www.w3.org/TR/sparql11-update/#deleteData
"""
res = {}
- res["type_"] = "DELETE"
- res["delta"] = {}
+ res['type'] = "DELETE"
+ res['delta'] = {}
# remove triples
g = ctx.graph
filled = list(filter(lambda triple: triple in g, u.triples))
if filled:
- _append(res["delta"], 'default', 'removals', filled)
+ _appendChangesets(res['delta'], 'default', 'removals', filled)
g -= filled
# remove quads
@@ -141,7 +140,7 @@ def evalDeleteData(ctx, u):
cg = ctx.dataset.get_context(g)
filledq = list(filter(lambda triple: triple in cg, u.quads[g]))
if filledq:
- _append(res["delta"], cg.identifier, 'removals', filledq)
+ _appendChangesets(res['delta'], cg.identifier, 'removals', filledq)
cg -= filledq
return res
@@ -153,8 +152,8 @@ def evalDeleteWhere(ctx, u):
"""
res = {}
- res["type_"] = "DELETEWHERE"
- res["delta"] = {}
+ res['type'] = "DELETEWHERE"
+ res['delta'] = {}
_res = evalBGP(ctx, u.triples)
for g in u.quads:
@@ -165,13 +164,13 @@ def evalDeleteWhere(ctx, u):
for c in _res:
g = ctx.graph
filled, filled_delta = tee(_fillTemplate(u.triples, c))
- _append(res["delta"], 'default', 'removals', list(filled_delta))
+ _appendChangesets(res['delta'], 'default', 'removals', list(filled_delta))
g -= filled
for g in u.quads:
cg = ctx.dataset.get_context(c.get(g))
filledq, filledq_delta = tee(_fillTemplate(u.quads[g], c))
- _append(res["delta"], cg.identifier, 'removals', list(filledq_delta))
+ _appendChangesets(res['delta'], cg.identifier, 'removals', list(filledq_delta))
cg -= filledq
return res
@@ -181,8 +180,8 @@ def evalModify(ctx, u):
originalctx = ctx
res = {}
- res["type_"] = "MODIFY"
- res["delta"] = {}
+ res['type'] = "MODIFY"
+ res['delta'] = {}
# Using replaces the dataset for evaluating the where-clause
if u.using:
@@ -237,24 +236,24 @@ def evalModify(ctx, u):
dg = ctx.graph
if u.delete:
filled, filled_delta = tee(_fillTemplate(u.delete.triples, c))
- _append(res["delta"], graphName, 'removals', list(filled_delta))
+ _appendChangesets(res['delta'], graphName, 'removals', list(filled_delta))
dg -= filled
for g, q in u.delete.quads.items():
cg = ctx.dataset.get_context(c.get(g))
filledq, filledq_delta = tee(_fillTemplate(q, c))
- _append(res["delta"], cg.identifier, 'removals', list(filledq_delta))
+ _appendChangesets(res['delta'], cg.identifier, 'removals', list(filledq_delta))
cg -= filledq
if u.insert:
filled, filled_delta = tee(_fillTemplate(u.insert.triples, c))
- _append(res["delta"], graphName, 'additions', list(filled_delta))
+ _appendChangesets(res['delta'], graphName, 'additions', list(filled_delta))
dg += filled
for g, q in u.insert.quads.items():
cg = ctx.dataset.get_context(c.get(g))
filledq, filledq_delta = tee(_fillTemplate(q, c))
- _append(res["delta"], cg.identifier, 'additions', list(filledq_delta))
+ _appendChangesets(res['delta'], cg.identifier, 'additions', list(filledq_delta))
cg += filledq
return res
@@ -360,35 +359,43 @@ def evalUpdate(graph, update, initBindings=None, actionLog=False):
try:
if u.name == 'Load':
- result = evalLoad(ctx, u).get('delta', None)
- if result:
- res.append(result)
+ result = evalLoad(ctx, u)
+ res.append({'type': 'LOAD', 'graph': u.graphiri})
elif u.name == 'Clear':
evalClear(ctx, u)
+ res.append({'type': 'CLEAR', 'graph': u.graphiri})
elif u.name == 'Drop':
evalDrop(ctx, u)
+ res.append({'type': 'DROP', 'graph': u.graphiri})
elif u.name == 'Create':
evalCreate(ctx, u)
+ res.append({'type': 'CREATE', 'graph': u.graphiri})
elif u.name == 'Add':
evalAdd(ctx, u)
+ src, dst = u.graph
+ res.append({'type': 'ADD', 'src_graph': src, 'dst_graph': dst})
elif u.name == 'Move':
evalMove(ctx, u)
+ src, dst = u.graph
+ res.append({'type': 'MOVE', 'src_graph': src, 'dst_graph': dst})
elif u.name == 'Copy':
evalCopy(ctx, u)
+ src, dst = u.graph
+ res.append({'type': 'COPY', 'src_graph': src, 'dst_graph': dst})
elif u.name == 'InsertData':
- result = evalInsertData(ctx, u).get('delta', None)
+ result = evalInsertData(ctx, u)
if result:
res.append(result)
elif u.name == 'DeleteData':
- result = evalDeleteData(ctx, u).get('delta', None)
+ result = evalDeleteData(ctx, u)
if result:
res.append(result)
elif u.name == 'DeleteWhere':
- result = evalDeleteWhere(ctx, u).get('delta', None)
+ result = evalDeleteWhere(ctx, u)
if result:
res.append(result)
elif u.name == 'Modify':
- result = evalModify(ctx, u).get('delta', None)
+ result = evalModify(ctx, u)
if result:
res.append(result)
else:
diff --git a/quit/web/app.py b/quit/web/app.py
index bc3689ed..95587128 100644
--- a/quit/web/app.py
+++ b/quit/web/app.py
@@ -107,9 +107,6 @@ def register_app(app, config):
content = quit.store.store.serialize(format='trig').decode()
logger.debug("Initialize store with following content: {}".format(content))
- logger.debug("Initialize store with following graphs: {}".format(
- quit.config.getgraphurifilemap())
- )
app.config['quit'] = quit
app.config['blame'] = Blame(quit)
diff --git a/quit/web/modules/endpoint.py b/quit/web/modules/endpoint.py
index 3b7c8a36..ddfac470 100644
--- a/quit/web/modules/endpoint.py
+++ b/quit/web/modules/endpoint.py
@@ -98,7 +98,7 @@ def sparql(branch_or_ref):
logger.exception(e)
return make_response('No branch or reference given.', 400)
- if queryType in ['InsertData', 'DeleteData', 'Modify', 'DeleteWhere']:
+ if queryType in ['InsertData', 'DeleteData', 'Modify', 'DeleteWhere', 'Create']:
res, exception = graph.update(parsedQuery)
try:
diff --git a/tests/helpers.py b/tests/helpers.py
index 1e87a3a6..294e2620 100644
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -2,6 +2,7 @@
from pygit2 import init_repository, clone_repository, Signature
from os import path, walk
from os.path import join
+from rdflib import Graph
def createCommit(repository, message=None):
@@ -95,8 +96,43 @@ def withGraph(self, graphUri, graphContent=None):
return tmpRepo
- def withGraphs(self, graphUriContentDict):
+    def noConfigInformations(self, graphContent=''):
+        """Give a TemporaryRepository() with one committed graph.nq and no graph configuration.
+
+        Only the file graph.nq (filled with graphContent, if any) is written
+        and committed; this method writes neither a .graph file nor a config.ttl.
+        """
+        temporary = TemporaryRepository()
+        workdir = temporary.repo.workdir
+
+        # Write the graph file; it stays empty when no content is given
+        with open(path.join(workdir, "graph.nq"), "w") as nqFile:
+            if graphContent:
+                nqFile.write(graphContent)
+
+        # Stage the graph file
+        stage = temporary.repo.index
+        stage.read()
+        stage.add("graph.nq")
+        stage.write()
+
+        # Create the initial commit (no parents)
+        treeId = stage.write_tree()
+        temporary.repo.create_commit('HEAD', self.author, self.comitter, "init", treeId, [])
+
+        return temporary
+
+ def withGraphs(self, graphUriContentDict, mode='graphfiles'):
"""Give a TemporaryRepository() initialized with a dictionary of graphUris and content (nq)."""
+ uristring = ''
+ configFileContent = """@base .
+ @prefix conf: .
+
+ conf:store a ;
+ "git://github.com/aksw/QuitStore.git" ;
+ "{}" .
+ {}"""
+
+ graphResource = """conf:graph{} a ;
+ <{}> ;
+ "{}" ."""
+
tmpRepo = TemporaryRepository()
index = tmpRepo.repo.index
index.read()
@@ -108,14 +144,24 @@ def withGraphs(self, graphUriContentDict):
if graphContent:
graphFile.write(graphContent)
- # Set Graph URI to http://example.org/
- with open(path.join(tmpRepo.repo.workdir, filename + ".graph"), "w") as graphFile:
- graphFile.write(graphUri)
+ if mode == 'graphfiles':
+ # Set Graph URI to http://example.org/
+ with open(path.join(tmpRepo.repo.workdir, filename + ".graph"), "w") as graphFile:
+ graphFile.write(graphUri)
+ index.add(filename + '.graph')
+ elif mode == 'configfile':
+ uristring += graphResource.format(i, graphUri, filename)
# Add and Commit the empty graph
index.add(filename)
- index.add(filename + '.graph')
i += 1
+ if mode == 'configfile':
+ graph = Graph()
+ with open(path.join(tmpRepo.repo.workdir, "config.ttl"), "w") as configFile:
+ rdf_content = configFileContent.format(tmpRepo.repo.workdir, uristring)
+ graph.parse(format='turtle', data=rdf_content)
+ configFile.write(graph.serialize(format='turtle').decode())
+ index.add('config.ttl')
index.write()
diff --git a/tests/test_app.py b/tests/test_app.py
index 5dfa7f47..1c6c125d 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -2,6 +2,7 @@
import os
from os import path
+from urllib.parse import quote_plus
from datetime import datetime
from pygit2 import GIT_SORT_TOPOLOGICAL, Signature
import quit.application as quitApp
@@ -522,7 +523,6 @@ def testSelectFromNamed(self):
"p": {'type': 'uri', 'value': 'urn:y'},
"o": {'type': 'uri', 'value': 'urn:z'}})
-
def testQueryProvenanceViaGet(self):
# Prepate a git Repository
content = ' .'
@@ -637,7 +637,9 @@ def testQueryProvenanceViaPostDirectly(self):
response = app.post('/provenance', data=self.update, headers=headers)
self.assertEqual(response.status_code, 400)
+
class QuitAppTestCase(unittest.TestCase):
+ """Test API and synchronization of Store and Git."""
author = Signature('QuitStoreTest', 'quit@quit.aksw.org')
comitter = Signature('QuitStoreTest', 'quit@quit.aksw.org')
@@ -3191,5 +3193,123 @@ def testWithOnInsertUsing(self):
self.assertEqual('\n', f.read())
+class GraphManagementTests(unittest.TestCase):
+ """Tests for graph management updates sent to the /sparql endpoint."""
+
+ def testCreateGraph(self):
+ """Test that a CREATE GRAPH update leaves the existing graph file unchanged.
+
+ 1. Prepare a git repository with one non empty graph
+ 2. Start Quit
+ 3. execute Update query
+ 4. check that graph_0.nq still has the content it was prepared with
+ """
+ # Prepare a git Repository
+ content = ' .\n'
+ repoContent = {'http://example.org/': content}
+ with TemporaryRepositoryFactory().withGraphs(repoContent) as repo:
+
+ # Start Quit
+ args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles'])
+ objects = quitApp.initialize(args)
+ config = objects['config']
+ app = create_app(config).test_client()
+ # NOTE(review): filename is never used below; the test does not assert that
+ # a file for the created graph exists -- confirm whether that is intended.
+ filename = quote_plus('http://aksw.org/') + '.nq'
+
+ # execute UPDATE query
+ update = 'CREATE GRAPH '
+ app.post('/sparql',
+ content_type="application/sparql-update",
+ data=update)
+
+ # the file of the prepared graph must not have been changed by the update
+ with open(path.join(repo.workdir, 'graph_0.nq'), 'r') as f:
+ self.assertEqual(' .\n', f.read())
+
+
+class FileHandlingTests(unittest.TestCase):
+ """Tests that graphs introduced by SPARQL updates appear as files in the repository."""
+
+ def testNewNamedGraph(self):
+ """Test if a new graph is added to the repository.
+
+ 1. Prepare a git repository with one non empty graph
+ 2. Start Quit
+ 3. execute Update query
+ 4. check filesystem for new .nq and .nq.graph file with expected content
+ """
+ # Prepare a git Repository
+ content = ' .\n'
+ repoContent = {'http://example.org/': content}
+ with TemporaryRepositoryFactory().withGraphs(repoContent) as repo:
+
+ # Start Quit
+ args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles'])
+ objects = quitApp.initialize(args)
+ config = objects['config']
+ app = create_app(config).test_client()
+ filename = quote_plus('http://aksw.org/') + '.nq'
+
+ # neither the graph file nor its .graph file exist before the update
+ self.assertFalse(path.isfile(path.join(repo.workdir, filename)))
+ self.assertFalse(path.isfile(path.join(repo.workdir, filename + '.graph')))
+
+ # execute UPDATE query
+ update = 'INSERT DATA { GRAPH { . } }'
+ app.post('/sparql',
+ content_type="application/sparql-update",
+ data=update)
+
+ # the prepared graph file is unchanged, the new files have been written
+ with open(path.join(repo.workdir, 'graph_0.nq'), 'r') as f:
+ self.assertEqual(' .\n', f.read())
+ with open(path.join(repo.workdir, filename), 'r') as f:
+ self.assertEqual(' .\n', f.read())
+ with open(path.join(repo.workdir, filename + '.graph'), 'r') as f:
+ self.assertEqual('http://aksw.org/', f.read())
+
+ def testNewNamedGraphConfigfile(self):
+ """Test if a new graph is added to the repository and to config.ttl.
+
+ 1. Prepare a git repository (created in 'configfile' mode) with one non empty graph
+ 2. Start Quit
+ 3. execute Update query
+ 4. check filesystem and configfile content (before/after)
+ """
+ # Prepare a git Repository
+ content = ' .\n'
+ repoContent = {'http://example.org/': content}
+ with TemporaryRepositoryFactory().withGraphs(repoContent, 'configfile') as repo:
+
+ # Start Quit
+ # NOTE(review): '-cm graphfiles' is passed although the repository was
+ # created in 'configfile' mode -- confirm that this argument is still evaluated.
+ args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles'])
+ objects = quitApp.initialize(args)
+ config = objects['config']
+ app = create_app(config).test_client()
+
+ with open(path.join(repo.workdir, 'config.ttl'), 'r') as f:
+ configfile_before = f.read()
+
+ # execute INSERT DATA query
+ update = 'INSERT DATA { GRAPH { . } }'
+ app.post('/sparql',
+ content_type="application/sparql-update",
+ data=update)
+
+ filename = quote_plus('http://aksw.org/') + '.nq'
+
+ with open(path.join(repo.workdir, 'graph_0.nq'), 'r') as f:
+ self.assertEqual(' .\n', f.read())
+ with open(path.join(repo.workdir, filename), 'r') as f:
+ self.assertEqual(' .\n', f.read())
+ with open(path.join(repo.workdir, 'config.ttl'), 'r') as f:
+ configfile_after = f.read()
+
+ # compare the stripped lines of both versions; diff holds the lines that
+ # only occur in the config file after the update
+ config_before = [x.strip() for x in configfile_before.split('\n')]
+ config_after = [x.strip() for x in configfile_after.split('\n')]
+ diff = list(set(config_after) - set(config_before))
+
+ self.assertFalse('ns1:graphFile "' + filename + '" ;' in config_before)
+ self.assertFalse('ns1:hasFormat "nquads" .' in config_before)
+ self.assertFalse('ns1:graphUri ;' in config_before)
+
+ self.assertTrue('ns1:graphFile "' + filename + '" ;' in diff)
+ self.assertTrue('ns1:hasFormat "nquads" .' in diff)
+ self.assertTrue('ns1:graphUri ;' in diff)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/tests/test_conf.py b/tests/test_conf.py
index 53e57fa8..db78c331 100644
--- a/tests/test_conf.py
+++ b/tests/test_conf.py
@@ -6,280 +6,137 @@
from os.path import join, isdir
from pygit2 import init_repository, Repository, clone_repository
from pygit2 import GIT_SORT_TOPOLOGICAL, GIT_SORT_REVERSE, Signature
-from quit.conf import QuitConfiguration
+from quit.conf import QuitStoreConfiguration, QuitGraphConfiguration
from quit.exceptions import MissingConfigurationError, InvalidConfigurationError
from quit.exceptions import MissingFileError
from distutils.dir_util import copy_tree, remove_tree
+from helpers import TemporaryRepository, TemporaryRepositoryFactory
from tempfile import TemporaryDirectory, NamedTemporaryFile
import rdflib
-class TestConfiguration(unittest.TestCase):
-
- def setUp(self):
- self.ns = 'http://quit.instance/'
- self.testData = './tests/samples/configuration_test'
- self.local = './tests/samples/local'
- self.remote = '.tests/samples/remote'
- copy_tree(self.testData, self.local)
- copy_tree(self.testData, self.remote)
- self.localConfigFile = join(self.local, 'config.ttl')
- self.remoteConfigFile = join(self.local, 'config.ttl')
- tempRepoLine = ' "' + self.local + '" .'
-
- with open(self.localConfigFile) as f:
- content = f.readlines()
-
- remove(self.localConfigFile)
-
- with open(self.localConfigFile, 'w+') as f:
- for line in content:
- if line.startswith(' .\n'
+ repoContent = {'http://example.org/': content1, 'http://aksw.org/': content2}
+ with TemporaryRepositoryFactory().withGraphs(repoContent) as repo:
+ conf = QuitGraphConfiguration(repository=repo)
+ conf.initgraphconfig('master')
+
+ graphs = conf.getgraphs()
+ self.assertEqual(
+ sorted([str(x) for x in graphs]), ['http://aksw.org/', 'http://example.org/'])
+
+ files = conf.getfiles()
+ self.assertEqual(sorted(files), ['graph_0.nq', 'graph_1.nq'])
+
+ serialization = conf.getserializationoffile('graph_0.nq')
+ self.assertEqual(serialization, 'nquads')
+
+ serialization = conf.getserializationoffile('graph_1.nq')
+ self.assertEqual(serialization, 'nquads')
+ gfMap = conf.getgraphurifilemap()
+
+ self.assertEqual(gfMap, {
+ rdflib.term.URIRef('http://aksw.org/'): 'graph_0.nq',
+ rdflib.term.URIRef('http://example.org/'): 'graph_1.nq'
+ })
+
+ self.assertEqual(
+ [str(x) for x in conf.getgraphuriforfile('graph_0.nq')],
+ ['http://aksw.org/']
+ )
+ self.assertEqual(
+ [str(x) for x in conf.getgraphuriforfile('graph_1.nq')],
+ ['http://example.org/']
+ )
+ self.assertEqual(conf.getfileforgraphuri('http://aksw.org/'), 'graph_0.nq')
+ self.assertEqual(conf.getfileforgraphuri('http://example.org/'), 'graph_1.nq')
+
+ def testExistingRepoConfigfile(self):
+ """Check the graph configuration read from a repository created in 'configfile' mode.
+
+ After initgraphconfig('master') the known graphs, files, serializations and the
+ graph/file mappings are compared with the two prepared graphs.
+ """
+ content1 = ' .'
+ content2 = ' .\n'
+ content2 += ' .'
+ repoContent = {'http://example.org/': content1, 'http://aksw.org/': content2}
+ with TemporaryRepositoryFactory().withGraphs(repoContent, 'configfile') as repo:
+ conf = QuitGraphConfiguration(repository=repo)
+ conf.initgraphconfig('master')
+
+ # known graph URIs
+ graphs = conf.getgraphs()
+ self.assertEqual(sorted([str(x) for x in graphs]), ['http://aksw.org/', 'http://example.org/'])
+
+ # known files
+ files = conf.getfiles()
+ self.assertEqual(sorted(files), ['graph_0.nq', 'graph_1.nq'])
+
+ # serialization of each file
+ serialization = conf.getserializationoffile('graph_0.nq')
+ self.assertEqual(serialization, 'nquads')
+ serialization = conf.getserializationoffile('graph_1.nq')
+ self.assertEqual(serialization, 'nquads')
+
+ # mapping from graph URI to file and the lookups in both directions
+ gfMap = conf.getgraphurifilemap()
+ self.assertEqual(gfMap, {
+ rdflib.term.URIRef('http://aksw.org/'): 'graph_0.nq',
+ rdflib.term.URIRef('http://example.org/'): 'graph_1.nq'
+ })
+
+ self.assertEqual(
+ [str(x) for x in conf.getgraphuriforfile('graph_0.nq')],
+ ['http://aksw.org/']
+ )
+ self.assertEqual(
+ [str(x) for x in conf.getgraphuriforfile('graph_1.nq')], ['http://example.org/']
+ )
+ self.assertEqual(conf.getfileforgraphuri('http://aksw.org/'), 'graph_0.nq')
+ self.assertEqual(conf.getfileforgraphuri('http://example.org/'), 'graph_1.nq')
+
+ def testInitWithMissingInformation(self):
+ """Initializing the graph configuration without graphfiles and configfile must fail.
+
+ The repository only contains a graph.nq, so initgraphconfig('master') is
+ expected to raise an InvalidConfigurationError.
+ """
+ with TemporaryRepositoryFactory().noConfigInformations() as repo:
+
+ conf = QuitGraphConfiguration(repository=repo)
+ self.assertRaises(InvalidConfigurationError, conf.initgraphconfig, 'master')
- self.assertEqual([str(x) for x in conf.getgraphuriforfile('example1.nq')], ['http://example.org/discovered/'])
- self.assertEqual([str(x) for x in conf.getgraphuriforfile('example2.nt')], ['http://example.org/2/'])
- self.assertEqual(conf.getfileforgraphuri('http://example.org/discovered/'), 'example1.nq')
- self.assertEqual(conf.getfileforgraphuri('http://example.org/2/'), 'example2.nt')
def main():
unittest.main()