diff --git a/quit/application.py b/quit/application.py index cde40437..fbc433c9 100644 --- a/quit/application.py +++ b/quit/application.py @@ -1,7 +1,7 @@ import argparse import sys import os -from quit.conf import Feature, QuitConfiguration +from quit.conf import Feature, QuitStoreConfiguration from quit.exceptions import InvalidConfigurationError import rdflib.plugins.sparql from rdflib.plugins.sparql.algebra import SequencePath @@ -98,11 +98,10 @@ def sequencePathCompareGt(self, other): 'quit.plugins.serializers.results.htmlresults', 'HTMLResultSerializer') try: - config = QuitConfiguration( + config = QuitStoreConfiguration( configfile=args.configfile, targetdir=args.targetdir, - repository=args.repourl, - configmode=args.configmode, + upstream=args.repourl, features=args.features, namespace=args.namespace, ) @@ -111,14 +110,9 @@ def sequencePathCompareGt(self, other): sys.exit('Exiting quit') # since repo is handled, we can add graphs to config - config.initgraphconfig() - logger.info('QuitStore successfully running.') - logger.info('Known graphs: ' + str(config.getgraphs())) - logger.info('Known files: ' + str(config.getfiles())) + logger.info('QuitStore Configuration initialized.') logger.debug('Path of Gitrepo: ' + config.getRepoPath()) - logger.debug('Config mode: ' + str(config.getConfigMode())) - logger.debug('All RDF files found in Gitepo:' + str(config.getgraphsfromdir())) return {'config': config} diff --git a/quit/conf.py b/quit/conf.py index 6b7f9468..277a3d34 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -1,8 +1,9 @@ import logging import os +from pygit2 import Repository from os import walk -from os.path import join, isfile +from os.path import join, isfile, relpath from quit.exceptions import MissingConfigurationError, InvalidConfigurationError from quit.exceptions import UnknownConfigurationError from quit.helpers import isAbsoluteUri @@ -25,14 +26,16 @@ class Feature: class QuitConfiguration: - """A class that keeps track of the relation between named graphs and files.""" + quit = Namespace('http://quit.aksw.org/vocab/') + +class QuitStoreConfiguration(QuitConfiguration): + """A class that provides information about settings, filesystem and git.""" def __init__( self, - configmode=None, configfile='config.ttl', features=None, - repository=None, + upstream=None, targetdir=None, namespace=None ): @@ -48,33 +51,24 @@ def __init__( self.features = features self.configchanged = False self.sysconf = Graph() - self.graphconf = None - self.origin = None - self.graphs = {} - self.files = {} + self.upstream = None self.namespace = None - self.quit = Namespace('http://quit.aksw.org/vocab/') self.nsMngrSysconf = NamespaceManager(self.sysconf) self.nsMngrSysconf.bind('', 'http://quit.aksw.org/vocab/', override=False) - self.nsMngrGraphconf = NamespaceManager(self.sysconf) - self.nsMngrGraphconf.bind('', 'http://quit.aksw.org/vocab/', override=False) try: self.__initstoreconfig( namespace=namespace, - repository=repository, + upstream=upstream, targetdir=targetdir, - configfile=configfile, - configmode=configmode + configfile=configfile ) except InvalidConfigurationError as e: logger.error(e) raise e - return - - def __initstoreconfig(self, namespace, repository, targetdir, configfile, configmode): + def __initstoreconfig(self, namespace, upstream, targetdir, configfile): """Initialize store settings.""" if isAbsoluteUri(namespace): self.namespace = namespace @@ -93,214 +87,246 @@ def __initstoreconfig(self, namespace, repository, targetdir, configfile, config except PermissionError: raise InvalidConfigurationError( "Configuration file could not be parsed. Permission denied. {}".format( - configfile - ) - ) + configfile)) except Exception as e: - raise UnknownConfigurationError( - "UnknownConfigurationError: {}".format(e) - ) + raise UnknownConfigurationError("UnknownConfigurationError: {}".format(e)) self.configfile = configfile else: if not targetdir: raise InvalidConfigurationError('No target directory for git repo given') - if configmode: - self.setConfigMode(configmode) - if targetdir: self.setRepoPath(targetdir) - if repository: - self.setGitOrigin(repository) + if upstream: + self.setGitUpstream(upstream) return - def initgraphconfig(self): - """Initialize graph settings. + def hasFeature(self, flags): + return flags == (self.features & flags) - Public method to initalize graph settings. This method will be run only once. + def getBindings(self): + ns = Namespace('http://quit.aksw.org/vocab/') + q = """SELECT DISTINCT ?prefix ?namespace WHERE {{ + {{ + ?ns a <{binding}> ; + <{predicate_prefix}> ?prefix ; + <{predicate_namespace}> ?namespace . + }} + }}""".format( + binding=ns['Binding'], predicate_prefix=ns['prefix'], + predicate_namespace=ns['namespace'] + ) + + result = self.sysconf.query(q) + return [(row['prefix'], row['namespace']) for row in result] + + def getDefaultBranch(self): + """Get the default branch on the Git repository from configuration. + + Returns: + A string containing the branch name. """ - if self.graphconf is None: - self.__initgraphconfig() - - def __initgraphconfig(self, repository=None, targetdir=None): - """Initialize graph settings.""" - self.graphconf = Graph() - configmode = self.getConfigMode() - logger.debug("Graph Config mode is: {}".format(configmode)) - - if configmode == 'localconfig': - self.__initgraphsfromconf(self.configfile) - elif configmode == 'repoconfig': - remConfigFile = join(self.getRepoPath(), 'config.ttl') - self.__initgraphsfromconf(remConfigFile) - elif configmode == 'graphfiles': - self.__initgraphsfromdir(self.getRepoPath()) - else: - raise InvalidConfigurationError('This mode is not supported.', self.configmode) - return + nsQuit = 'http://quit.aksw.org/vocab/' + storeuri = URIRef('http://my.quit.conf/store') + property = URIRef(nsQuit + 'defaultBranch') - def __initgraphsfromdir(self, repodir): - """Init a repository by analyzing all existing files.""" - graphs = self.getgraphsfromdir(repodir) - repopath = self.getRepoPath() - - for file, format in graphs.items(): - absgraphfile = os.path.join(repopath, file + '.graph') - graphuri = self.__readGraphIriFile(absgraphfile) - - if graphuri and format == 'nquads': - self.addgraph(file=file, graphuri=graphuri, format=format) - elif graphuri is None and format == 'nquads': - tmpgraph = ConjunctiveGraph(identifier='default') - - try: - tmpgraph.parse(source=os.path.join(repopath, file), format=format) - except Exception: - logger.error( - "Could not parse graphfile {}. File skipped.".format(file) - ) - continue - - namedgraphs = tmpgraph.contexts() - founduris = [] - - for graph in namedgraphs: - if not isinstance(graph, BNode) and str(graph.identifier) != 'default': - graphuri = graph.identifier - founduris.append(graphuri) - - if len(founduris) == 1: - self.addgraph(file=file, graphuri=graphuri, format=format) - elif len(founduris) > 1: - logger.info("No named graph found. {} skipped.".format(file)) - - elif len(founduris) < 1: - logger.info( - "More than one named graphs found. Can't decide. {} skipped.".format( - file - ) - ) - - elif format == 'nt': - if graphuri: - self.addgraph(file=file, graphuri=graphuri, format=format) - else: - logger.warning('No *.graph file found. ' + file + ' skipped.') + for s, p, o in self.sysconf.triples((None, property, None)): + return str(o) - try: - self.__setgraphsfromconf() - except InvalidConfigurationError as e: - raise e + return "master" - def __initgraphsfromconf(self, configfile): - """Init graphs with setting from config.ttl.""" - if not isfile(configfile): - raise MissingConfigurationError("Configfile is missing {}".format(configfile)) + def getGlobalFile(self): + """Get the graph file which should be used for unassigned graphs. - try: - self.graphconf.parse(configfile, format='turtle') - except Exception as e: - raise InvalidConfigurationError( - "Configfile could not be parsed {} {}".format(configfile, e) - ) + Returns + The filename of the graph file where unassigned graphs should be stored. - # Get Graphs - self.__setgraphsfromconf() + """ + nsQuit = 'http://quit.aksw.org/vocab/' + storeuri = URIRef('http://my.quit.conf/store') + property = URIRef(nsQuit + 'globalFile') - def __readGraphIriFile(self, graphfile): - """Search for a graph uri in graph file and return it. + for s, p, o in self.sysconf.triples((None, property, None)): + return str(o) - Args: - graphfile: String containing the path of a graph file + def getRepoPath(self): + """Get the path of Git repository from configuration. Returns: - graphuri: String with the graph URI + A string containig the path of the git repo. """ - try: - with open(graphfile, 'r') as f: - graphuri = f.readline().strip() - except FileNotFoundError: - logger.debug("File not found {}".format(graphfile)) - return + nsQuit = 'http://quit.aksw.org/vocab/' + storeuri = URIRef('http://my.quit.conf/store') + property = URIRef(nsQuit + 'pathOfGitRepo') + + for s, p, o in self.sysconf.triples((None, property, None)): + return str(o) + + def getUpstream(self): + """Get the URI of Git remote from configuration.""" + nsQuit = 'http://quit.aksw.org/vocab/' + storeuri = URIRef('http://my.quit.conf/store') + property = self.quit.upstream + + for s, p, o in self.sysconf.triples((storeuri, property, None)): + return str(o) + + def setUpstream(self, origin): + self.sysconf.remove((None, self.quit.origin, None)) + self.sysconf.add((self.quit.Store, self.quit.upstream, Literal(origin))) + + return + + def setRepoPath(self, path): + self.sysconf.remove((None, self.quit.pathOfGitRepo, None)) + self.sysconf.add((self.quit.Store, self.quit.pathOfGitRepo, Literal(path))) + + return + + +class QuitGraphConfiguration(QuitConfiguration): + """A class that keeps track of the relation between named graphs and files.""" + + def __init__(self, repository): + """The init method. + + This method checks if the config file is given and reads the config file. + If the config file is missing, it will be generated after analyzing the + file structure. + """ + logger = logging.getLogger('quit.conf.QuitConfiguration') + logger.debug('Initializing configuration object.') + + self.repository = repository + self.configfile = None + self.mode = None + self.graphconf = None + self.graphs = {} + self.files = {} + + def initgraphconfig(self, rev): + """Initialize graph settings. + Public method to initalize graph settings. This method will be run only once. + """ + if self.graphconf is None: + self.graphconf = Graph() + self.nsMngrGraphconf = NamespaceManager(self.graphconf) + self.nsMngrGraphconf.bind('', 'http://quit.aksw.org/vocab/', override=False) + + grphfile_count, conf_file_count, configured, blobs = self.get_blobs_from_repository(rev) + +<<<<<<< HEAD + if len(graph_files) == 0 and len(config_files) == 0: + self.mode = 'graphfiles' + elif len(graph_files) > 0 and len(config_files) > 0: +======= + if grphfile_count == 0 and conf_file_count == 0: + raise InvalidConfigurationError( + "Did not find graphfiles or a QuitStore configuration file.") + elif grphfile_count > 0 and conf_file_count > 0: +>>>>>>> 3db2d87... Work on graph management + raise InvalidConfigurationError( + "Conflict. Found graphfiles and QuitStore configuration file.") + elif grphfile_count > 0: + self.mode = 'graphfiles' + self.__init_graph_conf_with_blobs(configured, rev, blobs) + elif conf_file_count == 1: + self.mode = 'configuration' + self.__init_graph_conf_from_configuration(blobs['config.ttl'], blobs) + else: + raise InvalidConfigurationError( + "Conflict. Found more than one QuitStore configuration file.") + + def __init_graph_conf_with_blobs(self, files, rev, known_blobs): + """Init graph configuration if graphfile contains a valid IRI.""" + for filename in files: + format = known_blobs[filename][1] + oid = known_blobs[filename][0] + graphFileId = known_blobs[filename + '.graph'] + graphuri = URIRef(self.__get_uri_from_graphfile_blob(graphFileId)) + + if graphuri: + self.graphs[graphuri] = filename + self.files[filename] = { + 'serialization': format, 'graphs': [graphuri], 'oid': oid} + self.files[filename + '.graph'] = {'oid': graphFileId} + + def __init_graph_conf_from_configuration(self, configfileId, known_blobs): + """Init graphs with setting from config.ttl.""" try: - urlparse(graphuri) - logger.debug("Graph URI {} found in {}".format(graphuri, graphfile)) - except Exception: - graphuri = None - logger.debug("No graph URI found in {}".format(graphfile)) + configfile = self.repository.get(configfileId) + except Exception as e: + raise InvalidConfigurationError( + "Blob for configfile with id {} not found in repository {}".format(configfileId, e)) + + content = configfile.read_raw() - return graphuri + try: + self.graphconf.parse(data=content, format='turtle') + except Exception as e: + raise InvalidConfigurationError( + "Configfile could not be parsed {} {}".format(configfileId, e) + ) + self.files['config.ttl'] = {'oid': configfileId} - def __setgraphsfromconf(self): - """Set all URIs and file paths of graphs that are configured in config.ttl.""" nsQuit = 'http://quit.aksw.org/vocab/' - query = 'SELECT DISTINCT ?graphuri ?filename WHERE { ' + query = 'SELECT DISTINCT ?graphuri ?filename ?format WHERE { ' query += ' ?graph a <' + nsQuit + 'Graph> . ' query += ' ?graph <' + nsQuit + 'graphUri> ?graphuri . ' query += ' ?graph <' + nsQuit + 'graphFile> ?filename . ' + query += ' OPTIONAL { ?graph <' + nsQuit + 'hasFormat> ?format .} ' query += '}' result = self.graphconf.query(query) - repopath = self.getRepoPath() - for row in result: filename = str(row['filename']) - format = guess_format(filename) + if row['format'] is None: + format = guess_format(filename) + else: + format = str(row['format']) if format not in ['nt', 'nquads']: break + if filename not in known_blobs.keys(): + break - graphuri = str(row['graphuri']) + graphuri = URIRef(str(row['graphuri'])) - graphFile = join(repopath, filename) + # we store which named graph is serialized in which file + self.graphs[graphuri] = filename + self.files[filename] = { + 'serialization': format, 'graphs': [graphuri], 'oid': known_blobs[filename]} - if isfile(graphFile): - # everything is fine - pass - else: - try: - open(graphFile, 'a+').close() - except PermissionError: - raise InvalidConfigurationError( - "Permission denied. Can't create file {} in repo {}".format( - graphFile, - self.getRepoPath() - ) - ) - except FileNotFoundError: - raise InvalidConfigurationError( - "File not found. Can't create file {} in repo {}".format( - graphFile, - self.getRepoPath() - ) - ) - except Exception as e: - raise UnknownConfigurationError( - "Can't create file {} in repo {}. Error: {}".format( - graphFile, - self.getRepoPath(), - e - ) - ) + def __get_uri_from_graphfile_blob(self, oid): + """Search for a graph uri in graph file and return it. - graphuri = URIRef(graphuri) + Args + ---- + oid: String oid of a graph file - # we store which named graph is serialized in which file - self.graphs[graphuri] = filename - # and furthermore we assume that one file can contain data of more - # than one named graph and so we store for each file a set of graphs - if filename in self.files: - self.files[filename]['graphs'].append(graphuri) - else: - self.files[filename] = { - 'serialization': format, - 'graphs': [graphuri] - } + Returns + ------- + graphuri: String with the graph URI - return + """ + try: + blob = self.repository.get(oid) + except ValueError: + logger.debug("Object with OID { } not found in repository.".format(oid)) + return + + content = blob.read_raw().decode().strip() + + try: + urlparse(content) + except Exception: + logger.debug("No graph URI found in blob with OID {}.".format(oid)) + return + + return content def addgraph(self, graphuri, file, format=None): self.graphconf.add((self.quit[quote(graphuri)], RDF.type, self.quit.Graph)) @@ -316,130 +342,69 @@ def removegraph(self, graphuri): return - def getConfigMode(self): - """Get the mode how Quit-Store detects RDF files and named graphs. - - Returns: - A string containig the mode. - """ - nsQuit = 'http://quit.aksw.org/vocab/' - property = URIRef(nsQuit + 'configMode') - - for s, p, o in self.sysconf.triples((None, property, None)): - return str(o) - - return 'graphfiles' - - def getRepoPath(self): - """Get the path of Git repository from configuration. - - Returns: - A string containig the path of the git repo. - """ - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'pathOfGitRepo') - - for s, p, o in self.sysconf.triples((None, property, None)): - return str(o) - - def getDefaultBranch(self): - """Get the default branch on the Git repository from configuration. - - Returns: - A string containing the branch name. - """ - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'defaultBranch') - - for s, p, o in self.sysconf.triples((None, property, None)): - return str(o) - - return "master" - - def getGlobalFile(self): - """Get the graph file which should be used for unassigned graphs. - - Returns: - The filename of the graph file where unassigned graphs should be stored. - """ - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'globalFile') - - for s, p, o in self.sysconf.triples((None, property, None)): - return str(o) - - def getOrigin(self): - """Get the URI of Git remote from configuration.""" - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'origin') - - for s, p, o in self.sysconf.triples((storeuri, property, None)): - return str(o) - def getgraphs(self): """Get all graphs known to conf. - Returns: + Returns + ------- A list containig all graph uris as string, - """ - graphs = [] - for graph in self.graphs: - graphs.append(graph) - return graphs + """ + return self.graphs def getfiles(self): """Get all files known to conf. - Returns: + Returns + ------- A list containig all files as string, - """ - files = [] - for file in self.files: - files.append(file) - return files + """ + return self.files def getfileforgraphuri(self, graphuri): """Get the file for a given graph uri. - Args: + Args + ---- graphuri: A String of the named graph - Returns: + Returns + ------- A string of the path to the file asociated with named graph + """ if isinstance(graphuri, str): graphuri = URIRef(graphuri) - for uri, filename in self.graphs.items(): - if uri == graphuri: - return filename + + if graphuri in self.graphs.keys(): + return self.graphs[graphuri] return def getgraphurifilemap(self): """Get the dictionary of graphuris and their files. - Returns: - A dictionary of graphuris and information about their files. - """ + Returns + ------- + A dictionary of graphuris and information about their representation in repository. + """ return self.graphs def getserializationoffile(self, file): """Get the file for a given graph uri. - Args: + Args + ---- file: A String of a file path - Returns: + Returns + ------- A string containing the RDF serialization of file + """ - if file in self.files: + if file in self.files.keys(): return self.files[file]['serialization'] return @@ -447,72 +412,65 @@ def getserializationoffile(self, file): def getgraphuriforfile(self, file): """Get the file for a given graph uri. - Args: + Args + ---- file: A String of a file path - Returns: + Returns + ------- A set containing strings of graph uris asociated to that file + """ if file in self.files: return self.files[file]['graphs'] return [] - def getgraphsfromdir(self, path=None): - """Get the files that are part of the repository (tracked or not). - - Returns: - A list of filepathes. - """ - if path is None: - path = self.getRepoPath() - - exclude = set(['.git']) - - graphfiles = {} - for dirpath, dirs, files in walk(path): - dirs[:] = [d for d in dirs if d not in exclude] - for filename in files: - - format = guess_format(join(dirpath, filename)) - if format is not None: - graphfiles[filename] = format - - return graphfiles + def get_blobs_from_repository(self, rev): + """Analyze all blobs of a revision. - def hasFeature(self, flags): - return flags == (self.features & flags) - - def setConfigMode(self, mode): - self.sysconf.remove((None, self.quit.configMode, None)) - self.sysconf.add((self.quit.Store, self.quit.configMode, Literal(mode))) - - return - - def setGitOrigin(self, origin): - self.sysconf.remove((None, self.quit.origin, None)) - self.sysconf.add((self.quit.Store, self.quit.origin, Literal(origin))) - - return + Returns + ------- + A triple (dictionary, list, dictionary) + dict: containg names of rdf-files plus their format and oid of graph file. + list: containing names of config files. + dict: containing names rdf files plus format and oid. - def setRepoPath(self, path): - self.sysconf.remove((None, self.quit.pathOfGitRepo, None)) - self.sysconf.add((self.quit.Store, self.quit.pathOfGitRepo, Literal(path))) - - return - - def getBindings(self): - ns = Namespace('http://quit.aksw.org/vocab/') - q = """SELECT DISTINCT ?prefix ?namespace WHERE {{ - {{ - ?ns a <{binding}> ; - <{predicate_prefix}> ?prefix ; - <{predicate_namespace}> ?namespace . - }} - }}""".format( - binding=ns['Binding'], predicate_prefix=ns['prefix'], - predicate_namespace=ns['namespace'] - ) - - result = self.sysconf.query(q) - return [(row['prefix'], row['namespace']) for row in result] + """ +<<<<<<< HEAD + config_files = [] + graph_files = {} + graph_file_blobs = {} + rdf_file_blobs = {} + try: + commit = self.repository.revparse_single(rev) + except Exception: + return graph_files, config_files, rdf_file_blobs +======= + commit = self.repository.revparse_single(rev) + config_files_count = 0 + graph_files_count = 0 + relevant_blobs = {} +>>>>>>> 3db2d87... Work on graph management + + # Collect graph files, rdf files and config files + for entry in commit.tree: + if entry.type == 'blob': + print(entry.name, entry.id) + format = guess_format(entry.name) + if format is None and entry.name.endswith('.graph'): + graph_files_count += 1 + relevant_blobs[entry.name] = (str(entry.id)) + elif format is not None and format in ['nquads', 'nt']: + relevant_blobs[entry.name] = (str(entry.id), format) + elif format is not None and entry.name == 'config.ttl': + config_files_count += 1 + relevant_blobs[entry.name] = (str(entry.id)) + + # collect pairs of rdf files and graph files + graphfiles_configured = [] + for filename in relevant_blobs.keys(): + if filename + '.graph' in relevant_blobs.keys(): + graphfiles_configured.append(filename) + + return graph_files_count, config_files_count, graphfiles_configured, relevant_blobs diff --git a/quit/core.py b/quit/core.py index 69951b91..de7c72b9 100644 --- a/quit/core.py +++ b/quit/core.py @@ -2,20 +2,24 @@ import logging +from copy import copy + from pygit2 import GIT_MERGE_ANALYSIS_UP_TO_DATE from pygit2 import GIT_MERGE_ANALYSIS_FASTFORWARD from pygit2 import GIT_MERGE_ANALYSIS_NORMAL from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT from rdflib import Graph, ConjunctiveGraph, BNode, Literal -from rdflib.plugins.serializers.nquads import _nq_row as _nq -from quit.conf import Feature +from quit.conf import Feature, QuitGraphConfiguration +from quit.helpers import applyChangeset from quit.namespace import RDFS, FOAF, XSD, PROV, QUIT, is_a from quit.graphs import RewriteGraph, InMemoryAggregatedGraph from quit.utils import graphdiff, git_timestamp from quit.cache import Cache, FileReference +from urllib.parse import quote_plus + import subprocess logger = logging.getLogger('quit.core') @@ -98,6 +102,7 @@ def __init__(self, config, repository, store): self.store = store self._commits = Cache() self._blobs = Cache() + self._graphconfigs = Cache() def _exists(self, cid): uri = QUIT['commit-' + cid] @@ -290,7 +295,10 @@ def changeset(self, commit): g.addN((s, p, o, op_uri) for s, p, o in triples) # Entities - map = self.config.getgraphurifilemap() + if commit.id not in self._graphconfigs: + self.updateGraphConfig(commit.id) + + map = self._graphconfigs.get(commit.id).getgraphurifilemap() for entity in commit.node().entries(recursive=True): # todo check if file was changed @@ -299,7 +307,7 @@ def changeset(self, commit): if entity.name not in map.values(): continue - graphUris = self.config.getgraphuriforfile(entity.name) + graphUris = self._graphconfigs.get(commit.id).getgraphuriforfile(entity.name) graphsFromConfig = set((Graph(identifier=i) for i in graphUris)) blob = (entity.name, entity.oid) @@ -357,10 +365,14 @@ def getFilesForCommit(self, commit): On Cache miss this method also updates the commits cache. """ - uriFileMap = self.config.getgraphurifilemap() if commit.id not in self._commits: + if commit.id not in self._graphconfigs: + self.updateGraphConfig(commit.id) + + uriFileMap = self._graphconfigs.get(commit.id).getgraphurifilemap() blobs = set() + for entity in commit.node().entries(recursive=True): if entity.is_file: if entity.name not in uriFileMap.values(): @@ -372,25 +384,27 @@ def getFilesForCommit(self, commit): return self._commits.get(commit.id) def getFileReferenceAndContext(self, blob, commit): - """Get the FielReference and Context for a given blob (name, oid) of a commit. + """Get the FileReference and Context for a given blob (name, oid) of a commit. - On Cache miss this method also updates teh commits cache. + On Cache miss this method also updates the commits cache. """ - uriFileMap = self.config.getgraphurifilemap() + if commit.id not in self._graphconfigs: + self.updateGraphConfig(commit.id) + + uriFileMap = self._graphconfigs.get(commit.id).getgraphurifilemap() if blob not in self._blobs: (name, oid) = blob content = commit.node(path=name).content # content = self.repository._repository[oid].data - graphUris = self.config.getgraphuriforfile(name) + graphUris = self._graphconfigs.get(commit.id).getgraphuriforfile(name) graphsFromConfig = set((Graph(identifier=i) for i in graphUris)) tmp = ConjunctiveGraph() tmp.parse(data=content, format='nquads') contexts = set((context for context in tmp.contexts(None) if context.identifier in uriFileMap)) | graphsFromConfig quitWorkingData = (FileReference(name, content), contexts) - self._blobs.set( - blob, quitWorkingData) + self._blobs.set(blob, quitWorkingData) return quitWorkingData return self._blobs.get(blob) @@ -428,19 +442,77 @@ def build_message(message, kwargs): out.append('{}: "{}"'.format(k, v.replace('"', "\\\""))) return "\n".join(out) - def _apply(f, changeset, identifier): - """Update the FileReference (graph uri) of a file with help of the changeset.""" - for (op, triples) in changeset: - if op == 'additions': - for triple in triples: - # the internal _nq serializer appends '\n' - line = _nq(triple, identifier).rstrip() - f.add(line) - elif op == 'removals': - for triple in triples: - # the internal _nq serializer appends '\n' - line = _nq(triple, identifier).rstrip() - f.remove(line) + def prepare_commit(): + removed = set() + touched = set() + commit_objects = {} + + # find all named graphs that occur in update + for update in delta: + if update['type'] in ['ADD', 'CLEAR', 'CREATE', 'COPY']: + touched.add(update['graph']) + elif update == 'COPY': + touched.add(update['src_graph']) + touched.add(update['dst_graph']) + elif update['type'] in ['DROP', 'MOVE']: + removed.add(update['src_graph']) + touched.add(update['dst_graph']) + else: + for identifier, changeset in update['delta'].items(): + touched.add(identifier) + + file_map = graphconfig.files + graphs = graphconfig.graphs + + for identifier in set(touched | removed): + file_name = graphconfig.getfileforgraphuri(str(identifier)) + if file_name: + file_oid = file_map[file_name]['oid'] + blob = (file_name, file_oid) + else: # unknown named graph + fr = FileReference(quote_plus(str(identifier)), '') + + # commit_objects[identifier] = { + # } + + def _applyKnownGraphs(delta, blobs): + blobs_new = set() + for blob in blobs: + (fileName, oid) = blob + try: + file_reference, contexts = self.getFileReferenceAndContext(blob, parent_commit) + for context in contexts: + for entry in delta: + changeset = entry.get(context.identifier, None) + + if changeset: + applyChangeset(file_reference, changeset, context.identifier) + del(entry[context.identifier]) + + index.add(file_reference.path, file_reference.content) + + self._blobs.remove(blob) + blob = fileName, index.stash[file_reference.path][0] + self._blobs.set(blob, (file_reference, contexts)) + blobs_new.add(blob) + except KeyError: + pass + return blobs_new + + def _applyUnknownGraphs(delta): + new_contexts = {} + for entry in delta: + for identifier, changeset in entry.items(): + if isinstance(identifier, BNode) or str(identifier) == 'default': + continue # TODO default graph + + fileName = quote_plus(identifier + '.nq') + if identifier not in new_contexts.keys(): + new_contexts[identifier] = FileReference(fileName, '') + + fileReference = new_contexts[identifier] + applyChangeset(fileReference, changeset, identifier) + return new_contexts if not delta: return @@ -460,40 +532,72 @@ def _apply(f, changeset, identifier): pass index = self.repository.index(parent_commit_id) - for blob in blobs: - (fileName, oid) = blob - try: - file_reference, contexts = self.getFileReferenceAndContext(blob, parent_commit) - for context in contexts: - for entry in delta: - changeset = entry.get(context.identifier, None) - - if changeset: - _apply(file_reference, changeset, context.identifier) - del(entry[context.identifier]) + if parent_commit_id not in self._graphconfigs: + self.updateGraphConfig(parent_commit_id) - index.add(file_reference.path, file_reference.content) + graphconfig = self._graphconfigs.get(parent_commit_id) - self._blobs.remove(blob) - blob = fileName, index.stash[file_reference.path][0] - self._blobs.set(blob, (file_reference, contexts)) - blobs_new.add(blob) - except KeyError: - pass - - unassigned = set() - f_name = self.config.getGlobalFile() or 'unassigned.nq' - f_new = FileReference(f_name, "") - for entry in delta: - for identifier, changeset in entry.items(): - unassigned.add(graph.store.get_context(identifier)) - _apply(f_new, changeset, graph.store.identifier) - - index.add(f_new.path, f_new.content) - - blob = f_name, index.stash[f_new.path][0] - self._blobs.set(blob, (f_new, unassigned)) - blobs_new.add(blob) + try: + known_blobs = self.getFilesForCommit(parrent_commit) + except KeyError: + known_blobs = [] + + known_blobs_dict = {} + for (blob_name, blob_oid) in known_blobs: + known_blobs_dict[blob_name] = blob_oid + + # blobs_new = _applyKnownGraphs(delta, blobs) + # new_contexts = _applyUnknownGraphs(delta) + new_config = copy(graphconfig) + prepare_commit() + + # new_file_references = {} + # blobs_new = set + # for update in delta: + # if update['type'] in ['ADD', 'CLEAR', 'CREATE', 'DROP', 'MOVE', 'COPY']: + # continue # TODO + # else: + # for identifier, changeset in update['delta'].items(): + # if identifier in known_blobs_dict.keys(): + # file_name = new_config.getfileforgraphuri(identifier) + # id = known_blobs_dict[file_name] + # blob = (file_name, id) + # file_reference, contexts = self.getFileReferenceAndContext(blob, commit) + # applyChangeset(file_reference, changeset, identifier) + # index.add(file_reference.path, file_reference.content) + # self._blobs.remove(blob) + # new_blob = file_name, index.stash[file_reference.path][0] + # self._blobs.set(new_blob, (file_reference, contexts)) + # else: + # if isinstance(identifier, BNode) or str(identifier) == 'default': + # continue # TODO default graph + # + # file_name = quote_plus(identifier + '.nq') + # if identifier not in new_file_references.keys(): + # new_file_references[identifier] = FileReference(file_name, '') + # + # file_reference = new_file_references[identifier] + # applyChangeset(file_reference, changeset, identifier) + # index.add(file_reference.path, file_reference.content) + + + for identifier, fileReference in new_file_references.items(): + # Add new blobs to repo + index.add(fileReference.path, fileReference.content) + if graphconfig.mode == 'graphfiles': + index.add(fileReference.path + '.graph', identifier) + + # Update config + new_config.addgraph(identifier, fileReference.path, 'nquads') + # Update Cache and add new contexts to store + blob = fileReference.path, index.stash[fileReference.path][0] + tmpRepo = ConjunctiveGraph() + tmpRepo.parse(format='nquads', data=fileReference.content) + # contexts = set(context for context in tmpRepo.contexts(None)) + self._blobs.set(blob, (fileReference, set(tmpRepo.get_context(identifier)))) + blobs_new.add(blob) + if graphconfig.mode == 'configuration': + index.add('config.ttl', new_config.graphconf.serialize(format='turtle').decode()) message = build_message(message, kwargs) author = self.repository._repository.default_signature @@ -528,3 +632,9 @@ def garbagecollection(self): except Exception as e: logger.debug('Git garbage collection failed to spawn.') logger.debug(e) + + def updateGraphConfig(self, commitId): + """Update the graph configuration for a given commit id.""" + graphconf = QuitGraphConfiguration(self.repository._repository) + graphconf.initgraphconfig(commitId) + self._graphconfigs.set(commitId, graphconf) diff --git a/quit/helpers.py b/quit/helpers.py index c466bb23..1e18a5f4 100644 --- a/quit/helpers.py +++ b/quit/helpers.py @@ -7,6 +7,7 @@ from rdflib.plugins.sparql.parserutils import CompValue, plist from rdflib.plugins.sparql.parser import parseQuery, parseUpdate from quit.tools.algebra import translateQuery, translateUpdate +from rdflib.plugins.serializers.nquads import _nq_row as _nq from rdflib.plugins.sparql import parser, algebra from rdflib.plugins import sparql from uritools import urisplit @@ -106,6 +107,23 @@ def evalUpdate(self, querystring, graph): return +def applyChangeset(f, changeset, identifier): + """Update the FileReference (graph uri) of a file with help of the changeset.""" + for (op, triples) in changeset: + if op == 'additions': + for triple in triples: + # the internal _nq serializer appends '\n' + line = _nq(triple, identifier).rstrip() + f.add(line) + elif op == 'removals': + for triple in triples: + # the internal _nq serializer appends '\n' + line = _nq(triple, identifier).rstrip() + f.remove(line) + elif op == 'CREATE': + print('erkannt') + + def isAbsoluteUri(uri): """Check if a URI is a absolute URI and uses 'http(s)' at protocol part. diff --git a/quit/tools/update.py b/quit/tools/update.py index c181615b..bb0251f7 100644 --- a/quit/tools/update.py +++ b/quit/tools/update.py @@ -16,7 +16,8 @@ from itertools import tee from quit.exceptions import UnSupportedQuery -def _append(dct, identifier, action, items): + +def _appendChangesets(dct, identifier, action, items): if items: if not isinstance(identifier, Node): identifier = URIRef(identifier) @@ -51,32 +52,30 @@ def evalLoad(ctx, u): """ http://www.w3.org/TR/sparql11-update/#load """ - res = {} - res["type_"] = "LOAD" - res["graph"] = u.iri - if u.graphiri: ctx.load(u.iri, default=False, publicID=u.graphiri) else: ctx.load(u.iri, default=True) - return res - def evalCreate(ctx, u): """ http://www.w3.org/TR/sparql11-update/#create """ - g = ctx.datset.get_context(u.graphiri) + g = ctx.dataset.get_context(u.graphiri) if len(g) > 0: raise Exception("Graph %s already exists." % g.identifier) - raise Exception("Create not implemented!") + # raise Exception("Create not implemented!") def evalClear(ctx, u): """ http://www.w3.org/TR/sparql11-update/#clear """ + res = {} + res['type'] = "CLEAR" + res["graph"] = u.graphiri + for g in _graphAll(ctx, u.graphiri): g.remove((None, None, None)) @@ -98,14 +97,14 @@ def evalInsertData(ctx, u): """ res = {} - res["type_"] = "INSERT" - res["delta"] = {} + res['type'] = "INSERT" + res['delta'] = {} # add triples g = ctx.graph filled = list(filter(lambda triple: triple not in g, u.triples)) if filled: - _append(res["delta"], 'default', 'additions', filled) + _appendChangesets(res['delta'], 'default', 'additions', filled) g += filled # add quads @@ -114,7 +113,7 @@ def evalInsertData(ctx, u): cg = ctx.dataset.get_context(g) filledq = list(filter(lambda triple: triple not in cg, u.quads[g])) if filledq: - _append(res["delta"], cg.identifier, 'additions', filledq) + _appendChangesets(res['delta'], cg.identifier, 'additions', filledq) cg += filledq return res @@ -125,14 +124,14 @@ def evalDeleteData(ctx, u): http://www.w3.org/TR/sparql11-update/#deleteData """ res = {} - res["type_"] = "DELETE" - res["delta"] = {} + res['type'] = "DELETE" + res['delta'] = {} # remove triples g = ctx.graph filled = list(filter(lambda triple: triple in g, u.triples)) if filled: - _append(res["delta"], 'default', 'removals', filled) + _appendChangesets(res['delta'], 'default', 'removals', filled) g -= filled # remove quads @@ -141,7 +140,7 @@ def evalDeleteData(ctx, u): cg = ctx.dataset.get_context(g) filledq = list(filter(lambda triple: triple in cg, u.quads[g])) if filledq: - _append(res["delta"], cg.identifier, 'removals', filledq) + _appendChangesets(res['delta'], cg.identifier, 'removals', filledq) cg -= filledq return res @@ -153,8 +152,8 @@ def evalDeleteWhere(ctx, u): """ res = {} - res["type_"] = "DELETEWHERE" - res["delta"] = {} + res['type'] = "DELETEWHERE" + res['delta'] = {} _res = evalBGP(ctx, u.triples) for g in u.quads: @@ -165,13 +164,13 @@ def evalDeleteWhere(ctx, u): for c in _res: g = ctx.graph filled, filled_delta = tee(_fillTemplate(u.triples, c)) - _append(res["delta"], 'default', 'removals', list(filled_delta)) + _appendChangesets(res['delta'], 'default', 'removals', list(filled_delta)) g -= filled for g in u.quads: cg = ctx.dataset.get_context(c.get(g)) filledq, filledq_delta = tee(_fillTemplate(u.quads[g], c)) - _append(res["delta"], cg.identifier, 'removals', list(filledq_delta)) + _appendChangesets(res['delta'], cg.identifier, 'removals', list(filledq_delta)) cg -= filledq return res @@ -181,8 +180,8 @@ def evalModify(ctx, u): originalctx = ctx res = {} - res["type_"] = "MODIFY" - res["delta"] = {} + res['type'] = "MODIFY" + res['delta'] = {} # Using replaces the dataset for evaluating the where-clause if u.using: @@ -237,24 +236,24 @@ def evalModify(ctx, u): dg = ctx.graph if u.delete: filled, filled_delta = tee(_fillTemplate(u.delete.triples, c)) - _append(res["delta"], graphName, 'removals', list(filled_delta)) + _appendChangesets(res['delta'], graphName, 'removals', list(filled_delta)) dg -= filled for g, q in u.delete.quads.items(): cg = ctx.dataset.get_context(c.get(g)) filledq, filledq_delta = tee(_fillTemplate(q, c)) - _append(res["delta"], cg.identifier, 'removals', list(filledq_delta)) + _appendChangesets(res['delta'], cg.identifier, 'removals', list(filledq_delta)) cg -= filledq if u.insert: filled, filled_delta = tee(_fillTemplate(u.insert.triples, c)) - _append(res["delta"], graphName, 'additions', list(filled_delta)) + _appendChangesets(res['delta'], graphName, 'additions', list(filled_delta)) dg += filled for g, q in u.insert.quads.items(): cg = ctx.dataset.get_context(c.get(g)) filledq, filledq_delta = tee(_fillTemplate(q, c)) - _append(res["delta"], cg.identifier, 'additions', list(filledq_delta)) + _appendChangesets(res['delta'], cg.identifier, 'additions', list(filledq_delta)) cg += filledq return res @@ -360,35 +359,43 @@ def evalUpdate(graph, update, initBindings=None, actionLog=False): try: if u.name == 'Load': - result = evalLoad(ctx, u).get('delta', None) - if result: - res.append(result) + result = evalLoad(ctx, u) + res.append({'type': 'LOAD', 'graph': u.graphiri}) elif u.name == 'Clear': evalClear(ctx, u) + res.append({'type': 'CLEAR', 'graph': u.graphiri}) elif u.name == 'Drop': evalDrop(ctx, u) + res.append({'type': 'DROP', 'graph': u.graphiri}) elif u.name == 'Create': evalCreate(ctx, u) + res.append({'type': 'CREATE', 'graph': u.graphiri}) elif u.name == 'Add': evalAdd(ctx, u) + src, dst = u.graph + res.append({'type': 'ADD', 'src_graph': src, 'dst_graph': dst}) elif u.name == 'Move': evalMove(ctx, u) + src, dst = u.graph + res.append({'type': 'MOVE', 'src_graph': src, 'dst_graph': dst}) elif u.name == 'Copy': evalCopy(ctx, u) + src, dst = u.graph + res.append({'type': 'COPY', 'src_graph': src, 'dst_graph': dst}) elif u.name == 'InsertData': - result = evalInsertData(ctx, u).get('delta', None) + result = evalInsertData(ctx, u) if result: res.append(result) elif u.name == 'DeleteData': - result = evalDeleteData(ctx, u).get('delta', None) + result = evalDeleteData(ctx, u) if result: res.append(result) elif u.name == 'DeleteWhere': - result = evalDeleteWhere(ctx, u).get('delta', None) + result = evalDeleteWhere(ctx, u) if result: res.append(result) elif u.name == 'Modify': - result = evalModify(ctx, u).get('delta', None) + result = evalModify(ctx, u) if result: res.append(result) else: diff --git a/quit/web/app.py b/quit/web/app.py index bc3689ed..95587128 100644 --- a/quit/web/app.py +++ b/quit/web/app.py @@ -107,9 +107,6 @@ def register_app(app, config): content = quit.store.store.serialize(format='trig').decode() logger.debug("Initialize store with following content: {}".format(content)) - logger.debug("Initialize store with following graphs: {}".format( - quit.config.getgraphurifilemap()) - ) app.config['quit'] = quit app.config['blame'] = Blame(quit) diff --git a/quit/web/modules/endpoint.py b/quit/web/modules/endpoint.py index 3b7c8a36..ddfac470 100644 --- a/quit/web/modules/endpoint.py +++ b/quit/web/modules/endpoint.py @@ -98,7 +98,7 @@ def sparql(branch_or_ref): logger.exception(e) return make_response('No branch or reference given.', 400) - if queryType in ['InsertData', 'DeleteData', 'Modify', 'DeleteWhere']: + if queryType in ['InsertData', 'DeleteData', 'Modify', 'DeleteWhere', 'Create']: res, exception = graph.update(parsedQuery) try: diff --git a/tests/helpers.py b/tests/helpers.py index 1e87a3a6..294e2620 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -2,6 +2,7 @@ from pygit2 import init_repository, clone_repository, Signature from os import path, walk from os.path import join +from rdflib import Graph def createCommit(repository, message=None): @@ -95,8 +96,43 @@ def withGraph(self, graphUri, graphContent=None): return tmpRepo - def withGraphs(self, graphUriContentDict): + def noConfigInformations(self, graphContent=''): + """Give a TemporaryRepository() initialized with a graph with the given content (and one commit).""" + tmpRepo = TemporaryRepository() + + # Add a graph.nq and a graph.nq.graph file + with open(path.join(tmpRepo.repo.workdir, "graph.nq"), "w") as graphFile: + if graphContent: + graphFile.write(graphContent) + + # Add and Commit the empty graph + index = tmpRepo.repo.index + index.read() + index.add("graph.nq") + index.write() + + # Create commit + tree = index.write_tree() + message = "init" + tmpRepo.repo.create_commit('HEAD', self.author, self.comitter, message, tree, []) + + return tmpRepo + + def withGraphs(self, graphUriContentDict, mode='graphfiles'): """Give a TemporaryRepository() initialized with a dictionary of graphUris and content (nq).""" + uristring = '' + configFileContent = """@base . + @prefix conf: . + + conf:store a ; + "git://github.com/aksw/QuitStore.git" ; + "{}" . + {}""" + + graphResource = """conf:graph{} a ; + <{}> ; + "{}" .""" + tmpRepo = TemporaryRepository() index = tmpRepo.repo.index index.read() @@ -108,14 +144,24 @@ def withGraphs(self, graphUriContentDict): if graphContent: graphFile.write(graphContent) - # Set Graph URI to http://example.org/ - with open(path.join(tmpRepo.repo.workdir, filename + ".graph"), "w") as graphFile: - graphFile.write(graphUri) + if mode == 'graphfiles': + # Set Graph URI to http://example.org/ + with open(path.join(tmpRepo.repo.workdir, filename + ".graph"), "w") as graphFile: + graphFile.write(graphUri) + index.add(filename + '.graph') + elif mode == 'configfile': + uristring += graphResource.format(i, graphUri, filename) # Add and Commit the empty graph index.add(filename) - index.add(filename + '.graph') i += 1 + if mode == 'configfile': + graph = Graph() + with open(path.join(tmpRepo.repo.workdir, "config.ttl"), "w") as configFile: + rdf_content = configFileContent.format(tmpRepo.repo.workdir, uristring) + graph.parse(format='turtle', data=rdf_content) + configFile.write(graph.serialize(format='turtle').decode()) + index.add('config.ttl') index.write() diff --git a/tests/test_app.py b/tests/test_app.py index 5dfa7f47..1c6c125d 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -2,6 +2,7 @@ import os from os import path +from urllib.parse import quote_plus from datetime import datetime from pygit2 import GIT_SORT_TOPOLOGICAL, Signature import quit.application as quitApp @@ -522,7 +523,6 @@ def testSelectFromNamed(self): "p": {'type': 'uri', 'value': 'urn:y'}, "o": {'type': 'uri', 'value': 'urn:z'}}) - def testQueryProvenanceViaGet(self): # Prepate a git Repository content = ' .' @@ -637,7 +637,9 @@ def testQueryProvenanceViaPostDirectly(self): response = app.post('/provenance', data=self.update, headers=headers) self.assertEqual(response.status_code, 400) + class QuitAppTestCase(unittest.TestCase): + """Test API and synchronization of Store and Git.""" author = Signature('QuitStoreTest', 'quit@quit.aksw.org') comitter = Signature('QuitStoreTest', 'quit@quit.aksw.org') @@ -3191,5 +3193,123 @@ def testWithOnInsertUsing(self): self.assertEqual('\n', f.read()) +class GraphManagementTests(unittest.TestCase): + def testCreateGraph(self): + """Test if a new empty graph file is added. + + 1. Prepare a git repository with an empty and a non empty graph + 2. Start Quit + 3. execute Update query + 4. check filesystem for new .nq and .nq.graph file with expected content + """ + # Prepate a git Repository + content = ' .\n' + repoContent = {'http://example.org/': content} + with TemporaryRepositoryFactory().withGraphs(repoContent) as repo: + + # Start Quit + args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles']) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + filename = quote_plus('http://aksw.org/') + '.nq' + + # execute UPDATE query + update = 'CREATE GRAPH ' + app.post('/sparql', + content_type="application/sparql-update", + data=update) + + with open(path.join(repo.workdir, 'graph_0.nq'), 'r') as f: + self.assertEqual(' .\n', f.read()) + + +class FileHandlingTests(unittest.TestCase): + def testNewNamedGraph(self): + """Test if a new graph is added to the repository. + + 1. Prepare a git repository with an empty and a non empty graph + 2. Start Quit + 3. execute Update query + 4. check filesystem for new .nq and .nq.graph file with expected content + """ + # Prepate a git Repository + content = ' .\n' + repoContent = {'http://example.org/': content} + with TemporaryRepositoryFactory().withGraphs(repoContent) as repo: + + # Start Quit + args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles']) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + filename = quote_plus('http://aksw.org/') + '.nq' + + self.assertFalse(path.isfile(path.join(repo.workdir, filename))) + self.assertFalse(path.isfile(path.join(repo.workdir, filename + '.graph'))) + + # execute UPDATE query + update = 'INSERT DATA { GRAPH { . } }' + app.post('/sparql', + content_type="application/sparql-update", + data=update) + + with open(path.join(repo.workdir, 'graph_0.nq'), 'r') as f: + self.assertEqual(' .\n', f.read()) + with open(path.join(repo.workdir, filename), 'r') as f: + self.assertEqual(' .\n', f.read()) + with open(path.join(repo.workdir, filename + '.graph'), 'r') as f: + self.assertEqual('http://aksw.org/', f.read()) + + def testNewNamedGraphConfigfile(self): + """Test if a new graph is added to the repository. + + 1. Prepare a git repository with an empty and a non empty graph + 2. Start Quit + 3. execute Update query + 4. check filesystem and configfile content (before/after) + """ + # Prepate a git Repository + content = ' .\n' + repoContent = {'http://example.org/': content} + with TemporaryRepositoryFactory().withGraphs(repoContent, 'configfile') as repo: + + # Start Quit + args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles']) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + + with open(path.join(repo.workdir, 'config.ttl'), 'r') as f: + configfile_before = f.read() + + # execute DELETE INSERT WHERE query + update = 'INSERT DATA { GRAPH { . } }' + app.post('/sparql', + content_type="application/sparql-update", + data=update) + + filename = quote_plus('http://aksw.org/') + '.nq' + + with open(path.join(repo.workdir, 'graph_0.nq'), 'r') as f: + self.assertEqual(' .\n', f.read()) + with open(path.join(repo.workdir, filename), 'r') as f: + self.assertEqual(' .\n', f.read()) + with open(path.join(repo.workdir, 'config.ttl'), 'r') as f: + configfile_after = f.read() + + config_before = [x.strip() for x in configfile_before.split('\n')] + config_after = [x.strip() for x in configfile_after.split('\n')] + diff = list(set(config_after) - set(config_before)) + + self.assertFalse('ns1:graphFile "' + filename + '" ;' in config_before) + self.assertFalse('ns1:hasFormat "nquads" .' in config_before) + self.assertFalse('ns1:graphUri ;' in config_before) + + self.assertTrue('ns1:graphFile "' + filename + '" ;' in diff) + self.assertTrue('ns1:hasFormat "nquads" .' in diff) + self.assertTrue('ns1:graphUri ;' in diff) + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_conf.py b/tests/test_conf.py index 53e57fa8..db78c331 100644 --- a/tests/test_conf.py +++ b/tests/test_conf.py @@ -6,280 +6,137 @@ from os.path import join, isdir from pygit2 import init_repository, Repository, clone_repository from pygit2 import GIT_SORT_TOPOLOGICAL, GIT_SORT_REVERSE, Signature -from quit.conf import QuitConfiguration +from quit.conf import QuitStoreConfiguration, QuitGraphConfiguration from quit.exceptions import MissingConfigurationError, InvalidConfigurationError from quit.exceptions import MissingFileError from distutils.dir_util import copy_tree, remove_tree +from helpers import TemporaryRepository, TemporaryRepositoryFactory from tempfile import TemporaryDirectory, NamedTemporaryFile import rdflib -class TestConfiguration(unittest.TestCase): - - def setUp(self): - self.ns = 'http://quit.instance/' - self.testData = './tests/samples/configuration_test' - self.local = './tests/samples/local' - self.remote = '.tests/samples/remote' - copy_tree(self.testData, self.local) - copy_tree(self.testData, self.remote) - self.localConfigFile = join(self.local, 'config.ttl') - self.remoteConfigFile = join(self.local, 'config.ttl') - tempRepoLine = ' "' + self.local + '" .' - - with open(self.localConfigFile) as f: - content = f.readlines() - - remove(self.localConfigFile) - - with open(self.localConfigFile, 'w+') as f: - for line in content: - if line.startswith('