#! /usr/bin/python
"""
$Id: llyn.py,v 1.120 2004/10/28 17:41:59 timbl Exp $

RDF Store and Query engine

Logic Lookup: Yet another Name

(also, in Wales, a lake - a storage area at the centre of the valley?)

This is an engine which knows a certain amount of stuff and can manipulate it.
It is a (forward chaining) query engine, not a (backward chaining) inference
engine: that is, it will apply all rules it can but won't figure out which
ones to apply to prove something.  It is not optimized particularly.

Used by cwm - the closed world machine.
See:  http://www.w3.org/DesignIssues/Notation3

Interfaces
==========

This store stores many formulae, where one formula is what in straight RDF
implementations is known as a "triple store".  So look at the Formula class
for a triple store interface.

See also for comparison, a python RDF API for the Redland library (in C):
    http://www.redland.opensource.ac.uk/docs/api/index.html
and the redfoot/rdflib interface, a python RDF API:
    http://rdflib.net/latest/doc/triple_store.html

Copyright (c) 2000-2004 World Wide Web Consortium, (Massachusetts Institute
of Technology, European Research Consortium for Informatics and Mathematics,
Keio University). All Rights Reserved.  This work is distributed under the
W3C Software License [1] in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE.
"""

# emacsbug="""emacs got confused by long string above@@"""

from __future__ import generators
# see http://www.amk.ca/python/2.2/index.html#SECTION000500000000000000000

import types
import string

import re
import StringIO
import sys
import time
from warnings import warn

import urllib  # for log:content
import md5, binascii  # for building md5 URIs

import uripath
import notation3  # N3 parsers and generators, and RDF generator
from webAccess import urlopenForRDF  # http://www.w3.org/2000/10/swap/
# import sax2rdf # RDF1.0 syntax parser to N3 RDF stream

import diag  # problems importing the tracking flag, and chatty_flag must be explicit it seems diag.tracking
from diag import progress, verbosity
from term import BuiltIn, LightBuiltIn, \
    HeavyBuiltIn, Function, ReverseFunction, \
    Literal, Symbol, Fragment, FragmentNil, Term,\
    CompoundTerm, List, EmptyList, NonEmptyList, AnonymousNode
from OrderedSequence import merge
from formula import Formula, StoredStatement

from query import think, applyRules, testIncludes
import webAccess
from webAccess import DocumentAccessError
from decimal import Decimal

from RDFSink import Logic_NS, RDFSink, forSomeSym, forAllSym
from RDFSink import CONTEXT, PRED, SUBJ, OBJ, PARTS, ALL4
from RDFSink import N3_nil, N3_first, N3_rest, OWL_NS, N3_Empty, N3_List, List_NS
from RDFSink import RDF_NS_URI
from RDFSink import FORMULA, LITERAL, ANONYMOUS, SYMBOL

from pretty import Serializer
from OrderedSequence import indentString

LITERAL_URI_prefix = "data:text/rdf+n3;"
Delta_NS = "http://www.w3.org/2004/delta#"
cvsRevision = "$Revision: 1.120 $"

# Magic resources we know about
from RDFSink import RDF_type_URI, DAML_sameAs_URI

from why import Because, BecauseBuiltIn, BecauseOfRule, \
    BecauseOfExperience, becauseSubexpression, BecauseMerge ,report

STRING_NS_URI = "http://www.w3.org/2000/10/swap/string#"
META_NS_URI = "http://www.w3.org/2000/10/swap/meta#"
INTEGER_DATATYPE = "http://www.w3.org/2001/XMLSchema#integer"
FLOAT_DATATYPE = "http://www.w3.org/2001/XMLSchema#double"
DECIMAL_DATATYPE = "http://www.w3.org/2001/XMLSchema#decimal"

#reason=Namespace("http://www.w3.org/2000/10/swap/reason#")

META_mergedWith = META_NS_URI + "mergedWith"
META_source = META_NS_URI + "source"
META_run = META_NS_URI + "run"

doMeta = 0  # wait until we have written the code! :-)


class DataObject:
    """The info about a term in the context of a specific formula.

    It is created by being passed the formula and the term, and is then
    accessed like a python dictionary of sequences of values.  Example:

        F = myWorkingFormula
        x = F.theObject(pred=rdfType, obj=fooCar)
        for y in x[color]: ...
    """
    def __init__(self, context, term):
        # The original signature omitted ``self``, so the class could not
        # be instantiated at all.
        self.context = context
        self.term = term

    def __getItem__(self, pred):   # Use . or [] ?
        """Yield a DataObject for every value of ``pred`` on this term.

        Relies on the context formula providing
        ``objects(pred=..., subj=...)`` -- not visible in this file.
        """
        values = self.context.objects(pred=pred, subj=self.term)
        for v in values:
            yield DataObject(self.context, v)

    # Python looks up the all-lowercase name for ``obj[pred]``; keep the
    # historical spelling as well for any caller that used it directly.
    __getitem__ = __getItem__


###################################### Forumula
#
class IndexedFormula(Formula):
    """A formula which has indexes to facilitate queries.

    A formula is either open or closed.  Initially, it is open.  In this
    state it may be modified - for example, triples may be added to it.
    When it is closed, note that a different interned version of itself may
    be returned.  From then on it is a constant.

    Only closed formulae may be mentioned in statements in other formulae.

    There is a reopen() method but it is not recommended, and if desperate
    should only be used immediately after a close().
    """
    def __init__(self, store, uri=None):
        Formula.__init__(self, store, uri)
        self.descendents = None   # Placeholder for list of closure under subcontext
        self.collector = None     # Object collecting evidence, if any
        self._redirections = {}
        self._newRedirections = {}  # not subsituted yet
        # Index from (pred, subj, obj) patterns, None being a wildcard,
        # to the list of matching statements.  The all-wildcard key shares
        # the statement list itself.
        self._index = {}
        self._index[(None,None,None)] = self.statements
        self._closureMode = ""
        self._closureAgenda = []
        self._closureAlready = []
        self.stayOpen = 0   # If set, works as a knowledegbase, never canonicalized.

    def statementsMatching(self, pred=None, subj=None, obj=None):
        """Return a READ-ONLY list of StoredStatement objects matching the parts given

        For example:
        for s in f.statementsMatching(pred=pantoneColor):
            print "We've got one which is ", `s[OBJ]`

        If none, returns []
        """
        return self._index.get((pred, subj, obj), [])

    def contains(self, pred=None, subj=None, obj=None):
        """Return boolean true iff formula contains statement(s) matching the parts given

        For example:
        if f.contains(pred=pantoneColor):
            print "We've got one statement about something being some color"
        """
        x = self._index.get((pred, subj, obj), [])
        if x : return 1
        return 0

    def any(self, subj=None, pred=None, obj=None):
        """Return None or the value filling the blank in the called parameters.

        Specify exactly two of the arguments.
        color = f.any(pred=pantoneColor, subj=myCar)
        somethingRed = f.any(pred=pantoneColor, obj=red)

        Note difference from the old store.any!!
        Note SPO order not PSO.
        To avoid confusion, use named parameters.
        """
        hits = self._index.get((pred, subj, obj), [])
        if not hits: return None
        s = hits[0]
        if pred == None: return s[PRED]
        if subj == None: return s[SUBJ]
        if obj == None: return s[OBJ]
        raise ParameterError("You must give one wildcard")

    def the(self, subj=None, pred=None, obj=None):
        """Return None or the value filling the blank in the called parameters

        This is just like any() except it checks that there is only
        one answer in the store.  It is wise to use this when you expect
        only one.

        color = f.the(pred=pantoneColor, subj=myCar)
        redCar = f.the(pred=pantoneColor, obj=red)
        """
        hits = self._index.get((pred, subj, obj), [])
        if not hits: return None
        assert len(hits) == 1, """There should only be one match for (%s %s %s).
            Found: %s""" %(subj, pred, obj, self.each(subj, pred, obj))
        s = hits[0]
        if pred == None: return s[PRED]
        if subj == None: return s[SUBJ]
        if obj == None: return s[OBJ]
        # Was spelt ``parameterError``, which could only ever raise
        # NameError; use the same name as any() and each().
        raise ParameterError("You must give one wildcard using the()")

    def each(self, subj=None, pred=None, obj=None):
        """Return a list of values filling the blank in the called parameters

        Examples:
        colors = f.each(pred=pantoneColor, subj=myCar)

        for redthing in f.each(pred=pantoneColor, obj=red): ...
        """
        hits = self._index.get((pred, subj, obj), [])
        if hits == []: return []
        if pred == None: wc = PRED
        elif subj == None: wc = SUBJ
        elif obj == None: wc = OBJ
        else: raise ParameterError("You must give one wildcard None for each()")
        res = []
        for s in hits:
            res.append(s[wc])   # should use yeild @@ when we are ready
        return res

    def searchable(self, subj=None, pred=None, obj=None):
        """A pair of the difficulty of searching and a statement iterator of found statements

        The difficulty is a store-portable measure of how long the store
        thinks (in arbitrary units) it will take to search.
        This will only be used for choosing which part of the query to
        search first.  If it is 0 there is no solution to the query, we
        know now.

        In this implementation, we use the length of the sequence to be
        searched."""
        res = self._index.get((pred, subj, obj), [])
        return len(res), res

    def add(self, subj, pred, obj, why=None):
        """Add a triple to the formula.

        The formula must be open.
        subj, pred and obj must be objects as for example generated by
        Formula.newSymbol() and newLiteral(), or else literal values which
        can be interned.
        why may be a reason for use when a proof will be required.

        Returns 1 if something was added (possibly absorbed into a list, a
        variable declaration or an equality), 0 for a suppressed duplicate
        or a trivial a = a.
        """
        if self.canonical != None:
            raise RuntimeError("Attempt to add statement to closed formula "+repr(self))
        store = self.store

        if not isinstance(subj, Term): subj = store.intern(subj)
        if not isinstance(pred, Term): pred = store.intern(pred)
        if not isinstance(obj, Term): obj = store.intern(obj)
        newBindings = {}

        # Smushing of things which are equal into a single node
        # Even if we do not do this with owl:sameAs, we do with lists
        subj = subj.substituteEquals(self._redirections, newBindings)
        pred = pred.substituteEquals(self._redirections, newBindings)
        obj = obj.substituteEquals(self._redirections, newBindings)

        if diag.chatty_flag > 50:
            progress("Add statement (size before %i, %i statements) to %s:\n {%s %s %s}" % (
                self.store.size, len(self.statements),repr(self), repr(subj), repr(pred), repr(obj)) )
        if self.statementsMatching(pred, subj, obj):
            if diag.chatty_flag > 97:
                progress("Add duplicate SUPPRESSED %s: {%s %s %s}" % (
                    self, subj, pred, obj) )
            return 0  # Return no change in size of store

        assert not isinstance(pred, Formula) or pred.canonical is pred, "pred Should be closed"+repr(pred)
        assert (not isinstance(subj, Formula)
                or subj is self
                or subj.canonical is subj), "subj Should be closed or self"+repr(subj)
        assert not isinstance(obj, Formula) or obj.canonical is obj, "obj Should be closed"+repr(obj)
        store.size = store.size+1   # rather nominal but should be monotonic

        # We collapse lists from the declared daml first,rest structure into List objects.
        # To do this, we need a bnode with (a) a first; (b) a rest, and (c) the rest being a list.
        # We trigger list collapse on any of these three becoming true.
        # @@@ we don't reverse this on remove statement.  Remove statement is really not a user call.

        # (Not clear: how t smush symbols without smushing variables. Need separate pytyhon class
        # for variables I guess as everyone has been saying.
        # When that happens, expend smushing to symbols.)

        if pred is store.rest:
            if isinstance(obj, List) and subj in self._existentialVariables:
                ss = self.statementsMatching(pred=store.first, subj=subj)
                if ss:
                    s = ss[0]
                    self.removeStatement(s)
                    first = s[OBJ]
                    newList = obj.prepend(first)
                    self._noteNewList(subj, newList, newBindings)
                    self.substituteEqualsInPlace(newBindings)
                    return 1  # Added a statement but ... it is hidden in lists

        elif pred is store.first and subj in self._existentialVariables:
            ss = self.statementsMatching(pred=store.rest, subj=subj)
            if ss:
                s = ss[0]
                rest = s[OBJ]
                if isinstance(rest, List):
                    newList = rest.prepend(obj)
                    self.removeStatement(s)
                    self._noteNewList(subj, newList, newBindings)
                    self.substituteEqualsInPlace(newBindings)
                    return 1

        if "e" in self._closureMode:
            if pred is store.sameAs:
                if subj is obj: return 0  # ignore a = a
                if ((subj in self.existentials() and obj not in self.existentials())
                    or (subj.generated() and not obj.generated())
                    or Term.compareAnyTerm(obj, subj) < 0): var, val = subj, obj
                else: var, val = obj, subj
                newBindings[var] = val
                if diag.chatty_flag > 90: progress("Equality: %s = %s" % (repr(var), repr(val)))
                self.substituteEqualsInPlace(newBindings)
                return 1

        if "T" in self._closureMode:
            if pred is store.type and obj is store.Truth:
                assert isinstance(subj, Formula), "What are we doing concluding %s is true?" % subj
                self.loadFormulaWithSubsitution(subj)

        #########
        if newBindings != {}:
            self.substituteEqualsInPlace(newBindings)
        #######

        s = StoredStatement((self, pred, subj, obj))

        if diag.tracking:
            if (why == None): raise RuntimeError(
                "Tracking reasons but no reason given for"+repr(s))
            report(s, why)

        if subj is self:  # Catch variable declarations
            if pred is self.store.forAll:
                if obj not in self._universalVariables:
                    if diag.chatty_flag > 50: progress("\tUniversal ", obj)
                    self._universalVariables.append(obj)
                return 1
            if pred is self.store.forSome:
                if obj not in self._existentialVariables:
                    if diag.chatty_flag > 50: progress("\tExistential ", obj)
                    self._existentialVariables.append(obj)
                return 1
            raise ValueError("You cannot use 'this' except as subject of forAll or forSome")

        # Build 8 indexes.
        # This now takes a lot of the time in a typical cwm run! :-(
        # I honestly think the above line is a bit pessemistic.  The below lines scale.
        # The above lines do not (removeStatement does not scale)
        # The all-wildcard index *is* self.statements; the other seven are
        # one list per partially-bound pattern.
        self.statements.append(s)
        for key in ((None, None, obj), (None, subj, None), (None, subj, obj),
                    (pred, None, None), (pred, None, obj), (pred, subj, None),
                    (pred, subj, obj)):
            self._index.setdefault(key, []).append(s)

        if self._closureMode != "":
            self.checkClosure(subj, pred, obj)

        return 1  # One statement has been added  @@ ignore closure extras from closure
                  # Obsolete this return value? @@@

    def removeStatement(self, s):
        """Removes a statement

        The formula must be open.

        This implementation is alas slow, as removal of items from tha hash
        is slow.  The above statement is false.  Removing items from a hash
        is easily over five times faster than removing them from a list.
        Also, truth mainainance is not done.  You can't undeclare things
        equal.  This is really a low-level method, used within add() and
        for cleaning up the store to save space in purge() etc.
        """
        assert self.canonical == None, "Cannot remove statement from canonnical"+repr(self)
        self.store.size = self.store.size-1
        if diag.chatty_flag > 97: progress("removing %s" % (s))
        context, pred, subj, obj = s.quad
        self.statements.remove(s)
        for key in ((None, None, obj), (None, subj, None), (None, subj, obj),
                    (pred, None, None), (pred, None, obj), (pred, subj, None),
                    (pred, subj, obj)):
            self._index[key].remove(s)
        #raise RuntimeError("The triple is %s: %s %s %s"%(context, pred, subj, obj))
        return

    def canonicalize(F):
        """If this formula already exists, return the master version.

        If not, record this one and return it.
        Call this when the formula is in its final form, with all its
        statements.  Make sure no one else has a copy of the pointer to the
        smushed one.  In canonical form,
         - the statments are ordered
         - the lists are all internalized as lists

        Store dependency: Uses store._formulaeOfLength
        """
        store = F.store
        if F.canonical != None:
            if diag.chatty_flag > 70:
                progress("End formula -- @@ already canonical:"+repr(F))
            return F.canonical
        if F.stayOpen:
            if diag.chatty_flag > 70:
                progress("End formula -- @@ Knowledge base mode, ignoring c'n:"+repr(F))
            return F

        F.existentialDict = {}
        for existentialVariable in F.existentials():
            F.existentialDict[existentialVariable] = 1
        fl = F.statements
        # Key: numbers of statements, universals and existentials
        l = len(fl), len(F.universals()), len(F.existentials())
        possibles = store._formulaeOfLength.get(l, None)  # Formulae of same length

        if possibles == None:
            store._formulaeOfLength[l] = [F]
            if diag.chatty_flag > 70:
                progress("End formula - first of length", l, F)
            F.canonical = F
            return F

        fl.sort(StoredStatement.compareSubjPredObj)
        fe = F.existentials()
        fe.sort(Term.compareAnyTerm)
        fu = F.universals ()
        fu.sort(Term.compareAnyTerm)

        for G in possibles:
            gl = G.statements
            gkey = len(gl), len(G.universals()), len(G.existentials())
            if gkey != l: raise RuntimeError("@@Key of %s is %s instead of %s" %(G, repr(gkey), repr(l)))

            gl.sort(StoredStatement.compareSubjPredObj)
            # NOTE(review): a mismatch here only breaks out of this loop;
            # the statement comparison below still runs, so differing
            # variable lists do not by themselves prevent smushing.
            # Confirm whether that is intended before changing it.
            for se, oe, in ((fe, G.existentials()),
                            (fu, G.universals())):
                lse = len(se)
                loe = len(oe)
                # NOTE(review): these integer returns look like leftovers
                # from a comparison function; given the key check above the
                # lengths are equal here.
                if lse > loe: return 1
                if lse < loe: return -1
                oe.sort(Term.compareAnyTerm)
                for i in range(lse):
                    if se[i] is not oe[i]:
                        break  # mismatch
                else:
                    continue  # match
                break

            for i in range(l[0]):
                for p in PRED, SUBJ, OBJ:
                    if (fl[i][p] is not gl[i][p]
                        and (fl[i][p] is not F or gl[i][p] is not G)):  # Allow self-reference @@
                        break  # mismatch
                else:  #match one statement
                    continue
                break
            else:  #match
                if diag.chatty_flag > 20: progress(
                    "** End Formula: Smushed new formula %s giving old %s" % (F, G))
                del(F)  # Make sure it ain't used again
                return G
        possibles.append(F)
        # raise oops
        F.canonical = F
        if diag.chatty_flag > 70:
            progress("End formula, a fresh one:"+repr(F))
        return F

    def reopen(self):
        """Make a formula which was once closed open for input again.

        NOT Recommended.  Dangers: this formula will be, because of
        interning, the same object as a formula used elsewhere which happens
        to have the same content.  You mess with this one, you mess with
        that one.  Much better to keep the formula open until you don't
        need it open any more.  The trouble is, the parsers close it at the
        moment automatically.  To be fixed."""
        return self.store.reopen(self)

    def setClosureMode(self, x):
        """Set the string of closure flags consulted by add() and checkClosure()."""
        self._closureMode = x

    def checkClosure(self, subj, pred, obj):
        """Check the closure of the formula given new contents

        The s p o flags cause llyn to follow those parts of the new
        statement.
        i asks it to follow owl:imports
        r ask it to follow doc:rules
        """
        # Only the outermost call drains the agenda; calls made while
        # draining just queue more documents.
        firstCall = (self._closureAgenda == [])
        if "s" in self._closureMode: self.checkClosureOfSymbol(subj)
        if "p" in self._closureMode: self.checkClosureOfSymbol(pred)
        if ("o" in self._closureMode or
            "t" in self._closureMode and pred is self.store.type):
            self.checkClosureOfSymbol(obj)
        if (("r" in self._closureMode and
             pred is self.store.docRules) or
            ("i" in self._closureMode and
             pred is self.store.imports)):  # check subject? @@@ semantics?
            self.checkClosureDocument(obj)
        if firstCall:
            while self._closureAgenda != []:
                x = self._closureAgenda.pop()
                self._closureAlready.append(x)
                x.dereference("m" + self._closureMode, self)

    def checkClosureOfSymbol(self, y):
        """Queue the document of y, if y is a Fragment."""
        if not isinstance(y, Fragment): return
        return self.checkClosureDocument(y.resource)

    def checkClosureDocument(self, x):
        """Queue document x unless it is None, already done or already queued."""
        if x != None and x not in self._closureAlready and x not in self._closureAgenda:
            self._closureAgenda.append(x)

    def outputStrings(self, channel=None, relation=None):
        """Fetch output strings from store, sort and output

        To output a string, associate (using the given relation) with a key
        such that the order of the keys is the order in which you want the
        corresponding strings output.

        channel defaults to sys.stdout, relation to log:outputString.
        """
        if channel == None:
            channel = sys.stdout
        if relation == None:
            relation = self.store.intern((SYMBOL, Logic_NS + "outputString"))
        matches = self.statementsMatching(pred=relation)  # List of things of (subj, obj) pairs
        pairs = []
        for s in matches:
            pairs.append((s[SUBJ], s[OBJ]))
        pairs.sort(comparePair)
        for key, text in pairs:
            channel.write(text.string.encode('utf-8'))

    def debugString(self, already=[]):
        """A simple dump of a formula in debug form.

        This formula is dumped, using ids for nested formula.
        Then, each nested formula mentioned is dumped."""
        red = ""
        if self._redirections != {}: red = " redirections:" + repr(self._redirections)
        str = repr(self)+ red + " is {"
        for vv, ss in ((self.universals(), "@forAll"),(self.existentials(), "@forSome")):
            if vv != []:
                str = str + " " + ss + " " + repr(vv[0])
                for v in vv[1:]:
                    str = str + ", " + repr(v)
                str = str + "."
        todo = []
        for s in self.statements:
            subj, pred, obj = s.spo()
            str = str + "\n%28s %20s %20s ." % (repr(subj), repr(pred), repr(obj))
            for p in PRED, SUBJ, OBJ:
                if (isinstance(s[p], CompoundTerm)
                    and s[p] not in already and s[p] not in todo and s[p] is not self):
                    todo.append(s[p])
        str = str+ "}.\n"
        # Builds a new list, so the shared default argument is never mutated.
        already = already + todo + [ self ]
        for f in todo:
            str = str + " " + f.debugString(already)
        return str

    def _noteNewList(self, bnode, list, newBindings):
        """Note that we have a new list.

        Check whether this new list (given as bnode) causes other things to
        become lists.  Set up redirection so the list is used from now on
        instead of the bnode.
        Internal function.

        This function is extraordinarily slow, .08 seconds per call on
        reify/reify3.n3"""
        if diag.chatty_flag > 80: progress("New list was %s, now %s = %s"%(repr(bnode), repr(list), repr(list.value())))
        if isinstance(bnode, List): return  ##@@@@@ why is this necessary? weid.
        newBindings[bnode] = list
        if diag.chatty_flag > 80: progress("...New list newBindings %s"%(repr(newBindings)))
        if bnode in self._existentialVariables:
            self._existentialVariables.remove(bnode)
        possibles = self.statementsMatching(pred=self.store.rest, obj=bnode)  # What has this as rest?
        for s in possibles[:]:   # copy: removeStatement edits the index list
            L2 = s[SUBJ]
            ff = self.statementsMatching(pred=self.store.first, subj=L2)
            if ff != []:
                first = ff[0][OBJ]
                self.removeStatement(s)
                self.removeStatement(ff[0])
                list2 = list.prepend(first)
                self._noteNewList(L2, list2, newBindings)
        return

    def substituteEqualsInPlace(self, redirections):
        """Rewrite every statement through the given redirections.

        Slow ... does not use indexes.  Repeats with whatever further
        bindings the rewriting generates until there are none.
        """
        bindings = redirections
        while bindings != {}:
            self._redirections.update(bindings)
            newBindings = {}
            for s in self.statements[:]:  # take a copy!
                changed = 0
                quad = [self, s[PRED], s[SUBJ], s[OBJ]]
                for p in PRED, SUBJ, OBJ:
                    x = s[p]
                    y = x.substituteEquals(bindings, newBindings)
                    if y is not x:
                        if diag.chatty_flag>90: progress("Substituted %s -> %s in place" %(x, y))
                        changed = 1
                        quad[p] = y
                if changed:
                    self.removeStatement(s)
                    self.add(subj=quad[SUBJ], pred=quad[PRED], obj=quad[OBJ])
            # Log before rebinding, so the message shows the round just
            # applied and what it generated (it used to print the same
            # dictionary twice).
            if diag.chatty_flag>70: progress("Substitions %s generated %s" %(bindings, newBindings))
            bindings = newBindings
        return


def comparePair(self, other):
    "Used only in outputString"
    for i in 0,1:
        x = self[i].compareAnyTerm(other[i])
        if x != 0:
            return x
    # Equal pairs: a cmp function handed to list.sort must return an
    # integer, not fall off the end with None.
    return 0


###############################################################################################
#
#                C W M - S P E C I A L   B U I L T - I N s
#
###########################################################################

# Equivalence relations

class BI_EqualTo(LightBuiltIn,Function, ReverseFunction):
    def eval(self, subj, obj, queue, bindings, proof, query):
        return (subj is obj)   # Assumes interning

    def evalObj(self, subj, queue, bindings, proof, query):
        return subj

    def evalSubj(self, obj, queue, bindings, proof, query):
        return obj


class BI_notEqualTo(LightBuiltIn):
    def eval(self, subj, obj, queue, bindings, proof, query):
        return (subj is not obj)   # Assumes interning


# Functions

class BI_uri(LightBuiltIn, Function, ReverseFunction):

    # def evaluateObject(self, subject):
    #     return subject.uriref()
    def evalObj(self, subj, queue, bindings, proof, query):
        """Return the URI of a symbol as a literal; None for anything else."""
        type, value = subj.asPair()
        if type == SYMBOL:
            # or type == ANONYMOUS:
            # @@@@@@ Should not allow anonymous, but test/forgetDups.n3 uses it
            return self.store.intern((LITERAL, value))

    def evaluateSubject(self, object):
        """Return the object which has this string as its URI

        #@@hm... check string for URI syntax?
        # or at least for non-uri chars, such as space?
        Note that relative URIs can be OK as the whole process has a base,
        which may be irrelevant.  Eg see roadmap-test in retest.sh
        """
        store = self.store
        try:
            if ':' not in object:
                progress("Warning: taking log:uri of non-abs: %s" % object)
                return None
        except (TypeError, AttributeError):
            return None
        return store.intern((SYMBOL, object))


class BI_rawUri(BI_uri):
    """This is like uri except that it allows you to get the internal
    identifiers for anonymous nodes and formulae etc."""

    def evalObj(self, subj, queue, bindings, proof, query):
        type, value = subj.asPair()
        return self.store.intern((LITERAL, value))


class BI_rawType(LightBuiltIn, Function):
    """
    The raw type is a type from the point of view of the language: is
    it a formula, list, and so on.  Needed for test for formula in finding
    subformulae eg see test/includes/check.n3
    """

    def evalObj(self, subj, queue, bindings, proof, query):
        store = self.store
        if isinstance(subj, Literal): y = store.Literal
        elif isinstance(subj, Formula): y = store.Formula
        elif isinstance(subj, List): y = store.List
        #@@elif context.listValue.get(subj, None): y = store.List
        else: y = store.Other  # None? store.Other?
        if diag.chatty_flag > 91:
            progress("%s rawType %s." %(repr(subj), y))
        return y


class BI_racine(LightBuiltIn, Function):   # The resource whose URI is the same up to the "#"

    def evalObj(self, subj, queue, bindings, proof, query):
        if isinstance(subj, Fragment):
            return subj.resource
        else:
            return subj


# Heavy Built-ins

class BI_includes(HeavyBuiltIn):
    """Check that one formula does include the other.

    This limits the ability to bind a variable by searching inside another
    context.  This is quite a limitation in some ways. @@ fix
    """
    def eval(self, subj, obj, queue, bindings, proof, query):
        if isinstance(subj, Formula) and isinstance(obj, Formula):
            return testIncludes(subj, obj, bindings=bindings)  # No (relevant) variables
        return 0


class BI_notIncludes(HeavyBuiltIn):
    """Check that one formula does not include the other.

    notIncludes is a heavy function not only because it may take more time
    than a simple search, but also because it must be performed after other
    work so that the variables within the object formula have all been
    subsituted.  It makes no sense to ask a notIncludes question with
    variables, "Are there any ?x for which F does not include foo bar ?x"
    because of course there will always be an infinite number for any
    finite F.  So notIncludes can only be used to check, when a specific
    case has been found, that it does not exist in the formula.
    This means we have to know that the variables do not occur in obj.

    As for the subject, it does make sense for the opposite reason.  If
    F(x) includes G for all x, then G would have to be infinite.
    """
    def eval(self, subj, obj, queue, bindings, proof, query):
        if isinstance(subj, Formula) and isinstance(obj, Formula):
            return not testIncludes(subj, obj, bindings=bindings)  # No (relevant) variables
        return 0  # Can't say it *doesn't* include it if it ain't a formula


class BI_semantics(HeavyBuiltIn, Function):
    """ The semantics of a resource are its machine-readable meaning, as an
    N3 formula.  The URI is used to find a representation of the resource
    in bits which is then parsed according to its content type."""
    def evalObj(self, subj, queue, bindings, proof, query):
        store = subj.store
        if isinstance(subj, Fragment): doc = subj.resource
        else: doc = subj
        F = store.any((store._experience, store.semantics, doc, None))
        if F != None:
            if diag.chatty_flag > 10: progress("Already read and parsed "+repr(doc)+" to "+ repr(F))
            return F

        if diag.chatty_flag > 10: progress("Reading and parsing " + doc.uriref())
        inputURI = doc.uriref()
        F = self.store.load(inputURI)
        if diag.chatty_flag>10: progress(" semantics: %s" % (F))
        if diag.tracking:
            proof.append(F.collector)
        return F.canonicalize()


class BI_semanticsOrError(BI_semantics):
    """ Either get and parse to semantics or return an error message on any error """
    def evalObj(self, subj, queue, bindings, proof, query):
        import xml.sax._exceptions  # hmm...
        store = subj.store
        x = store.any((store._experience, store.semanticsOrError, subj, None))
        if x != None:
            if diag.chatty_flag > 10: progress(repr(store._experience)+repr(store.semanticsOrError)+": Already found error for "+repr(subj)+" was: "+ repr(x))
            return x
        try:
            return BI_semantics.evalObj(self, subj, queue, bindings, proof, query)
        except (IOError, SyntaxError, DocumentAccessError, xml.sax._exceptions.SAXParseException):
            message = sys.exc_info()[1].__str__()
            result = store.intern((LITERAL, message))
            if diag.chatty_flag > 0: progress(repr(store.semanticsOrError)+": Error trying to resolve <" + repr(subj) + ">: "+ message)
            # Remember the failure so the document is not fetched again.
            store.storeQuad((store._experience,
                             store.semanticsOrError,
                             subj,
                             result))
            return result


class BI_content(HeavyBuiltIn, Function):
    def evalObj(self, subj, queue, bindings, proof, query):
        """Return the raw content of the document as a literal (cached).

        Returns None if the document cannot be opened.
        """
        store = subj.store
        if isinstance(subj, Fragment): doc = subj.resource
        else: doc = subj
        C = store.any((store._experience, store.content, doc, None))
        if C != None:
            if diag.chatty_flag > 10: progress("already read " + repr(doc))
            return C
        if diag.chatty_flag > 10: progress("Reading " + repr(doc))
        inputURI = doc.uriref()
        try:
            netStream = urlopenForRDF(inputURI)
        except IOError:
            return None
        # Always release the connection.  Assumes urlopenForRDF returns a
        # urllib-style file-like object with close() -- TODO confirm.
        try:
            data = netStream.read()  # May be big - buffered in memory!
        finally:
            netStream.close()
        C = store.intern((LITERAL, data))
        store.storeQuad((store._experience,
                         store.content,
                         doc,
                         C))
        return C


class BI_parsedAsN3(HeavyBuiltIn, Function):
    def evalObj(self, subj, queue, bindings, proof, query):
        """Parse a literal as N3 and return the closed formula (cached).

        Returns None (implicitly) when subj is not a Literal.
        """
        store = subj.store
        if isinstance(subj, Literal):
            F = store.any((store._experience, store.parsedAsN3, subj, None))
            if F != None: return F
            if diag.chatty_flag > 10: progress("parsing " + subj.string[:30] + "...")
            inputURI = subj.asHashURI()  # iffy/bogus... rather asDataURI? yes! but make more efficient
            p = notation3.SinkParser(store)
            p.startDoc()
            p.feed(subj.string.encode('utf-8'))  #@@ catch parse errors
            F = p.endDoc()
            F = F.close()
            store._experience.add(subj=subj, pred=store.parsedAsN3, obj=F)
            return F


class BI_conclusion(HeavyBuiltIn, Function):
    """ Deductive Closure

    Closure under Forward Inference, equivalent to cwm's --think function.
    This is a function, so the object is calculated from the subject.
    """
    def evalObj(self, subj, queue, bindings, proof, query):
        store = subj.store
        if isinstance(subj, Formula):
            assert subj.canonical != None
            F = self.store.any((store._experience, store.cufi, subj, None))  # Cached value?
            if F != None:
                if diag.chatty_flag > 10: progress("Bultin: " + repr(subj)+ " cached log:conclusion " + repr(F))
                return F

            F = self.store.newFormula()
            if diag.tracking:
                reason = BecauseMerge(F, subj)
                F.collector = reason
                proof.append(reason)
            else: reason = None
            if diag.chatty_flag > 10: progress("Bultin: " + repr(subj)+ " log:conclusion " + repr(F))
            self.store.copyFormula(subj, F, why=reason)  # leave open
            think(F)
            F = F.close()
            assert subj.canonical != None

            self.store.storeQuad((store._experience, store.cufi, subj, F),
                                 why=BecauseOfExperience("conclusion"))  # Cache for later
            return F


class BI_conjunction(LightBuiltIn, Function):   # Light? well, I suppose so.
    """ The conjunction of a set of formulae is the set of statements which
    is just the union of the sets of statements modulo non-duplication of
    course"""
    def evalObj(self, subj, queue, bindings, proof, query):
        subj_py = subj.value()
        if diag.chatty_flag > 50:
            progress("Conjunction input:"+repr(subj_py))
            for x in subj_py:
                progress(" conjunction input formula %s has %i statements" % (x, x.size()))

        # F = conjunctionCache.get(subj_py, None)
        # if F != None: return F
        F = self.store.newFormula()
        if diag.tracking:
            reason = BecauseMerge(F, subj_py)
            F.collector = reason
            proof.append(reason)
        else: reason = None
        for x in subj_py:
            if not isinstance(x, Formula): return None  # Can't
            self.store.copyFormula(x, F, why=reason)
            if diag.chatty_flag > 74:
                progress(" Formula %s now has %i" % (repr(F),len(F.statements)))
        return F.canonicalize()


class BI_n3String(LightBuiltIn, Function):   # Light? well, I suppose so.
    """ The n3 string for a formula is what you get when you express it in
    the N3 language without using any URIs.

    Note that there is no guarantee that two implementations will generate
    the same thing, but whatever they generate should parse back using
    parsedAsN3 to exactly the same original formula.
    If we *did* have a canonical form it would be great for signature.
    A canonical form is possible but not simple."""
    def evalObj(self, subj, queue, bindings, proof, query):
        if diag.chatty_flag > 50:
            progress("Generating N3 string for:"+repr(subj))
        if isinstance(subj, Formula):
            return self.store.intern((LITERAL, subj.n3String()))


################################################################################################

class RDFStore(RDFSink) :
    """ Absorbs RDF stream and saves in triple store
    """

    def clear(self):
        "Remove all formulas from the store @@@ DOESN'T ACTUALLY DO IT/BROKEN"
        self.resources = {}   # Hash table of URIs for interning things
        # self.formulae = []   # List of all formulae
        self._experience = None   # A formula of all the things program run knows from direct experience
        self._formulaeOfLength = {}   # A dictionary of all the constant formuale in the store, lookup by length key.
        self.size = 0

    def __init__(self, genPrefix=None, metaURI=None, argv=None, crypto=0):
        RDFSink.__init__(self, genPrefix=genPrefix)
        self.clear()
        self.argv = argv   # List of command line arguments for N3 scripts

        run = uripath.join(uripath.base(), ".RUN/") + repr(time.time())  # Reserrved URI @@

        if metaURI != None: meta = metaURI
        else: meta = run + "meta#formula"
        self.reset(meta)

        # Constants, as interned:
        self.forSome = self.symbol(forSomeSym)
        self.integer = self.symbol(INTEGER_DATATYPE)
        self.float = self.symbol(FLOAT_DATATYPE)
        self.decimal = self.symbol(DECIMAL_DATATYPE)
        self.forAll = self.symbol(forAllSym)
        self.implies = self.symbol(Logic_NS + "implies")
        self.insertion = self.symbol(Delta_NS + "insertion")
        self.deletion = self.symbol(Delta_NS + "deletion")
        self.means = self.symbol(Logic_NS + "means")
        self.asserts = self.symbol(Logic_NS + "asserts")

        # Register Light Builtins:
        log = self.symbol(Logic_NS[:-1])   # The resource without the hash

        # Functions:
        log.internFrag("racine", BI_racine)   # Strip fragment identifier from string
        self.rawType = log.internFrag("rawType", BI_rawType)   # syntactic type, oneOf:
        log.internFrag("rawUri", BI_rawUri)
        self.Literal = log.internFrag("Literal", Fragment)   # syntactic type possible value - a class
        self.List = log.internFrag("List", Fragment)   # syntactic type possible value - a class
        self.Formula = log.internFrag("Formula", Fragment)   # syntactic type possible value - a class
        self.Other = log.internFrag("Other", Fragment)   # syntactic type possible value - a class
        log.internFrag("conjunction", BI_conjunction)

        # Bidirectional things:
        log.internFrag("uri", BI_uri)
        log.internFrag("equalTo", BI_EqualTo)
        log.internFrag("notEqualTo", BI_notEqualTo)
        self.sameAs = self.symbol(OWL_NS + "sameAs")

        # Heavy relational operators:
        self.includes = log.internFrag( "includes", BI_includes)
        # log.internFrag("directlyIncludes", BI_directlyIncludes)
        log.internFrag("notIncludes", BI_notIncludes)
        # log.internFrag("notDirectlyIncludes", BI_notDirectlyIncludes)

        #Heavy functions:
        # log.internFrag("resolvesTo", BI_semantics) # obsolete
        self.semantics = log.internFrag("semantics", BI_semantics)
        self.cufi = log.internFrag("conclusion", BI_conclusion)
        self.semanticsOrError = log.internFrag("semanticsOrError", BI_semanticsOrError)
        self.content = log.internFrag("content", BI_content)
        self.parsedAsN3 = log.internFrag("parsedAsN3", BI_parsedAsN3)
        self.n3ExprFor = log.internFrag("n3ExprFor", BI_parsedAsN3)   ## Obsolete
        log.internFrag("n3String", BI_n3String)

        # Remote service flag in metadata:
        self.definitiveService = log.internFrag("definitiveService", Fragment)
        self.definitiveDocument = log.internFrag("definitiveDocument", Fragment)
        self.pointsAt = log.internFrag("pointsAt", Fragment)   # This was EricP's

        # Constants:
        self.Truth = self.symbol(Logic_NS + "Truth")
        self.Falsehood = self.symbol(Logic_NS + "Falsehood")
        self.type = self.symbol(RDF_type_URI)
        self.Chaff = self.symbol(Logic_NS + "Chaff")
        self.docRules = self.symbol("http://www.w3.org/2000/10/swap/pim/doc#rules")
        self.imports = self.symbol("http://www.w3.org/2002/07/owl#imports")

        # List stuff - beware
of namespace changes! :-( from cwm_list import BI_first, BI_rest rdf = self.symbol(List_NS[:-1]) self.first = rdf.internFrag("first", BI_first) self.rest = rdf.internFrag("rest", BI_rest) self.nil = self.intern(N3_nil, FragmentNil) self.Empty = self.intern(N3_Empty) self.List = self.intern(N3_List) import cwm_string # String builtins import cwm_os # OS builtins import cwm_time # time and date builtins import cwm_math # Mathematics import cwm_trigo # Trignometry import cwm_times # time and date builtins import cwm_maths # Mathematics, perl/string style import cwm_list # List handling operations cwm_string.register(self) cwm_math.register(self) cwm_trigo.register(self) cwm_maths.register(self) cwm_os.register(self) cwm_time.register(self) cwm_times.register(self) cwm_list.register(self) if crypto: import cwm_crypto # Cryptography cwm_crypto.register(self) # would like to anyway to catch bug if used but not available def newLiteral(self, str, dt=None, lang=None): "Interned version: generate new literal object as stored in this store" key = (str, dt, lang) result = self.resources.get(key, None) if result != None: return result result = Literal(self, str, dt, lang) self.resources[key] = result return result def newFormula(self, uri=None): return IndexedFormula(self, uri) def newSymbol(self, uri): return self.intern(RDFSink.newSymbol(self, uri)) def newBlankNode(self, context, uri=None, why=None): """Create or reuse, in the default store, a new unnamed node within the given formula as context, and return it for future use""" return context.newBlankNode(uri=uri) def newExistential(self, context, uri=None, why=None): """Create or reuse, in the default store, a new named variable existentially qualified within the given formula as context, and return it for future use""" return self.intern(RDFSink.newExistential(self, context, uri, why=why)) def newUniversal(self, context, uri=None, why=None): """Create or reuse, in the default store, a named variable universally qualified 
within the given formula as context, and return it for future use""" return self.intern(RDFSink.newUniversal(self, context, uri, why=why)) ################### def reset(self, metaURI): # Set the metaURI self._experience = self.newFormula(metaURI + "_formula") assert isinstance(self._experience, Formula) def load(store, uri=None, openFormula=None, asIfFrom=None, contentType=None, remember=1, flags="", referer=None, why=None): """Get and parse document. Guesses format if necessary. uri: if None, load from standard input. remember: if 1, store as metadata the relationship between this URI and this formula. Returns: top-level formula of the parsed document. Raises: IOError, SyntaxError, DocumentError This was and could be an independent function, as it is fairly independent of the store. However, it is natural to call it as a method on the store. And a proliferation of APIs confuses. """ baseURI = uripath.base() if uri != None and openFormula==None and remember: addr = uripath.join(baseURI, uri) # Make abs from relative source = store.newSymbol(addr) F = store._experience.the(source, store.semantics) if F != None: if diag.chatty_flag > 40: progress("Using cached semantics for",addr) return F F = webAccess.load(store, uri, openFormula, asIfFrom, contentType, flags, referer, why) store._experience.add( store.intern((SYMBOL, addr)), store.semantics, F, why=BecauseOfExperience("load document")) return F return webAccess.load(store, uri, openFormula, asIfFrom, contentType, flags, \ referer=referer, why=why) def loadMany(self, uris, openFormula=None, referer=None): """Get, parse and merge serveral documents, given a list of URIs. Guesses format if necessary. Returns top-level formula which is the parse result. 
Raises IOError, SyntaxError """ assert type(uris) is type([]) if openFormula == None: F = self.newFormula() else: F = openFormula f = F.uriref() for u in uris: F.reopen() # should not be necessary self.load(u, openFormula=F, remember=0, referer=referer) return F.close() def genId(self): """Generate a new identifier This uses the inherited class, but also checks that we haven't for some pathalogical reason ended up generating the same one as for example in another run of the same system. """ while 1: uriRefString = RDFSink.genId(self) hash = string.rfind(uriRefString, "#") if hash < 0 : # This is a resource with no fragment return uriRefString # ?! resid = uriRefString[:hash] r = self.resources.get(resid, None) if r == None: return uriRefString fragid = uriRefString[hash+1:] f = r.fragments.get(fragid, None) if f == None: return uriRefString if diag.chatty_flag > 70: progress("llyn.genid Rejecting Id already used: "+uriRefString) def checkNewId(self, urirefString): """Raise an exception if the id is not in fact new. This is useful because it is usfeul to generate IDs with useful diagnostic ways but this lays them open to possibly clashing in pathalogical cases.""" hash = string.rfind(urirefString, "#") if hash < 0 : # This is a resource with no fragment result = self.resources.get(urirefString, None) if result == None: return else: r = self.resources.get(urirefString[:hash], None) if r == None: return f = r.fragments.get(urirefString[hash+1:], None) if f == None: return raise ValueError("Ooops! 
Attempt to create new identifier hits on one already used: %s"%(urirefString)) return def internURI(self, str, why=None): warn("use symbol()", DeprecationWarning, stacklevel=3) return self.intern((SYMBOL,str), why) def symbol(self, str, why=None): """Intern a URI for a symvol, returning a symbol object""" return self.intern((SYMBOL,str), why) def _fromPython(self, x, queue=None): """Takem a python string, seq etc and represent as a llyn object""" if isinstance(x, tuple(types.StringTypes)): return self.newLiteral(x) elif type(x) is types.LongType or type(x) is types.IntType: return self.newLiteral(str(x), self.integer) elif isinstance(x, Decimal): return self.newLiteral(str(x), self.decimal) elif type(x) is types.FloatType: if `x`.lower() == "nan": # We can get these form eg 2.math:asin return None return self.newLiteral(`x`, self.float) elif isinstance(x, Term): return x elif hasattr(x,'__getitem__'): #type(x) == type([]): return self.nil.newList([self._fromPython(y) for y in x]) return x def intern(self, what, dt=None, lang=None, why=None, ): """find-or-create a Fragment or a Symbol or Literal or list as appropriate returns URISyntaxError if, for example, the URIref has two #'s. This is the way they are actually made. """ if isinstance(what, Term): return what # Already interned. @@Could mask bugs if type(what) is not types.TupleType: if isinstance(what, tuple(types.StringTypes)): return self.newLiteral(what, dt, lang) # progress("llyn1450 @@@ interning non-string", `what`) if type(what) is types.LongType: return self.newLiteral(str(what), self.integer) if type(what) is types.IntType: return self.newLiteral(`what`, self.integer) if type(what) is types.FloatType: return self.newLiteral(`what`, self.float) if isinstance(what,Decimal): return self.newLiteral(str(what), self.decimal) if type(what) is types.ListType: #types.SequenceType: return self.newList(what) raise RuntimeError("Eh? 
can't intern "+`what`+" of type: "+`type(what)`) typ, urirefString = what if typ == LITERAL: return self.newLiteral(urirefString, dt, lang) else: assert ':' in urirefString, "must be absolute: %s" % urirefString hash = string.rfind(urirefString, "#") if hash < 0 : # This is a resource with no fragment assert typ == SYMBOL, "If URI <%s>has no hash, must be symbol" % urirefString result = self.resources.get(urirefString, None) if result != None: return result result = Symbol(urirefString, self) self.resources[urirefString] = result else : # This has a fragment and a resource resid = urirefString[:hash] if string.find(resid, "#") >= 0: raise URISyntaxError("Hash in document ID - can be from parsing XML as N3! -"+resid) r = self.symbol(resid) if typ == SYMBOL: if urirefString == N3_nil[1]: # Hack - easier if we have a different classs result = r.internFrag(urirefString[hash+1:], FragmentNil) else: result = r.internFrag(urirefString[hash+1:], Fragment) elif typ == ANONYMOUS: result = r.internFrag(urirefString[hash+1:], AnonymousNode) elif typ == FORMULA: raise RuntimeError("obsolete") result = r.internFrag(urirefString[hash+1:], IndexedFormula) else: raise RuntimeError, "did not expect other type:"+`typ` return result def newList(self, value): return self.nil.newList(value) # def deleteFormula(self,F): # if diag.chatty_flag > 30: progress("Deleting formula %s %ic" % # ( `F`, len(F.statements))) # for s in F.statements[:]: # Take copy # self.removeStatement(s) def reopen(self, F): if F.canonical == None: if diag.chatty_flag > 50: progress("reopen formula -- @@ already open: "+`F`) return F # was open if diag.chatty_flag > 00: progress("warning - reopen formula:"+`F`) key = len(F.statements), len(F.universals()), len(F.existentials()) self._formulaeOfLength[key].remove(F) # Formulae of same length F.canonical = None return F def bind(self, prefix, uri): if prefix != "": # Ignore binding to empty prefix return RDFSink.bind(self, prefix, uri) # Otherwise, do as usual. 
def makeStatement(self, tuple, why=None): """Add a quad to the store, each part of the quad being in pair form.""" q = ( self.intern(tuple[CONTEXT]), self.intern(tuple[PRED]), self.intern(tuple[SUBJ]), self.intern(tuple[OBJ]) ) if q[PRED] is self.forSome and isinstance(q[OBJ], Formula): if diag.chatty_flag > 97: progress("Makestatement suppressed") return # This is implicit, and the same formula can be used un >1 place self.storeQuad(q, why) def makeComment(self, str): pass # Can't store comments def any(self, q): """Query the store for the first match. Quad contains one None as wildcard. Returns first value matching in that position. """ list = q[CONTEXT].statementsMatching(q[PRED], q[SUBJ], q[OBJ]) if list == []: return None for p in ALL4: if q[p] == None: return list[0].quad[p] def storeQuad(self, q, why=None): """ intern quads, in that dupliates are eliminated. subject, predicate and object are terms - or atomic values to be interned. Builds the indexes and does stuff for lists. Deprocated: use Formula.add() """ context, pred, subj, obj = q assert isinstance(context, Formula), "Should be a Formula: "+`context` return context.add(subj=subj, pred=pred, obj=obj, why=why) def startDoc(self): pass def endDoc(self, rootFormulaPair): return ########################################################################## # # Output methods: # def dumpChronological(self, context, sink): "Fast as possible. Only dumps data. No formulae or universals." pp = Serializer(context, sink) pp. dumpChronological() del(pp) def dumpBySubject(self, context, sink, sorting=1): """ Dump by order of subject except forSome's first for n3=a mode""" pp = Serializer(context, sink, sorting=sorting) pp. dumpBySubject() del(pp) def dumpNested(self, context, sink): """ Iterates over all URIs ever seen looking for statements """ pp = Serializer(context, sink) pp. 
dumpNested() del(pp) ################################## Manipulation methods: # # Note when we move things, then the store may shrink as they may # move on top of existing entries and we don't allow duplicates. # # @@@@ Should automatically here rewrite any variable name clashes # for variable names which occur in the other but not as the saem sort of variable # Must be done by caller. def copyFormula(self, old, new, why=None): bindings = {old: new} for v in old.universals(): new.declareUniversal(bindings.get(v,v)) for v in old.existentials(): new.declareExistential(bindings.get(v,v)) for s in old.statements[:] : # Copy list! q = s.quad for p in CONTEXT, PRED, SUBJ, OBJ: x = q[p] if x is old: q = q[:p] + (new,) + q[p+1:] self.storeQuad(q, why) def purge(self, context, boringClass=None): """Clean up intermediate results Statements in the given context that a term is a Chaff cause any mentions of that term to be removed from the context. """ if boringClass == None: boringClass = self.Chaff for subj in context.subjects(pred=self.type, obj=boringClass): self.purgeSymbol(context, subj) def purgeSymbol(self, context, subj): """Purge all triples in which a symbol occurs. 
""" total = 0 for t in context.statementsMatching(subj=subj)[:]: context.removeStatement(t) # SLOW total = total + 1 for t in context.statementsMatching(pred=subj)[:]: context.removeStatement(t) # SLOW total = total + 1 for t in context.statementsMatching(obj=subj)[:]: context.removeStatement(t) # SLOW total = total + 1 if diag.chatty_flag > 30: progress("Purged %i statements with %s" % (total,`subj`)) return total # def removeStatement(self, s): # "Remove statement from store" # return s[CONTEXT].removeStatement(s) def purgeExceptData(self, context): """Remove anything which can't be expressed in plain RDF""" uu = context.universals() for s in context.statements[:]: for p in PRED, SUBJ, OBJ: x = s[p] if x in uu or isinstance(x, Formula): context.removeStatement(s) break context._universalVariables =[] # Cheat! @ use API class URISyntaxError(ValueError): """A parameter is passed to a routine that requires a URI reference""" pass def isString(x): # in 2.2, evidently we can test for isinstance(types.StringTypes) # --- but on some releases, we need to say tuple(types.StringTypes) return type(x) is type('') or type(x) is type(u'') ##################### Register this module from myStore import setStoreClass setStoreClass(RDFStore) #ends