#! /usr/bin/python
"""

$Id: formula.py,v 1.15 2004/10/28 17:41:59 timbl Exp $

Formula
See:  http://www.w3.org/DesignIssues/Notation3

Interfaces
==========

The store stores many formulae, where one formula is what in
straight RDF implementations is known as a "triple store".
So look at the Formula class for a triple store interface.

See also for comparison, a python RDF API for the Redland library (in C):
   http://www.redland.opensource.ac.uk/docs/api/index.html 
and the redfoot/rdflib interface, a python RDF API:
   http://rdflib.net/latest/doc/triple_store.html

"""

# A __future__ import has to come before every other statement of the module
# (only the module docstring, comments and blank lines may precede it), so it
# is placed ahead of the namespace constants below.
from __future__ import generators

# Namespace of the vocabulary used by Formula.reification()
reifyNS = 'http://www.w3.org/2004/06/rei#'
# URI of owl:oneOf, used by Formula.reification() to enumerate class members
owlOneOf = 'http://www.w3.org/2002/07/owl#oneOf'

import types
import string
import re
import StringIO
import sys
import time
import uripath

from OrderedSequence import merge

import urllib # for log:content
import md5, binascii  # for building md5 URIs

import notation3    # N3 parsers and generators, and RDF generator
# import sax2rdf      # RDF1.0 syntax parser to N3 RDF stream

import diag  # problems importing the tracking flag, must be explicit it seems diag.tracking
from diag import progress, verbosity, tracking
from term import BuiltIn, LightBuiltIn, \
    HeavyBuiltIn, Function, ReverseFunction, \
    Literal, AnonymousNode , AnonymousExistential, AnonymousUniversal, \
    Symbol, Fragment, FragmentNil,  Term, CompoundTerm, List, EmptyList, NonEmptyList

from RDFSink import Logic_NS, RDFSink, forSomeSym, forAllSym
from RDFSink import CONTEXT, PRED, SUBJ, OBJ, PARTS, ALL4
from RDFSink import N3_nil, N3_first, N3_rest, OWL_NS, N3_Empty, N3_List, List_NS
from RDFSink import RDF_NS_URI
from RDFSink import RDF_type_URI
from RDFSink import FORMULA, LITERAL, ANONYMOUS, SYMBOL


cvsRevision = "$Revision: 1.15 $"

# Magic resources we know about


from why import Because, BecauseBuiltIn, BecauseOfRule, \
    BecauseOfExperience, becauseSubexpression, BecauseMerge ,report


###################################### Formula
#
# A Formula is a set of triples.

class Formula(AnonymousNode, CompoundTerm):
    """A formula of a set of RDF statements, triples.
    
    (The triples are actually instances of StoredStatement.)
    Other systems such as jena and redland use the term "Model" for Formula.
    For rdflib, this is known as a TripleStore.
    Cwm and N3 extend RDF to allow a literal formula as an item in a triple.
    
    A formula is either open or closed.  Initially, it is open. In this
    state is may be modified - for example, triples may be added to it.
    When it is closed, note that a different interned version of itself
    may be returned. From then on it is a constant.
    
    Only closed formulae may be mentioned in statements in other formuale.
    
    There is a reopen() method but it is not recommended, and if desperate should
    only be used immediately after a close(). 
    """
    def __init__(self, store, uri=None):
        """Set up an open, empty formula.

        store and uri are handed on unchanged to AnonymousNode.__init__.
        """
        AnonymousNode.__init__(self, store, uri)
        # Variables quantified with this formula as their scope
        self._universalVariables = []
        self._existentialVariables = []
        # The StoredStatement objects which make up the formula
        self.statements = []
        # None until canonicalize() has been called on this formula
        self.canonical = None

    def __repr__(self):
        """Return a short N3-like rendering of the formula for debugging.

        An empty formula is shown as "{}", a formula of one statement as
        that statement's subject, predicate and object in braces, and any
        bigger formula as just its number of statements, for example "{42}".
        """
        if self.statements == []:
            return "{}"
        count = len(self.statements)
        if count == 1:
            st = self.statements[0]
            return "{" + repr(st[SUBJ]) + " " + repr(st[PRED]) + " " + repr(st[OBJ]) + "}"
        # (The original also computed Term.__repr__(self) here and threw the
        # result away; that dead call has been dropped.)
        return "{%i}" % count
	
    def classOrder(self):
        """Return the ordering value for formulae, which is 11.

        The value is chosen to put formulae at the end of a listing,
        because that makes the listing easier to read.
        """
        formulaOrder = 11
        return formulaOrder

    def compareTerm(self, other):
        """Order this formula against another one; other is assumed to be a Formula too.

        The formula with fewer statements comes first.  Failing that, the
        universals and then the existentials are compared: the shorter list
        first, otherwise the sorted lists pairwise with compareAnyTerm.
        Failing that, the sorted statements are compared pairwise by
        subject, predicate and object.

        Returns 1 or -1 for a length difference, otherwise the first
        non-zero pairwise difference.  Raises RuntimeError if no difference
        is found, since identical formulae ought to have been interned.

        Side effects: the variable lists and the statement lists of both
        formulae are sorted in place.  A progress message is issued for
        either formula which is not canonical.
        """
        for formula in (self, other):
            if formula.canonical is not formula:
                progress("@@@@@ Comparing formula NOT canonical", repr(formula))
        mine = self.statements
        theirs = other.statements
        myCount = len(mine)
        theirCount = len(theirs)
        if myCount > theirCount:
            return 1
        if myCount < theirCount:
            return -1

        variableLists = ((self.universals(), other.universals()),
                         (self.existentials(), other.existentials()))
        for myVars, theirVars in variableLists:
            if len(myVars) > len(theirVars):
                return 1
            if len(myVars) < len(theirVars):
                return -1
            myVars.sort(Term.compareAnyTerm)
            theirVars.sort(Term.compareAnyTerm)
            # Both lists have the same length at this point
            for myVar, theirVar in zip(myVars, theirVars):
                diff = myVar.compareAnyTerm(theirVar)
                if diff != 0:
                    return diff

        # @@ Sorting should be unnecessary for canonical formulae, which are
        # always kept sorted.  The context is ignored in the comparison.
        mine.sort(StoredStatement.compareSubjPredObj)
        theirs.sort(StoredStatement.compareSubjPredObj)
        for myStatement, theirStatement in zip(mine, theirs):
            diff = myStatement.compareSubjPredObj(theirStatement)
            if diff != 0:
                return diff
        raise RuntimeError("Identical formulae not interned! Length %i: %s\n\t%s\n vs\t%s" % (
                    myCount, repr(mine), self.debugString(), other.debugString()))


    def existentials(self):
        """Return the list of existential variables which have this formula as scope.

        The list handed back is the formula's own internal list, not a copy.
        """
        scoped = self._existentialVariables
        return scoped


    def universals(self):
        """Return the list of variables universally quantified with this formula as scope.

        The list handed back is the formula's own internal list, not a copy.
        """
        scoped = self._universalVariables
        return scoped
    
    def variables(self):
        """Return a new list of all the variables quantified within this scope.

        The existentials come first, followed by the universals.
        """
        quantified = self.existentials()
        return quantified + self.universals()
	
    def size(self):
        """Return the number of statements in the formula.

        Obsolete: use len(F) instead."""
        statementCount = len(self.statements)
        return statementCount

    def __len__(self):
        """Return how many statements the formula holds."""
        held = self.statements
        return len(held)

    def __iter__(self):
        """Generate the statements of the formula one by one.

        This is the internal method which lets a formula be iterated over
        as though it were a sequence of statements.
        """
        for statement in iter(self.statements):
            yield statement

    def newSymbol(self, uri):
        """Return the store's symbol for the RDF node whose uri is given.

        The work is delegated to the store of this formula, so the symbol
        belongs to the same store as the formula."""
        store = self.store
        return store.newSymbol(uri)

    def newList(self, list):
        """Return the list term for the given python list.

        The term is built by the newList method of the store's nil."""
        nil = self.store.nil
        return nil.newList(list)

    def newLiteral(self, str, dt=None, lang=None):
        """Return the store's literal for the string given.

        str, dt and lang are passed on unchanged to the newLiteral method
        of the store of this formula, so the literal belongs to that store."""
        store = self.store
        return store.newLiteral(str, dt, lang)

    def intern(self, value):
        """Return whatever the store of this formula interns the value as."""
        store = self.store
        return store.intern(value)
	
    def newBlankNode(self, uri=None, why=None):
        """Create a new unnamed node with this formula as context.

        The node is an AnonymousExistential; it is added to the existential
        variables of this formula and returned.  The uri is typically
        omitted; when given it is handed to AnonymousExistential as the
        (arbitrary) identifier.  The why parameter is not used here."""
        node = AnonymousExistential(self, uri)
        existentials = self._existentialVariables
        existentials.append(node)
        return node

    
    def declareUniversal(self, v):
        """Record v as universally quantified in this formula.

        Nothing happens if v has been declared here already."""
        if verbosity() > 90:
            progress("Declare universal:", v)
        declared = self._universalVariables
        if v in declared:
            return
        declared.append(v)
	
    def declareExistential(self, v):
        """Record v as existentially quantified in this formula.

        A variable declared here already is silently left alone; it is
        not treated as an error."""
        if verbosity() > 90:
            progress("Declare existential:", v)
        declared = self._existentialVariables
        if v in declared:  # Searching the list takes time
            return
        declared.append(v)
	
    def newExistential(self, uri=None, why=None):
        """Create a named variable existentially quantified within this formula.

        uri   the identifier of the variable.  It has to be given: for an
              unnamed node, call newBlankNode() instead.
        why   an optional reason, passed on to the store.

        The variable is made by the newExistential method of the store.
        Raises RuntimeError when no uri is given.
        See also: existentials()."""
        if uri is None:
            raise RuntimeError("Please use newBlankNode with no URI")
        return self.store.newExistential(self, uri, why=why)
    
    def newUniversal(self, uri=None, why=None):
        """Create a variable universally quantified within this formula.

        The variable is an AnonymousUniversal made from this formula and
        the uri; it is added to the universal variables of the formula and
        returned.  The why parameter is not used here.
        See also: universals()"""
        variable = AnonymousUniversal(self, uri)
        universals = self._universalVariables
        universals.append(variable)
        return variable

    def newFormula(self, uri=None):
        """Ask the store of this formula for a new formula.

        The uri is typically omitted; when given it is passed to the
        newFormula method of the store as the (arbitrary) internal
        identifier of the formula."""
        store = self.store
        return store.newFormula(uri)

    def statementsMatching(self, pred=None, subj=None, obj=None):
        """Return a list of the StoredStatement objects matching the parts given.

        Each of pred, subj and obj is either None, meaning a wildcard, or
        the very term (identity test) which the corresponding part of a
        statement must be.  Note the PSO order of the parameters: named
        parameters are safest.

        The list is built afresh for each call, in the order of the
        statements of the formula.  Please treat it as READ-ONLY.

        For example:
        for s in f.statementsMatching(pred=pantoneColor):
            print "We've got one which is ", `s[OBJ]`

        If none match, returns []
        """
        found = []
        for s in self.statements:
            if pred is not None and pred is not s.predicate():
                continue
            if subj is not None and subj is not s.subject():
                continue
            if obj is not None and obj is not s.object():
                continue
            found.append(s)
        return found

    def contains(self, pred=None, subj=None, obj=None):
        """Return 1 if the formula has a statement matching the parts given, else 0.

        Each of pred, subj and obj is either None, meaning a wildcard, or
        the very term (identity test) which the corresponding part of a
        statement must be.

        For example:
        if f.contains(pred=pantoneColor):
            print "We've got one statement about something being some color"
        """
        for s in self.statements:
            if pred is not None and pred is not s.predicate():
                continue
            if subj is not None and subj is not s.subject():
                continue
            if obj is not None and obj is not s.object():
                continue
            return 1
        return 0


    def any(self, subj=None, pred=None, obj=None):
        """Return one value filling the blank in the pattern given, or None.

        A part given as None is a wildcard; a part which is given must be
        the very term found in the statement.  The first matching
        statement supplies the answer: its object if obj was left open,
        otherwise its subject if subj was left open, otherwise its
        predicate.

        color = f.any(pred=pantoneColor, subj=myCar)
        somethingRed = f.any(pred=pantoneColor, obj=red)

        Note the difference from the old store.any: the order is SPO, not
        PSO.  To avoid confusion, use named parameters.

        Returns None when no statement matches.  Raises ValueError when a
        statement matches although no part was left as a wildcard.
        """
        for candidate in self.statements:
            if not (pred == None or pred is candidate.predicate()):
                continue
            if not (subj == None or subj is candidate.subject()):
                continue
            if not (obj == None or obj is candidate.object()):
                continue
            # First match found: hand back whichever part was left open
            if obj == None:
                return candidate.object()
            if subj == None:
                return candidate.subject()
            if pred == None:
                return candidate.predicate()
            raise ValueError("You must give one wildcard in (%s, %s, %s)" %(subj, pred, obj))
        return None


    def the(self, subj=None, pred=None, obj=None):
        """Return None or the value filling the blank in the pattern given.

        Meant to be like any() but for use when exactly one answer is
        expected.  @@ The check that there is no more than one answer is
        not made yet: at present this simply returns what any() returns.

        color = f.the(pred=pantoneColor, subj=myCar)
        redCar = f.the(pred=pantoneColor, obj=red)
        """
        answer = self.any(subj, pred, obj)
        return answer

    def each(self, subj=None, pred=None, obj=None):
        """Generate the values filling the blank in the pattern given.

        A part given as None is a wildcard; a part which is given must be
        the very term found in the statement.  Each matching statement
        contributes one value: its predicate if pred was left open,
        otherwise its subject if subj was left open, otherwise its object.
        ValueError is raised on a match when no part was left open.

        Examples:
        colors = f.each(pred=pantoneColor, subj=myCar)

        for redthing in f.each(pred=pantoneColor, obj=red): ...

        """
        for candidate in self.statements:
            if not (pred == None or pred is candidate.predicate()):
                continue
            if not (subj == None or subj is candidate.subject()):
                continue
            if not (obj == None or obj is candidate.object()):
                continue
            if pred == None:
                yield candidate.predicate()
            elif subj == None:
                yield candidate.subject()
            elif obj == None:
                yield candidate.object()
            else:
                raise ValueError(
                  "You must give one wildcard in (%s, %s, %s)" %(subj, pred, obj))

    def searchable(self, subj=None, pred=None, obj=None):
        """Return a pair: the difficulty of searching, and the statements found.

        The difficulty is a measure (in arbitrary units) of how long the
        store thinks the search will take.  It is only meant for choosing
        which part of a query to search first.

        In this implementation the difficulty is 1 plus the number of
        parts left as wildcards (None), so it lies between 1 and 4.

        The second item of the pair is whatever statementsMatching()
        returns for the same subject, predicate and object."""
        difficulty = 1
        for part in (subj, pred, obj):
            if part is None:
                difficulty += 1
        # statementsMatching takes its parameters in PSO order, so the parts
        # are passed by name to keep subject and predicate from being swapped.
        return difficulty, self.statementsMatching(pred=pred, subj=subj, obj=obj)


    def substitution(self, bindings, why=None):
        """Return this formula, or a version of it with the substitution made.

        bindings must be a dictionary.  When none of its keys occurs in
        this formula, the formula itself is returned.  Otherwise a new
        formula is made in the same store, loaded from this one with the
        bindings applied, and its canonical version is returned.
        """
        assert type(bindings) is type({})
        store = self.store
        affected = self.occurringIn(bindings.keys())
        if affected == []:
            return self # phew! nothing to do

        result = store.newFormula()
        if verbosity() > 90:
            progress("substitution: formula"+repr(self)+" becomes new "+repr(result),
                     " because of ", affected)
        result.loadFormulaWithSubsitution(self, bindings, why=why)
        return result.canonicalize()

    def loadFormulaWithSubsitution(self, old, bindings={}, why=None):
        """Load the information from another formula, substituting as we go.

        The universals and existentials of old are declared here, each
        replaced by its binding if it has one.  Then every statement of
        old is added with the substitution applied to its subject,
        predicate and object; in that substitution old itself is also
        mapped onto this formula.  The bindings passed in are not changed.

        Returns the sum of the values returned by add(), that is, roughly
        the number of statements added."""
        for variable in old.universals():
            self.declareUniversal(bindings.get(variable, variable))
        for variable in old.existentials():
            self.declareExistential(bindings.get(variable, variable))
        # Work on a copy so that the caller's dictionary is left alone
        mapping = bindings.copy()
        mapping[old] = self
        added = 0
        for statement in old.statements[:]:   # Iterate over a copy of the list
            newSubj = statement[SUBJ].substitution(mapping)
            newPred = statement[PRED].substitution(mapping)
            newObj = statement[OBJ].substitution(mapping)
            added += self.add(subj=newSubj, pred=newPred, obj=newObj, why=why)
        return added
                
    def substituteEquals(self, bindings, newBindings):
        """Return this formula itself, unchanged.

        Substitution of = for = does NOT happen inside a formula, as the
        formula is a form of quotation.  Both parameters are ignored."""
        unchanged = self
        return unchanged

    def occurringIn(self, vars):
        """Which of the variables in the list occur in this formula?

        The answer is built by merging what occurringIn(vars) gives for
        the predicate, subject and object of every statement.  A part
        which is this formula itself is skipped."""
        found = []
        if verbosity() > 98:
            progress("----occuringIn: ", repr(self))
        for statement in self.statements:
            for part in PRED, SUBJ, OBJ:
                term = statement[part]
                if term is not self:
                    found = merge(found, term.occurringIn(vars))
        return found

    def unify(self, other, vars, existentials, bindings):
        """See Term.unify()

        @@ Unfinished.  At present the only success is the trivial one:
        when other is this very formula, the result is [({}, None)].
        In every other case 0 is returned; vars, existentials and bindings
        are not looked at.
        """

        if not isinstance(other, Formula): return 0
        if self is other: return [({}, None)]
        # Quick rejection: a different number of statements or different
        # quantified variables.  Universals are compared with universals
        # (they used to be compared with the other formula's existentials).
        if (len(self) != len(other)
            or self._existentialVariables != other._existentialVariables
            or self._universalVariables != other._universalVariables
            ): return 0
        return 0    # @@@@@@@   FINISH THIS: unification of formulae is not implemented
	
		    
    def bind(self, prefix, uri):
        """Give a prefix and its associated URI as a hint for output.

        The pair is simply passed on to the bind method of the store,
        whose result is returned.  The store does not use prefixes
        internally, but keeping track of those used in the input data
        makes for more human-readable output.
        """
        store = self.store
        return store.bind(prefix, uri)

    def add(self, subj, pred, obj, why=None):
        """Add a triple to the formula.

        The formula must be open: RuntimeError is raised if it has been
        canonicalized.
        subj, pred and obj are stored as given, in a new StoredStatement
        which has this formula as its context.
        why is accepted for use when a proof is required; it is not used here.

        The size counter of the store is increased by one.
        Returns 1, for the one statement added.  @@ This return value may
        become obsolete.
        """
        if self.canonical != None:
            raise RuntimeError("Attempt to add statement to canonical formula "+repr(self))

        self.store.size += 1
        # The quad is kept in the order context, predicate, subject, object
        quad = (self, pred, subj, obj)
        self.statements.append(StoredStatement(quad))
        return 1
    
    def removeStatement(self, s):
        """Remove the statement s.  The formula must be open.

        The size counter of the store is decreased by one and s is taken
        out of the list of statements, which is a linear search.
        """
        assert self.canonical == None, "Cannot remove statement from canonical "+repr(self)
        store = self.store
        store.size = store.size - 1
        self.statements.remove(s)
    
    def close(self):
        """No more to add: return the interned value of the formula.

        This is what canonicalize() returns.
        NOTE You must now use the interned one, not the original!"""
        interned = self.canonicalize()
        return interned

    def canonicalize(F):
        """Return the canonical version of this formula, recording it if need be.

        Call this when the formula is in its final form, with all its
        statements.  If a canonical version has been recorded already it
        is returned; otherwise the formula is recorded as its own
        canonical version and returned.

        LIMITATION: The basic Formula class does NOT really canonicalize,
        so it won't spot identical formulae.  The IndexedFormula will.
        """
        if F.canonical is not None:
            if verbosity() > 70:
                progress("Canonicalize -- @@ already canonical:"+repr(F))
            return F.canonical
        # @@ No canonicalization is done here: the formula stands for itself
        F.canonical = F
        return F


    def n3String(self, base=None, flags=""):
        """Dump the formula in N3 and return the text as a string.

        base and flags are passed on to the notation3.ToN3 sink, which
        writes into a string buffer; the store dumps the formula to it."""
        out = StringIO.StringIO()
        sink = notation3.ToN3(out.write, quiet=1, base=base, flags=flags)
        self.store.dumpNested(self, sink)
        return out.getvalue()

    def rdfString(self, base=None, flags=""):
        """Dump the formula to an absolute string in RDF/XML

        base and flags are passed on to the ToRDF sink, which is given a
        string buffer; the store dumps the formula to it and the contents
        of the buffer are returned.

        NOTE(review): neither ToRDF nor _outURI is defined or imported
        anywhere in the visible source of this module, so as written this
        looks as though it raises NameError when called -- confirm where
        these two names are supposed to come from.
        """
        buffer=StringIO.StringIO()
        _outSink = ToRDF(buffer, _outURI, base=base, flags=flags)
        self.store.dumpNested(self, _outSink)
        return buffer.getvalue()

    def outputStrings(self, channel=None, relation=None):
        """Fetch output strings from the formula, sort and output them.

        To output a string, associate it (using the given relation) with a
        key such that the order of the keys is the order in which you want
        the corresponding strings output.

        channel   object with a write() method; sys.stdout when omitted
        relation  the predicate relating key to string; when omitted, the
                  interned symbol Logic_NS + "outputString"

        Each string is written encoded as UTF-8.
        """
        if channel is None:
            channel = sys.stdout
        if relation is None:
            relation = self.store.intern((SYMBOL, Logic_NS + "outputString"))

        def _comparePairs(a, b):
            "Order (key, string) pairs by key and then by string, as terms"
            for i in 0, 1:
                if a[i] is not b[i]:
                    diff = a[i].compareAnyTerm(b[i])
                    if diff != 0:
                        return diff
            return 0

        # (key, string) pairs: the subject is the key, the object the string
        pairs = [(s[SUBJ], s[OBJ]) for s in self.statementsMatching(pred=relation)]
        pairs.sort(_comparePairs)
        for key, text in pairs:
            channel.write(text.string.encode('utf-8'))

    def reopen(self):
        """Make a formula which was once closed open for input again.

        The work is done by the reopen method of the store, whose result
        is returned.

        NOT recommended.  Dangers: because of interning, this formula will
        be the same object as a formula used elsewhere which happens to
        have the same content.  You mess with this one, you mess with that
        one.  It is much better to keep the formula open until you no
        longer need it open.  The trouble is, the parsers close it
        automatically at the moment.  To be fixed."""
        store = self.store
        return store.reopen(self)


    def includes(f, g, _variables=[],  bindings=[]):
        """Does this formula (f) include the information in the other (g)?

        The question is put to the testIncludes method of the store of f.
        bindings is for use within a query.
        """
        store = f.store
        return store.testIncludes(f, g, _variables=_variables, bindings=bindings)

    def generated(self):
        """Return 1: yes, any identifier you see for a formula is arbitrary."""
        isArbitrary = 1
        return isArbitrary

    def asPair(self):
        """Return an old representation: the pair (FORMULA, uriref).  Obsolete"""
        identifier = self.uriref()
        return (FORMULA, identifier)

    def subjects(self, pred=None, obj=None):
        """Obsolete - use each(pred=..., obj=...)

        Generates the subject of every statement matching pred and obj."""
        # list() takes a snapshot of the matches whether statementsMatching
        # hands back a list or an iterator; slicing with [:] only works on a list.
        for s in list(self.statementsMatching(pred=pred, obj=obj)):
            yield s[SUBJ]

    def predicates(self, subj=None, obj=None):
        """Obsolete - use each(subj=..., obj=...)

        Generates the predicate of every statement matching subj and obj."""
        # list() takes a snapshot of the matches whether statementsMatching
        # hands back a list or an iterator; slicing with [:] only works on a list.
        for s in list(self.statementsMatching(subj=subj, obj=obj)):
            yield s[PRED]

    def objects(self, pred=None, subj=None):
        """Obsolete - use each(subj=..., pred=...)

        Generates the object of every statement matching pred and subj."""
        # list() takes a snapshot of the matches whether statementsMatching
        # hands back a list or an iterator; slicing with [:] only works on a list.
        for s in list(self.statementsMatching(pred=pred, subj=subj)):
            yield s[OBJ]

    def reification(self, sink, bnodeMap=None, why=None):
        """Describe myself in RDF to the given context.

        sink      the formula to which the description is added
        bnodeMap  dictionary from terms already described to the nodes
                  standing for them.  If this formula is in it, the node
                  recorded is returned and nothing is added.  A fresh
                  dictionary is used when the parameter is omitted.
        why       reason handed on when the parts of statements are reified

        A new blank node F in sink stands for the formula.  It is given an
        rei:existentials and an rei:universals class, each the owl:oneOf a
        list of literals holding the URIs of the variables, and an
        rei:statements class, the owl:oneOf a list of nodes each carrying
        the reified subject, predicate and object of one statement.

        Returns F.
        """
        if bnodeMap is None:
            # A fresh map for each call.  A dictionary used as the default
            # value would be shared by all calls, and so would hand back
            # nodes which were made for an unrelated sink.
            bnodeMap = {}
        try:
            return bnodeMap[self]
        except KeyError:
            F = sink.newBlankNode()
            bnodeMap[self] = F
        rei = sink.newSymbol(reifyNS[:-1])
        myMap = {}   # The parts of the statements get a map of their own
        ooo = sink.newSymbol(owlOneOf)
        for variables, vocab in ((self.existentials(),  rei["existentials"]),
                                 (self.universals(), rei["universals"])):
            if diag.chatty_flag > 54:
                progress("vars=", variables)
                progress("vars=", [v.uriref() for v in variables])
            uriList = sink.store.nil.newList(
                            [sink.newLiteral(x.uriref()) for x in variables])
            klass = sink.newBlankNode()
            sink.add(klass, ooo, uriList)
            sink.add(F, vocab, klass)

        # The great list of statements
        statementList = []
        for s in self.statements:
            subj = sink.newBlankNode()
            sink.add(subj, rei["subject"], s[SUBJ].reification(sink, myMap, why))
            sink.add(subj, rei["predicate"], s[PRED].reification(sink, myMap, why))
            sink.add(subj, rei["object"], s[OBJ].reification(sink, myMap, why))
            statementList.append(subj)

        # The great class of statements
        StatementClass = sink.newBlankNode()
        realStatementList = sink.store.nil.newList(statementList)
        sink.add(StatementClass, ooo, realStatementList)
        # We now know something!
        sink.add(F, rei["statements"], StatementClass)

        return F

    def flatten(self, sink, why=None):
        """Reify this formula into sink, starting from an empty node map.

        Returns what reification() returns."""
        freshMap = {}
        return self.reification(sink, freshMap, why=why)

    def doesNodeAppear(self, symbol):
        """Does that particular node appear anywhere in this formula?

        The predicate, subject and object of every statement are looked
        at.  A part which is a CompoundTerm is asked in turn whether the
        node appears in it, so the function is necessarily recursive; any
        other part is compared with the node using ==.

        Returns 1 as soon as the node is found, and 0 if it never is.
        This is useful for the pretty printer.  It will also be useful for
        the flattener, when we write it.
        """
        for statement in self.statements:
            for part in PRED, SUBJ, OBJ:
                term = statement[part]
                if isinstance(term, CompoundTerm):
                    found = term.doesNodeAppear(symbol)
                elif term == symbol:
                    found = 1
                else:
                    found = 0
                if found == 1:
                    return 1
        return 0

#################################################################################


class StoredStatement:
    """A statement as an element of a formula.

    The statement is held as a sequence, the quad, whose items are found
    with the constants CONTEXT, PRED, SUBJ and OBJ.  The context is the
    formula which contains the statement.

    The order of statements is only for canonical output.
    We cannot override __cmp__ or the object becomes unhashable, and can't
    be put into a dictionary.  So the comparisons are ordinary methods.
    """
    def __init__(self, q):
        """Keep the quad q as given."""
        self.quad = q

    def __getitem__(self, i):
        """Return item i of the quad, so that the statement can be indexed directly."""
        return self.quad[i]

    def __repr__(self):
        """Return a rendering {context:: subject predicate object} for debugging."""
        return ("{" + repr(self[CONTEXT]) + ":: " + repr(self[SUBJ]) + " "
                + repr(self[PRED]) + " " + repr(self[OBJ]) + "}")

    def _compareParts(self, other, parts):
        """Compare the given parts of two statements, in the order given.

        Loops are avoided by spotting a reference to the containing
        formula: a part which is its own statement's context is not looked
        into.  If both parts are, they count as the same; if only one is,
        that statement is the smaller.  Otherwise the first pair of parts
        which are not the very same term decides, by compareAnyTerm.
        Returns 0 if no pair of parts decides."""
        if self is other:
            return 0
        myContext = self.quad[CONTEXT]
        otherContext = other.quad[CONTEXT]
        for p in parts:
            mine = self.quad[p]
            theirs = other.quad[p]
            if mine is myContext:
                if theirs is otherContext:
                    continue
                return -1  # @this is smaller than other formulae
            if theirs is otherContext:
                return 1
            if mine is not theirs:
                return mine.compareAnyTerm(theirs)
        return 0

    def compareSubjPredObj(self, other):
        """Just compare SUBJ, PRED and OBJ, the others being the same.

        Note this is NOT the internal order of the quad.
        Avoids loops by spotting reference to the containing formula."""
        return self._compareParts(other, (SUBJ, PRED, OBJ))

    def comparePredObj(self, other):
        """Just compare PRED and OBJ, the others being the same.

        Avoids loops by spotting reference to the containing formula."""
        return self._compareParts(other, (PRED, OBJ))

    def context(self):
        """Return the context of the statement"""
        return self.quad[CONTEXT]

    def predicate(self):
        """Return the predicate of the statement"""
        return self.quad[PRED]

    def subject(self):
        """Return the subject of the statement"""
        return self.quad[SUBJ]

    def object(self):
        """Return the object of the statement"""
        return self.quad[OBJ]

    def spo(self):
        """Return the tuple (subject, predicate, object) of the statement"""
        return (self.quad[SUBJ], self.quad[PRED], self.quad[OBJ])

    def __len__(self):
        """Return 1: seen as a collection of statements, this is just one."""
        return 1

    def statements(self):
        """Return a list holding just this statement."""
        return [self]

    def asFormula(self, why=None):
        """The formula which contains only a statement like this.

        When we split the statement up, we lose information in any
        existentials which are shared with other statements.  So we
        introduce a skolem constant to tie the statements together.  We
        don't have access to any enclosing formula so we can't express its
        quantification.  This @@ not ideal.

        A new formula is made in the store of the context and the triple
        added to it, with why as the reason.  For each universal and each
        existential of the context which occurs in the new formula, a
        variable with the same URI is created in the new formula.
        Returns the new formula, closed.

        This extends the StoredStatement class with functionality we only
        need with the "why" module."""
        store = self.quad[CONTEXT].store
        c, p, s, o = self.quad   # The internal order of the quad
        f = store.newFormula()   # @@@CAN WE DO THIS BY CLEVER SUBCLASSING? statement subclass of f?
        f.add(s, p, o, why=why)
        uu = f.occurringIn(c.universals())
        ee = f.occurringIn(c.existentials())
        for v in uu:
            f.newUniversal(v.uriref(), why=why)
        for v in ee:
            f.newExistential(v.uriref(), why=why)
        return f.close()  # probably slow - much slower than statement subclass of formula

#ends