#! /usr/bin/python
"""

$Id: pretty.py,v 1.15 2004/11/09 19:58:26 timbl Exp $

Printing of N3 and RDF formulae

2003-8-20 split off from llyn.py

This is or was http://www.w3.org/2000/10/swap/pretty.py
"""


import types
import string

import diag  # problems importing the tracking flag, must be explicit it seems diag.tracking
from diag import progress, verbosity, tracking
from term import   Literal, Symbol, Fragment, AnonymousVariable, FragmentNil, \
     Term, CompoundTerm, List, EmptyList, NonEmptyList
from formula import Formula, StoredStatement

from RDFSink import Logic_NS, RDFSink, forSomeSym, forAllSym
from RDFSink import CONTEXT, PRED, SUBJ, OBJ, PARTS, ALL4
from RDFSink import N3_nil, N3_first, N3_rest, OWL_NS, N3_Empty, N3_List, List_NS
from RDFSink import RDF_NS_URI
from RDFSink import RDF_type_URI

# Revision keyword string for this module (same $...$ keyword style as the
# $Id$ line in the module docstring).
cvsRevision = "$Revision: 1.15 $"

# Magic resources we know about

from RDFSink import RDF_type_URI, DAML_sameAs_URI

# Namespace and datatype URI constants.
STRING_NS_URI = "http://www.w3.org/2000/10/swap/string#"
META_NS_URI = "http://www.w3.org/2000/10/swap/meta#"
INTEGER_DATATYPE = "http://www.w3.org/2001/XMLSchema#integer"
FLOAT_DATATYPE = "http://www.w3.org/2001/XMLSchema#double"

# Characters accepted when a namespace prefix is generated from a URI: the
# 52 ASCII letters.  (The literal used to read "...pqustuv...", i.e. it had
# no "r" and a doubled "u", so a generated prefix was cut short at the first
# lowercase "r" of the namespace name.)
prefixchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

class Serializer:
    """A serializer to serialize the formula F into the given
    abstract syntax sink
    """
    def __init__(self, F, sink, flags="", sorting=0):
	self.context = F
	assert F.canonical != None, "Formula to be printed must be canonical"
	self.store = F.store
	self.sink = sink
	self.defaultNamespace = None
	self.flags = flags
	self.sorting = sorting
	self._inContext ={}
	self._occurringAs = [{}, {}, {}, {}]
	self._topology_returns = {}

    def selectDefaultPrefix(self, printFunction):
        """Choose the default namespace and output the prefix bindings.

        The default is the namespace whose fragments have the most
        occurrences.  We suppress the RDF namespace itself because the XML
        syntax has problems with it being default as it is used for
        attributes.

        printFunction is called with a dummy Serializer (same formula, the
        flags plus "d", output going to self.sink.dummyClone()); the
        namespace counts are then read from that dummy sink.  For every other
        namespace counted more than once which has no prefix yet, a prefix is
        made up from the namespace URI and bound in the store.

        With the "d" flag set no default is chosen and the store's prefixes
        are just dumped.

        This also outputs the prefixes.
        """
        if "d" in self.flags:
            self.defaultNamespace = None
            self.dumpPrefixes()
            return

        # Dry run into a clone of the sink, just to count namespace usage.
        dummySink = self.sink.dummyClone()
        dummySerializer = Serializer(self.context, sink=dummySink,
                                     flags=self.flags + "d",
                                     sorting=self.sorting)
        printFunction(dummySerializer)
        counts = dummySink.namespaceCounts()

        best = 0
        mp = None
        for r, count in counts.items():
            if verbosity() > 25: progress("    Count is %3i for %s" %(count, r))
            if (r != RDF_NS_URI
                    and count > 0
                    and (count > best or
                         (count == best and mp > r))):  # Must be repeatable for retests
                best = count
                mp = r

        if verbosity() > 20:
            progress("# Most popular Namespace is %s with %i" % ( mp, best))

        self.defaultNamespace = mp

        for r, count in counts.items():
            if not (count > 1 and r != mp):
                continue
            if self.store.prefixes.get(r, None) is not None:
                continue    # already has a prefix

            # Start from the last path segment of the URI, less any final
            # "/" or "#".  Slicing (rather than indexing) keeps an empty
            # namespace URI from raising IndexError.
            p = r
            if p[-1:] in ("/", "#"): p = p[:-1]
            slash = p.rfind("/")
            if slash >= 0: p = p[slash+1:]

            # Keep only the leading run of prefix characters.
            i = 0
            while i < len(p) and p[i] in prefixchars:
                i = i + 1
            p = p[:i]

            if len(p) < 6 and self.store.namespaces.get(p, None) is None:
                pref = p
            else:
                p = p[:5]
                for l in (3, 2, 4, 1, 5):    # preferred lengths, in order
                    if self.store.namespaces.get(p[:l], None) is None:
                        pref = p[:l]
                        break
                else:
                    # Every truncation is taken: append a number instead.
                    n = 2
                    while 1:
                        pref = p[:3] + repr(n)
                        if self.store.namespaces.get(pref, None) is None:
                            break
                        n = n + 1

            self.store.bind(pref, r)
            if verbosity() > 50: progress("Generated @prefix %s: <%s>." % (pref, r))

        if self.defaultNamespace is not None:
            self.sink.setDefaultNamespace(self.defaultNamespace)

        prefixes = list(self.store.namespaces.keys())   #  bind in same way as input did FYI
        prefixes.sort()   # For repeatability of test results
        for pfx in prefixes:
            r = self.store.namespaces[pfx]
            # Only the lookup is guarded, so that a KeyError raised inside
            # sink.bind is no longer silently swallowed.
            try:
                count = counts[r]
            except KeyError:
                continue    # namespace not used in the output
            if count > 0:
                self.sink.bind(pfx, r)


    def _subFormulae(self, F, path = []):
        """Return a list of F followed by all the formulae nested within it.

        Each formula is listed once.  path holds the formulae already being
        expanded further up the recursion; a nested formula found in it (or
        equal to F) is not expanded again, which avoids loops.

        slow... only used in pretty print functions.
        """
        found = [F]
        trail = path + [F]     # a new list: the default argument is never altered
        for statement in F.statements:
            for position in PRED, SUBJ, OBJ:
                part = statement[position]
                if not isinstance(part, Formula) or part in trail:
                    continue
                for nested in self._subFormulae(part, trail):
                    if nested not in found:
                        found.append(nested)
        return found


    def dumpPrefixes(self):
	if self.defaultNamespace != None:
	    sink.setDefaultNamespace(self.defaultNamespace)
        prefixes = self.store.namespaces.keys()   #  bind in same way as input did FYI
        prefixes.sort()
	for pfx in prefixes:
	    uri = self.store.namespaces[pfx]
	    self.sink.bind(pfx, uri)


    def _listsWithinLists(self, L, lists):
        """Add L to "lists" unless it is already there, then do the same,
        recursively, for each member of L which is a NonEmptyList.
        """
        if L in lists:
            pass
        else:
            lists.append(L)
        for member in L:
            if not isinstance(member, NonEmptyList):
                continue
            self._listsWithinLists(member, lists)

    def dumpLists(self):
        """Output as explicit statements every non-empty list which occurs as
        predicate, subject or object of a statement in the formula, together
        with the lists nested inside those.

        All the forSome statements (one per list cell) are output first,
        followed by the first/rest statements of each cell.
        """
        context = self.context
        sink = self.sink
        store = self.store

        found = []
        for statement in context.statements:
            terms = statement.predicate(), statement.subject(), statement.object()
            for term in terms:
                if isinstance(term, NonEmptyList):
                    self._listsWithinLists(term, found)

        # Pass 1: each cell of each list is an existential of the formula.
        for head in found:
            cell = head
            while not isinstance(cell, EmptyList):
                self._outputStatement(sink, (context, store.forSome, context, cell))
                cell = cell.rest

        # Pass 2: the structure of each cell.
        for head in found:
            cell = head
            while not isinstance(cell, EmptyList):
                self._outputStatement(sink, (context, store.first, cell, cell.first))
                self._outputStatement(sink, (context, store.rest,  cell, cell.rest))
                cell = cell.rest


    def dumpChronological(self):
        """Fast as possible. Only dumps data. No formulae or universals.

        Statements are output in the order in which the formula holds them;
        any statement with a Formula or a universal variable as subject,
        predicate or object is left out.
        """
        context = self.context
        sink = self.sink
        sink.startDoc()
        self.dumpPrefixes()
        self.dumpVariables(context, sink, sorting=0, dataOnly=1)
        universals = context.universals()

        self.dumpLists()

        for statement in context.statements:
            plainData = 1
            for position in SUBJ, PRED, OBJ:
                term = statement[position]
                if isinstance(term, Formula) or term in universals:
                    plainData = 0
                    break
            if plainData:
                self._outputStatement(sink, statement.quad)

        sink.endDoc()

    def _outputStatement(self, sink, quad, aWorks = 1):
        """Give the sink one statement: the quad converted by self.extern,
        with aWorks passed on as the sink's aIsPossible argument.
        """
        externalQuad = self.extern(quad)
        sink.makeStatement(externalQuad, aIsPossible=aWorks)

    def notAsExtern(self, t):
        """Return the context, predicate, subject and object of t, in that
        order, as a 4-tuple of the terms themselves (no conversion).
        """
        return tuple([t[position] for position in (CONTEXT, PRED, SUBJ, OBJ)])

    def extern(self, t):
        """Return a 4-tuple of the asPair() forms of the context, predicate,
        subject and object of t, in that order.
        """
        return tuple([t[position].asPair()
                      for position in (CONTEXT, PRED, SUBJ, OBJ)])

    def dumpVariables(self, context, sink, sorting=1, pretty=0, dataOnly=0):
        """Dump the forAlls and the forSomes at the top of a formula

        sorting  -- if true, copies of the variable lists are sorted with
                    Term.compareAnyTerm before being output
        pretty   -- if true, an existential for which _topology gives a true
                    "anonymous" flag gets no forSome statement
        dataOnly -- if true, the forAll statements are left out

        Each forSome statement is output with the function returned by
        canItbeABNode(context, v) as its aWorks argument.
        """
        universals = context.universals()
        if sorting:
            universals = universals[:]
            universals.sort(Term.compareAnyTerm)
        existentials = context.existentials()
        if sorting:
            existentials = existentials[:]
            existentials.sort(Term.compareAnyTerm)

        if not dataOnly:
            for v in universals:
                self._outputStatement(sink, (context, self.store.forAll, context, v))

        for v in existentials:
            if pretty:
                anon = self._topology(v, context)[0]
            else:
                anon = 0
            if anon:
                continue    # nothing is declared for this one
            self._outputStatement(sink,
                                  (context, self.store.forSome, context, v),
                                  canItbeABNode(context, v))

    def dumpBySubject(self, sorting=1):
        """ Dump one formula only by order of subject except forSome's first for n3=a mode

        The statements are output sorted with
        StoredStatement.compareSubjPredObj; any statement with a Formula or a
        universal variable as subject, predicate or object is left out.
        sorting is passed on to dumpVariables.

        (A disabled "if 0:" branch which walked self.store.resources has been
        removed; it could never run, and was marked as no longer working
        because there is now no list of bnodes.)
        """
        context = self.context
        uu = context.universals()[:]
        sink = self.sink
        self._scan(context)
        sink.startDoc()
        self.selectDefaultPrefix(Serializer.dumpBySubject)
        self.dumpVariables(context, sink, sorting)
        self.dumpLists()

        ss = context.statements[:]
        ss.sort(StoredStatement.compareSubjPredObj)
        for s in ss:
            for p in SUBJ, PRED, OBJ:
                x = s[p]
                if isinstance(x, Formula) or x in uu:
                    break    # not plain data: skip this statement
            else:
                self._outputStatement(sink, s.quad)

        sink.endDoc()
#
#  Pretty printing
#
# An interesting alternative is to use the reverse syntax to the max, which
# makes the DLG an undirected labelled graph. s and o above merge. The only thing which
# then prevents us from dumping the graph without new bnode ids is the presence of cycles.
#
# Blank nodes can be represented using the implicit syntax [] or rdf/xml equivalent
# instead of a dummy identifier iff
# - they are blank nodes, ie are existentials whose id has been generated, and
# - the node only occurs directly in one formula in the whole thing to be printed, and
# - the node occurs at most once as an object or list element within that formula

# We used to work this out on the fly, but it is faster to build an index of the
# whole formula to be printed first.
#
# Note when we scan a list we do it in the context of the formula in which we found
# it.  It may occur in many formulae.

    def _scanObj(self, context, x):
        """Does this appear in just one context, and if so counts how many times as object

        self._inContext[x] becomes the one context in which x has been seen,
        or the string "many" once it has been seen in a second one; after
        that x is no longer counted.  Occurrences are only counted for an
        AnonymousVariable or a generated Fragment.
        """
        z = self._inContext.get(x, None)
        if z == "many": return # forget it
        if z == None:
            self._inContext[x] = context
        elif z is not context:
            self._inContext[x] = "many"
            return

        if isinstance(x, AnonymousVariable) or (isinstance(x, Fragment) and x.generated()):
            y = self._occurringAs[OBJ].get(x, 0) + 1
            self._occurringAs[OBJ][x] = y
            if verbosity() > 98:
                # The role letter is looked up by quad position.  It used to
                # be looked up by the count y, which named the wrong role and
                # raised IndexError once the count reached 5.
                progress(
                    "scan: %s, a %s, now has %i occurrences as %s"
                    %(x, x.__class__, y, "CPSO"[OBJ]))
#	else:
#	    if x == None: raise RuntimeError("Weird - None in a statement?")
#	    progress("&&&&&&&&& %s has class %s " %(`z`, `z.__class__`))

    def _scan(self, x, context=None):
        """Index x and everything within it, for use by _topology.

        For a NonEmptyList, each member is passed to _scanObj together with
        the context in which the list was found.  For a Formula, each
        predicate, subject and object of each statement which is an
        AnonymousVariable or a generated Fragment has its containing formula
        and its occurrence count for that position recorded; the term is then
        scanned recursively with the formula as context.
        """
        if verbosity() > 98:
            progress("scanning %s a %s in context %s"
                     % (repr(x), repr(x.__class__), repr(context)),
                     x.generated(), self._inContext.get(x, "--"))

        if isinstance(x, NonEmptyList):
            for member in x:
                self._scanObj(context, member)

        if not isinstance(x, Formula):
            return

        for statement in x.statements:
            for position in PRED, SUBJ, OBJ:
                term = statement[position]
                if (isinstance(term, AnonymousVariable)
                        or (isinstance(term, Fragment) and term.generated())):
                    home = self._inContext.get(term, None)
                    if home == "many": continue # forget it
                    if home == None:
                        self._inContext[term] = x
                    elif home is not x:
                        self._inContext[term] = "many"
                        continue
                    tally = self._occurringAs[position]
                    tally[term] = tally.get(term, 0) + 1
                if x is not term: self._scan(term, x)
		

    def _topology(self, x, context):
        """ Can x be output as an anonymous node in N3? Also counts incoming links.

        Returns a pair (anon, incoming):

        anon      true iff x can be represented as an anonymous node in N3,
                  [] or {} or ():
                    0 for a Literal, an EmptyList, or a node which is not
                      generated (it has a name);
                    2 for a Formula or a non-empty List (always anonymous,
                      represented as itself);
                    for a generated node, true if no context was recorded
                      for it by the scan, or if it was only seen in this
                      context, occurs less than twice as object, never as
                      predicate, has no statement with itself as both subject
                      and object, and is an existential of this context.
        incoming  the recorded number of occurrences of x as object plus
                  those as predicate: 0 means it occurs only as subject.
        """
        # Original author's note: this function takes way too long for how
        # little it does; an attempt to cache results in
        # self._topology_returns was abandoned.
        isExistential = context.existentialDict.get(x, 0)
        selfLoop = context.any(subj=x, obj=x)  # doesn't count as incoming
        asPred = self._occurringAs[PRED].get(x, 0)
        asObj = self._occurringAs[OBJ].get(x, 0)

        if isinstance(x, Literal):
            anon = 0     #  Never anonymous, always just use the string
        elif isinstance(x, Formula):
            anon = 2     # always anonymous, represented as itself
        elif isinstance(x, List):
            if isinstance(x, EmptyList):
                anon = 0     #  Never anonymous, always just use the string
            else:
                anon = 2     # always anonymous, represented as itself
                isExistential = 1
        elif not x.generated():
            anon = 0     # Got a name, not anonymous
        else:  # bnode
            ctx = self._inContext.get(x, "weird")
            anon = ctx == "weird" or (ctx is context and
                        asObj < 2 and asPred == 0 and
                        (not selfLoop) and
                        isExistential)
            if verbosity() > 97:
                progress( "Topology %s in %s is: ctx=%s,anon=%i obj=%i, pred=%i loop=%s ex=%i "%(
                    repr(x), repr(context), repr(ctx), anon, asObj, asPred,
                    selfLoop, isExistential))
            return ( anon, asObj+asPred )

        if verbosity() > 98:
            progress( "Topology %s in %s is: anon=%i obj=%i, pred=%i loop=%s ex=%i "%(
                repr(x), repr(context), anon, asObj, asPred, selfLoop,
                isExistential))

        return ( anon, asObj+asPred )


    def dumpNested(self):
        """ Pretty-print the whole formula as one document.

        The formula is scanned first to build the indexes used by _topology;
        then, between the sink's startDoc and endDoc, the prefixes are chosen
        and the contents dumped with sorting and equals both on.
        """
        context = self.context
        assert context.canonical != None
        self._scan(context)

        sink = self.sink
        sink.startDoc()
        self.selectDefaultPrefix(Serializer.dumpNested)
        self.dumpFormulaContents(context, sink, sorting=1, equals=1)
        sink.endDoc()

    def dumpFormulaContents(self, context, sink, sorting, equals=0):
        """ Iterates over statements in formula, bunching them up into a set
        for each subject.

        A sorted copy of the statements is used.  With equals set, a sameAs
        statement is added for each entry x -> y of context._redirections
        where x is not generated and is not one of the formula's variables.
        The variables are dumped first, then each bunch of statements is
        handed to _dumpSubject.  Statements whose subject is their own
        context are skipped.
        """
        allStatements = context.statements[:]
        if equals:
            for old, new in context._redirections.items():
                if old.generated() or old in context.variables():
                    continue
                allStatements.append(StoredStatement(
                    (context, context.store.sameAs, old, new)))
        allStatements.sort(StoredStatement.compareSubjPredObj)

        self.dumpVariables(context, sink, sorting, pretty=1)

        currentSubject = None
        bunch = []
        for s in allStatements:
            con, _pred, subj, _obj = s.quad
            if subj is con:
                continue
            if currentSubject == None:
                currentSubject = subj
            if subj != currentSubject:
                # The subject has changed: flush the bunch collected so far.
                self._dumpSubject(currentSubject, context, sink, sorting, bunch)
                bunch = []
                currentSubject = subj
            bunch.append(s)
        if currentSubject != None:
            self._dumpSubject(currentSubject, context, sink, sorting, bunch)


    def _dumpSubject(self, subj, context, sink, sorting, statements=[]):
        """ Dump the information about one top level subject

        This outputs arcs leading away from a node, and where appropriate
        recursively descends the tree, by dumping the object nodes
        (and in the case of a compact list, the predicate (rest) node).
        It does NOTHING for anonymous nodes which don't occur explicitly as
        subjects.

        statements is the list of statements with this subject; it is sorted
        in place when sorting is true.
        """
        anon, incoming = self._topology(subj, context)    # Is anonymous?
        if anon and incoming == 1 and not isinstance(subj, Formula):
            return   # Forget it - will be dealt with in recursion

        if isinstance(subj, List):
            li = subj
        else:
            li = None

        if isinstance(subj, Formula) and subj is not context:
            sink.startBagSubject(subj.asPair())
            self.dumpFormulaContents(subj, sink, sorting)  # dump contents of anonymous bag
            sink.endBagSubject(subj.asPair())       # Subject is now set up
            # continue to do arcs

        elif anon and (incoming == 0 or
                (li != None and not isinstance(li, EmptyList))):
            # Will be root anonymous node - {} or [] or ()
            if subj is not context:     #  Could have alternative syntax here
                if sorting: statements.sort(StoredStatement.comparePredObj)    # @@ Needed now Fs are canonical?

                if li != None and not isinstance(li, EmptyList):
                    first = self.store.first
                    rest = self.store.rest
                    # Statements about the list other than its first and rest.
                    extras = [s for s in statements
                              if s.quad[PRED] is not first
                              and s.quad[PRED] is not rest]
                    if extras:
                        # Something to print (later)
                        if verbosity() > 90:
                            progress("@ Is list, has values for",
                                     repr(extras[0].quad[PRED]))
                    elif subj.generated():
                        return # Nothing.
                    sink.startAnonymousNode(subj.asPair(), li)
                    self.dumpStatement(sink, (context, first, subj, subj.first), sorting)
                    self.dumpStatement(sink, (context, rest, subj,  subj.rest), sorting)
                    sink.endAnonymousNode(subj.asPair())
                    for s in extras:
                        self.dumpStatement(sink, s.quad, sorting) # Dump the rest outside the ()
                    return

                if verbosity() > 90: progress("%s Not list, has property values." % repr(subj))
                sink.startAnonymousNode(subj.asPair())
                for s in statements:  #   "[] color blue."  might be nicer. @@@  Try it?
                    self.dumpStatement(sink, s.quad, sorting)
                sink.endAnonymousNode()
                return  # arcs as subject done

        if sorting: statements.sort(StoredStatement.comparePredObj)
        for s in statements:
            self.dumpStatement(sink, s.quad, sorting)

                
    def dumpStatement(self, sink, triple, sorting):
        """Dump one statement, including structure within object

        triple is in fact a quad (context, predicate, subject, object).
        Depending on the object the statement is output:
          - simply, for a 1-loop on a term which is not a CompoundTerm, an
            EmptyList object or a Literal object;
          - as a bag, with the contents of the formula dumped inside it, for
            a Formula object;
          - as an anonymous list node holding its first and rest, for a
            NonEmptyList object;
          - as an embedded anonymous node holding the statements about the
            object, if _topology says it is anonymous with one incoming link;
          - simply, otherwise.
        """
        context, pre, sub, obj = triple

        if ((sub is obj and not isinstance(sub, CompoundTerm))
                or isinstance(obj, (EmptyList, Literal))):
            self._outputStatement(sink, triple) # Do 1-loops simply

        elif isinstance(obj, Formula):
            sink.startBagObject(self.extern(triple))
            self.dumpFormulaContents(obj, sink, sorting)  # dump contents of anonymous bag
            sink.endBagObject(pre.asPair(), sub.asPair())

        elif isinstance(obj, NonEmptyList):
            if verbosity()>99:
                progress("List found as object of dumpStatement " + repr(obj) + context.debugString())
            sink.startAnonymous(self.extern(triple), isList=1)
            self.dumpStatement(sink, (context, self.store.first, obj, obj.first), sorting)
            self.dumpStatement(sink, (context, self.store.rest, obj, obj.rest), sorting)
            sink.endAnonymous(sub.asPair(), pre.asPair()) # Restore parse state

        else:
            anon, incoming = self._topology(obj, context)
            if anon and incoming == 1:  # Embedded anonymous node in N3
                sink.startAnonymous(self.extern(triple))
                about = context.statementsMatching(subj=obj)
                if sorting: about.sort(StoredStatement.comparePredObj)
                for t in about:
                    self.dumpStatement(sink, t.quad, sorting)
                sink.endAnonymous(sub.asPair(), pre.asPair()) # Restore parse state
            else:
                self._outputStatement(sink, triple)

	
def canItbeABNode(formula, symbol):   # @@@@ Really slow -tbl
    """Return a function of no arguments which does the actual test.

    When called, that function returns 0 if any predicate, subject or object
    of a statement of formula is a Formula for which doesNodeAppear(symbol)
    is true, and 1 otherwise.  Nothing is searched until it is called.
    """
    def returnFunc():
        for quad in formula.statements:
            for position in (PRED, SUBJ, OBJ):
                term = quad[position]
                if isinstance(term, Formula) and term.doesNodeAppear(symbol):
                    return 0
        return 1
    return returnFunc

##    toplayer = 1
##    otherlayers = 1
##    statementList = formula.statements[:]
##    parentList.append(formula)
##    while statementList:
##        quad = statementList.pop(0)
##        for s in SUBJ, OBJ:
##            if quad[s] == symbol:
##                toplayer = 0
##            elif isinstance(quad[s], List):
##                for elt in quad[s]:
##                    statementList.append(elt)
##            elif isinstance(quad[s], Formula):
##                top, other = canItbeABNode(parentList, quad[s], symbol)
##                otherlayers = otherlayers and top and other
##            else:
##                pass
##    return toplayer, otherlayers


#ends