###############################################################################
##                                                                           ##
##                        ALEXANDRIA DIGITAL LIBRARY                         ##
##                 University of California at Santa Barbara                 ##
##                                                                           ##
## ------------------------------------------------------------------------- ##
##                                                                           ##
##     Copyright (c) 2005 by the Regents of the University of California     ##
##                            All rights reserved                            ##
##                                                                           ##
## Redistribution and use in source and binary forms, with or without        ##
## modification, are permitted provided that the following conditions are    ##
## met:                                                                      ##
##                                                                           ##
##     1. Redistributions of source code must retain the above copyright     ##
##        notice, this list of conditions, and the following disclaimer.     ##
##                                                                           ##
##     2. Redistributions in binary form must reproduce the above copyright  ##
##        notice, this list of conditions, and the following disclaimer in   ##
##        the documentation and/or other materials provided with the         ##
##        distribution.                                                      ##
##                                                                           ##
##     3. All advertising materials mentioning features or use of this       ##
##        software must display the following acknowledgement: This product  ##
##        includes software developed by the Alexandria Digital Library,     ##
##        University of California at Santa Barbara, and its contributors.   ##
##                                                                           ##
##     4. Neither the name of the University nor the names of its            ##
##        contributors may be used to endorse or promote products derived    ##
##        from this software without specific prior written permission.      ##
##                                                                           ##
## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ##
## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ##
## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE   ##
## DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR  ##
## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL    ##
## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS   ##
## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)     ##
## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,       ##
## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN  ##
## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           ##
## POSSIBILITY OF SUCH DAMAGE.                                               ##
##                                                                           ##
###############################################################################

# $Header: /export/home/gjanee/mm/RCS/ADL_mapper.py,v 1.5 2007/02/07 17:28:44 gjanee Exp $

# SYNOPSIS
#
#     python mapping [-options] [input]
#
#     mapping:
#
#         Python script/module of the form:
#
#             from ADL_mapper import *
#             input()
#             ...declarations...
#             output()
#
#     options:
#
#         -t
#             Print tracebacks on errors and warnings.
#
#         -Dparam=value
#             Define a parameter.  Parameters can be accessed from the
#             mapping using the getParam() function.
#
#     input:
#
#         Filename or URL of the source metadata to be mapped; if not
#         specified, the source metadata is read from standard input.
#
# DESCRIPTION
#
#     Python module that defines the ADL mapping language.  See
#     http://www.alexandria.ucsb.edu/~gjanee/mm/tutorial.html for more
#     information.
#
#     This module requires the PyXML package, available at
#     http://pyxml.sourceforge.net/.
#
# AUTHOR
#
#     Greg Janee
#     gjanee@alexandria.ucsb.edu
#
# HISTORY
#
#     $Log: ADL_mapper.py,v $
#     Revision 1.5  2007/02/07 17:28:44  gjanee
#     Added the 'join' XPath pseudo-function.
#
#     Revision 1.4  2005/06/08 16:54:34  gjanee
#     Added support for XML namespaces.  Added the 'namespace'
#     function.
#
#     Revision 1.3  2005/03/18 20:52:56  gjanee
#     Added a 'separator' argument to 'consolidateTextualValues'.
#     Removed 'unconsolidateTextualValues', as it is no longer needed.
#
#     Revision 1.2  2005/03/10 17:21:48  gjanee
#     Added some syntactic sugar.
#
#     Revision 1.1  2005/02/17 18:16:54  gjanee
#     Initial revision
#

import re
import sys
import traceback
import types

import xml.dom
import xml.dom.ext.reader.Sax2
import xml.xpath

# ----------------------------------------
# TYPE CHECKING

# Type specification, in the form accepted by _typecheck1, matching an
# object whose type is exactly str or exactly unicode.
_str = [str, unicode]

def _typecheck1 (object, spec):
    """Return True if 'object' conforms to the type specification 'spec'.

    A specification is one of:

        a type                       exact type match (subclasses do not
                                     conform)
        [spec, ...]                  any one of the alternatives
        (tuple, spec, ...)           tuple whose items match positionally
        (list, spec[, min[, max]])   list of matching items, optionally
                                     bounded in length
        (dict, keySpec, valueSpec)   dictionary of matching keys and values

    Any other specification matches nothing.
    """
    kind = type(spec)
    if kind is type:
        return type(object) is spec
    if kind is list:
        return any(_typecheck1(object, alternative) for alternative in spec)
    if kind is not tuple:
        return False
    container = spec[0]
    if container == tuple:
        # One item specification per tuple position.
        if type(object) is not tuple or len(object) != len(spec)-1:
            return False
        return all(_typecheck1(item, itemSpec)
            for item, itemSpec in zip(object, spec[1:]))
    if container == list:
        if type(object) is not list:
            return False
        if len(spec) >= 3 and len(object) < spec[2]:
            return False
        if len(spec) >= 4 and len(object) > spec[3]:
            return False
        return all(_typecheck1(item, spec[1]) for item in object)
    if container == dict:
        if type(object) is not dict:
            return False
        for key, val in object.items():
            if not (_typecheck1(key, spec[1]) and _typecheck1(val, spec[2])):
                return False
        return True
    return False

def _formatSpec (spec):
    """Return a human-readable rendering of the type specification
    'spec' for use in error messages.  Specifications that are not
    recognized are rendered as "?"."""
    def joined (specs, separator):
        # A separator is emitted only once some text has accumulated,
        # so leading empty renderings produce no separator.
        text = ""
        for s in specs:
            if text != "": text += separator
            text += _formatSpec(s)
        return text
    if type(spec) is type:
        return str(spec)
    if type(spec) is list:
        return joined(spec, " or ")
    if type(spec) is not tuple:
        return "?"
    head = spec[0]
    if head == tuple:
        return "tuple (" + joined(spec[1:], ", ") + ")"
    if head == list:
        text = "list"
        if len(spec) >= 3:
            # Length bounds: "min..max", or "min..*" if unbounded above.
            if len(spec) >= 4:
                upper = str(spec[3])
            else:
                upper = "*"
            text += " " + str(spec[2]) + ".." + upper
        return text + " [" + _formatSpec(spec[1]) + "]"
    if head == dict:
        keyText = _formatSpec(spec[1])
        valueText = _formatSpec(spec[2])
        return "dictionary {" + keyText + " : " + valueText + "}"
    return "?"

def _typecheck (argNum, object, spec):
    """Raise TypeError unless 'object' conforms to type specification
    'spec'.

    argNum is the 1-based position of the argument being checked and is
    quoted in the error message; pass 0 (or less) when the value being
    checked is not a function argument.  The check is performed only
    when __debug__ is true.
    """
    if __debug__:
        if not _typecheck1(object, spec):
            if argNum > 0:
                prefix = "argument " + str(argNum) + ": "
            else:
                prefix = ""
            # The call form of raise is accepted by both Python 2 and
            # Python 3; the "raise E, msg" statement form is not.
            raise TypeError(prefix + "expecting " + _formatSpec(spec))

# ----------------------------------------
# UTILITIES

def _listify (x):
    """Return 'x' as a list: a shallow copy if 'x' is already a list
    (so the result never shares storage with the argument), otherwise a
    new one-element list containing 'x'."""
    if type(x) is not list:
        return [x]
    return x[:]

def _plural (n):
    """Return the plural suffix for a count: "" if 'n' equals 1, "s"
    otherwise."""
    if n != 1:
        return "s"
    return ""

# ----------------------------------------
# ERROR HANDLING

# When true, fatal() and warning() print a stack trace after the
# message.
_printTracebacks = False

def fatal (message):
    """Write 'message' to standard error as a fatal error, prefixed by
    the program name, and terminate the process with exit status 1."""
    _typecheck(1, message, _str)
    program = sys.argv[0]
    sys.stderr.write(program + ": FATAL ERROR: " + message + "\n")
    if _printTracebacks:
        traceback.print_stack()
    sys.exit(1)

def warning (message):
    """Write 'message' to standard error as a warning, prefixed by the
    program name.  Execution continues."""
    _typecheck(1, message, _str)
    program = sys.argv[0]
    sys.stderr.write(program + ": WARNING: " + message + "\n")
    if _printTracebacks:
        traceback.print_stack()

def _xmlException (activity):
    """Report the exception currently being handled as a fatal error.
    'activity' describes what was being done when it was raised."""
    excType, excValue = sys.exc_info()[:2]
    fatal("exception raised while " + activity + ": " + str(excType) +\
        ": " + str(excValue))

def _usage ():
    """Write the command line synopsis to standard error and exit with
    status 1."""
    lines = [
        "usage: python mapping [-options] [input]",
        "",
        "mapping:",
        "",
        "    Python script/module of the form:",
        "",
        "        from ADL_mapper import *",
        "        input()",
        "        ...declarations...",
        "        output()",
        "",
        "options:",
        "",
        "    -t",
        "        Print tracebacks on errors and warnings.",
        "",
        "    -Dparam=value",
        "        Define a parameter.  Parameters can be accessed from the",
        "        mapping using the getParam() function.",
        "",
        "input:",
        "",
        "    Filename or URL of the source metadata to be mapped; if not",
        "    specified, the source metadata is read from standard input."]
    sys.stderr.write("\n".join(lines) + "\n")
    sys.exit(1)

# ----------------------------------------
# BUCKET TYPES

# _bucketTypes ::= { name : (validator, encoder), ... }
#         name ::= _str, e.g., "temporal"
#    validator ::= func(bucket, field, value, strict) -> retval
#       bucket ::= _str, e.g., "adl:dates"
#        field ::= (fieldName, fieldUri) or None
#    fieldName ::= _str, e.g., "[DC] Title"
#     fieldUri ::= _str, e.g., "http://purl.org/dc/elements/1.1/title"
#        value ::= tuple
#       strict ::= bool
#       retval ::= (field, value) or None
#      encoder ::= func(document, field, value) -> element
#     document ::= DOM document
#      element ::= DOM element

_bucketTypes = {}

def bucketType (name, validator, encoder):
    """Register the bucket type 'name' with its validator and encoder
    functions, replacing any earlier registration under that name."""
    checks = [(name, _str), (validator, types.FunctionType),
        (encoder, types.FunctionType)]
    for position, (argument, spec) in enumerate(checks):
        _typecheck(position+1, argument, spec)
    _bucketTypes[name] = (validator, encoder)

# ----------------------------------------
# BUCKETS

# _buckets ::= { name : type, ... }
#     name ::= _str, e.g., "adl:dates"
#     type ::= _str, e.g., "temporal"

_buckets = {}

def bucket (name, type):
    """Declare the bucket 'name' to be of bucket type 'type', replacing
    any earlier declaration of that bucket.  It is a fatal error if the
    bucket type has not been registered."""
    _typecheck(1, name, _str)
    _typecheck(2, type, _str)
    if not (type in _bucketTypes):
        fatal("unrecognized bucket type: " + type)
    _buckets[name] = type

# ----------------------------------------
# VOCABULARIES

#   _vocabularies ::= { name : ([bucket, ...], termAncestorMap), ... }
#            name ::= _str, e.g., "ADL Object Type Thesaurus"
#          bucket ::= _str, e.g., "adl:types"
# termAncestorMap ::= { term : [term, ...], ... }
#            term ::= _str, e.g., "maps"

_vocabularies = {}

def _walkGraph (vocabularyName, path, term, map):
    """Record in 'map' the ancestors of 'term' and, recursively, of its
    descendants.

    'term' is either a term name or a tuple (name, [child, ...]).
    'path' lists the names of the terms above 'term', outermost first;
    it is extended during the recursion and restored before returning.
    'map' associates each term name with its list of ancestors and is
    updated in place.  Encountering a term among its own ancestors is
    a fatal error; 'vocabularyName' is used only in that message.
    """
    if type(term) is str or type(term) is unicode:
        name = term
    else:
        name = term[0]
    # A term may be reached along several paths; merge the ancestors.
    known = map.get(name, [])
    for above in path:
        if above == name:
            fatal("cycle detected in vocabulary '" + vocabularyName +\
                "': term '" + name + "'")
        if above not in known:
            known.append(above)
    map[name] = known
    if type(term) is tuple:
        path.append(name)
        for child in term[1]:
            _walkGraph(vocabularyName, path, child, map)
        path.pop()

# This is the real tree type specification, but we can't use it
# because it will generate an infinitely recursive error message.
# A term is either a string or a tuple (string, non-empty list of
# terms).  The "x" below is only a placeholder: the second statement
# replaces it with the specification itself, making the structure
# self-referential.
_treeTypespec = [_str, (tuple, _str, (list, ["x"], 1))]
_treeTypespec[1][2][1][0] = _treeTypespec

def vocabulary (name, buckets, terms):
    """Define the vocabulary 'name' and associate it with one or more
    hierarchical buckets.

    'buckets' is a bucket name or a non-empty list of bucket names, each
    of which must have been declared with type "hierarchical".  'terms'
    is a list of term trees, each a term name or a tuple
    (name, [subtree, ...]).  Any earlier vocabulary of the same name is
    replaced.
    """
    _typecheck(1, name, _str)
    _typecheck(2, buckets, [_str, (list, _str, 1)])
    # The tree specification is unrolled to a fixed depth; below that
    # depth any tuple is accepted.
    treeSpec = [_str, tuple]
    for unused in range(3):
        treeSpec = [_str, (tuple, _str, (list, treeSpec, 1))]
    _typecheck(3, terms, (list, treeSpec))
    buckets = _listify(buckets)
    for bucketName in buckets:
        if bucketName not in _buckets:
            fatal("undeclared bucket: " + bucketName)
        if _buckets[bucketName] != "hierarchical":
            fatal("attempt to associate vocabulary with non-hierarchical " +\
                "bucket '" + bucketName + "'")
    termAncestors = {}
    for tree in terms:
        _walkGraph(name, [], tree, termAncestors)
    _vocabularies[name] = (buckets, termAncestors)

def getVocabulary (name):
    """Return the ([bucket, ...], termAncestorMap) tuple stored for the
    vocabulary 'name', or None if no such vocabulary is defined."""
    _typecheck(1, name, _str)
    if name in _vocabularies:
        return _vocabularies[name]
    return None

# ----------------------------------------
# REQUIREMENTS & EXPECTATIONS

# _requirements ::= { bucket : cardinality, ... }
#        bucket ::= _str, e.g., "adl:dates"
#   cardinality ::= _str, e.g., "1+"

_requirements = {}

def requirement (bucket, cardinality):
    """Require that 'bucket' receive the number of mappings described
    by 'cardinality', which is one of "1", "1?", "1+" or "0+".  The
    bucket must already be declared.  A later call for the same bucket
    replaces the earlier requirement."""
    _typecheck(1, bucket, _str)
    _typecheck(2, cardinality, _str)
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    allowed = ("1", "1?", "1+", "0+")
    if cardinality not in allowed:
        fatal("bad cardinality: " + cardinality)
    _requirements[bucket] = cardinality

# _expectations ::= { bucket : cardinality, ... }
#        bucket ::= _str, e.g., "adl:dates"
#   cardinality ::= _str, e.g., "1+"

_expectations = {}

def expectation (bucket, cardinality):
    """Expect that 'bucket' receive the number of mappings described by
    'cardinality', which is one of "1", "1?", "1+" or "0+".  The bucket
    must already be declared.  A later call for the same bucket
    replaces the earlier expectation."""
    _typecheck(1, bucket, _str)
    _typecheck(2, cardinality, _str)
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    allowed = ("1", "1?", "1+", "0+")
    if cardinality not in allowed:
        fatal("bad cardinality: " + cardinality)
    _expectations[bucket] = cardinality

# ----------------------------------------
# PARAMETERS

# _params ::= { param : value, ... }
#   param ::= _str, e.g., "collection"
#   value ::= _str, e.g., "adl_catalog"

# Both parameters below always have a value; these are the defaults.
_params = {}

_params["collection"] = "collection"
_params["holding"] = "holding"

def getParam (param):
    """Return the value of the parameter 'param', or None if it has not
    been defined."""
    _typecheck(1, param, _str)
    if param in _params:
        return _params[param]
    return None

def setParam (param, value):
    """Define the parameter 'param', or replace its current value.
    Only the parameter name is type-checked."""
    _typecheck(1, param, _str)
    _params.update({param: value})

# ----------------------------------------
# INPUT

# Nesting depth of input()/output() calls; only the outermost pair does
# any work.
_invocationLevel = 0
# The input filename/URL while arguments are being parsed, then the
# parsed source document.
_source = None

def input ():
    """Process the command line and read and parse the source metadata.

    Recognizes -t (print tracebacks), -Dparam=value (define a
    parameter) and an optional final argument naming the input; without
    one the source is read from standard input.  Any other usage prints
    the synopsis and exits.  Nested calls (anything but the first
    unmatched call) do nothing beyond being counted.
    """
    global _invocationLevel, _printTracebacks, _source
    _invocationLevel += 1
    if _invocationLevel > 1: return
    arguments = sys.argv[1:]
    for offset, arg in enumerate(arguments):
        if not arg.startswith("-"):
            # The input, if given, must be the final argument.
            if offset != len(arguments)-1: _usage()
            _source = arg
        elif arg == "-t":
            _printTracebacks = True
        elif arg.startswith("-D"):
            definition = arg[2:].split("=", 1)
            if len(definition) != 2: _usage()
            _params[definition[0]] = definition[1]
        else:
            _usage()
    try:
        reader = xml.dom.ext.reader.Sax2.Reader()
        if _source == None:
            _source = reader.fromStream(sys.stdin)
        else:
            _source = reader.fromStream(_source)
    except:
        _xmlException("reading/parsing the source metadata")

def getSource ():
    """Return the module's current source: None until input() has run,
    thereafter whatever input() stored (the parsed source document on
    success)."""
    return _source

# ----------------------------------------
# NAMESPACES

# Maps namespace prefixes to namespace URIs; supplied to every XPath
# evaluation.
_namespaces = {}

def namespace (prefix, uri):
    """Bind the namespace prefix 'prefix' to 'uri' for use in XPath
    expressions, replacing any earlier binding of that prefix."""
    _typecheck(1, prefix, _str)
    _typecheck(2, uri, _str)
    _namespaces.update({prefix: uri})

# ----------------------------------------
# QUERIES

# Matches an expression that ends in an attribute selector: group 1 is
# the expression proper, group 2 the attribute name.
_attributeSelector = re.compile(r"(.*)@([\w.:-]+)$")

def _rewriteQuery (query):
    """Return a copy of 'query' (a list of expressions) in which every
    absolute expression is followed by at least one relative one.

    Expressions are constants ("" or "=literal"), absolute XPath
    expressions (starting with "/") or expressions relative to the
    nearest preceding absolute expression.  An absolute expression that
    is not followed by a relative expression before the next absolute
    expression or the end of the query is itself the value: it is
    split into its context part plus the relative expression "." (or
    ".@attr" if it ends in an attribute selector).  Constants between
    the absolute expression and whatever follows it are skipped when
    making that decision.  A relative expression with no preceding
    absolute expression is a fatal error.
    """
    def isConstant (expr):
        # The empty (null) constant and "=literal" constants.
        return expr == "" or expr.startswith("=")
    newQuery = []
    haveContext = False
    for i, expr in enumerate(query):
        if isConstant(expr):
            newQuery.append(expr)
        elif expr.startswith("/"):
            # Look past constants of either kind; treating "" as a
            # relative expression here would leave this absolute
            # expression without a value column.
            j = i + 1
            while j < len(query) and isConstant(query[j]): j += 1
            if j == len(query) or query[j].startswith("/"):
                # This expression has no relative expressions.
                match = _attributeSelector.match(expr)
                if match:
                    newQuery.append(match.group(1))
                    newQuery.append(".@" + match.group(2))
                else:
                    newQuery.append(expr)
                    newQuery.append(".")
            else:
                newQuery.append(expr)
            haveContext = True
        else:
            if not haveContext:
                fatal("relative XPath expression has no absolute " +\
                    "contextual expression: " + expr)
            newQuery.append(expr)
    return newQuery

def _getValue (contextExpr, i, n, expr, j, m, attribute, node):
    """Return the textual value of 'node', or None if it has none.

    If 'attribute' is None the value is the stripped text of the node's
    only child; a node with no children has no value, and a node with
    several children or with a non-text child is a fatal error.
    Otherwise the value is the stripped value of the named attribute.
    An empty value is reported as None.  The remaining arguments (the
    expressions, the 0-based context node index 'i' of 'n' and relative
    node index 'j' of 'm') are used only in error messages.
    """
    def complain (problem):
        fatal("error evaluating XPath expression '" + expr +\
            "' relative to contextual expression '" + contextExpr +\
            "', while processing context node " + str(i+1) + " of " +\
            str(n) + ", relative node " + str(j+1) + " of " + str(m) +\
            ": node produced by relative expression " + problem)
    if attribute != None:
        text = node.getAttribute(attribute).strip()
    else:
        children = node.childNodes
        if len(children) == 0:
            return None
        if len(children) > 1:
            complain("has more than one child node")
            return None
        child = children[0]
        if child.nodeType != xml.dom.Node.TEXT_NODE:
            complain("does not contain text only")
        text = child.nodeValue.strip()
    if text == "":
        return None
    return text

# Matches the 'join' pseudo-function, join(expr, 'sep') or
# join(expr, "sep"): group 1 is the expression, group 2 the separator
# including its quotes.
_joinSelector = re.compile(" *join *\( *(.*?) *, *('[^']*'|\"[^\"]*\") *\) *$")

def _getRelative (contextExpr, context, expr):
    """Evaluate the relative expression 'expr' against each node in
    'context' and return a list with one value (or None) per context
    node.

    'expr' may be wrapped in the 'join' pseudo-function and may end in
    an attribute selector.  Without 'join' the expression must select
    at most one node per context node; with it, the non-null values of
    all selected nodes are joined with the given separator.
    'contextExpr' is used only in error messages.
    """
    joinString = None
    joinMatch = _joinSelector.match(expr)
    if joinMatch:
        expr = joinMatch.group(1)
        joinString = joinMatch.group(2)[1:-1]
    attribute = None
    attributeMatch = _attributeSelector.match(expr)
    if attributeMatch:
        expr = attributeMatch.group(1)
        attribute = attributeMatch.group(2)
    total = len(context)
    values = []
    for i, contextNode in enumerate(context):
        where = "XPath expression '" + expr +\
            "' relative to contextual expression '" + contextExpr +\
            "', while processing context node " + str(i+1) + " of " +\
            str(total)
        try:
            xpathContext = xml.xpath.Context.Context(contextNode,
                processorNss=_namespaces)
            nodes = xml.xpath.Evaluate(expr, context=xpathContext)
        except:
            _xmlException("evaluating " + where)
        if len(nodes) == 0:
            values.append(None)
        elif len(nodes) > 1 and joinString == None:
            fatal("error evaluating " + where +\
                ": relative expression produced more than one DOM node")
        else:
            found = []
            for j, node in enumerate(nodes):
                v = _getValue(contextExpr, i, total, expr, j, len(nodes),
                    attribute, node)
                if v != None: found.append(v)
            if len(found) == 0:
                values.append(None)
            elif joinString == None:
                values.append(found[0])
            else:
                values.append(joinString.join(found))
    return values

# In the following:
#
# results ::= [column, ...]
#  column ::= [value, ...]
#   value ::= _str
#
#  values ::= [row, ...]
#     row ::= (value, ...)

def get (query):
    """Evaluate 'query' against the source document and return a list
    of rows, each a tuple with one value (a string or None) per value
    column of the query.

    'query' is an expression or a non-empty list of expressions: ""
    (a null constant), "=literal" (a constant), an absolute XPath
    expression (which establishes the context nodes, one row per node)
    or an expression relative to the nearest preceding absolute one.
    All absolute expressions must select the same number of nodes.  If
    the query has any non-constant column, rows whose non-constant
    values are all null are dropped.
    """
    _typecheck(1, query, [_str, (list, _str, 1)])
    columns = []
    constantFlags = []
    rowCount = -1
    for expr in _rewriteQuery(_listify(query)):
        if expr == "" or expr.startswith("="):
            # "" and "=" followed only by whitespace both yield null.
            literal = expr[1:].strip()
            if literal == "":
                columns.append([None])
            else:
                columns.append([literal])
            constantFlags.append(True)
        elif expr.startswith("/"):
            try:
                xpathContext = xml.xpath.Context.Context(_source,
                    processorNss=_namespaces)
                context = xml.xpath.Evaluate(expr, context=xpathContext)
            except:
                _xmlException("evaluating XPath expression '" + expr + "'")
            found = len(context)
            if rowCount >= 0 and found != rowCount:
                fatal("incomensurable column lengths in query: " +\
                    "expression '" + contextExpr + "' produced " +\
                    str(rowCount) + " value" + _plural(rowCount) +\
                    " while " + "expression '" + expr +"' produced " +\
                    str(found) + " value" + _plural(found))
            contextExpr = expr
            rowCount = found
        else:
            columns.append(_getRelative(contextExpr, context, expr))
            constantFlags.append(False)
    # A query consisting solely of constants yields exactly one row.
    if rowCount < 0: rowCount = 1
    allConstant = False not in constantFlags
    rows = []
    for r in range(rowCount):
        row = []
        for column in columns:
            # One-element columns are repeated on every row.
            if len(column) == 1:
                row.append(column[0])
            else:
                row.append(column[r])
        informative = [v for v, constant in zip(row, constantFlags)
            if not constant and v != None]
        if allConstant or len(informative) > 0:
            rows.append(tuple(row))
    return rows

# ----------------------------------------
# MAPPING DECLARATIONS

#   _mapdecls ::= [decl, ...]
#        decl ::= (bucket, query, field, prefilters, converters, postfilters,
#                 strict, id)
#      bucket ::= _str, e.g., "adl:dates"
#       query ::= [expr, ...]
#        expr ::= _str, e.g., "/metadata/title"
#       field ::= (fieldName, fieldUri) or None
#   fieldName ::= _str, e.g., "[DC] Title"
#    fieldUri ::= _str, e.g., "http://purl.org/dc/elements/1.1/title"
#  prefilters ::= [filter, ...]
#  converters ::= [converter, ...]
# postfilters ::= [filter, ...]
#      filter ::= func(value) -> retval
#   converter ::= func(value) -> retval
#       value ::= tuple
#      retval ::= value or None
#      strict ::= bool
#          id ::= int or None

_mapdecls = []

def map (bucket, query, field=None, prefilters=[], converters=[],
    postfilters=[], strict=True, id=None):
    """Declare a mapping from the values produced by 'query' to
    'bucket'.

    'field' is an optional (fieldName, fieldUri) pair.  'prefilters',
    'converters' and 'postfilters' are each a function or a list of
    functions; the declaration stores its own copies of the lists.
    'id' optionally identifies the declaration and must be unique
    among the declarations for the bucket.  The bucket must already be
    declared.
    """
    functions = [types.FunctionType, (list, types.FunctionType)]
    optionalField = [(tuple, _str, _str), types.NoneType]
    _typecheck(1, bucket, _str)
    _typecheck(2, query, [_str, (list, _str, 1)])
    _typecheck(3, field, optionalField)
    _typecheck(4, prefilters, functions)
    _typecheck(5, converters, functions)
    _typecheck(6, postfilters, functions)
    _typecheck(7, strict, bool)
    _typecheck(8, id, [int, types.NoneType])
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    if id != None:
        for decl in _mapdecls:
            if (decl[0], decl[7]) == (bucket, id):
                fatal("duplicate mapping ID for bucket '" + bucket + "': " +\
                    str(id))
    decl = (bucket, _listify(query), field, _listify(prefilters),
        _listify(converters), _listify(postfilters), strict, id)
    _mapdecls.append(decl)

def _addFilterOrConverter (bucket, filter, field, id, index, append):
    """Add the function 'filter' to every mapping declaration for
    'bucket' that matches 'field' and 'id' (None matches anything).

    'index' selects the function list within the declaration tuple
    (3 prefilters, 4 converters, 5 postfilters).  The function is added
    at the end of the list if 'append' is true, at the front otherwise.
    """
    _typecheck(1, bucket, _str)
    _typecheck(2, filter, types.FunctionType)
    _typecheck(3, field, [(tuple, _str, _str), types.NoneType])
    _typecheck(4, id, [int, types.NoneType])
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    for decl in _mapdecls:
        if decl[0] != bucket: continue
        if field != None and field != decl[2]: continue
        if id != None and id != decl[7]: continue
        functions = decl[index]
        if append:
            functions.append(filter)
        else:
            functions.insert(0, filter)

def prependPrefilter (bucket, filter, field=None, id=None):
    """Add 'filter' at the front of the prefilters of the matching
    mapping declarations for 'bucket'."""
    _addFilterOrConverter(bucket, filter, field, id, index=3, append=False)

def appendPrefilter (bucket, filter, field=None, id=None):
    """Add 'filter' at the end of the prefilters of the matching
    mapping declarations for 'bucket'."""
    _addFilterOrConverter(bucket, filter, field, id, index=3, append=True)

def prependConverter (bucket, converter, field=None, id=None):
    """Add 'converter' at the front of the converters of the matching
    mapping declarations for 'bucket'."""
    _addFilterOrConverter(bucket, converter, field, id, index=4,
        append=False)

def appendConverter (bucket, converter, field=None, id=None):
    """Add 'converter' at the end of the converters of the matching
    mapping declarations for 'bucket'."""
    _addFilterOrConverter(bucket, converter, field, id, index=4,
        append=True)

def prependPostfilter (bucket, filter, field=None, id=None):
    """Add 'filter' at the front of the postfilters of the matching
    mapping declarations for 'bucket'."""
    _addFilterOrConverter(bucket, filter, field, id, index=5, append=False)

def appendPostfilter (bucket, filter, field=None, id=None):
    """Add 'filter' at the end of the postfilters of the matching
    mapping declarations for 'bucket'."""
    _addFilterOrConverter(bucket, filter, field, id, index=5, append=True)

def strict (bucket, newStrict, field=None, id=None):
    """Set the 'strict' flag to 'newStrict' on every mapping
    declaration for 'bucket' that matches 'field' and 'id' (None
    matches anything)."""
    _typecheck(1, bucket, _str)
    _typecheck(2, newStrict, bool)
    _typecheck(3, field, [(tuple, _str, _str), types.NoneType])
    _typecheck(4, id, [int, types.NoneType])
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    for position, decl in enumerate(_mapdecls):
        if decl[0] != bucket: continue
        if field != None and field != decl[2]: continue
        if id != None and id != decl[7]: continue
        # Declarations are tuples, so a modified copy replaces the
        # original; element 6 is the strict flag.
        _mapdecls[position] = decl[:6] + (newStrict,) + decl[7:]

def unmap (bucket, field=None, id=None):
    """Remove every mapping declaration for 'bucket' that matches
    'field' and 'id' (None matches anything)."""
    _typecheck(1, bucket, _str)
    _typecheck(2, field, [(tuple, _str, _str), types.NoneType])
    _typecheck(3, id, [int, types.NoneType])
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    def selected (decl):
        return decl[0] == bucket and\
            (field == None or field == decl[2]) and\
            (id == None or id == decl[7])
    # Slice assignment updates the existing list object in place.
    _mapdecls[:] = [decl for decl in _mapdecls if not selected(decl)]

# ----------------------------------------
# PROCESSING & VALIDATION

# _mappings ::= { bucket : [mapping, ...], ... }
#    bucket ::= _str, e.g., "adl:dates"
#   mapping ::= (field, value)
#     field ::= (fieldName, fieldUri) or None
# fieldName ::= _str, e.g., "[DC] Title"
#  fieldUri ::= _str, e.g., "http://purl.org/dc/elements/1.1/title"
#     value ::= tuple

_mappings = {}

def _process1 (bucket, query, field, prefilters, converters, postfilters,
    strict):
    """Execute one mapping declaration, adding the resulting mappings
    to those already recorded for 'bucket'.

    Each row returned by the query passes through all the prefilters,
    then is replaced by the result of the first converter that returns
    non-null (if any), then passes through all the postfilters, and is
    finally handed to the validator of the bucket's type.  A row is
    dropped as soon as a filter returns None, or if the validator
    returns None.
    """
    def applyFilters (filters, value):
        # Stops at the first filter that rejects the value.
        for f in filters:
            value = f(value)
            _typecheck(0, value, [tuple, types.NoneType])
            if value == None: break
        return value
    validator = _bucketTypes[_buckets[bucket]][0]
    collected = _mappings.get(bucket, [])
    for value in get(query):
        value = applyFilters(prefilters, value)
        if value == None: continue
        for converter in converters:
            converted = converter(value)
            _typecheck(0, converted, [tuple, types.NoneType])
            if converted != None:
                value = converted
                break
        value = applyFilters(postfilters, value)
        if value == None: continue
        mapping = validator(bucket, field, value, strict)
        _typecheck(0, mapping, [(tuple, [(tuple, _str, _str),
            types.NoneType], tuple), types.NoneType])
        if mapping != None: collected.append(mapping)
    _mappings[bucket] = collected

def _process ():
    """Execute every mapping declaration, in declaration order."""
    for decl in _mapdecls:
        # The eighth element, the ID, plays no part in processing.
        _process1(*decl[:7])

# ----------------------------------------
# ADDITIONAL CHECKS

def _checkRequirements ():
    """Check the number of mappings in each bucket against its
    requirement; a violation is a fatal error.  "1" means exactly one
    mapping, "1?" at most one, "1+" at least one; "0+" is always
    satisfied."""
    for bucket, cardinality in _requirements.items():
        n = len(_mappings.get(bucket, []))
        violations = {"1": n != 1, "1?": n > 1, "1+": n == 0}
        if violations.get(cardinality, False):
            fatal("mapping requirement not satisfied: required " +\
                "cardinality for bucket '%s' is '%s', got %s mappings" %\
                (bucket, cardinality, n))

def _checkExpectations ():
    """Check the number of mappings in each bucket against its
    expectation; a violation produces a warning.  "1" means exactly one
    mapping, "1?" at most one, "1+" at least one; "0+" is always
    met."""
    for bucket, cardinality in _expectations.items():
        n = len(_mappings.get(bucket, []))
        violations = {"1": n != 1, "1?": n > 1, "1+": n == 0}
        if violations.get(cardinality, False):
            warning("mapping expectation not met: expected cardinality " +\
                "for bucket '%s' is '%s', got %s mappings" %\
                (bucket, cardinality, n))

# ----------------------------------------
# POST-PROCESSING

# _consolidatedTextualBuckets ::= { bucket : separator, ... }
#                      bucket ::= _str, e.g., "adl:assigned-terms"
#                   separator ::= _str, e.g., "; "

_consolidatedTextualBuckets = {}

def consolidateTextualValues (buckets, separator="; "):
    """Turn consolidation of textual values on or off for one or more
    buckets.

    'buckets' is a bucket name or a non-empty list of bucket names, each
    of which must be declared with type "textual".  If 'separator' is a
    string the buckets are consolidated using it; if it is None
    consolidation is turned off for them.
    """
    _typecheck(1, buckets, [_str, (list, _str, 1)])
    _typecheck(2, separator, [_str, types.NoneType])
    for name in _listify(buckets):
        if name not in _buckets:
            fatal("undeclared bucket: " + name)
        if _buckets[name] != "textual":
            fatal("attempt to consolidate non-textual bucket '" + name + "'")
        if separator == None:
            _consolidatedTextualBuckets.pop(name, None)
        else:
            _consolidatedTextualBuckets[name] = separator

def _consolidateTextualMappings ():
    """Merge, in every bucket marked for consolidation, all mappings
    that share a field into a single mapping whose value is the
    individual values joined by the bucket's separator.

    Only the first element of each value tuple is used.  The merged
    mappings appear in the order in which their fields first occurred,
    so the result does not depend on dictionary ordering.
    """
    for bucket in _mappings:
        if bucket not in _consolidatedTextualBuckets: continue
        separator = _consolidatedTextualBuckets[bucket]
        merged = {}
        order = []
        for field, value in _mappings[bucket]:
            if field in merged:
                merged[field] = merged[field] + separator + value[0]
            else:
                merged[field] = value[0]
                order.append(field)
        # Replacing the value of an existing key is safe while
        # iterating over the dictionary.
        _mappings[bucket] = [(field, (merged[field],)) for field in order]

def _consolidateHierarchicalMappings1 (mappings):
    """Remove from 'mappings', in place, every mapping whose value is an
    ancestor of another mapped value, and every repeat of a value that
    has already been kept.  Each value is a (vocabulary, term) tuple."""
    # Two passes are needed so that ancestors are weeded out whatever
    # the order of the mappings: the first classifies every value, the
    # second keeps the first occurrence of each surviving value.
    status = {}
    for field, value in mappings:
        if value not in status:
            status[value] = "primary"
        for ancestor in _vocabularies[value[0]][1][value[1]]:
            status[(value[0], ancestor)] = "implied"
    kept = []
    for mapping in mappings:
        value = mapping[1]
        if status[value] == "primary":
            status[value] = "seen"
            kept.append(mapping)
    mappings[:] = kept

def _consolidateHierarchicalMappings ():
    """Consolidate, in place, the mappings of every bucket whose type
    is "hierarchical"."""
    for bucket in _mappings:
        if _buckets[bucket] != "hierarchical": continue
        _consolidateHierarchicalMappings1(_mappings[bucket])

# ----------------------------------------
# OUTPUT

_dtd = "http://www.alexandria.ucsb.edu/middleware/dtds/ADL-bucket-report.dtd"

def output ():
    """Run the mapping and pretty-print the resulting bucket report.

    Executes the mapping declarations, checks requirements and
    expectations, consolidates textual and hierarchical mappings, and
    then builds an ADL-bucket-report document with an identifier
    ("collection:holding", from the parameters) and one element per
    non-empty bucket.  Nested calls (anything but the call matching the
    outermost input()) do nothing beyond being counted.
    """
    global _invocationLevel
    _invocationLevel -= 1
    if _invocationLevel > 0: return
    _process()
    _checkRequirements()
    _checkExpectations()
    _consolidateTextualMappings()
    _consolidateHierarchicalMappings()
    imp = xml.dom.DOMImplementation.getDOMImplementation()
    doctype = imp.createDocumentType("ADL-bucket-report", None, _dtd)
    document = imp.createDocument(None, "ADL-bucket-report", doctype)
    identifier = document.createElement("identifier")
    identifier.appendChild(document.createTextNode(
        _params["collection"] + ":" + _params["holding"]))
    document.documentElement.appendChild(identifier)
    for bucket in _mappings:
        mappings = _mappings[bucket]
        if len(mappings) == 0: continue
        # Every mapping in a bucket is encoded by the bucket's type.
        encoder = _bucketTypes[_buckets[bucket]][1]
        element = document.createElement("bucket")
        element.setAttribute("name", bucket)
        for field, value in mappings:
            element.appendChild(encoder(document, field, value))
        document.documentElement.appendChild(element)
    xml.dom.ext.PrettyPrint(document)

# ----------------------------------------
# SYNTACTIC SUGAR

def present (query):
    """Return True if 'query' produces at least one row, False
    otherwise."""
    _typecheck(1, query, [_str, (list, _str, 1)])
    rows = get(query)
    return len(rows) != 0

def mapConstant (bucket, value, field=None, id=None):
    """Declare a mapping of a constant value to 'bucket'.  'value' is a
    string or a tuple of strings; each string becomes one constant
    expression in the query of the declared mapping."""
    _typecheck(1, bucket, _str)
    _typecheck(2, value, [_str, tuple])
    _typecheck(3, field, [(tuple, _str, _str), types.NoneType])
    _typecheck(4, id, [int, types.NoneType])
    if type(value) is tuple:
        parts = value
    else:
        parts = (value,)
    query = []
    for part in parts:
        # The "=" prefix marks the expression as a constant.
        query.append("=" + part)
    map(bucket, query, field, id=id)

# The following import must appear last to allow the bucket type
# modules to see the definitions in this module.

import bucket_types
