001/*
002 * Copyright (c) 2018-2020 Tada AB and other contributors, as listed below.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the The BSD 3-Clause License
006 * which accompanies this distribution, and is available at
007 * http://opensource.org/licenses/BSD-3-Clause
008 *
009 * Contributors:
010 *   Chapman Flack
011 */
012package org.postgresql.pljava.example.saxon;
013
014import java.math.BigDecimal;
015import java.math.BigInteger;
016
017import java.sql.Connection;
018import java.sql.DriverManager;
019import java.sql.ResultSet;
020import java.sql.ResultSetMetaData;
021import static java.sql.ResultSetMetaData.columnNoNulls;
022import java.sql.SQLXML;
023import java.sql.Statement;
024import java.sql.Types;
025
026import java.sql.SQLException;
027import java.sql.SQLDataException;
028import java.sql.SQLFeatureNotSupportedException;
029import java.sql.SQLNonTransientException;
030import java.sql.SQLSyntaxErrorException;
031
032import java.time.LocalDate;
033import java.time.LocalTime;
034import java.time.OffsetTime;
035import java.time.LocalDateTime;
036import java.time.OffsetDateTime;
037import static java.time.ZoneOffset.UTC;
038
039import static java.util.Arrays.asList;
040import static java.util.Arrays.fill;
041import java.util.Collection;
042import java.util.Collections;
043import java.util.HashMap;
044import java.util.Iterator;
045import java.util.List;
046import java.util.Map;
047import java.util.Properties;
048
049import java.util.regex.Matcher;
050import java.util.regex.Pattern;
051
052import javax.xml.transform.Source;
053import javax.xml.transform.Result;
054
055import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
056import static javax.xml.XMLConstants.XML_NS_URI;
057import static javax.xml.XMLConstants.XML_NS_PREFIX;
058import static javax.xml.XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
059import static javax.xml.XMLConstants.XMLNS_ATTRIBUTE;
060
061import net.sf.saxon.event.Receiver;
062
063import net.sf.saxon.lib.ConversionRules;
064import net.sf.saxon.lib.NamespaceConstant;
065
066import static net.sf.saxon.om.NameChecker.isValidNCName;
067
068import net.sf.saxon.query.StaticQueryContext;
069
070import net.sf.saxon.regex.RegexIterator;
071import net.sf.saxon.regex.RegularExpression;
072
073import net.sf.saxon.s9api.Destination;
074import net.sf.saxon.s9api.DocumentBuilder;
075import net.sf.saxon.s9api.ItemType;
076import net.sf.saxon.s9api.ItemTypeFactory;
077import net.sf.saxon.s9api.OccurrenceIndicator;
078import net.sf.saxon.s9api.Processor;
079import net.sf.saxon.s9api.QName;
080import net.sf.saxon.s9api.SAXDestination;
081import net.sf.saxon.s9api.SequenceType;
082import static net.sf.saxon.s9api.SequenceType.makeSequenceType;
083import net.sf.saxon.s9api.XdmAtomicValue;
084import static net.sf.saxon.s9api.XdmAtomicValue.makeAtomicValue;
085import net.sf.saxon.s9api.XdmEmptySequence;
086import net.sf.saxon.s9api.XdmItem;
087import net.sf.saxon.s9api.XdmNode;
088import static net.sf.saxon.s9api.XdmNodeKind.DOCUMENT;
089import net.sf.saxon.s9api.XdmValue;
090import net.sf.saxon.s9api.XdmSequenceIterator;
091import net.sf.saxon.s9api.XQueryCompiler;
092import net.sf.saxon.s9api.XQueryEvaluator;
093import net.sf.saxon.s9api.XQueryExecutable;
094
095import net.sf.saxon.s9api.SaxonApiException;
096
097import net.sf.saxon.trans.XPathException;
098
099import net.sf.saxon.serialize.SerializationProperties;
100
101import net.sf.saxon.type.AtomicType;
102import net.sf.saxon.type.Converter;
103
104import net.sf.saxon.value.AtomicValue;
105import net.sf.saxon.value.Base64BinaryValue;
106import net.sf.saxon.value.CalendarValue;
107import net.sf.saxon.value.HexBinaryValue;
108import net.sf.saxon.value.StringValue;
109import static net.sf.saxon.value.StringValue.getStringLength;
110
111import org.postgresql.pljava.ResultSetProvider;
112
113import org.postgresql.pljava.annotation.Function;
114import org.postgresql.pljava.annotation.SQLType;
115import static org.postgresql.pljava.annotation.Function.OnNullInput.CALLED;
116
117/* For the xmltext function, which only needs plain SAX and not Saxon */
118
119import javax.xml.transform.sax.SAXResult;
120import org.xml.sax.ContentHandler;
121import org.xml.sax.SAXException;
122
123/**
124 * Class illustrating use of XQuery with Saxon as the
125 * implementation, using its native "s9api".
126 *<p>
127 * Supplies alternative, XML Query-based (as the SQL/XML standard dictates)
128 * implementation of some of SQL/XML, where the implementation in core
129 * PostgreSQL is limited to the capabilities of XPath (and XPath 1.0, at that).
130 *<p>
131 * Without the syntactic sugar built into the core PostgreSQL parser, calls to
132 * a function in this class can look a bit more verbose in SQL, but reflect a
133 * straightforward rewriting from the standard syntax. For example, suppose
134 * there is a table {@code catalog_as_xml} with a single row whose {@code x}
135 * column is a (respectably sized) XML document recording the stuff in
136 * {@code pg_catalog}. It could be created like this:
137 *<pre>
138 * CREATE TABLE catalog_as_xml(x) AS
139 *   SELECT schema_to_xml('pg_catalog', false, true, '');
140 *</pre>
141 *<h2>Functions/predicates from ISO 9075-14 SQL/XML</h2>
142 *<h3>XMLQUERY</h3>
143 *<p>
144 * In the syntax of the SQL/XML standard, here is a query that would return
145 * an XML element representing the declaration of a function with a specified
146 * name:
147 *<pre>
148 * SELECT XMLQUERY('/pg_catalog/pg_proc[proname eq $FUNCNAME]'
149 *                 PASSING BY VALUE x, 'numeric_avg' AS FUNCNAME
150 *                 RETURNING CONTENT EMPTY ON EMPTY)
151 * FROM catalog_as_xml;
152 *</pre>
153 *<p>
154 * It binds the 'context item' of the query to {@code x}, and the {@code FUNCNAME}
155 * parameter to the given value, then evaluates the query and returns XML
156 * "CONTENT" (a tree structure with a document node at the root, but not
157 * necessarily meeting all the requirements of an XML "DOCUMENT"). It can be
158 * rewritten as this call to the {@link #xq_ret_content xq_ret_content} method:
159 *<pre>
160 * SELECT javatest.xq_ret_content('/pg_catalog/pg_proc[proname eq $FUNCNAME]',
161 *                                PASSING =&gt; p, nullOnEmpty =&gt; false)
162 * FROM catalog_as_xml,
163 * LATERAL (SELECT x AS ".", 'numeric_avg' AS "FUNCNAME") AS p;
164 *</pre>
165 *<p>
166 * In the rewritten form, the form of result wanted ({@code RETURNING CONTENT})
167 * is implicit in the called function name ({@code xq_ret_content}), and the
168 * parameters to pass to the query are moved out to a separate {@code SELECT}
169 * that supplies their values, types, and names (with the context item now given
170 * the name ".") and is passed by its alias into the query function.
171 *<p>
172 * Because of an unconditional uppercasing that PL/Java's JDBC driver currently
173 * applies to column names, any parameter names, such as {@code FUNCNAME} above,
174 * must be spelled in uppercase where used in the XQuery text, or they will not
175 * be recognized. Because the unconditional uppercasing is highly likely to be
176 * dropped in a future PL/Java release, it is wisest until then to use only
177 * parameter names that really are uppercase, both in the XQuery text where they
178 * are used and in the SQL expression that supplies them. In PostgreSQL,
179 * identifiers that are not quoted are <em>lower</em>cased, so they must be both
180 * uppercase and quoted, in the SQL syntax, to be truly uppercase.
181 *<p>
182 * In the standard, parameters and results (of XML types) can be passed
183 * {@code BY VALUE} or {@code BY REF}, where the latter means that the same
184 * nodes will retain their XQuery node identities over calls (note that this is
185 * a meaning unrelated to what "by value" and "by reference" usually mean in
186 * PostgreSQL's documentation). PostgreSQL's implementation of the XML type
187 * provides no way for {@code BY REF} semantics to be implemented, so everything
188 * happening here happens {@code BY VALUE} implicitly, and does not need to be
189 * specified.
190 *<h3>XMLEXISTS</h3>
191 *<p>
192 * The function {@link #xmlexists xmlexists} here implements the
193 * standard function of the same name. Because it is the same name, it has to
194 * be either schema-qualified or double-quoted in a call to avoid confusion
195 * with the reserved word. In the syntax of the SQL/XML standard, here is a
196 * query returning a boolean value indicating whether a function with the
197 * specified name is declared:
198 *<pre>
199 * SELECT XMLEXISTS('/pg_catalog/pg_proc[proname eq $FUNCNAME]'
200 *                  PASSING BY VALUE x, 'numeric_avg' AS FUNCNAME)
201 * FROM catalog_as_xml;
202 *</pre>
203 *<p>
204 * It can be rewritten as this call to the {@link #xmlexists xmlexists} method:
205 *<pre>
206 * SELECT "xmlexists"('/pg_catalog/pg_proc[proname eq $FUNCNAME]',
207 *                    PASSING =&gt; p)
208 * FROM catalog_as_xml,
209 * LATERAL (SELECT x AS ".", 'numeric_avg' AS "FUNCNAME") AS p;
210 *</pre>
211 *<h3>XMLTABLE</h3>
212 *<p>
213 * The function {@link #xmltable xmltable} here implements (much of) the
214 * standard function of the same name. Because it is the same name, it has to
215 * be either schema-qualified or double-quoted in a call to avoid confusion
216 * with the reserved word. A rewritten form of the <a href=
217'https://www.postgresql.org/docs/10/static/functions-xml.html#FUNCTIONS-XML-PROCESSING-XMLTABLE'
218>first example in the PostgreSQL manual</a> could be:
219 *<pre>
220 * SELECT xmltable.*
221 * FROM
222 *  xmldata,
223 *
224 *  LATERAL (SELECT data AS ".", 'not specified'::text AS "DPREMIER") AS p,
225 *
226 *  "xmltable"('//ROWS/ROW', PASSING =&gt; p, COLUMNS =&gt; ARRAY[
227 *   'data(@id)', null, 'COUNTRY_NAME',
228 *   'COUNTRY_ID', 'SIZE[@unit eq "sq_km"]',
229 *   'concat(SIZE[@unit ne "sq_km"], " ", SIZE[@unit ne "sq_km"]/@unit)',
230 *   'let $e := PREMIER_NAME
231 *    return if ( empty($e) )then $DPREMIER else $e'
232 *  ]) AS (
233 *   id int, ordinality int8, "COUNTRY_NAME" text, country_id text,
234 *   size_sq_km float, size_other text, premier_name text
235 *  );
236 *</pre>
237 *<p>
238 * In the first column expression, without the {@code data()} function, the
239 * result would be a bare attribute node (one not enclosed in an XML element).
240 * Many implementations will accept a bare attribute as a column expression
241 * result, and simply assume the attribute's value is wanted, but it appears
242 * that a strict implementation of the spec must raise {@code err:XPTY0004} in
243 * such a case. This implementation is meant to be strict, so the attribute is
244 * wrapped in {@code data()} to extract and return its value. (See
245 * "About bare attribute nodes" in {@link #assignRowValues assignRowValues}
246 * for more explanation.)
247 *<p>
248 * The {@code DPREMIER} parameter passed from SQL to the XQuery expression is
249 * spelled in uppercase (and also, in the SQL expression supplying it, quoted),
250 * for the reasons explained above for the {@code xq_ret_content} function.
251 *<h3>XMLCAST</h3>
252 *<p>
253 * An ISO standard cast expression like
254 *<pre>
255 * XMLCAST(v AS wantedtype)
256 *</pre>
257 * can be rewritten with this idiom and the {@link #xmlcast xmlcast} function
258 * provided here:
259 *<pre>
260 * (SELECT r FROM (SELECT v) AS o, xmlcast(o) AS (r wantedtype))
261 *</pre>
262 *<h2>XQuery regular-expression functions in ISO 9075-2 Foundations</h2>
263 * The methods {@link #like_regex like_regex},
264 * {@link #occurrences_regex occurrences_regex},
265 * {@link #position_regex position_regex},
266 * {@link #substring_regex substring_regex}, and
267 * {@link #translate_regex translate_regex} provide, with slightly altered
268 * syntax, the ISO SQL predicate and functions of the same names.
269 *<p>
270 * For the moment, they will only match newlines in the way W3C XQuery
271 * specifies, not in the more-flexible Unicode-compatible way ISO SQL specifies,
272 * and for the ones where ISO SQL allows {@code USING CHARACTERS} or
273 * {@code USING OCTETS}, only {@code USING CHARACTERS} will work.
274 *<h2>Extensions</h2>
275 *<h3>XQuery module prolog allowed</h3>
276 *<p>
277 * Where any function here accepts an XQuery
278 *<a href='https://www.w3.org/TR/xquery-31/#id-expressions'
279 *>"expression"</a> according to the SQL specification, in fact an XQuery
280 *<a href='https://www.w3.org/TR/xquery-31/#dt-main-module'
281 *>"main module"</a> will be accepted. Therefore, the query can be preceded by
282 * a prolog declaring namespaces, options, local variables and functions, etc.
283 *<h3>Saxon extension to XQuery regular expressions</h3>
284 *<p>
285 * Saxon's implementation of XQuery regular expressions will accept a
286 * nonstandard <em>flag</em> string ending with {@code ;j} to use Java regular
287 * expressions rather than XQuery ones. That extension is available in the
288 * XQuery regular-expression methods provided here.
289 * @author Chapman Flack
290 */
291public class S9 implements ResultSetProvider.Large
292{
293    /** Stores the arguments; sizes m_atomize to one slot per column type. */
294    private S9(
295        XdmSequenceIterator<XdmItem> xsi, XQueryEvaluator[] columnXQEs,
296        SequenceType[] columnStaticTypes, XMLBinary enc)
297    {
298        this.m_sequenceIterator = xsi;
299        this.m_columnXQEs = columnXQEs;
300        this.m_columnStaticTypes = columnStaticTypes;
301        this.m_atomize = new AtomizingFunction [ columnStaticTypes.length ];
302        this.m_xmlbinary = enc;
303    }
304
305    /* Per-instance state; of these, only m_outBindings is not final. */
306    final XdmSequenceIterator<XdmItem> m_sequenceIterator;
307    final XQueryEvaluator[] m_columnXQEs;
308    final SequenceType[] m_columnStaticTypes;
309    final AtomizingFunction[] m_atomize;
310    final XMLBinary m_xmlbinary;
311    Binding.Assemblage m_outBindings;
312    /* NOTE(review): an instance field, in spite of the s_ prefix. */
313    final SequenceType s_01untypedAtomic = makeSequenceType(
314        ItemType.UNTYPED_ATOMIC, OccurrenceIndicator.ZERO_OR_ONE);
315
316    static final Connection s_dbc; // assigned in the static initializer
317    static final Processor s_s9p = new Processor(false);
318    static final ItemTypeFactory s_itf = new ItemTypeFactory(s_s9p);
319    static final Pattern s_intervalSigns; // assigned in the static initializer
320    static final Pattern s_intervalSignSite; // likewise
321
322    enum XMLBinary { HEX, BASE64 }
323    enum Nulls { ABSENT, NIL }
324
325    static
326    {
327        /* Obtaining the connection is the only step declaring SQLException. */
328        Connection defaultConnection;
329        try
330        {
331            defaultConnection =
332                DriverManager.getConnection("jdbc:default:connection");
333        }
334        catch ( SQLException e )
335        {
336            throw new ExceptionInInitializerError(e);
337        }
338        s_dbc = defaultConnection;
339
340        /*
341         * XML Schema and PostgreSQL disagree on where a duration's sign goes.
342         * XML Schema permits one only at the very beginning, ahead of the P.
343         * PostgreSQL never puts one there, and signs each numeric field
344         * instead. PostgreSQL also accepts fields of differing signs, and that
345         * cannot always be normalized away: P1M-1D is a thing, and has no
346         * simpler form until anchored at a date that fixes the month's length.
347         * XML Schema cannot represent such a value, so mapping one must fail.
348         *
349         * Hence this regex, with three capturing groups: (1) a leading -,
350         * (2) a field-leading -, and (3) the absence of a field-leading -. Any
351         * PostgreSQL or XS duration ought to match overall, with the groups
352         * matching as (f,f,t) or (f,t,f) for PostgreSQL, or as (f,f,t) or
353         * (t,f,t) for XS. (f,t,t) means a mixed-sign PostgreSQL interval,
354         * which is inconvertible.
355         */
356        s_intervalSigns = Pattern.compile(
357        "(-)?+(?:[PYMWDTH](?:(?:(-)|())\\d++)?+)++(?:(?:[.,]\\d*+)?+S)?+");
358        /* Converting from the leading-sign form means inserting a - at each
359         * zero-width site where a digit follows one of [PYMWDTH]. */
360        s_intervalSignSite = Pattern.compile("(?<=[PYMWDTH])(?=\\d)");
361    }
362
363    /** Compiler and variable name shared by the predefined queries. */
364    static class PredefinedQueryHolders
365    {
366        static final XQueryCompiler s_xqc = s_s9p.newXQueryCompiler();
367        static final QName s_qEXPR = new QName("EXPR");
368        /** Holds the query xmlcast uses; compiled when first initialized. */
369        static class DocumentWrapUnwrap
370        {
371            static final XQueryExecutable INSTANCE = compiled();
372            private static XQueryExecutable compiled()
373            {
374                try
375                {
376                    return s_xqc.compile(
377                        "declare construction preserve;" +
378                        "declare variable $EXPR as item()* external;" +
379                        "data(document{$EXPR}/child::node())");
380                }
381                catch ( SaxonApiException e )
382                {
383                    throw new ExceptionInInitializerError(e);
384                }
385            }
386        }
387    }
388
389    /**
390     * PostgreSQL (as of 12) lacks the XMLTEXT function, so here it is.
391     *<p>
392     * As long as PostgreSQL does not have the {@code XML(SEQUENCE)} type,
393     * this can only be the {@code XMLTEXT(sve RETURNING CONTENT)} flavor, which
394     * does create a text node with {@code sve} as its value, but returns the
395     * text node wrapped in a document node.
396     *<p>
397     * This function doesn't actually require Saxon, but otherwise fits in with
398     * the theme here, implementing missing parts of SQL/XML for PostgreSQL.
399     * @param sve SQL string value to use in a text node (may be null)
400     * @return XML content, the text node wrapped in a document node; null if
401     * {@code sve} is null
402     */
403    @Function(schema="javatest")
404    public static SQLXML xmltext(String sve) throws SQLException
405    {
406        // NOTE(review): no onNullInput is declared here, so assume a null can
407        // arrive; answer null rather than fail below with an unfreed SQLXML.
408        if ( null == sve )
409            return null;
410
411        SQLXML rx = s_dbc.createSQLXML();
412        ContentHandler ch = rx.setResult(SAXResult.class).getHandler();
413
414        try
415        {
416            ch.startDocument();
417            /*
418             * The SAX docs describe the third argument of characters() as "the
419             * number of characters to read from the array", but trying it with
420             * code points above U+FFFF shows it has to be the number of Java
421             * char values instead (which is what String.length() supplies).
422             */
423            ch.characters(sve.toCharArray(), 0, sve.length());
424            ch.endDocument();
425        }
426        catch ( SAXException e )
427        {
428            rx.free();
429            throw new SQLException(e.getMessage(), e);
430        }
431
432        return rx;
433    }
434
435    /**
436     * An implementation of XMLCAST.
437     *<p>
438     * The operand and the target are each passed as a {@code RECORD} having
439     * exactly one component, simply because existing JDBC metadata queries
440     * then suffice to discover the SQL data types to cast from and to.
441     *<p>
442     * Where ISO SQL would have
443     *<pre>
444     * XMLCAST(v AS wantedtype)
445     *</pre>
446     * the equivalent idiom using this function is
447     *<pre>
448     * (SELECT r FROM (SELECT v) AS o, xmlcast(o) AS (r wantedtype))
449     *</pre>
450     * @param operand a one-row, one-column record from the caller; its single
451     * typed value is what gets cast. Must not be null itself.
452     * @param base64 whether binary SQL values are base64-encoded in XML (true)
453     * or hex-encoded (false, the default). Must not be null.
454     * @param target a one-row, one-column record that PL/Java supplies from
455     * the {@code AS} clause following the call; its one column has the type
456     * being cast to, and receives the result.
457     * @return true in every case where no exception is thrown
458     */
459    @Function(
460        schema="javatest",
461        type="pg_catalog.record",
462        onNullInput=CALLED,
463        settings="IntervalStyle TO iso_8601"
464    )
465    public static boolean xmlcast(
466        ResultSet operand, @SQLType(defaultValue="false") Boolean base64,
467        ResultSet target)
468        throws SQLException
469    {
470        if ( null == operand )
471            throw new SQLDataException(
472                "xmlcast \"operand\" must be (in this implementation) " +
473                "a non-null row type", "22004");
474
475        if ( null == base64 )
476            throw new SQLDataException(
477                "xmlcast \"base64\" must be true or false, not null", "22004");
478        final XMLBinary enc = base64 ? XMLBinary.BASE64 : XMLBinary.HEX;
479
480        assert null != target : "PL/Java supplied a null output record???";
481
482        if ( 1 != operand.getMetaData().getColumnCount() )
483            throw new SQLDataException(
484                "xmlcast \"operand\" must be a row type with exactly " +
485                "one component", "22000");
486
487        if ( 1 != target.getMetaData().getColumnCount() )
488            throw new SQLDataException(
489                "xmlcast \"target\" must be a row type with exactly " +
490                "one component", "22000");
491
492        Binding.Parameter op =
493            new BindingsFromResultSet(operand, false).iterator().next();
494        Binding.Parameter tg =
495            new BindingsFromResultSet(target, null).iterator().next();
496
497        final int operandType = op.typeJDBC();
498        final int targetType = tg.typeJDBC();
499        final boolean fromXML = Types.SQLXML == operandType;
500        final boolean toXML = Types.SQLXML == targetType;
501
502        if ( ! fromXML  &&  ! toXML )
503            throw new SQLSyntaxErrorException(
504                "at least one of xmlcast \"operand\" or \"target\" must " +
505                "be of XML type", "42804");
506
507        if ( fromXML  &&  toXML )
508        {
509            /*
510             * An implementation following the spec closely would rewrite this
511             * case from XMLCAST to a plain CAST in parse analysis, and never get
512             * here; this plain example function has no parser to do that. With
513             * all the SQL:2006 XML subtypes there would be nontrivial work here,
514             * but casting PostgreSQL's one XML type to itself is a warm-up.
515             */
516            target.updateSQLXML(1, operand.getSQLXML(1));
517            return true;
518        }
519
520        if ( toXML ) // something non-XML being cast to XML
521        {
522            assertCanCastAsXmlSequence(operandType, "operand");
523            Object value = op.valueJDBC();
524            if ( null == value )
525            {
526                target.updateNull(1);
527                return true;
528            }
529            ItemType schemaType =
530                mapSQLDataTypeToXMLSchemaDataType(op, enc, Nulls.ABSENT);
531            Iterator<XdmItem> items =
532                xmlCastAsSequence(value, enc, schemaType).iterator();
533            try
534            {
535                SQLXML content = returnContent(items, /*nullOnEmpty*/ false);
536                target.updateSQLXML(1, content);
537            }
538            catch ( SaxonApiException | XPathException e )
539            {
540                throw new SQLException(e.getMessage(), "10000", e);
541            }
542            return true;
543        }
544
545        if ( fromXML ) // XML being cast to something non-XML
546        {
547            assertCanCastAsXmlSequence(targetType, "target");
548            SQLXML xmlOperand = operand.getSQLXML(1);
549            if ( null == xmlOperand )
550            {
551                target.updateNull(1);
552                return true;
553            }
554            DocumentBuilder builder = s_s9p.newDocumentBuilder();
555            Source source = xmlOperand.getSource(null);
556            try
557            {
558                XdmValue parsed = builder.build(source);
559                XQueryEvaluator xqe =
560                    PredefinedQueryHolders.DocumentWrapUnwrap.INSTANCE.load();
561                xqe.setExternalVariable(PredefinedQueryHolders.s_qEXPR, parsed);
562                XdmValue atomized = xqe.evaluate();
563                /* Zero or one item here; anything more and XPTY0004 was thrown. */
564                if ( 0 == atomized.size() )
565                {
566                    target.updateNull(1);
567                    return true;
568                }
569                xmlCastAsNonXML(
570                    (XdmAtomicValue)atomized,
571                    ItemType.UNTYPED_ATOMIC, tg, target, 1, enc);
572            }
573            catch ( SaxonApiException | XPathException e )
574            {
575                throw new SQLException(e.getMessage(), "10000", e);
576            }
577            return true;
578        }
579
580        throw new SQLFeatureNotSupportedException(
581            "cannot yet xmlcast from " + op.typePG() +
582            " to " + tg.typePG(), "0A000");
583    }
584
585    /**
586     * A simple example corresponding to {@code XMLQUERY(expression
587     * PASSING BY VALUE passing RETURNING CONTENT {NULL|EMPTY} ON EMPTY)}.
588     * @param expression The XQuery expression to evaluate; {@code null} is
589     * rejected. (The SQL standard {@code XMLQUERY} syntax requires a string
590     * literal here, not even allowing a general SQL expression.)
591     * @param nullOnEmpty {@code true} to have an empty result sequence returned
592     * as null, {@code false} to have it returned as the empty sequence. May
593     * not be {@code null}.
594     * @param passing A row value, each of whose columns is supplied to the
595     * query as a parameter. A named column (typically named with {@code AS})
596     * becomes a predeclared external variable of the same name (in no
597     * namespace), typed according to the column's SQL type. At most one column
598     * may be named {@code "."}, and if present is bound as the context item.
599     * ({@code ?column?}, the name PostgreSQL gives an otherwise-unnamed column,
600     * is accepted too, so the context item can often be given with no
601     * {@code AS} at all; but PostgreSQL likes to invent column names from any
602     * function or type name in the value expression, so only {@code AS "."} is
603     * sure to work.) PL/Java's internal JDBC uppercases all column names, so
604     * the corresponding variables must be spelled in upper case in the query.
605     * It is safest to uppercase them in the SQL also (where, in PostgreSQL,
606     * that requires quoting them) so as not to rely on the JDBC uppercasing,
607     * which is likely to be dropped in a future PL/Java release.
608     * @param namespaces An even-length String array of pairs, each a namespace
609     * prefix followed by the URI to bind it to. A zero-length prefix sets the
610     * default element and type namespace, and only then may the URI also be
611     * zero-length, declaring unprefixed elements to be in no namespace.
612     * @return The query result as XML content, or null (when a context item is
613     * passed and its SQL value is null, or per {@code nullOnEmpty}).
614     * @throws SQLException for a null {@code expression} or
615     * {@code nullOnEmpty} (SQLSTATE 22004), or if the query fails.
616     */
617    @Function(
618        schema="javatest",
619        onNullInput=CALLED,
620        settings="IntervalStyle TO iso_8601"
621    )
622    public static SQLXML xq_ret_content(
623        String expression, Boolean nullOnEmpty,
624        @SQLType(defaultValue={}) ResultSet passing,
625        @SQLType(defaultValue={}) String[] namespaces)
626        throws SQLException
627    {
628        // The standard allows only a string literal for the expression (it
629        // may not even be dynamic), so null is certainly not acceptable.
630        if ( null == expression )
631            throw new SQLDataException(
632                "XMLQUERY expression may not be null", "22004");
633
634        if ( null == nullOnEmpty )
635            throw new SQLDataException(
636                "XMLQUERY nullOnEmpty may not be null", "22004");
637
638        try
639        {
640            XdmSequenceIterator<XdmItem> result =
641                evalXQuery(expression, passing, namespaces);
642            if ( null == result )
643                return null;
644            return returnContent(result, nullOnEmpty);
645        }
646        catch ( SaxonApiException | XPathException e )
647        {
648            throw new SQLException(e.getMessage(), "10000", e);
649        }
650    }
651
652    /**
653     * An implementation of {@code XMLEXISTS(expression
654     * PASSING BY VALUE passing)}, using genuine XQuery.
655     *<p>
656     * Because the function name matches a reserved word, a call must either
657     * schema-qualify or double-quote it.
658     * @param expression The XQuery expression to evaluate; {@code null} is
659     * rejected. (The SQL standard {@code XMLEXISTS} syntax requires a string
660     * literal here, not even allowing a general SQL expression.)
661     * @param passing A row value, each of whose columns is supplied to the
662     * query as a parameter. A named column (typically named with {@code AS})
663     * becomes a predeclared external variable of the same name (in no
664     * namespace), typed according to the column's SQL type. At most one column
665     * may be named {@code "."}, and if present is bound as the context item.
666     * ({@code ?column?}, the name PostgreSQL gives an otherwise-unnamed column,
667     * is accepted too, so the context item can often be given with no
668     * {@code AS} at all; but PostgreSQL likes to invent column names from any
669     * function or type name in the value expression, so only {@code AS "."} is
670     * sure to work.) PL/Java's internal JDBC uppercases all column names, so
671     * the corresponding variables must be spelled in upper case in the query.
672     * It is safest to uppercase them in the SQL also (where, in PostgreSQL,
673     * that requires quoting them) so as not to rely on the JDBC uppercasing,
674     * which is likely to be dropped in a future PL/Java release.
675     * @param namespaces An even-length String array of pairs, each a namespace
676     * prefix followed by the URI to bind it to. A zero-length prefix sets the
677     * default element and type namespace, and only then may the URI also be
678     * zero-length, declaring unprefixed elements to be in no namespace.
679     * @return True if the expression evaluates to a nonempty sequence, false
680     * if it evaluates to an empty one, and null if a context item is passed
681     * and its SQL value is null.
682     * @throws SQLException for a null {@code expression} (SQLSTATE 22004), or
683     * if the query fails.
684     */
685    @Function(
686        schema="javatest",
687        onNullInput=CALLED,
688        settings="IntervalStyle TO iso_8601"
689    )
690    public static Boolean xmlexists(
691        String expression,
692        @SQLType(defaultValue={}) ResultSet passing,
693        @SQLType(defaultValue={}) String[] namespaces)
694        throws SQLException
695    {
696        // The standard allows only a string literal for the expression (it
697        // may not even be dynamic), so null is certainly not acceptable.
698        if ( null == expression )
699            throw new SQLDataException(
700                "XMLEXISTS expression may not be null", "22004");
701
702        XdmSequenceIterator<XdmItem> items =
703            evalXQuery(expression, passing, namespaces);
704        if ( null == items )
705            return null;
706
707        // Closed only if non-empty: no further items need be read.
708        boolean nonEmpty = items.hasNext();
709        if ( nonEmpty )
710            items.close();
711        return nonEmpty;
712    }
713
714    /**
715     * Implementation factor of XMLEXISTS and XMLQUERY.
716     * @return null if a context item is passed and its SQL value is null
717     */
718    private static XdmSequenceIterator<XdmItem> evalXQuery(
719        String expression, ResultSet passing, String[] namespaces)
720        throws SQLException
721    {
722        Binding.Assemblage bindings = new BindingsFromResultSet(passing, true);
723
724        try
725        {
726            XQueryCompiler xqc = createStaticContextWithPassedTypes(
727                bindings, namespaceBindings(namespaces));
728
729            XQueryEvaluator xqe = xqc.compile(expression).load();
730
731            if ( storePassedValuesInDynamicContext(xqe, bindings, true) )
732                return null;
733
734            /*
735             * For now, punt on whether the <XQuery expression> is evaluated
736             * with XML 1.1 or 1.0 lexical rules....  XXX
737             */
738            return xqe.iterator();
739        }
740        catch ( SaxonApiException | XPathException e )
741        {
742            throw new SQLException(e.getMessage(), "10000", e);
743        }
744    }
745
746    /**
747     * Perform the final steps of <em>something</em> {@code RETURNING CONTENT},
748     * with or without {@code nullOnEmpty}.
749     *<p>
750     * The effects are to be the same as if the supplied sequence were passed
751     * as {@code $EXPR} to {@code document{$EXPR}}.
752     */
753    private static SQLXML returnContent(
754        Iterator<XdmItem> x, boolean nullOnEmpty)
755    throws SQLException, SaxonApiException, XPathException
756    {
757        if ( nullOnEmpty  &&  ! x.hasNext() )
758            return null;
759
760        SQLXML rsx = s_dbc.createSQLXML();
761        /*
762         * Keep this simple by requesting a specific type of Result rather
763         * than letting PL/Java choose. It happens (though this is a detail of
764         * the implementation) that SAXResult won't be a bad choice.
765         */
766        SAXResult sr = rsx.setResult(SAXResult.class);
767        /*
768         * Michael Kay recommends the following as equivalent to the SQL/XML-
769         * mandated behavior of evaluating document{$x}.
770         * https://sourceforge.net/p/saxon/mailman/message/36969060/
771         */
772        SAXDestination d = new SAXDestination(sr.getHandler());
773        Receiver r = d.getReceiver(
774            s_s9p.getUnderlyingConfiguration().makePipelineConfiguration(),
775            new SerializationProperties());
776        r.open();
777        while ( x.hasNext() )
778            r.append(x.next().getUnderlyingValue());
779        r.close();
780        return rsx;
781    }
782
783    /**
784     * An implementation of (much of) XMLTABLE, using genuine XML Query.
785     *<p>
786     * The {@code columns} array must supply a valid XML Query expression for
787     * every column in the column definition list that follows the call of this
788     * function in SQL, except that the column for ordinality, if wanted, is
789     * identified by a {@code null} entry in {@code columns}. Syntax sugar in
790     * the standard allows an omitted column expression to imply an element test
791     * for an element with the same name as the column; that doesn't work here.
792     *<p>
793     * For now, this implementation lacks the ability to specify defaults for
794     * when a column expression produces an empty sequence. It is possible to
795     * do defaults explicitly by rewriting a query expression <em>expr</em> as
796     * {@code let $e := }<em>expr</em>{@code return if(empty($e))then $D else $e}
797     * and supplying the default <em>D</em> as another query parameter, though
798     * such defaults will be evaluated only once when {@code xmltable} is called
799     * and will not be able to refer to other values in an output row.
800     * @param rows The single XQuery expression whose result sequence generates
801     * the rows of the resulting table. Must not be null.
802     * @param columns Array of XQuery expressions, exactly as many as result
803     * columns in the column definition list that follows the SQL call to this
804     * function. This array must not be null. It is allowed for one element (and
805     * no more than one) to be null, marking the corresponding column to be
806     * "FOR ORDINALITY" (the column must be of "exact numeric with scale zero"
807     * type; PostgreSQL supports 64-bit row counters, so {@code int8} is
808     * recommended).
809     * @param passing A row value whose columns will be supplied to the query
810     * as parameters, just as described for
811     * {@link #xq_ret_content xq_ret_content()}. If a context item is supplied,
812     * it is the context item for the {@code rows} query (the {@code columns}
813     * queries get their context item from the {@code rows} query's result). Any
814     * named parameters supplied here are available both in the {@code rows}
815     * expression and (though this goes beyond the standard) in every expression
816     * of {@code columns}, with their values unchanging from row to row.
817     * @param namespaces An even-length String array where, of each pair of
818     * consecutive entries, the first is a namespace prefix and the second is
     * the URI to which to bind it, just as described for
820     * {@link #xq_ret_content xq_ret_content()}.
821     * @param base64 whether the effective, in-scope 'xmlbinary' setting calls
822     * for base64 or (the default, false) hexadecimal.
823     */
824    @Function(
825        schema="javatest",
826        onNullInput=CALLED,
827        settings="IntervalStyle TO iso_8601"
828    )
829    public static ResultSetProvider xmltable(
830        String rows, String[] columns,
831        @SQLType(defaultValue={}) ResultSet passing,
832        @SQLType(defaultValue={}) String[] namespaces,
833        @SQLType(defaultValue="false") Boolean base64)
834        throws SQLException
835    {
836        if ( null == rows )
837            throw new SQLDataException(
838                "XMLTABLE row expression may not be null", "22004");
839
840        if ( null == columns )
841            throw new SQLDataException(
842                "XMLTABLE columns expression array may not be null", "22004");
843
844        if ( null == base64 )
845            throw new SQLDataException(
846                "XMLTABLE base64 parameter may not be null", "22004");
847        XMLBinary enc = base64 ? XMLBinary.BASE64 : XMLBinary.HEX;
848
849        Binding.Assemblage rowBindings =
850            new BindingsFromResultSet(passing, true);
851
852        Iterable<Map.Entry<String,String>> namespacepairs =
853            namespaceBindings(namespaces);
854
855        XQueryEvaluator[] columnXQEs = new XQueryEvaluator[ columns.length ];
856        SequenceType[] columnStaticTypes = new SequenceType[ columns.length ];
857
858        try
859        {
860            XQueryCompiler rowXQC = createStaticContextWithPassedTypes(
861                rowBindings, namespacepairs);
862
863            XQueryExecutable rowXQX = rowXQC.compile(rows);
864
865            Binding.Assemblage columnBindings =
866                new BindingsFromXQX(rowXQX, rowBindings);
867
868            XQueryCompiler columnXQC = createStaticContextWithPassedTypes(
869                columnBindings, namespacepairs);
870
871            boolean ordinalitySeen = false;
872            for ( int i = 0; i < columns.length; ++ i )
873            {
874                String expr = columns[i];
875                if ( null == expr )
876                {
877                    if ( ordinalitySeen )
878                        throw new SQLSyntaxErrorException(
879                            "No more than one column expression may be null " +
880                            "(=> \"for ordinality\")", "42611");
881                    ordinalitySeen = true;
882                    continue;
883                }
884                XQueryExecutable columnXQX = columnXQC.compile(expr);
885                columnStaticTypes[i] = makeSequenceType(
886                    columnXQX.getResultItemType(),
887                    columnXQX.getResultCardinality());
888                columnXQEs[i] = columnXQX.load();
889                storePassedValuesInDynamicContext(
890                    columnXQEs[i], columnBindings, false);
891            }
892
893            XQueryEvaluator rowXQE = rowXQX.load();
894            XdmSequenceIterator<XdmItem> rowIterator;
895            if ( storePassedValuesInDynamicContext(rowXQE, rowBindings, true) )
896                rowIterator = (XdmSequenceIterator<XdmItem>)
897                    XdmEmptySequence.getInstance().iterator();
898            else
899                rowIterator = rowXQE.iterator();
900            return new S9(rowIterator, columnXQEs, columnStaticTypes, enc);
901        }
902        catch ( SaxonApiException | XPathException e )
903        {
904            throw new SQLException(e.getMessage(), "10000", e);
905        }
906    }
907
908    /**
909     * Called when PostgreSQL has no need for more rows of the tabular result.
910     */
911    @Override
912    public void close()
913    {
914        m_sequenceIterator.close();
915    }
916
917    /**
918     * <a id='assignRowValues'>Produce and return one row</a> of
919     * the {@code XMLTABLE} result table per call.
920     *<p>
921     * The row expression has already been compiled and its evaluation begun,
922     * producing a sequence iterator. The column XQuery expressions have all
923     * been compiled and are ready to evaluate, and the compiler's static
924     * analysis has bounded the data types they will produce. Because of the
925     * way the set-returning function protocol works, we don't know the types
926     * of the SQL output columns yet, until the first call of this function,
927     * when the {@code receive} parameter's {@code ResultSetMetaData} can be
928     * inspected to find out. So that will be the first thing done when called
929     * with {@code currentRow} of zero.
930     *<p>
931     * Each call will then: (a) get the next value from the row expression's
932     * sequence iterator, then for each column, (b) evaluate that column's
933     * XQuery expression on the row value, and (c) assign that column's result
934     * to the SQL output column, casting to the proper type (which the SQL/XML
935     * spec has very exacting rules on how to do).
936     *<p>
937     * A note before going any further: this implementation, while fairly
938     * typical of a PostgreSQL set-returning user function, is <em>not</em> the
939     * way the SQL/XML spec defines {@code XMLTABLE}. The official behavior of
940     * {@code XMLTABLE} is defined in terms of a rewriting, at the SQL level,
941     * into a much-expanded SQL query where each result column appears as an
942     * {@code XMLQUERY} call applying the column expression, wrapped in an
943     * {@code XMLCAST} to the result column type (with a
944     * {@code CASE WHEN XMLEXISTS} thrown in to support column defaults).
945     *<p>
946     * As an ordinary user function, this example cannot rely on any fancy
947     * query rewriting during PostgreSQL's parse analysis. The slight syntax
948     * desugaring needed to transform a standard {@code XMLTABLE} call into a
949     * call of this "xmltable" is not too hard to learn and do by hand, but no
950     * one would ever want to write out by hand the whole longwinded "official"
951     * expansion prescribed in the spec. So this example is a compromise.
952     *<p>
953     * The main thing lost in the compromise is the handling of column defaults.
954     * The full rewriting with per-column SQL expressions means that each
955     * column default expression can be evaluated exactly when/if needed, which
956     * is often the desired behavior. This implementation as an ordinary
957     * function, whose arguments all get evaluated ahead of the call, can't
958     * really do that. Otherwise, there's nothing in the spec that's inherently
959     * unachievable in this implementation.
960     *<p>
961     * Which brings us to the matter of casting each column expression result
962     * to the proper type for its SQL result column.
963     *<p>
964     * Like any spec, {@code SQL/XML} does not mandate that an implementation
965     * must be done in exactly the way presented in the spec (rewritten so each
966     * column value is produced by an {@code XMLQUERY} wrapped in an
967     * {@code XMLCAST}). The requirement is to produce the equivalent result.
968     *<p>
969     * A look at the rewritten query shows that each column XQuery result value
970     * must be representable as some value in SQL's type system, not once, but
971     * twice: first as the result returned by {@code XMLQUERY} and passed along
972     * to {@code XMLCAST}, and finally with the output column's type as the
973     * result of the {@code XMLCAST}.
974     *<p>
975     * Now, the output column type can be whatever is wanted. Importantly, it
976     * can be either an XML type, or any ordinary SQL scalar type, like a
977     * {@code float} or a {@code date}. Likewise, the XQuery column expression
978     * may have produced some atomic value (like an {@code xs:double} or
979     * {@code xs:date}), or some XML node, or any sequence of any of those.
980     *<p>
981     * What are the choices for the type in the middle: the SQL value returned
982     * by {@code XMLQUERY} and passed on to {@code XMLCAST}?
983     *<p>
984     * There are two. An ISO-standard SQL {@code XMLQUERY} can specify
985     * {@code RETURNING SEQUENCE} or {@code RETURNING CONTENT}. The first option
986     * produces the type {@code XML(SEQUENCE)}, a useful type that PostgreSQL
987     * does not currently have. {@code XML(SEQUENCE)} can hold exactly whatever
988     * an XQuery expression can produce: a sequence of any length, of any
989     * mixture of atomic values and XML nodes (even such oddities as attribute
990     * nodes outside of any element), in any order. An {@code XML(SEQUENCE)}
991     * value need not look anything like what "XML" normally brings to mind.
992     *<p>
993     * With the other option, {@code RETURNING CONTENT}, the result of
994     * {@code XMLQUERY} has to be something that PostgreSQL's {@code xml} type
995     * could store: a serialized document with XML structure, but without the
996     * strict requirements of exactly one root element with no text outside it.
997     * At the limit, a completely non-XMLish string of ordinary text is
998     * perfectly acceptable XML {@code CONTENT}, as long as it uses the right
999     * {@code &...;} escapes for any characters that could look like XML markup.
1000     *<p>
1001     * {@code XMLCAST} is able to accept either form as input, and deliver it
1002     * to the output column as whatever type is needed. But the spec leaves no
1003     * wiggle room as to which form to use:
1004     *<ul>
1005     *<li>If the result column type is {@code XML(SEQUENCE)}, then the
1006     * {@code XMLQUERY} is to specify {@code RETURNING SEQUENCE}. It produces
1007     * the column's result type directly, so the {@code XMLCAST} has nothing
1008     * to do.
1009     *<li>In every other case (<em>every</em> other case), the {@code XMLQUERY}
1010     * is to specify {@code RETURNING CONTENT}.
1011     *</ul>
1012     *<p>
1013     * At first blush, that second rule should sound crazy. Imagine a column
1014     * definition like
1015     *<pre>
1016     * growth float8 PATH 'math:pow(1.0 + $RATE, count(year))'
1017     *</pre>
1018     * The expression produces an {@code xs:double}, which can be assigned
1019     * directly to a PostgreSQL {@code float8}, but the rule in the spec will
1020     * have it first converted to a decimal string representation, made into
1021     * a text node, wrapped in a document node, and returned as XML, to be
1022     * passed along to {@code XMLCAST}, which parses it, discards the wrapping
1023     * document node, parses the text content as a double, and returns that as
1024     * a proper value of the result column type (which, in this example, it
1025     * already is).
1026     *<p>
1027     * The spec does not go into why this rule was chosen. The only rationale
1028     * that makes sense to me is that the {@code XML(SEQUENCE)} data type
1029     * is an SQL feature (X190) that not every implementation will support,
1030     * so the spec has to define {@code XMLTABLE} using a rewritten query that
1031     * can work on systems that do not have that type. (PostgreSQL itself, at
1032     * present, does not have it.)
1033     *<p>
1034     * The first rule, when {@code XML(SEQUENCE)} is the result column type,
1035     * will naturally never be in play except on a system that has that type, in
1036     * which case it can be used directly. But even such a system must still
1037     * produce, in all other cases, results that match what a system without
1038     * that type would produce. All those cases are therefore defined as if
1039     * going the long way through {@code XML(CONTENT)}.
1040     *<p>
1041     * Whenever the XQuery expression can be known to produce a (possibly empty
1042     * or) singleton sequence of an atomic type, the long round trip can be
1043     * shown to be idempotent, and we can skip right to casting the atomic type
1044     * to the SQL result column type. A few other cases could be short-circuited
1045     * the same way. But in general, for cases involving nodes or non-singleton
1046     * sequences, it is safest to follow the spec punctiliously; the steps are
1047     * defined in terms of XQuery constructs like {@code document {...}} and
1048     * {@code data()}, which have specs of their own with many traps for the
1049     * unwary, and the XQuery library provides implementations of them that are
1050     * already tested and correct.
1051     *<p>
1052     * Though most of the work can be done by the XQuery library, it may be
1053     * helpful to look closely at just what the specification entails.
1054     *<p>
1055     * Again, but for the case of an {@code XML(SEQUENCE)} result column, in all
1056     * other cases the result must pass through
1057     * {@code XMLQUERY(... RETURNING CONTENT EMPTY ON EMPTY)}. That, in turn, is
1058     * defined as equivalent to {@code XMLQUERY(... RETURNING SEQUENCE)} with
1059     * the result then passed to {@code XMLDOCUMENT(... RETURNING CONTENT)},
1060     * whose behavior is that of a
1061     * <a href='https://www.w3.org/TR/xquery-31/#id-documentConstructors'>
1062     * document node constructor</a> in XQuery, with
1063     * <a href='https://www.w3.org/TR/xquery-31/#dt-construction-mode'>
1064     * construction mode</a> {@code preserve}. The first step of that behavior
1065     * is the same as Step 1e in the processing of
1066     * <a href='https://www.w3.org/TR/xquery-31/#id-content'>direct element
1067     * constructor content</a>. The remaining steps are those laid out for the
1068     * document node constructor.
1069     *<p>
1070     * Clarity demands flattening this nest of specifications into a single
1071     * ordered list of the steps to apply:
1072     *<ul>
1073     *<li>Any item in the sequence that is an array is flattened (its elements
1074     * become items in the sequence).
1075     *<li>If any item is a function, {@code err:XQTY0105} is raised.
1076     *<li>Any sequence {@code $s} of adjacent atomic values is replaced by
1077     * {@code string-join($s, ' ')}.
1078     *<li>Any XML node in the sequence is copied (as detailed in the spec).
1079     *<li>After all the above, any document node that may exist in the resulting
1080     * sequence is flattened (replaced by its children).
1081     *<li>A single text node is produced for any run of adjacent text nodes in
1082     * the sequence (including any that have newly become adjacent by the
1083     * flattening of document nodes), by concatenation with no separator (unlike
1084     * the earlier step where atomic values were concatenated with a space as
1085     * the separator).
1086     *<li>If the sequence directly contains any attribute or namespace node,
1087     * {@code err:XPTY0004} is raised. <b>More on this below.</b>
1088     *<li>The sequence resulting from the preceding steps is wrapped in one
1089     * new document node (as detailed in the spec).
1090     *</ul>
1091     *<p>
1092     * At this point, the result could be returned to SQL as a value of
1093     * {@code XML(CONTENT(ANY))} type, to be passed to an {@code XMLCAST}
1094     * invocation. This implementation avoids that, and simply proceeds with the
1095     * existing Java in-memory representation of the document tree, to the
1096     * remaining steps entailed in an {@code XMLCAST} to the output column type:
1097     *<ul>
1098     *<li>If the result column type is an XML type, rewriting would turn the
1099     * {@code XMLCAST} into a simple {@code CAST} and that's that. Otherwise,
1100     * the result column has some non-XML, SQL type, and:
1101     *<li>The algorithm "Removing XQuery document nodes from an XQuery sequence"
1102     * is applied. By construction, we know the only such node is the one the
1103     * whole sequence was recently wrapped in, two steps ago (you get your
1104     * house back, you get your dog back, you get your truck back...).
1105     *<li>That sequence of zero or more XML nodes is passed to the
1106     *<a href='https://www.w3.org/TR/xpath-functions-31/#func-data'>fn:data</a>
1107     * function, producing a sequence of zero or more atomic values, which will
1108     * all have type {@code xs:untypedAtomic} (because the document-wrapping
1109     * stringified any original atomic values and wrapped them in text nodes,
1110     * for which the
1111     * <a href='https://www.w3.org/TR/xpath-datamodel-31/#acc-summ-typed-value'>
1112     * typed-value</a> is {@code xs:untypedAtomic} by definition). This sequence
1113     * also has cardinality zero-or-more, and may be shorter or longer than the
1114     * original.
1115     *<li>If the sequence is empty, the result column is assigned {@code NULL}
1116     * (or the column's default value, if one was specified). Otherwise, the
1117     * sequence is known to have length one or more, and:
1118     *<li>The spec does not say this (which may be an oversight or bug), but the
1119     * sequence must be checked for length greater than one, raising
1120     * {@code err:XPTY0004} in that case. The following steps require it to be a
1121     * singleton.
1122     *<li>It is labeled as a singleton sequence of {@code xs:anyAtomicType} and
1123     * used as input to an XQuery {@code cast as} expression. (Alternatively, it
1124     * could be labeled a one-or-more sequence of {@code xs:anyAtomicType},
1125     * leaving the length check to be done by {@code cast as}, which would raise
1126     * the same error {@code err:XPTY0004}, if longer than one.)
1127     *<li>The {@code cast as} is to the XQuery type determined as in
1128     * {@code determineXQueryFormalType} below, based on the SQL type of the
1129     * result column; or, if the SQL type is a date/time type with no time zone,
1130     * there is a first {@code cast as} to a specific XSD date/time type, which
1131     * is (if it has a time zone) first adjusted to UTC, then stripped of its
1132     * time zone, followed by a second {@code cast as} from that type to the one
1133     * determined from the result column type. Often, that will be the same type
1134     * as was used for the time zone adjustment, and the second {@code cast as}
1135     * will have nothing to do.
1136     *<li>The XQuery value resulting from the cast is converted and assigned to
1137     * the SQL-typed result column, a step with many details but few surprises,
1138     * therefore left for the morbidly curious to explore in the code. The flip
1139     * side of the time zone removal described above happens here: if the SQL
1140     * column type expects a time zone and the incoming value lacks one, it is
1141     * given a zone of UTC.
1142     *</ul>
1143     *<p>
1144     * The later steps above, those following the length-one check, are
1145     * handled by {@code xmlCastAsNonXML} below.
1146     *<p>
1147     * The earlier steps, from the start through the {@code XMLCAST} early steps
1148     * of document-node unwrapping, can all be applied by letting the original
1149     * result sequence be {@code $EXPR} in the expression:
1150     *<pre>
1151     * declare construction preserve;
1152     * data(document { $EXPR } / child::node())
1153     *</pre>
1154     * which may seem a bit of an anticlimax after seeing how many details lurk
1155     * behind those tidy lines of code.
1156     *<p>
1157     * <strong>About bare attribute nodes</strong>
1158     *<p>
1159     * One consequence of the rules above deserves special attention.
1160     * Consider something like:
1161     *<pre>
1162     * XMLTABLE('.' PASSING '&lt;a foo="bar"/&gt;' COLUMNS c1 VARCHAR PATH 'a/@foo');
1163     *</pre>
1164     *<p>
1165     * The result of the column expression is an XML attribute node all on its
1166     * own, with name {@code foo} and value {@code bar}, not enclosed in any
1167     * XML element. In the data type {@code XML(SEQUENCE)}, an attribute node
1168     * can appear standalone like that, but not in {@code XML(CONTENT)}.
1169     *<p>
1170     * Db2, Oracle, and even the XPath-based pseudo-XMLTABLE built into
1171     * PostgreSQL, will all accept that query and produce the result "bar".
1172     *<p>
1173     * However, a strict interpretation of the spec cannot produce that result,
1174     * because the result column type ({@code VARCHAR}) is not
1175     * {@code XML(SEQUENCE)}, meaning the result must be as if passed through
1176     * {@code XMLDOCUMENT(... RETURNING CONTENT)}, and the XQuery
1177     * {@code document { ... }} constructor is required to raise
1178     * {@code err:XPTY0004} upon encountering any bare attribute node. The
1179     * apparently common, convenient behavior of returning the attribute node's
1180     * value component is not, strictly, conformant.
1181     *<p>
1182     * This implementation will raise {@code err:XPTY0004}. That can be avoided
1183     * by simply wrapping any such bare attribute in {@code data()}:
1184     *<pre>
1185     * ... COLUMNS c1 VARCHAR PATH 'a/data(@foo)');
1186     *</pre>
1187     *<p>
1188     * It is possible the spec has an editorial mistake and did not intend to
1189     * require an error for this usage, in which case this implementation can
1190     * be changed to match a future clarification of the spec.
1191     */
1192    @Override
1193    public boolean assignRowValues(ResultSet receive, long currentRow)
1194    throws SQLException
1195    {
1196        if ( 0 == currentRow )
1197        {
1198            m_outBindings = new BindingsFromResultSet(receive, m_columnXQEs);
1199            int i = -1;
1200            AtomizingFunction atomizer = null;
1201            for ( Binding.Parameter p : m_outBindings )
1202            {
1203                SequenceType staticType = m_columnStaticTypes [ ++ i ];
1204                /*
1205                 * A null in m_columnXQEs identifies the ORDINALITY column,
1206                 * if any. Assign nothing to m_atomize[i], it won't be used.
1207                 */
1208                if ( null == m_columnXQEs [ i ] )
1209                    continue;
1210
1211                if ( Types.SQLXML == p.typeJDBC() )
1212                    continue;
1213
1214                /*
1215                 * Ok, the output column type is non-XML; choose an atomizer,
1216                 * either a simple identity if the result type is statically
1217                 * known to be zero-or-one atomic, or the long way through the
1218                 * general-purpose one. If the type is statically known to be
1219                 * the empty sequence (weird, but not impossible), the identity
1220                 * atomizer suffices and we're on to the next column.
1221                 */
1222                OccurrenceIndicator occur = staticType.getOccurrenceIndicator();
1223                if ( OccurrenceIndicator.ZERO == occur )
1224                {
1225                    m_atomize [ i ] = (v, col) -> v;
1226                    continue;
1227                }
1228
1229                /* So, it isn't known to be empty. If the column
1230                 * expression type isn't known to be atomic, or isn't known to
1231                 * be zero-or-one, then the general-purpose atomizer--a trip
1232                 * through data(document { ... } / child::node())--must be used.
1233                 * This atomizer will definitely produce a sequence of length
1234                 * zero or one, raising XPTY0004 otherwise. So the staticType
1235                 * can be replaced by xs:anyAtomicType?. xmlCastAsNonXML will
1236                 * therefore be passed xs:anyAtomicType, as in the spec.
1237                 *    BUT NO ... Saxon is more likely to find a converter from
1238                 * xs:untypedAtomic than from xs:anyAtomicType.
1239                 */
1240                ItemType itemType = staticType.getItemType();
1241                if ( occur.allowsMany()
1242                    || ! ItemType.ANY_ATOMIC_VALUE.subsumes(itemType)
1243                    /*
1244                     * The following tests may be punctilious to a fault. If we
1245                     * have a bare Saxon atomic type of either xs:base64Binary
1246                     * or xs:hexBinary type, Saxon will happily and successfully
1247                     * convert it to a binary string; but if we have the same
1248                     * thing as a less-statically-determinate type that we'll
1249                     * put through the atomizer, the conversion will fail unless
1250                     * its encoding matches the m_xmlbinary setting. That could
1251                     * seem weirdly unpredictable to a user, so we'll just
1252                     * (perversely) disallow the optimization (which would
1253                     * succeed) in the cases where the specified, unoptimized
1254                     * behavior would be to fail.
1255                     */
1256                    || ItemType.HEX_BINARY.subsumes(itemType)
1257                        && (XMLBinary.HEX != m_xmlbinary)
1258                    || ItemType.BASE64_BINARY.subsumes(itemType)
1259                        && (XMLBinary.BASE64 != m_xmlbinary)
1260                   )
1261                {
1262                    if ( null == atomizer )
1263                    {
1264                        XQueryEvaluator docWrapUnwrap = PredefinedQueryHolders
1265                            .DocumentWrapUnwrap.INSTANCE.load();
1266                        atomizer = (v, col) ->
1267                        {
1268                            docWrapUnwrap.setExternalVariable(
1269                                PredefinedQueryHolders.s_qEXPR, v);
1270                            v = docWrapUnwrap.evaluate();
1271                            /*
1272                             * It's already zero-or-one, or XPTY0004 was thrown
1273                             */
1274                            return v;
1275                        };
1276                    }
1277                    m_atomize [ i ] = atomizer;
1278                    /*
1279                     * The spec wants anyAtomicType below instead of
1280                     * untypedAtomic. But Saxon's getConverter is more likely
1281                     * to fail to find a converter from anyAtomicType to an
1282                     * arbitrary type, than from untypedAtomic. So use that.
1283                     */
1284                    m_columnStaticTypes [ i ] = s_01untypedAtomic;
1285                }
1286                else
1287                {
1288                    /*
1289                     * We know we'll be getting zero-or-one atomic value, so
1290                     * the atomizing function can be the identity.
1291                     */
1292                    m_atomize [ i ] = (v, col) -> v;
1293                }
1294            }
1295        }
1296
1297        if ( ! m_sequenceIterator.hasNext() )
1298            return false;
1299
1300        ++ currentRow; // for use as 1-based ordinality column
1301
1302        XdmItem it = m_sequenceIterator.next();
1303
1304        int i = 0;
1305        for ( Binding.Parameter p : m_outBindings )
1306        {
1307            XQueryEvaluator xqe = m_columnXQEs [ i ];
1308            AtomizingFunction atomizer = m_atomize [ i ];
1309            SequenceType staticType = m_columnStaticTypes [ i++ ];
1310
1311            if ( null == xqe )
1312            {
1313                receive.updateLong( i, currentRow);
1314                continue;
1315            }
1316
1317            try
1318            {
1319                xqe.setContextItem(it);
1320
1321                if ( null == atomizer ) /* => result type was found to be XML */
1322                {
1323                    receive.updateSQLXML(
1324                        i, returnContent(xqe.iterator(), false));
1325                    continue;
1326                }
1327
1328                XdmValue x1 = xqe.evaluate();
1329                x1 = atomizer.apply(x1, i);
1330
1331                /*
1332                 * The value is now known to be atomic and either exactly
1333                 * one or zero-or-one. May as well just use size() to see if
1334                 * it's empty.
1335                 */
1336                if ( 0 == x1.size() )
1337                {
1338                    receive.updateNull(i); // XXX Handle defaults some day
1339                    continue;
1340                }
1341                XdmAtomicValue av = (XdmAtomicValue)x1.itemAt(0);
1342                xmlCastAsNonXML(
1343                    av, staticType.getItemType(), p, receive, i, m_xmlbinary);
1344            }
1345            catch ( SaxonApiException | XPathException e )
1346            {
1347                throw new SQLException(e.getMessage(), "10000", e);
1348            }
1349        }
1350        return true;
1351    }
1352
1353    /**
1354     * Store the values of any passed parameters and/or context item into the
1355     * dynamic context, returning true if the overall query should
1356     * short-circuit and return null.
1357     *<p>
1358     * The specification requires the overall query to return null if a
1359     * context item is specified in the bindings and its value is null.
1360     * @param xqe XQuery evaluator into which to store the values.
1361     * @param passing The bindings whose values should be installed.
1362     * @param setContextItem True to handle the context item, if present in the
1363     * bindings. False to skip any processing of the context item, in cases
1364     * where the caller will handle that.
1365     * @return True if the overall query's return should be null, false if the
1366     * query should proceed to evaluation.
1367     */
1368    private static boolean storePassedValuesInDynamicContext(
1369        XQueryEvaluator xqe, Binding.Assemblage passing, boolean setContextItem)
1370        throws SQLException, SaxonApiException
1371    {
1372        /*
1373         * Is there or is there not a context item?
1374         */
1375        if ( ! setContextItem  ||  null == passing.contextItem() )
1376        {
1377            /* "... there is no context item in XDC." */
1378        }
1379        else
1380        {
1381            Object cve = passing.contextItem().valueJDBC();
1382            if ( null == cve )
1383                return true;
1384            XdmValue ci;
1385            if ( cve instanceof XdmNode ) // XXX support SEQUENCE input someday
1386            {
1387                ci = (XdmNode)cve;
1388            }
1389            else
1390                ci = xmlCastAsSequence(
1391                    cve, XMLBinary.HEX, passing.contextItem().typeXS());
1392            switch ( ci.size() )
1393            {
1394            case 0:
1395                /* "... there is no context item in XDC." */
1396                break;
1397            case 1:
1398                xqe.setContextItem(ci.itemAt(0));
1399                break;
1400            default:
1401                throw new SQLDataException(
1402                    "invalid XQuery context item", "2200V");
1403            }
1404        }
1405
1406        /*
1407         * For each <XML query variable> XQV:
1408         */
1409        for ( Binding.Parameter p : passing )
1410        {
1411            String name = p.name();
1412            Object v = p.valueJDBC();
1413            XdmValue vv;
1414            if ( null == v )
1415                vv = XdmEmptySequence.getInstance();
1416            else if ( v instanceof XdmNode ) // XXX support SEQUENCE someday
1417            {
1418                vv = (XdmNode)v;
1419            }
1420            else
1421                vv = xmlCastAsSequence(
1422                    v, XMLBinary.HEX, p.typeXS().getItemType());
1423            xqe.setExternalVariable(new QName(name), vv);
1424        }
1425
1426        return false;
1427    }
1428
1429    /**
1430     * Return a s9api {@link XQueryCompiler XQueryCompiler} with static context
1431     * preconfigured as the Syntax Rules dictate.
1432     * @param pt The single-row ResultSet representing the passed parameters
1433     * and context item, if any.
1434     * @param nameToIndex A Map, supplied empty, that on return will map
1435     * variable names for the dynamic context to column indices in {@code pt}.
1436     * If a context item was supplied, its index will be entered in the map
1437     * with the null key.
1438     */
1439    private static XQueryCompiler createStaticContextWithPassedTypes(
1440        Binding.Assemblage pt, Iterable<Map.Entry<String,String>> namespaces)
1441        throws SQLException, XPathException
1442    {
1443        XQueryCompiler xqc = s_s9p.newXQueryCompiler();
1444        xqc.declareNamespace(
1445            "sqlxml", "http://standards.iso.org/iso9075/2003/sqlxml");
1446        // https://sourceforge.net/p/saxon/mailman/message/20318550/ :
1447        xqc.declareNamespace("xdt", W3C_XML_SCHEMA_NS_URI);
1448
1449        for ( Map.Entry<String,String> e : namespaces )
1450            xqc.declareNamespace(e.getKey(), e.getValue());
1451
1452        /*
1453         * This business of predeclaring global external named variables
1454         * is not an s9api-level advertised ability in Saxon, hence the
1455         * various getUnderlying.../getStructured... methods here to access
1456         * the things that make it happen.
1457         */
1458        StaticQueryContext sqc = xqc.getUnderlyingStaticContext();
1459
1460        for ( Binding.Parameter p : pt )
1461        {
1462            String name = p.name();
1463            int ct = p.typeJDBC();
1464            assertCanCastAsXmlSequence(ct, name);
1465            SequenceType st = p.typeXS();
1466            sqc.declareGlobalVariable(
1467                new QName(name).getStructuredQName(),
1468                st.getUnderlyingSequenceType(), null, true);
1469        }
1470
1471        /*
1472         * Apply syntax rules to the context item, if any.
1473         */
1474        Binding.ContextItem ci = pt.contextItem();
1475        if ( null != ci )
1476        {
1477            int ct = ci.typeJDBC();
1478            assertCanCastAsXmlSequence(ct, "(context item)");
1479            ItemType it = ci.typeXS();
1480            xqc.setRequiredContextItemType(it);
1481        }
1482
1483        return xqc;
1484    }
1485
1486    /**
1487     * Check that something's type is "convertible to XML(SEQUENCE)
1488     * according to the Syntax Rules of ... <XML cast specification>."
1489     * That turns out not to be a very high bar; not much is excluded
1490     * by those rules except collection, row, structured, or
1491     * reference typed <value expression>s.
1492     * @param jdbcType The {@link Types JDBC type} to be checked.
1493     * @param what A string to include in the exception message if the
1494     * check fails.
1495     * @throws SQLException if {@code jdbcType} is one of the prohibited types.
1496     */
1497    private static void assertCanCastAsXmlSequence(int jdbcType, String what)
1498    throws SQLException
1499    {
1500        if ( Types.ARRAY == jdbcType || Types.STRUCT == jdbcType
1501            || Types.REF == jdbcType )
1502            throw new SQLSyntaxErrorException(
1503                "The type of \"" + what + "\" is not suitable for " +
1504                "XMLCAST to XML(SEQUENCE).", "42804");
1505    }
1506
1507    /**
1508     * The "determination of an XQuery formal type notation" algorithm.
1509     *<p>
1510     * This is relied on for parameters and context items passed to
1511     * {@code XMLQUERY} and therefore, {@code XMLTABLE} (and also, in the spec,
1512     * {@code XMLDOCUMENT} and {@code XMLPI}). Note that it does <em>not</em>
1513     * take an {@code XMLBinary} parameter, but rather imposes hexadecimal form
1514     * unconditionally, so in the contexts where this is called, any
1515     * {@code xmlbinary} setting is ignored.
1516     * @param b a {@code Binding} from which the JDBC type can be retrieved
1517     * @param forContextItem whether the type being derived is for a context
1518     * item or (if false) for a named parameter.
1519     * @return a {@code SequenceType} (always a singleton in the
1520     * {@code forContextItem} case)
1521     */
1522    private static SequenceType determineXQueryFormalType(
1523        Binding b, boolean forContextItem)
1524        throws SQLException
1525    {
1526        int sd = b.typeJDBC();
1527        OccurrenceIndicator suffix;
1528        /*
1529         * The SQL/XML standard uses a formal type notation straight out of
1530         * the XQuery 1.0 and XPath 2.0 Formal Semantics document, and that is
1531         * strictly more fine-grained and expressive than anything you can
1532         * actually say in the form of XQuery SequenceTypes. This method will
1533         * simply return the nearest approximation in the form of a sequence
1534         * type; some of the standard's distinct formal type notations will
1535         * collapse into the same SequenceType.
1536         *  That also means the various cases laid out in the standard will,
1537         * here, all simply assign some ItemType to 'it', and therefore the
1538         * tacking on of the occurrence suffix can be factored out for the
1539         * very end.
1540         */
1541        ItemType it;
1542
1543        if ( forContextItem )
1544            suffix = OccurrenceIndicator.ONE;
1545        // else if sd is XML(SEQUENCE) - we don't have this type yet
1546        //  suffix = OccurrenceIndicator.ZERO_OR_MORE;
1547        /*
1548         * Go through the motions of checking isNullable, though PL/Java's JDBC
1549         * currently hardcodes columnNullableUnknown. Maybe someday it won't.
1550         */
1551        else if ( b.knownNonNull() )
1552            suffix = OccurrenceIndicator.ONE;
1553        else
1554            suffix = OccurrenceIndicator.ZERO_OR_ONE;
1555
1556        // Define ET... for {DOCUMENT|CONTENT}(XMLSCHEMA) case ... not supported
1557
1558        // if SD is XML(DOCUMENT(UNTYPED)) - not currently tracked, can't tell
1559        //  it = s_itf.getDocumentTest(item type for xdt:untyped);
1560        // else if SD is XML(DOCUMENT(ANY)) - not currently tracked, can't tell
1561        //  it = s_itf.getDocumentTest(item type for xs:anyType);
1562        // else if SD is XML(DOCUMENT(XMLSCHEMA)) - unsupported and can't tell
1563        //  it = s_itf.getDocumentTest(the ET... we didn't define earlier)
1564        // else if SD is XML(CONTENT(UNTYPED)) - which we're not tracking ...
1565        //  at s9api granularity, there's no test for this that's not same as:
1566        // else if SD is XML(CONTENT(ANY)) - which we must assume for ANY XML
1567        if ( Types.SQLXML == sd )
1568            it = s_itf.getNodeKindTest(DOCUMENT);
1569        // else if SD is XML(CONTENT(XMLSCHEMA)) - we don't track and can't tell
1570        //  at s9api granularity, there's no test that means this anyway.
1571        // else if SD is XML(SEQUENCE) - we really should have this type, but no
1572        //  it = it.ANY_ITEM
1573        else // it ain't XML, it's some SQL type
1574        {
1575            ItemType xmlt = mapSQLDataTypeToXMLSchemaDataType(
1576                b, XMLBinary.HEX, Nulls.ABSENT);
1577            // ItemType pt = xmlt.getUnderlyingItemType().getPrimitiveType()
1578            //  .somehowGetFromUnderlyingPTBackToS9apiPT() - ugh, the hard part
1579            /*
1580             * The intention here is to replace any derived type with the
1581             * primitive type it is based on, *except* for three types that are
1582             * technically derived: integer (from decimal), yearMonthDuration
1583             * and dayTimeDuration (from duration). Those are not replaced, so
1584             * they stand, as if they were honorary primitive types.
1585             *
1586             * For now, it's simplified greatly by mapSQLDataType... skipping
1587             * the construction of a whole derived XML Schema snippet, and just
1588             * returning the type we want anyway. Also, no need to dive under
1589             * the s9api layer to try to make getPrimitiveType work.
1590             */
1591            it = xmlt;
1592        }
1593
1594        SequenceType xftn = makeSequenceType(it, suffix);
1595        return xftn;
1596    }
1597
1598    @SuppressWarnings("fallthrough")
1599    private static ItemType mapSQLDataTypeToXMLSchemaDataType(
1600        Binding b, XMLBinary xmlbinary, Nulls nulls)
1601        throws SQLException
1602    {
1603        /*
1604         * Nearly all of the fussing about specified in the standard
1605         * for this method is to create XML Schema derived types that
1606         * accurately reflect the typmod information for the SQL type
1607         * in question. Then, in determineXQueryFormalType (the only
1608         * client of this method so far!), all of that is thrown away
1609         * and our painstakingly specified derived type is replaced with
1610         * the primitive type we based it on. That simplifies a lot. :)
1611         * For now, forget the derived XML Schema declarations, and just
1612         * return the primitive types they would be based on.
1613         *
1614         * The need for the nulls parameter vanishes if no XML Schema snippets
1615         * are to be generated.
1616         *
1617         * If the full XML Schema snippet generation ever proves to be
1618         * needed, one hacky way to get it would be with a SELECT
1619         * query_to_xmlschema('SELECT null::type-in-question', false, false,
1620         * '') where the same derivations are already implemented (though it
1621         * produces some different results; that work may have been done from
1622         * an earlier version of the standard).
1623         */
1624        switch ( b.typeJDBC() )
1625        {
1626        case Types.CHAR:
1627        case Types.VARCHAR:
1628        case Types.CLOB:
1629            return ItemType.STRING;
1630
1631        case Types.BINARY:
1632        case Types.VARBINARY:
1633        case Types.BLOB:
1634            return XMLBinary.HEX == xmlbinary ?
1635                ItemType.HEX_BINARY : ItemType.BASE64_BINARY;
1636
1637        case Types.NUMERIC:
1638        case Types.DECIMAL:
1639            /*
1640             * Go through the motions to get the scale and do this right,
1641             * though PL/Java's getScale currently hardcodes a -1 return.
1642             * Maybe someday it won't.
1643             */
1644            int scale = b.scale();
1645            return 0 == scale ? ItemType.INTEGER : ItemType.DECIMAL;
1646
1647        case Types.INTEGER:
1648            return ItemType.INT;
1649        case Types.SMALLINT:
1650            return ItemType.SHORT;
1651        case Types.BIGINT:
1652            return ItemType.LONG;
1653
1654        case Types.REAL:
1655            return ItemType.FLOAT; // could check P, MINEXP, MAXEXP here.
1656        case Types.FLOAT:
1657            assert false; // PG should always report either REAL or DOUBLE
1658            /*FALLTHROUGH*/
1659        case Types.DOUBLE:
1660            return ItemType.DOUBLE;
1661
1662        case Types.BOOLEAN:
1663            return ItemType.BOOLEAN;
1664
1665        case Types.DATE:
1666            return ItemType.DATE;
1667
1668        case Types.TIME:
1669            return ItemType.TIME;
1670
1671        case Types.TIME_WITH_TIMEZONE:
1672            return ItemType.TIME; // restrictive facet would make sense here
1673
1674        case Types.TIMESTAMP:
1675            return ItemType.DATE_TIME;
1676
1677        case Types.TIMESTAMP_WITH_TIMEZONE:
1678            return ItemType.DATE_TIME_STAMP; // xsd 1.1 equivalent of facet!
1679
1680        // There's no JDBC Types.INTERVAL; handle it after switch
1681
1682        // Good luck finding out from JDBC if it's a domain
1683
1684        // PG doesn't have DISTINCT types per se
1685
1686        // PL/Java's JDBC doesn't support PostgreSQL's arrays as ARRAY
1687
1688        // PG doesn't seem to have multisets (JDBC doesn't grok them either)
1689
1690        // Types.SQLXML we could recognize, but for determineFormalTypes it has
1691        // been handled already, and it's not yet clear what would be
1692        // appropriate to return (short of the specified XMLSchema snippet),
1693        // probably just document.
1694
1695        // So punt all these for now; what hasn't been handled in this switch
1696        // can be handled specially after the switch falls through, and what
1697        // isn't, isn't supported just now.
1698        }
1699
1700        String typeName = b.typePG();
1701        if ( "interval".equals(typeName) )
1702        {
1703            /*
1704             * XXX This isn't right yet; it needs to be refined to a
1705             * YEAR_MONTH_DURATION or a DAY_TIME_DURATION in the appropriate
1706             * cases, and for that it needs access to the typmod information
1707             * for the type, which getColumnTypeName doesn't now provide.
1708             */
1709            return ItemType.DURATION;
1710        }
1711
1712        throw new SQLNonTransientException(String.format(
1713            "Mapping SQL type \"%s\" to XML type not supported", typeName),
1714            "0N000");
1715    }
1716
1717    /**
1718     * Implement that portion of the {@code <XML cast>} specification where
1719     * the target data type is sequence, and (for now, anyway) the source is
1720     * not an XML type; the only caller, so far, handles that case separately.
1721     * @param v The SQL value to be cast (in the form of an Object from JDBC).
1722     * @param enc Whether binary values should be encoded in hex or base 64.
1723     * @param xst The formal static XS type derived from the SQL type of v.
1724     * @return An {@code XdmValue}, {@code null} if {@code v} is null.
1725     */
1726    private static XdmValue xmlCastAsSequence(
1727        Object v, XMLBinary enc, ItemType xst)
1728        throws SQLException
1729    {
1730        if ( null == v )
1731            return null;
1732        /*
1733         * What happens next in the standard is one of the most breathtaking
1734         * feats of obscurantism in the whole document. It begins, plausibly
1735         * enough, by using mapValuesOfSQLTypesToValuesOfXSTypes to produce
1736         * the lexical form of the XS type (but with XML metacharacters escaped,
1737         * if it's a string type). Then:
1738         * 1. That lexical form is to be fed to an XML parser, producing an
1739         *    XQuery document node that NEVER can be a well-formed document (it
1740         *    is expected to satisfy document { text ? } where the text node is
1741         *    just the lexical value form we started with, now with the escaped
1742         *    metacharacters unescaped again as a consequence of parsing). For
1743         *    some source types, mapValuesOfSQLTypesToValuesOfXSTypes can
1744         *    produce a string that parses to XML with element content: row
1745         *    types, arrays, multisets, XML. Clearly, those cases can't satisfy
1746         *    the formal type assumed here, and they are cases this routine
1747         *    won't be expected to handle: XML handled separately by the caller,
1748         *    arrays/structs/etc. being ruled out by assertCanCastAsXmlSequence.
1749         * 2. That document node is made the $TEMP parameter of an XML Query,
1750         *    '$TEMP cast as XSBTN' (where XSBTN is a QName for the result type
1751         *    chosen according to the rules) and the sequence resulting from
1752         *    that query is the result of the cast.
1753         *
1754         * Step (1) can only succeed if the XML parser doesn't insist on well-
1755         * formed documents, as the stock JRE parser emphatically does. And the
1756         * ultimate effect of that whole dance is that the cast in (2) casts a
1757         * document node to the target type, which means the document node gets
1758         * atomized, which, for a document node, means everything is thrown away
1759         * save the concatenated values of its descendant text nodes (or node,
1760         * in this case; haven't we seen that value somewhere before?), assigned
1761         * the type xs:untypedAtomic, and then that's operated on by the cast.
1762         *
1763         * Because this implementation's in PL/Java, the value v received here
1764         * has already been mapped from an SQL type to a Java type according to
1765         * JDBC's rules as PL/Java implements them, so there's one degree of
1766         * removal from the specified algorithm anyway. And the s9api
1767         * XdmAtomicValue already has constructors from several of the expected
1768         * Java types, as well as one taking a lexical form and explicit type.
1769         * Beause this is /example/ code, rather than slavishly implementing the
1770         * specified algorithm, it will assume that that is either roughly or
1771         * exactly equivalent to what these s9api constructors in fact do, and
1772         * just use them; conformance-testing code could then check for exact
1773         * equivalence if there's enough interest to write it.
1774         *
1775         * So, we will NOT start with this:
1776         *
1777         *   String xmlv = mapValuesOfSQLTypesToValuesOfXSTypes(
1778         *       v, enc, Nulls.ABSENT, true);
1779         *
1780         * Instead, we'll derive this type first ...
1781         */
1782        ItemType xsbt;
1783        // year-month interval type => xsbt = YEAR_MONTH_DURATION
1784        // day-time interval type => xsbt = DAY_TIME_DURATION
1785        xsbt = xst; // we have a winner!
1786        // xs non-built-in atomic type => xsbt = getPrimitiveType(ugh).
1787
1788        /*
1789         * ... and then use this method instead:
1790         */
1791        try
1792        {
1793            return mapJDBCofSQLvalueToXdmAtomicValue(v, enc, xsbt);
1794        }
1795        catch ( SaxonApiException | XPathException e )
1796        {
1797            throw new SQLException(e.getMessage(), "10000", e);
1798        }
1799    }
1800
    /**
     * A function from one {@code AtomicValue} to another that is allowed to
     * throw {@code XPathException}.
     *<p>
     * {@code xmlCastAsNonXML} obtains one of these to cast an atomic value to
     * the XML Schema type corresponding to a result column.
     */
    @FunctionalInterface
    interface CastingFunction
    {
        /**
         * Apply the cast.
         * @param v the atomic value to be cast
         * @return the resulting atomic value
         * @throws XPathException if the cast fails
         */
        AtomicValue apply(AtomicValue v) throws XPathException;
    }
1806
    /**
     * A supplier of a {@link CastingFunction} that is allowed to throw
     * {@code SQLException} or {@code XPathException} while constructing it.
     *<p>
     * {@code xmlCastAsNonXML} passes a lambda of this shape as the second
     * argument of {@code Binding.Parameter.atomicCaster}.
     * NOTE(review): presumably that method only invokes the supplier when it
     * has no caster already available for the source type; confirm against
     * its definition, which is not visible here.
     */
    @FunctionalInterface
    interface CasterSupplier
    {
        /**
         * Construct the casting function.
         * @return a function that performs the cast
         * @throws SQLException if information needed from the binding cannot
         * be obtained
         * @throws XPathException if no suitable cast exists
         */
        CastingFunction get() throws SQLException, XPathException;
    }
1812
    /**
     * A function applied to the {@code XdmValue} produced by evaluating a
     * column expression, before the result is treated as zero or one atomic
     * value.
     *<p>
     * Where the expression's static type already guarantees zero-or-one
     * atomic value, the identity function is used.
     */
    @FunctionalInterface
    interface AtomizingFunction
    {
        /**
         * @param v sequence to be atomized
         * @param columnIndex only to include in exception if result has more
         * than one item
         * @return the atomized value
         * @throws SaxonApiException if atomization fails
         * @throws XPathException if atomization fails
         */
        XdmValue apply(XdmValue v, int columnIndex)
        throws SaxonApiException, XPathException;
    }
1824
1825    private static XPathException noPrimitiveCast(ItemType vt, ItemType xt)
1826    {
1827        return new XPathException(
1828            "Casting from " + vt.getTypeName() + " to " + xt.getTypeName() +
1829            " can never succeed", "XPTY0004");
1830    }
1831
1832    /**
1833     * Handle the case of XMLCAST to a non-XML target type when the cast operand
1834     * is already a single atomic value.
1835     *<p>
1836     * The caller, if operating on a sequence, must itself handle the case of
1837     * an empty sequence (returning null, per General Rule 4c in :2011), or a
1838     * sequence of length greater than one (raising XPTY0004, which is not
1839     * specified in :2011, but the exclusion of such a sequence is implicit in
1840     * rules 4g and 4h; Db2 silently drops all but the first item, unlike
1841     * Oracle, which raises XPTY0004).
1842     * @param av The atomic operand value
1843     * @param p The parameter binding, recording the needed type information
1844     * @param rs ResultSet into which the value will be stored
1845     * @param col Index of the result column
1846     */
1847    private static void xmlCastAsNonXML(
1848        XdmAtomicValue av, ItemType vt,
1849        Binding.Parameter p, ResultSet rs, int col, XMLBinary enc)
1850        throws SQLException, XPathException
1851    {
1852        XdmAtomicValue bv;
1853        ItemType xt = p.typeXT(enc);
1854
1855        CastingFunction caster = p.atomicCaster(vt, () ->
1856        {
1857            ConversionRules rules = vt.getConversionRules();
1858            Converter c1;
1859            ItemType t1;
1860            Converter c2;
1861
1862            switch ( p.typeJDBC() )
1863            {
1864            case Types.TIMESTAMP:
1865                t1 = ItemType.DATE_TIME;
1866                break;
1867            case Types.TIME:
1868                t1 = ItemType.TIME;
1869                break;
1870            case Types.DATE:
1871                t1 = ItemType.DATE;
1872                break;
1873            default:
1874                c1 = rules.getConverter(
1875                    (AtomicType)vt.getUnderlyingItemType(),
1876                    (AtomicType)xt.getUnderlyingItemType());
1877                if ( null == c1 )
1878                    throw noPrimitiveCast(vt, xt);
1879                return (AtomicValue v) -> c1.convert(v).asAtomic();
1880            }
1881            /*
1882             * Nothing left here but the rest of the three date/timey cases
1883             * partly handled above.
1884             */
1885            c1 = rules.getConverter(
1886                (AtomicType)vt.getUnderlyingItemType(),
1887                (AtomicType)t1.getUnderlyingItemType());
1888            c2 = rules.getConverter(
1889                (AtomicType)t1.getUnderlyingItemType(),
1890                (AtomicType)xt.getUnderlyingItemType());
1891            if ( null == c1  ||  null == c2 )
1892                throw noPrimitiveCast(vt, xt);
1893            return (AtomicValue v) ->
1894            {
1895                v = c1.convert(v).asAtomic();
1896                v = ((CalendarValue)v).adjustTimezone(0).removeTimezone();
1897                return c2.convert(v).asAtomic();
1898            };
1899        });
1900
1901        bv = makeAtomicValue(caster.apply(av.getUnderlyingValue()));
1902
1903        if ( ItemType.STRING.subsumes(xt) )
1904            rs.updateString(col, bv.getStringValue());
1905
1906        else if ( ItemType.HEX_BINARY.subsumes(xt) )
1907            rs.updateBytes(col,
1908                ((HexBinaryValue)bv.getUnderlyingValue()).getBinaryValue());
1909        else if ( ItemType.BASE64_BINARY.subsumes(xt) )
1910            rs.updateBytes(col,
1911                ((Base64BinaryValue)bv.getUnderlyingValue()).getBinaryValue());
1912
1913        else if ( ItemType.DECIMAL.subsumes(xt) )
1914            rs.updateObject(col, bv.getValue());
1915
1916        /*
1917         * The standard calls for throwing "data exception - numeric value out
1918         * of range" rather than forwarding a float or double inf, -inf, or nan
1919         * to SQL, but PostgreSQL supports those values, and these conversions
1920         * preserve them.
1921         *  Because of the collapsing in typeXT(), xt will never be FLOAT,
1922         * only DOUBLE. JDBC is supposed to handle assigning a double to a float
1923         * column, anyway.
1924         */
1925        else if ( ItemType.DOUBLE.subsumes(xt) )
1926            rs.updateObject(col, bv.getValue());
1927
1928        else if ( ItemType.DATE.subsumes(xt) )
1929            rs.updateObject(col, bv.getLocalDate());
1930        else if ( ItemType.DATE_TIME.subsumes(xt) )
1931        {
1932            if ( ((CalendarValue)bv.getUnderlyingValue()).hasTimezone() )
1933                rs.updateObject(col, bv.getOffsetDateTime());
1934            else
1935            {
1936                LocalDateTime jv = bv.getLocalDateTime();
1937                rs.updateObject(col,
1938                    Types.TIMESTAMP_WITH_TIMEZONE == p.typeJDBC() ?
1939                        jv.atOffset(UTC) : jv);
1940            }
1941        }
1942        else if ( ItemType.TIME.subsumes(xt) ) // no handy tz/notz distinction
1943        {
1944            if ( ((CalendarValue)bv.getUnderlyingValue()).hasTimezone() )
1945                rs.updateObject(col, OffsetTime.parse(bv.getStringValue()));
1946            else
1947            {
1948                LocalTime jv = LocalTime.parse(bv.getStringValue());
1949                rs.updateObject(col,
1950                    Types.TIME_WITH_TIMEZONE == p.typeJDBC() ?
1951                        jv.atOffset(UTC) : jv);
1952            }
1953        }
1954
1955        else if ( ItemType.YEAR_MONTH_DURATION.subsumes(xt) )
1956            rs.updateString(col, toggleIntervalRepr(bv.getStringValue()));
1957        else if ( ItemType.DAY_TIME_DURATION.subsumes(xt) )
1958            rs.updateString(col, toggleIntervalRepr(bv.getStringValue()));
1959        else if ( ItemType.DURATION.subsumes(xt) ) // need this case for now
1960            rs.updateString(col, toggleIntervalRepr(bv.getStringValue()));
1961
1962        else if ( ItemType.BOOLEAN.subsumes(xt) )
1963            rs.updateObject(col, bv.getValue());
1964        else
1965            throw new SQLNonTransientException(String.format(
1966                "Mapping XML type \"%s\" to SQL value not supported", xt),
1967                "0N000");
1968    }
1969
1970    /**
1971     * Like the "Mapping values of SQL data types to values of XML Schema
1972     * data types" algorithm, except after the SQL values have already been
1973     * converted to Java values according to JDBC rules.
1974     *<p>
1975     * Also, this uses Saxon s9api constructors for the XML Schema values, which
1976     * accept the Java types directly. As a consequence, where the target type
1977     * {@code xst} is {@code xs:hexBinary} or {@code xs:base64Binary}, that type
1978     * will be produced, regardless of the passed {@code encoding}. This might
1979     * not be strictly correct, but is probably safest until an oddity in the
1980     * spec can be clarified: {@code determineXQueryFormalType} will always
1981     * declare {@code xs:hexBinary} as the type for an SQL byte string, and it
1982     * would violate type safety to construct a value here that honors the
1983     * {@code encoding} parameter but isn't of the declared formal type.
1984     */
1985    private static XdmAtomicValue mapJDBCofSQLvalueToXdmAtomicValue(
1986        Object dv, XMLBinary encoding, ItemType xst)
1987        throws SQLException, SaxonApiException, XPathException
1988    {
1989        if ( ItemType.STRING.equals(xst) )
1990            return new XdmAtomicValue((String)dv);
1991
1992        if ( ItemType.HEX_BINARY.equals(xst) )
1993            return makeAtomicValue(new HexBinaryValue((byte[])dv));
1994        if ( ItemType.BASE64_BINARY.equals(xst) )
1995            return makeAtomicValue(new Base64BinaryValue((byte[])dv));
1996
1997        if ( ItemType.INTEGER.equals(xst) )
1998            return new XdmAtomicValue(((BigInteger)dv).toString(), xst);
1999        if ( ItemType.DECIMAL.equals(xst) )
2000            return new XdmAtomicValue((BigDecimal)dv);
2001        if ( ItemType.INT.equals(xst) )
2002            return new XdmAtomicValue((Integer)dv);
2003        if ( ItemType.SHORT.equals(xst) )
2004            return new XdmAtomicValue((Short)dv);
2005        if ( ItemType.LONG.equals(xst) )
2006            return new XdmAtomicValue((Long)dv);
2007        if ( ItemType.FLOAT.equals(xst) )
2008            return new XdmAtomicValue((Float)dv);
2009        if ( ItemType.DOUBLE.equals(xst) )
2010            return new XdmAtomicValue((Double)dv);
2011
2012        if ( ItemType.BOOLEAN.equals(xst) )
2013            return new XdmAtomicValue((Boolean)dv);
2014
2015        if ( ItemType.DATE.equals(xst) )
2016        {
2017            if ( dv instanceof LocalDate )
2018                return new XdmAtomicValue((LocalDate)dv);
2019            return new XdmAtomicValue(dv.toString(), xst);
2020        }
2021
2022        if ( ItemType.TIME.equals(xst) )
2023            return new XdmAtomicValue(dv.toString(), xst);
2024
2025        if ( ItemType.DATE_TIME.equals(xst) )
2026        {
2027            if ( dv instanceof LocalDateTime )
2028                return new XdmAtomicValue((LocalDateTime)dv);
2029            return new XdmAtomicValue(dv.toString(), xst);
2030        }
2031
2032        if ( ItemType.DATE_TIME_STAMP.equals(xst) )
2033        {
2034            if ( dv instanceof OffsetDateTime )
2035                return new XdmAtomicValue((OffsetDateTime)dv);
2036            return new XdmAtomicValue(dv.toString(), xst);
2037        }
2038
2039        if ( ItemType.DURATION.equals(xst) )
2040            return new XdmAtomicValue(toggleIntervalRepr((String)dv), xst);
2041
2042        throw new SQLNonTransientException(String.format(
2043            "Mapping SQL value to XML type \"%s\" not supported", xst),
2044            "0N000");
2045    }
2046
2047    /*
2048     * Toggle the lexical representation of an interval/duration between the
2049     * form PostgreSQL likes and the form XML Schema likes. Only negative values
2050     * are affected. Positive values are returned unchanged, as are those that
2051     * don't fit any expected form; those will probably be reported as malformed
2052     * by whatever tries to consume them.
2053     */
2054    static String toggleIntervalRepr(String lex)
2055    {
2056        Matcher m = s_intervalSigns.matcher(lex);
2057        if ( ! m.matches() )
2058            return lex; // it's weird, just don't touch it
2059        if ( -1 == m.start(1) )
2060        {
2061            if ( -1 != m.start(2)  &&  -1 == m.start(3) ) // it's PG negative
2062                return '-' + lex.replace("-", "");        // make it XS negative
2063        }
2064        else if ( -1 == m.start(2)  &&  -1 != m.start(3) )// it's XS negative
2065            return m.usePattern(s_intervalSignSite)       // make it PG negative
2066                .reset(lex.substring(1)).replaceAll("-");
2067        return lex; // it's either positive, or weird, just don't touch it
2068    }
2069
2070    static Iterable<Map.Entry<String,String>> namespaceBindings(String[] nbs)
2071    throws SQLException
2072    {
2073        if ( 1 == nbs.length % 2 )
2074            throw new SQLSyntaxErrorException(
2075                "Namespace binding array must have even length", "42000");
2076        Map<String,String> m = new HashMap<>();
2077
2078        for ( int i = 0; i < nbs.length; i += 2 )
2079        {
2080            String prefix = nbs[i];
2081            String uri = nbs[1 + i];
2082
2083            if ( null == prefix  ||  null == uri )
2084                throw new SQLDataException(
2085                    "Namespace binding array elements must not be null",
2086                    "22004");
2087
2088            if ( ! "".equals(prefix) )
2089            {
2090                if ( ! isValidNCName(prefix) )
2091                    throw new SQLSyntaxErrorException(
2092                        "Not an XML NCname: \"" + prefix + '"', "42602");
2093                if ( XML_NS_PREFIX.equals(prefix)
2094                    || XMLNS_ATTRIBUTE.equals(prefix) )
2095                    throw new SQLSyntaxErrorException(
2096                        "Namespace prefix may not be xml or xmlns", "42939");
2097                if ( XML_NS_URI.equals(uri)
2098                    || XMLNS_ATTRIBUTE_NS_URI.equals(uri) )
2099                    throw new SQLSyntaxErrorException(
2100                        "Namespace URI has a disallowed value", "42P17");
2101                if ( "".equals(uri) )
2102                    throw new SQLSyntaxErrorException(
2103                        "URI for non-default namespace may not be zero-length",
2104                        "42P17");
2105            }
2106
2107            String was = m.put(prefix.intern(), uri.intern());
2108
2109            if ( null != was )
2110                throw new SQLSyntaxErrorException(
2111                    "Namespace prefix \"" + prefix + "\" multiply bound (" +
2112                    "to \"" + was + "\" and \"" + uri + "\")", "42712");
2113        }
2114
2115        return Collections.unmodifiableSet(m.entrySet());
2116    }
2117
2118    static class Binding
2119    {
2120        String typePG() throws SQLException
2121        {
2122            if ( null != m_typePG )
2123                return m_typePG;
2124            return m_typePG = implTypePG();
2125        }
2126
2127        int typeJDBC() throws SQLException
2128        {
2129            if ( null != m_typeJDBC )
2130                return m_typeJDBC;
2131            int tj = implTypeJDBC();
2132            /*
2133             * The JDBC types TIME_WITH_TIMEZONE and TIMESTAMP_WITH_TIMEZONE
2134             * first appear in JDBC 4.2 / Java 8. PL/Java's JDBC driver does
2135             * not yet return those values. As a workaround until it does,
2136             * recheck here using the PG type name string, if TIME or TIMESTAMP
2137             * is the JDBC type that the driver returned.
2138             *
2139             * Also for backward compatibility, the driver still returns
2140             * Types.OTHER for XML, rather than Types.SQLXML. Check and fix that
2141             * here too.
2142             */
2143            switch ( tj )
2144            {
2145            case Types.OTHER:
2146                if ( "xml".equals(typePG()) )
2147                    tj = Types.SQLXML;
2148                break;
2149            case Types.TIME:
2150                if ( "timetz".equals(typePG()) )
2151                    tj = Types.TIME_WITH_TIMEZONE;
2152                break;
2153            case Types.TIMESTAMP:
2154                if ( "timestamptz".equals(typePG()) )
2155                    tj = Types.TIMESTAMP_WITH_TIMEZONE;
2156                break;
2157            default:
2158            }
2159            return m_typeJDBC = tj;
2160        }
2161
2162        Object valueJDBC() throws SQLException
2163        {
2164            if ( m_valueJDBCValid )
2165                return m_valueJDBC;
2166            /*
2167             * When JDBC 4.2 added support for the JSR 310 date/time types, for
2168             * back-compatibility purposes, it did not change what types a plain
2169             * getObject(...) would return for them, which could break existing
2170             * code. Instead, it's necessary to use the form of getObject that
2171             * takes a Class<?>, and ask for the new classes explicitly.
2172             *
2173             * Similarly, PL/Java up through 1.5.0 has always returned a String
2174             * from getObject for a PostgreSQL xml type. Here, the JDBC standard
2175             * provides that a SQLXML object should be returned, and that should
2176             * happen in a future major PL/Java release, but for now, the plain
2177             * getObject will still return String, so it is also necessary to
2178             * ask for the SQLXML type explicitly. In fact, we will ask for
2179             * XdmNode, as it might be referred to more than once (if a
2180             * parameter), and a SQLXML can't be read more than once, nor would
2181             * there be any sense in building an XdmNode from it more than once.
2182             */
2183            switch ( typeJDBC() )
2184            {
2185            case Types.DATE:
2186                return setValueJDBC(implValueJDBC(LocalDate.class));
2187            case Types.TIME:
2188                return setValueJDBC(implValueJDBC(LocalTime.class));
2189            case Types.TIME_WITH_TIMEZONE:
2190                return setValueJDBC(implValueJDBC(OffsetTime.class));
2191            case Types.TIMESTAMP:
2192                return setValueJDBC(implValueJDBC(LocalDateTime.class));
2193            case Types.TIMESTAMP_WITH_TIMEZONE:
2194                return setValueJDBC(implValueJDBC(OffsetDateTime.class));
2195            case Types.SQLXML:
2196                return setValueJDBC(implValueJDBC(XdmNode.class));
2197            default:
2198            }
2199            return setValueJDBC(implValueJDBC());
2200        }
2201
2202        boolean knownNonNull() throws SQLException
2203        {
2204            if ( null != m_knownNonNull )
2205                return m_knownNonNull;
2206            return m_knownNonNull = implKnownNonNull();
2207        }
2208
2209        int scale() throws SQLException
2210        {
2211            if ( null != m_scale )
2212                return m_scale;
2213            return m_scale = implScale();
2214        }
2215
2216        static class ContextItem extends Binding
2217        {
2218            /**
2219             * Return the XML Schema type of this input binding for a context
2220             * item.
2221             *<p>
2222             * Because it is based on {@code determinXQueryFormalType}, this
2223             * method is not parameterized by {@code XMLBinary}, and will always
2224             * map a binary-string SQL type to {@code xs:hexBinary}.
2225             */
2226            ItemType typeXS() throws SQLException
2227            {
2228                if ( null != m_typeXS )
2229                    return m_typeXS;
2230                SequenceType st = implTypeXS(true);
2231                assert OccurrenceIndicator.ONE == st.getOccurrenceIndicator();
2232                return m_typeXS = st.getItemType();
2233            }
2234
2235            protected ItemType m_typeXS;
2236        }
2237
2238        static class Parameter extends Binding
2239        {
2240            String name()
2241            {
2242                return m_name;
2243            }
2244
2245            SequenceType typeXS() throws SQLException
2246            {
2247                if ( null != m_typeXS )
2248                    return m_typeXS;
2249                return m_typeXS = implTypeXS(false);
2250            }
2251
2252            /**
2253             * Return the XML Schema type collapsed according to the Syntax Rule
2254             * deriving {@code XT} for {@code XMLCAST}.
2255             *<p>
2256             * The intent of the rule is unclear, but it involves collapsing
2257             * certain sets of more-specific types that {@code typeXS} might
2258             * return into common supertypes, for use only in an intermediate
2259             * step of {@code xmlCastAsNonXML}. Unlike {@code typeXS}, this
2260             * method must be passed an {@code XMLBinary} parameter reflecting
2261             * the hex/base64 choice currently in scope.
2262             * @param enc whether to use {@code xs:hexBinary} or
2263             * {@code xs:base64Binary} as the XML Schema type corresponding to a
2264             * binary-string SQL type.
2265             */
2266            ItemType typeXT(XMLBinary enc) throws SQLException
2267            {
2268                throw new UnsupportedOperationException(
2269                    "typeXT() on synthetic binding");
2270            }
2271
2272            /**
2273             * Memoize and return a casting function from a given
2274             * {@code ItemType} to the type of this parameter.
2275             *<p>
2276             * Used only by {@code xmlCastAsNonXML}, which does all the work
2277             * of constructing the function; this merely allows it to be
2278             * remembered, if many casts to the same output parameter will be
2279             * made (as by {@code xmltable}).
2280             */
2281            CastingFunction atomicCaster(ItemType it, CasterSupplier s)
2282            throws SQLException, XPathException
2283            {
2284                throw new UnsupportedOperationException(
2285                    "atomicCaster() on synthetic binding");
2286            }
2287
2288            protected SequenceType m_typeXS;
2289
2290            private final String m_name;
2291
2292            /**
2293             * @param name The SQL name of the parameter
2294             * @param checkName True if the name must be a valid NCName (as for
2295             * an input parameter from SQL to the XML query context), or false
2296             * if the name doesn't matter (as when it describes a result, or the
2297             * sole input value of an XMLCAST.
2298             * @throws SQLException if the name of a checked input parameter
2299             * isn't a valid NCName.
2300             */
2301            protected Parameter(String name, boolean checkName)
2302            throws SQLException
2303            {
2304                if ( checkName  &&  ! isValidNCName(name) )
2305                    throw new SQLSyntaxErrorException(
2306                        "Not an XML NCname: \"" + name + '"', "42602");
2307                m_name = name;
2308            }
2309        }
2310
2311        protected String m_typePG;
2312        protected Integer m_typeJDBC;
2313        protected Boolean m_knownNonNull;
2314        protected Integer m_scale;
2315        private Object m_valueJDBC;
2316        private boolean m_valueJDBCValid;
2317        protected Object setValueJDBC(Object v)
2318        {
2319            m_valueJDBCValid = true;
2320            return m_valueJDBC = v;
2321        }
2322
2323        protected String implTypePG() throws SQLException
2324        {
2325            throw new UnsupportedOperationException(
2326                "typePG() on synthetic binding");
2327        }
2328
2329        protected int implTypeJDBC() throws SQLException
2330        {
2331            throw new UnsupportedOperationException(
2332                "typeJDBC() on synthetic binding");
2333        }
2334
2335        protected boolean implKnownNonNull() throws SQLException
2336        {
2337            throw new UnsupportedOperationException(
2338                "knownNonNull() on synthetic binding");
2339        }
2340
2341        protected int implScale() throws SQLException
2342        {
2343            throw new UnsupportedOperationException(
2344                "scale() on synthetic binding");
2345        }
2346
2347        protected Object implValueJDBC() throws SQLException
2348        {
2349            throw new UnsupportedOperationException(
2350                "valueJDBC() on synthetic binding");
2351        }
2352
2353        /*
2354         * This implementation just forwards to the type-less version, then
2355         * fails if that did not return the wanted type. Override if a smarter
2356         * behavior is possible.
2357         */
2358        protected <T> T implValueJDBC(Class<T> type) throws SQLException
2359        {
2360            return type.cast(implValueJDBC());
2361        }
2362
2363        protected SequenceType implTypeXS(boolean forContextItem)
2364        throws SQLException
2365        {
2366            return determineXQueryFormalType(this, forContextItem);
2367        }
2368
2369        static class Assemblage implements Iterable<Parameter>
2370        {
2371            ContextItem contextItem() { return m_contextItem; }
2372
2373            @Override
2374            public Iterator<Parameter> iterator()
2375            {
2376                return m_params.iterator();
2377            }
2378
2379            protected ContextItem m_contextItem;
2380            protected Collection<Parameter> m_params = Collections.emptyList();
2381        }
2382    }
2383
2384    static class BindingsFromResultSet extends Binding.Assemblage
2385    {
2386        /**
2387         * Construct the bindings from a ResultSet representing input parameters
2388         * to an XML query.
2389         * @param rs ResultSet representing the input parameters. Column names
2390         * "." and "?COLUMN?" are treated specially, and used to supply the
2391         * query's context item; every other column name must be a valid NCName,
2392         * and neither any named parameter nor the context item may be mentioned
2393         * more than once.
2394         * @param checkNames True if the input parameter names matter (a name of
2395         * "." or "?COLUMN?" will define the context item, and any other name
2396         * must be a valid NCName); false to skip such checking (as for the
2397         * single input value to XMLCAST, whose name doesn't matter).
2398         * @throws SQLException if names are duplicated or invalid.
2399         */
2400        BindingsFromResultSet(ResultSet rs, boolean checkNames)
2401        throws SQLException
2402        {
2403            m_resultSet = rs;
2404            m_rsmd = rs.getMetaData();
2405
2406            int nParams = m_rsmd.getColumnCount();
2407            ContextItem contextItem = null;
2408            Map<String,Binding.Parameter> n2b = new HashMap<>();
2409
2410            if ( 0 < nParams )
2411                m_dBuilder = s_s9p.newDocumentBuilder();
2412
2413            for ( int i = 1; i <= nParams; ++i )
2414            {
2415                String label = m_rsmd.getColumnLabel(i);
2416                if ( checkNames  &&
2417                    ("?COLUMN?".equals(label)  ||  ".".equals(label)) )
2418                {
2419                    if ( null != contextItem )
2420                    throw new SQLSyntaxErrorException(
2421                        "Context item supplied more than once (at " +
2422                        contextItem.m_idx + " and " + i + ')', "42712");
2423                    contextItem = new ContextItem(i);
2424                    continue;
2425                }
2426
2427                Parameter was =
2428                    (Parameter)n2b.put(
2429                        label, new Parameter(label, i, checkNames));
2430                if ( null != was )
2431                    throw new SQLSyntaxErrorException(
2432                        "Name \"" + label + "\" duplicated at positions " +
2433                        was.m_idx + " and " + i, "42712");
2434            }
2435
2436            m_contextItem = contextItem;
2437            m_params = n2b.values();
2438        }
2439
2440        /**
2441         * Construct the bindings from a ResultSet representing output
2442         * parameters (as from XMLTABLE).
2443         * @param rs ResultSet representing the result parameters. Names have
2444         * no particular significance and are not subject to any checks.
2445         * @param exprs Compiled evaluators for the supplied column expressions.
2446         * The number of these must match the number of columns in {@code rs}.
2447         * One of these (and no more than one; the caller will have enforced
2448         * that) is allowed to be null, making the corresponding column
2449         * "FOR ORDINALITY". An ordinality column will be checked to ensure it
2450         * has an SQL type that is (ahem) "exact numeric with scale 0 (zero)."
2451         * May be null if this is some other general-purpose output result set,
2452         * not for an XMLTABLE.
2453         * @throws SQLException if numbers of columns and expressions don't
2454         * match, or there is an ordinality column and its type is not suitable.
2455         */
2456        @SuppressWarnings("fallthrough")
2457        BindingsFromResultSet(ResultSet rs, XQueryEvaluator[] exprs)
2458        throws SQLException
2459        {
2460            m_resultSet = rs;
2461            m_rsmd = rs.getMetaData();
2462
2463            int nParams = m_rsmd.getColumnCount();
2464            if ( null != exprs  &&  nParams != exprs.length )
2465                throw new SQLSyntaxErrorException(
2466                    "Not as many supplied column expressions as output columns",
2467                    "42611");
2468
2469            Binding.Parameter[] ps = new Binding.Parameter[ nParams ];
2470
2471            for ( int i = 1; i <= nParams; ++i )
2472            {
2473                String label = m_rsmd.getColumnLabel(i);
2474                Parameter p = new Parameter(label, i, false);
2475                ps [ i - 1 ] = p;
2476                if ( null != exprs  &&  null == exprs [ i - 1 ] )
2477                {
2478                    switch ( p.typeJDBC() )
2479                    {
2480                    case Types.INTEGER:
2481                    case Types.SMALLINT:
2482                    case Types.BIGINT:
2483                        break;
2484                    case Types.NUMERIC:
2485                    case Types.DECIMAL:
2486                        int scale = p.scale();
2487                        if ( 0 == scale  ||  -1 == scale )
2488                            break;
2489                        /*FALLTHROUGH*/
2490                    default:
2491                        throw new SQLSyntaxErrorException(
2492                            "Column FOR ORDINALITY must have an exact numeric" +
2493                            " type with scale zero.", "42611");
2494                    }
2495                }
2496            }
2497
2498            m_params = asList(ps);
2499        }
2500
2501        private ResultSet m_resultSet;
2502        private ResultSetMetaData m_rsmd;
2503        DocumentBuilder m_dBuilder;
2504
2505        <T> T typedValueAtIndex(int idx, Class<T> type) throws SQLException
2506        {
2507            if ( XdmNode.class != type )
2508                return m_resultSet.getObject(idx, type);
2509            try
2510            {
2511                SQLXML sx = m_resultSet.getObject(idx, SQLXML.class);
2512                return type.cast(
2513                    m_dBuilder.build(sx.getSource((Class<Source>)null)));
2514            }
2515            catch ( SaxonApiException e )
2516            {
2517                throw new SQLException(e.getMessage(), "10000", e);
2518            }
2519        }
2520
2521        class ContextItem extends Binding.ContextItem
2522        {
2523            final int m_idx;
2524
2525            ContextItem(int index) { m_idx = index; }
2526
2527            protected String implTypePG() throws SQLException
2528            {
2529                return m_rsmd.getColumnTypeName(m_idx);
2530            }
2531
2532            protected int implTypeJDBC() throws SQLException
2533            {
2534                return m_rsmd.getColumnType(m_idx);
2535            }
2536
2537            protected int implScale() throws SQLException
2538            {
2539                return m_rsmd.getScale(m_idx);
2540            }
2541
2542            protected Object implValueJDBC() throws SQLException
2543            {
2544                return m_resultSet.getObject(m_idx);
2545            }
2546
2547            protected <T> T implValueJDBC(Class<T> type) throws SQLException
2548            {
2549                return typedValueAtIndex(m_idx, type);
2550            }
2551        }
2552
2553        class Parameter extends Binding.Parameter
2554        {
2555            final int m_idx;
2556            private ItemType m_typeXT;
2557            private CastingFunction m_atomCaster;
2558            private ItemType m_lastCastFrom;
2559
2560            Parameter(String name, int index, boolean isInput)
2561            throws SQLException
2562            {
2563                super(name, isInput);
2564                m_idx = index;
2565            }
2566
2567            @Override
2568            ItemType typeXT(XMLBinary enc) throws SQLException
2569            {
2570                if ( null != m_typeXT )
2571                    return m_typeXT;
2572
2573                ItemType it =
2574                    mapSQLDataTypeToXMLSchemaDataType(this, enc, Nulls.ABSENT);
2575                if ( ! ItemType.ANY_ATOMIC_VALUE.subsumes(it) )
2576                    return m_typeXT = it;
2577
2578                if ( it.equals(ItemType.INTEGER) )
2579                {
2580                    int tj = typeJDBC();
2581                    if ( Types.NUMERIC == tj || Types.DECIMAL == tj )
2582                        it = ItemType.DECIMAL;
2583                }
2584                else if ( ItemType.INTEGER.subsumes(it) )
2585                    it = ItemType.INTEGER;
2586                else if ( ItemType.FLOAT.subsumes(it) )
2587                    it = ItemType.DOUBLE;
2588                else if ( ItemType.DATE_TIME_STAMP.subsumes(it) )
2589                    it = ItemType.DATE_TIME;
2590
2591                return m_typeXT = it;
2592            }
2593
2594            @Override
2595            CastingFunction atomicCaster(ItemType it, CasterSupplier s)
2596            throws SQLException, XPathException
2597            {
2598                if ( null == m_atomCaster || ! it.equals(m_lastCastFrom) )
2599                {
2600                    m_atomCaster = s.get();
2601                    m_lastCastFrom = it;
2602                }
2603                return m_atomCaster;
2604            }
2605
2606            protected String implTypePG() throws SQLException
2607            {
2608                return m_rsmd.getColumnTypeName(m_idx);
2609            }
2610
2611            protected int implTypeJDBC() throws SQLException
2612            {
2613                return m_rsmd.getColumnType(m_idx);
2614            }
2615
2616            protected boolean implKnownNonNull() throws SQLException
2617            {
2618                return columnNoNulls == m_rsmd.isNullable(m_idx);
2619            }
2620
2621            protected int implScale() throws SQLException
2622            {
2623                return m_rsmd.getScale(m_idx);
2624            }
2625
2626            protected Object implValueJDBC() throws SQLException
2627            {
2628                return m_resultSet.getObject(m_idx);
2629            }
2630
2631            protected <T> T implValueJDBC(Class<T> type) throws SQLException
2632            {
2633                return typedValueAtIndex(m_idx, type);
2634            }
2635        }
2636    }
2637
2638    static class BindingsFromXQX extends Binding.Assemblage
2639    {
2640        /**
2641         * Construct a new assemblage of bindings for the static context of an
2642         * XMLTABLE column expression. It will have the same named-parameter
2643         * bindings passed to the row expression, but the static type of the
2644         * context item will be the result type of the row expression. The
2645         * {@code ContextItem} in this assemblage will have no associated value;
2646         * the caller is responsible for retrieving that from the row evaluator
2647         * and storing it in the column expression context every iteration.
2648         * @param xqx The result of compiling the row expression; its
2649         * compiler-determined static result type will be used as the static
2650         * context item type.
2651         * @param params The bindings supplied to the row expression. Its named
2652         * parameters will be copied as the named parameters here.
2653         */
2654        BindingsFromXQX(XQueryExecutable xqx, Binding.Assemblage params)
2655        {
2656            m_params = params.m_params;
2657            m_contextItem = new ContextItem(xqx.getResultItemType());
2658        }
2659
2660        static class ContextItem extends Binding.ContextItem
2661        {
2662            ContextItem(ItemType it)
2663            {
2664                m_typeXS = it;
2665                /*
2666                 * There needs to be a dummy JDBC type to return when queried
2667                 * for purposes of assertCanCastAsXmlSequence. It can literally
2668                 * be any type outside of the few that method rejects. Because
2669                 * the XS type is already known, nothing else will need to ask
2670                 * for this, or care.
2671                 */
2672                m_typeJDBC = Types.OTHER;
2673            }
2674        }
2675    }
2676
2677    /*
2678     * The XQuery-regular-expression-based functions added in 9075-2:2006.
2679     *
2680     * For each function below, a parameter is marked //strict if the spec
2681     * explicitly says the result is NULL when that parameter is NULL. The
2682     * parameters not marked //strict (including the non-standard w3cNewlines
2683     * added here) all have non-null defaults, so by executive decision, these
2684     * functions will all get the onNullInput=RETURNS_NULL treatment, so none of
2685     * the null-checking has to be done here. At worst, that may result in a
2686     * mystery NULL return rather than an error, if someone explicitly passes
2687     * NULL to one of the parameters with a non-null default.
2688     */
2689
2690    /*
2691     * Check valid range of 'from' and supported 'usingOctets'.
2692     *
2693     * Every specified function that has a start position FROM and a USING
2694     * clause starts with a check that the start position is in range. This
2695     * function factors out that test, returning true if the start position is
2696     * /out of range/ (triggering the caller to return the special result
2697     * defined for that case), returning false if the value is in range, or
2698     * throwing an exception if the length unit specified in the USING clause
2699     * isn't supported.
2700     */
2701    private static boolean usingAndLengthCheck(
2702        String in, int from, boolean usingOctets, String function)
2703    throws SQLException
2704    {
2705        if ( usingOctets )
2706            throw new SQLFeatureNotSupportedException(
2707                '"' + function + "\" does not yet support USING OCTETS",
2708                "0A000");
2709        return ( 1 > from  ||  from > getStringLength(in) );
2710    }
2711
2712    private static void newlinesCheck(boolean w3cNewlines, String function)
2713    throws SQLException
2714    {
2715        if ( ! w3cNewlines )
2716            throw new SQLFeatureNotSupportedException(
2717                '"' + function + "\" does not yet support the ISO SQL newline" +
2718                " conventions, only the original W3C XQuery ones" +
2719                " (HINT: pass w3cNewlines => true)", "0A000");
2720    }
2721
2722    private static RegularExpression compileRE(String pattern, String flags)
2723    throws SQLException
2724    {
2725        try
2726        {
2727            return s_s9p.getUnderlyingConfiguration()
2728                .compileRegularExpression(pattern, flags, "XP30", null);
2729        }
2730        catch ( XPathException e )
2731        {
2732            if ( NamespaceConstant.ERR.equals(e.getErrorCodeNamespace()) )
2733            {
2734                if ( "FORX0001".equals(e.getErrorCodeLocalPart()) )
2735                    throw new SQLDataException(
2736                        "invalid XQuery option flag", "2201T", e);
2737                if ( "FORX0002".equals(e.getErrorCodeLocalPart()) )
2738                    throw new SQLDataException(
2739                        "invalid XQuery regular expression", "2201S", e);
2740            }
2741            throw new SQLException(
2742                "compiling XQuery regular expression: " + e.getMessage(), e);
2743        }
2744    }
2745
2746    private static CharSequence replace(
2747        RegularExpression re, CharSequence in, CharSequence with)
2748        throws SQLException
2749    {
2750        /*
2751         * Report the standard-mandated error if replacing a zero-length match.
2752         * Strictly speaking, this is a test of the length of the match, not of
2753         * the input string. Here, though, this private method is only called by
2754         * translate_regex, which always passes only the portion of the input
2755         * string that matched, so the test is equivalent.
2756         *  As to why the SQL committee would make such a point of disallowing
2757         * replacement of a zero-length match, that's a good question. See
2758         * s_intervalSignSite in this very file for an example where replacing
2759         * a zero-length match is just what's wanted. (But that pattern relies
2760         * on lookahead/lookbehind operators, which XQuery regular expressions
2761         * don't have.)
2762         *  When the underlying library is Saxon, there is an Easter egg: if a
2763         * regular expression is compiled with a 'flags' string ending in ";j",
2764         * a Java regular expression is produced instead of an XQuery one (with
2765         * standards conformance cast to the wind). That can be detected with
2766         * getFlags() on the regular expression: not looking for ";j", because
2767         * that has been stripped out, but for "d" which is a Java regex flag
2768         * that Saxon sets by default, and is not a valid XQuery regex flag.
2769         *  If the caller has used Saxon's Easter egg to get a Java regex, here
2770         * is another Easter egg to go with it, allowing zero-length matches
2771         * to be replaced if that's what the caller wants to do.
2772         */
2773        if ( 0 == in.length()  &&  ! re.getFlags().contains("d") )
2774            throw new SQLDataException(
2775                "attempt to replace a zero-length string", "2201U");
2776        try
2777        {
2778            return re.replace(in, with);
2779        }
2780        catch ( XPathException e )
2781        {
2782            if ( NamespaceConstant.ERR.equals(e.getErrorCodeNamespace()) )
2783            {
2784                if ( "FORX0003".equals(e.getErrorCodeLocalPart()) )
2785                    throw new SQLDataException(
2786                        "attempt to replace a zero-length string", "2201U", e);
2787                if ( "FORX0004".equals(e.getErrorCodeLocalPart()) )
2788                    throw new SQLDataException(
2789                        "invalid XQuery replacement string", "2201V", e);
2790            }
2791            throw new SQLException(
2792                "replacing regular expression match: " + e.getMessage(), e);
2793        }
2794    }
2795
    /**
     * The recorded positions and lengths for one occurrence of a match.
     *<p>
     * Group number 0 stands for the match as a whole; higher numbers stand for
     * capturing groups. As used in this file, positions are 1-based and, like
     * lengths, are counted in Unicode code points (callers subtract 1 and use
     * {@code offsetByCodePoints} to reach a Java string index).
     */
    interface MatchVector
    {
        /**
         * The largest group number this vector has an entry for. Callers in
         * this file treat a requested group greater than this as out of range.
         */
        int groups();

        /**
         * The recorded start position of the given group (0 for the whole
         * match).
         */
        int position(int group);

        /**
         * The recorded length of the given group (0 for the whole match).
         */
        int length(int group);
    }
2802
    /**
     * A forward-only source of {@link MatchVector}s, one per occurrence of a
     * pattern match, fetched by occurrence number.
     */
    interface ListOfMatchVectors
    {
        /**
         * Return the MatchVector for one occurrence of a match.
         *<p>
         * Any previously-returned MatchVector is invalid after another get.
         * In multiple calls to get, the occurrence parameter must be strictly
         * increasing.
         * After get has returned null, it should not be called again.
         * @param occurrence Number of the wanted occurrence; callers in this
         * file number occurrences from 1.
         * @return The MatchVector for that occurrence, or null if there is no
         * such occurrence.
         * @throws SQLException if evaluating the regular expression fails.
         */
        MatchVector get(int occurrence) throws SQLException;

        /**
         * Release whatever underlies this list (in the {@code LOMV}
         * implementation, the Saxon {@code RegexIterator} is closed).
         */
        void close();
    }
2816
2817    static class LOMV
2818    implements ListOfMatchVectors, MatchVector, RegexIterator.MatchHandler
2819    {
2820        private RegexIterator m_ri;
2821        private int m_pos;
2822        private int m_occurrence;
2823
2824        LOMV(int startPos, RegexIterator ri)
2825        {
2826            m_ri = ri;
2827            m_pos = startPos;
2828        }
2829
2830        static ListOfMatchVectors of(
2831            String pattern, String flags, String in, int from)
2832            throws SQLException
2833        {
2834            RegularExpression re = compileRE(pattern, flags);
2835            return of(re, in, from);
2836        }
2837
2838        static ListOfMatchVectors of(RegularExpression re, String in, int from)
2839        {
2840            RegexIterator ri =
2841                re.analyze(in.substring(in.offsetByCodePoints(0, from - 1)));
2842            return new LOMV(from, ri);
2843        }
2844
2845        private int[] m_begPositions;
2846        private int[] m_endPositions;
2847
2848        @Override // ListOfMatchVectors
2849        public MatchVector get(int occurrence) throws SQLException
2850        {
2851            try
2852            {
2853                StringValue sv;
2854                for ( ;; )
2855                {
2856                    sv = m_ri.next();
2857                    if ( null == sv )
2858                        return null;
2859                    if ( m_ri.isMatching() )
2860                        if ( ++ m_occurrence == occurrence )
2861                            break;
2862                    m_pos += sv.getStringLength();
2863                }
2864
2865                if ( null == m_begPositions )
2866                {
2867                    int groups = m_ri.getNumberOfGroups();
2868                    /*
2869                     * Saxon's Apache-derived XQuery engine will report a number
2870                     * of groups counting $0 (so it will be 1 even if no capture
2871                     * groups were defined in the expression). In contrast, the
2872                     * Java regex engine that you get with the Saxon ";j" Easter
2873                     * egg does not count $0 (so arrays need groups+1 entries).
2874                     * It's hard to tell from here which flavor was used, plus
2875                     * the Saxon behavior might change some day, so just spend
2876                     * the extra + 1 every time.
2877                     */
2878                    m_begPositions = new int [ groups + 1 ];
2879                    m_endPositions = new int [ groups + 1 ];
2880                }
2881
2882                m_begPositions [ 0 ] = m_pos;
2883
2884                fill(m_begPositions, 1, m_begPositions.length, 0);
2885                fill(m_endPositions, 1, m_endPositions.length, 0);
2886                m_ri.processMatchingSubstring(this);
2887
2888                m_endPositions [ 0 ] = m_pos;
2889
2890                return this;
2891            }
2892            catch ( XPathException e )
2893            {
2894                throw new SQLException(
2895                    "evaluating XQuery regular expression: " + e.getMessage(),
2896                    e);
2897            }
2898        }
2899
2900        @Override
2901        public void close()
2902        {
2903            m_ri.close();
2904        }
2905
2906        @Override // MatchVector
2907        public int groups()
2908        {
2909            return m_begPositions.length - 1;
2910        }
2911
2912        @Override
2913        public int position(int groupNumber)
2914        {
2915            return m_begPositions [ groupNumber ];
2916        }
2917
2918        @Override
2919        public int length(int groupNumber)
2920        {
2921            return
2922                m_endPositions [ groupNumber ] - m_begPositions [ groupNumber ];
2923        }
2924
2925        @Override // MatchHandler
2926        public void characters(CharSequence s)
2927        {
2928            m_pos += getStringLength(s);
2929        }
2930
2931        @Override
2932        public void onGroupStart(int groupNumber)
2933        {
2934            m_begPositions [ groupNumber ] = m_pos;
2935        }
2936
2937        @Override
2938        public void onGroupEnd(int groupNumber)
2939        {
2940            m_endPositions [ groupNumber ] = m_pos;
2941        }
2942    }
2943
2944    /**
2945     * Function form of the ISO SQL
2946     * <a id='like_regex'>{@code <regex like predicate>}</a>.
2947     *<p>
2948     * Rewrite the standard form
2949     *<pre>
2950     * value LIKE_REGEX pattern FLAG flags
2951     *</pre>
2952     * into this form:
2953     *<pre>
2954     * like_regex(value, pattern, flag =&gt; flags)
2955     *</pre>
2956     * where the {@code flag} parameter defaults to no flags if omitted.
2957     *<p>
2958     * The SQL standard specifies that pattern elements sensitive to newlines
2959     * (namely {@code ^}, {@code $}, {@code \s}, {@code \S}, and {@code .}) are
2960     * to support the various representations of newline set out in
2961     * <a href='http://www.unicode.org/reports/tr18/#RL1.6'>Unicode Technical
2962     * Standard #18, RL1.6</a>. That behavior differs from the standard W3C
2963     * XQuery newline handling, as described for
2964     * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>the flags
2965     * {@code m} and {@code s}</a> and for
2966     * <a href='https://www.w3.org/TR/xmlschema11-2/#cces-mce'>the
2967     * multicharacter escapes {@code \s} and {@code \S}</a>. As an extension to
2968     * ISO SQL, passing {@code w3cNewlines => true} requests the standard W3C
     * XQuery behavior rather than the UTS#18 behavior for newlines. If the
2970     * underlying XQuery library only provides the W3C behavior, calls without
2971     * {@code w3cNewlines => true} will throw exceptions.
2972     * @param value The string to be tested against the pattern.
2973     * @param pattern The XQuery regular expression.
2974     * @param flag Optional string of
2975     * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting
2976     * the regular expression behavior</a>.
2977     * @param w3cNewlines Pass true to allow the regular expression to recognize
2978     * newlines according to the W3C XQuery rules rather than those of ISO SQL.
2979     * @return True if the supplied value matches the pattern. Null if any
2980     * parameter is null.
2981     * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular
2982     * expression is invalid, 2201T if the flags string is invalid;
2983     * SQLFeatureNotSupportedException (0A000) if (in the current
2984     * implementation) w3cNewlines is false or omitted.
2985     */
2986    @Function(schema="javatest")
2987    public static boolean like_regex(
2988        String value,                          //strict
2989        String pattern,                        //strict
2990        @SQLType(defaultValue="") String flag, //strict
2991        @SQLType(defaultValue="false") boolean w3cNewlines
2992    )
2993        throws SQLException
2994    {
2995        newlinesCheck(w3cNewlines, "like_regex");
2996        return compileRE(pattern, flag).containsMatch(value);
2997    }
2998
2999    /**
3000     * Syntax-sugar-free form of the ISO SQL
3001     * <a id='occurrences_regex'>{@code OCCURRENCES_REGEX}</a> function:
3002     * how many times does a pattern occur in a string?
3003     *<p>
3004     * Rewrite the standard form
3005     *<pre>
3006     * OCCURRENCES_REGEX(pattern FLAG flags IN str FROM position USING units)
3007     *</pre>
3008     * into this form:
3009     *<pre>
3010     * occurrences_regex(pattern, flag =&gt; flags, "in" =&gt; str,
3011     *                   "from" =&gt; position, usingOctets =&gt; true|false)
3012     *</pre>
3013     * where all of the named parameters are optional except pattern and "in",
3014     * and the standard {@code USING CHARACTERS} becomes
3015     * {@code usingOctets => false}, which is the default, and
3016     * {@code USING OCTETS} becomes {@code usingOctets => true}. See also
3017     * {@link #like_regex like_regex} regarding the {@code w3cNewlines}
3018     * parameter.
3019     * @param pattern XQuery regular expression to seek in the input string.
3020     * @param in The input string.
3021     * @param flag Optional string of
3022     * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting
3023     * the regular expression behavior</a>.
3024     * @param from Starting position in the input string, 1 by default.
3025     * @param usingOctets Whether position is counted in characters (actual
3026     * Unicode characters, not any smaller encoded unit, not even Java char),
3027     * which is the default, or (when true) in octets of the string's encoded
3028     * form.
3029     * @param w3cNewlines Pass true to allow the regular expression to recognize
3030     * newlines according to the W3C XQuery rules rather than those of ISO SQL.
3031     * @return The number of occurrences of the pattern in the input string,
3032     * starting from the specified position. Null if any parameter is null; -1
3033     * if the start position is less than 1 or beyond the end of the string.
3034     * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular
3035     * expression is invalid, 2201T if the flags string is invalid;
3036     * SQLFeatureNotSupportedException (0A000) if (in the current
3037     * implementation) usingOctets is true, or w3cNewlines is false or omitted.
3038     */
3039    @Function(schema="javatest")
3040    public static int occurrences_regex(
3041        String pattern,                        //strict
3042        @SQLType(name="\"in\"") String in,     //strict
3043        @SQLType(defaultValue="") String flag, //strict
3044        @SQLType(name="\"from\"", defaultValue="1") int from,
3045        @SQLType(defaultValue="false") boolean usingOctets,
3046        @SQLType(defaultValue="false") boolean w3cNewlines
3047    )
3048        throws SQLException
3049    {
3050        if ( usingAndLengthCheck(in, from, usingOctets, "occurrences_regex") )
3051            return -1; // note: not the same as in position_regex!
3052        newlinesCheck(w3cNewlines, "occurrences_regex");
3053
3054        ListOfMatchVectors lomv = LOMV.of(pattern, flag, in, from);
3055
3056        for ( int i = 1 ;; ++ i )
3057            if ( null == lomv.get(i) )
3058                return i - 1;
3059    }
3060
3061    /**
3062     * Syntax-sugar-free form of the ISO SQL
3063     * <a id='position_regex'>{@code POSITION_REGEX}</a> function:
3064     * where does a pattern, or part of it, occur in a string?
3065     *<p>
3066     * Rewrite the standard forms
3067     *<pre>
3068     * POSITION_REGEX(START pattern FLAG flags IN str FROM position
3069     *                OCCURRENCE n GROUP m)
3070     * POSITION_REGEX(AFTER pattern FLAG flags IN str FROM position
3071     *                OCCURRENCE n GROUP m)
3072     *</pre>
3073     * into these forms, respectively:
3074     *<pre>
3075     * position_regex(pattern, flag =&gt; flags, "in" =&gt; str,
3076     *                "from" =&gt; position, occurrence =&gt; n,
3077     *                "group" =&gt; m)
3078     * position_regex(pattern, flag =&gt; flags, "in" =&gt; str,
3079     *                "from" =&gt; position, occurrence =&gt; n,
3080     *                "group" =&gt; m, after =&gt; true)
3081     *</pre>
3082     * where all of the named parameters are optional except pattern and "in".
3083     * See also {@link #occurrences_regex occurrences_regex} regarding the
3084     * {@code usingOctets} parameter, and {@link #like_regex like_regex}
3085     * regarding {@code w3cNewlines}.
3086     * @param pattern XQuery regular expression to seek in the input string.
3087     * @param in The input string.
3088     * @param flag Optional string of
3089     * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting
3090     * the regular expression behavior</a>.
3091     * @param from Starting position in the input string, 1 by default.
3092     * @param usingOctets Whether position is counted in characters (actual
3093     * Unicode characters, not any smaller encoded unit, not even Java char),
3094     * which is the default, or (when true) in octets of the string's encoded
3095     * form.
3096     * @param after Whether to return the position where the match starts
3097     * (when false, the default), or just after the match ends (when true).
3098     * @param occurrence If specified as an integer n (default 1), returns the
3099     * position starting (or after) the nth match of the pattern in the string.
3100     * @param group If zero (the default), returns the position starting (or
3101     * after) the match of the whole pattern overall, otherwise if an integer m,
3102     * the position starting or after the mth parenthesized group in (the nth
3103     * occurrence of) the pattern.
3104     * @param w3cNewlines Pass true to allow the regular expression to recognize
3105     * newlines according to the W3C XQuery rules rather than those of ISO SQL.
3106     * @return The position, in the specified units, starting or just after,
3107     * the nth occurrence (or mth capturing group of the nth occurrence) of the
3108     * pattern in the input string, starting from the specified position. Null
3109     * if any parameter is null; zero if the start position is less than 1 or
3110     * beyond the end of the string, if occurrence is less than 1 or greater
3111     * than the number of matches, or if group is less than zero or greater than
3112     * the number of parenthesized capturing groups in the pattern.
3113     * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular
3114     * expression is invalid, 2201T if the flags string is invalid;
3115     * SQLFeatureNotSupportedException (0A000) if (in the current
3116     * implementation) usingOctets is true, or w3cNewlines is false or omitted.
3117     */
3118    @Function(schema="javatest")
3119    public static int position_regex(
3120        String pattern,                                         //strict
3121        @SQLType(name="\"in\"") String in,                      //strict
3122        @SQLType(defaultValue="") String flag,                  //strict
3123        @SQLType(name="\"from\"", defaultValue="1") int from,
3124        @SQLType(defaultValue="false") boolean usingOctets,
3125        @SQLType(defaultValue="false") boolean after,
3126        @SQLType(defaultValue="1") int occurrence,              //strict
3127        @SQLType(name="\"group\"", defaultValue="0") int group, //strict
3128        @SQLType(defaultValue="false") boolean w3cNewlines
3129    )
3130        throws SQLException
3131    {
3132        if ( 1 > occurrence )
3133            return 0;
3134        if ( 0 > group ) // test group > ngroups after compiling regex
3135            return 0;
3136        if ( usingAndLengthCheck(in, from, usingOctets, "position_regex") )
3137            return 0; // note: not the same as in occurrences_regex!
3138        newlinesCheck(w3cNewlines, "position_regex");
3139
3140        ListOfMatchVectors lomv = LOMV.of(pattern, flag, in, from);
3141
3142        MatchVector mv = lomv.get(occurrence);
3143        if ( null == mv  ||  mv.groups() < group )
3144            return 0;
3145
3146        return mv.position(group) + (after ? mv.length(group) : 0);
3147    }
3148
3149    /**
3150     * Syntax-sugar-free form of the ISO SQL
3151     * <a id='substring_regex'>{@code SUBSTRING_REGEX}</a> function:
3152     * return a substring specified by a pattern match in a string.
3153     *<p>
3154     * Rewrite the standard form
3155     *<pre>
3156     * SUBSTRING_REGEX(pattern FLAG flags IN str FROM position
3157     *                 OCCURRENCE n GROUP m)
3158     *</pre>
3159     * into this form:
3160     *<pre>
3161     * substring_regex(pattern, flag =&gt; flags, "in" =&gt; str,
3162     *                 "from" =&gt; position, occurrence =&gt; n,
3163     *                 "group" =&gt; m)
3164     *</pre>
3165     * where all of the named parameters are optional except pattern and "in".
3166     * See also {@link #position_regex position_regex} regarding the
3167     * {@code occurrence} and {@code "group"} parameters,
3168     * {@link #occurrences_regex occurrences_regex} regarding
3169     * {@code usingOctets}, and {@link #like_regex like_regex}
3170     * regarding {@code w3cNewlines}.
3171     * @param pattern XQuery regular expression to seek in the input string.
3172     * @param in The input string.
3173     * @param flag Optional string of
3174     * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting
3175     * the regular expression behavior</a>.
3176     * @param from Starting position in the input string, 1 by default.
3177     * @param usingOctets Whether position is counted in characters (actual
3178     * Unicode characters, not any smaller encoded unit, not even Java char),
3179     * which is the default, or (when true) in octets of the string's encoded
3180     * form.
3181     * @param occurrence If specified as an integer n (default 1), returns the
3182     * nth match of the pattern in the string.
3183     * @param group If zero (the default), returns the match of the whole
3184     * pattern overall, otherwise if an integer m, the match of the mth
3185     * parenthesized group in (the nth occurrence of) the pattern.
3186     * @param w3cNewlines Pass true to allow the regular expression to recognize
3187     * newlines according to the W3C XQuery rules rather than those of ISO SQL.
3188     * @return The substring matching the nth occurrence (or mth capturing group
3189     * of the nth occurrence) of the pattern in the input string, starting from
3190     * the specified position. Null if any parameter is null, if the start
3191     * position is less than 1 or beyond the end of the string, if occurrence is
3192     * less than 1 or greater than the number of matches, or if group is less
3193     * than zero or greater than the number of parenthesized capturing groups in
3194     * the pattern.
3195     * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular
3196     * expression is invalid, 2201T if the flags string is invalid;
3197     * SQLFeatureNotSupportedException (0A000) if (in the current
3198     * implementation) usingOctets is true, or w3cNewlines is false or omitted.
3199     */
3200    @Function(schema="javatest")
3201    public static String substring_regex(
3202        String pattern,                                          //strict
3203        @SQLType(name="\"in\"") String in,                       //strict
3204        @SQLType(defaultValue="") String flag,                   //strict
3205        @SQLType(name="\"from\"", defaultValue="1") int from,
3206        @SQLType(defaultValue="false") boolean usingOctets,
3207        @SQLType(defaultValue="1") int occurrence,               //strict
3208        @SQLType(name="\"group\"", defaultValue="0") int group,  //strict
3209        @SQLType(defaultValue="false") boolean w3cNewlines
3210    )
3211        throws SQLException
3212    {
3213        if ( 1 > occurrence )
3214            return null;
3215        if ( 0 > group ) // test group > ngroups after compiling regex
3216            return null;
3217        if ( usingAndLengthCheck(in, from, usingOctets, "substring_regex") )
3218            return null;
3219        newlinesCheck(w3cNewlines, "substring_regex");
3220
3221        ListOfMatchVectors lomv = LOMV.of(pattern, flag, in, from);
3222
3223        MatchVector mv = lomv.get(occurrence);
3224        if ( null == mv  ||  mv.groups() < group )
3225            return null;
3226
3227        int codePointPos = mv.position(group);
3228        int codePointLen = mv.length(group);
3229
3230        int utf16pos = in.offsetByCodePoints(0, codePointPos - 1);
3231        int utf16end = in.offsetByCodePoints(utf16pos, codePointLen);
3232
3233        return in.substring(utf16pos, utf16end);
3234    }
3235
3236    /**
3237     * Syntax-sugar-free form of the ISO SQL
3238     * <a id='translate_regex'>{@code TRANSLATE_REGEX}</a> function:
3239     * return a string constructed from the input string by replacing one
3240     * specified occurrence, or all occurrences, of a matching pattern.
3241     *<p>
3242     * Rewrite the standard forms
3243     *<pre>
3244     * TRANSLATE_REGEX(pattern FLAG flags IN str WITH repl FROM position
3245     *                 OCCURRENCE ALL)
3246     * TRANSLATE_REGEX(pattern FLAG flags IN str WITH repl FROM position
3247     *                 OCCURRENCE n)
3248     *</pre>
3249     * into these forms, respectively:
3250     *<pre>
3251     * translate_regex(pattern, flag =&gt; flags, "in" =&gt; str,
3252     *                 "with" =&gt; repl, "from" =&gt; position)
3253     * translate_regex(pattern, flag =&gt; flags, "in" =&gt; str,
3254     *                 "with" =&gt; repl, "from" =&gt; position,
3255     *                 occurrence =&gt; n)
3256     *</pre>
3257     * where all of the named parameters are optional except pattern and "in"
3258     * (the default for "with" is the empty string, resulting in matches being
3259     * deleted).
3260     * See also {@link #position_regex position_regex} regarding the
3261     * {@code occurrence} parameter,
3262     * {@link #occurrences_regex occurrences_regex} regarding
3263     * {@code usingOctets}, and {@link #like_regex like_regex}
3264     * regarding {@code w3cNewlines}.
3265     *<p>
3266     * For the specified occurrence (or all occurrences), the matching portion
3267     * <em>s</em> of the string is replaced as by the XQuery function
3268     * <a href='https://www.w3.org/TR/xpath-functions-31/#func-replace'
3269     * >replace</a>(<em>s, pattern, repl, flags</em>). The <em>repl</em> string
3270     * may contain {@code $0} to refer to the entire matched substring, or
3271     * {@code $}<em>m</em> to refer to the <em>m</em>th parenthesized capturing
3272     * group in the pattern.
3273     * @param pattern XQuery regular expression to seek in the input string.
3274     * @param in The input string.
3275     * @param flag Optional string of
3276     * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting
3277     * the regular expression behavior</a>.
3278     * @param with The replacement string, possibly with $m references.
3279     * @param from Starting position in the input string, 1 by default.
3280     * @param usingOctets Whether position is counted in characters (actual
3281     * Unicode characters, not any smaller encoded unit, not even Java char),
3282     * which is the default, or (when true) in octets of the string's encoded
3283     * form.
3284     * @param occurrence If specified as an integer n (default 0 for "ALL"),
3285     * replace the nth match of the pattern in the string.
3286     * @param w3cNewlines Pass true to allow the regular expression to recognize
3287     * newlines according to the W3C XQuery rules rather than those of ISO SQL.
     * @return The input string with one occurrence or all occurrences of the
3289     * pattern replaced, as described above. Null if any parameter is null, or
3290     * if the start position is less than 1 or beyond the end of the string.
3291     * The input string unchanged if occurrence is less than zero or exceeds the
3292     * number of matches.
3293     * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular
3294     * expression is invalid, 2201T if the flags string is invalid; 2201U if
3295     * replacing where the pattern has matched a substring of zero length; 2201V
3296     * if the replacement string has improper form (a backslash must be used to
3297     * escape any dollar sign or backslash intended literally);
3298     * SQLFeatureNotSupportedException (0A000) if (in the current
3299     * implementation) usingOctets is true, or w3cNewlines is false or omitted.
3300     */
3301    @Function(schema="javatest")
3302    public static String translate_regex(
3303        String pattern,                                          //strict
3304        @SQLType(name="\"in\"") String in,                       //strict
3305        @SQLType(defaultValue="") String flag,                   //strict
3306        @SQLType(name="\"with\"", defaultValue="") String with,  //strict
3307        @SQLType(name="\"from\"", defaultValue="1") int from,
3308        @SQLType(defaultValue="false") boolean usingOctets,
3309        @SQLType(defaultValue="0" /* ALL */) int occurrence,
3310        @SQLType(defaultValue="false") boolean w3cNewlines
3311    )
3312        throws SQLException
3313    {
3314        if ( usingAndLengthCheck(in, from, usingOctets, "translate_regex") )
3315            return null;
3316        newlinesCheck(w3cNewlines, "translate_regex");
3317        if ( 0 > occurrence )
3318            return in;
3319
3320        RegularExpression re = compileRE(pattern, flag);
3321
3322        ListOfMatchVectors lomv = LOMV.of(re, in, from);
3323
3324        MatchVector mv;
3325        int codePointPos;
3326        int codePointLen;
3327        int utf16pos;
3328        int utf16end;
3329
3330        if ( 0 < occurrence )
3331        {
3332            mv = lomv.get(occurrence);
3333            if ( null == mv )
3334                return in;
3335
3336            codePointPos = mv.position(0);
3337            codePointLen = mv.length(0);
3338
3339            utf16pos = in.offsetByCodePoints(0, codePointPos - 1);
3340            utf16end = in.offsetByCodePoints(utf16pos, codePointLen);
3341
3342            return
3343                in.substring(0, utf16pos)
3344                + replace(re, in.substring(utf16pos, utf16end), with)
3345                + in.substring(utf16end);
3346        }
3347
3348        StringBuilder sb = new StringBuilder();
3349        utf16end = 0;
3350
3351        for ( int i = 1; null != (mv = lomv.get(i)); ++ i )
3352        {
3353            codePointPos = mv.position(0);
3354            codePointLen = mv.length(0);
3355
3356            utf16pos = in.offsetByCodePoints(0, codePointPos - 1);
3357
3358            sb.append(in.substring(utf16end, utf16pos));
3359
3360            utf16end = in.offsetByCodePoints(utf16pos, codePointLen);
3361
3362            sb.append(replace(re, in.substring(utf16pos, utf16end), with));
3363        }
3364
3365        return sb.append(in.substring(utf16end)).toString();
3366    }
3367}