001/* 002 * Copyright (c) 2018-2025 Tada AB and other contributors, as listed below. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the The BSD 3-Clause License 006 * which accompanies this distribution, and is available at 007 * http://opensource.org/licenses/BSD-3-Clause 008 * 009 * Contributors: 010 * Chapman Flack 011 */ 012package org.postgresql.pljava.example.saxon; 013 014import java.math.BigDecimal; 015import java.math.BigInteger; 016 017import java.sql.Connection; 018import java.sql.DriverManager; 019import java.sql.ResultSet; 020import java.sql.ResultSetMetaData; 021import static java.sql.ResultSetMetaData.columnNoNulls; 022import java.sql.SQLXML; 023import java.sql.Statement; 024import java.sql.Types; 025 026import java.sql.SQLException; 027import java.sql.SQLDataException; 028import java.sql.SQLFeatureNotSupportedException; 029import java.sql.SQLNonTransientException; 030import java.sql.SQLSyntaxErrorException; 031 032import java.time.LocalDate; 033import java.time.LocalTime; 034import java.time.OffsetTime; 035import java.time.LocalDateTime; 036import java.time.OffsetDateTime; 037import static java.time.ZoneOffset.UTC; 038 039import static java.util.Arrays.asList; 040import static java.util.Arrays.fill; 041import java.util.Collection; 042import java.util.Collections; 043import java.util.HashMap; 044import java.util.Iterator; 045import java.util.List; 046import java.util.Map; 047import java.util.Properties; 048 049import java.util.regex.Matcher; 050import java.util.regex.Pattern; 051 052import javax.xml.transform.Source; 053import javax.xml.transform.Result; 054 055import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI; 056import static javax.xml.XMLConstants.XML_NS_URI; 057import static javax.xml.XMLConstants.XML_NS_PREFIX; 058import static javax.xml.XMLConstants.XMLNS_ATTRIBUTE_NS_URI; 059import static javax.xml.XMLConstants.XMLNS_ATTRIBUTE; 060 061import 
net.sf.saxon.event.Receiver; 062 063import net.sf.saxon.lib.ConversionRules; 064import net.sf.saxon.lib.NamespaceConstant; 065 066import static net.sf.saxon.om.NameChecker.isValidNCName; 067 068import net.sf.saxon.query.StaticQueryContext; 069 070import net.sf.saxon.regex.RegexIterator; 071import net.sf.saxon.regex.RegularExpression; 072 073import net.sf.saxon.s9api.Destination; 074import net.sf.saxon.s9api.DocumentBuilder; 075import net.sf.saxon.s9api.ItemType; 076import net.sf.saxon.s9api.ItemTypeFactory; 077import net.sf.saxon.s9api.OccurrenceIndicator; 078import net.sf.saxon.s9api.Processor; 079import net.sf.saxon.s9api.QName; 080import net.sf.saxon.s9api.SAXDestination; 081import net.sf.saxon.s9api.SequenceType; 082import static net.sf.saxon.s9api.SequenceType.makeSequenceType; 083import net.sf.saxon.s9api.XdmAtomicValue; 084import static net.sf.saxon.s9api.XdmAtomicValue.makeAtomicValue; 085import net.sf.saxon.s9api.XdmEmptySequence; 086import net.sf.saxon.s9api.XdmItem; 087import net.sf.saxon.s9api.XdmNode; 088import static net.sf.saxon.s9api.XdmNodeKind.DOCUMENT; 089import net.sf.saxon.s9api.XdmValue; 090import net.sf.saxon.s9api.XdmSequenceIterator; 091import net.sf.saxon.s9api.XQueryCompiler; 092import net.sf.saxon.s9api.XQueryEvaluator; 093import net.sf.saxon.s9api.XQueryExecutable; 094 095import net.sf.saxon.s9api.SaxonApiException; 096 097import net.sf.saxon.trans.XPathException; 098 099import net.sf.saxon.serialize.SerializationProperties; 100 101import net.sf.saxon.type.AtomicType; 102import net.sf.saxon.type.Converter; 103 104import net.sf.saxon.value.AtomicValue; 105import net.sf.saxon.value.Base64BinaryValue; 106import net.sf.saxon.value.CalendarValue; 107import net.sf.saxon.value.HexBinaryValue; 108import net.sf.saxon.value.StringValue; 109import static net.sf.saxon.value.StringValue.getStringLength; 110 111import org.postgresql.pljava.ResultSetProvider; 112 113import org.postgresql.pljava.annotation.Function; 114import 
org.postgresql.pljava.annotation.SQLAction; 115import org.postgresql.pljava.annotation.SQLType; 116import static org.postgresql.pljava.annotation.Function.OnNullInput.CALLED; 117 118/* For the xmltext function, which only needs plain SAX and not Saxon */ 119 120import javax.xml.transform.sax.SAXResult; 121import org.xml.sax.ContentHandler; 122import org.xml.sax.SAXException; 123 124/** 125 * Class illustrating use of XQuery with Saxon as the 126 * implementation, using its native "s9api". 127 *<p> 128 * Supplies alternative, XML Query-based (as the SQL/XML standard dictates) 129 * implementation of some of SQL/XML, where the implementation in core 130 * PostgreSQL is limited to the capabilities of XPath (and XPath 1.0, at that). 131 *<p> 132 * Without the syntactic sugar built into the core PostgreSQL parser, calls to 133 * a function in this class can look a bit more verbose in SQL, but reflect a 134 * straightforward rewriting from the standard syntax. For example, suppose 135 * there is a table {@code catalog_as_xml} with a single row whose {@code x} 136 * column is a (respectably sized) XML document recording the stuff in 137 * {@code pg_catalog}. 
It could be created like this: 138 *<pre> 139 * CREATE TABLE catalog_as_xml(x) AS 140 * SELECT schema_to_xml('pg_catalog', false, true, ''); 141 *</pre> 142 *<h2>Functions/predicates from ISO 9075-14 SQL/XML</h2> 143 *<h3>XMLQUERY</h3> 144 *<p> 145 * In the syntax of the SQL/XML standard, here is a query that would return 146 * an XML element representing the declaration of a function with a specified 147 * name: 148 *<pre> 149 * SELECT XMLQUERY('/pg_catalog/pg_proc[proname eq $FUNCNAME]' 150 * PASSING BY VALUE x, 'numeric_avg' AS FUNCNAME 151 * RETURNING CONTENT EMPTY ON EMPTY) 152 * FROM catalog_as_xml; 153 *</pre> 154 *<p> 155 * It binds the 'context item' of the query to {@code x}, and the {@code FUNCNAME} 156 * parameter to the given value, then evaluates the query and returns XML 157 * "CONTENT" (a tree structure with a document node at the root, but not 158 * necessarily meeting all the requirements of an XML "DOCUMENT"). It can be 159 * rewritten as this call to the {@link #xq_ret_content xq_ret_content} method: 160 *<pre> 161 * SELECT javatest.xq_ret_content('/pg_catalog/pg_proc[proname eq $FUNCNAME]', 162 * PASSING => p, nullOnEmpty => false) 163 * FROM catalog_as_xml, 164 * LATERAL (SELECT x AS ".", 'numeric_avg' AS "FUNCNAME") AS p; 165 *</pre> 166 *<p> 167 * In the rewritten form, the form of result wanted ({@code RETURNING CONTENT}) 168 * is implicit in the called function name ({@code xq_ret_content}), and the 169 * parameters to pass to the query are moved out to a separate {@code SELECT} 170 * that supplies their values, types, and names (with the context item now given 171 * the name ".") and is passed by its alias into the query function. 172 *<p> 173 * Because of an unconditional uppercasing that PL/Java's JDBC driver currently 174 * applies to column names, any parameter names, such as {@code FUNCNAME} above, 175 * must be spelled in uppercase where used in the XQuery text, or they will not 176 * be recognized. 
Because the unconditional uppercasing is highly likely to be 177 * dropped in a future PL/Java release, it is wisest until then to use only 178 * parameter names that really are uppercase, both in the XQuery text where they 179 * are used and in the SQL expression that supplies them. In PostgreSQL, 180 * identifiers that are not quoted are <em>lower</em>cased, so they must be both 181 * uppercase and quoted, in the SQL syntax, to be truly uppercase. 182 *<p> 183 * In the standard, parameters and results (of XML types) can be passed 184 * {@code BY VALUE} or {@code BY REF}, where the latter means that the same 185 * nodes will retain their XQuery node identities over calls (note that this is 186 * a meaning unrelated to what "by value" and "by reference" usually mean in 187 * PostgreSQL's documentation). PostgreSQL's implementation of the XML type 188 * provides no way for {@code BY REF} semantics to be implemented, so everything 189 * happening here happens {@code BY VALUE} implicitly, and does not need to be 190 * specified. 191 *<h3>XMLEXISTS</h3> 192 *<p> 193 * The function {@link #xmlexists xmlexists} here implements the 194 * standard function of the same name. Because it is the same name, it has to 195 * be either schema-qualified or double-quoted in a call to avoid confusion 196 * with the reserved word. 
In the syntax of the SQL/XML standard, here is a 197 * query returning a boolean value indicating whether a function with the 198 * specified name is declared: 199 *<pre> 200 * SELECT XMLEXISTS('/pg_catalog/pg_proc[proname eq $FUNCNAME]' 201 * PASSING BY VALUE x, 'numeric_avg' AS FUNCNAME) 202 * FROM catalog_as_xml; 203 *</pre> 204 *<p> 205 * It can be rewritten as this call to the {@link #xmlexists xmlexists} method: 206 *<pre> 207 * SELECT "xmlexists"('/pg_catalog/pg_proc[proname eq $FUNCNAME]', 208 * PASSING => p) 209 * FROM catalog_as_xml, 210 * LATERAL (SELECT x AS ".", 'numeric_avg' AS "FUNCNAME") AS p; 211 *</pre> 212 *<h3>XMLTABLE</h3> 213 *<p> 214 * The function {@link #xmltable xmltable} here implements (much of) the 215 * standard function of the same name. Because it is the same name, it has to 216 * be either schema-qualified or double-quoted in a call to avoid confusion 217 * with the reserved word. A rewritten form of the <a href= 218'https://www.postgresql.org/docs/10/static/functions-xml.html#FUNCTIONS-XML-PROCESSING-XMLTABLE' 219>first example in the PostgreSQL manual</a> could be: 220 *<pre> 221 * SELECT xmltable.* 222 * FROM 223 * xmldata, 224 * 225 * LATERAL (SELECT data AS ".", 'not specified'::text AS "DPREMIER") AS p, 226 * 227 * "xmltable"('//ROWS/ROW', PASSING => p, COLUMNS => ARRAY[ 228 * 'data(@id)', null, 'COUNTRY_NAME', 229 * 'COUNTRY_ID', 'SIZE[@unit eq "sq_km"]', 230 * 'concat(SIZE[@unit ne "sq_km"], " ", SIZE[@unit ne "sq_km"]/@unit)', 231 * 'let $e := PREMIER_NAME 232 * return if ( empty($e) )then $DPREMIER else $e' 233 * ]) AS ( 234 * id int, ordinality int8, "COUNTRY_NAME" text, country_id text, 235 * size_sq_km float, size_other text, premier_name text 236 * ); 237 *</pre> 238 *<p> 239 * In the first column expression, without the {@code data()} function, the 240 * result would be a bare attribute node (one not enclosed in an XML element). 
241 * Many implementations will accept a bare attribute as a column expression 242 * result, and simply assume the attribute's value is wanted, but it appears 243 * that a strict implementation of the spec must raise {@code err:XPTY0004} in 244 * such a case. This implementation is meant to be strict, so the attribute is 245 * wrapped in {@code data()} to extract and return its value. (See 246 * "About bare attribute nodes" in {@link #assignRowValues assignRowValues} 247 * for more explanation.) 248 *<p> 249 * The {@code DPREMIER} parameter passed from SQL to the XQuery expression is 250 * spelled in uppercase (and also, in the SQL expression supplying it, quoted), 251 * for the reasons explained above for the {@code xq_ret_content} function. 252 *<h3>XMLCAST</h3> 253 *<p> 254 * An ISO standard cast expression like 255 *<pre> 256 * XMLCAST(v AS wantedtype) 257 *</pre> 258 * can be rewritten with this idiom and the {@link #xmlcast xmlcast} function 259 * provided here: 260 *<pre> 261 * (SELECT r FROM (SELECT v) AS o, xmlcast(o) AS (r wantedtype)) 262 *</pre> 263 *<h2>XQuery regular-expression functions in ISO 9075-2 Foundations</h2> 264 * The methods {@link #like_regex like_regex}, 265 * {@link #occurrences_regex occurrences_regex}, 266 * {@link #position_regex position_regex}, 267 * {@link #substring_regex substring_regex}, and 268 * {@link #translate_regex translate_regex} provide, with slightly altered 269 * syntax, the ISO SQL predicate and functions of the same names. 270 *<p> 271 * For the moment, they will only match newlines in the way W3C XQuery 272 * specifies, not in the more-flexible Unicode-compatible way ISO SQL specifies, 273 * and for the ones where ISO SQL allows {@code USING CHARACTERS} or 274 * {@code USING OCTETS}, only {@code USING CHARACTERS} will work. 
275 *<h2>Extensions</h2> 276 *<h3>XQuery module prolog allowed</h3> 277 *<p> 278 * Where any function here accepts an XQuery 279 *<a href='https://www.w3.org/TR/xquery-31/#id-expressions' 280 *>"expression"</a> according to the SQL specification, in fact an XQuery 281 *<a href='https://www.w3.org/TR/xquery-31/#dt-main-module' 282 *>"main module"</a> will be accepted. Therefore, the query can be preceded by 283 * a prolog declaring namespaces, options, local variables and functions, etc. 284 *<h3>Saxon extension to XQuery regular expressions</h3> 285 *<p> 286 * Saxon's implementation of XQuery regular expressions will accept a 287 * nonstandard <em>flag</em> string ending with {@code ;j} to use Java regular 288 * expressions rather than XQuery ones. That extension is available in the 289 * XQuery regular-expression methods provided here. 290 * @author Chapman Flack 291 */ 292@SQLAction( 293 implementor = "postgresql_xml", // skip it all if no xml support 294 requires = "presentOnClassPath", 295 provides = "saxon9api", 296 install = 297 "SELECT CASE WHEN" + 298 " presentOnClassPath('net.sf.saxon.s9api.QName')" + 299 "THEN" + 300 " CAST(" + 301 " set_config('pljava.implementors', 'saxon9api,' || " + 302 " current_setting('pljava.implementors'), true)" + 303 " AS void" + 304 " )" + 305 "ELSE" + 306 " logMessage('INFO', 'Saxon examples skipped: s9api classes missing')" + 307 "END" 308) 309public class S9 implements ResultSetProvider.Large 310{ 311 private S9( 312 XdmSequenceIterator<XdmItem> xsi, 313 XQueryEvaluator[] columnXQEs, 314 SequenceType[] columnStaticTypes, 315 XMLBinary enc) 316 { 317 m_sequenceIterator = xsi; 318 m_columnXQEs = columnXQEs; 319 m_columnStaticTypes = columnStaticTypes; 320 m_atomize = new AtomizingFunction [ columnStaticTypes.length ]; 321 m_xmlbinary = enc; 322 } 323 324 final XdmSequenceIterator<XdmItem> m_sequenceIterator; 325 final XQueryEvaluator[] m_columnXQEs; 326 final SequenceType[] m_columnStaticTypes; 327 final SequenceType 
s_01untypedAtomic = makeSequenceType( 328 ItemType.UNTYPED_ATOMIC, OccurrenceIndicator.ZERO_OR_ONE); 329 final AtomizingFunction[] m_atomize; 330 final XMLBinary m_xmlbinary; 331 Binding.Assemblage m_outBindings; 332 333 static final Connection s_dbc; 334 static final Processor s_s9p = new Processor(false); 335 static final ItemTypeFactory s_itf = new ItemTypeFactory(s_s9p); 336 337 static final Pattern s_intervalSigns; 338 static final Pattern s_intervalSignSite; 339 340 enum XMLBinary { HEX, BASE64 }; 341 enum Nulls { ABSENT, NIL }; 342 343 static 344 { 345 try 346 { 347 s_dbc = DriverManager.getConnection("jdbc:default:connection"); 348 349 /* 350 * XML Schema thinks an ISO 8601 duration must have no sign 351 * anywhere but at the very beginning before the P. PostgreSQL 352 * thinks that's the one place a sign must never be, and instead 353 * it should appear in front of every numeric field. (PostgreSQL 354 * accepts input where the signs vary, and there are cases where it 355 * cannot be normalized away: P1M-1D is a thing, and can't be 356 * simplified until anchored at a date to know how long the month 357 * is! The XML Schema type simply can't represent that, so mapping 358 * of such a value must simply fail, as we'll ensure below.) 359 * So, here's a regex with a capturing group for a leading -, and 360 * one for any field-leading -, and one for the absence of a field- 361 * leading -. Any PostgreSQL or XS duration ought to match overall, 362 * but the capturing group matches should be either (f,f,t) or 363 * (f,t,f) for a PostgreSQL duration, or either (f,f,t) or (t,f,t) 364 * for an XS duration. (f,t,t) would be a PostgreSQL interval with 365 * mixed signs, and inconvertible. 366 */ 367 s_intervalSigns = Pattern.compile( 368 "(-)?+(?:[PYMWDTH](?:(?:(-)|())\\d++)?+)++(?:(?:[.,]\\d*+)?+S)?+"); 369 /* 370 * To convert from the leading-sign form, need to find every spot 371 * where a digit follows a [PYMWDTH] to insert a - there. 
372 */ 373 s_intervalSignSite = Pattern.compile("(?<=[PYMWDTH])(?=\\d)"); 374 } 375 catch ( SQLException e ) 376 { 377 throw new ExceptionInInitializerError(e); 378 } 379 } 380 381 static class PredefinedQueryHolders 382 { 383 static final XQueryCompiler s_xqc = s_s9p.newXQueryCompiler(); 384 static final QName s_qEXPR = new QName("EXPR"); 385 386 static class DocumentWrapUnwrap 387 { 388 static final XQueryExecutable INSTANCE; 389 390 static 391 { 392 try 393 { 394 INSTANCE = s_xqc.compile( 395 "declare construction preserve;" + 396 "declare variable $EXPR as item()* external;" + 397 "data(document{$EXPR}/child::node())"); 398 } 399 catch ( SaxonApiException e ) 400 { 401 throw new ExceptionInInitializerError(e); 402 } 403 } 404 } 405 } 406 407 /** 408 * PostgreSQL (as of 12) lacks the XMLTEXT function, so here it is. 409 *<p> 410 * As long as PostgreSQL does not have the {@code XML(SEQUENCE)} type, 411 * this can only be the {@code XMLTEXT(sve RETURNING CONTENT)} flavor, which 412 * does create a text node with {@code sve} as its value, but returns the 413 * text node wrapped in a document node. 414 *<p> 415 * This function doesn't actually require Saxon, but otherwise fits in with 416 * the theme here, implementing missing parts of SQL/XML for PostgreSQL. 417 * @param sve SQL string value to use in a text node 418 * @return XML content, the text node wrapped in a document node 419 */ 420 @Function(implementor="saxon9api", schema="javatest") 421 public static SQLXML xmltext(String sve) throws SQLException 422 { 423 SQLXML rx = s_dbc.createSQLXML(); 424 ContentHandler ch = rx.setResult(SAXResult.class).getHandler(); 425 426 try 427 { 428 ch.startDocument(); 429 /* 430 * It seems XMLTEXT() should be such a trivial function to write, 431 * but already it reveals a subtlety in the SAX API docs. 
They say 432 * the third argument to characters() is "the number of characters 433 * to read from the array" and that follows a long discussion of how 434 * individual characters can (with code points above U+FFFF) consist 435 * of more than one Java char value. 436 * 437 * And yet, when you try it out (and include some characters above 438 * U+FFFF in the input), you discover the third argument isn't the 439 * number of characters, has to be the number of Java char values. 440 */ 441 ch.characters(sve.toCharArray(), 0, sve.length()); 442 ch.endDocument(); 443 } 444 catch ( SAXException e ) 445 { 446 rx.free(); 447 throw new SQLException(e.getMessage(), e); 448 } 449 450 return rx; 451 } 452 453 /** 454 * An implementation of XMLCAST. 455 *<p> 456 * Will be declared to take and return type {@code RECORD}, where each must 457 * have exactly one component, just because that makes it easy to use 458 * existing JDBC metadata queries to find out the operand and target SQL 459 * data types. 460 *<p> 461 * Serving suggestion: rewrite this ISO standard expression 462 *<pre> 463 * XMLCAST(v AS wantedtype) 464 *</pre> 465 * to this idiomatic one: 466 *<pre> 467 * (SELECT r FROM (SELECT v) AS o, xmlcast(o) AS (r wantedtype)) 468 *</pre> 469 * @param operand a one-row, one-column record supplied by the caller, whose 470 * one typed value is the operand to be cast. 471 * @param base64 true if binary SQL values should be base64-encoded in XML; 472 * if false (the default), values will be encoded in hex. 473 * @param target a one-row, one-column record supplied by PL/Java from the 474 * {@code AS} clause after the function call, whose one column's type is the 475 * type to be cast to. 
476 */ 477 @Function( 478 implementor="saxon9api", 479 schema="javatest", 480 type="pg_catalog.record", 481 onNullInput=CALLED, 482 settings="IntervalStyle TO iso_8601" 483 ) 484 public static boolean xmlcast( 485 ResultSet operand, @SQLType(defaultValue="false") Boolean base64, 486 ResultSet target) 487 throws SQLException 488 { 489 if ( null == operand ) 490 throw new SQLDataException( 491 "xmlcast \"operand\" must be (in this implementation) " + 492 "a non-null row type", "22004"); 493 494 if ( null == base64 ) 495 throw new SQLDataException( 496 "xmlcast \"base64\" must be true or false, not null", "22004"); 497 XMLBinary enc = base64 ? XMLBinary.BASE64 : XMLBinary.HEX; 498 499 assert null != target : "PL/Java supplied a null output record???"; 500 501 if ( 1 != operand.getMetaData().getColumnCount() ) 502 throw new SQLDataException( 503 "xmlcast \"operand\" must be a row type with exactly " + 504 "one component", "22000"); 505 506 if ( 1 != target.getMetaData().getColumnCount() ) 507 throw new SQLDataException( 508 "xmlcast \"target\" must be a row type with exactly " + 509 "one component", "22000"); 510 511 Binding.Parameter op = 512 new BindingsFromResultSet(operand, false).iterator().next(); 513 514 Binding.Parameter tg = 515 new BindingsFromResultSet(target, null).iterator().next(); 516 517 int sd = op.typeJDBC(); 518 int td = tg.typeJDBC(); 519 520 int castcase = 521 (Types.SQLXML == sd ? 2 : 0) | (Types.SQLXML == td ? 1 : 0); 522 523 switch ( castcase ) 524 { 525 case 0: // neither sd nor td is an XML type 526 throw new SQLSyntaxErrorException( 527 "at least one of xmlcast \"operand\" or \"target\" must " + 528 "be of XML type", "42804"); 529 case 3: // both XML 530 /* 531 * In an implementation closely following the spec, this case would 532 * be handled in parse analysis and rewritten from an XMLCAST to a 533 * plain CAST, and this code would never see it. This is a plain 534 * example function without benefit of a parser that can do that. 
535 * In a DBMS with all the various SQL:2006 XML subtypes, there would 536 * be nontrivial work to do here, but casting from PostgreSQL's one 537 * XML type to itself is more of a warm-up exercise. 538 */ 539 target.updateSQLXML(1, operand.getSQLXML(1)); 540 return true; 541 case 1: // something non-XML being cast to XML 542 assertCanCastAsXmlSequence(sd, "operand"); 543 Object v = op.valueJDBC(); 544 if ( null == v ) 545 { 546 target.updateNull(1); 547 return true; 548 } 549 ItemType xsbt = 550 mapSQLDataTypeToXMLSchemaDataType(op, enc, Nulls.ABSENT); 551 Iterator<XdmItem> tv = 552 xmlCastAsSequence(v, enc, xsbt).iterator(); 553 try 554 { 555 target.updateSQLXML(1, 556 returnContent(tv, /*nullOnEmpty*/ false)); 557 } 558 catch ( SaxonApiException | XPathException e ) 559 { 560 throw new SQLException(e.getMessage(), "10000", e); 561 } 562 return true; 563 case 2: // XML being cast to something non-XML 564 assertCanCastAsXmlSequence(td, "target"); 565 SQLXML sx = operand.getSQLXML(1); 566 if ( null == sx ) 567 { 568 target.updateNull(1); 569 return true; 570 } 571 DocumentBuilder dBuilder = s_s9p.newDocumentBuilder(); 572 Source source = sx.getSource(null); 573 try 574 { 575 XdmValue xv = dBuilder.build(source); 576 XQueryEvaluator xqe = 577 PredefinedQueryHolders.DocumentWrapUnwrap.INSTANCE.load(); 578 xqe.setExternalVariable(PredefinedQueryHolders.s_qEXPR, xv); 579 xv = xqe.evaluate(); 580 /* 581 * It's zero-or-one, or XPTY0004 was thrown here. 
582 */ 583 if ( 0 == xv.size() ) 584 { 585 target.updateNull(1); 586 return true; 587 } 588 XdmAtomicValue av = (XdmAtomicValue)xv; 589 xmlCastAsNonXML( 590 av, ItemType.UNTYPED_ATOMIC, tg, target, 1, enc); 591 } 592 catch ( SaxonApiException | XPathException e ) 593 { 594 throw new SQLException(e.getMessage(), "10000", e); 595 } 596 return true; 597 } 598 599 throw new SQLFeatureNotSupportedException( 600 "cannot yet xmlcast from " + op.typePG() + 601 " to " + tg.typePG(), "0A000"); 602 } 603 604 /** 605 * A simple example corresponding to {@code XMLQUERY(expression 606 * PASSING BY VALUE passing RETURNING CONTENT {NULL|EMPTY} ON EMPTY)}. 607 * @param expression An XQuery expression. Must not be {@code null} (in the 608 * SQL standard {@code XMLQUERY} syntax, it is not even allowed to be an 609 * SQL expression at all, only a string literal). 610 * @param nullOnEmpty pass {@code true} to get a null return in place of 611 * an empty sequence, or {@code false} to just get the empty sequence. 612 * @param passing A row value whose columns will be supplied to the query 613 * as parameters. Columns with names (typically supplied with {@code AS}) 614 * appear as predeclared external variables with matching names (in no 615 * namespace) in the query, with types derived from the SQL types of the 616 * row value's columns. There may be one (and no more than one) 617 * column with {@code AS "."} which, if present, will be bound as the 618 * context item. (The name {@code ?column?}, which PostgreSQL uses for an 619 * otherwise-unnamed column, is also accepted, which will often allow the 620 * context item to be specified with no {@code AS} at all. Beware, though, 621 * that PostgreSQL likes to invent column names from any function or type 622 * name that may appear in the value expression, so this shorthand will not 623 * always work, while {@code AS "."} will.) 
PL/Java's internal JDBC uppercases all column 624 * names, so any uses of the corresponding variables in the query must have 625 * the names in upper case. It is safest to also uppercase their appearances 626 * in the SQL (for which, in PostgreSQL, they must be quoted), so that the 627 * JDBC uppercasing is not being relied on. It is likely to be dropped in a 628 * future PL/Java release. 629 * @param namespaces An even-length String array where, of each pair of 630 * consecutive entries, the first is a namespace prefix and the second is 631 * the URI to which to bind it. The zero-length prefix sets the default 632 * element and type namespace; if the prefix has zero length, the URI may 633 * also have zero length, to declare that unprefixed elements are in no 634 * namespace. 635 */ 636 @Function( 637 implementor="saxon9api", 638 schema="javatest", 639 onNullInput=CALLED, 640 settings="IntervalStyle TO iso_8601" 641 ) 642 public static SQLXML xq_ret_content( 643 String expression, Boolean nullOnEmpty, 644 @SQLType(defaultValue={}) ResultSet passing, 645 @SQLType(defaultValue={}) String[] namespaces) 646 throws SQLException 647 { 648 /* 649 * The expression itself may not be null (in the standard, it isn't 650 * even allowed to be dynamic, and can only be a string literal!). 651 */ 652 if ( null == expression ) 653 throw new SQLDataException( 654 "XMLQUERY expression may not be null", "22004"); 655 656 if ( null == nullOnEmpty ) 657 throw new SQLDataException( 658 "XMLQUERY nullOnEmpty may not be null", "22004"); 659 660 try 661 { 662 XdmSequenceIterator<XdmItem> x1 = 663 evalXQuery(expression, passing, namespaces); 664 return null == x1 ? null : returnContent(x1, nullOnEmpty); 665 } 666 catch ( SaxonApiException | XPathException e ) 667 { 668 throw new SQLException(e.getMessage(), "10000", e); 669 } 670 } 671 672 /** 673 * An implementation of {@code XMLEXISTS(expression 674 * PASSING BY VALUE passing)}, using genuine XQuery. 
675 * @param expression An XQuery expression. Must not be {@code null} (in the 676 * SQL standard {@code XMLEXISTS} syntax, it is not even allowed to be an 677 * SQL expression at all, only a string literal). 678 * @param passing A row value whose columns will be supplied to the query 679 * as parameters. Columns with names (typically supplied with {@code AS}) 680 * appear as predeclared external variables with matching names (in no 681 * namespace) in the query, with types derived from the SQL types of the 682 * row value's columns. There may be one (and no more than one) 683 * column with {@code AS "."} which, if present, will be bound as the 684 * context item. (The name {@code ?column?}, which PostgreSQL uses for an 685 * otherwise-unnamed column, is also accepted, which will often allow the 686 * context item to be specified with no {@code AS} at all. Beware, though, 687 * that PostgreSQL likes to invent column names from any function or type 688 * name that may appear in the value expression, so this shorthand will not 689 * always work, while {@code AS "."} will.) PL/Java's internal JDBC uppercases all column 690 * names, so any uses of the corresponding variables in the query must have 691 * the names in upper case. It is safest to also uppercase their appearances 692 * in the SQL (for which, in PostgreSQL, they must be quoted), so that the 693 * JDBC uppercasing is not being relied on. It is likely to be dropped in a 694 * future PL/Java release. 695 * @param namespaces An even-length String array where, of each pair of 696 * consecutive entries, the first is a namespace prefix and the second is 697 * the URI to which to bind it. The zero-length prefix sets the default 698 * element and type namespace; if the prefix has zero length, the URI may 699 * also have zero length, to declare that unprefixed elements are in no 700 * namespace. 701 * @return True if the expression evaluates to a nonempty sequence, false if 702 * it evaluates to an empty one. 
Null if a context item is passed and its 703 * SQL value is null. 704 */ 705 @Function( 706 implementor="saxon9api", 707 schema="javatest", 708 onNullInput=CALLED, 709 settings="IntervalStyle TO iso_8601" 710 ) 711 public static Boolean xmlexists( 712 String expression, 713 @SQLType(defaultValue={}) ResultSet passing, 714 @SQLType(defaultValue={}) String[] namespaces) 715 throws SQLException 716 { 717 /* 718 * The expression itself may not be null (in the standard, it isn't 719 * even allowed to be dynamic, and can only be a string literal!). 720 */ 721 if ( null == expression ) 722 throw new SQLDataException( 723 "XMLEXISTS expression may not be null", "22004"); 724 725 XdmSequenceIterator<XdmItem> x1 = 726 evalXQuery(expression, passing, namespaces); 727 if ( null == x1 ) 728 return null; 729 if ( ! x1.hasNext() ) 730 return false; 731 x1.close(); 732 return true; 733 } 734 735 /** 736 * Implementation factor of XMLEXISTS and XMLQUERY. 737 * @return null if a context item is passed and its SQL value is null 738 */ 739 private static XdmSequenceIterator<XdmItem> evalXQuery( 740 String expression, ResultSet passing, String[] namespaces) 741 throws SQLException 742 { 743 Binding.Assemblage bindings = new BindingsFromResultSet(passing, true); 744 745 try 746 { 747 XQueryCompiler xqc = createStaticContextWithPassedTypes( 748 bindings, namespaceBindings(namespaces)); 749 750 XQueryEvaluator xqe = xqc.compile(expression).load(); 751 752 if ( storePassedValuesInDynamicContext(xqe, bindings, true) ) 753 return null; 754 755 /* 756 * For now, punt on whether the <XQuery expression> is evaluated 757 * with XML 1.1 or 1.0 lexical rules.... XXX 758 */ 759 return xqe.iterator(); 760 } 761 catch ( SaxonApiException | XPathException e ) 762 { 763 throw new SQLException(e.getMessage(), "10000", e); 764 } 765 } 766 767 /** 768 * Perform the final steps of <em>something</em> {@code RETURNING CONTENT}, 769 * with or without {@code nullOnEmpty}. 
770 *<p> 771 * The effects are to be the same as if the supplied sequence were passed 772 * as {@code $EXPR} to {@code document{$EXPR}}. 773 */ 774 private static SQLXML returnContent( 775 Iterator<XdmItem> x, boolean nullOnEmpty) 776 throws SQLException, SaxonApiException, XPathException 777 { 778 if ( nullOnEmpty && ! x.hasNext() ) 779 return null; 780 781 SQLXML rsx = s_dbc.createSQLXML(); 782 /* 783 * Keep this simple by requesting a specific type of Result rather 784 * than letting PL/Java choose. It happens (though this is a detail of 785 * the implementation) that SAXResult won't be a bad choice. 786 */ 787 SAXResult sr = rsx.setResult(SAXResult.class); 788 /* 789 * Michael Kay recommends the following as equivalent to the SQL/XML- 790 * mandated behavior of evaluating document{$x}. 791 * https://sourceforge.net/p/saxon/mailman/message/36969060/ 792 */ 793 SAXDestination d = new SAXDestination(sr.getHandler()); 794 Receiver r = d.getReceiver( 795 s_s9p.getUnderlyingConfiguration().makePipelineConfiguration(), 796 new SerializationProperties()); 797 r.open(); 798 while ( x.hasNext() ) 799 r.append(x.next().getUnderlyingValue()); 800 r.close(); 801 return rsx; 802 } 803 804 /** 805 * An implementation of (much of) XMLTABLE, using genuine XML Query. 806 *<p> 807 * The {@code columns} array must supply a valid XML Query expression for 808 * every column in the column definition list that follows the call of this 809 * function in SQL, except that the column for ordinality, if wanted, is 810 * identified by a {@code null} entry in {@code columns}. Syntax sugar in 811 * the standard allows an omitted column expression to imply an element test 812 * for an element with the same name as the column; that doesn't work here. 813 *<p> 814 * For now, this implementation lacks the ability to specify defaults for 815 * when a column expression produces an empty sequence. 
It is possible to 816 * do defaults explicitly by rewriting a query expression <em>expr</em> as 817 * {@code let $e := }<em>expr</em>{@code return if(empty($e))then $D else $e} 818 * and supplying the default <em>D</em> as another query parameter, though 819 * such defaults will be evaluated only once when {@code xmltable} is called 820 * and will not be able to refer to other values in an output row. 821 * @param rows The single XQuery expression whose result sequence generates 822 * the rows of the resulting table. Must not be null. 823 * @param columns Array of XQuery expressions, exactly as many as result 824 * columns in the column definition list that follows the SQL call to this 825 * function. This array must not be null. It is allowed for one element (and 826 * no more than one) to be null, marking the corresponding column to be 827 * "FOR ORDINALITY" (the column must be of "exact numeric with scale zero" 828 * type; PostgreSQL supports 64-bit row counters, so {@code int8} is 829 * recommended). 830 * @param passing A row value whose columns will be supplied to the query 831 * as parameters, just as described for 832 * {@link #xq_ret_content xq_ret_content()}. If a context item is supplied, 833 * it is the context item for the {@code rows} query (the {@code columns} 834 * queries get their context item from the {@code rows} query's result). Any 835 * named parameters supplied here are available both in the {@code rows} 836 * expression and (though this goes beyond the standard) in every expression 837 * of {@code columns}, with their values unchanging from row to row. 838 * @param namespaces An even-length String array where, of each pair of 839 * consecutive entries, the first is a namespace prefix and the second is 840 * the URI to which to bind it, just as described for 841 * {@link #xq_ret_content xq_ret_content()}. 842 * @param base64 whether the effective, in-scope 'xmlbinary' setting calls 843 * for base64 or (the default, false) hexadecimal.
844 */ 845 @Function( 846 implementor="saxon9api", 847 schema="javatest", 848 onNullInput=CALLED, 849 settings="IntervalStyle TO iso_8601" 850 ) 851 public static ResultSetProvider xmltable( 852 String rows, String[] columns, 853 @SQLType(defaultValue={}) ResultSet passing, 854 @SQLType(defaultValue={}) String[] namespaces, 855 @SQLType(defaultValue="false") Boolean base64) 856 throws SQLException 857 { 858 if ( null == rows ) 859 throw new SQLDataException( 860 "XMLTABLE row expression may not be null", "22004"); 861 862 if ( null == columns ) 863 throw new SQLDataException( 864 "XMLTABLE columns expression array may not be null", "22004"); 865 866 if ( null == base64 ) 867 throw new SQLDataException( 868 "XMLTABLE base64 parameter may not be null", "22004"); 869 XMLBinary enc = base64 ? XMLBinary.BASE64 : XMLBinary.HEX; 870 871 Binding.Assemblage rowBindings = 872 new BindingsFromResultSet(passing, true); 873 874 Iterable<Map.Entry<String,String>> namespacepairs = 875 namespaceBindings(namespaces); 876 877 XQueryEvaluator[] columnXQEs = new XQueryEvaluator[ columns.length ]; 878 SequenceType[] columnStaticTypes = new SequenceType[ columns.length ]; 879 880 try 881 { 882 XQueryCompiler rowXQC = createStaticContextWithPassedTypes( 883 rowBindings, namespacepairs); 884 885 XQueryExecutable rowXQX = rowXQC.compile(rows); 886 887 Binding.Assemblage columnBindings = 888 new BindingsFromXQX(rowXQX, rowBindings); 889 890 XQueryCompiler columnXQC = createStaticContextWithPassedTypes( 891 columnBindings, namespacepairs); 892 893 boolean ordinalitySeen = false; 894 for ( int i = 0; i < columns.length; ++ i ) 895 { 896 String expr = columns[i]; 897 if ( null == expr ) 898 { 899 if ( ordinalitySeen ) 900 throw new SQLSyntaxErrorException( 901 "No more than one column expression may be null " + 902 "(=> \"for ordinality\")", "42611"); 903 ordinalitySeen = true; 904 continue; 905 } 906 XQueryExecutable columnXQX = columnXQC.compile(expr); 907 columnStaticTypes[i] = 
makeSequenceType( 908 columnXQX.getResultItemType(), 909 columnXQX.getResultCardinality()); 910 columnXQEs[i] = columnXQX.load(); 911 storePassedValuesInDynamicContext( 912 columnXQEs[i], columnBindings, false); 913 } 914 915 XQueryEvaluator rowXQE = rowXQX.load(); 916 XdmSequenceIterator<XdmItem> rowIterator; 917 if ( storePassedValuesInDynamicContext(rowXQE, rowBindings, true) ) 918 rowIterator = (XdmSequenceIterator<XdmItem>) 919 XdmEmptySequence.getInstance().iterator(); 920 else 921 rowIterator = rowXQE.iterator(); 922 return new S9(rowIterator, columnXQEs, columnStaticTypes, enc); 923 } 924 catch ( SaxonApiException | XPathException e ) 925 { 926 throw new SQLException(e.getMessage(), "10000", e); 927 } 928 } 929 930 /** 931 * Called when PostgreSQL has no need for more rows of the tabular result. 932 */ 933 @Override 934 public void close() 935 { 936 m_sequenceIterator.close(); 937 } 938 939 /** 940 * <a id='assignRowValues'>Produce and return one row</a> of 941 * the {@code XMLTABLE} result table per call. 942 *<p> 943 * The row expression has already been compiled and its evaluation begun, 944 * producing a sequence iterator. The column XQuery expressions have all 945 * been compiled and are ready to evaluate, and the compiler's static 946 * analysis has bounded the data types they will produce. Because of the 947 * way the set-returning function protocol works, we don't know the types 948 * of the SQL output columns yet, until the first call of this function, 949 * when the {@code receive} parameter's {@code ResultSetMetaData} can be 950 * inspected to find out. So that will be the first thing done when called 951 * with {@code currentRow} of zero. 
952 *<p> 953 * Each call will then: (a) get the next value from the row expression's 954 * sequence iterator, then for each column, (b) evaluate that column's 955 * XQuery expression on the row value, and (c) assign that column's result 956 * to the SQL output column, casting to the proper type (which the SQL/XML 957 * spec has very exacting rules on how to do). 958 *<p> 959 * A note before going any further: this implementation, while fairly 960 * typical of a PostgreSQL set-returning user function, is <em>not</em> the 961 * way the SQL/XML spec defines {@code XMLTABLE}. The official behavior of 962 * {@code XMLTABLE} is defined in terms of a rewriting, at the SQL level, 963 * into a much-expanded SQL query where each result column appears as an 964 * {@code XMLQUERY} call applying the column expression, wrapped in an 965 * {@code XMLCAST} to the result column type (with a 966 * {@code CASE WHEN XMLEXISTS} thrown in to support column defaults). 967 *<p> 968 * As an ordinary user function, this example cannot rely on any fancy 969 * query rewriting during PostgreSQL's parse analysis. The slight syntax 970 * desugaring needed to transform a standard {@code XMLTABLE} call into a 971 * call of this "xmltable" is not too hard to learn and do by hand, but no 972 * one would ever want to write out by hand the whole longwinded "official" 973 * expansion prescribed in the spec. So this example is a compromise. 974 *<p> 975 * The main thing lost in the compromise is the handling of column defaults. 976 * The full rewriting with per-column SQL expressions means that each 977 * column default expression can be evaluated exactly when/if needed, which 978 * is often the desired behavior. This implementation as an ordinary 979 * function, whose arguments all get evaluated ahead of the call, can't 980 * really do that. Otherwise, there's nothing in the spec that's inherently 981 * unachievable in this implementation. 
982 *<p> 983 * Which brings us to the matter of casting each column expression result 984 * to the proper type for its SQL result column. 985 *<p> 986 * Like any spec, {@code SQL/XML} does not mandate that an implementation 987 * must be done in exactly the way presented in the spec (rewritten so each 988 * column value is produced by an {@code XMLQUERY} wrapped in an 989 * {@code XMLCAST}). The requirement is to produce the equivalent result. 990 *<p> 991 * A look at the rewritten query shows that each column XQuery result value 992 * must be representable as some value in SQL's type system, not once, but 993 * twice: first as the result returned by {@code XMLQUERY} and passed along 994 * to {@code XMLCAST}, and finally with the output column's type as the 995 * result of the {@code XMLCAST}. 996 *<p> 997 * Now, the output column type can be whatever is wanted. Importantly, it 998 * can be either an XML type, or any ordinary SQL scalar type, like a 999 * {@code float} or a {@code date}. Likewise, the XQuery column expression 1000 * may have produced some atomic value (like an {@code xs:double} or 1001 * {@code xs:date}), or some XML node, or any sequence of any of those. 1002 *<p> 1003 * What are the choices for the type in the middle: the SQL value returned 1004 * by {@code XMLQUERY} and passed on to {@code XMLCAST}? 1005 *<p> 1006 * There are two. An ISO-standard SQL {@code XMLQUERY} can specify 1007 * {@code RETURNING SEQUENCE} or {@code RETURNING CONTENT}. The first option 1008 * produces the type {@code XML(SEQUENCE)}, a useful type that PostgreSQL 1009 * does not currently have. {@code XML(SEQUENCE)} can hold exactly whatever 1010 * an XQuery expression can produce: a sequence of any length, of any 1011 * mixture of atomic values and XML nodes (even such oddities as attribute 1012 * nodes outside of any element), in any order. An {@code XML(SEQUENCE)} 1013 * value need not look anything like what "XML" normally brings to mind. 
1014 *<p> 1015 * With the other option, {@code RETURNING CONTENT}, the result of 1016 * {@code XMLQUERY} has to be something that PostgreSQL's {@code xml} type 1017 * could store: a serialized document with XML structure, but without the 1018 * strict requirements of exactly one root element with no text outside it. 1019 * At the limit, a completely non-XMLish string of ordinary text is 1020 * perfectly acceptable XML {@code CONTENT}, as long as it uses the right 1021 * {@code &...;} escapes for any characters that could look like XML markup. 1022 *<p> 1023 * {@code XMLCAST} is able to accept either form as input, and deliver it 1024 * to the output column as whatever type is needed. But the spec leaves no 1025 * wiggle room as to which form to use: 1026 *<ul> 1027 *<li>If the result column type is {@code XML(SEQUENCE)}, then the 1028 * {@code XMLQUERY} is to specify {@code RETURNING SEQUENCE}. It produces 1029 * the column's result type directly, so the {@code XMLCAST} has nothing 1030 * to do. 1031 *<li>In every other case (<em>every</em> other case), the {@code XMLQUERY} 1032 * is to specify {@code RETURNING CONTENT}. 1033 *</ul> 1034 *<p> 1035 * At first blush, that second rule should sound crazy. Imagine a column 1036 * definition like 1037 *<pre> 1038 * growth float8 PATH 'math:pow(1.0 + $RATE, count(year))' 1039 *</pre> 1040 * The expression produces an {@code xs:double}, which can be assigned 1041 * directly to a PostgreSQL {@code float8}, but the rule in the spec will 1042 * have it first converted to a decimal string representation, made into 1043 * a text node, wrapped in a document node, and returned as XML, to be 1044 * passed along to {@code XMLCAST}, which parses it, discards the wrapping 1045 * document node, parses the text content as a double, and returns that as 1046 * a proper value of the result column type (which, in this example, it 1047 * already is). 1048 *<p> 1049 * The spec does not go into why this rule was chosen. 
The only rationale 1050 * that makes sense to me is that the {@code XML(SEQUENCE)} data type 1051 * is an SQL feature (X190) that not every implementation will support, 1052 * so the spec has to define {@code XMLTABLE} using a rewritten query that 1053 * can work on systems that do not have that type. (PostgreSQL itself, at 1054 * present, does not have it.) 1055 *<p> 1056 * The first rule, when {@code XML(SEQUENCE)} is the result column type, 1057 * will naturally never be in play except on a system that has that type, in 1058 * which case it can be used directly. But even such a system must still 1059 * produce, in all other cases, results that match what a system without 1060 * that type would produce. All those cases are therefore defined as if 1061 * going the long way through {@code XML(CONTENT)}. 1062 *<p> 1063 * Whenever the XQuery expression can be known to produce a (possibly empty 1064 * or) singleton sequence of an atomic type, the long round trip can be 1065 * shown to be idempotent, and we can skip right to casting the atomic type 1066 * to the SQL result column type. A few other cases could be short-circuited 1067 * the same way. But in general, for cases involving nodes or non-singleton 1068 * sequences, it is safest to follow the spec punctiliously; the steps are 1069 * defined in terms of XQuery constructs like {@code document {...}} and 1070 * {@code data()}, which have specs of their own with many traps for the 1071 * unwary, and the XQuery library provides implementations of them that are 1072 * already tested and correct. 1073 *<p> 1074 * Though most of the work can be done by the XQuery library, it may be 1075 * helpful to look closely at just what the specification entails. 1076 *<p> 1077 * Again, but for the case of an {@code XML(SEQUENCE)} result column, in all 1078 * other cases the result must pass through 1079 * {@code XMLQUERY(... RETURNING CONTENT EMPTY ON EMPTY)}. That, in turn, is 1080 * defined as equivalent to {@code XMLQUERY(... 
RETURNING SEQUENCE)} with 1081 * the result then passed to {@code XMLDOCUMENT(... RETURNING CONTENT)}, 1082 * whose behavior is that of a 1083 * <a href='https://www.w3.org/TR/xquery-31/#id-documentConstructors'> 1084 * document node constructor</a> in XQuery, with 1085 * <a href='https://www.w3.org/TR/xquery-31/#dt-construction-mode'> 1086 * construction mode</a> {@code preserve}. The first step of that behavior 1087 * is the same as Step 1e in the processing of 1088 * <a href='https://www.w3.org/TR/xquery-31/#id-content'>direct element 1089 * constructor content</a>. The remaining steps are those laid out for the 1090 * document node constructor. 1091 *<p> 1092 * Clarity demands flattening this nest of specifications into a single 1093 * ordered list of the steps to apply: 1094 *<ul> 1095 *<li>Any item in the sequence that is an array is flattened (its elements 1096 * become items in the sequence). 1097 *<li>If any item is a function, {@code err:XQTY0105} is raised. 1098 *<li>Any sequence {@code $s} of adjacent atomic values is replaced by 1099 * {@code string-join($s, ' ')}. 1100 *<li>Any XML node in the sequence is copied (as detailed in the spec). 1101 *<li>After all the above, any document node that may exist in the resulting 1102 * sequence is flattened (replaced by its children). 1103 *<li>A single text node is produced for any run of adjacent text nodes in 1104 * the sequence (including any that have newly become adjacent by the 1105 * flattening of document nodes), by concatenation with no separator (unlike 1106 * the earlier step where atomic values were concatenated with a space as 1107 * the separator). 1108 *<li>If the sequence directly contains any attribute or namespace node, 1109 * {@code err:XPTY0004} is raised. <b>More on this below.</b> 1110 *<li>The sequence resulting from the preceding steps is wrapped in one 1111 * new document node (as detailed in the spec). 
1112 *</ul> 1113 *<p> 1114 * At this point, the result could be returned to SQL as a value of 1115 * {@code XML(CONTENT(ANY))} type, to be passed to an {@code XMLCAST} 1116 * invocation. This implementation avoids that, and simply proceeds with the 1117 * existing Java in-memory representation of the document tree, to the 1118 * remaining steps entailed in an {@code XMLCAST} to the output column type: 1119 *<ul> 1120 *<li>If the result column type is an XML type, rewriting would turn the 1121 * {@code XMLCAST} into a simple {@code CAST} and that's that. Otherwise, 1122 * the result column has some non-XML, SQL type, and: 1123 *<li>The algorithm "Removing XQuery document nodes from an XQuery sequence" 1124 * is applied. By construction, we know the only such node is the one the 1125 * whole sequence was recently wrapped in, two steps ago (you get your 1126 * house back, you get your dog back, you get your truck back...). 1127 *<li>That sequence of zero or more XML nodes is passed to the 1128 *<a href='https://www.w3.org/TR/xpath-functions-31/#func-data'>fn:data</a> 1129 * function, producing a sequence of zero or more atomic values, which will 1130 * all have type {@code xs:untypedAtomic} (because the document-wrapping 1131 * stringified any original atomic values and wrapped them in text nodes, 1132 * for which the 1133 * <a href='https://www.w3.org/TR/xpath-datamodel-31/#acc-summ-typed-value'> 1134 * typed-value</a> is {@code xs:untypedAtomic} by definition). This sequence 1135 * also has cardinality zero-or-more, and may be shorter or longer than the 1136 * original. 1137 *<li>If the sequence is empty, the result column is assigned {@code NULL} 1138 * (or the column's default value, if one was specified). 
Otherwise, the 1139 * sequence is known to have length one or more, and: 1140 *<li>The spec does not say this (which may be an oversight or bug), but the 1141 * sequence must be checked for length greater than one, raising 1142 * {@code err:XPTY0004} in that case. The following steps require it to be a 1143 * singleton. 1144 *<li>It is labeled as a singleton sequence of {@code xs:anyAtomicType} and 1145 * used as input to an XQuery {@code cast as} expression. (Alternatively, it 1146 * could be labeled a one-or-more sequence of {@code xs:anyAtomicType}, 1147 * leaving the length check to be done by {@code cast as}, which would raise 1148 * the same error {@code err:XPTY0004}, if longer than one.) 1149 *<li>The {@code cast as} is to the XQuery type determined as in 1150 * {@code determineXQueryFormalType} below, based on the SQL type of the 1151 * result column; or, if the SQL type is a date/time type with no time zone, 1152 * there is a first {@code cast as} to a specific XSD date/time type, which 1153 * is (if it has a time zone) first adjusted to UTC, then stripped of its 1154 * time zone, followed by a second {@code cast as} from that type to the one 1155 * determined from the result column type. Often, that will be the same type 1156 * as was used for the time zone adjustment, and the second {@code cast as} 1157 * will have nothing to do. 1158 *<li>The XQuery value resulting from the cast is converted and assigned to 1159 * the SQL-typed result column, a step with many details but few surprises, 1160 * therefore left for the morbidly curious to explore in the code. The flip 1161 * side of the time zone removal described above happens here: if the SQL 1162 * column type expects a time zone and the incoming value lacks one, it is 1163 * given a zone of UTC. 1164 *</ul> 1165 *<p> 1166 * The later steps above, those following the length-one check, are 1167 * handled by {@code xmlCastAsNonXML} below. 
1168 *<p> 1169 * The earlier steps, from the start through the {@code XMLCAST} early steps 1170 * of document-node unwrapping, can all be applied by letting the original 1171 * result sequence be {@code $EXPR} in the expression: 1172 *<pre> 1173 * declare construction preserve; 1174 * data(document { $EXPR } / child::node()) 1175 *</pre> 1176 * which may seem a bit of an anticlimax after seeing how many details lurk 1177 * behind those tidy lines of code. 1178 *<p> 1179 * <strong>About bare attribute nodes</strong> 1180 *<p> 1181 * One consequence of the rules above deserves special attention. 1182 * Consider something like: 1183 *<pre> 1184 * XMLTABLE('.' PASSING '<a foo="bar"/>' COLUMNS c1 VARCHAR PATH 'a/@foo'); 1185 *</pre> 1186 *<p> 1187 * The result of the column expression is an XML attribute node all on its 1188 * own, with name {@code foo} and value {@code bar}, not enclosed in any 1189 * XML element. In the data type {@code XML(SEQUENCE)}, an attribute node 1190 * can appear standalone like that, but not in {@code XML(CONTENT)}. 1191 *<p> 1192 * Db2, Oracle, and even the XPath-based pseudo-XMLTABLE built into 1193 * PostgreSQL, will all accept that query and produce the result "bar". 1194 *<p> 1195 * However, a strict interpretation of the spec cannot produce that result, 1196 * because the result column type ({@code VARCHAR}) is not 1197 * {@code XML(SEQUENCE)}, meaning the result must be as if passed through 1198 * {@code XMLDOCUMENT(... RETURNING CONTENT)}, and the XQuery 1199 * {@code document { ... }} constructor is required to raise 1200 * {@code err:XPTY0004} upon encountering any bare attribute node. The 1201 * apparently common, convenient behavior of returning the attribute node's 1202 * value component is not, strictly, conformant. 1203 *<p> 1204 * This implementation will raise {@code err:XPTY0004}. That can be avoided 1205 * by simply wrapping any such bare attribute in {@code data()}: 1206 *<pre> 1207 * ... 
COLUMNS c1 VARCHAR PATH 'a/data(@foo)'); 1208 *</pre> 1209 *<p> 1210 * It is possible the spec has an editorial mistake and did not intend to 1211 * require an error for this usage, in which case this implementation can 1212 * be changed to match a future clarification of the spec. 1213 */ 1214 @Override 1215 public boolean assignRowValues(ResultSet receive, long currentRow) 1216 throws SQLException 1217 { 1218 if ( 0 == currentRow ) 1219 { 1220 m_outBindings = new BindingsFromResultSet(receive, m_columnXQEs); 1221 int i = -1; 1222 AtomizingFunction atomizer = null; 1223 for ( Binding.Parameter p : m_outBindings ) 1224 { 1225 SequenceType staticType = m_columnStaticTypes [ ++ i ]; 1226 /* 1227 * A null in m_columnXQEs identifies the ORDINALITY column, 1228 * if any. Assign nothing to m_atomize[i], it won't be used. 1229 */ 1230 if ( null == m_columnXQEs [ i ] ) 1231 continue; 1232 1233 if ( Types.SQLXML == p.typeJDBC() ) 1234 continue; 1235 1236 /* 1237 * Ok, the output column type is non-XML; choose an atomizer, 1238 * either a simple identity if the result type is statically 1239 * known to be zero-or-one atomic, or the long way through the 1240 * general-purpose one. If the type is statically known to be 1241 * the empty sequence (weird, but not impossible), the identity 1242 * atomizer suffices and we're on to the next column. 1243 */ 1244 OccurrenceIndicator occur = staticType.getOccurrenceIndicator(); 1245 if ( OccurrenceIndicator.ZERO == occur ) 1246 { 1247 m_atomize [ i ] = (v, col) -> v; 1248 continue; 1249 } 1250 1251 /* So, it isn't known to be empty. If the column 1252 * expression type isn't known to be atomic, or isn't known to 1253 * be zero-or-one, then the general-purpose atomizer--a trip 1254 * through data(document { ... } / child::node())--must be used. 1255 * This atomizer will definitely produce a sequence of length 1256 * zero or one, raising XPTY0004 otherwise. So the staticType 1257 * can be replaced by xs:anyAtomicType?. 
xmlCastAsNonXML will 1258 * therefore be passed xs:anyAtomicType, as in the spec. 1259 * BUT NO ... Saxon is more likely to find a converter from 1260 * xs:untypedAtomic than from xs:anyAtomicType. 1261 */ 1262 ItemType itemType = staticType.getItemType(); 1263 if ( occur.allowsMany() 1264 || ! ItemType.ANY_ATOMIC_VALUE.subsumes(itemType) 1265 /* 1266 * The following tests may be punctilious to a fault. If we 1267 * have a bare Saxon atomic type of either xs:base64Binary 1268 * or xs:hexBinary type, Saxon will happily and successfully 1269 * convert it to a binary string; but if we have the same 1270 * thing as a less-statically-determinate type that we'll 1271 * put through the atomizer, the conversion will fail unless 1272 * its encoding matches the m_xmlbinary setting. That could 1273 * seem weirdly unpredictable to a user, so we'll just 1274 * (perversely) disallow the optimization (which would 1275 * succeed) in the cases where the specified, unoptimized 1276 * behavior would be to fail. 1277 */ 1278 || ItemType.HEX_BINARY.subsumes(itemType) 1279 && (XMLBinary.HEX != m_xmlbinary) 1280 || ItemType.BASE64_BINARY.subsumes(itemType) 1281 && (XMLBinary.BASE64 != m_xmlbinary) 1282 ) 1283 { 1284 if ( null == atomizer ) 1285 { 1286 XQueryEvaluator docWrapUnwrap = PredefinedQueryHolders 1287 .DocumentWrapUnwrap.INSTANCE.load(); 1288 atomizer = (v, col) -> 1289 { 1290 docWrapUnwrap.setExternalVariable( 1291 PredefinedQueryHolders.s_qEXPR, v); 1292 v = docWrapUnwrap.evaluate(); 1293 /* 1294 * It's already zero-or-one, or XPTY0004 was thrown 1295 */ 1296 return v; 1297 }; 1298 } 1299 m_atomize [ i ] = atomizer; 1300 /* 1301 * The spec wants anyAtomicType below instead of 1302 * untypedAtomic. But Saxon's getConverter is more likely 1303 * to fail to find a converter from anyAtomicType to an 1304 * arbitrary type, than from untypedAtomic. So use that. 
1305 */ 1306 m_columnStaticTypes [ i ] = s_01untypedAtomic; 1307 } 1308 else 1309 { 1310 /* 1311 * We know we'll be getting zero-or-one atomic value, so 1312 * the atomizing function can be the identity. 1313 */ 1314 m_atomize [ i ] = (v, col) -> v; 1315 } 1316 } 1317 } 1318 1319 if ( ! m_sequenceIterator.hasNext() ) 1320 return false; 1321 1322 ++ currentRow; // for use as 1-based ordinality column 1323 1324 XdmItem it = m_sequenceIterator.next(); 1325 1326 int i = 0; 1327 for ( Binding.Parameter p : m_outBindings ) 1328 { 1329 XQueryEvaluator xqe = m_columnXQEs [ i ]; 1330 AtomizingFunction atomizer = m_atomize [ i ]; 1331 SequenceType staticType = m_columnStaticTypes [ i++ ]; 1332 1333 if ( null == xqe ) 1334 { 1335 receive.updateLong( i, currentRow); 1336 continue; 1337 } 1338 1339 try 1340 { 1341 xqe.setContextItem(it); 1342 1343 if ( null == atomizer ) /* => result type was found to be XML */ 1344 { 1345 receive.updateSQLXML( 1346 i, returnContent(xqe.iterator(), false)); 1347 continue; 1348 } 1349 1350 XdmValue x1 = xqe.evaluate(); 1351 x1 = atomizer.apply(x1, i); 1352 1353 /* 1354 * The value is now known to be atomic and either exactly 1355 * one or zero-or-one. May as well just use size() to see if 1356 * it's empty. 1357 */ 1358 if ( 0 == x1.size() ) 1359 { 1360 receive.updateNull(i); // XXX Handle defaults some day 1361 continue; 1362 } 1363 XdmAtomicValue av = (XdmAtomicValue)x1.itemAt(0); 1364 xmlCastAsNonXML( 1365 av, staticType.getItemType(), p, receive, i, m_xmlbinary); 1366 } 1367 catch ( SaxonApiException | XPathException e ) 1368 { 1369 throw new SQLException(e.getMessage(), "10000", e); 1370 } 1371 } 1372 return true; 1373 } 1374 1375 /** 1376 * Store the values of any passed parameters and/or context item into the 1377 * dynamic context, returning true if the overall query should 1378 * short-circuit and return null. 
1379 *<p> 1380 * The specification requires the overall query to return null if a 1381 * context item is specified in the bindings and its value is null. 1382 * @param xqe XQuery evaluator into which to store the values. 1383 * @param passing The bindings whose values should be installed. 1384 * @param setContextItem True to handle the context item, if present in the 1385 * bindings. False to skip any processing of the context item, in cases 1386 * where the caller will handle that. 1387 * @return True if the overall query's return should be null, false if the 1388 * query should proceed to evaluation. 1389 */ 1390 private static boolean storePassedValuesInDynamicContext( 1391 XQueryEvaluator xqe, Binding.Assemblage passing, boolean setContextItem) 1392 throws SQLException, SaxonApiException 1393 { 1394 /* 1395 * Is there or is there not a context item? 1396 */ 1397 if ( ! setContextItem || null == passing.contextItem() ) 1398 { 1399 /* "... there is no context item in XDC." */ 1400 } 1401 else 1402 { 1403 Object cve = passing.contextItem().valueJDBC(); 1404 if ( null == cve ) 1405 return true; 1406 XdmValue ci; 1407 if ( cve instanceof XdmNode ) // XXX support SEQUENCE input someday 1408 { 1409 ci = (XdmNode)cve; 1410 } 1411 else 1412 ci = xmlCastAsSequence( 1413 cve, XMLBinary.HEX, passing.contextItem().typeXS()); 1414 switch ( ci.size() ) 1415 { 1416 case 0: 1417 /* "... there is no context item in XDC." 
*/ 1418 break; 1419 case 1: 1420 xqe.setContextItem(ci.itemAt(0)); 1421 break; 1422 default: 1423 throw new SQLDataException( 1424 "invalid XQuery context item", "2200V"); 1425 } 1426 } 1427 1428 /* 1429 * For each <XML query variable> XQV: 1430 */ 1431 for ( Binding.Parameter p : passing ) 1432 { 1433 String name = p.name(); 1434 Object v = p.valueJDBC(); 1435 XdmValue vv; 1436 if ( null == v ) 1437 vv = XdmEmptySequence.getInstance(); 1438 else if ( v instanceof XdmNode ) // XXX support SEQUENCE someday 1439 { 1440 vv = (XdmNode)v; 1441 } 1442 else 1443 vv = xmlCastAsSequence( 1444 v, XMLBinary.HEX, p.typeXS().getItemType()); 1445 xqe.setExternalVariable(new QName(name), vv); 1446 } 1447 1448 return false; 1449 } 1450 1451 /** 1452 * Return a s9api {@link XQueryCompiler XQueryCompiler} with static context 1453 * preconfigured as the Syntax Rules dictate. 1454 * @param pt The single-row ResultSet representing the passed parameters 1455 * and context item, if any. 1456 * @param nameToIndex A Map, supplied empty, that on return will map 1457 * variable names for the dynamic context to column indices in {@code pt}. 1458 * If a context item was supplied, its index will be entered in the map 1459 * with the null key. 
1460 */ 1461 private static XQueryCompiler createStaticContextWithPassedTypes( 1462 Binding.Assemblage pt, Iterable<Map.Entry<String,String>> namespaces) 1463 throws SQLException, XPathException 1464 { 1465 XQueryCompiler xqc = s_s9p.newXQueryCompiler(); 1466 xqc.declareNamespace( 1467 "sqlxml", "http://standards.iso.org/iso9075/2003/sqlxml"); 1468 // https://sourceforge.net/p/saxon/mailman/message/20318550/ : 1469 xqc.declareNamespace("xdt", W3C_XML_SCHEMA_NS_URI); 1470 1471 for ( Map.Entry<String,String> e : namespaces ) 1472 xqc.declareNamespace(e.getKey(), e.getValue()); 1473 1474 /* 1475 * This business of predeclaring global external named variables 1476 * is not an s9api-level advertised ability in Saxon, hence the 1477 * various getUnderlying.../getStructured... methods here to access 1478 * the things that make it happen. 1479 */ 1480 StaticQueryContext sqc = xqc.getUnderlyingStaticContext(); 1481 1482 for ( Binding.Parameter p : pt ) 1483 { 1484 String name = p.name(); 1485 int ct = p.typeJDBC(); 1486 assertCanCastAsXmlSequence(ct, name); 1487 SequenceType st = p.typeXS(); 1488 sqc.declareGlobalVariable( 1489 new QName(name).getStructuredQName(), 1490 st.getUnderlyingSequenceType(), null, true); 1491 } 1492 1493 /* 1494 * Apply syntax rules to the context item, if any. 1495 */ 1496 Binding.ContextItem ci = pt.contextItem(); 1497 if ( null != ci ) 1498 { 1499 int ct = ci.typeJDBC(); 1500 assertCanCastAsXmlSequence(ct, "(context item)"); 1501 ItemType it = ci.typeXS(); 1502 xqc.setRequiredContextItemType(it); 1503 } 1504 1505 return xqc; 1506 } 1507 1508 /** 1509 * Check that something's type is "convertible to XML(SEQUENCE) 1510 * according to the Syntax Rules of ... <XML cast specification>." 1511 * That turns out not to be a very high bar; not much is excluded 1512 * by those rules except collection, row, structured, or 1513 * reference typed <value expression>s. 1514 * @param jdbcType The {@link Types JDBC type} to be checked. 
1515 * @param what A string to include in the exception message if the 1516 * check fails. 1517 * @throws SQLException if {@code jdbcType} is one of the prohibited types. 1518 */ 1519 private static void assertCanCastAsXmlSequence(int jdbcType, String what) 1520 throws SQLException 1521 { 1522 if ( Types.ARRAY == jdbcType || Types.STRUCT == jdbcType 1523 || Types.REF == jdbcType ) 1524 throw new SQLSyntaxErrorException( 1525 "The type of \"" + what + "\" is not suitable for " + 1526 "XMLCAST to XML(SEQUENCE).", "42804"); 1527 } 1528 1529 /** 1530 * The "determination of an XQuery formal type notation" algorithm. 1531 *<p> 1532 * This is relied on for parameters and context items passed to 1533 * {@code XMLQUERY} and therefore, {@code XMLTABLE} (and also, in the spec, 1534 * {@code XMLDOCUMENT} and {@code XMLPI}). Note that it does <em>not</em> 1535 * take an {@code XMLBinary} parameter, but rather imposes hexadecimal form 1536 * unconditionally, so in the contexts where this is called, any 1537 * {@code xmlbinary} setting is ignored. 1538 * @param b a {@code Binding} from which the JDBC type can be retrieved 1539 * @param forContextItem whether the type being derived is for a context 1540 * item or (if false) for a named parameter. 1541 * @return a {@code SequenceType} (always a singleton in the 1542 * {@code forContextItem} case) 1543 */ 1544 private static SequenceType determineXQueryFormalType( 1545 Binding b, boolean forContextItem) 1546 throws SQLException 1547 { 1548 int sd = b.typeJDBC(); 1549 OccurrenceIndicator suffix; 1550 /* 1551 * The SQL/XML standard uses a formal type notation straight out of 1552 * the XQuery 1.0 and XPath 2.0 Formal Semantics document, and that is 1553 * strictly more fine-grained and expressive than anything you can 1554 * actually say in the form of XQuery SequenceTypes. 
This method will 1555 * simply return the nearest approximation in the form of a sequence 1556 * type; some of the standard's distinct formal type notations will 1557 * collapse into the same SequenceType. 1558 * That also means the various cases laid out in the standard will, 1559 * here, all simply assign some ItemType to 'it', and therefore the 1560 * tacking on of the occurrence suffix can be factored out for the 1561 * very end. 1562 */ 1563 ItemType it; 1564 1565 if ( forContextItem ) 1566 suffix = OccurrenceIndicator.ONE; 1567 // else if sd is XML(SEQUENCE) - we don't have this type yet 1568 // suffix = OccurrenceIndicator.ZERO_OR_MORE; 1569 /* 1570 * Go through the motions of checking isNullable, though PL/Java's JDBC 1571 * currently hardcodes columnNullableUnknown. Maybe someday it won't. 1572 */ 1573 else if ( b.knownNonNull() ) 1574 suffix = OccurrenceIndicator.ONE; 1575 else 1576 suffix = OccurrenceIndicator.ZERO_OR_ONE; 1577 1578 // Define ET... for {DOCUMENT|CONTENT}(XMLSCHEMA) case ... not supported 1579 1580 // if SD is XML(DOCUMENT(UNTYPED)) - not currently tracked, can't tell 1581 // it = s_itf.getDocumentTest(item type for xdt:untyped); 1582 // else if SD is XML(DOCUMENT(ANY)) - not currently tracked, can't tell 1583 // it = s_itf.getDocumentTest(item type for xs:anyType); 1584 // else if SD is XML(DOCUMENT(XMLSCHEMA)) - unsupported and can't tell 1585 // it = s_itf.getDocumentTest(the ET... we didn't define earlier) 1586 // else if SD is XML(CONTENT(UNTYPED)) - which we're not tracking ... 1587 // at s9api granularity, there's no test for this that's not same as: 1588 // else if SD is XML(CONTENT(ANY)) - which we must assume for ANY XML 1589 if ( Types.SQLXML == sd ) 1590 it = s_itf.getNodeKindTest(DOCUMENT); 1591 // else if SD is XML(CONTENT(XMLSCHEMA)) - we don't track and can't tell 1592 // at s9api granularity, there's no test that means this anyway. 
1593 // else if SD is XML(SEQUENCE) - we really should have this type, but no 1594 // it = it.ANY_ITEM 1595 else // it ain't XML, it's some SQL type 1596 { 1597 ItemType xmlt = mapSQLDataTypeToXMLSchemaDataType( 1598 b, XMLBinary.HEX, Nulls.ABSENT); 1599 // ItemType pt = xmlt.getUnderlyingItemType().getPrimitiveType() 1600 // .somehowGetFromUnderlyingPTBackToS9apiPT() - ugh, the hard part 1601 /* 1602 * The intention here is to replace any derived type with the 1603 * primitive type it is based on, *except* for three types that are 1604 * technically derived: integer (from decimal), yearMonthDuration 1605 * and dayTimeDuration (from duration). Those are not replaced, so 1606 * they stand, as if they were honorary primitive types. 1607 * 1608 * For now, it's simplified greatly by mapSQLDataType... skipping 1609 * the construction of a whole derived XML Schema snippet, and just 1610 * returning the type we want anyway. Also, no need to dive under 1611 * the s9api layer to try to make getPrimitiveType work. 1612 */ 1613 it = xmlt; 1614 } 1615 1616 SequenceType xftn = makeSequenceType(it, suffix); 1617 return xftn; 1618 } 1619 1620 @SuppressWarnings("fallthrough") 1621 private static ItemType mapSQLDataTypeToXMLSchemaDataType( 1622 Binding b, XMLBinary xmlbinary, Nulls nulls) 1623 throws SQLException 1624 { 1625 /* 1626 * Nearly all of the fussing about specified in the standard 1627 * for this method is to create XML Schema derived types that 1628 * accurately reflect the typmod information for the SQL type 1629 * in question. Then, in determineXQueryFormalType (the only 1630 * client of this method so far!), all of that is thrown away 1631 * and our painstakingly specified derived type is replaced with 1632 * the primitive type we based it on. That simplifies a lot. :) 1633 * For now, forget the derived XML Schema declarations, and just 1634 * return the primitive types they would be based on. 
1635 * 1636 * The need for the nulls parameter vanishes if no XML Schema snippets 1637 * are to be generated. 1638 * 1639 * If the full XML Schema snippet generation ever proves to be 1640 * needed, one hacky way to get it would be with a SELECT 1641 * query_to_xmlschema('SELECT null::type-in-question', false, false, 1642 * '') where the same derivations are already implemented (though it 1643 * produces some different results; that work may have been done from 1644 * an earlier version of the standard). 1645 */ 1646 switch ( b.typeJDBC() ) 1647 { 1648 case Types.CHAR: 1649 case Types.VARCHAR: 1650 case Types.CLOB: 1651 return ItemType.STRING; 1652 1653 case Types.BINARY: 1654 case Types.VARBINARY: 1655 case Types.BLOB: 1656 return XMLBinary.HEX == xmlbinary ? 1657 ItemType.HEX_BINARY : ItemType.BASE64_BINARY; 1658 1659 case Types.NUMERIC: 1660 case Types.DECIMAL: 1661 /* 1662 * Go through the motions to get the scale and do this right, 1663 * though PL/Java's getScale currently hardcodes a -1 return. 1664 * Maybe someday it won't. 1665 */ 1666 int scale = b.scale(); 1667 return 0 == scale ? ItemType.INTEGER : ItemType.DECIMAL; 1668 1669 case Types.INTEGER: 1670 return ItemType.INT; 1671 case Types.SMALLINT: 1672 return ItemType.SHORT; 1673 case Types.BIGINT: 1674 return ItemType.LONG; 1675 1676 case Types.REAL: 1677 return ItemType.FLOAT; // could check P, MINEXP, MAXEXP here. 
1678 case Types.FLOAT: 1679 assert false; // PG should always report either REAL or DOUBLE 1680 /*FALLTHROUGH*/ 1681 case Types.DOUBLE: 1682 return ItemType.DOUBLE; 1683 1684 case Types.BOOLEAN: 1685 return ItemType.BOOLEAN; 1686 1687 case Types.DATE: 1688 return ItemType.DATE; 1689 1690 case Types.TIME: 1691 return ItemType.TIME; 1692 1693 case Types.TIME_WITH_TIMEZONE: 1694 return ItemType.TIME; // restrictive facet would make sense here 1695 1696 case Types.TIMESTAMP: 1697 return ItemType.DATE_TIME; 1698 1699 case Types.TIMESTAMP_WITH_TIMEZONE: 1700 return ItemType.DATE_TIME_STAMP; // xsd 1.1 equivalent of facet! 1701 1702 // There's no JDBC Types.INTERVAL; handle it after switch 1703 1704 // Good luck finding out from JDBC if it's a domain 1705 1706 // PG doesn't have DISTINCT types per se 1707 1708 // PL/Java's JDBC doesn't support PostgreSQL's arrays as ARRAY 1709 1710 // PG doesn't seem to have multisets (JDBC doesn't grok them either) 1711 1712 // Types.SQLXML we could recognize, but for determineFormalTypes it has 1713 // been handled already, and it's not yet clear what would be 1714 // appropriate to return (short of the specified XMLSchema snippet), 1715 // probably just document. 1716 1717 // So punt all these for now; what hasn't been handled in this switch 1718 // can be handled specially after the switch falls through, and what 1719 // isn't, isn't supported just now. 1720 } 1721 1722 String typeName = b.typePG(); 1723 if ( "interval".equals(typeName) ) 1724 { 1725 /* 1726 * XXX This isn't right yet; it needs to be refined to a 1727 * YEAR_MONTH_DURATION or a DAY_TIME_DURATION in the appropriate 1728 * cases, and for that it needs access to the typmod information 1729 * for the type, which getColumnTypeName doesn't now provide. 
1730 */ 1731 return ItemType.DURATION; 1732 } 1733 1734 throw new SQLNonTransientException(String.format( 1735 "Mapping SQL type \"%s\" to XML type not supported", typeName), 1736 "0N000"); 1737 } 1738 1739 /** 1740 * Implement that portion of the {@code <XML cast>} specification where 1741 * the target data type is sequence, and (for now, anyway) the source is 1742 * not an XML type; the only caller, so far, handles that case separately. 1743 * @param v The SQL value to be cast (in the form of an Object from JDBC). 1744 * @param enc Whether binary values should be encoded in hex or base 64. 1745 * @param xst The formal static XS type derived from the SQL type of v. 1746 * @return An {@code XdmValue}, {@code null} if {@code v} is null. 1747 */ 1748 private static XdmValue xmlCastAsSequence( 1749 Object v, XMLBinary enc, ItemType xst) 1750 throws SQLException 1751 { 1752 if ( null == v ) 1753 return null; 1754 /* 1755 * What happens next in the standard is one of the most breathtaking 1756 * feats of obscurantism in the whole document. It begins, plausibly 1757 * enough, by using mapValuesOfSQLTypesToValuesOfXSTypes to produce 1758 * the lexical form of the XS type (but with XML metacharacters escaped, 1759 * if it's a string type). Then: 1760 * 1. That lexical form is to be fed to an XML parser, producing an 1761 * XQuery document node that NEVER can be a well-formed document (it 1762 * is expected to satisfy document { text ? } where the text node is 1763 * just the lexical value form we started with, now with the escaped 1764 * metacharacters unescaped again as a consequence of parsing). For 1765 * some source types, mapValuesOfSQLTypesToValuesOfXSTypes can 1766 * produce a string that parses to XML with element content: row 1767 * types, arrays, multisets, XML. Clearly, those cases can't satisfy 1768 * the formal type assumed here, and they are cases this routine 1769 * won't be expected to handle: XML handled separately by the caller, 1770 * arrays/structs/etc. 
being ruled out by assertCanCastAsXmlSequence. 1771 * 2. That document node is made the $TEMP parameter of an XML Query, 1772 * '$TEMP cast as XSBTN' (where XSBTN is a QName for the result type 1773 * chosen according to the rules) and the sequence resulting from 1774 * that query is the result of the cast. 1775 * 1776 * Step (1) can only succeed if the XML parser doesn't insist on well- 1777 * formed documents, as the stock JRE parser emphatically does. And the 1778 * ultimate effect of that whole dance is that the cast in (2) casts a 1779 * document node to the target type, which means the document node gets 1780 * atomized, which, for a document node, means everything is thrown away 1781 * save the concatenated values of its descendant text nodes (or node, 1782 * in this case; haven't we seen that value somewhere before?), assigned 1783 * the type xs:untypedAtomic, and then that's operated on by the cast. 1784 * 1785 * Because this implementation's in PL/Java, the value v received here 1786 * has already been mapped from an SQL type to a Java type according to 1787 * JDBC's rules as PL/Java implements them, so there's one degree of 1788 * removal from the specified algorithm anyway. And the s9api 1789 * XdmAtomicValue already has constructors from several of the expected 1790 * Java types, as well as one taking a lexical form and explicit type. 1791 * Beause this is /example/ code, rather than slavishly implementing the 1792 * specified algorithm, it will assume that that is either roughly or 1793 * exactly equivalent to what these s9api constructors in fact do, and 1794 * just use them; conformance-testing code could then check for exact 1795 * equivalence if there's enough interest to write it. 1796 * 1797 * So, we will NOT start with this: 1798 * 1799 * String xmlv = mapValuesOfSQLTypesToValuesOfXSTypes( 1800 * v, enc, Nulls.ABSENT, true); 1801 * 1802 * Instead, we'll derive this type first ... 
1803 */ 1804 ItemType xsbt; 1805 // year-month interval type => xsbt = YEAR_MONTH_DURATION 1806 // day-time interval type => xsbt = DAY_TIME_DURATION 1807 xsbt = xst; // we have a winner! 1808 // xs non-built-in atomic type => xsbt = getPrimitiveType(ugh). 1809 1810 /* 1811 * ... and then use this method instead: 1812 */ 1813 try 1814 { 1815 return mapJDBCofSQLvalueToXdmAtomicValue(v, enc, xsbt); 1816 } 1817 catch ( SaxonApiException | XPathException e ) 1818 { 1819 throw new SQLException(e.getMessage(), "10000", e); 1820 } 1821 } 1822 1823 @FunctionalInterface 1824 interface CastingFunction 1825 { 1826 AtomicValue apply(AtomicValue v) throws XPathException; 1827 } 1828 1829 @FunctionalInterface 1830 interface CasterSupplier 1831 { 1832 CastingFunction get() throws SQLException, XPathException; 1833 } 1834 1835 @FunctionalInterface 1836 interface AtomizingFunction 1837 { 1838 /** 1839 * @param v sequence to be atomized 1840 * @param columnIndex only to include in exception if result has more 1841 * than one item 1842 */ 1843 XdmValue apply(XdmValue v, int columnIndex) 1844 throws SaxonApiException, XPathException; 1845 } 1846 1847 private static XPathException noPrimitiveCast(ItemType vt, ItemType xt) 1848 { 1849 return new XPathException( 1850 "Casting from " + vt.getTypeName() + " to " + xt.getTypeName() + 1851 " can never succeed", "XPTY0004"); 1852 } 1853 1854 /** 1855 * Handle the case of XMLCAST to a non-XML target type when the cast operand 1856 * is already a single atomic value. 1857 *<p> 1858 * The caller, if operating on a sequence, must itself handle the case of 1859 * an empty sequence (returning null, per General Rule 4c in :2011), or a 1860 * sequence of length greater than one (raising XPTY0004, which is not 1861 * specified in :2011, but the exclusion of such a sequence is implicit in 1862 * rules 4g and 4h; Db2 silently drops all but the first item, unlike 1863 * Oracle, which raises XPTY0004). 
1864 * @param av The atomic operand value 1865 * @param p The parameter binding, recording the needed type information 1866 * @param rs ResultSet into which the value will be stored 1867 * @param col Index of the result column 1868 */ 1869 private static void xmlCastAsNonXML( 1870 XdmAtomicValue av, ItemType vt, 1871 Binding.Parameter p, ResultSet rs, int col, XMLBinary enc) 1872 throws SQLException, XPathException 1873 { 1874 XdmAtomicValue bv; 1875 ItemType xt = p.typeXT(enc); 1876 1877 CastingFunction caster = p.atomicCaster(vt, () -> 1878 { 1879 ConversionRules rules = vt.getConversionRules(); 1880 Converter c1; 1881 ItemType t1; 1882 Converter c2; 1883 1884 switch ( p.typeJDBC() ) 1885 { 1886 case Types.TIMESTAMP: 1887 t1 = ItemType.DATE_TIME; 1888 break; 1889 case Types.TIME: 1890 t1 = ItemType.TIME; 1891 break; 1892 case Types.DATE: 1893 t1 = ItemType.DATE; 1894 break; 1895 default: 1896 c1 = rules.getConverter( 1897 (AtomicType)vt.getUnderlyingItemType(), 1898 (AtomicType)xt.getUnderlyingItemType()); 1899 if ( null == c1 ) 1900 throw noPrimitiveCast(vt, xt); 1901 return (AtomicValue v) -> c1.convert(v).asAtomic(); 1902 } 1903 /* 1904 * Nothing left here but the rest of the three date/timey cases 1905 * partly handled above. 
1906 */ 1907 c1 = rules.getConverter( 1908 (AtomicType)vt.getUnderlyingItemType(), 1909 (AtomicType)t1.getUnderlyingItemType()); 1910 c2 = rules.getConverter( 1911 (AtomicType)t1.getUnderlyingItemType(), 1912 (AtomicType)xt.getUnderlyingItemType()); 1913 if ( null == c1 || null == c2 ) 1914 throw noPrimitiveCast(vt, xt); 1915 return (AtomicValue v) -> 1916 { 1917 v = c1.convert(v).asAtomic(); 1918 v = ((CalendarValue)v).adjustTimezone(0).removeTimezone(); 1919 return c2.convert(v).asAtomic(); 1920 }; 1921 }); 1922 1923 bv = makeAtomicValue(caster.apply(av.getUnderlyingValue())); 1924 1925 if ( ItemType.STRING.subsumes(xt) ) 1926 rs.updateString(col, bv.getStringValue()); 1927 1928 else if ( ItemType.HEX_BINARY.subsumes(xt) ) 1929 rs.updateBytes(col, 1930 ((HexBinaryValue)bv.getUnderlyingValue()).getBinaryValue()); 1931 else if ( ItemType.BASE64_BINARY.subsumes(xt) ) 1932 rs.updateBytes(col, 1933 ((Base64BinaryValue)bv.getUnderlyingValue()).getBinaryValue()); 1934 1935 else if ( ItemType.DECIMAL.subsumes(xt) ) 1936 rs.updateObject(col, bv.getValue()); 1937 1938 /* 1939 * The standard calls for throwing "data exception - numeric value out 1940 * of range" rather than forwarding a float or double inf, -inf, or nan 1941 * to SQL, but PostgreSQL supports those values, and these conversions 1942 * preserve them. 1943 * Because of the collapsing in typeXT(), xt will never be FLOAT, 1944 * only DOUBLE. JDBC is supposed to handle assigning a double to a float 1945 * column, anyway. 
1946 */ 1947 else if ( ItemType.DOUBLE.subsumes(xt) ) 1948 rs.updateObject(col, bv.getValue()); 1949 1950 else if ( ItemType.DATE.subsumes(xt) ) 1951 rs.updateObject(col, bv.getLocalDate()); 1952 else if ( ItemType.DATE_TIME.subsumes(xt) ) 1953 { 1954 if ( ((CalendarValue)bv.getUnderlyingValue()).hasTimezone() ) 1955 rs.updateObject(col, bv.getOffsetDateTime()); 1956 else 1957 { 1958 LocalDateTime jv = bv.getLocalDateTime(); 1959 rs.updateObject(col, 1960 Types.TIMESTAMP_WITH_TIMEZONE == p.typeJDBC() ? 1961 jv.atOffset(UTC) : jv); 1962 } 1963 } 1964 else if ( ItemType.TIME.subsumes(xt) ) // no handy tz/notz distinction 1965 { 1966 if ( ((CalendarValue)bv.getUnderlyingValue()).hasTimezone() ) 1967 rs.updateObject(col, OffsetTime.parse(bv.getStringValue())); 1968 else 1969 { 1970 LocalTime jv = LocalTime.parse(bv.getStringValue()); 1971 rs.updateObject(col, 1972 Types.TIME_WITH_TIMEZONE == p.typeJDBC() ? 1973 jv.atOffset(UTC) : jv); 1974 } 1975 } 1976 1977 else if ( ItemType.YEAR_MONTH_DURATION.subsumes(xt) ) 1978 rs.updateString(col, toggleIntervalRepr(bv.getStringValue())); 1979 else if ( ItemType.DAY_TIME_DURATION.subsumes(xt) ) 1980 rs.updateString(col, toggleIntervalRepr(bv.getStringValue())); 1981 else if ( ItemType.DURATION.subsumes(xt) ) // need this case for now 1982 rs.updateString(col, toggleIntervalRepr(bv.getStringValue())); 1983 1984 else if ( ItemType.BOOLEAN.subsumes(xt) ) 1985 rs.updateObject(col, bv.getValue()); 1986 else 1987 throw new SQLNonTransientException(String.format( 1988 "Mapping XML type \"%s\" to SQL value not supported", xt), 1989 "0N000"); 1990 } 1991 1992 /** 1993 * Like the "Mapping values of SQL data types to values of XML Schema 1994 * data types" algorithm, except after the SQL values have already been 1995 * converted to Java values according to JDBC rules. 1996 *<p> 1997 * Also, this uses Saxon s9api constructors for the XML Schema values, which 1998 * accept the Java types directly. 
As a consequence, where the target type 1999 * {@code xst} is {@code xs:hexBinary} or {@code xs:base64Binary}, that type 2000 * will be produced, regardless of the passed {@code encoding}. This might 2001 * not be strictly correct, but is probably safest until an oddity in the 2002 * spec can be clarified: {@code determineXQueryFormalType} will always 2003 * declare {@code xs:hexBinary} as the type for an SQL byte string, and it 2004 * would violate type safety to construct a value here that honors the 2005 * {@code encoding} parameter but isn't of the declared formal type. 2006 */ 2007 private static XdmAtomicValue mapJDBCofSQLvalueToXdmAtomicValue( 2008 Object dv, XMLBinary encoding, ItemType xst) 2009 throws SQLException, SaxonApiException, XPathException 2010 { 2011 if ( ItemType.STRING.equals(xst) ) 2012 return new XdmAtomicValue((String)dv); 2013 2014 if ( ItemType.HEX_BINARY.equals(xst) ) 2015 return makeAtomicValue(new HexBinaryValue((byte[])dv)); 2016 if ( ItemType.BASE64_BINARY.equals(xst) ) 2017 return makeAtomicValue(new Base64BinaryValue((byte[])dv)); 2018 2019 if ( ItemType.INTEGER.equals(xst) ) 2020 return new XdmAtomicValue(((BigInteger)dv).toString(), xst); 2021 if ( ItemType.DECIMAL.equals(xst) ) 2022 return new XdmAtomicValue((BigDecimal)dv); 2023 if ( ItemType.INT.equals(xst) ) 2024 return new XdmAtomicValue((Integer)dv); 2025 if ( ItemType.SHORT.equals(xst) ) 2026 return new XdmAtomicValue((Short)dv); 2027 if ( ItemType.LONG.equals(xst) ) 2028 return new XdmAtomicValue((Long)dv); 2029 if ( ItemType.FLOAT.equals(xst) ) 2030 return new XdmAtomicValue((Float)dv); 2031 if ( ItemType.DOUBLE.equals(xst) ) 2032 return new XdmAtomicValue((Double)dv); 2033 2034 if ( ItemType.BOOLEAN.equals(xst) ) 2035 return new XdmAtomicValue((Boolean)dv); 2036 2037 if ( ItemType.DATE.equals(xst) ) 2038 { 2039 if ( dv instanceof LocalDate ) 2040 return new XdmAtomicValue((LocalDate)dv); 2041 return new XdmAtomicValue(dv.toString(), xst); 2042 } 2043 2044 if ( 
ItemType.TIME.equals(xst) ) 2045 return new XdmAtomicValue(dv.toString(), xst); 2046 2047 if ( ItemType.DATE_TIME.equals(xst) ) 2048 { 2049 if ( dv instanceof LocalDateTime ) 2050 return new XdmAtomicValue((LocalDateTime)dv); 2051 return new XdmAtomicValue(dv.toString(), xst); 2052 } 2053 2054 if ( ItemType.DATE_TIME_STAMP.equals(xst) ) 2055 { 2056 if ( dv instanceof OffsetDateTime ) 2057 return new XdmAtomicValue((OffsetDateTime)dv); 2058 return new XdmAtomicValue(dv.toString(), xst); 2059 } 2060 2061 if ( ItemType.DURATION.equals(xst) ) 2062 return new XdmAtomicValue(toggleIntervalRepr((String)dv), xst); 2063 2064 throw new SQLNonTransientException(String.format( 2065 "Mapping SQL value to XML type \"%s\" not supported", xst), 2066 "0N000"); 2067 } 2068 2069 /* 2070 * Toggle the lexical representation of an interval/duration between the 2071 * form PostgreSQL likes and the form XML Schema likes. Only negative values 2072 * are affected. Positive values are returned unchanged, as are those that 2073 * don't fit any expected form; those will probably be reported as malformed 2074 * by whatever tries to consume them. 2075 */ 2076 static String toggleIntervalRepr(String lex) 2077 { 2078 Matcher m = s_intervalSigns.matcher(lex); 2079 if ( ! 
m.matches() ) 2080 return lex; // it's weird, just don't touch it 2081 if ( -1 == m.start(1) ) 2082 { 2083 if ( -1 != m.start(2) && -1 == m.start(3) ) // it's PG negative 2084 return '-' + lex.replace("-", ""); // make it XS negative 2085 } 2086 else if ( -1 == m.start(2) && -1 != m.start(3) )// it's XS negative 2087 return m.usePattern(s_intervalSignSite) // make it PG negative 2088 .reset(lex.substring(1)).replaceAll("-"); 2089 return lex; // it's either positive, or weird, just don't touch it 2090 } 2091 2092 static Iterable<Map.Entry<String,String>> namespaceBindings(String[] nbs) 2093 throws SQLException 2094 { 2095 if ( 1 == nbs.length % 2 ) 2096 throw new SQLSyntaxErrorException( 2097 "Namespace binding array must have even length", "42000"); 2098 Map<String,String> m = new HashMap<>(); 2099 2100 for ( int i = 0; i < nbs.length; i += 2 ) 2101 { 2102 String prefix = nbs[i]; 2103 String uri = nbs[1 + i]; 2104 2105 if ( null == prefix || null == uri ) 2106 throw new SQLDataException( 2107 "Namespace binding array elements must not be null", 2108 "22004"); 2109 2110 if ( ! "".equals(prefix) ) 2111 { 2112 if ( ! 
isValidNCName(prefix) ) 2113 throw new SQLSyntaxErrorException( 2114 "Not an XML NCname: \"" + prefix + '"', "42602"); 2115 if ( XML_NS_PREFIX.equals(prefix) 2116 || XMLNS_ATTRIBUTE.equals(prefix) ) 2117 throw new SQLSyntaxErrorException( 2118 "Namespace prefix may not be xml or xmlns", "42939"); 2119 if ( XML_NS_URI.equals(uri) 2120 || XMLNS_ATTRIBUTE_NS_URI.equals(uri) ) 2121 throw new SQLSyntaxErrorException( 2122 "Namespace URI has a disallowed value", "42P17"); 2123 if ( "".equals(uri) ) 2124 throw new SQLSyntaxErrorException( 2125 "URI for non-default namespace may not be zero-length", 2126 "42P17"); 2127 } 2128 2129 String was = m.put(prefix.intern(), uri.intern()); 2130 2131 if ( null != was ) 2132 throw new SQLSyntaxErrorException( 2133 "Namespace prefix \"" + prefix + "\" multiply bound (" + 2134 "to \"" + was + "\" and \"" + uri + "\")", "42712"); 2135 } 2136 2137 return Collections.unmodifiableSet(m.entrySet()); 2138 } 2139 2140 static class Binding 2141 { 2142 String typePG() throws SQLException 2143 { 2144 if ( null != m_typePG ) 2145 return m_typePG; 2146 return m_typePG = implTypePG(); 2147 } 2148 2149 int typeJDBC() throws SQLException 2150 { 2151 if ( null != m_typeJDBC ) 2152 return m_typeJDBC; 2153 int tj = implTypeJDBC(); 2154 /* 2155 * The JDBC types TIME_WITH_TIMEZONE and TIMESTAMP_WITH_TIMEZONE 2156 * first appear in JDBC 4.2 / Java 8. PL/Java's JDBC driver does 2157 * not yet return those values. As a workaround until it does, 2158 * recheck here using the PG type name string, if TIME or TIMESTAMP 2159 * is the JDBC type that the driver returned. 2160 * 2161 * Also for backward compatibility, the driver still returns 2162 * Types.OTHER for XML, rather than Types.SQLXML. Check and fix that 2163 * here too. 
2164 */ 2165 switch ( tj ) 2166 { 2167 case Types.OTHER: 2168 if ( "xml".equals(typePG()) ) 2169 tj = Types.SQLXML; 2170 break; 2171 case Types.TIME: 2172 if ( "timetz".equals(typePG()) ) 2173 tj = Types.TIME_WITH_TIMEZONE; 2174 break; 2175 case Types.TIMESTAMP: 2176 if ( "timestamptz".equals(typePG()) ) 2177 tj = Types.TIMESTAMP_WITH_TIMEZONE; 2178 break; 2179 default: 2180 } 2181 return m_typeJDBC = tj; 2182 } 2183 2184 Object valueJDBC() throws SQLException 2185 { 2186 if ( m_valueJDBCValid ) 2187 return m_valueJDBC; 2188 /* 2189 * When JDBC 4.2 added support for the JSR 310 date/time types, for 2190 * back-compatibility purposes, it did not change what types a plain 2191 * getObject(...) would return for them, which could break existing 2192 * code. Instead, it's necessary to use the form of getObject that 2193 * takes a Class<?>, and ask for the new classes explicitly. 2194 * 2195 * Similarly, PL/Java up through 1.5.0 has always returned a String 2196 * from getObject for a PostgreSQL xml type. Here, the JDBC standard 2197 * provides that a SQLXML object should be returned, and that should 2198 * happen in a future major PL/Java release, but for now, the plain 2199 * getObject will still return String, so it is also necessary to 2200 * ask for the SQLXML type explicitly. In fact, we will ask for 2201 * XdmNode, as it might be referred to more than once (if a 2202 * parameter), and a SQLXML can't be read more than once, nor would 2203 * there be any sense in building an XdmNode from it more than once. 
2204 */ 2205 switch ( typeJDBC() ) 2206 { 2207 case Types.DATE: 2208 return setValueJDBC(implValueJDBC(LocalDate.class)); 2209 case Types.TIME: 2210 return setValueJDBC(implValueJDBC(LocalTime.class)); 2211 case Types.TIME_WITH_TIMEZONE: 2212 return setValueJDBC(implValueJDBC(OffsetTime.class)); 2213 case Types.TIMESTAMP: 2214 return setValueJDBC(implValueJDBC(LocalDateTime.class)); 2215 case Types.TIMESTAMP_WITH_TIMEZONE: 2216 return setValueJDBC(implValueJDBC(OffsetDateTime.class)); 2217 case Types.SQLXML: 2218 return setValueJDBC(implValueJDBC(XdmNode.class)); 2219 default: 2220 } 2221 return setValueJDBC(implValueJDBC()); 2222 } 2223 2224 boolean knownNonNull() throws SQLException 2225 { 2226 if ( null != m_knownNonNull ) 2227 return m_knownNonNull; 2228 return m_knownNonNull = implKnownNonNull(); 2229 } 2230 2231 int scale() throws SQLException 2232 { 2233 if ( null != m_scale ) 2234 return m_scale; 2235 return m_scale = implScale(); 2236 } 2237 2238 static class ContextItem extends Binding 2239 { 2240 /** 2241 * Return the XML Schema type of this input binding for a context 2242 * item. 2243 *<p> 2244 * Because it is based on {@code determinXQueryFormalType}, this 2245 * method is not parameterized by {@code XMLBinary}, and will always 2246 * map a binary-string SQL type to {@code xs:hexBinary}. 
2247 */ 2248 ItemType typeXS() throws SQLException 2249 { 2250 if ( null != m_typeXS ) 2251 return m_typeXS; 2252 SequenceType st = implTypeXS(true); 2253 assert OccurrenceIndicator.ONE == st.getOccurrenceIndicator(); 2254 return m_typeXS = st.getItemType(); 2255 } 2256 2257 protected ItemType m_typeXS; 2258 } 2259 2260 static class Parameter extends Binding 2261 { 2262 String name() 2263 { 2264 return m_name; 2265 } 2266 2267 SequenceType typeXS() throws SQLException 2268 { 2269 if ( null != m_typeXS ) 2270 return m_typeXS; 2271 return m_typeXS = implTypeXS(false); 2272 } 2273 2274 /** 2275 * Return the XML Schema type collapsed according to the Syntax Rule 2276 * deriving {@code XT} for {@code XMLCAST}. 2277 *<p> 2278 * The intent of the rule is unclear, but it involves collapsing 2279 * certain sets of more-specific types that {@code typeXS} might 2280 * return into common supertypes, for use only in an intermediate 2281 * step of {@code xmlCastAsNonXML}. Unlike {@code typeXS}, this 2282 * method must be passed an {@code XMLBinary} parameter reflecting 2283 * the hex/base64 choice currently in scope. 2284 * @param enc whether to use {@code xs:hexBinary} or 2285 * {@code xs:base64Binary} as the XML Schema type corresponding to a 2286 * binary-string SQL type. 2287 */ 2288 ItemType typeXT(XMLBinary enc) throws SQLException 2289 { 2290 throw new UnsupportedOperationException( 2291 "typeXT() on synthetic binding"); 2292 } 2293 2294 /** 2295 * Memoize and return a casting function from a given 2296 * {@code ItemType} to the type of this parameter. 2297 *<p> 2298 * Used only by {@code xmlCastAsNonXML}, which does all the work 2299 * of constructing the function; this merely allows it to be 2300 * remembered, if many casts to the same output parameter will be 2301 * made (as by {@code xmltable}). 
2302 */ 2303 CastingFunction atomicCaster(ItemType it, CasterSupplier s) 2304 throws SQLException, XPathException 2305 { 2306 throw new UnsupportedOperationException( 2307 "atomicCaster() on synthetic binding"); 2308 } 2309 2310 protected SequenceType m_typeXS; 2311 2312 private final String m_name; 2313 2314 /** 2315 * @param name The SQL name of the parameter 2316 * @param checkName True if the name must be a valid NCName (as for 2317 * an input parameter from SQL to the XML query context), or false 2318 * if the name doesn't matter (as when it describes a result, or the 2319 * sole input value of an XMLCAST. 2320 * @throws SQLException if the name of a checked input parameter 2321 * isn't a valid NCName. 2322 */ 2323 protected Parameter(String name, boolean checkName) 2324 throws SQLException 2325 { 2326 if ( checkName && ! isValidNCName(name) ) 2327 throw new SQLSyntaxErrorException( 2328 "Not an XML NCname: \"" + name + '"', "42602"); 2329 m_name = name; 2330 } 2331 } 2332 2333 protected String m_typePG; 2334 protected Integer m_typeJDBC; 2335 protected Boolean m_knownNonNull; 2336 protected Integer m_scale; 2337 private Object m_valueJDBC; 2338 private boolean m_valueJDBCValid; 2339 protected Object setValueJDBC(Object v) 2340 { 2341 m_valueJDBCValid = true; 2342 return m_valueJDBC = v; 2343 } 2344 2345 protected String implTypePG() throws SQLException 2346 { 2347 throw new UnsupportedOperationException( 2348 "typePG() on synthetic binding"); 2349 } 2350 2351 protected int implTypeJDBC() throws SQLException 2352 { 2353 throw new UnsupportedOperationException( 2354 "typeJDBC() on synthetic binding"); 2355 } 2356 2357 protected boolean implKnownNonNull() throws SQLException 2358 { 2359 throw new UnsupportedOperationException( 2360 "knownNonNull() on synthetic binding"); 2361 } 2362 2363 protected int implScale() throws SQLException 2364 { 2365 throw new UnsupportedOperationException( 2366 "scale() on synthetic binding"); 2367 } 2368 2369 protected Object 
implValueJDBC() throws SQLException 2370 { 2371 throw new UnsupportedOperationException( 2372 "valueJDBC() on synthetic binding"); 2373 } 2374 2375 /* 2376 * This implementation just forwards to the type-less version, then 2377 * fails if that did not return the wanted type. Override if a smarter 2378 * behavior is possible. 2379 */ 2380 protected <T> T implValueJDBC(Class<T> type) throws SQLException 2381 { 2382 return type.cast(implValueJDBC()); 2383 } 2384 2385 protected SequenceType implTypeXS(boolean forContextItem) 2386 throws SQLException 2387 { 2388 return determineXQueryFormalType(this, forContextItem); 2389 } 2390 2391 static class Assemblage implements Iterable<Parameter> 2392 { 2393 ContextItem contextItem() { return m_contextItem; } 2394 2395 @Override 2396 public Iterator<Parameter> iterator() 2397 { 2398 return m_params.iterator(); 2399 } 2400 2401 protected ContextItem m_contextItem; 2402 protected Collection<Parameter> m_params = Collections.emptyList(); 2403 } 2404 } 2405 2406 static class BindingsFromResultSet extends Binding.Assemblage 2407 { 2408 /** 2409 * Construct the bindings from a ResultSet representing input parameters 2410 * to an XML query. 2411 * @param rs ResultSet representing the input parameters. Column names 2412 * "." and "?COLUMN?" are treated specially, and used to supply the 2413 * query's context item; every other column name must be a valid NCName, 2414 * and neither any named parameter nor the context item may be mentioned 2415 * more than once. 2416 * @param checkNames True if the input parameter names matter (a name of 2417 * "." or "?COLUMN?" will define the context item, and any other name 2418 * must be a valid NCName); false to skip such checking (as for the 2419 * single input value to XMLCAST, whose name doesn't matter). 2420 * @throws SQLException if names are duplicated or invalid. 
2421 */ 2422 BindingsFromResultSet(ResultSet rs, boolean checkNames) 2423 throws SQLException 2424 { 2425 m_resultSet = rs; 2426 m_rsmd = rs.getMetaData(); 2427 2428 int nParams = m_rsmd.getColumnCount(); 2429 ContextItem contextItem = null; 2430 Map<String,Binding.Parameter> n2b = new HashMap<>(); 2431 2432 if ( 0 < nParams ) 2433 m_dBuilder = s_s9p.newDocumentBuilder(); 2434 2435 for ( int i = 1; i <= nParams; ++i ) 2436 { 2437 String label = m_rsmd.getColumnLabel(i); 2438 if ( checkNames && 2439 ("?COLUMN?".equals(label) || ".".equals(label)) ) 2440 { 2441 if ( null != contextItem ) 2442 throw new SQLSyntaxErrorException( 2443 "Context item supplied more than once (at " + 2444 contextItem.m_idx + " and " + i + ')', "42712"); 2445 contextItem = new ContextItem(i); 2446 continue; 2447 } 2448 2449 Parameter was = 2450 (Parameter)n2b.put( 2451 label, new Parameter(label, i, checkNames)); 2452 if ( null != was ) 2453 throw new SQLSyntaxErrorException( 2454 "Name \"" + label + "\" duplicated at positions " + 2455 was.m_idx + " and " + i, "42712"); 2456 } 2457 2458 m_contextItem = contextItem; 2459 m_params = n2b.values(); 2460 } 2461 2462 /** 2463 * Construct the bindings from a ResultSet representing output 2464 * parameters (as from XMLTABLE). 2465 * @param rs ResultSet representing the result parameters. Names have 2466 * no particular significance and are not subject to any checks. 2467 * @param exprs Compiled evaluators for the supplied column expressions. 2468 * The number of these must match the number of columns in {@code rs}. 2469 * One of these (and no more than one; the caller will have enforced 2470 * that) is allowed to be null, making the corresponding column 2471 * "FOR ORDINALITY". An ordinality column will be checked to ensure it 2472 * has an SQL type that is (ahem) "exact numeric with scale 0 (zero)." 2473 * May be null if this is some other general-purpose output result set, 2474 * not for an XMLTABLE. 
2475 * @throws SQLException if numbers of columns and expressions don't 2476 * match, or there is an ordinality column and its type is not suitable. 2477 */ 2478 @SuppressWarnings("fallthrough") 2479 BindingsFromResultSet(ResultSet rs, XQueryEvaluator[] exprs) 2480 throws SQLException 2481 { 2482 m_resultSet = rs; 2483 m_rsmd = rs.getMetaData(); 2484 2485 int nParams = m_rsmd.getColumnCount(); 2486 if ( null != exprs && nParams != exprs.length ) 2487 throw new SQLSyntaxErrorException( 2488 "Not as many supplied column expressions as output columns", 2489 "42611"); 2490 2491 Binding.Parameter[] ps = new Binding.Parameter[ nParams ]; 2492 2493 for ( int i = 1; i <= nParams; ++i ) 2494 { 2495 String label = m_rsmd.getColumnLabel(i); 2496 Parameter p = new Parameter(label, i, false); 2497 ps [ i - 1 ] = p; 2498 if ( null != exprs && null == exprs [ i - 1 ] ) 2499 { 2500 switch ( p.typeJDBC() ) 2501 { 2502 case Types.INTEGER: 2503 case Types.SMALLINT: 2504 case Types.BIGINT: 2505 break; 2506 case Types.NUMERIC: 2507 case Types.DECIMAL: 2508 int scale = p.scale(); 2509 if ( 0 == scale || -1 == scale ) 2510 break; 2511 /*FALLTHROUGH*/ 2512 default: 2513 throw new SQLSyntaxErrorException( 2514 "Column FOR ORDINALITY must have an exact numeric" + 2515 " type with scale zero.", "42611"); 2516 } 2517 } 2518 } 2519 2520 m_params = asList(ps); 2521 } 2522 2523 private ResultSet m_resultSet; 2524 private ResultSetMetaData m_rsmd; 2525 DocumentBuilder m_dBuilder; 2526 2527 <T> T typedValueAtIndex(int idx, Class<T> type) throws SQLException 2528 { 2529 if ( XdmNode.class != type ) 2530 return m_resultSet.getObject(idx, type); 2531 try 2532 { 2533 SQLXML sx = m_resultSet.getObject(idx, SQLXML.class); 2534 return type.cast( 2535 m_dBuilder.build(sx.getSource((Class<Source>)null))); 2536 } 2537 catch ( SaxonApiException e ) 2538 { 2539 throw new SQLException(e.getMessage(), "10000", e); 2540 } 2541 } 2542 2543 class ContextItem extends Binding.ContextItem 2544 { 2545 final int m_idx; 
2546 2547 ContextItem(int index) { m_idx = index; } 2548 2549 protected String implTypePG() throws SQLException 2550 { 2551 return m_rsmd.getColumnTypeName(m_idx); 2552 } 2553 2554 protected int implTypeJDBC() throws SQLException 2555 { 2556 return m_rsmd.getColumnType(m_idx); 2557 } 2558 2559 protected int implScale() throws SQLException 2560 { 2561 return m_rsmd.getScale(m_idx); 2562 } 2563 2564 protected Object implValueJDBC() throws SQLException 2565 { 2566 return m_resultSet.getObject(m_idx); 2567 } 2568 2569 protected <T> T implValueJDBC(Class<T> type) throws SQLException 2570 { 2571 return typedValueAtIndex(m_idx, type); 2572 } 2573 } 2574 2575 class Parameter extends Binding.Parameter 2576 { 2577 final int m_idx; 2578 private ItemType m_typeXT; 2579 private CastingFunction m_atomCaster; 2580 private ItemType m_lastCastFrom; 2581 2582 Parameter(String name, int index, boolean isInput) 2583 throws SQLException 2584 { 2585 super(name, isInput); 2586 m_idx = index; 2587 } 2588 2589 @Override 2590 ItemType typeXT(XMLBinary enc) throws SQLException 2591 { 2592 if ( null != m_typeXT ) 2593 return m_typeXT; 2594 2595 ItemType it = 2596 mapSQLDataTypeToXMLSchemaDataType(this, enc, Nulls.ABSENT); 2597 if ( ! ItemType.ANY_ATOMIC_VALUE.subsumes(it) ) 2598 return m_typeXT = it; 2599 2600 if ( it.equals(ItemType.INTEGER) ) 2601 { 2602 int tj = typeJDBC(); 2603 if ( Types.NUMERIC == tj || Types.DECIMAL == tj ) 2604 it = ItemType.DECIMAL; 2605 } 2606 else if ( ItemType.INTEGER.subsumes(it) ) 2607 it = ItemType.INTEGER; 2608 else if ( ItemType.FLOAT.subsumes(it) ) 2609 it = ItemType.DOUBLE; 2610 else if ( ItemType.DATE_TIME_STAMP.subsumes(it) ) 2611 it = ItemType.DATE_TIME; 2612 2613 return m_typeXT = it; 2614 } 2615 2616 @Override 2617 CastingFunction atomicCaster(ItemType it, CasterSupplier s) 2618 throws SQLException, XPathException 2619 { 2620 if ( null == m_atomCaster || ! 
it.equals(m_lastCastFrom) ) 2621 { 2622 m_atomCaster = s.get(); 2623 m_lastCastFrom = it; 2624 } 2625 return m_atomCaster; 2626 } 2627 2628 protected String implTypePG() throws SQLException 2629 { 2630 return m_rsmd.getColumnTypeName(m_idx); 2631 } 2632 2633 protected int implTypeJDBC() throws SQLException 2634 { 2635 return m_rsmd.getColumnType(m_idx); 2636 } 2637 2638 protected boolean implKnownNonNull() throws SQLException 2639 { 2640 return columnNoNulls == m_rsmd.isNullable(m_idx); 2641 } 2642 2643 protected int implScale() throws SQLException 2644 { 2645 return m_rsmd.getScale(m_idx); 2646 } 2647 2648 protected Object implValueJDBC() throws SQLException 2649 { 2650 return m_resultSet.getObject(m_idx); 2651 } 2652 2653 protected <T> T implValueJDBC(Class<T> type) throws SQLException 2654 { 2655 return typedValueAtIndex(m_idx, type); 2656 } 2657 } 2658 } 2659 2660 static class BindingsFromXQX extends Binding.Assemblage 2661 { 2662 /** 2663 * Construct a new assemblage of bindings for the static context of an 2664 * XMLTABLE column expression. It will have the same named-parameter 2665 * bindings passed to the row expression, but the static type of the 2666 * context item will be the result type of the row expression. The 2667 * {@code ContextItem} in this assemblage will have no associated value; 2668 * the caller is responsible for retrieving that from the row evaluator 2669 * and storing it in the column expression context every iteration. 2670 * @param xqx The result of compiling the row expression; its 2671 * compiler-determined static result type will be used as the static 2672 * context item type. 2673 * @param params The bindings supplied to the row expression. Its named 2674 * parameters will be copied as the named parameters here. 
2675 */ 2676 BindingsFromXQX(XQueryExecutable xqx, Binding.Assemblage params) 2677 { 2678 m_params = params.m_params; 2679 m_contextItem = new ContextItem(xqx.getResultItemType()); 2680 } 2681 2682 static class ContextItem extends Binding.ContextItem 2683 { 2684 ContextItem(ItemType it) 2685 { 2686 m_typeXS = it; 2687 /* 2688 * There needs to be a dummy JDBC type to return when queried 2689 * for purposes of assertCanCastAsXmlSequence. It can literally 2690 * be any type outside of the few that method rejects. Because 2691 * the XS type is already known, nothing else will need to ask 2692 * for this, or care. 2693 */ 2694 m_typeJDBC = Types.OTHER; 2695 } 2696 } 2697 } 2698 2699 /* 2700 * The XQuery-regular-expression-based functions added in 9075-2:2006. 2701 * 2702 * For each function below, a parameter is marked //strict if the spec 2703 * explicitly says the result is NULL when that parameter is NULL. The 2704 * parameters not marked //strict (including the non-standard w3cNewlines 2705 * added here) all have non-null defaults, so by executive decision, these 2706 * functions will all get the onNullInput=RETURNS_NULL treatment, so none of 2707 * the null-checking has to be done here. At worst, that may result in a 2708 * mystery NULL return rather than an error, if someone explicitly passes 2709 * NULL to one of the parameters with a non-null default. 2710 */ 2711 2712 /* 2713 * Check valid range of 'from' and supported 'usingOctets'. 2714 * 2715 * Every specified function that has a start position FROM and a USING 2716 * clause starts with a check that the start position is in range. This 2717 * function factors out that test, returning true if the start position is 2718 * /out of range/ (triggering the caller to return the special result 2719 * defined for that case), returning false if the value is in range, or 2720 * throwing an exception if the length unit specified in the USING clause 2721 * isn't supported. 
2722 */ 2723 private static boolean usingAndLengthCheck( 2724 String in, int from, boolean usingOctets, String function) 2725 throws SQLException 2726 { 2727 if ( usingOctets ) 2728 throw new SQLFeatureNotSupportedException( 2729 '"' + function + "\" does not yet support USING OCTETS", 2730 "0A000"); 2731 return ( 1 > from || from > getStringLength(in) ); 2732 } 2733 2734 private static void newlinesCheck(boolean w3cNewlines, String function) 2735 throws SQLException 2736 { 2737 if ( ! w3cNewlines ) 2738 throw new SQLFeatureNotSupportedException( 2739 '"' + function + "\" does not yet support the ISO SQL newline" + 2740 " conventions, only the original W3C XQuery ones" + 2741 " (HINT: pass w3cNewlines => true)", "0A000"); 2742 } 2743 2744 private static RegularExpression compileRE(String pattern, String flags) 2745 throws SQLException 2746 { 2747 try 2748 { 2749 return s_s9p.getUnderlyingConfiguration() 2750 .compileRegularExpression(pattern, flags, "XP30", null); 2751 } 2752 catch ( XPathException e ) 2753 { 2754 if ( NamespaceConstant.ERR.equals(e.getErrorCodeNamespace()) ) 2755 { 2756 if ( "FORX0001".equals(e.getErrorCodeLocalPart()) ) 2757 throw new SQLDataException( 2758 "invalid XQuery option flag", "2201T", e); 2759 if ( "FORX0002".equals(e.getErrorCodeLocalPart()) ) 2760 throw new SQLDataException( 2761 "invalid XQuery regular expression", "2201S", e); 2762 } 2763 throw new SQLException( 2764 "compiling XQuery regular expression: " + e.getMessage(), e); 2765 } 2766 } 2767 2768 private static CharSequence replace( 2769 RegularExpression re, CharSequence in, CharSequence with) 2770 throws SQLException 2771 { 2772 /* 2773 * Report the standard-mandated error if replacing a zero-length match. 2774 * Strictly speaking, this is a test of the length of the match, not of 2775 * the input string. 
Here, though, this private method is only called by 2776 * translate_regex, which always passes only the portion of the input 2777 * string that matched, so the test is equivalent. 2778 * As to why the SQL committee would make such a point of disallowing 2779 * replacement of a zero-length match, that's a good question. See 2780 * s_intervalSignSite in this very file for an example where replacing 2781 * a zero-length match is just what's wanted. (But that pattern relies 2782 * on lookahead/lookbehind operators, which XQuery regular expressions 2783 * don't have.) 2784 * When the underlying library is Saxon, there is an Easter egg: if a 2785 * regular expression is compiled with a 'flags' string ending in ";j", 2786 * a Java regular expression is produced instead of an XQuery one (with 2787 * standards conformance cast to the wind). That can be detected with 2788 * getFlags() on the regular expression: not looking for ";j", because 2789 * that has been stripped out, but for "d" which is a Java regex flag 2790 * that Saxon sets by default, and is not a valid XQuery regex flag. 2791 * If the caller has used Saxon's Easter egg to get a Java regex, here 2792 * is another Easter egg to go with it, allowing zero-length matches 2793 * to be replaced if that's what the caller wants to do. 2794 */ 2795 if ( 0 == in.length() && ! 
re.getFlags().contains("d") ) 2796 throw new SQLDataException( 2797 "attempt to replace a zero-length string", "2201U"); 2798 try 2799 { 2800 return re.replace(in, with); 2801 } 2802 catch ( XPathException e ) 2803 { 2804 if ( NamespaceConstant.ERR.equals(e.getErrorCodeNamespace()) ) 2805 { 2806 if ( "FORX0003".equals(e.getErrorCodeLocalPart()) ) 2807 throw new SQLDataException( 2808 "attempt to replace a zero-length string", "2201U", e); 2809 if ( "FORX0004".equals(e.getErrorCodeLocalPart()) ) 2810 throw new SQLDataException( 2811 "invalid XQuery replacement string", "2201V", e); 2812 } 2813 throw new SQLException( 2814 "replacing regular expression match: " + e.getMessage(), e); 2815 } 2816 } 2817 2818 interface MatchVector 2819 { 2820 int groups(); 2821 int position(int group); 2822 int length(int group); 2823 } 2824 2825 interface ListOfMatchVectors 2826 { 2827 /** 2828 * Return the MatchVector for one occurrence of a match. 2829 *<p> 2830 * Any previously-returned MatchVector is invalid after another get. 2831 * In multiple calls to get, the occurrence parameter must be strictly 2832 * increasing. 2833 * After get has returned null, it should not be called again. 
2834 */ 2835 MatchVector get(int occurrence) throws SQLException; 2836 void close(); 2837 } 2838 2839 static class LOMV 2840 implements ListOfMatchVectors, MatchVector, RegexIterator.MatchHandler 2841 { 2842 private RegexIterator m_ri; 2843 private int m_pos; 2844 private int m_occurrence; 2845 2846 LOMV(int startPos, RegexIterator ri) 2847 { 2848 m_ri = ri; 2849 m_pos = startPos; 2850 } 2851 2852 static ListOfMatchVectors of( 2853 String pattern, String flags, String in, int from) 2854 throws SQLException 2855 { 2856 RegularExpression re = compileRE(pattern, flags); 2857 return of(re, in, from); 2858 } 2859 2860 static ListOfMatchVectors of(RegularExpression re, String in, int from) 2861 { 2862 RegexIterator ri = 2863 re.analyze(in.substring(in.offsetByCodePoints(0, from - 1))); 2864 return new LOMV(from, ri); 2865 } 2866 2867 private int[] m_begPositions; 2868 private int[] m_endPositions; 2869 2870 @Override // ListOfMatchVectors 2871 public MatchVector get(int occurrence) throws SQLException 2872 { 2873 try 2874 { 2875 StringValue sv; 2876 for ( ;; ) 2877 { 2878 sv = m_ri.next(); 2879 if ( null == sv ) 2880 return null; 2881 if ( m_ri.isMatching() ) 2882 if ( ++ m_occurrence == occurrence ) 2883 break; 2884 m_pos += sv.getStringLength(); 2885 } 2886 2887 if ( null == m_begPositions ) 2888 { 2889 int groups = m_ri.getNumberOfGroups(); 2890 /* 2891 * Saxon's Apache-derived XQuery engine will report a number 2892 * of groups counting $0 (so it will be 1 even if no capture 2893 * groups were defined in the expression). In contrast, the 2894 * Java regex engine that you get with the Saxon ";j" Easter 2895 * egg does not count $0 (so arrays need groups+1 entries). 2896 * It's hard to tell from here which flavor was used, plus 2897 * the Saxon behavior might change some day, so just spend 2898 * the extra + 1 every time. 
2899 */ 2900 m_begPositions = new int [ groups + 1 ]; 2901 m_endPositions = new int [ groups + 1 ]; 2902 } 2903 2904 m_begPositions [ 0 ] = m_pos; 2905 2906 fill(m_begPositions, 1, m_begPositions.length, 0); 2907 fill(m_endPositions, 1, m_endPositions.length, 0); 2908 m_ri.processMatchingSubstring(this); 2909 2910 m_endPositions [ 0 ] = m_pos; 2911 2912 return this; 2913 } 2914 catch ( XPathException e ) 2915 { 2916 throw new SQLException( 2917 "evaluating XQuery regular expression: " + e.getMessage(), 2918 e); 2919 } 2920 } 2921 2922 @Override 2923 public void close() 2924 { 2925 m_ri.close(); 2926 } 2927 2928 @Override // MatchVector 2929 public int groups() 2930 { 2931 return m_begPositions.length - 1; 2932 } 2933 2934 @Override 2935 public int position(int groupNumber) 2936 { 2937 return m_begPositions [ groupNumber ]; 2938 } 2939 2940 @Override 2941 public int length(int groupNumber) 2942 { 2943 return 2944 m_endPositions [ groupNumber ] - m_begPositions [ groupNumber ]; 2945 } 2946 2947 @Override // MatchHandler 2948 public void characters(CharSequence s) 2949 { 2950 m_pos += getStringLength(s); 2951 } 2952 2953 @Override 2954 public void onGroupStart(int groupNumber) 2955 { 2956 m_begPositions [ groupNumber ] = m_pos; 2957 } 2958 2959 @Override 2960 public void onGroupEnd(int groupNumber) 2961 { 2962 m_endPositions [ groupNumber ] = m_pos; 2963 } 2964 } 2965 2966 /** 2967 * Function form of the ISO SQL 2968 * <a id='like_regex'>{@code <regex like predicate>}</a>. 2969 *<p> 2970 * Rewrite the standard form 2971 *<pre> 2972 * value LIKE_REGEX pattern FLAG flags 2973 *</pre> 2974 * into this form: 2975 *<pre> 2976 * like_regex(value, pattern, flag => flags) 2977 *</pre> 2978 * where the {@code flag} parameter defaults to no flags if omitted. 
2979 *<p> 2980 * The SQL standard specifies that pattern elements sensitive to newlines 2981 * (namely {@code ^}, {@code $}, {@code \s}, {@code \S}, and {@code .}) are 2982 * to support the various representations of newline set out in 2983 * <a href='http://www.unicode.org/reports/tr18/#RL1.6'>Unicode Technical 2984 * Standard #18, RL1.6</a>. That behavior differs from the standard W3C 2985 * XQuery newline handling, as described for 2986 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>the flags 2987 * {@code m} and {@code s}</a> and for 2988 * <a href='https://www.w3.org/TR/xmlschema11-2/#cces-mce'>the 2989 * multicharacter escapes {@code \s} and {@code \S}</a>. As an extension to 2990 * ISO SQL, passing {@code w3cNewlines => true} requests the standard W3C 2991 * XQuery behavior rather than the UTS#18 behevior for newlines. If the 2992 * underlying XQuery library only provides the W3C behavior, calls without 2993 * {@code w3cNewlines => true} will throw exceptions. 2994 * @param value The string to be tested against the pattern. 2995 * @param pattern The XQuery regular expression. 2996 * @param flag Optional string of 2997 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting 2998 * the regular expression behavior</a>. 2999 * @param w3cNewlines Pass true to allow the regular expression to recognize 3000 * newlines according to the W3C XQuery rules rather than those of ISO SQL. 3001 * @return True if the supplied value matches the pattern. Null if any 3002 * parameter is null. 3003 * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular 3004 * expression is invalid, 2201T if the flags string is invalid; 3005 * SQLFeatureNotSupportedException (0A000) if (in the current 3006 * implementation) w3cNewlines is false or omitted. 
3007 */ 3008 @Function(implementor="saxon9api", schema="javatest") 3009 public static boolean like_regex( 3010 String value, //strict 3011 String pattern, //strict 3012 @SQLType(defaultValue="") String flag, //strict 3013 @SQLType(defaultValue="false") boolean w3cNewlines 3014 ) 3015 throws SQLException 3016 { 3017 newlinesCheck(w3cNewlines, "like_regex"); 3018 return compileRE(pattern, flag).containsMatch(value); 3019 } 3020 3021 /** 3022 * Syntax-sugar-free form of the ISO SQL 3023 * <a id='occurrences_regex'>{@code OCCURRENCES_REGEX}</a> function: 3024 * how many times does a pattern occur in a string? 3025 *<p> 3026 * Rewrite the standard form 3027 *<pre> 3028 * OCCURRENCES_REGEX(pattern FLAG flags IN str FROM position USING units) 3029 *</pre> 3030 * into this form: 3031 *<pre> 3032 * occurrences_regex(pattern, flag => flags, "in" => str, 3033 * "from" => position, usingOctets => true|false) 3034 *</pre> 3035 * where all of the named parameters are optional except pattern and "in", 3036 * and the standard {@code USING CHARACTERS} becomes 3037 * {@code usingOctets => false}, which is the default, and 3038 * {@code USING OCTETS} becomes {@code usingOctets => true}. See also 3039 * {@link #like_regex like_regex} regarding the {@code w3cNewlines} 3040 * parameter. 3041 * @param pattern XQuery regular expression to seek in the input string. 3042 * @param in The input string. 3043 * @param flag Optional string of 3044 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting 3045 * the regular expression behavior</a>. 3046 * @param from Starting position in the input string, 1 by default. 3047 * @param usingOctets Whether position is counted in characters (actual 3048 * Unicode characters, not any smaller encoded unit, not even Java char), 3049 * which is the default, or (when true) in octets of the string's encoded 3050 * form. 
3051 * @param w3cNewlines Pass true to allow the regular expression to recognize 3052 * newlines according to the W3C XQuery rules rather than those of ISO SQL. 3053 * @return The number of occurrences of the pattern in the input string, 3054 * starting from the specified position. Null if any parameter is null; -1 3055 * if the start position is less than 1 or beyond the end of the string. 3056 * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular 3057 * expression is invalid, 2201T if the flags string is invalid; 3058 * SQLFeatureNotSupportedException (0A000) if (in the current 3059 * implementation) usingOctets is true, or w3cNewlines is false or omitted. 3060 */ 3061 @Function(implementor="saxon9api", schema="javatest") 3062 public static int occurrences_regex( 3063 String pattern, //strict 3064 @SQLType(name="\"in\"") String in, //strict 3065 @SQLType(defaultValue="") String flag, //strict 3066 @SQLType(name="\"from\"", defaultValue="1") int from, 3067 @SQLType(defaultValue="false") boolean usingOctets, 3068 @SQLType(defaultValue="false") boolean w3cNewlines 3069 ) 3070 throws SQLException 3071 { 3072 if ( usingAndLengthCheck(in, from, usingOctets, "occurrences_regex") ) 3073 return -1; // note: not the same as in position_regex! 3074 newlinesCheck(w3cNewlines, "occurrences_regex"); 3075 3076 ListOfMatchVectors lomv = LOMV.of(pattern, flag, in, from); 3077 3078 for ( int i = 1 ;; ++ i ) 3079 if ( null == lomv.get(i) ) 3080 return i - 1; 3081 } 3082 3083 /** 3084 * Syntax-sugar-free form of the ISO SQL 3085 * <a id='position_regex'>{@code POSITION_REGEX}</a> function: 3086 * where does a pattern, or part of it, occur in a string? 
3087 *<p> 3088 * Rewrite the standard forms 3089 *<pre> 3090 * POSITION_REGEX(START pattern FLAG flags IN str FROM position 3091 * OCCURRENCE n GROUP m) 3092 * POSITION_REGEX(AFTER pattern FLAG flags IN str FROM position 3093 * OCCURRENCE n GROUP m) 3094 *</pre> 3095 * into these forms, respectively: 3096 *<pre> 3097 * position_regex(pattern, flag => flags, "in" => str, 3098 * "from" => position, occurrence => n, 3099 * "group" => m) 3100 * position_regex(pattern, flag => flags, "in" => str, 3101 * "from" => position, occurrence => n, 3102 * "group" => m, after => true) 3103 *</pre> 3104 * where all of the named parameters are optional except pattern and "in". 3105 * See also {@link #occurrences_regex occurrences_regex} regarding the 3106 * {@code usingOctets} parameter, and {@link #like_regex like_regex} 3107 * regarding {@code w3cNewlines}. 3108 * @param pattern XQuery regular expression to seek in the input string. 3109 * @param in The input string. 3110 * @param flag Optional string of 3111 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting 3112 * the regular expression behavior</a>. 3113 * @param from Starting position in the input string, 1 by default. 3114 * @param usingOctets Whether position is counted in characters (actual 3115 * Unicode characters, not any smaller encoded unit, not even Java char), 3116 * which is the default, or (when true) in octets of the string's encoded 3117 * form. 3118 * @param after Whether to return the position where the match starts 3119 * (when false, the default), or just after the match ends (when true). 3120 * @param occurrence If specified as an integer n (default 1), returns the 3121 * position starting (or after) the nth match of the pattern in the string. 
3122 * @param group If zero (the default), returns the position starting (or 3123 * after) the match of the whole pattern overall, otherwise if an integer m, 3124 * the position starting or after the mth parenthesized group in (the nth 3125 * occurrence of) the pattern. 3126 * @param w3cNewlines Pass true to allow the regular expression to recognize 3127 * newlines according to the W3C XQuery rules rather than those of ISO SQL. 3128 * @return The position, in the specified units, starting or just after, 3129 * the nth occurrence (or mth capturing group of the nth occurrence) of the 3130 * pattern in the input string, starting from the specified position. Null 3131 * if any parameter is null; zero if the start position is less than 1 or 3132 * beyond the end of the string, if occurrence is less than 1 or greater 3133 * than the number of matches, or if group is less than zero or greater than 3134 * the number of parenthesized capturing groups in the pattern. 3135 * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular 3136 * expression is invalid, 2201T if the flags string is invalid; 3137 * SQLFeatureNotSupportedException (0A000) if (in the current 3138 * implementation) usingOctets is true, or w3cNewlines is false or omitted. 
3139 */ 3140 @Function(implementor="saxon9api", schema="javatest") 3141 public static int position_regex( 3142 String pattern, //strict 3143 @SQLType(name="\"in\"") String in, //strict 3144 @SQLType(defaultValue="") String flag, //strict 3145 @SQLType(name="\"from\"", defaultValue="1") int from, 3146 @SQLType(defaultValue="false") boolean usingOctets, 3147 @SQLType(defaultValue="false") boolean after, 3148 @SQLType(defaultValue="1") int occurrence, //strict 3149 @SQLType(name="\"group\"", defaultValue="0") int group, //strict 3150 @SQLType(defaultValue="false") boolean w3cNewlines 3151 ) 3152 throws SQLException 3153 { 3154 if ( 1 > occurrence ) 3155 return 0; 3156 if ( 0 > group ) // test group > ngroups after compiling regex 3157 return 0; 3158 if ( usingAndLengthCheck(in, from, usingOctets, "position_regex") ) 3159 return 0; // note: not the same as in occurrences_regex! 3160 newlinesCheck(w3cNewlines, "position_regex"); 3161 3162 ListOfMatchVectors lomv = LOMV.of(pattern, flag, in, from); 3163 3164 MatchVector mv = lomv.get(occurrence); 3165 if ( null == mv || mv.groups() < group ) 3166 return 0; 3167 3168 return mv.position(group) + (after ? mv.length(group) : 0); 3169 } 3170 3171 /** 3172 * Syntax-sugar-free form of the ISO SQL 3173 * <a id='substring_regex'>{@code SUBSTRING_REGEX}</a> function: 3174 * return a substring specified by a pattern match in a string. 3175 *<p> 3176 * Rewrite the standard form 3177 *<pre> 3178 * SUBSTRING_REGEX(pattern FLAG flags IN str FROM position 3179 * OCCURRENCE n GROUP m) 3180 *</pre> 3181 * into this form: 3182 *<pre> 3183 * substring_regex(pattern, flag => flags, "in" => str, 3184 * "from" => position, occurrence => n, 3185 * "group" => m) 3186 *</pre> 3187 * where all of the named parameters are optional except pattern and "in". 
3188 * See also {@link #position_regex position_regex} regarding the 3189 * {@code occurrence} and {@code "group"} parameters, 3190 * {@link #occurrences_regex occurrences_regex} regarding 3191 * {@code usingOctets}, and {@link #like_regex like_regex} 3192 * regarding {@code w3cNewlines}. 3193 * @param pattern XQuery regular expression to seek in the input string. 3194 * @param in The input string. 3195 * @param flag Optional string of 3196 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting 3197 * the regular expression behavior</a>. 3198 * @param from Starting position in the input string, 1 by default. 3199 * @param usingOctets Whether position is counted in characters (actual 3200 * Unicode characters, not any smaller encoded unit, not even Java char), 3201 * which is the default, or (when true) in octets of the string's encoded 3202 * form. 3203 * @param occurrence If specified as an integer n (default 1), returns the 3204 * nth match of the pattern in the string. 3205 * @param group If zero (the default), returns the match of the whole 3206 * pattern overall, otherwise if an integer m, the match of the mth 3207 * parenthesized group in (the nth occurrence of) the pattern. 3208 * @param w3cNewlines Pass true to allow the regular expression to recognize 3209 * newlines according to the W3C XQuery rules rather than those of ISO SQL. 3210 * @return The substring matching the nth occurrence (or mth capturing group 3211 * of the nth occurrence) of the pattern in the input string, starting from 3212 * the specified position. Null if any parameter is null, if the start 3213 * position is less than 1 or beyond the end of the string, if occurrence is 3214 * less than 1 or greater than the number of matches, or if group is less 3215 * than zero or greater than the number of parenthesized capturing groups in 3216 * the pattern. 
3217 * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular 3218 * expression is invalid, 2201T if the flags string is invalid; 3219 * SQLFeatureNotSupportedException (0A000) if (in the current 3220 * implementation) usingOctets is true, or w3cNewlines is false or omitted. 3221 */ 3222 @Function(implementor="saxon9api", schema="javatest") 3223 public static String substring_regex( 3224 String pattern, //strict 3225 @SQLType(name="\"in\"") String in, //strict 3226 @SQLType(defaultValue="") String flag, //strict 3227 @SQLType(name="\"from\"", defaultValue="1") int from, 3228 @SQLType(defaultValue="false") boolean usingOctets, 3229 @SQLType(defaultValue="1") int occurrence, //strict 3230 @SQLType(name="\"group\"", defaultValue="0") int group, //strict 3231 @SQLType(defaultValue="false") boolean w3cNewlines 3232 ) 3233 throws SQLException 3234 { 3235 if ( 1 > occurrence ) 3236 return null; 3237 if ( 0 > group ) // test group > ngroups after compiling regex 3238 return null; 3239 if ( usingAndLengthCheck(in, from, usingOctets, "substring_regex") ) 3240 return null; 3241 newlinesCheck(w3cNewlines, "substring_regex"); 3242 3243 ListOfMatchVectors lomv = LOMV.of(pattern, flag, in, from); 3244 3245 MatchVector mv = lomv.get(occurrence); 3246 if ( null == mv || mv.groups() < group ) 3247 return null; 3248 3249 int codePointPos = mv.position(group); 3250 int codePointLen = mv.length(group); 3251 3252 int utf16pos = in.offsetByCodePoints(0, codePointPos - 1); 3253 int utf16end = in.offsetByCodePoints(utf16pos, codePointLen); 3254 3255 return in.substring(utf16pos, utf16end); 3256 } 3257 3258 /** 3259 * Syntax-sugar-free form of the ISO SQL 3260 * <a id='translate_regex'>{@code TRANSLATE_REGEX}</a> function: 3261 * return a string constructed from the input string by replacing one 3262 * specified occurrence, or all occurrences, of a matching pattern. 
3263 *<p> 3264 * Rewrite the standard forms 3265 *<pre> 3266 * TRANSLATE_REGEX(pattern FLAG flags IN str WITH repl FROM position 3267 * OCCURRENCE ALL) 3268 * TRANSLATE_REGEX(pattern FLAG flags IN str WITH repl FROM position 3269 * OCCURRENCE n) 3270 *</pre> 3271 * into these forms, respectively: 3272 *<pre> 3273 * translate_regex(pattern, flag => flags, "in" => str, 3274 * "with" => repl, "from" => position) 3275 * translate_regex(pattern, flag => flags, "in" => str, 3276 * "with" => repl, "from" => position, 3277 * occurrence => n) 3278 *</pre> 3279 * where all of the named parameters are optional except pattern and "in" 3280 * (the default for "with" is the empty string, resulting in matches being 3281 * deleted). 3282 * See also {@link #position_regex position_regex} regarding the 3283 * {@code occurrence} parameter, 3284 * {@link #occurrences_regex occurrences_regex} regarding 3285 * {@code usingOctets}, and {@link #like_regex like_regex} 3286 * regarding {@code w3cNewlines}. 3287 *<p> 3288 * For the specified occurrence (or all occurrences), the matching portion 3289 * <em>s</em> of the string is replaced as by the XQuery function 3290 * <a href='https://www.w3.org/TR/xpath-functions-31/#func-replace' 3291 * >replace</a>(<em>s, pattern, repl, flags</em>). The <em>repl</em> string 3292 * may contain {@code $0} to refer to the entire matched substring, or 3293 * {@code $}<em>m</em> to refer to the <em>m</em>th parenthesized capturing 3294 * group in the pattern. 3295 * @param pattern XQuery regular expression to seek in the input string. 3296 * @param in The input string. 3297 * @param flag Optional string of 3298 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting 3299 * the regular expression behavior</a>. 3300 * @param with The replacement string, possibly with $m references. 3301 * @param from Starting position in the input string, 1 by default. 
3302 * @param usingOctets Whether position is counted in characters (actual 3303 * Unicode characters, not any smaller encoded unit, not even Java char), 3304 * which is the default, or (when true) in octets of the string's encoded 3305 * form. 3306 * @param occurrence If specified as an integer n (default 0 for "ALL"), 3307 * replace the nth match of the pattern in the string. 3308 * @param w3cNewlines Pass true to allow the regular expression to recognize 3309 * newlines according to the W3C XQuery rules rather than those of ISO SQL. 3310 * @return The input string with one occurrence or all occurences of the 3311 * pattern replaced, as described above. Null if any parameter is null, or 3312 * if the start position is less than 1 or beyond the end of the string. 3313 * The input string unchanged if occurrence is less than zero or exceeds the 3314 * number of matches. 3315 * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular 3316 * expression is invalid, 2201T if the flags string is invalid; 2201U if 3317 * replacing where the pattern has matched a substring of zero length; 2201V 3318 * if the replacement string has improper form (a backslash must be used to 3319 * escape any dollar sign or backslash intended literally); 3320 * SQLFeatureNotSupportedException (0A000) if (in the current 3321 * implementation) usingOctets is true, or w3cNewlines is false or omitted. 
3322 */ 3323 @Function(implementor="saxon9api", schema="javatest") 3324 public static String translate_regex( 3325 String pattern, //strict 3326 @SQLType(name="\"in\"") String in, //strict 3327 @SQLType(defaultValue="") String flag, //strict 3328 @SQLType(name="\"with\"", defaultValue="") String with, //strict 3329 @SQLType(name="\"from\"", defaultValue="1") int from, 3330 @SQLType(defaultValue="false") boolean usingOctets, 3331 @SQLType(defaultValue="0" /* ALL */) int occurrence, 3332 @SQLType(defaultValue="false") boolean w3cNewlines 3333 ) 3334 throws SQLException 3335 { 3336 if ( usingAndLengthCheck(in, from, usingOctets, "translate_regex") ) 3337 return null; 3338 newlinesCheck(w3cNewlines, "translate_regex"); 3339 if ( 0 > occurrence ) 3340 return in; 3341 3342 RegularExpression re = compileRE(pattern, flag); 3343 3344 ListOfMatchVectors lomv = LOMV.of(re, in, from); 3345 3346 MatchVector mv; 3347 int codePointPos; 3348 int codePointLen; 3349 int utf16pos; 3350 int utf16end; 3351 3352 if ( 0 < occurrence ) 3353 { 3354 mv = lomv.get(occurrence); 3355 if ( null == mv ) 3356 return in; 3357 3358 codePointPos = mv.position(0); 3359 codePointLen = mv.length(0); 3360 3361 utf16pos = in.offsetByCodePoints(0, codePointPos - 1); 3362 utf16end = in.offsetByCodePoints(utf16pos, codePointLen); 3363 3364 return 3365 in.substring(0, utf16pos) 3366 + replace(re, in.substring(utf16pos, utf16end), with) 3367 + in.substring(utf16end); 3368 } 3369 3370 StringBuilder sb = new StringBuilder(); 3371 utf16end = 0; 3372 3373 for ( int i = 1; null != (mv = lomv.get(i)); ++ i ) 3374 { 3375 codePointPos = mv.position(0); 3376 codePointLen = mv.length(0); 3377 3378 utf16pos = in.offsetByCodePoints(0, codePointPos - 1); 3379 3380 sb.append(in.substring(utf16end, utf16pos)); 3381 3382 utf16end = in.offsetByCodePoints(utf16pos, codePointLen); 3383 3384 sb.append(replace(re, in.substring(utf16pos, utf16end), with)); 3385 } 3386 3387 return sb.append(in.substring(utf16end)).toString(); 3388 
} 3389}