001/* 002 * Copyright (c) 2018-2020 Tada AB and other contributors, as listed below. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the The BSD 3-Clause License 006 * which accompanies this distribution, and is available at 007 * http://opensource.org/licenses/BSD-3-Clause 008 * 009 * Contributors: 010 * Chapman Flack 011 */ 012package org.postgresql.pljava.example.saxon; 013 014import java.math.BigDecimal; 015import java.math.BigInteger; 016 017import java.sql.Connection; 018import java.sql.DriverManager; 019import java.sql.ResultSet; 020import java.sql.ResultSetMetaData; 021import static java.sql.ResultSetMetaData.columnNoNulls; 022import java.sql.SQLXML; 023import java.sql.Statement; 024import java.sql.Types; 025 026import java.sql.SQLException; 027import java.sql.SQLDataException; 028import java.sql.SQLFeatureNotSupportedException; 029import java.sql.SQLNonTransientException; 030import java.sql.SQLSyntaxErrorException; 031 032import java.time.LocalDate; 033import java.time.LocalTime; 034import java.time.OffsetTime; 035import java.time.LocalDateTime; 036import java.time.OffsetDateTime; 037import static java.time.ZoneOffset.UTC; 038 039import static java.util.Arrays.asList; 040import static java.util.Arrays.fill; 041import java.util.Collection; 042import java.util.Collections; 043import java.util.HashMap; 044import java.util.Iterator; 045import java.util.List; 046import java.util.Map; 047import java.util.Properties; 048 049import java.util.regex.Matcher; 050import java.util.regex.Pattern; 051 052import javax.xml.transform.Source; 053import javax.xml.transform.Result; 054 055import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI; 056import static javax.xml.XMLConstants.XML_NS_URI; 057import static javax.xml.XMLConstants.XML_NS_PREFIX; 058import static javax.xml.XMLConstants.XMLNS_ATTRIBUTE_NS_URI; 059import static javax.xml.XMLConstants.XMLNS_ATTRIBUTE; 060 061import 
net.sf.saxon.event.Receiver; 062 063import net.sf.saxon.lib.ConversionRules; 064import net.sf.saxon.lib.NamespaceConstant; 065 066import static net.sf.saxon.om.NameChecker.isValidNCName; 067 068import net.sf.saxon.query.StaticQueryContext; 069 070import net.sf.saxon.regex.RegexIterator; 071import net.sf.saxon.regex.RegularExpression; 072 073import net.sf.saxon.s9api.Destination; 074import net.sf.saxon.s9api.DocumentBuilder; 075import net.sf.saxon.s9api.ItemType; 076import net.sf.saxon.s9api.ItemTypeFactory; 077import net.sf.saxon.s9api.OccurrenceIndicator; 078import net.sf.saxon.s9api.Processor; 079import net.sf.saxon.s9api.QName; 080import net.sf.saxon.s9api.SAXDestination; 081import net.sf.saxon.s9api.SequenceType; 082import static net.sf.saxon.s9api.SequenceType.makeSequenceType; 083import net.sf.saxon.s9api.XdmAtomicValue; 084import static net.sf.saxon.s9api.XdmAtomicValue.makeAtomicValue; 085import net.sf.saxon.s9api.XdmEmptySequence; 086import net.sf.saxon.s9api.XdmItem; 087import net.sf.saxon.s9api.XdmNode; 088import static net.sf.saxon.s9api.XdmNodeKind.DOCUMENT; 089import net.sf.saxon.s9api.XdmValue; 090import net.sf.saxon.s9api.XdmSequenceIterator; 091import net.sf.saxon.s9api.XQueryCompiler; 092import net.sf.saxon.s9api.XQueryEvaluator; 093import net.sf.saxon.s9api.XQueryExecutable; 094 095import net.sf.saxon.s9api.SaxonApiException; 096 097import net.sf.saxon.trans.XPathException; 098 099import net.sf.saxon.serialize.SerializationProperties; 100 101import net.sf.saxon.type.AtomicType; 102import net.sf.saxon.type.Converter; 103 104import net.sf.saxon.value.AtomicValue; 105import net.sf.saxon.value.Base64BinaryValue; 106import net.sf.saxon.value.CalendarValue; 107import net.sf.saxon.value.HexBinaryValue; 108import net.sf.saxon.value.StringValue; 109import static net.sf.saxon.value.StringValue.getStringLength; 110 111import org.postgresql.pljava.ResultSetProvider; 112 113import org.postgresql.pljava.annotation.Function; 114import 
org.postgresql.pljava.annotation.SQLType; 115import static org.postgresql.pljava.annotation.Function.OnNullInput.CALLED; 116 117/* For the xmltext function, which only needs plain SAX and not Saxon */ 118 119import javax.xml.transform.sax.SAXResult; 120import org.xml.sax.ContentHandler; 121import org.xml.sax.SAXException; 122 123/** 124 * Class illustrating use of XQuery with Saxon as the 125 * implementation, using its native "s9api". 126 *<p> 127 * Supplies alternative, XML Query-based (as the SQL/XML standard dictates) 128 * implementation of some of SQL/XML, where the implementation in core 129 * PostgreSQL is limited to the capabilities of XPath (and XPath 1.0, at that). 130 *<p> 131 * Without the syntactic sugar built into the core PostgreSQL parser, calls to 132 * a function in this class can look a bit more verbose in SQL, but reflect a 133 * straightforward rewriting from the standard syntax. For example, suppose 134 * there is a table {@code catalog_as_xml} with a single row whose {@code x} 135 * column is a (respectably sized) XML document recording the stuff in 136 * {@code pg_catalog}. 
It could be created like this: 137 *<pre> 138 * CREATE TABLE catalog_as_xml(x) AS 139 * SELECT schema_to_xml('pg_catalog', false, true, ''); 140 *</pre> 141 *<h2>Functions/predicates from ISO 9075-14 SQL/XML</h2> 142 *<h3>XMLQUERY</h3> 143 *<p> 144 * In the syntax of the SQL/XML standard, here is a query that would return 145 * an XML element representing the declaration of a function with a specified 146 * name: 147 *<pre> 148 * SELECT XMLQUERY('/pg_catalog/pg_proc[proname eq $FUNCNAME]' 149 * PASSING BY VALUE x, 'numeric_avg' AS FUNCNAME 150 * RETURNING CONTENT EMPTY ON EMPTY) 151 * FROM catalog_as_xml; 152 *</pre> 153 *<p> 154 * It binds the 'context item' of the query to {@code x}, and the {@code FUNCNAME} 155 * parameter to the given value, then evaluates the query and returns XML 156 * "CONTENT" (a tree structure with a document node at the root, but not 157 * necessarily meeting all the requirements of an XML "DOCUMENT"). It can be 158 * rewritten as this call to the {@link #xq_ret_content xq_ret_content} method: 159 *<pre> 160 * SELECT javatest.xq_ret_content('/pg_catalog/pg_proc[proname eq $FUNCNAME]', 161 * PASSING => p, nullOnEmpty => false) 162 * FROM catalog_as_xml, 163 * LATERAL (SELECT x AS ".", 'numeric_avg' AS "FUNCNAME") AS p; 164 *</pre> 165 *<p> 166 * In the rewritten form, the form of result wanted ({@code RETURNING CONTENT}) 167 * is implicit in the called function name ({@code xq_ret_content}), and the 168 * parameters to pass to the query are moved out to a separate {@code SELECT} 169 * that supplies their values, types, and names (with the context item now given 170 * the name ".") and is passed by its alias into the query function. 171 *<p> 172 * Because of an unconditional uppercasing that PL/Java's JDBC driver currently 173 * applies to column names, any parameter names, such as {@code FUNCNAME} above, 174 * must be spelled in uppercase where used in the XQuery text, or they will not 175 * be recognized. 
Because the unconditional uppercasing is highly likely to be 176 * dropped in a future PL/Java release, it is wisest until then to use only 177 * parameter names that really are uppercase, both in the XQuery text where they 178 * are used and in the SQL expression that supplies them. In PostgreSQL, 179 * identifiers that are not quoted are <em>lower</em>cased, so they must be both 180 * uppercase and quoted, in the SQL syntax, to be truly uppercase. 181 *<p> 182 * In the standard, parameters and results (of XML types) can be passed 183 * {@code BY VALUE} or {@code BY REF}, where the latter means that the same 184 * nodes will retain their XQuery node identities over calls (note that this is 185 * a meaning unrelated to what "by value" and "by reference" usually mean in 186 * PostgreSQL's documentation). PostgreSQL's implementation of the XML type 187 * provides no way for {@code BY REF} semantics to be implemented, so everything 188 * happening here happens {@code BY VALUE} implicitly, and does not need to be 189 * specified. 190 *<h3>XMLEXISTS</h3> 191 *<p> 192 * The function {@link #xmlexists xmlexists} here implements the 193 * standard function of the same name. Because it is the same name, it has to 194 * be either schema-qualified or double-quoted in a call to avoid confusion 195 * with the reserved word. 
In the syntax of the SQL/XML standard, here is a 196 * query returning a boolean value indicating whether a function with the 197 * specified name is declared: 198 *<pre> 199 * SELECT XMLEXISTS('/pg_catalog/pg_proc[proname eq $FUNCNAME]' 200 * PASSING BY VALUE x, 'numeric_avg' AS FUNCNAME) 201 * FROM catalog_as_xml; 202 *</pre> 203 *<p> 204 * It can be rewritten as this call to the {@link #xmlexists xmlexists} method: 205 *<pre> 206 * SELECT "xmlexists"('/pg_catalog/pg_proc[proname eq $FUNCNAME]', 207 * PASSING => p) 208 * FROM catalog_as_xml, 209 * LATERAL (SELECT x AS ".", 'numeric_avg' AS "FUNCNAME") AS p; 210 *</pre> 211 *<h3>XMLTABLE</h3> 212 *<p> 213 * The function {@link #xmltable xmltable} here implements (much of) the 214 * standard function of the same name. Because it is the same name, it has to 215 * be either schema-qualified or double-quoted in a call to avoid confusion 216 * with the reserved word. A rewritten form of the <a href= 217'https://www.postgresql.org/docs/10/static/functions-xml.html#FUNCTIONS-XML-PROCESSING-XMLTABLE' 218>first example in the PostgreSQL manual</a> could be: 219 *<pre> 220 * SELECT xmltable.* 221 * FROM 222 * xmldata, 223 * 224 * LATERAL (SELECT data AS ".", 'not specified'::text AS "DPREMIER") AS p, 225 * 226 * "xmltable"('//ROWS/ROW', PASSING => p, COLUMNS => ARRAY[ 227 * 'data(@id)', null, 'COUNTRY_NAME', 228 * 'COUNTRY_ID', 'SIZE[@unit eq "sq_km"]', 229 * 'concat(SIZE[@unit ne "sq_km"], " ", SIZE[@unit ne "sq_km"]/@unit)', 230 * 'let $e := PREMIER_NAME 231 * return if ( empty($e) )then $DPREMIER else $e' 232 * ]) AS ( 233 * id int, ordinality int8, "COUNTRY_NAME" text, country_id text, 234 * size_sq_km float, size_other text, premier_name text 235 * ); 236 *</pre> 237 *<p> 238 * In the first column expression, without the {@code data()} function, the 239 * result would be a bare attribute node (one not enclosed in an XML element). 
240 * Many implementations will accept a bare attribute as a column expression 241 * result, and simply assume the attribute's value is wanted, but it appears 242 * that a strict implementation of the spec must raise {@code err:XPTY0004} in 243 * such a case. This implementation is meant to be strict, so the attribute is 244 * wrapped in {@code data()} to extract and return its value. (See 245 * "About bare attribute nodes" in {@link #assignRowValues assignRowValues} 246 * for more explanation.) 247 *<p> 248 * The {@code DPREMIER} parameter passed from SQL to the XQuery expression is 249 * spelled in uppercase (and also, in the SQL expression supplying it, quoted), 250 * for the reasons explained above for the {@code xq_ret_content} function. 251 *<h3>XMLCAST</h3> 252 *<p> 253 * An ISO standard cast expression like 254 *<pre> 255 * XMLCAST(v AS wantedtype) 256 *</pre> 257 * can be rewritten with this idiom and the {@link #xmlcast xmlcast} function 258 * provided here: 259 *<pre> 260 * (SELECT r FROM (SELECT v) AS o, xmlcast(o) AS (r wantedtype)) 261 *</pre> 262 *<h2>XQuery regular-expression functions in ISO 9075-2 Foundations</h2> 263 * The methods {@link #like_regex like_regex}, 264 * {@link #occurrences_regex occurrences_regex}, 265 * {@link #position_regex position_regex}, 266 * {@link #substring_regex substring_regex}, and 267 * {@link #translate_regex translate_regex} provide, with slightly altered 268 * syntax, the ISO SQL predicate and functions of the same names. 269 *<p> 270 * For the moment, they will only match newlines in the way W3C XQuery 271 * specifies, not in the more-flexible Unicode-compatible way ISO SQL specifies, 272 * and for the ones where ISO SQL allows {@code USING CHARACTERS} or 273 * {@code USING OCTETS}, only {@code USING CHARACTERS} will work. 
274 *<h2>Extensions</h2> 275 *<h3>XQuery module prolog allowed</h3> 276 *<p> 277 * Where any function here accepts an XQuery 278 *<a href='https://www.w3.org/TR/xquery-31/#id-expressions' 279 *>"expression"</a> according to the SQL specification, in fact an XQuery 280 *<a href='https://www.w3.org/TR/xquery-31/#dt-main-module' 281 *>"main module"</a> will be accepted. Therefore, the query can be preceded by 282 * a prolog declaring namespaces, options, local variables and functions, etc. 283 *<h3>Saxon extension to XQuery regular expressions</h3> 284 *<p> 285 * Saxon's implementation of XQuery regular expressions will accept a 286 * nonstandard <em>flag</em> string ending with {@code ;j} to use Java regular 287 * expressions rather than XQuery ones. That extension is available in the 288 * XQuery regular-expression methods provided here. 289 * @author Chapman Flack 290 */ 291public class S9 implements ResultSetProvider.Large 292{ 293 private S9( 294 XdmSequenceIterator<XdmItem> xsi, 295 XQueryEvaluator[] columnXQEs, 296 SequenceType[] columnStaticTypes, 297 XMLBinary enc) 298 { 299 m_sequenceIterator = xsi; 300 m_columnXQEs = columnXQEs; 301 m_columnStaticTypes = columnStaticTypes; 302 m_atomize = new AtomizingFunction [ columnStaticTypes.length ]; 303 m_xmlbinary = enc; 304 } 305 306 final XdmSequenceIterator<XdmItem> m_sequenceIterator; 307 final XQueryEvaluator[] m_columnXQEs; 308 final SequenceType[] m_columnStaticTypes; 309 final SequenceType s_01untypedAtomic = makeSequenceType( 310 ItemType.UNTYPED_ATOMIC, OccurrenceIndicator.ZERO_OR_ONE); 311 final AtomizingFunction[] m_atomize; 312 final XMLBinary m_xmlbinary; 313 Binding.Assemblage m_outBindings; 314 315 static final Connection s_dbc; 316 static final Processor s_s9p = new Processor(false); 317 static final ItemTypeFactory s_itf = new ItemTypeFactory(s_s9p); 318 319 static final Pattern s_intervalSigns; 320 static final Pattern s_intervalSignSite; 321 322 enum XMLBinary { HEX, BASE64 }; 323 enum Nulls { 
ABSENT, NIL }; 324 325 static 326 { 327 try 328 { 329 s_dbc = DriverManager.getConnection("jdbc:default:connection"); 330 331 /* 332 * XML Schema thinks an ISO 8601 duration must have no sign 333 * anywhere but at the very beginning before the P. PostgreSQL 334 * thinks that's the one place a sign must never be, and instead 335 * it should appear in front of every numeric field. (PostgreSQL 336 * accepts input where the signs vary, and there are cases where it 337 * cannot be normalized away: P1M-1D is a thing, and can't be 338 * simplified until anchored at a date to know how long the month 339 * is! The XML Schema type simply can't represent that, so mapping 340 * of such a value must simply fail, as we'll ensure below.) 341 * So, here's a regex with a capturing group for a leading -, and 342 * one for any field-leading -, and one for the absence of a field- 343 * leading -. Any PostgreSQL or XS duration ought to match overall, 344 * but the capturing group matches should be either (f,f,t) or 345 * (f,t,f) for a PostgreSQL duration, or either (f,f,t) or (t,f,t) 346 * for an XS duration. (f,t,t) would be a PostgreSQL interval with 347 * mixed signs, and inconvertible. 348 */ 349 s_intervalSigns = Pattern.compile( 350 "(-)?+(?:[PYMWDTH](?:(?:(-)|())\\d++)?+)++(?:(?:[.,]\\d*+)?+S)?+"); 351 /* 352 * To convert from the leading-sign form, need to find every spot 353 * where a digit follows a [PYMWDTH] to insert a - there. 
354 */ 355 s_intervalSignSite = Pattern.compile("(?<=[PYMWDTH])(?=\\d)"); 356 } 357 catch ( SQLException e ) 358 { 359 throw new ExceptionInInitializerError(e); 360 } 361 } 362 363 static class PredefinedQueryHolders 364 { 365 static final XQueryCompiler s_xqc = s_s9p.newXQueryCompiler(); 366 static final QName s_qEXPR = new QName("EXPR"); 367 368 static class DocumentWrapUnwrap 369 { 370 static final XQueryExecutable INSTANCE; 371 372 static 373 { 374 try 375 { 376 INSTANCE = s_xqc.compile( 377 "declare construction preserve;" + 378 "declare variable $EXPR as item()* external;" + 379 "data(document{$EXPR}/child::node())"); 380 } 381 catch ( SaxonApiException e ) 382 { 383 throw new ExceptionInInitializerError(e); 384 } 385 } 386 } 387 } 388 389 /** 390 * PostgreSQL (as of 12) lacks the XMLTEXT function, so here it is. 391 *<p> 392 * As long as PostgreSQL does not have the {@code XML(SEQUENCE)} type, 393 * this can only be the {@code XMLTEXT(sve RETURNING CONTENT)} flavor, which 394 * does create a text node with {@code sve} as its value, but returns the 395 * text node wrapped in a document node. 396 *<p> 397 * This function doesn't actually require Saxon, but otherwise fits in with 398 * the theme here, implementing missing parts of SQL/XML for PostgreSQL. 399 * @param sve SQL string value to use in a text node 400 * @return XML content, the text node wrapped in a document node 401 */ 402 @Function(schema="javatest") 403 public static SQLXML xmltext(String sve) throws SQLException 404 { 405 SQLXML rx = s_dbc.createSQLXML(); 406 ContentHandler ch = rx.setResult(SAXResult.class).getHandler(); 407 408 try 409 { 410 ch.startDocument(); 411 /* 412 * It seems XMLTEXT() should be such a trivial function to write, 413 * but already it reveals a subtlety in the SAX API docs. 
They say 414 * the third argument to characters() is "the number of characters 415 * to read from the array" and that follows a long discussion of how 416 * individual characters can (with code points above U+FFFF) consist 417 * of more than one Java char value. 418 * 419 * And yet, when you try it out (and include some characters above 420 * U+FFFF in the input), you discover the third argument isn't the 421 * number of characters, has to be the number of Java char values. 422 */ 423 ch.characters(sve.toCharArray(), 0, sve.length()); 424 ch.endDocument(); 425 } 426 catch ( SAXException e ) 427 { 428 rx.free(); 429 throw new SQLException(e.getMessage(), e); 430 } 431 432 return rx; 433 } 434 435 /** 436 * An implementation of XMLCAST. 437 *<p> 438 * Will be declared to take and return type {@code RECORD}, where each must 439 * have exactly one component, just because that makes it easy to use 440 * existing JDBC metadata queries to find out the operand and target SQL 441 * data types. 442 *<p> 443 * Serving suggestion: rewrite this ISO standard expression 444 *<pre> 445 * XMLCAST(v AS wantedtype) 446 *</pre> 447 * to this idiomatic one: 448 *<pre> 449 * (SELECT r FROM (SELECT v) AS o, xmlcast(o) AS (r wantedtype)) 450 *</pre> 451 * @param operand a one-row, one-column record supplied by the caller, whose 452 * one typed value is the operand to be cast. 453 * @param base64 true if binary SQL values should be base64-encoded in XML; 454 * if false (the default), values will be encoded in hex. 455 * @param target a one-row, one-column record supplied by PL/Java from the 456 * {@code AS} clause after the function call, whose one column's type is the 457 * type to be cast to. 
458 */ 459 @Function( 460 schema="javatest", 461 type="pg_catalog.record", 462 onNullInput=CALLED, 463 settings="IntervalStyle TO iso_8601" 464 ) 465 public static boolean xmlcast( 466 ResultSet operand, @SQLType(defaultValue="false") Boolean base64, 467 ResultSet target) 468 throws SQLException 469 { 470 if ( null == operand ) 471 throw new SQLDataException( 472 "xmlcast \"operand\" must be (in this implementation) " + 473 "a non-null row type", "22004"); 474 475 if ( null == base64 ) 476 throw new SQLDataException( 477 "xmlcast \"base64\" must be true or false, not null", "22004"); 478 XMLBinary enc = base64 ? XMLBinary.BASE64 : XMLBinary.HEX; 479 480 assert null != target : "PL/Java supplied a null output record???"; 481 482 if ( 1 != operand.getMetaData().getColumnCount() ) 483 throw new SQLDataException( 484 "xmlcast \"operand\" must be a row type with exactly " + 485 "one component", "22000"); 486 487 if ( 1 != target.getMetaData().getColumnCount() ) 488 throw new SQLDataException( 489 "xmlcast \"target\" must be a row type with exactly " + 490 "one component", "22000"); 491 492 Binding.Parameter op = 493 new BindingsFromResultSet(operand, false).iterator().next(); 494 495 Binding.Parameter tg = 496 new BindingsFromResultSet(target, null).iterator().next(); 497 498 int sd = op.typeJDBC(); 499 int td = tg.typeJDBC(); 500 501 int castcase = 502 (Types.SQLXML == sd ? 2 : 0) | (Types.SQLXML == td ? 1 : 0); 503 504 switch ( castcase ) 505 { 506 case 0: // neither sd nor td is an XML type 507 throw new SQLSyntaxErrorException( 508 "at least one of xmlcast \"operand\" or \"target\" must " + 509 "be of XML type", "42804"); 510 case 3: // both XML 511 /* 512 * In an implementation closely following the spec, this case would 513 * be handled in parse analysis and rewritten from an XMLCAST to a 514 * plain CAST, and this code would never see it. This is a plain 515 * example function without benefit of a parser that can do that. 
516 * In a DBMS with all the various SQL:2006 XML subtypes, there would 517 * be nontrivial work to do here, but casting from PostgreSQL's one 518 * XML type to itself is more of a warm-up exercise. 519 */ 520 target.updateSQLXML(1, operand.getSQLXML(1)); 521 return true; 522 case 1: // something non-XML being cast to XML 523 assertCanCastAsXmlSequence(sd, "operand"); 524 Object v = op.valueJDBC(); 525 if ( null == v ) 526 { 527 target.updateNull(1); 528 return true; 529 } 530 ItemType xsbt = 531 mapSQLDataTypeToXMLSchemaDataType(op, enc, Nulls.ABSENT); 532 Iterator<XdmItem> tv = 533 xmlCastAsSequence(v, enc, xsbt).iterator(); 534 try 535 { 536 target.updateSQLXML(1, 537 returnContent(tv, /*nullOnEmpty*/ false)); 538 } 539 catch ( SaxonApiException | XPathException e ) 540 { 541 throw new SQLException(e.getMessage(), "10000", e); 542 } 543 return true; 544 case 2: // XML being cast to something non-XML 545 assertCanCastAsXmlSequence(td, "target"); 546 SQLXML sx = operand.getSQLXML(1); 547 if ( null == sx ) 548 { 549 target.updateNull(1); 550 return true; 551 } 552 DocumentBuilder dBuilder = s_s9p.newDocumentBuilder(); 553 Source source = sx.getSource(null); 554 try 555 { 556 XdmValue xv = dBuilder.build(source); 557 XQueryEvaluator xqe = 558 PredefinedQueryHolders.DocumentWrapUnwrap.INSTANCE.load(); 559 xqe.setExternalVariable(PredefinedQueryHolders.s_qEXPR, xv); 560 xv = xqe.evaluate(); 561 /* 562 * It's zero-or-one, or XPTY0004 was thrown here. 
563 */ 564 if ( 0 == xv.size() ) 565 { 566 target.updateNull(1); 567 return true; 568 } 569 XdmAtomicValue av = (XdmAtomicValue)xv; 570 xmlCastAsNonXML( 571 av, ItemType.UNTYPED_ATOMIC, tg, target, 1, enc); 572 } 573 catch ( SaxonApiException | XPathException e ) 574 { 575 throw new SQLException(e.getMessage(), "10000", e); 576 } 577 return true; 578 } 579 580 throw new SQLFeatureNotSupportedException( 581 "cannot yet xmlcast from " + op.typePG() + 582 " to " + tg.typePG(), "0A000"); 583 } 584 585 /** 586 * A simple example corresponding to {@code XMLQUERY(expression 587 * PASSING BY VALUE passing RETURNING CONTENT {NULL|EMPTY} ON EMPTY)}. 588 * @param expression An XQuery expression. Must not be {@code null} (in the 589 * SQL standard {@code XMLQUERY} syntax, it is not even allowed to be an 590 * SQL expression at all, only a string literal). 591 * @param nullOnEmpty pass {@code true} to get a null return in place of 592 * an empty sequence, or {@code false} to just get the empty sequence. 593 * @param passing A row value whose columns will be supplied to the query 594 * as parameters. Columns with names (typically supplied with {@code AS}) 595 * appear as predeclared external variables with matching names (in no 596 * namespace) in the query, with types derived from the SQL types of the 597 * row value's columns. There may be one (and no more than one) 598 * column with {@code AS "."} which, if present, will be bound as the 599 * context item. (The name {@code ?column?}, which PostgreSQL uses for an 600 * otherwise-unnamed column, is also accepted, which will often allow the 601 * context item to be specified with no {@code AS} at all. Beware, though, 602 * that PostgreSQL likes to invent column names from any function or type 603 * name that may appear in the value expression, so this shorthand will not 604 * always work, while {@code AS "."} will.) 
PL/Java's internal JDBC uppercases all column 605 * names, so any uses of the corresponding variables in the query must have 606 * the names in upper case. It is safest to also uppercase their appearances 607 * in the SQL (for which, in PostgreSQL, they must be quoted), so that the 608 * JDBC uppercasing is not being relied on. It is likely to be dropped in a 609 * future PL/Java release. 610 * @param namespaces An even-length String array where, of each pair of 611 * consecutive entries, the first is a namespace prefix and the second is 612 * the URI to which to bind it. The zero-length prefix sets the default 613 * element and type namespace; if the prefix has zero length, the URI may 614 * also have zero length, to declare that unprefixed elements are in no 615 * namespace. 616 */ 617 @Function( 618 schema="javatest", 619 onNullInput=CALLED, 620 settings="IntervalStyle TO iso_8601" 621 ) 622 public static SQLXML xq_ret_content( 623 String expression, Boolean nullOnEmpty, 624 @SQLType(defaultValue={}) ResultSet passing, 625 @SQLType(defaultValue={}) String[] namespaces) 626 throws SQLException 627 { 628 /* 629 * The expression itself may not be null (in the standard, it isn't 630 * even allowed to be dynamic, and can only be a string literal!). 631 */ 632 if ( null == expression ) 633 throw new SQLDataException( 634 "XMLQUERY expression may not be null", "22004"); 635 636 if ( null == nullOnEmpty ) 637 throw new SQLDataException( 638 "XMLQUERY nullOnEmpty may not be null", "22004"); 639 640 try 641 { 642 XdmSequenceIterator<XdmItem> x1 = 643 evalXQuery(expression, passing, namespaces); 644 return null == x1 ? null : returnContent(x1, nullOnEmpty); 645 } 646 catch ( SaxonApiException | XPathException e ) 647 { 648 throw new SQLException(e.getMessage(), "10000", e); 649 } 650 } 651 652 /** 653 * An implementation of {@code XMLEXISTS(expression 654 * PASSING BY VALUE passing)}, using genuine XQuery. 655 * @param expression An XQuery expression. 
Must not be {@code null} (in the 656 * SQL standard {@code XMLEXISTS} syntax, it is not even allowed to be an 657 * SQL expression at all, only a string literal). 658 * @param passing A row value whose columns will be supplied to the query 659 * as parameters. Columns with names (typically supplied with {@code AS}) 660 * appear as predeclared external variables with matching names (in no 661 * namespace) in the query, with types derived from the SQL types of the 662 * row value's columns. There may be one (and no more than one) 663 * column with {@code AS "."} which, if present, will be bound as the 664 * context item. (The name {@code ?column?}, which PostgreSQL uses for an 665 * otherwise-unnamed column, is also accepted, which will often allow the 666 * context item to be specified with no {@code AS} at all. Beware, though, 667 * that PostgreSQL likes to invent column names from any function or type 668 * name that may appear in the value expression, so this shorthand will not 669 * always work, while {@code AS "."} will.) PL/Java's internal JDBC uppercases all column 670 * names, so any uses of the corresponding variables in the query must have 671 * the names in upper case. It is safest to also uppercase their appearances 672 * in the SQL (for which, in PostgreSQL, they must be quoted), so that the 673 * JDBC uppercasing is not being relied on. It is likely to be dropped in a 674 * future PL/Java release. 675 * @param namespaces An even-length String array where, of each pair of 676 * consecutive entries, the first is a namespace prefix and the second is 677 * the URI to which to bind it. The zero-length prefix sets the default 678 * element and type namespace; if the prefix has zero length, the URI may 679 * also have zero length, to declare that unprefixed elements are in no 680 * namespace. 681 * @return True if the expression evaluates to a nonempty sequence, false if 682 * it evaluates to an empty one. 
Null if a context item is passed and its 683 * SQL value is null. 684 */ 685 @Function( 686 schema="javatest", 687 onNullInput=CALLED, 688 settings="IntervalStyle TO iso_8601" 689 ) 690 public static Boolean xmlexists( 691 String expression, 692 @SQLType(defaultValue={}) ResultSet passing, 693 @SQLType(defaultValue={}) String[] namespaces) 694 throws SQLException 695 { 696 /* 697 * The expression itself may not be null (in the standard, it isn't 698 * even allowed to be dynamic, and can only be a string literal!). 699 */ 700 if ( null == expression ) 701 throw new SQLDataException( 702 "XMLEXISTS expression may not be null", "22004"); 703 704 XdmSequenceIterator<XdmItem> x1 = 705 evalXQuery(expression, passing, namespaces); 706 if ( null == x1 ) 707 return null; 708 if ( ! x1.hasNext() ) 709 return false; 710 x1.close(); 711 return true; 712 } 713 714 /** 715 * Implementation factor of XMLEXISTS and XMLQUERY. 716 * @return null if a context item is passed and its SQL value is null 717 */ 718 private static XdmSequenceIterator<XdmItem> evalXQuery( 719 String expression, ResultSet passing, String[] namespaces) 720 throws SQLException 721 { 722 Binding.Assemblage bindings = new BindingsFromResultSet(passing, true); 723 724 try 725 { 726 XQueryCompiler xqc = createStaticContextWithPassedTypes( 727 bindings, namespaceBindings(namespaces)); 728 729 XQueryEvaluator xqe = xqc.compile(expression).load(); 730 731 if ( storePassedValuesInDynamicContext(xqe, bindings, true) ) 732 return null; 733 734 /* 735 * For now, punt on whether the <XQuery expression> is evaluated 736 * with XML 1.1 or 1.0 lexical rules.... XXX 737 */ 738 return xqe.iterator(); 739 } 740 catch ( SaxonApiException | XPathException e ) 741 { 742 throw new SQLException(e.getMessage(), "10000", e); 743 } 744 } 745 746 /** 747 * Perform the final steps of <em>something</em> {@code RETURNING CONTENT}, 748 * with or without {@code nullOnEmpty}. 
749 *<p> 750 * The effects are to be the same as if the supplied sequence were passed 751 * as {@code $EXPR} to {@code document{$EXPR}}. 752 */ 753 private static SQLXML returnContent( 754 Iterator<XdmItem> x, boolean nullOnEmpty) 755 throws SQLException, SaxonApiException, XPathException 756 { 757 if ( nullOnEmpty && ! x.hasNext() ) 758 return null; 759 760 SQLXML rsx = s_dbc.createSQLXML(); 761 /* 762 * Keep this simple by requesting a specific type of Result rather 763 * than letting PL/Java choose. It happens (though this is a detail of 764 * the implementation) that SAXResult won't be a bad choice. 765 */ 766 SAXResult sr = rsx.setResult(SAXResult.class); 767 /* 768 * Michael Kay recommends the following as equivalent to the SQL/XML- 769 * mandated behavior of evaluating document{$x}. 770 * https://sourceforge.net/p/saxon/mailman/message/36969060/ 771 */ 772 SAXDestination d = new SAXDestination(sr.getHandler()); 773 Receiver r = d.getReceiver( 774 s_s9p.getUnderlyingConfiguration().makePipelineConfiguration(), 775 new SerializationProperties()); 776 r.open(); 777 while ( x.hasNext() ) 778 r.append(x.next().getUnderlyingValue()); 779 r.close(); 780 return rsx; 781 } 782 783 /** 784 * An implementation of (much of) XMLTABLE, using genuine XML Query. 785 *<p> 786 * The {@code columns} array must supply a valid XML Query expression for 787 * every column in the column definition list that follows the call of this 788 * function in SQL, except that the column for ordinality, if wanted, is 789 * identified by a {@code null} entry in {@code columns}. Syntax sugar in 790 * the standard allows an omitted column expression to imply an element test 791 * for an element with the same name as the column; that doesn't work here. 792 *<p> 793 * For now, this implementation lacks the ability to specify defaults for 794 * when a column expression produces an empty sequence. 
 It is possible to 795 * do defaults explicitly by rewriting a query expression <em>expr</em> as 796 * {@code let $e := }<em>expr</em>{@code return if(empty($e))then $D else $e} 797 * and supplying the default <em>D</em> as another query parameter, though 798 * such defaults will be evaluated only once when {@code xmltable} is called 799 * and will not be able to refer to other values in an output row. 800 * @param rows The single XQuery expression whose result sequence generates 801 * the rows of the resulting table. Must not be null. 802 * @param columns Array of XQuery expressions, exactly as many as result 803 * columns in the column definition list that follows the SQL call to this 804 * function. This array must not be null. It is allowed for one element (and 805 * no more than one) to be null, marking the corresponding column to be 806 * "FOR ORDINALITY" (the column must be of "exact numeric with scale zero" 807 * type; PostgreSQL supports 64-bit row counters, so {@code int8} is 808 * recommended). 809 * @param passing A row value whose columns will be supplied to the query 810 * as parameters, just as described for 811 * {@link #xq_ret_content xq_ret_content()}. If a context item is supplied, 812 * it is the context item for the {@code rows} query (the {@code columns} 813 * queries get their context item from the {@code rows} query's result). Any 814 * named parameters supplied here are available both in the {@code rows} 815 * expression and (though this goes beyond the standard) in every expression 816 * of {@code columns}, with their values unchanging from row to row. 817 * @param namespaces An even-length String array where, of each pair of 818 * consecutive entries, the first is a namespace prefix and the second is 819 * the URI to which to bind it, just as described for 820 * {@link #xq_ret_content xq_ret_content()}. 821 * @param base64 whether the effective, in-scope 'xmlbinary' setting calls 822 * for base64 or (the default, false) hexadecimal. 
823 */ 824 @Function( 825 schema="javatest", 826 onNullInput=CALLED, 827 settings="IntervalStyle TO iso_8601" 828 ) 829 public static ResultSetProvider xmltable( 830 String rows, String[] columns, 831 @SQLType(defaultValue={}) ResultSet passing, 832 @SQLType(defaultValue={}) String[] namespaces, 833 @SQLType(defaultValue="false") Boolean base64) 834 throws SQLException 835 { 836 if ( null == rows ) 837 throw new SQLDataException( 838 "XMLTABLE row expression may not be null", "22004"); 839 840 if ( null == columns ) 841 throw new SQLDataException( 842 "XMLTABLE columns expression array may not be null", "22004"); 843 844 if ( null == base64 ) 845 throw new SQLDataException( 846 "XMLTABLE base64 parameter may not be null", "22004"); 847 XMLBinary enc = base64 ? XMLBinary.BASE64 : XMLBinary.HEX; 848 849 Binding.Assemblage rowBindings = 850 new BindingsFromResultSet(passing, true); 851 852 Iterable<Map.Entry<String,String>> namespacepairs = 853 namespaceBindings(namespaces); 854 855 XQueryEvaluator[] columnXQEs = new XQueryEvaluator[ columns.length ]; 856 SequenceType[] columnStaticTypes = new SequenceType[ columns.length ]; 857 858 try 859 { 860 XQueryCompiler rowXQC = createStaticContextWithPassedTypes( 861 rowBindings, namespacepairs); 862 863 XQueryExecutable rowXQX = rowXQC.compile(rows); 864 865 Binding.Assemblage columnBindings = 866 new BindingsFromXQX(rowXQX, rowBindings); 867 868 XQueryCompiler columnXQC = createStaticContextWithPassedTypes( 869 columnBindings, namespacepairs); 870 871 boolean ordinalitySeen = false; 872 for ( int i = 0; i < columns.length; ++ i ) 873 { 874 String expr = columns[i]; 875 if ( null == expr ) 876 { 877 if ( ordinalitySeen ) 878 throw new SQLSyntaxErrorException( 879 "No more than one column expression may be null " + 880 "(=> \"for ordinality\")", "42611"); 881 ordinalitySeen = true; 882 continue; 883 } 884 XQueryExecutable columnXQX = columnXQC.compile(expr); 885 columnStaticTypes[i] = makeSequenceType( 886 
columnXQX.getResultItemType(), 887 columnXQX.getResultCardinality()); 888 columnXQEs[i] = columnXQX.load(); 889 storePassedValuesInDynamicContext( 890 columnXQEs[i], columnBindings, false); 891 } 892 893 XQueryEvaluator rowXQE = rowXQX.load(); 894 XdmSequenceIterator<XdmItem> rowIterator; 895 if ( storePassedValuesInDynamicContext(rowXQE, rowBindings, true) ) 896 rowIterator = (XdmSequenceIterator<XdmItem>) 897 XdmEmptySequence.getInstance().iterator(); 898 else 899 rowIterator = rowXQE.iterator(); 900 return new S9(rowIterator, columnXQEs, columnStaticTypes, enc); 901 } 902 catch ( SaxonApiException | XPathException e ) 903 { 904 throw new SQLException(e.getMessage(), "10000", e); 905 } 906 } 907 908 /** 909 * Called when PostgreSQL has no need for more rows of the tabular result. 910 */ 911 @Override 912 public void close() 913 { 914 m_sequenceIterator.close(); 915 } 916 917 /** 918 * <a id='assignRowValues'>Produce and return one row</a> of 919 * the {@code XMLTABLE} result table per call. 920 *<p> 921 * The row expression has already been compiled and its evaluation begun, 922 * producing a sequence iterator. The column XQuery expressions have all 923 * been compiled and are ready to evaluate, and the compiler's static 924 * analysis has bounded the data types they will produce. Because of the 925 * way the set-returning function protocol works, we don't know the types 926 * of the SQL output columns yet, until the first call of this function, 927 * when the {@code receive} parameter's {@code ResultSetMetaData} can be 928 * inspected to find out. So that will be the first thing done when called 929 * with {@code currentRow} of zero. 
930 *<p> 931 * Each call will then: (a) get the next value from the row expression's 932 * sequence iterator, then for each column, (b) evaluate that column's 933 * XQuery expression on the row value, and (c) assign that column's result 934 * to the SQL output column, casting to the proper type (which the SQL/XML 935 * spec has very exacting rules on how to do). 936 *<p> 937 * A note before going any further: this implementation, while fairly 938 * typical of a PostgreSQL set-returning user function, is <em>not</em> the 939 * way the SQL/XML spec defines {@code XMLTABLE}. The official behavior of 940 * {@code XMLTABLE} is defined in terms of a rewriting, at the SQL level, 941 * into a much-expanded SQL query where each result column appears as an 942 * {@code XMLQUERY} call applying the column expression, wrapped in an 943 * {@code XMLCAST} to the result column type (with a 944 * {@code CASE WHEN XMLEXISTS} thrown in to support column defaults). 945 *<p> 946 * As an ordinary user function, this example cannot rely on any fancy 947 * query rewriting during PostgreSQL's parse analysis. The slight syntax 948 * desugaring needed to transform a standard {@code XMLTABLE} call into a 949 * call of this "xmltable" is not too hard to learn and do by hand, but no 950 * one would ever want to write out by hand the whole longwinded "official" 951 * expansion prescribed in the spec. So this example is a compromise. 952 *<p> 953 * The main thing lost in the compromise is the handling of column defaults. 954 * The full rewriting with per-column SQL expressions means that each 955 * column default expression can be evaluated exactly when/if needed, which 956 * is often the desired behavior. This implementation as an ordinary 957 * function, whose arguments all get evaluated ahead of the call, can't 958 * really do that. Otherwise, there's nothing in the spec that's inherently 959 * unachievable in this implementation. 
960 *<p> 961 * Which brings us to the matter of casting each column expression result 962 * to the proper type for its SQL result column. 963 *<p> 964 * Like any spec, {@code SQL/XML} does not mandate that an implementation 965 * must be done in exactly the way presented in the spec (rewritten so each 966 * column value is produced by an {@code XMLQUERY} wrapped in an 967 * {@code XMLCAST}). The requirement is to produce the equivalent result. 968 *<p> 969 * A look at the rewritten query shows that each column XQuery result value 970 * must be representable as some value in SQL's type system, not once, but 971 * twice: first as the result returned by {@code XMLQUERY} and passed along 972 * to {@code XMLCAST}, and finally with the output column's type as the 973 * result of the {@code XMLCAST}. 974 *<p> 975 * Now, the output column type can be whatever is wanted. Importantly, it 976 * can be either an XML type, or any ordinary SQL scalar type, like a 977 * {@code float} or a {@code date}. Likewise, the XQuery column expression 978 * may have produced some atomic value (like an {@code xs:double} or 979 * {@code xs:date}), or some XML node, or any sequence of any of those. 980 *<p> 981 * What are the choices for the type in the middle: the SQL value returned 982 * by {@code XMLQUERY} and passed on to {@code XMLCAST}? 983 *<p> 984 * There are two. An ISO-standard SQL {@code XMLQUERY} can specify 985 * {@code RETURNING SEQUENCE} or {@code RETURNING CONTENT}. The first option 986 * produces the type {@code XML(SEQUENCE)}, a useful type that PostgreSQL 987 * does not currently have. {@code XML(SEQUENCE)} can hold exactly whatever 988 * an XQuery expression can produce: a sequence of any length, of any 989 * mixture of atomic values and XML nodes (even such oddities as attribute 990 * nodes outside of any element), in any order. An {@code XML(SEQUENCE)} 991 * value need not look anything like what "XML" normally brings to mind. 
992 *<p> 993 * With the other option, {@code RETURNING CONTENT}, the result of 994 * {@code XMLQUERY} has to be something that PostgreSQL's {@code xml} type 995 * could store: a serialized document with XML structure, but without the 996 * strict requirements of exactly one root element with no text outside it. 997 * At the limit, a completely non-XMLish string of ordinary text is 998 * perfectly acceptable XML {@code CONTENT}, as long as it uses the right 999 * {@code &...;} escapes for any characters that could look like XML markup. 1000 *<p> 1001 * {@code XMLCAST} is able to accept either form as input, and deliver it 1002 * to the output column as whatever type is needed. But the spec leaves no 1003 * wiggle room as to which form to use: 1004 *<ul> 1005 *<li>If the result column type is {@code XML(SEQUENCE)}, then the 1006 * {@code XMLQUERY} is to specify {@code RETURNING SEQUENCE}. It produces 1007 * the column's result type directly, so the {@code XMLCAST} has nothing 1008 * to do. 1009 *<li>In every other case (<em>every</em> other case), the {@code XMLQUERY} 1010 * is to specify {@code RETURNING CONTENT}. 1011 *</ul> 1012 *<p> 1013 * At first blush, that second rule should sound crazy. Imagine a column 1014 * definition like 1015 *<pre> 1016 * growth float8 PATH 'math:pow(1.0 + $RATE, count(year))' 1017 *</pre> 1018 * The expression produces an {@code xs:double}, which can be assigned 1019 * directly to a PostgreSQL {@code float8}, but the rule in the spec will 1020 * have it first converted to a decimal string representation, made into 1021 * a text node, wrapped in a document node, and returned as XML, to be 1022 * passed along to {@code XMLCAST}, which parses it, discards the wrapping 1023 * document node, parses the text content as a double, and returns that as 1024 * a proper value of the result column type (which, in this example, it 1025 * already is). 1026 *<p> 1027 * The spec does not go into why this rule was chosen. 
The only rationale 1028 * that makes sense to me is that the {@code XML(SEQUENCE)} data type 1029 * is an SQL feature (X190) that not every implementation will support, 1030 * so the spec has to define {@code XMLTABLE} using a rewritten query that 1031 * can work on systems that do not have that type. (PostgreSQL itself, at 1032 * present, does not have it.) 1033 *<p> 1034 * The first rule, when {@code XML(SEQUENCE)} is the result column type, 1035 * will naturally never be in play except on a system that has that type, in 1036 * which case it can be used directly. But even such a system must still 1037 * produce, in all other cases, results that match what a system without 1038 * that type would produce. All those cases are therefore defined as if 1039 * going the long way through {@code XML(CONTENT)}. 1040 *<p> 1041 * Whenever the XQuery expression can be known to produce a (possibly empty 1042 * or) singleton sequence of an atomic type, the long round trip can be 1043 * shown to be idempotent, and we can skip right to casting the atomic type 1044 * to the SQL result column type. A few other cases could be short-circuited 1045 * the same way. But in general, for cases involving nodes or non-singleton 1046 * sequences, it is safest to follow the spec punctiliously; the steps are 1047 * defined in terms of XQuery constructs like {@code document {...}} and 1048 * {@code data()}, which have specs of their own with many traps for the 1049 * unwary, and the XQuery library provides implementations of them that are 1050 * already tested and correct. 1051 *<p> 1052 * Though most of the work can be done by the XQuery library, it may be 1053 * helpful to look closely at just what the specification entails. 1054 *<p> 1055 * Again, but for the case of an {@code XML(SEQUENCE)} result column, in all 1056 * other cases the result must pass through 1057 * {@code XMLQUERY(... RETURNING CONTENT EMPTY ON EMPTY)}. That, in turn, is 1058 * defined as equivalent to {@code XMLQUERY(... 
RETURNING SEQUENCE)} with 1059 * the result then passed to {@code XMLDOCUMENT(... RETURNING CONTENT)}, 1060 * whose behavior is that of a 1061 * <a href='https://www.w3.org/TR/xquery-31/#id-documentConstructors'> 1062 * document node constructor</a> in XQuery, with 1063 * <a href='https://www.w3.org/TR/xquery-31/#dt-construction-mode'> 1064 * construction mode</a> {@code preserve}. The first step of that behavior 1065 * is the same as Step 1e in the processing of 1066 * <a href='https://www.w3.org/TR/xquery-31/#id-content'>direct element 1067 * constructor content</a>. The remaining steps are those laid out for the 1068 * document node constructor. 1069 *<p> 1070 * Clarity demands flattening this nest of specifications into a single 1071 * ordered list of the steps to apply: 1072 *<ul> 1073 *<li>Any item in the sequence that is an array is flattened (its elements 1074 * become items in the sequence). 1075 *<li>If any item is a function, {@code err:XQTY0105} is raised. 1076 *<li>Any sequence {@code $s} of adjacent atomic values is replaced by 1077 * {@code string-join($s, ' ')}. 1078 *<li>Any XML node in the sequence is copied (as detailed in the spec). 1079 *<li>After all the above, any document node that may exist in the resulting 1080 * sequence is flattened (replaced by its children). 1081 *<li>A single text node is produced for any run of adjacent text nodes in 1082 * the sequence (including any that have newly become adjacent by the 1083 * flattening of document nodes), by concatenation with no separator (unlike 1084 * the earlier step where atomic values were concatenated with a space as 1085 * the separator). 1086 *<li>If the sequence directly contains any attribute or namespace node, 1087 * {@code err:XPTY0004} is raised. <b>More on this below.</b> 1088 *<li>The sequence resulting from the preceding steps is wrapped in one 1089 * new document node (as detailed in the spec). 
1090 *</ul> 1091 *<p> 1092 * At this point, the result could be returned to SQL as a value of 1093 * {@code XML(CONTENT(ANY))} type, to be passed to an {@code XMLCAST} 1094 * invocation. This implementation avoids that, and simply proceeds with the 1095 * existing Java in-memory representation of the document tree, to the 1096 * remaining steps entailed in an {@code XMLCAST} to the output column type: 1097 *<ul> 1098 *<li>If the result column type is an XML type, rewriting would turn the 1099 * {@code XMLCAST} into a simple {@code CAST} and that's that. Otherwise, 1100 * the result column has some non-XML, SQL type, and: 1101 *<li>The algorithm "Removing XQuery document nodes from an XQuery sequence" 1102 * is applied. By construction, we know the only such node is the one the 1103 * whole sequence was recently wrapped in, two steps ago (you get your 1104 * house back, you get your dog back, you get your truck back...). 1105 *<li>That sequence of zero or more XML nodes is passed to the 1106 *<a href='https://www.w3.org/TR/xpath-functions-31/#func-data'>fn:data</a> 1107 * function, producing a sequence of zero or more atomic values, which will 1108 * all have type {@code xs:untypedAtomic} (because the document-wrapping 1109 * stringified any original atomic values and wrapped them in text nodes, 1110 * for which the 1111 * <a href='https://www.w3.org/TR/xpath-datamodel-31/#acc-summ-typed-value'> 1112 * typed-value</a> is {@code xs:untypedAtomic} by definition). This sequence 1113 * also has cardinality zero-or-more, and may be shorter or longer than the 1114 * original. 1115 *<li>If the sequence is empty, the result column is assigned {@code NULL} 1116 * (or the column's default value, if one was specified). 
Otherwise, the 1117 * sequence is known to have length one or more, and: 1118 *<li>The spec does not say this (which may be an oversight or bug), but the 1119 * sequence must be checked for length greater than one, raising 1120 * {@code err:XPTY0004} in that case. The following steps require it to be a 1121 * singleton. 1122 *<li>It is labeled as a singleton sequence of {@code xs:anyAtomicType} and 1123 * used as input to an XQuery {@code cast as} expression. (Alternatively, it 1124 * could be labeled a one-or-more sequence of {@code xs:anyAtomicType}, 1125 * leaving the length check to be done by {@code cast as}, which would raise 1126 * the same error {@code err:XPTY0004}, if longer than one.) 1127 *<li>The {@code cast as} is to the XQuery type determined as in 1128 * {@code determineXQueryFormalType} below, based on the SQL type of the 1129 * result column; or, if the SQL type is a date/time type with no time zone, 1130 * there is a first {@code cast as} to a specific XSD date/time type, which 1131 * is (if it has a time zone) first adjusted to UTC, then stripped of its 1132 * time zone, followed by a second {@code cast as} from that type to the one 1133 * determined from the result column type. Often, that will be the same type 1134 * as was used for the time zone adjustment, and the second {@code cast as} 1135 * will have nothing to do. 1136 *<li>The XQuery value resulting from the cast is converted and assigned to 1137 * the SQL-typed result column, a step with many details but few surprises, 1138 * therefore left for the morbidly curious to explore in the code. The flip 1139 * side of the time zone removal described above happens here: if the SQL 1140 * column type expects a time zone and the incoming value lacks one, it is 1141 * given a zone of UTC. 1142 *</ul> 1143 *<p> 1144 * The later steps above, those following the length-one check, are 1145 * handled by {@code xmlCastAsNonXML} below. 
1146 *<p> 1147 * The earlier steps, from the start through the {@code XMLCAST} early steps 1148 * of document-node unwrapping, can all be applied by letting the original 1149 * result sequence be {@code $EXPR} in the expression: 1150 *<pre> 1151 * declare construction preserve; 1152 * data(document { $EXPR } / child::node()) 1153 *</pre> 1154 * which may seem a bit of an anticlimax after seeing how many details lurk 1155 * behind those tidy lines of code. 1156 *<p> 1157 * <strong>About bare attribute nodes</strong> 1158 *<p> 1159 * One consequence of the rules above deserves special attention. 1160 * Consider something like: 1161 *<pre> 1162 * XMLTABLE('.' PASSING '<a foo="bar"/>' COLUMNS c1 VARCHAR PATH 'a/@foo'); 1163 *</pre> 1164 *<p> 1165 * The result of the column expression is an XML attribute node all on its 1166 * own, with name {@code foo} and value {@code bar}, not enclosed in any 1167 * XML element. In the data type {@code XML(SEQUENCE)}, an attribute node 1168 * can appear standalone like that, but not in {@code XML(CONTENT)}. 1169 *<p> 1170 * Db2, Oracle, and even the XPath-based pseudo-XMLTABLE built into 1171 * PostgreSQL, will all accept that query and produce the result "bar". 1172 *<p> 1173 * However, a strict interpretation of the spec cannot produce that result, 1174 * because the result column type ({@code VARCHAR}) is not 1175 * {@code XML(SEQUENCE)}, meaning the result must be as if passed through 1176 * {@code XMLDOCUMENT(... RETURNING CONTENT)}, and the XQuery 1177 * {@code document { ... }} constructor is required to raise 1178 * {@code err:XPTY0004} upon encountering any bare attribute node. The 1179 * apparently common, convenient behavior of returning the attribute node's 1180 * value component is not, strictly, conformant. 1181 *<p> 1182 * This implementation will raise {@code err:XPTY0004}. That can be avoided 1183 * by simply wrapping any such bare attribute in {@code data()}: 1184 *<pre> 1185 * ... 
COLUMNS c1 VARCHAR PATH 'a/data(@foo)'); 1186 *</pre> 1187 *<p> 1188 * It is possible the spec has an editorial mistake and did not intend to 1189 * require an error for this usage, in which case this implementation can 1190 * be changed to match a future clarification of the spec. 1191 */ 1192 @Override 1193 public boolean assignRowValues(ResultSet receive, long currentRow) 1194 throws SQLException 1195 { 1196 if ( 0 == currentRow ) 1197 { 1198 m_outBindings = new BindingsFromResultSet(receive, m_columnXQEs); 1199 int i = -1; 1200 AtomizingFunction atomizer = null; 1201 for ( Binding.Parameter p : m_outBindings ) 1202 { 1203 SequenceType staticType = m_columnStaticTypes [ ++ i ]; 1204 /* 1205 * A null in m_columnXQEs identifies the ORDINALITY column, 1206 * if any. Assign nothing to m_atomize[i], it won't be used. 1207 */ 1208 if ( null == m_columnXQEs [ i ] ) 1209 continue; 1210 1211 if ( Types.SQLXML == p.typeJDBC() ) 1212 continue; 1213 1214 /* 1215 * Ok, the output column type is non-XML; choose an atomizer, 1216 * either a simple identity if the result type is statically 1217 * known to be zero-or-one atomic, or the long way through the 1218 * general-purpose one. If the type is statically known to be 1219 * the empty sequence (weird, but not impossible), the identity 1220 * atomizer suffices and we're on to the next column. 1221 */ 1222 OccurrenceIndicator occur = staticType.getOccurrenceIndicator(); 1223 if ( OccurrenceIndicator.ZERO == occur ) 1224 { 1225 m_atomize [ i ] = (v, col) -> v; 1226 continue; 1227 } 1228 1229 /* So, it isn't known to be empty. If the column 1230 * expression type isn't known to be atomic, or isn't known to 1231 * be zero-or-one, then the general-purpose atomizer--a trip 1232 * through data(document { ... } / child::node())--must be used. 1233 * This atomizer will definitely produce a sequence of length 1234 * zero or one, raising XPTY0004 otherwise. So the staticType 1235 * can be replaced by xs:anyAtomicType?. 
xmlCastAsNonXML will 1236 * therefore be passed xs:anyAtomicType, as in the spec. 1237 * BUT NO ... Saxon is more likely to find a converter from 1238 * xs:untypedAtomic than from xs:anyAtomicType. 1239 */ 1240 ItemType itemType = staticType.getItemType(); 1241 if ( occur.allowsMany() 1242 || ! ItemType.ANY_ATOMIC_VALUE.subsumes(itemType) 1243 /* 1244 * The following tests may be punctilious to a fault. If we 1245 * have a bare Saxon atomic type of either xs:base64Binary 1246 * or xs:hexBinary type, Saxon will happily and successfully 1247 * convert it to a binary string; but if we have the same 1248 * thing as a less-statically-determinate type that we'll 1249 * put through the atomizer, the conversion will fail unless 1250 * its encoding matches the m_xmlbinary setting. That could 1251 * seem weirdly unpredictable to a user, so we'll just 1252 * (perversely) disallow the optimization (which would 1253 * succeed) in the cases where the specified, unoptimized 1254 * behavior would be to fail. 1255 */ 1256 || ItemType.HEX_BINARY.subsumes(itemType) 1257 && (XMLBinary.HEX != m_xmlbinary) 1258 || ItemType.BASE64_BINARY.subsumes(itemType) 1259 && (XMLBinary.BASE64 != m_xmlbinary) 1260 ) 1261 { 1262 if ( null == atomizer ) 1263 { 1264 XQueryEvaluator docWrapUnwrap = PredefinedQueryHolders 1265 .DocumentWrapUnwrap.INSTANCE.load(); 1266 atomizer = (v, col) -> 1267 { 1268 docWrapUnwrap.setExternalVariable( 1269 PredefinedQueryHolders.s_qEXPR, v); 1270 v = docWrapUnwrap.evaluate(); 1271 /* 1272 * It's already zero-or-one, or XPTY0004 was thrown 1273 */ 1274 return v; 1275 }; 1276 } 1277 m_atomize [ i ] = atomizer; 1278 /* 1279 * The spec wants anyAtomicType below instead of 1280 * untypedAtomic. But Saxon's getConverter is more likely 1281 * to fail to find a converter from anyAtomicType to an 1282 * arbitrary type, than from untypedAtomic. So use that. 
1283 */ 1284 m_columnStaticTypes [ i ] = s_01untypedAtomic; 1285 } 1286 else 1287 { 1288 /* 1289 * We know we'll be getting zero-or-one atomic value, so 1290 * the atomizing function can be the identity. 1291 */ 1292 m_atomize [ i ] = (v, col) -> v; 1293 } 1294 } 1295 } 1296 1297 if ( ! m_sequenceIterator.hasNext() ) 1298 return false; 1299 1300 ++ currentRow; // for use as 1-based ordinality column 1301 1302 XdmItem it = m_sequenceIterator.next(); 1303 1304 int i = 0; 1305 for ( Binding.Parameter p : m_outBindings ) 1306 { 1307 XQueryEvaluator xqe = m_columnXQEs [ i ]; 1308 AtomizingFunction atomizer = m_atomize [ i ]; 1309 SequenceType staticType = m_columnStaticTypes [ i++ ]; 1310 1311 if ( null == xqe ) 1312 { 1313 receive.updateLong( i, currentRow); 1314 continue; 1315 } 1316 1317 try 1318 { 1319 xqe.setContextItem(it); 1320 1321 if ( null == atomizer ) /* => result type was found to be XML */ 1322 { 1323 receive.updateSQLXML( 1324 i, returnContent(xqe.iterator(), false)); 1325 continue; 1326 } 1327 1328 XdmValue x1 = xqe.evaluate(); 1329 x1 = atomizer.apply(x1, i); 1330 1331 /* 1332 * The value is now known to be atomic and either exactly 1333 * one or zero-or-one. May as well just use size() to see if 1334 * it's empty. 1335 */ 1336 if ( 0 == x1.size() ) 1337 { 1338 receive.updateNull(i); // XXX Handle defaults some day 1339 continue; 1340 } 1341 XdmAtomicValue av = (XdmAtomicValue)x1.itemAt(0); 1342 xmlCastAsNonXML( 1343 av, staticType.getItemType(), p, receive, i, m_xmlbinary); 1344 } 1345 catch ( SaxonApiException | XPathException e ) 1346 { 1347 throw new SQLException(e.getMessage(), "10000", e); 1348 } 1349 } 1350 return true; 1351 } 1352 1353 /** 1354 * Store the values of any passed parameters and/or context item into the 1355 * dynamic context, returning true if the overall query should 1356 * short-circuit and return null. 
1357 *<p> 1358 * The specification requires the overall query to return null if a 1359 * context item is specified in the bindings and its value is null. 1360 * @param xqe XQuery evaluator into which to store the values. 1361 * @param passing The bindings whose values should be installed. 1362 * @param setContextItem True to handle the context item, if present in the 1363 * bindings. False to skip any processing of the context item, in cases 1364 * where the caller will handle that. 1365 * @return True if the overall query's return should be null, false if the 1366 * query should proceed to evaluation. 1367 */ 1368 private static boolean storePassedValuesInDynamicContext( 1369 XQueryEvaluator xqe, Binding.Assemblage passing, boolean setContextItem) 1370 throws SQLException, SaxonApiException 1371 { 1372 /* 1373 * Is there or is there not a context item? 1374 */ 1375 if ( ! setContextItem || null == passing.contextItem() ) 1376 { 1377 /* "... there is no context item in XDC." */ 1378 } 1379 else 1380 { 1381 Object cve = passing.contextItem().valueJDBC(); 1382 if ( null == cve ) 1383 return true; 1384 XdmValue ci; 1385 if ( cve instanceof XdmNode ) // XXX support SEQUENCE input someday 1386 { 1387 ci = (XdmNode)cve; 1388 } 1389 else 1390 ci = xmlCastAsSequence( 1391 cve, XMLBinary.HEX, passing.contextItem().typeXS()); 1392 switch ( ci.size() ) 1393 { 1394 case 0: 1395 /* "... there is no context item in XDC." 
*/ 1396 break; 1397 case 1: 1398 xqe.setContextItem(ci.itemAt(0)); 1399 break; 1400 default: 1401 throw new SQLDataException( 1402 "invalid XQuery context item", "2200V"); 1403 } 1404 } 1405 1406 /* 1407 * For each <XML query variable> XQV: 1408 */ 1409 for ( Binding.Parameter p : passing ) 1410 { 1411 String name = p.name(); 1412 Object v = p.valueJDBC(); 1413 XdmValue vv; 1414 if ( null == v ) 1415 vv = XdmEmptySequence.getInstance(); 1416 else if ( v instanceof XdmNode ) // XXX support SEQUENCE someday 1417 { 1418 vv = (XdmNode)v; 1419 } 1420 else 1421 vv = xmlCastAsSequence( 1422 v, XMLBinary.HEX, p.typeXS().getItemType()); 1423 xqe.setExternalVariable(new QName(name), vv); 1424 } 1425 1426 return false; 1427 } 1428 1429 /** 1430 * Return a s9api {@link XQueryCompiler XQueryCompiler} with static context 1431 * preconfigured as the Syntax Rules dictate. 1432 * @param pt The single-row ResultSet representing the passed parameters 1433 * and context item, if any. 1434 * @param nameToIndex A Map, supplied empty, that on return will map 1435 * variable names for the dynamic context to column indices in {@code pt}. 1436 * If a context item was supplied, its index will be entered in the map 1437 * with the null key. 
1438 */ 1439 private static XQueryCompiler createStaticContextWithPassedTypes( 1440 Binding.Assemblage pt, Iterable<Map.Entry<String,String>> namespaces) 1441 throws SQLException, XPathException 1442 { 1443 XQueryCompiler xqc = s_s9p.newXQueryCompiler(); 1444 xqc.declareNamespace( 1445 "sqlxml", "http://standards.iso.org/iso9075/2003/sqlxml"); 1446 // https://sourceforge.net/p/saxon/mailman/message/20318550/ : 1447 xqc.declareNamespace("xdt", W3C_XML_SCHEMA_NS_URI); 1448 1449 for ( Map.Entry<String,String> e : namespaces ) 1450 xqc.declareNamespace(e.getKey(), e.getValue()); 1451 1452 /* 1453 * This business of predeclaring global external named variables 1454 * is not an s9api-level advertised ability in Saxon, hence the 1455 * various getUnderlying.../getStructured... methods here to access 1456 * the things that make it happen. 1457 */ 1458 StaticQueryContext sqc = xqc.getUnderlyingStaticContext(); 1459 1460 for ( Binding.Parameter p : pt ) 1461 { 1462 String name = p.name(); 1463 int ct = p.typeJDBC(); 1464 assertCanCastAsXmlSequence(ct, name); 1465 SequenceType st = p.typeXS(); 1466 sqc.declareGlobalVariable( 1467 new QName(name).getStructuredQName(), 1468 st.getUnderlyingSequenceType(), null, true); 1469 } 1470 1471 /* 1472 * Apply syntax rules to the context item, if any. 1473 */ 1474 Binding.ContextItem ci = pt.contextItem(); 1475 if ( null != ci ) 1476 { 1477 int ct = ci.typeJDBC(); 1478 assertCanCastAsXmlSequence(ct, "(context item)"); 1479 ItemType it = ci.typeXS(); 1480 xqc.setRequiredContextItemType(it); 1481 } 1482 1483 return xqc; 1484 } 1485 1486 /** 1487 * Check that something's type is "convertible to XML(SEQUENCE) 1488 * according to the Syntax Rules of ... <XML cast specification>." 1489 * That turns out not to be a very high bar; not much is excluded 1490 * by those rules except collection, row, structured, or 1491 * reference typed <value expression>s. 1492 * @param jdbcType The {@link Types JDBC type} to be checked. 
1493 * @param what A string to include in the exception message if the 1494 * check fails. 1495 * @throws SQLException if {@code jdbcType} is one of the prohibited types. 1496 */ 1497 private static void assertCanCastAsXmlSequence(int jdbcType, String what) 1498 throws SQLException 1499 { 1500 if ( Types.ARRAY == jdbcType || Types.STRUCT == jdbcType 1501 || Types.REF == jdbcType ) 1502 throw new SQLSyntaxErrorException( 1503 "The type of \"" + what + "\" is not suitable for " + 1504 "XMLCAST to XML(SEQUENCE).", "42804"); 1505 } 1506 1507 /** 1508 * The "determination of an XQuery formal type notation" algorithm. 1509 *<p> 1510 * This is relied on for parameters and context items passed to 1511 * {@code XMLQUERY} and therefore, {@code XMLTABLE} (and also, in the spec, 1512 * {@code XMLDOCUMENT} and {@code XMLPI}). Note that it does <em>not</em> 1513 * take an {@code XMLBinary} parameter, but rather imposes hexadecimal form 1514 * unconditionally, so in the contexts where this is called, any 1515 * {@code xmlbinary} setting is ignored. 1516 * @param b a {@code Binding} from which the JDBC type can be retrieved 1517 * @param forContextItem whether the type being derived is for a context 1518 * item or (if false) for a named parameter. 1519 * @return a {@code SequenceType} (always a singleton in the 1520 * {@code forContextItem} case) 1521 */ 1522 private static SequenceType determineXQueryFormalType( 1523 Binding b, boolean forContextItem) 1524 throws SQLException 1525 { 1526 int sd = b.typeJDBC(); 1527 OccurrenceIndicator suffix; 1528 /* 1529 * The SQL/XML standard uses a formal type notation straight out of 1530 * the XQuery 1.0 and XPath 2.0 Formal Semantics document, and that is 1531 * strictly more fine-grained and expressive than anything you can 1532 * actually say in the form of XQuery SequenceTypes. 
This method will 1533 * simply return the nearest approximation in the form of a sequence 1534 * type; some of the standard's distinct formal type notations will 1535 * collapse into the same SequenceType. 1536 * That also means the various cases laid out in the standard will, 1537 * here, all simply assign some ItemType to 'it', and therefore the 1538 * tacking on of the occurrence suffix can be factored out for the 1539 * very end. 1540 */ 1541 ItemType it; 1542 1543 if ( forContextItem ) 1544 suffix = OccurrenceIndicator.ONE; 1545 // else if sd is XML(SEQUENCE) - we don't have this type yet 1546 // suffix = OccurrenceIndicator.ZERO_OR_MORE; 1547 /* 1548 * Go through the motions of checking isNullable, though PL/Java's JDBC 1549 * currently hardcodes columnNullableUnknown. Maybe someday it won't. 1550 */ 1551 else if ( b.knownNonNull() ) 1552 suffix = OccurrenceIndicator.ONE; 1553 else 1554 suffix = OccurrenceIndicator.ZERO_OR_ONE; 1555 1556 // Define ET... for {DOCUMENT|CONTENT}(XMLSCHEMA) case ... not supported 1557 1558 // if SD is XML(DOCUMENT(UNTYPED)) - not currently tracked, can't tell 1559 // it = s_itf.getDocumentTest(item type for xdt:untyped); 1560 // else if SD is XML(DOCUMENT(ANY)) - not currently tracked, can't tell 1561 // it = s_itf.getDocumentTest(item type for xs:anyType); 1562 // else if SD is XML(DOCUMENT(XMLSCHEMA)) - unsupported and can't tell 1563 // it = s_itf.getDocumentTest(the ET... we didn't define earlier) 1564 // else if SD is XML(CONTENT(UNTYPED)) - which we're not tracking ... 1565 // at s9api granularity, there's no test for this that's not same as: 1566 // else if SD is XML(CONTENT(ANY)) - which we must assume for ANY XML 1567 if ( Types.SQLXML == sd ) 1568 it = s_itf.getNodeKindTest(DOCUMENT); 1569 // else if SD is XML(CONTENT(XMLSCHEMA)) - we don't track and can't tell 1570 // at s9api granularity, there's no test that means this anyway. 
1571 // else if SD is XML(SEQUENCE) - we really should have this type, but no 1572 // it = it.ANY_ITEM 1573 else // it ain't XML, it's some SQL type 1574 { 1575 ItemType xmlt = mapSQLDataTypeToXMLSchemaDataType( 1576 b, XMLBinary.HEX, Nulls.ABSENT); 1577 // ItemType pt = xmlt.getUnderlyingItemType().getPrimitiveType() 1578 // .somehowGetFromUnderlyingPTBackToS9apiPT() - ugh, the hard part 1579 /* 1580 * The intention here is to replace any derived type with the 1581 * primitive type it is based on, *except* for three types that are 1582 * technically derived: integer (from decimal), yearMonthDuration 1583 * and dayTimeDuration (from duration). Those are not replaced, so 1584 * they stand, as if they were honorary primitive types. 1585 * 1586 * For now, it's simplified greatly by mapSQLDataType... skipping 1587 * the construction of a whole derived XML Schema snippet, and just 1588 * returning the type we want anyway. Also, no need to dive under 1589 * the s9api layer to try to make getPrimitiveType work. 1590 */ 1591 it = xmlt; 1592 } 1593 1594 SequenceType xftn = makeSequenceType(it, suffix); 1595 return xftn; 1596 } 1597 1598 @SuppressWarnings("fallthrough") 1599 private static ItemType mapSQLDataTypeToXMLSchemaDataType( 1600 Binding b, XMLBinary xmlbinary, Nulls nulls) 1601 throws SQLException 1602 { 1603 /* 1604 * Nearly all of the fussing about specified in the standard 1605 * for this method is to create XML Schema derived types that 1606 * accurately reflect the typmod information for the SQL type 1607 * in question. Then, in determineXQueryFormalType (the only 1608 * client of this method so far!), all of that is thrown away 1609 * and our painstakingly specified derived type is replaced with 1610 * the primitive type we based it on. That simplifies a lot. :) 1611 * For now, forget the derived XML Schema declarations, and just 1612 * return the primitive types they would be based on. 
1613 * 1614 * The need for the nulls parameter vanishes if no XML Schema snippets 1615 * are to be generated. 1616 * 1617 * If the full XML Schema snippet generation ever proves to be 1618 * needed, one hacky way to get it would be with a SELECT 1619 * query_to_xmlschema('SELECT null::type-in-question', false, false, 1620 * '') where the same derivations are already implemented (though it 1621 * produces some different results; that work may have been done from 1622 * an earlier version of the standard). 1623 */ 1624 switch ( b.typeJDBC() ) 1625 { 1626 case Types.CHAR: 1627 case Types.VARCHAR: 1628 case Types.CLOB: 1629 return ItemType.STRING; 1630 1631 case Types.BINARY: 1632 case Types.VARBINARY: 1633 case Types.BLOB: 1634 return XMLBinary.HEX == xmlbinary ? 1635 ItemType.HEX_BINARY : ItemType.BASE64_BINARY; 1636 1637 case Types.NUMERIC: 1638 case Types.DECIMAL: 1639 /* 1640 * Go through the motions to get the scale and do this right, 1641 * though PL/Java's getScale currently hardcodes a -1 return. 1642 * Maybe someday it won't. 1643 */ 1644 int scale = b.scale(); 1645 return 0 == scale ? ItemType.INTEGER : ItemType.DECIMAL; 1646 1647 case Types.INTEGER: 1648 return ItemType.INT; 1649 case Types.SMALLINT: 1650 return ItemType.SHORT; 1651 case Types.BIGINT: 1652 return ItemType.LONG; 1653 1654 case Types.REAL: 1655 return ItemType.FLOAT; // could check P, MINEXP, MAXEXP here. 
1656 case Types.FLOAT: 1657 assert false; // PG should always report either REAL or DOUBLE 1658 /*FALLTHROUGH*/ 1659 case Types.DOUBLE: 1660 return ItemType.DOUBLE; 1661 1662 case Types.BOOLEAN: 1663 return ItemType.BOOLEAN; 1664 1665 case Types.DATE: 1666 return ItemType.DATE; 1667 1668 case Types.TIME: 1669 return ItemType.TIME; 1670 1671 case Types.TIME_WITH_TIMEZONE: 1672 return ItemType.TIME; // restrictive facet would make sense here 1673 1674 case Types.TIMESTAMP: 1675 return ItemType.DATE_TIME; 1676 1677 case Types.TIMESTAMP_WITH_TIMEZONE: 1678 return ItemType.DATE_TIME_STAMP; // xsd 1.1 equivalent of facet! 1679 1680 // There's no JDBC Types.INTERVAL; handle it after switch 1681 1682 // Good luck finding out from JDBC if it's a domain 1683 1684 // PG doesn't have DISTINCT types per se 1685 1686 // PL/Java's JDBC doesn't support PostgreSQL's arrays as ARRAY 1687 1688 // PG doesn't seem to have multisets (JDBC doesn't grok them either) 1689 1690 // Types.SQLXML we could recognize, but for determineFormalTypes it has 1691 // been handled already, and it's not yet clear what would be 1692 // appropriate to return (short of the specified XMLSchema snippet), 1693 // probably just document. 1694 1695 // So punt all these for now; what hasn't been handled in this switch 1696 // can be handled specially after the switch falls through, and what 1697 // isn't, isn't supported just now. 1698 } 1699 1700 String typeName = b.typePG(); 1701 if ( "interval".equals(typeName) ) 1702 { 1703 /* 1704 * XXX This isn't right yet; it needs to be refined to a 1705 * YEAR_MONTH_DURATION or a DAY_TIME_DURATION in the appropriate 1706 * cases, and for that it needs access to the typmod information 1707 * for the type, which getColumnTypeName doesn't now provide. 
1708 */ 1709 return ItemType.DURATION; 1710 } 1711 1712 throw new SQLNonTransientException(String.format( 1713 "Mapping SQL type \"%s\" to XML type not supported", typeName), 1714 "0N000"); 1715 } 1716 1717 /** 1718 * Implement that portion of the {@code <XML cast>} specification where 1719 * the target data type is sequence, and (for now, anyway) the source is 1720 * not an XML type; the only caller, so far, handles that case separately. 1721 * @param v The SQL value to be cast (in the form of an Object from JDBC). 1722 * @param enc Whether binary values should be encoded in hex or base 64. 1723 * @param xst The formal static XS type derived from the SQL type of v. 1724 * @return An {@code XdmValue}, {@code null} if {@code v} is null. 1725 */ 1726 private static XdmValue xmlCastAsSequence( 1727 Object v, XMLBinary enc, ItemType xst) 1728 throws SQLException 1729 { 1730 if ( null == v ) 1731 return null; 1732 /* 1733 * What happens next in the standard is one of the most breathtaking 1734 * feats of obscurantism in the whole document. It begins, plausibly 1735 * enough, by using mapValuesOfSQLTypesToValuesOfXSTypes to produce 1736 * the lexical form of the XS type (but with XML metacharacters escaped, 1737 * if it's a string type). Then: 1738 * 1. That lexical form is to be fed to an XML parser, producing an 1739 * XQuery document node that NEVER can be a well-formed document (it 1740 * is expected to satisfy document { text ? } where the text node is 1741 * just the lexical value form we started with, now with the escaped 1742 * metacharacters unescaped again as a consequence of parsing). For 1743 * some source types, mapValuesOfSQLTypesToValuesOfXSTypes can 1744 * produce a string that parses to XML with element content: row 1745 * types, arrays, multisets, XML. Clearly, those cases can't satisfy 1746 * the formal type assumed here, and they are cases this routine 1747 * won't be expected to handle: XML handled separately by the caller, 1748 * arrays/structs/etc. 
being ruled out by assertCanCastAsXmlSequence. 1749 * 2. That document node is made the $TEMP parameter of an XML Query, 1750 * '$TEMP cast as XSBTN' (where XSBTN is a QName for the result type 1751 * chosen according to the rules) and the sequence resulting from 1752 * that query is the result of the cast. 1753 * 1754 * Step (1) can only succeed if the XML parser doesn't insist on well- 1755 * formed documents, as the stock JRE parser emphatically does. And the 1756 * ultimate effect of that whole dance is that the cast in (2) casts a 1757 * document node to the target type, which means the document node gets 1758 * atomized, which, for a document node, means everything is thrown away 1759 * save the concatenated values of its descendant text nodes (or node, 1760 * in this case; haven't we seen that value somewhere before?), assigned 1761 * the type xs:untypedAtomic, and then that's operated on by the cast. 1762 * 1763 * Because this implementation's in PL/Java, the value v received here 1764 * has already been mapped from an SQL type to a Java type according to 1765 * JDBC's rules as PL/Java implements them, so there's one degree of 1766 * removal from the specified algorithm anyway. And the s9api 1767 * XdmAtomicValue already has constructors from several of the expected 1768 * Java types, as well as one taking a lexical form and explicit type. 1769 * Because this is /example/ code, rather than slavishly implementing the 1770 * specified algorithm, it will assume that that is either roughly or 1771 * exactly equivalent to what these s9api constructors in fact do, and 1772 * just use them; conformance-testing code could then check for exact 1773 * equivalence if there's enough interest to write it. 1774 * 1775 * So, we will NOT start with this: 1776 * 1777 * String xmlv = mapValuesOfSQLTypesToValuesOfXSTypes( 1778 * v, enc, Nulls.ABSENT, true); 1779 * 1780 * Instead, we'll derive this type first ... 
/**
 * A cast from one atomic value to another, allowed to fail with an
 * {@code XPathException}.
 */
@FunctionalInterface
interface CastingFunction
{
    /**
     * @param v the atomic value to cast
     * @return the result of the cast
     * @throws XPathException if the cast fails
     */
    AtomicValue apply(AtomicValue v) throws XPathException;
}

/**
 * A supplier of a {@link CastingFunction}, allowed to fail with an
 * {@code SQLException} or {@code XPathException} while constructing it.
 */
@FunctionalInterface
interface CasterSupplier
{
    /**
     * @return the constructed casting function
     */
    CastingFunction get() throws SQLException, XPathException;
}

/**
 * A function taking an {@code XdmValue} sequence and a column index and
 * returning an {@code XdmValue}.
 */
@FunctionalInterface
interface AtomizingFunction
{
    /**
     * @param v sequence to be atomized
     * @param columnIndex only to include in exception if result has more
     * than one item
     */
    XdmValue apply(XdmValue v, int columnIndex)
    throws SaxonApiException, XPathException;
}

/**
 * Build (but do not throw) the {@code XPathException}, with error code
 * {@code XPTY0004}, reporting that no cast exists between two types.
 * @param vt the type that was to be cast from
 * @param xt the type that was to be cast to
 * @return the exception, for the caller to throw
 */
private static XPathException noPrimitiveCast(ItemType vt, ItemType xt)
{
    return new XPathException(
        "Casting from " + vt.getTypeName() + " to " + xt.getTypeName() +
        " can never succeed", "XPTY0004");
}
/**
 * Handle the case of XMLCAST to a non-XML target type when the cast operand
 * is already a single atomic value.
 *<p>
 * The caller, if operating on a sequence, must itself handle the case of
 * an empty sequence (returning null, per General Rule 4c in :2011), or a
 * sequence of length greater than one (raising XPTY0004, which is not
 * specified in :2011, but the exclusion of such a sequence is implicit in
 * rules 4g and 4h; Db2 silently drops all but the first item, unlike
 * Oracle, which raises XPTY0004).
 *<p>
 * The value is first cast to the collapsed type {@code p.typeXT(enc)}
 * using a casting function obtained through {@code p.atomicCaster}, and the
 * result is then stored into {@code rs} using whichever {@code update...}
 * method suits that type.
 * @param av The atomic operand value
 * @param vt The XML Schema type to cast from; it selects the conversion
 * rules and converters used
 * @param p The parameter binding, recording the needed type information
 * @param rs ResultSet into which the value will be stored
 * @param col Index of the result column
 * @param enc Passed to {@code p.typeXT} to choose between hex and base 64
 * as the XML Schema type for a binary-string target
 * @throws SQLException (SQLSTATE 0N000) if the target XML type is not one
 * of those handled below, or as thrown by the binding or the ResultSet
 * @throws XPathException (XPTY0004) if no converter exists for the needed
 * cast, or if the conversion itself fails
 */
private static void xmlCastAsNonXML(
    XdmAtomicValue av, ItemType vt,
    Binding.Parameter p, ResultSet rs, int col, XMLBinary enc)
throws SQLException, XPathException
{
    XdmAtomicValue bv;
    ItemType xt = p.typeXT(enc);

    /*
     * The lambda only builds the casting function; whether and when it is
     * invoked is up to p.atomicCaster.
     */
    CastingFunction caster = p.atomicCaster(vt, () ->
    {
        ConversionRules rules = vt.getConversionRules();
        Converter c1;
        ItemType t1;
        Converter c2;

        /*
         * For the three zoneless date/time targets, pick an intermediate
         * type t1 and continue after the switch; every other target is
         * finished by a single converter in the default case.
         */
        switch ( p.typeJDBC() )
        {
        case Types.TIMESTAMP:
            t1 = ItemType.DATE_TIME;
            break;
        case Types.TIME:
            t1 = ItemType.TIME;
            break;
        case Types.DATE:
            t1 = ItemType.DATE;
            break;
        default:
            c1 = rules.getConverter(
                (AtomicType)vt.getUnderlyingItemType(),
                (AtomicType)xt.getUnderlyingItemType());
            if ( null == c1 )
                throw noPrimitiveCast(vt, xt);
            return (AtomicValue v) -> c1.convert(v).asAtomic();
        }
        /*
         * Nothing left here but the rest of the three date/timey cases
         * partly handled above: cast to t1, adjust to a zero offset and
         * strip the time zone, then cast from t1 to the final type.
         */
        c1 = rules.getConverter(
            (AtomicType)vt.getUnderlyingItemType(),
            (AtomicType)t1.getUnderlyingItemType());
        c2 = rules.getConverter(
            (AtomicType)t1.getUnderlyingItemType(),
            (AtomicType)xt.getUnderlyingItemType());
        if ( null == c1 || null == c2 )
            throw noPrimitiveCast(vt, xt);
        return (AtomicValue v) ->
        {
            v = c1.convert(v).asAtomic();
            v = ((CalendarValue)v).adjustTimezone(0).removeTimezone();
            return c2.convert(v).asAtomic();
        };
    });

    bv = makeAtomicValue(caster.apply(av.getUnderlyingValue()));

    /*
     * Store the cast value. The order of these tests matters wherever one
     * tested type subsumes another tested later (the more specific duration
     * types are tested before DURATION, for example).
     */
    if ( ItemType.STRING.subsumes(xt) )
        rs.updateString(col, bv.getStringValue());

    else if ( ItemType.HEX_BINARY.subsumes(xt) )
        rs.updateBytes(col,
            ((HexBinaryValue)bv.getUnderlyingValue()).getBinaryValue());
    else if ( ItemType.BASE64_BINARY.subsumes(xt) )
        rs.updateBytes(col,
            ((Base64BinaryValue)bv.getUnderlyingValue()).getBinaryValue());

    else if ( ItemType.DECIMAL.subsumes(xt) )
        rs.updateObject(col, bv.getValue());

    /*
     * The standard calls for throwing "data exception - numeric value out
     * of range" rather than forwarding a float or double inf, -inf, or nan
     * to SQL, but PostgreSQL supports those values, and these conversions
     * preserve them.
     * Because of the collapsing in typeXT(), xt will never be FLOAT,
     * only DOUBLE. JDBC is supposed to handle assigning a double to a float
     * column, anyway.
     */
    else if ( ItemType.DOUBLE.subsumes(xt) )
        rs.updateObject(col, bv.getValue());

    else if ( ItemType.DATE.subsumes(xt) )
        rs.updateObject(col, bv.getLocalDate());
    else if ( ItemType.DATE_TIME.subsumes(xt) )
    {
        if ( ((CalendarValue)bv.getUnderlyingValue()).hasTimezone() )
            rs.updateObject(col, bv.getOffsetDateTime());
        else
        {
            /*
             * A zoneless value bound for a with-time-zone column is
             * given the UTC offset.
             */
            LocalDateTime jv = bv.getLocalDateTime();
            rs.updateObject(col,
                Types.TIMESTAMP_WITH_TIMEZONE == p.typeJDBC() ?
                    jv.atOffset(UTC) : jv);
        }
    }
    else if ( ItemType.TIME.subsumes(xt) ) // no handy tz/notz distinction
    {
        /*
         * The time value is obtained by parsing its lexical form with the
         * java.time parsers.
         */
        if ( ((CalendarValue)bv.getUnderlyingValue()).hasTimezone() )
            rs.updateObject(col, OffsetTime.parse(bv.getStringValue()));
        else
        {
            LocalTime jv = LocalTime.parse(bv.getStringValue());
            rs.updateObject(col,
                Types.TIME_WITH_TIMEZONE == p.typeJDBC() ?
                    jv.atOffset(UTC) : jv);
        }
    }

    /*
     * All three duration flavors are stored as strings, after converting
     * the XML Schema lexical form with toggleIntervalRepr.
     */
    else if ( ItemType.YEAR_MONTH_DURATION.subsumes(xt) )
        rs.updateString(col, toggleIntervalRepr(bv.getStringValue()));
    else if ( ItemType.DAY_TIME_DURATION.subsumes(xt) )
        rs.updateString(col, toggleIntervalRepr(bv.getStringValue()));
    else if ( ItemType.DURATION.subsumes(xt) ) // need this case for now
        rs.updateString(col, toggleIntervalRepr(bv.getStringValue()));

    else if ( ItemType.BOOLEAN.subsumes(xt) )
        rs.updateObject(col, bv.getValue());
    else
        throw new SQLNonTransientException(String.format(
            "Mapping XML type \"%s\" to SQL value not supported", xt),
            "0N000");
}
As a consequence, where the target type 1977 * {@code xst} is {@code xs:hexBinary} or {@code xs:base64Binary}, that type 1978 * will be produced, regardless of the passed {@code encoding}. This might 1979 * not be strictly correct, but is probably safest until an oddity in the 1980 * spec can be clarified: {@code determineXQueryFormalType} will always 1981 * declare {@code xs:hexBinary} as the type for an SQL byte string, and it 1982 * would violate type safety to construct a value here that honors the 1983 * {@code encoding} parameter but isn't of the declared formal type. 1984 */ 1985 private static XdmAtomicValue mapJDBCofSQLvalueToXdmAtomicValue( 1986 Object dv, XMLBinary encoding, ItemType xst) 1987 throws SQLException, SaxonApiException, XPathException 1988 { 1989 if ( ItemType.STRING.equals(xst) ) 1990 return new XdmAtomicValue((String)dv); 1991 1992 if ( ItemType.HEX_BINARY.equals(xst) ) 1993 return makeAtomicValue(new HexBinaryValue((byte[])dv)); 1994 if ( ItemType.BASE64_BINARY.equals(xst) ) 1995 return makeAtomicValue(new Base64BinaryValue((byte[])dv)); 1996 1997 if ( ItemType.INTEGER.equals(xst) ) 1998 return new XdmAtomicValue(((BigInteger)dv).toString(), xst); 1999 if ( ItemType.DECIMAL.equals(xst) ) 2000 return new XdmAtomicValue((BigDecimal)dv); 2001 if ( ItemType.INT.equals(xst) ) 2002 return new XdmAtomicValue((Integer)dv); 2003 if ( ItemType.SHORT.equals(xst) ) 2004 return new XdmAtomicValue((Short)dv); 2005 if ( ItemType.LONG.equals(xst) ) 2006 return new XdmAtomicValue((Long)dv); 2007 if ( ItemType.FLOAT.equals(xst) ) 2008 return new XdmAtomicValue((Float)dv); 2009 if ( ItemType.DOUBLE.equals(xst) ) 2010 return new XdmAtomicValue((Double)dv); 2011 2012 if ( ItemType.BOOLEAN.equals(xst) ) 2013 return new XdmAtomicValue((Boolean)dv); 2014 2015 if ( ItemType.DATE.equals(xst) ) 2016 { 2017 if ( dv instanceof LocalDate ) 2018 return new XdmAtomicValue((LocalDate)dv); 2019 return new XdmAtomicValue(dv.toString(), xst); 2020 } 2021 2022 if ( 
ItemType.TIME.equals(xst) ) 2023 return new XdmAtomicValue(dv.toString(), xst); 2024 2025 if ( ItemType.DATE_TIME.equals(xst) ) 2026 { 2027 if ( dv instanceof LocalDateTime ) 2028 return new XdmAtomicValue((LocalDateTime)dv); 2029 return new XdmAtomicValue(dv.toString(), xst); 2030 } 2031 2032 if ( ItemType.DATE_TIME_STAMP.equals(xst) ) 2033 { 2034 if ( dv instanceof OffsetDateTime ) 2035 return new XdmAtomicValue((OffsetDateTime)dv); 2036 return new XdmAtomicValue(dv.toString(), xst); 2037 } 2038 2039 if ( ItemType.DURATION.equals(xst) ) 2040 return new XdmAtomicValue(toggleIntervalRepr((String)dv), xst); 2041 2042 throw new SQLNonTransientException(String.format( 2043 "Mapping SQL value to XML type \"%s\" not supported", xst), 2044 "0N000"); 2045 } 2046 2047 /* 2048 * Toggle the lexical representation of an interval/duration between the 2049 * form PostgreSQL likes and the form XML Schema likes. Only negative values 2050 * are affected. Positive values are returned unchanged, as are those that 2051 * don't fit any expected form; those will probably be reported as malformed 2052 * by whatever tries to consume them. 2053 */ 2054 static String toggleIntervalRepr(String lex) 2055 { 2056 Matcher m = s_intervalSigns.matcher(lex); 2057 if ( ! 
m.matches() ) 2058 return lex; // it's weird, just don't touch it 2059 if ( -1 == m.start(1) ) 2060 { 2061 if ( -1 != m.start(2) && -1 == m.start(3) ) // it's PG negative 2062 return '-' + lex.replace("-", ""); // make it XS negative 2063 } 2064 else if ( -1 == m.start(2) && -1 != m.start(3) )// it's XS negative 2065 return m.usePattern(s_intervalSignSite) // make it PG negative 2066 .reset(lex.substring(1)).replaceAll("-"); 2067 return lex; // it's either positive, or weird, just don't touch it 2068 } 2069 2070 static Iterable<Map.Entry<String,String>> namespaceBindings(String[] nbs) 2071 throws SQLException 2072 { 2073 if ( 1 == nbs.length % 2 ) 2074 throw new SQLSyntaxErrorException( 2075 "Namespace binding array must have even length", "42000"); 2076 Map<String,String> m = new HashMap<>(); 2077 2078 for ( int i = 0; i < nbs.length; i += 2 ) 2079 { 2080 String prefix = nbs[i]; 2081 String uri = nbs[1 + i]; 2082 2083 if ( null == prefix || null == uri ) 2084 throw new SQLDataException( 2085 "Namespace binding array elements must not be null", 2086 "22004"); 2087 2088 if ( ! "".equals(prefix) ) 2089 { 2090 if ( ! 
isValidNCName(prefix) ) 2091 throw new SQLSyntaxErrorException( 2092 "Not an XML NCname: \"" + prefix + '"', "42602"); 2093 if ( XML_NS_PREFIX.equals(prefix) 2094 || XMLNS_ATTRIBUTE.equals(prefix) ) 2095 throw new SQLSyntaxErrorException( 2096 "Namespace prefix may not be xml or xmlns", "42939"); 2097 if ( XML_NS_URI.equals(uri) 2098 || XMLNS_ATTRIBUTE_NS_URI.equals(uri) ) 2099 throw new SQLSyntaxErrorException( 2100 "Namespace URI has a disallowed value", "42P17"); 2101 if ( "".equals(uri) ) 2102 throw new SQLSyntaxErrorException( 2103 "URI for non-default namespace may not be zero-length", 2104 "42P17"); 2105 } 2106 2107 String was = m.put(prefix.intern(), uri.intern()); 2108 2109 if ( null != was ) 2110 throw new SQLSyntaxErrorException( 2111 "Namespace prefix \"" + prefix + "\" multiply bound (" + 2112 "to \"" + was + "\" and \"" + uri + "\")", "42712"); 2113 } 2114 2115 return Collections.unmodifiableSet(m.entrySet()); 2116 } 2117 2118 static class Binding 2119 { 2120 String typePG() throws SQLException 2121 { 2122 if ( null != m_typePG ) 2123 return m_typePG; 2124 return m_typePG = implTypePG(); 2125 } 2126 2127 int typeJDBC() throws SQLException 2128 { 2129 if ( null != m_typeJDBC ) 2130 return m_typeJDBC; 2131 int tj = implTypeJDBC(); 2132 /* 2133 * The JDBC types TIME_WITH_TIMEZONE and TIMESTAMP_WITH_TIMEZONE 2134 * first appear in JDBC 4.2 / Java 8. PL/Java's JDBC driver does 2135 * not yet return those values. As a workaround until it does, 2136 * recheck here using the PG type name string, if TIME or TIMESTAMP 2137 * is the JDBC type that the driver returned. 2138 * 2139 * Also for backward compatibility, the driver still returns 2140 * Types.OTHER for XML, rather than Types.SQLXML. Check and fix that 2141 * here too. 
2142 */ 2143 switch ( tj ) 2144 { 2145 case Types.OTHER: 2146 if ( "xml".equals(typePG()) ) 2147 tj = Types.SQLXML; 2148 break; 2149 case Types.TIME: 2150 if ( "timetz".equals(typePG()) ) 2151 tj = Types.TIME_WITH_TIMEZONE; 2152 break; 2153 case Types.TIMESTAMP: 2154 if ( "timestamptz".equals(typePG()) ) 2155 tj = Types.TIMESTAMP_WITH_TIMEZONE; 2156 break; 2157 default: 2158 } 2159 return m_typeJDBC = tj; 2160 } 2161 2162 Object valueJDBC() throws SQLException 2163 { 2164 if ( m_valueJDBCValid ) 2165 return m_valueJDBC; 2166 /* 2167 * When JDBC 4.2 added support for the JSR 310 date/time types, for 2168 * back-compatibility purposes, it did not change what types a plain 2169 * getObject(...) would return for them, which could break existing 2170 * code. Instead, it's necessary to use the form of getObject that 2171 * takes a Class<?>, and ask for the new classes explicitly. 2172 * 2173 * Similarly, PL/Java up through 1.5.0 has always returned a String 2174 * from getObject for a PostgreSQL xml type. Here, the JDBC standard 2175 * provides that a SQLXML object should be returned, and that should 2176 * happen in a future major PL/Java release, but for now, the plain 2177 * getObject will still return String, so it is also necessary to 2178 * ask for the SQLXML type explicitly. In fact, we will ask for 2179 * XdmNode, as it might be referred to more than once (if a 2180 * parameter), and a SQLXML can't be read more than once, nor would 2181 * there be any sense in building an XdmNode from it more than once. 
2182 */ 2183 switch ( typeJDBC() ) 2184 { 2185 case Types.DATE: 2186 return setValueJDBC(implValueJDBC(LocalDate.class)); 2187 case Types.TIME: 2188 return setValueJDBC(implValueJDBC(LocalTime.class)); 2189 case Types.TIME_WITH_TIMEZONE: 2190 return setValueJDBC(implValueJDBC(OffsetTime.class)); 2191 case Types.TIMESTAMP: 2192 return setValueJDBC(implValueJDBC(LocalDateTime.class)); 2193 case Types.TIMESTAMP_WITH_TIMEZONE: 2194 return setValueJDBC(implValueJDBC(OffsetDateTime.class)); 2195 case Types.SQLXML: 2196 return setValueJDBC(implValueJDBC(XdmNode.class)); 2197 default: 2198 } 2199 return setValueJDBC(implValueJDBC()); 2200 } 2201 2202 boolean knownNonNull() throws SQLException 2203 { 2204 if ( null != m_knownNonNull ) 2205 return m_knownNonNull; 2206 return m_knownNonNull = implKnownNonNull(); 2207 } 2208 2209 int scale() throws SQLException 2210 { 2211 if ( null != m_scale ) 2212 return m_scale; 2213 return m_scale = implScale(); 2214 } 2215 2216 static class ContextItem extends Binding 2217 { 2218 /** 2219 * Return the XML Schema type of this input binding for a context 2220 * item. 2221 *<p> 2222 * Because it is based on {@code determinXQueryFormalType}, this 2223 * method is not parameterized by {@code XMLBinary}, and will always 2224 * map a binary-string SQL type to {@code xs:hexBinary}. 
2225 */ 2226 ItemType typeXS() throws SQLException 2227 { 2228 if ( null != m_typeXS ) 2229 return m_typeXS; 2230 SequenceType st = implTypeXS(true); 2231 assert OccurrenceIndicator.ONE == st.getOccurrenceIndicator(); 2232 return m_typeXS = st.getItemType(); 2233 } 2234 2235 protected ItemType m_typeXS; 2236 } 2237 2238 static class Parameter extends Binding 2239 { 2240 String name() 2241 { 2242 return m_name; 2243 } 2244 2245 SequenceType typeXS() throws SQLException 2246 { 2247 if ( null != m_typeXS ) 2248 return m_typeXS; 2249 return m_typeXS = implTypeXS(false); 2250 } 2251 2252 /** 2253 * Return the XML Schema type collapsed according to the Syntax Rule 2254 * deriving {@code XT} for {@code XMLCAST}. 2255 *<p> 2256 * The intent of the rule is unclear, but it involves collapsing 2257 * certain sets of more-specific types that {@code typeXS} might 2258 * return into common supertypes, for use only in an intermediate 2259 * step of {@code xmlCastAsNonXML}. Unlike {@code typeXS}, this 2260 * method must be passed an {@code XMLBinary} parameter reflecting 2261 * the hex/base64 choice currently in scope. 2262 * @param enc whether to use {@code xs:hexBinary} or 2263 * {@code xs:base64Binary} as the XML Schema type corresponding to a 2264 * binary-string SQL type. 2265 */ 2266 ItemType typeXT(XMLBinary enc) throws SQLException 2267 { 2268 throw new UnsupportedOperationException( 2269 "typeXT() on synthetic binding"); 2270 } 2271 2272 /** 2273 * Memoize and return a casting function from a given 2274 * {@code ItemType} to the type of this parameter. 2275 *<p> 2276 * Used only by {@code xmlCastAsNonXML}, which does all the work 2277 * of constructing the function; this merely allows it to be 2278 * remembered, if many casts to the same output parameter will be 2279 * made (as by {@code xmltable}). 
*/
            CastingFunction atomicCaster(ItemType it, CasterSupplier s)
            throws SQLException, XPathException
            {
                // Base behavior: always refuses. BindingsFromResultSet.Parameter
                // (later in this file) overrides this with a caching version.
                throw new UnsupportedOperationException(
                    "atomicCaster() on synthetic binding");
            }

            protected SequenceType m_typeXS;

            private final String m_name;

            /**
             * @param name The SQL name of the parameter
             * @param checkName True if the name must be a valid NCName (as for
             * an input parameter from SQL to the XML query context), or false
             * if the name doesn't matter (as when it describes a result, or the
             * sole input value of an XMLCAST).
             * @throws SQLException if the name of a checked input parameter
             * isn't a valid NCName.
             */
            protected Parameter(String name, boolean checkName)
            throws SQLException
            {
                if ( checkName && ! isValidNCName(name) )
                    throw new SQLSyntaxErrorException(
                        "Not an XML NCname: \"" + name + '"', "42602");
                m_name = name;
            }
        }

        /*
         * Per-binding state. The boxed types presumably let null stand for
         * "not determined yet" -- NOTE(review): the accessors that would read
         * these fields are outside this view; confirm there.
         */
        protected String m_typePG;
        protected Integer m_typeJDBC;
        protected Boolean m_knownNonNull;
        protected Integer m_scale;
        private Object m_valueJDBC;
        private boolean m_valueJDBCValid;

        /**
         * Store a JDBC value for this binding and mark it as valid.
         * @param v the value to remember (may be null; validity is tracked
         * separately in {@code m_valueJDBCValid}).
         * @return the same value, for convenient chaining.
         */
        protected Object setValueJDBC(Object v)
        {
            m_valueJDBCValid = true;
            return m_valueJDBC = v;
        }

        /*
         * The impl* methods below are the hooks a concrete binding overrides.
         * As written here, each one simply throws
         * UnsupportedOperationException.
         */

        protected String implTypePG() throws SQLException
        {
            throw new UnsupportedOperationException(
                "typePG() on synthetic binding");
        }

        protected int implTypeJDBC() throws SQLException
        {
            throw new UnsupportedOperationException(
                "typeJDBC() on synthetic binding");
        }

        protected boolean implKnownNonNull() throws SQLException
        {
            throw new UnsupportedOperationException(
                "knownNonNull() on synthetic binding");
        }

        protected int implScale() throws SQLException
        {
            throw new UnsupportedOperationException(
                "scale() on synthetic binding");
        }

        protected Object implValueJDBC() throws SQLException
        {
            throw new UnsupportedOperationException(
                "valueJDBC() on synthetic binding");
        }

        /*
         * This implementation just forwards to the type-less version, then
         * fails if that did not return the wanted type. Override if a smarter
         * behavior is possible.
         */
        protected <T> T implValueJDBC(Class<T> type) throws SQLException
        {
            return type.cast(implValueJDBC());
        }

        /**
         * Determine the XQuery formal type for this binding by delegating to
         * {@code determineXQueryFormalType}.
         * @param forContextItem passed through unchanged to
         * {@code determineXQueryFormalType}.
         */
        protected SequenceType implTypeXS(boolean forContextItem)
        throws SQLException
        {
            return determineXQueryFormalType(this, forContextItem);
        }

        /**
         * A set of bindings: at most one context item plus a collection of
         * parameters, which is what iterating over the assemblage yields.
         */
        static class Assemblage implements Iterable<Parameter>
        {
            /** @return the context item, or null if none has been set. */
            ContextItem contextItem() { return m_contextItem; }

            @Override
            public Iterator<Parameter> iterator()
            {
                return m_params.iterator();
            }

            protected ContextItem m_contextItem;
            // Starts as an empty list; subclasses assign the real collection.
            protected Collection<Parameter> m_params = Collections.emptyList();
        }
    }

    /**
     * Bindings whose types and values are read from the columns of a
     * {@code ResultSet} and its {@code ResultSetMetaData}.
     */
    static class BindingsFromResultSet extends Binding.Assemblage
    {
        /**
         * Construct the bindings from a ResultSet representing input parameters
         * to an XML query.
         * @param rs ResultSet representing the input parameters. Column names
         * "." and "?COLUMN?" are treated specially, and used to supply the
         * query's context item; every other column name must be a valid NCName,
         * and neither any named parameter nor the context item may be mentioned
         * more than once.
         * @param checkNames True if the input parameter names matter (a name of
         * "." or "?COLUMN?" will define the context item, and any other name
         * must be a valid NCName); false to skip such checking (as for the
         * single input value to XMLCAST, whose name doesn't matter).
         * @throws SQLException if names are duplicated or invalid.
         */
        BindingsFromResultSet(ResultSet rs, boolean checkNames)
        throws SQLException
        {
            m_resultSet = rs;
            m_rsmd = rs.getMetaData();

            int nParams = m_rsmd.getColumnCount();
            ContextItem contextItem = null;
            // Keyed by column label so that a duplicate shows up as the
            // non-null previous value returned by put().
            Map<String,Binding.Parameter> n2b = new HashMap<>();

            // A document builder is only created when there is a column.
            if ( 0 < nParams )
                m_dBuilder = s_s9p.newDocumentBuilder();

            // JDBC column indices are 1-based.
            for ( int i = 1; i <= nParams; ++i )
            {
                String label = m_rsmd.getColumnLabel(i);
                if ( checkNames &&
                    ("?COLUMN?".equals(label) || ".".equals(label)) )
                {
                    if ( null != contextItem )
                        throw new SQLSyntaxErrorException(
                            "Context item supplied more than once (at " +
                            contextItem.m_idx + " and " + i + ')', "42712");
                    contextItem = new ContextItem(i);
                    continue;
                }

                Parameter was =
                    (Parameter)n2b.put(
                        label, new Parameter(label, i, checkNames));
                if ( null != was )
                    throw new SQLSyntaxErrorException(
                        "Name \"" + label + "\" duplicated at positions " +
                        was.m_idx + " and " + i, "42712");
            }

            m_contextItem = contextItem;
            // Note: a HashMap's values() view, so iteration order is not
            // the column order.
            m_params = n2b.values();
        }

        /**
         * Construct the bindings from a ResultSet representing output
         * parameters (as from XMLTABLE).
         * @param rs ResultSet representing the result parameters. Names have
         * no particular significance and are not subject to any checks.
         * @param exprs Compiled evaluators for the supplied column expressions.
         * The number of these must match the number of columns in {@code rs}.
         * One of these (and no more than one; the caller will have enforced
         * that) is allowed to be null, making the corresponding column
         * "FOR ORDINALITY". An ordinality column will be checked to ensure it
         * has an SQL type that is (ahem) "exact numeric with scale 0 (zero)."
         * May be null if this is some other general-purpose output result set,
         * not for an XMLTABLE.
         * @throws SQLException if numbers of columns and expressions don't
         * match, or there is an ordinality column and its type is not suitable.
         */
        @SuppressWarnings("fallthrough")
        BindingsFromResultSet(ResultSet rs, XQueryEvaluator[] exprs)
        throws SQLException
        {
            m_resultSet = rs;
            m_rsmd = rs.getMetaData();

            int nParams = m_rsmd.getColumnCount();
            // This check fires for any length mismatch, too many as well as
            // too few, despite the wording of the message.
            if ( null != exprs && nParams != exprs.length )
                throw new SQLSyntaxErrorException(
                    "Not as many supplied column expressions as output columns",
                    "42611");

            Binding.Parameter[] ps = new Binding.Parameter[ nParams ];

            for ( int i = 1; i <= nParams; ++i )
            {
                String label = m_rsmd.getColumnLabel(i);
                Parameter p = new Parameter(label, i, false);
                ps [ i - 1 ] = p;
                // A null evaluator marks the FOR ORDINALITY column.
                if ( null != exprs && null == exprs [ i - 1 ] )
                {
                    switch ( p.typeJDBC() )
                    {
                    case Types.INTEGER:
                    case Types.SMALLINT:
                    case Types.BIGINT:
                        break;
                    case Types.NUMERIC:
                    case Types.DECIMAL:
                        // Scale 0 is accepted, and so is -1.
                        // NOTE(review): -1 presumably means "scale not
                        // specified"; confirm against scale().
                        int scale = p.scale();
                        if ( 0 == scale || -1 == scale )
                            break;
                        /*FALLTHROUGH*/
                    default:
                        throw new SQLSyntaxErrorException(
                            "Column FOR ORDINALITY must have an exact numeric" +
                            " type with scale zero.", "42611");
                    }
                }
            }

            // Array-backed list, so here the order does follow the columns.
            m_params = asList(ps);
        }

        private ResultSet m_resultSet;
        private ResultSetMetaData m_rsmd;
        // Only assigned by the input-parameter constructor, and only when
        // there is at least one column.
        DocumentBuilder m_dBuilder;

        /**
         * Fetch the value of one column as the requested Java type.
         *<p>
         * Any type other than {@code XdmNode} is handed straight to
         * {@code ResultSet.getObject(int, Class)}. For {@code XdmNode}, the
         * column is fetched as {@code SQLXML} and built into a Saxon document
         * with {@code m_dBuilder}.
         *<p>
         * NOTE(review): the XdmNode path dereferences both the fetched SQLXML
         * and m_dBuilder without a null check; confirm callers never reach it
         * for an SQL NULL column or for bindings made by the output-parameter
         * constructor (which leaves m_dBuilder unset).
         * @param idx 1-based column index.
         * @param type the Java class wanted.
         * @throws SQLException from JDBC, or (SQLSTATE 10000) wrapping a
         * SaxonApiException from building the document.
         */
        <T> T typedValueAtIndex(int idx, Class<T> type) throws SQLException
        {
            if ( XdmNode.class != type )
                return m_resultSet.getObject(idx, type);
            try
            {
                SQLXML sx = m_resultSet.getObject(idx, SQLXML.class);
                return type.cast(
                    m_dBuilder.build(sx.getSource((Class<Source>)null)));
            }
            catch ( SaxonApiException e )
            {
                throw new SQLException(e.getMessage(), "10000", e);
            }
        }

        /**
         * Context item taken from column {@code m_idx} of the result set.
         * Unlike {@code Parameter} below, it does not override
         * {@code implKnownNonNull}.
         */
        class ContextItem extends Binding.ContextItem
        {
            final int m_idx;

            ContextItem(int index) { m_idx = index; }

            protected String implTypePG() throws SQLException
            {
                return m_rsmd.getColumnTypeName(m_idx);
            }

            protected int implTypeJDBC() throws SQLException
            {
                return m_rsmd.getColumnType(m_idx);
            }

            protected int implScale() throws SQLException
            {
                return m_rsmd.getScale(m_idx);
            }

            protected Object implValueJDBC() throws SQLException
            {
                return m_resultSet.getObject(m_idx);
            }

            protected <T> T implValueJDBC(Class<T> type) throws SQLException
            {
                return typedValueAtIndex(m_idx, type);
            }
        }

        /**
         * Named parameter taken from column {@code m_idx} of the result set.
         */
        class Parameter extends Binding.Parameter
        {
            final int m_idx;
            private ItemType m_typeXT;            // cached result of typeXT
            private CastingFunction m_atomCaster; // cached caster
            private ItemType m_lastCastFrom;      // type the cached caster is for

            /**
             * @param name column label.
             * @param index 1-based column index.
             * @param isInput passed to the superclass as its
             * {@code checkName} argument.
             */
            Parameter(String name, int index, boolean isInput)
            throws SQLException
            {
                super(name, isInput);
                m_idx = index;
            }

            /**
             * Return (computing and caching on first use) the item type
             * for this column. Later calls return the cached type without
             * looking at {@code enc} again.
             */
            @Override
            ItemType typeXT(XMLBinary enc) throws SQLException
            {
                if ( null != m_typeXT )
                    return m_typeXT;

                ItemType it =
                    mapSQLDataTypeToXMLSchemaDataType(this, enc, Nulls.ABSENT);
                // Non-atomic types are used exactly as mapped.
                if ( ! ItemType.ANY_ATOMIC_VALUE.subsumes(it) )
                    return m_typeXT = it;

                /*
                 * Atomic types are adjusted: xs:integer mapped from an SQL
                 * NUMERIC or DECIMAL column becomes xs:decimal; types
                 * subsumed by xs:integer, xs:float or xs:dateTimeStamp become
                 * xs:integer, xs:double and xs:dateTime respectively.
                 */
                if ( it.equals(ItemType.INTEGER) )
                {
                    int tj = typeJDBC();
                    if ( Types.NUMERIC == tj || Types.DECIMAL == tj )
                        it = ItemType.DECIMAL;
                }
                else if ( ItemType.INTEGER.subsumes(it) )
                    it = ItemType.INTEGER;
                else if ( ItemType.FLOAT.subsumes(it) )
                    it = ItemType.DOUBLE;
                else if ( ItemType.DATE_TIME_STAMP.subsumes(it) )
                    it = ItemType.DATE_TIME;

                return m_typeXT = it;
            }

            /**
             * Return a caster for values of type {@code it}, asking the
             * supplier for a new one only when none is cached or the source
             * type differs from the one used for the last call.
             */
            @Override
            CastingFunction atomicCaster(ItemType it, CasterSupplier s)
            throws SQLException, XPathException
            {
                if ( null == m_atomCaster || ! it.equals(m_lastCastFrom) )
                {
                    m_atomCaster = s.get();
                    m_lastCastFrom = it;
                }
                return m_atomCaster;
            }

            protected String implTypePG() throws SQLException
            {
                return m_rsmd.getColumnTypeName(m_idx);
            }

            protected int implTypeJDBC() throws SQLException
            {
                return m_rsmd.getColumnType(m_idx);
            }

            protected boolean implKnownNonNull() throws SQLException
            {
                return columnNoNulls == m_rsmd.isNullable(m_idx);
            }

            protected int implScale() throws SQLException
            {
                return m_rsmd.getScale(m_idx);
            }

            protected Object implValueJDBC() throws SQLException
            {
                return m_resultSet.getObject(m_idx);
            }

            protected <T> T implValueJDBC(Class<T> type) throws SQLException
            {
                return typedValueAtIndex(m_idx, type);
            }
        }
    }

    static class BindingsFromXQX extends Binding.Assemblage
    {
        /**
         * Construct a new assemblage of bindings for the static context of an
         * XMLTABLE column expression. It will have the same named-parameter
         * bindings passed to the row expression, but the static type of the
         * context item will be the result type of the row expression. The
         * {@code ContextItem} in this assemblage will have no associated value;
         * the caller is responsible for retrieving that from the row evaluator
         * and storing it in the column expression context every iteration.
         * @param xqx The result of compiling the row expression; its
         * compiler-determined static result type will be used as the static
         * context item type.
         * @param params The bindings supplied to the row expression. Its named
         * parameters will be copied as the named parameters here.
         */
        BindingsFromXQX(XQueryExecutable xqx, Binding.Assemblage params)
        {
            // The same collection object is shared, not copied.
            m_params = params.m_params;
            m_contextItem = new ContextItem(xqx.getResultItemType());
        }

        /**
         * Context item whose type is supplied up front and which overrides
         * none of the impl* methods.
         */
        static class ContextItem extends Binding.ContextItem
        {
            ContextItem(ItemType it)
            {
                m_typeXS = it;
                /*
                 * There needs to be a dummy JDBC type to return when queried
                 * for purposes of assertCanCastAsXmlSequence. It can literally
                 * be any type outside of the few that method rejects. Because
                 * the XS type is already known, nothing else will need to ask
                 * for this, or care.
                 */
                m_typeJDBC = Types.OTHER;
            }
        }
    }

    /*
     * The XQuery-regular-expression-based functions added in 9075-2:2006.
     *
     * For each function below, a parameter is marked //strict if the spec
     * explicitly says the result is NULL when that parameter is NULL. The
     * parameters not marked //strict (including the non-standard w3cNewlines
     * added here) all have non-null defaults, so by executive decision, these
     * functions will all get the onNullInput=RETURNS_NULL treatment, so none of
     * the null-checking has to be done here. At worst, that may result in a
     * mystery NULL return rather than an error, if someone explicitly passes
     * NULL to one of the parameters with a non-null default.
     */

    /*
     * Check valid range of 'from' and supported 'usingOctets'.
     *
     * Every specified function that has a start position FROM and a USING
     * clause starts with a check that the start position is in range. This
     * function factors out that test, returning true if the start position is
     * /out of range/ (triggering the caller to return the special result
     * defined for that case), returning false if the value is in range, or
     * throwing an exception if the length unit specified in the USING clause
     * isn't supported.
*/
    private static boolean usingAndLengthCheck(
        String in, int from, boolean usingOctets, String function)
    throws SQLException
    {
        // The USING OCTETS test comes first, so it is reported even when
        // 'from' is also out of range.
        if ( usingOctets )
            throw new SQLFeatureNotSupportedException(
                '"' + function + "\" does not yet support USING OCTETS",
                "0A000");
        return ( 1 > from || from > getStringLength(in) );
    }

    /**
     * Reject a call that did not ask for W3C newline conventions, the only
     * ones handled here.
     * @param w3cNewlines the caller's w3cNewlines argument.
     * @param function name of the calling SQL function, for the message.
     * @throws SQLException SQLFeatureNotSupportedException (0A000) if
     * w3cNewlines is false.
     */
    private static void newlinesCheck(boolean w3cNewlines, String function)
    throws SQLException
    {
        if ( ! w3cNewlines )
            throw new SQLFeatureNotSupportedException(
                '"' + function + "\" does not yet support the ISO SQL newline" +
                " conventions, only the original W3C XQuery ones" +
                " (HINT: pass w3cNewlines => true)", "0A000");
    }

    /**
     * Compile an XQuery regular expression through the Saxon configuration,
     * translating Saxon's errors to the SQLSTATEs the SQL functions document.
     * @param pattern the regular expression.
     * @param flags the flags string.
     * @throws SQLException SQLDataException 2201T for error FORX0001, 2201S
     * for FORX0002; a plain SQLException wrapping any other XPathException.
     */
    private static RegularExpression compileRE(String pattern, String flags)
    throws SQLException
    {
        try
        {
            return s_s9p.getUnderlyingConfiguration()
                .compileRegularExpression(pattern, flags, "XP30", null);
        }
        catch ( XPathException e )
        {
            if ( NamespaceConstant.ERR.equals(e.getErrorCodeNamespace()) )
            {
                if ( "FORX0001".equals(e.getErrorCodeLocalPart()) )
                    throw new SQLDataException(
                        "invalid XQuery option flag", "2201T", e);
                if ( "FORX0002".equals(e.getErrorCodeLocalPart()) )
                    throw new SQLDataException(
                        "invalid XQuery regular expression", "2201S", e);
            }
            throw new SQLException(
                "compiling XQuery regular expression: " + e.getMessage(), e);
        }
    }

    /**
     * Apply {@code re.replace} to one matched substring, translating Saxon's
     * errors to SQLSTATEs.
     * @param re the compiled expression.
     * @param in the text to replace in (see the comment in the body about
     * what callers pass).
     * @param with the replacement string.
     * @throws SQLException SQLDataException 2201U for an empty {@code in}
     * (unless the expression's flags contain "d") or for error FORX0003, 2201V
     * for FORX0004; a plain SQLException wrapping any other XPathException.
     */
    private static CharSequence replace(
        RegularExpression re, CharSequence in, CharSequence with)
    throws SQLException
    {
        /*
         * Report the standard-mandated error if replacing a zero-length match.
         * Strictly speaking, this is a test of the length of the match, not of
         * the input string. Here, though, this private method is only called by
         * translate_regex, which always passes only the portion of the input
         * string that matched, so the test is equivalent.
         * As to why the SQL committee would make such a point of disallowing
         * replacement of a zero-length match, that's a good question. See
         * s_intervalSignSite in this very file for an example where replacing
         * a zero-length match is just what's wanted. (But that pattern relies
         * on lookahead/lookbehind operators, which XQuery regular expressions
         * don't have.)
         * When the underlying library is Saxon, there is an Easter egg: if a
         * regular expression is compiled with a 'flags' string ending in ";j",
         * a Java regular expression is produced instead of an XQuery one (with
         * standards conformance cast to the wind). That can be detected with
         * getFlags() on the regular expression: not looking for ";j", because
         * that has been stripped out, but for "d" which is a Java regex flag
         * that Saxon sets by default, and is not a valid XQuery regex flag.
         * If the caller has used Saxon's Easter egg to get a Java regex, here
         * is another Easter egg to go with it, allowing zero-length matches
         * to be replaced if that's what the caller wants to do.
         */
        if ( 0 == in.length() && ! re.getFlags().contains("d") )
            throw new SQLDataException(
                "attempt to replace a zero-length string", "2201U");
        try
        {
            return re.replace(in, with);
        }
        catch ( XPathException e )
        {
            if ( NamespaceConstant.ERR.equals(e.getErrorCodeNamespace()) )
            {
                if ( "FORX0003".equals(e.getErrorCodeLocalPart()) )
                    throw new SQLDataException(
                        "attempt to replace a zero-length string", "2201U", e);
                if ( "FORX0004".equals(e.getErrorCodeLocalPart()) )
                    throw new SQLDataException(
                        "invalid XQuery replacement string", "2201V", e);
            }
            throw new SQLException(
                "replacing regular expression match: " + e.getMessage(), e);
        }
    }

    /**
     * Where one occurrence of a match, and each of its groups, was found.
     * Group 0 is the whole match. In the {@code LOMV} implementation below,
     * positions count from the 1-based start position given to the
     * constructor, advancing by string length as reported by Saxon.
     */
    interface MatchVector
    {
        int groups();
        int position(int group);
        int length(int group);
    }

    interface ListOfMatchVectors
    {
        /**
         * Return the MatchVector for one occurrence of a match.
         *<p>
         * Any previously-returned MatchVector is invalid after another get.
         * In multiple calls to get, the occurrence parameter must be strictly
         * increasing.
         * After get has returned null, it should not be called again.
         */
        MatchVector get(int occurrence) throws SQLException;
        void close();
    }

    /**
     * The one implementation of {@code ListOfMatchVectors}, over a Saxon
     * {@code RegexIterator}. The same object also serves as the
     * {@code MatchVector} it returns (which is why an earlier result is
     * invalidated by the next {@code get}) and as the {@code MatchHandler}
     * receiving Saxon's callbacks while a match is examined.
     */
    static class LOMV
    implements ListOfMatchVectors, MatchVector, RegexIterator.MatchHandler
    {
        private RegexIterator m_ri;
        private int m_pos;        // position of the next unconsumed character
        private int m_occurrence; // number of matches seen so far

        LOMV(int startPos, RegexIterator ri)
        {
            m_ri = ri;
            m_pos = startPos;
        }

        /** Compile {@code pattern} with {@code flags}, then proceed as below. */
        static ListOfMatchVectors of(
            String pattern, String flags, String in, int from)
        throws SQLException
        {
            RegularExpression re = compileRE(pattern, flags);
            return of(re, in, from);
        }

        /**
         * Analyze the part of {@code in} starting at the 1-based code point
         * position {@code from}. Reported positions start counting at
         * {@code from}, so they are positions within the whole of {@code in}.
         */
        static ListOfMatchVectors of(RegularExpression re, String in, int from)
        {
            RegexIterator ri =
                re.analyze(in.substring(in.offsetByCodePoints(0, from - 1)));
            return new LOMV(from, ri);
        }

        // Index 0 is the whole match; allocated on the first successful get.
        private int[] m_begPositions;
        private int[] m_endPositions;

        @Override // ListOfMatchVectors
        public MatchVector get(int occurrence) throws SQLException
        {
            try
            {
                StringValue sv;
                /*
                 * Step over the iterator's substrings (matching and not),
                 * adding each skipped one's length to m_pos, until the wanted
                 * occurrence is current. That one is not added here; the
                 * characters() callbacks below account for it.
                 */
                for ( ;; )
                {
                    sv = m_ri.next();
                    if ( null == sv )
                        return null;
                    if ( m_ri.isMatching() )
                        if ( ++ m_occurrence == occurrence )
                            break;
                    m_pos += sv.getStringLength();
                }

                if ( null == m_begPositions )
                {
                    int groups = m_ri.getNumberOfGroups();
                    /*
                     * Saxon's Apache-derived XQuery engine will report a number
                     * of groups counting $0 (so it will be 1 even if no capture
                     * groups were defined in the expression). In contrast, the
                     * Java regex engine that you get with the Saxon ";j" Easter
                     * egg does not count $0 (so arrays need groups+1 entries).
                     * It's hard to tell from here which flavor was used, plus
                     * the Saxon behavior might change some day, so just spend
                     * the extra + 1 every time.
                     */
                    m_begPositions = new int [ groups + 1 ];
                    m_endPositions = new int [ groups + 1 ];
                }

                m_begPositions [ 0 ] = m_pos;

                /*
                 * Zero the group entries first: any group for which no
                 * onGroupStart/onGroupEnd callback arrives is left with
                 * position 0 and length 0. As m_pos starts from a 1-based
                 * position, 0 is never the position of a reported group.
                 */
                fill(m_begPositions, 1, m_begPositions.length, 0);
                fill(m_endPositions, 1, m_endPositions.length, 0);
                m_ri.processMatchingSubstring(this);

                // The callbacks have advanced m_pos past the match.
                m_endPositions [ 0 ] = m_pos;

                return this;
            }
            catch ( XPathException e )
            {
                throw new SQLException(
                    "evaluating XQuery regular expression: " + e.getMessage(),
                    e);
            }
        }

        @Override
        public void close()
        {
            m_ri.close();
        }

        /*
         * Highest usable group index. Per the comment in get(), with Saxon's
         * own engine that may be one more than the number of capture groups.
         * Only meaningful after a successful get().
         */
        @Override // MatchVector
        public int groups()
        {
            return m_begPositions.length - 1;
        }

        @Override
        public int position(int groupNumber)
        {
            return m_begPositions [ groupNumber ];
        }

        @Override
        public int length(int groupNumber)
        {
            return
                m_endPositions [ groupNumber ] - m_begPositions [ groupNumber ];
        }

        @Override // MatchHandler
        public void characters(CharSequence s)
        {
            m_pos += getStringLength(s);
        }

        @Override
        public void onGroupStart(int groupNumber)
        {
            m_begPositions [ groupNumber ] = m_pos;
        }

        @Override
        public void onGroupEnd(int groupNumber)
        {
            m_endPositions [ groupNumber ] = m_pos;
        }
    }

    /**
     * Function form of the ISO SQL
     * <a id='like_regex'>{@code <regex like predicate>}</a>.
     *<p>
     * Rewrite the standard form
     *<pre>
     * value LIKE_REGEX pattern FLAG flags
     *</pre>
     * into this form:
     *<pre>
     * like_regex(value, pattern, flag => flags)
     *</pre>
     * where the {@code flag} parameter defaults to no flags if omitted.
*<p>
     * The SQL standard specifies that pattern elements sensitive to newlines
     * (namely {@code ^}, {@code $}, {@code \s}, {@code \S}, and {@code .}) are
     * to support the various representations of newline set out in
     * <a href='http://www.unicode.org/reports/tr18/#RL1.6'>Unicode Technical
     * Standard #18, RL1.6</a>. That behavior differs from the standard W3C
     * XQuery newline handling, as described for
     * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>the flags
     * {@code m} and {@code s}</a> and for
     * <a href='https://www.w3.org/TR/xmlschema11-2/#cces-mce'>the
     * multicharacter escapes {@code \s} and {@code \S}</a>. As an extension to
     * ISO SQL, passing {@code w3cNewlines => true} requests the standard W3C
     * XQuery behavior rather than the UTS#18 behavior for newlines. If the
     * underlying XQuery library only provides the W3C behavior, calls without
     * {@code w3cNewlines => true} will throw exceptions.
     * @param value The string to be tested against the pattern.
     * @param pattern The XQuery regular expression.
     * @param flag Optional string of
     * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting
     * the regular expression behavior</a>.
     * @param w3cNewlines Pass true to allow the regular expression to recognize
     * newlines according to the W3C XQuery rules rather than those of ISO SQL.
     * @return True if the supplied value matches the pattern. Null if any
     * parameter is null.
     * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular
     * expression is invalid, 2201T if the flags string is invalid;
     * SQLFeatureNotSupportedException (0A000) if (in the current
     * implementation) w3cNewlines is false or omitted.
2985 */ 2986 @Function(schema="javatest") 2987 public static boolean like_regex( 2988 String value, //strict 2989 String pattern, //strict 2990 @SQLType(defaultValue="") String flag, //strict 2991 @SQLType(defaultValue="false") boolean w3cNewlines 2992 ) 2993 throws SQLException 2994 { 2995 newlinesCheck(w3cNewlines, "like_regex"); 2996 return compileRE(pattern, flag).containsMatch(value); 2997 } 2998 2999 /** 3000 * Syntax-sugar-free form of the ISO SQL 3001 * <a id='occurrences_regex'>{@code OCCURRENCES_REGEX}</a> function: 3002 * how many times does a pattern occur in a string? 3003 *<p> 3004 * Rewrite the standard form 3005 *<pre> 3006 * OCCURRENCES_REGEX(pattern FLAG flags IN str FROM position USING units) 3007 *</pre> 3008 * into this form: 3009 *<pre> 3010 * occurrences_regex(pattern, flag => flags, "in" => str, 3011 * "from" => position, usingOctets => true|false) 3012 *</pre> 3013 * where all of the named parameters are optional except pattern and "in", 3014 * and the standard {@code USING CHARACTERS} becomes 3015 * {@code usingOctets => false}, which is the default, and 3016 * {@code USING OCTETS} becomes {@code usingOctets => true}. See also 3017 * {@link #like_regex like_regex} regarding the {@code w3cNewlines} 3018 * parameter. 3019 * @param pattern XQuery regular expression to seek in the input string. 3020 * @param in The input string. 3021 * @param flag Optional string of 3022 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting 3023 * the regular expression behavior</a>. 3024 * @param from Starting position in the input string, 1 by default. 3025 * @param usingOctets Whether position is counted in characters (actual 3026 * Unicode characters, not any smaller encoded unit, not even Java char), 3027 * which is the default, or (when true) in octets of the string's encoded 3028 * form. 
3029 * @param w3cNewlines Pass true to allow the regular expression to recognize 3030 * newlines according to the W3C XQuery rules rather than those of ISO SQL. 3031 * @return The number of occurrences of the pattern in the input string, 3032 * starting from the specified position. Null if any parameter is null; -1 3033 * if the start position is less than 1 or beyond the end of the string. 3034 * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular 3035 * expression is invalid, 2201T if the flags string is invalid; 3036 * SQLFeatureNotSupportedException (0A000) if (in the current 3037 * implementation) usingOctets is true, or w3cNewlines is false or omitted. 3038 */ 3039 @Function(schema="javatest") 3040 public static int occurrences_regex( 3041 String pattern, //strict 3042 @SQLType(name="\"in\"") String in, //strict 3043 @SQLType(defaultValue="") String flag, //strict 3044 @SQLType(name="\"from\"", defaultValue="1") int from, 3045 @SQLType(defaultValue="false") boolean usingOctets, 3046 @SQLType(defaultValue="false") boolean w3cNewlines 3047 ) 3048 throws SQLException 3049 { 3050 if ( usingAndLengthCheck(in, from, usingOctets, "occurrences_regex") ) 3051 return -1; // note: not the same as in position_regex! 3052 newlinesCheck(w3cNewlines, "occurrences_regex"); 3053 3054 ListOfMatchVectors lomv = LOMV.of(pattern, flag, in, from); 3055 3056 for ( int i = 1 ;; ++ i ) 3057 if ( null == lomv.get(i) ) 3058 return i - 1; 3059 } 3060 3061 /** 3062 * Syntax-sugar-free form of the ISO SQL 3063 * <a id='position_regex'>{@code POSITION_REGEX}</a> function: 3064 * where does a pattern, or part of it, occur in a string? 
3065 *<p> 3066 * Rewrite the standard forms 3067 *<pre> 3068 * POSITION_REGEX(START pattern FLAG flags IN str FROM position 3069 * OCCURRENCE n GROUP m) 3070 * POSITION_REGEX(AFTER pattern FLAG flags IN str FROM position 3071 * OCCURRENCE n GROUP m) 3072 *</pre> 3073 * into these forms, respectively: 3074 *<pre> 3075 * position_regex(pattern, flag => flags, "in" => str, 3076 * "from" => position, occurrence => n, 3077 * "group" => m) 3078 * position_regex(pattern, flag => flags, "in" => str, 3079 * "from" => position, occurrence => n, 3080 * "group" => m, after => true) 3081 *</pre> 3082 * where all of the named parameters are optional except pattern and "in". 3083 * See also {@link #occurrences_regex occurrences_regex} regarding the 3084 * {@code usingOctets} parameter, and {@link #like_regex like_regex} 3085 * regarding {@code w3cNewlines}. 3086 * @param pattern XQuery regular expression to seek in the input string. 3087 * @param in The input string. 3088 * @param flag Optional string of 3089 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting 3090 * the regular expression behavior</a>. 3091 * @param from Starting position in the input string, 1 by default. 3092 * @param usingOctets Whether position is counted in characters (actual 3093 * Unicode characters, not any smaller encoded unit, not even Java char), 3094 * which is the default, or (when true) in octets of the string's encoded 3095 * form. 3096 * @param after Whether to return the position where the match starts 3097 * (when false, the default), or just after the match ends (when true). 3098 * @param occurrence If specified as an integer n (default 1), returns the 3099 * position starting (or after) the nth match of the pattern in the string. 
3100 * @param group If zero (the default), returns the position starting (or 3101 * after) the match of the whole pattern overall, otherwise if an integer m, 3102 * the position starting or after the mth parenthesized group in (the nth 3103 * occurrence of) the pattern. 3104 * @param w3cNewlines Pass true to allow the regular expression to recognize 3105 * newlines according to the W3C XQuery rules rather than those of ISO SQL. 3106 * @return The position, in the specified units, starting or just after, 3107 * the nth occurrence (or mth capturing group of the nth occurrence) of the 3108 * pattern in the input string, starting from the specified position. Null 3109 * if any parameter is null; zero if the start position is less than 1 or 3110 * beyond the end of the string, if occurrence is less than 1 or greater 3111 * than the number of matches, or if group is less than zero or greater than 3112 * the number of parenthesized capturing groups in the pattern. 3113 * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular 3114 * expression is invalid, 2201T if the flags string is invalid; 3115 * SQLFeatureNotSupportedException (0A000) if (in the current 3116 * implementation) usingOctets is true, or w3cNewlines is false or omitted. 
3117 */ 3118 @Function(schema="javatest") 3119 public static int position_regex( 3120 String pattern, //strict 3121 @SQLType(name="\"in\"") String in, //strict 3122 @SQLType(defaultValue="") String flag, //strict 3123 @SQLType(name="\"from\"", defaultValue="1") int from, 3124 @SQLType(defaultValue="false") boolean usingOctets, 3125 @SQLType(defaultValue="false") boolean after, 3126 @SQLType(defaultValue="1") int occurrence, //strict 3127 @SQLType(name="\"group\"", defaultValue="0") int group, //strict 3128 @SQLType(defaultValue="false") boolean w3cNewlines 3129 ) 3130 throws SQLException 3131 { 3132 if ( 1 > occurrence ) 3133 return 0; 3134 if ( 0 > group ) // test group > ngroups after compiling regex 3135 return 0; 3136 if ( usingAndLengthCheck(in, from, usingOctets, "position_regex") ) 3137 return 0; // note: not the same as in occurrences_regex! 3138 newlinesCheck(w3cNewlines, "position_regex"); 3139 3140 ListOfMatchVectors lomv = LOMV.of(pattern, flag, in, from); 3141 3142 MatchVector mv = lomv.get(occurrence); 3143 if ( null == mv || mv.groups() < group ) 3144 return 0; 3145 3146 return mv.position(group) + (after ? mv.length(group) : 0); 3147 } 3148 3149 /** 3150 * Syntax-sugar-free form of the ISO SQL 3151 * <a id='substring_regex'>{@code SUBSTRING_REGEX}</a> function: 3152 * return a substring specified by a pattern match in a string. 3153 *<p> 3154 * Rewrite the standard form 3155 *<pre> 3156 * SUBSTRING_REGEX(pattern FLAG flags IN str FROM position 3157 * OCCURRENCE n GROUP m) 3158 *</pre> 3159 * into this form: 3160 *<pre> 3161 * substring_regex(pattern, flag => flags, "in" => str, 3162 * "from" => position, occurrence => n, 3163 * "group" => m) 3164 *</pre> 3165 * where all of the named parameters are optional except pattern and "in". 
3166 * See also {@link #position_regex position_regex} regarding the 3167 * {@code occurrence} and {@code "group"} parameters, 3168 * {@link #occurrences_regex occurrences_regex} regarding 3169 * {@code usingOctets}, and {@link #like_regex like_regex} 3170 * regarding {@code w3cNewlines}. 3171 * @param pattern XQuery regular expression to seek in the input string. 3172 * @param in The input string. 3173 * @param flag Optional string of 3174 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting 3175 * the regular expression behavior</a>. 3176 * @param from Starting position in the input string, 1 by default. 3177 * @param usingOctets Whether position is counted in characters (actual 3178 * Unicode characters, not any smaller encoded unit, not even Java char), 3179 * which is the default, or (when true) in octets of the string's encoded 3180 * form. 3181 * @param occurrence If specified as an integer n (default 1), returns the 3182 * nth match of the pattern in the string. 3183 * @param group If zero (the default), returns the match of the whole 3184 * pattern overall, otherwise if an integer m, the match of the mth 3185 * parenthesized group in (the nth occurrence of) the pattern. 3186 * @param w3cNewlines Pass true to allow the regular expression to recognize 3187 * newlines according to the W3C XQuery rules rather than those of ISO SQL. 3188 * @return The substring matching the nth occurrence (or mth capturing group 3189 * of the nth occurrence) of the pattern in the input string, starting from 3190 * the specified position. Null if any parameter is null, if the start 3191 * position is less than 1 or beyond the end of the string, if occurrence is 3192 * less than 1 or greater than the number of matches, or if group is less 3193 * than zero or greater than the number of parenthesized capturing groups in 3194 * the pattern. 
3195 * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular 3196 * expression is invalid, 2201T if the flags string is invalid; 3197 * SQLFeatureNotSupportedException (0A000) if (in the current 3198 * implementation) usingOctets is true, or w3cNewlines is false or omitted. 3199 */ 3200 @Function(schema="javatest") 3201 public static String substring_regex( 3202 String pattern, //strict 3203 @SQLType(name="\"in\"") String in, //strict 3204 @SQLType(defaultValue="") String flag, //strict 3205 @SQLType(name="\"from\"", defaultValue="1") int from, 3206 @SQLType(defaultValue="false") boolean usingOctets, 3207 @SQLType(defaultValue="1") int occurrence, //strict 3208 @SQLType(name="\"group\"", defaultValue="0") int group, //strict 3209 @SQLType(defaultValue="false") boolean w3cNewlines 3210 ) 3211 throws SQLException 3212 { 3213 if ( 1 > occurrence ) 3214 return null; 3215 if ( 0 > group ) // test group > ngroups after compiling regex 3216 return null; 3217 if ( usingAndLengthCheck(in, from, usingOctets, "substring_regex") ) 3218 return null; 3219 newlinesCheck(w3cNewlines, "substring_regex"); 3220 3221 ListOfMatchVectors lomv = LOMV.of(pattern, flag, in, from); 3222 3223 MatchVector mv = lomv.get(occurrence); 3224 if ( null == mv || mv.groups() < group ) 3225 return null; 3226 3227 int codePointPos = mv.position(group); 3228 int codePointLen = mv.length(group); 3229 3230 int utf16pos = in.offsetByCodePoints(0, codePointPos - 1); 3231 int utf16end = in.offsetByCodePoints(utf16pos, codePointLen); 3232 3233 return in.substring(utf16pos, utf16end); 3234 } 3235 3236 /** 3237 * Syntax-sugar-free form of the ISO SQL 3238 * <a id='translate_regex'>{@code TRANSLATE_REGEX}</a> function: 3239 * return a string constructed from the input string by replacing one 3240 * specified occurrence, or all occurrences, of a matching pattern. 
3241 *<p> 3242 * Rewrite the standard forms 3243 *<pre> 3244 * TRANSLATE_REGEX(pattern FLAG flags IN str WITH repl FROM position 3245 * OCCURRENCE ALL) 3246 * TRANSLATE_REGEX(pattern FLAG flags IN str WITH repl FROM position 3247 * OCCURRENCE n) 3248 *</pre> 3249 * into these forms, respectively: 3250 *<pre> 3251 * translate_regex(pattern, flag => flags, "in" => str, 3252 * "with" => repl, "from" => position) 3253 * translate_regex(pattern, flag => flags, "in" => str, 3254 * "with" => repl, "from" => position, 3255 * occurrence => n) 3256 *</pre> 3257 * where all of the named parameters are optional except pattern and "in" 3258 * (the default for "with" is the empty string, resulting in matches being 3259 * deleted). 3260 * See also {@link #position_regex position_regex} regarding the 3261 * {@code occurrence} parameter, 3262 * {@link #occurrences_regex occurrences_regex} regarding 3263 * {@code usingOctets}, and {@link #like_regex like_regex} 3264 * regarding {@code w3cNewlines}. 3265 *<p> 3266 * For the specified occurrence (or all occurrences), the matching portion 3267 * <em>s</em> of the string is replaced as by the XQuery function 3268 * <a href='https://www.w3.org/TR/xpath-functions-31/#func-replace' 3269 * >replace</a>(<em>s, pattern, repl, flags</em>). The <em>repl</em> string 3270 * may contain {@code $0} to refer to the entire matched substring, or 3271 * {@code $}<em>m</em> to refer to the <em>m</em>th parenthesized capturing 3272 * group in the pattern. 3273 * @param pattern XQuery regular expression to seek in the input string. 3274 * @param in The input string. 3275 * @param flag Optional string of 3276 * <a href='https://www.w3.org/TR/xpath-functions-31/#flags'>flags adjusting 3277 * the regular expression behavior</a>. 3278 * @param with The replacement string, possibly with $m references. 3279 * @param from Starting position in the input string, 1 by default. 
     * @param usingOctets Whether position is counted in characters (actual
     * Unicode characters, not any smaller encoded unit, not even Java char),
     * which is the default, or (when true) in octets of the string's encoded
     * form.
     * @param occurrence If specified as an integer n (default 0 for "ALL"),
     * replace the nth match of the pattern in the string.
     * @param w3cNewlines Pass true to allow the regular expression to recognize
     * newlines according to the W3C XQuery rules rather than those of ISO SQL.
     * @return The input string with one occurrence or all occurrences of the
     * pattern replaced, as described above. Null if any parameter is null, or
     * if the start position is less than 1 or beyond the end of the string.
     * The input string unchanged if occurrence is less than zero or exceeds the
     * number of matches.
     * @throws SQLException SQLDataException with SQLSTATE 2201S if the regular
     * expression is invalid, 2201T if the flags string is invalid; 2201U if
     * replacing where the pattern has matched a substring of zero length; 2201V
     * if the replacement string has improper form (a backslash must be used to
     * escape any dollar sign or backslash intended literally);
     * SQLFeatureNotSupportedException (0A000) if (in the current
     * implementation) usingOctets is true, or w3cNewlines is false or omitted.
*/
    @Function(schema="javatest")
    public static String translate_regex(
        String pattern,                                         //strict
        @SQLType(name="\"in\"") String in,                      //strict
        @SQLType(defaultValue="") String flag,                  //strict
        @SQLType(name="\"with\"", defaultValue="") String with, //strict
        @SQLType(name="\"from\"", defaultValue="1") int from,
        @SQLType(defaultValue="false") boolean usingOctets,
        @SQLType(defaultValue="0" /* ALL */) int occurrence,
        @SQLType(defaultValue="false") boolean w3cNewlines
    )
        throws SQLException
    {
        if ( usingAndLengthCheck(in, from, usingOctets, "translate_regex") )
            return null;
        newlinesCheck(w3cNewlines, "translate_regex");

        // A negative occurrence selects nothing: hand the input back as is.
        if ( occurrence < 0 )
            return in;

        // Compiled once; reused both to locate matches and to rewrite them.
        RegularExpression re = compileRE(pattern, flag);
        ListOfMatchVectors matches = LOMV.of(re, in, from);

        if ( occurrence > 0 )
        {
            // Replace only the requested match.
            MatchVector hit = matches.get(occurrence);
            if ( null == hit )
                return in; // no such occurrence, so nothing to replace

            // Group 0 is the whole match; its position and length are used
            // as code point counts and converted to UTF-16 indices.
            int cpStart = hit.position(0);
            int cpCount = hit.length(0);

            int start = in.offsetByCodePoints(0, cpStart - 1);
            int end = in.offsetByCodePoints(start, cpCount);

            return
                in.substring(0, start)
                + replace(re, in.substring(start, end), with)
                + in.substring(end);
        }

        // occurrence == 0 ("ALL"): walk the matches in order, copying the
        // text between them unchanged and each match in rewritten form.
        StringBuilder result = new StringBuilder();
        int copiedUpTo = 0; // UTF-16 index just past the last match handled
        int n = 1;

        for ( MatchVector hit = matches.get(n);
              null != hit;
              hit = matches.get(++ n) )
        {
            int cpStart = hit.position(0);
            int cpCount = hit.length(0);

            int start = in.offsetByCodePoints(0, cpStart - 1);

            result.append(in.substring(copiedUpTo, start));

            copiedUpTo = in.offsetByCodePoints(start, cpCount);

            result.append(replace(re, in.substring(start, copiedUpTo), with));
        }

        // Whatever follows the final match is carried over untouched.
        result.append(in.substring(copiedUpTo));
        return result.toString();
    }
}