001/* 002 * Copyright (c) 2015-2023 Tada AB and other contributors, as listed below. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the The BSD 3-Clause License 006 * which accompanies this distribution, and is available at 007 * http://opensource.org/licenses/BSD-3-Clause 008 * 009 * Contributors: 010 * Chapman Flack 011 */ 012package org.postgresql.pljava.example.annotation; 013 014import java.sql.ResultSet; 015import java.sql.SQLException; 016 017import org.postgresql.pljava.annotation.SQLAction; 018import org.postgresql.pljava.annotation.Function; 019 020/** 021 * Test that strings containing characters from all Unicode planes 022 * are passed between PG and Java without alteration (issue 21). 023 * <p> 024 * This function takes a string and an array of ints constructed in PG, such 025 * that PG believes the codepoints in the string to correspond exactly with the 026 * ints in the array. The function compares the two, generates a new array from 027 * the codepoints Java sees in the string and a new Java string from the 028 * original array, and returns a tuple (matched, cparray, s) where 029 * {@code matched} indicates whether the original array and string matched 030 * as seen by Java, and {@code cparray} and {@code s} are the new array and 031 * string generated in Java. 032 * <p> 033 * The supplied test query generates all Unicode code points 1k at a time, 034 * calls this function on each (1k array, 1k string) pair, and counts a failure 035 * if {@code matched} is false or the original and returned arrays or strings 036 * do not match as seen in SQL. 037 * <p> 038 * This example sets an {@code implementor} tag based on a PostgreSQL condition, 039 * as further explained in the {@link ConditionalDDR} example. 040 */ 041@SQLAction(provides="postgresql_unicodetest", install= 042 "SELECT CASE" + 043 " WHEN 'UTF8' = current_setting('server_encoding')" + 044 " THEN set_config('pljava.implementors', 'postgresql_unicodetest,' ||" + 045 " current_setting('pljava.implementors'), true) " + 046 "END" 047) 048@SQLAction(requires="unicodetest fn", 049implementor="postgresql_unicodetest", 050install= 051" WITH " + 052" usable_codepoints ( cp ) AS ( " + 053" SELECT generate_series(1,x'd7ff'::int) " + 054" UNION ALL " + 055" SELECT generate_series(x'e000'::int,x'10ffff'::int) " + 056" ), " + 057" test_inputs ( groupnum, cparray, s ) AS ( " + 058" SELECT " + 059" cp / 1024 AS groupnum, " + 060" array_agg(cp ORDER BY cp), string_agg(chr(cp), '' ORDER BY cp) " + 061" FROM usable_codepoints " + 062" GROUP BY groupnum " + 063" ), " + 064" test_outputs AS ( " + 065" SELECT groupnum, cparray, s, unicodetest(s, cparray) AS roundtrip " + 066" FROM test_inputs " + 067" ), " + 068" test_failures AS ( " + 069" SELECT * " + 070" FROM test_outputs " + 071" WHERE " + 072" cparray != (roundtrip).cparray OR s != (roundtrip).s " + 073" OR NOT (roundtrip).matched " + 074" ), " + 075" test_summary ( n_failing_groups, first_failing_group ) AS ( " + 076" SELECT count(*), min(groupnum) FROM test_failures " + 077" ) " + 078" SELECT " + 079" CASE WHEN n_failing_groups > 0 THEN " + 080" javatest.logmessage('WARNING', n_failing_groups || " + 081" ' 1k codepoint ranges had mismatches, first is block starting 0x' || " + 082" to_hex(1024 * first_failing_group)) " + 083" ELSE " + 084" javatest.logmessage('INFO', " + 085" 'all Unicode codepoint ranges roundtripped successfully.') " + 086" END " + 087" FROM test_summary" 088) 089public class UnicodeRoundTripTest { 090 /** 091 * This function takes a string and an array of ints constructed in PG, 092 * such that PG believes the codepoints in the string to correspond exactly 093 * with the ints in the array. The function compares the two, generates a 094 * new array from the codepoints Java sees in the string and a new Java 095 * string from the original array, and returns a tuple (matched, cparray, 096 * s) where {@code matched} indicates whether the original array and string 097 * matched as seen by Java, and {@code cparray} and {@code s} are the new 098 * array and string generated in Java. 099 * 100 * @param s A string, whose codepoints should match the entries of 101 * {@code ints} 102 * @param ints Array of ints that should match the codepoints in {@code s} 103 * @param rs OUT (matched, cparray, s) as described above 104 * @return true to indicate the OUT tuple is not null 105 */ 106 @Function(out={"matched boolean", "cparray integer[]", "s text"}, 107 provides="unicodetest fn") 108 public static boolean unicodetest(String s, int[] ints, ResultSet rs) 109 throws SQLException { 110 boolean ok = true; 111 112 int cpc = s.codePointCount(0, s.length()); 113 Integer[] myInts = new Integer[cpc]; 114 int ci = 0; 115 for ( int cpi = 0; cpi < cpc; cpi++ ) { 116 myInts[cpi] = s.codePointAt(ci); 117 ci = s.offsetByCodePoints(ci, 1); 118 } 119 120 String myS = new String(ints, 0, ints.length); 121 122 if ( ints.length != myInts.length ) 123 ok = false; 124 else 125 for ( int i = 0; i < ints.length; ++ i ) 126 if ( ints[i] != myInts[i] ) 127 ok = false; 128 129 rs.updateBoolean("matched", ok); 130 rs.updateObject("cparray", myInts); 131 rs.updateString("s", myS); 132 return true; 133 } 134}