001/*
002 * Copyright (c) 2015-2023 Tada AB and other contributors, as listed below.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the The BSD 3-Clause License
006 * which accompanies this distribution, and is available at
007 * http://opensource.org/licenses/BSD-3-Clause
008 *
009 * Contributors:
010 *   Chapman Flack
011 */
012package org.postgresql.pljava.example.annotation;
013
014import java.sql.ResultSet;
015import java.sql.SQLException;
016
017import org.postgresql.pljava.annotation.SQLAction;
018import org.postgresql.pljava.annotation.Function;
019
020/**
021 * Test that strings containing characters from all Unicode planes
022 * are passed between PG and Java without alteration (issue 21).
023 * <p>
024 * This function takes a string and an array of ints constructed in PG, such
025 * that PG believes the codepoints in the string to correspond exactly with the
026 * ints in the array. The function compares the two, generates a new array from
027 * the codepoints Java sees in the string and a new Java string from the
028 * original array, and returns a tuple (matched, cparray, s) where
029 * {@code matched} indicates whether the original array and string matched
030 * as seen by Java, and {@code cparray} and {@code s} are the new array and
031 * string generated in Java.
032 * <p>
033 * The supplied test query generates all Unicode code points 1k at a time,
034 * calls this function on each (1k array, 1k string) pair, and counts a failure
035 * if {@code matched} is false or the original and returned arrays or strings
036 * do not match as seen in SQL.
037 * <p>
038 * This example sets an {@code implementor} tag based on a PostgreSQL condition,
039 * as further explained in the {@link ConditionalDDR} example.
040 */
041@SQLAction(provides="postgresql_unicodetest", install=
042    "SELECT CASE" +
043    " WHEN 'UTF8' = current_setting('server_encoding')" +
044    " THEN set_config('pljava.implementors', 'postgresql_unicodetest,' ||" +
045    " current_setting('pljava.implementors'), true) " +
046    "END"
047)
048@SQLAction(requires="unicodetest fn",
049implementor="postgresql_unicodetest",
050install=
051"   WITH " +
052"    usable_codepoints ( cp ) AS ( " +
053"     SELECT generate_series(1,x'd7ff'::int) " +
054"     UNION ALL " +
055"     SELECT generate_series(x'e000'::int,x'10ffff'::int) " +
056"    ), " +
057"    test_inputs ( groupnum, cparray, s ) AS ( " +
058"     SELECT " +
059"       cp / 1024 AS groupnum, " +
060"       array_agg(cp ORDER BY cp), string_agg(chr(cp), '' ORDER BY cp) " +
061"     FROM usable_codepoints " +
062"     GROUP BY groupnum " +
063"    ), " +
064"    test_outputs AS ( " +
065"     SELECT groupnum, cparray, s, unicodetest(s, cparray) AS roundtrip " +
066"     FROM test_inputs " +
067"    ), " +
068"    test_failures AS ( " +
069"     SELECT * " +
070"     FROM test_outputs " +
071"     WHERE " +
072"      cparray != (roundtrip).cparray OR s != (roundtrip).s " +
073"      OR NOT (roundtrip).matched " +
074"    ), " +
075"    test_summary ( n_failing_groups, first_failing_group ) AS ( " +
076"     SELECT count(*), min(groupnum) FROM test_failures " +
077"    ) " +
078"   SELECT " +
079"    CASE WHEN n_failing_groups > 0 THEN " +
080"     javatest.logmessage('WARNING', n_failing_groups || " +
081"      ' 1k codepoint ranges had mismatches, first is block starting 0x' || " +
082"      to_hex(1024 * first_failing_group)) " +
083"    ELSE " +
084"     javatest.logmessage('INFO', " +
085"        'all Unicode codepoint ranges roundtripped successfully.') " +
086"    END " +
087"    FROM test_summary"
088)
089public class UnicodeRoundTripTest {
090    /**
091     * This function takes a string and an array of ints constructed in PG,
092     * such that PG believes the codepoints in the string to correspond exactly
093     * with the ints in the array. The function compares the two, generates a
094     * new array from the codepoints Java sees in the string and a new Java
095     * string from the original array, and returns a tuple (matched, cparray,
096     * s) where {@code matched} indicates whether the original array and string
097     * matched as seen by Java, and {@code cparray} and {@code s} are the new
098     * array and string generated in Java.
099     *
100     * @param s A string, whose codepoints should match the entries of
101     *        {@code ints}
102     * @param ints Array of ints that should match the codepoints in {@code s}
103     * @param rs OUT (matched, cparray, s) as described above
104     * @return true to indicate the OUT tuple is not null
105     */
106    @Function(out={"matched boolean", "cparray integer[]", "s text"},
107        provides="unicodetest fn")
108    public static boolean unicodetest(String s, int[] ints, ResultSet rs)
109    throws SQLException {
110        boolean ok = true;
111
112        int cpc = s.codePointCount(0, s.length());
113        Integer[] myInts = new Integer[cpc];
114        int ci = 0;
115        for ( int cpi = 0; cpi < cpc; cpi++ ) {
116            myInts[cpi] = s.codePointAt(ci);
117            ci = s.offsetByCodePoints(ci, 1);
118        }
119
120        String myS = new String(ints, 0, ints.length);
121
122        if ( ints.length != myInts.length )
123            ok = false;
124        else
125            for ( int i = 0; i < ints.length; ++ i )
126    if ( ints[i] != myInts[i] )
127        ok = false;
128
129        rs.updateBoolean("matched", ok);
130        rs.updateObject("cparray", myInts);
131        rs.updateString("s", myS);
132        return true;
133    }
134}