001/*
002 * Copyright (c) 2020 Tada AB and other contributors, as listed below.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the The BSD 3-Clause License
006 * which accompanies this distribution, and is available at
007 * http://opensource.org/licenses/BSD-3-Clause
008 *
009 * Contributors:
010 *   Chapman Flack
011 */
012package org.postgresql.pljava.example.annotation;
013
014import static java.lang.Math.fma;
015
016import java.sql.ResultSet;
017import java.sql.SQLException;
018
019import org.postgresql.pljava.annotation.Aggregate;
020import org.postgresql.pljava.annotation.Function;
021import static
022    org.postgresql.pljava.annotation.Function.OnNullInput.RETURNS_NULL;
023import static org.postgresql.pljava.annotation.Function.Effects.IMMUTABLE;
024import org.postgresql.pljava.annotation.SQLAction;
025
026/**
027 * A class demonstrating several aggregate functions.
028 *<p>
029 * They are (some of) the same two-variable statistical aggregates already
030 * offered in core PostgreSQL, just because they make clear examples. For
031 * numerical reasons, they might not produce results identical to PG's built-in
032 * ones. These closely follow the "schoolbook" formulas in the HP-11C calculator
033 * owner's handbook, while the ones built into PostgreSQL use a more clever
034 * algorithm instead to reduce rounding error in the finishers.
035 *<p>
036 * All these aggregates can be computed by different finishers that share a
037 * state that accumulates the count of rows, sum of x, sum of xx, sum of y, sum
038 * of yy, and sum of xy. That is easy with finishers that don't need to modify
039 * the state, so the default {@code FinishEffect=READ_ONLY} is appropriate.
040 *<p>
041 * Everything here takes the y parameter first, then x, like the SQL ones.
042 */
043@SQLAction(requires = { "avgx", "avgy", "slope", "intercept" }, install = {
044    "WITH" +
045    " data (y, x) AS (VALUES" +
046    "  (1.761 ::float8, 5.552::float8)," +
047    "  (1.775,          5.963)," +
048    "  (1.792,          6.135)," +
049    "  (1.884,          6.313)," +
050    "  (1.946,          6.713)"  +
051    " )," +
052    " expected (avgx, avgy, slope, intercept) AS (" +
053    "  SELECT 6.1352, 1.8316, 0.1718, 0.7773" +
054    " )," +
055    " got AS (" +
056    "  SELECT" +
057    "    round(     avgx(y,x)::numeric, 4) AS avgx," +
058    "    round(     avgy(y,x)::numeric, 4) AS avgy," +
059    "    round(    slope(y,x)::numeric, 4) AS slope," +
060    "    round(intercept(y,x)::numeric, 4) AS intercept" +
061    "   FROM" +
062    "    data" +
063    " )" +
064    "SELECT" +
065    "  CASE WHEN expected IS NOT DISTINCT FROM got" +
066    "  THEN javatest.logmessage('INFO', 'aggregate examples ok')" +
067    "  ELSE javatest.logmessage('WARNING', 'aggregate examples ng')" +
068    "  END" +
069    " FROM" +
070    "  expected, got"
071})
072@Aggregate(provides = "avgx",
073    name = { "javatest", "avgx" },
074    arguments = { "y double precision", "x double precision" },
075    plan = @Aggregate.Plan(
076        stateType = "double precision[]",
077        /*
078         * State size is merely a hint to PostgreSQL's planner and can
079         * be omitted. Perhaps it is worth hinting, as the state type
080         * "double precision[]" does not tell PostgreSQL how large the array
081         * might be. Anyway, this is an example and should show how to do it.
082         * For this aggregate, the state never grows; the size of the initial
083         * value is the size forever.
084         *
085         * To get a quick sense of the size, one can assign the initial state
086         * as the default for a table column, then consult the pg_node_tree for
087         * the attribute default entry:
088         *
089         * CREATE TEMPORARY TABLE
090         *   foo (bar DOUBLE PRECISION[] DEFAULT '{0,0,0,0,0,0}');
091         *
092         * SELECT
093         *   xpath('/CONST/member[@name="constvalue"]/@length',
094         *         javatest.pgNodeTreeAsXML(adbin)             )
095         *  FROM pg_attrdef
096         *  WHERE adrelid = 'foo'::regclass;
097         *
098         * In this case the 72 that comes back represents 48 bytes for six
099         * float8s, plus 24 for varlena and array overhead, with no null bitmap
100         * because no element is null.
101         */
102        stateSize = 72,
103        initialState = "{0,0,0,0,0,0}",
104        accumulate = { "javatest", "accumulateXY" },
105        finish = { "javatest", "finishAvgX" }
106    )
107)
108@Aggregate(provides = "avgy",
109    name = { "javatest", "avgy" },
110    arguments = { "y double precision", "x double precision" },
111    plan = @Aggregate.Plan(
112        stateType = "double precision[]",
113        stateSize = 72,
114        initialState = "{0,0,0,0,0,0}",
115        accumulate = { "javatest", "accumulateXY" },
116        finish = { "javatest", "finishAvgY" }
117    )
118)
119@Aggregate(provides = "slope",
120    name = { "javatest", "slope" },
121    arguments = { "y double precision", "x double precision" },
122    plan = @Aggregate.Plan(
123        stateType = "double precision[]",
124        stateSize = 72,
125        initialState = "{0,0,0,0,0,0}",
126        accumulate = { "javatest", "accumulateXY" },
127        finish = { "javatest", "finishSlope" }
128    )
129)
130@Aggregate(provides = "intercept",
131    name = { "javatest", "intercept" },
132    arguments = { "y double precision", "x double precision" },
133    plan = @Aggregate.Plan(
134        stateType = "double precision[]",
135        stateSize = 72,
136        initialState = "{0,0,0,0,0,0}",
137        accumulate = { "javatest", "accumulateXY" },
138        finish = { "javatest", "finishIntercept" }
139    )
140)
141@Aggregate(
142    name = "javatest.regression",
143    arguments = { "y double precision", "x double precision" },
144    plan = @Aggregate.Plan(
145        stateType = "double precision[]",
146        stateSize = 72,
147        initialState = "{0,0,0,0,0,0}",
148        accumulate = { "javatest", "accumulateXY" },
149        finish = { "javatest", "finishRegr" }
150    ),
151    /*
152     * There is no special reason for this aggregate and not the others to have
153     * a movingPlan; one example is enough, that's all.
154     */
155    movingPlan = @Aggregate.Plan(
156        stateType = "double precision[]",
157        stateSize = 72,
158        initialState = "{0,0,0,0,0,0}",
159        accumulate = { "javatest", "accumulateXY" },
160        remove = { "javatest", "removeXY" },
161        finish = { "javatest", "finishRegr" }
162    )
163)
164public class Aggregates
165{
166    private Aggregates() { } // do not instantiate
167
168    private static final int N   = 0;
169    private static final int SX  = 1;
170    private static final int SXX = 2;
171    private static final int SY  = 3;
172    private static final int SYY = 4;
173    private static final int SXY = 5;
174
175    /**
176     * A common accumulator for two-variable statistical aggregates that
177     * depend on n, Sx, Sxx, Sy, Syy, and Sxy.
178     */
179    @Function(
180        schema = "javatest", effects = IMMUTABLE, onNullInput = RETURNS_NULL
181    )
182    public static double[] accumulateXY(double[] state, double y, double x)
183    {
184        state[N  ] += 1.;
185        state[SX ] += x;
186        state[SXX] = fma(x, x, state[2]);
187        state[SY ] += y;
188        state[SYY] = fma(y, y, state[4]);
189        state[SXY] = fma(x, y, state[5]);
190        return state;
191    }
192
193    /**
194     * 'Removes' from the state a row previously accumulated, for possible use
195     * in a window with a moving frame start.
196     *<p>
197     * This can be a numerically poor idea for exactly the reasons covered in
198     * the PostgreSQL docs involving loss of significance in long sums, but it
199     * does demonstrate the idea.
200     */
201    @Function(
202        schema = "javatest", effects = IMMUTABLE, onNullInput = RETURNS_NULL
203    )
204    public static double[] removeXY(double[] state, double y, double x)
205    {
206        state[N  ] -= 1.;
207        state[SX ] -= x;
208        state[SXX] = fma(x, -x, state[2]);
209        state[SY ] -= y;
210        state[SYY] = fma(y, -y, state[4]);
211        state[SXY] = fma(x, -y, state[5]);
212        return state;
213    }
214
215    /**
216     * Finisher that returns the count of non-null rows accumulated.
217     *<p>
218     * As an alternative to collecting all {@code @Aggregate} annotations up at
219     * the top of the class and specifying everything explicitly, an
220     * {@code @Aggregate} annotation can be placed on a method, either
221     * the accumulator or the finisher, in which case less needs to be
222     * specified. The state type can always be determined from the annotated
223     * method (whether it is the accumulator or the finisher), and its SQL name
224     * will be the default name for the aggregate also. When the method is the
225     * accumulator, the aggregate's arguments are also determined.
226     *<p>
227     * This being a finisher method, the {@code @Aggregate} annotation placed
228     * here does need to specify the arguments, initial state, and accumulator.
229     */
230    @Aggregate(
231        arguments = { "y double precision", "x double precision" },
232        plan = @Aggregate.Plan(
233            stateSize = 72,
234            initialState = "{0,0,0,0,0,0}",
235            accumulate = { "javatest", "accumulateXY" }
236        )
237    )
238    @Function(
239        schema = "javatest", effects = IMMUTABLE, onNullInput = RETURNS_NULL
240    )
241    public static long count(double[] state)
242    {
243        return (long)state[N];
244    }
245
246    /**
247     * Finisher that returns the mean of the accumulated x values.
248     */
249    @Function(
250        schema = "javatest", effects = IMMUTABLE, onNullInput = RETURNS_NULL
251    )
252    public static Double finishAvgX(double[] state)
253    {
254        if ( 0. == state[N] )
255            return null;
256        return state[SX] / state[N];
257    }
258
259    /**
260     * Finisher that returns the mean of the accumulated y values.
261     */
262    @Function(
263        schema = "javatest", effects = IMMUTABLE, onNullInput = RETURNS_NULL
264    )
265    public static Double finishAvgY(double[] state)
266    {
267        if ( 0. == state[N] )
268            return null;
269        return state[SY] / state[N];
270    }
271
272    /**
273     * Finisher that returns the slope of a regression line.
274     */
275    @Function(
276        schema = "javatest", effects = IMMUTABLE, onNullInput = RETURNS_NULL
277    )
278    public static Double finishSlope(double[] state)
279    {
280        if ( 2. > state[N] )
281            return null;
282
283        double numer = fma(state[SX], -state[SY], state[N] * state[SXY]);
284        double denom = fma(state[SX], -state[SX], state[N] * state[SXX]);
285        return 0. == denom ? null : numer / denom;
286    }
287
288    /**
289     * Finisher that returns the intercept of a regression line.
290     */
291    @Function(
292        schema = "javatest", effects = IMMUTABLE, onNullInput = RETURNS_NULL
293    )
294    public static Double finishIntercept(double[] state)
295    {
296        if ( 2 > state[N] )
297            return null;
298
299        double numer = fma(state[SY], state[SXX], -state[SX] * state[SXY]);
300        double denom = fma(state[SX], -state[SX], state[N] * state[SXX]);
301        return 0. == denom ? null : numer / denom;
302    }
303
304    /**
305     * A finisher that returns the slope and intercept together.
306     *<p>
307     * An aggregate can be built over this finisher and will return a record
308     * result, but at present (PG 13) access to that record by field doesn't
309     * work, as its tuple descriptor gets lost along the way. Unclear so far
310     * whether it might be feasible to fix that.
311     */
312    @Function(
313        schema = "javatest", effects = IMMUTABLE, onNullInput = RETURNS_NULL,
314        out = { "slope double precision", "intercept double precision" }
315    )
316    public static boolean finishRegr(double[] state, ResultSet out)
317    throws SQLException
318    {
319        out.updateObject(1, finishSlope(state));
320        out.updateObject(2, finishIntercept(state));
321        return true;
322    }
323
324    /**
325     * An example aggregate that sums its input.
326     *<p>
327     * The simplest kind of aggregate, having only an accumulate function,
328     * default initial state, and no finisher (the state value is the return)
329     * can be declared very concisely by annotating the accumulate method.
330     */
331    @Aggregate
332    @Function(
333        schema = "javatest", effects = IMMUTABLE, onNullInput = RETURNS_NULL
334    )
335    public static double sum(double state, double x)
336    {
337        return state + x;
338    }
339}