/*
* stats_agg 是 PostgreSQL 的聚合函數工作方式類似于先前存在的聚集體(min(x),max(),avg()等),但計算一
* 次,并傳回他們都多的統計資料。傳回的統計資料是count,min,max,mean,variance,skewness,和 kurtosis,但其他人可以加入。
*
* 我需要一個傳回偏度和峰度的聚合,而不是為每個需要多次傳遞來計算平均值等的單獨函數,我認為最好有一個聚合在一次傳遞中傳回所有内容。
*
* 感謝 John D. Cook 和他的部落格文章(https://www.johndcook.com/blog/skewness_kurtosis/)解釋了如何做到這一點。
* */
CREATE TYPE _stats_agg_accum_type AS (
n BIGINT,
min DOUBLE PRECISION,
max DOUBLE PRECISION,
m1 DOUBLE PRECISION,
m2 DOUBLE PRECISION,
m3 DOUBLE PRECISION,
m4 DOUBLE PRECISION
);
CREATE TYPE _stats_agg_result_type AS (
count BIGINT,
min DOUBLE PRECISION,
max DOUBLE PRECISION,
mean DOUBLE PRECISION,
variance DOUBLE PRECISION,
skewness DOUBLE PRECISION,
kurtosis DOUBLE PRECISION
);
CREATE OR REPLACE FUNCTION _stats_agg_accumulator(_stats_agg_accum_type, DOUBLE PRECISION)
RETURNS _stats_agg_accum_type AS
$example_table$
DECLARE
a ALIAS FOR $1;
x ALIAS FOR $2;
n1 BIGINT;
delta DOUBLE PRECISION;
delta_n DOUBLE PRECISION;
delta_n2 DOUBLE PRECISION;
term1 DOUBLE PRECISION;
BEGIN
IF x IS NOT NULL THEN
n1 = a.n;
a.n = a.n + 1;
delta = x - a.m1;
delta_n = delta / a.n;
delta_n2 = delta_n * delta_n;
term1 = delta * delta_n * n1;
a.m1 = a.m1 + delta_n;
a.m4 = a.m4 + term1 * delta_n2 * (a.n*a.n - 3*a.n + 3) + 6 * delta_n2 * a.m2 - 4 * delta_n * a.m3;
a.m3 = a.m3 + term1 * delta_n * (a.n - 2) - 3 * delta_n * a.m2;
a.m2 = a.m2 + term1;
a.min = LEAST(a.min, x);
a.max = GREATEST(a.max, x);
END IF;
RETURN a;
END;
$example_table$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION _stats_agg_finalizer(_stats_agg_accum_type)
RETURNS _stats_agg_result_type AS
$example_table$
BEGIN
RETURN ROW(
$1.n,
$1.min,
$1.max,
$1.m1,
$1.m2 / NULLIF(($1.n - 1.0), 0),
CASE WHEN $1.m2 = 0 THEN NULL ELSE SQRT($1.n) * $1.m3 / NULLIF(($1.m2 ^ 1.5), 0) END,
CASE WHEN $1.m2 = 0 THEN NULL ELSE $1.n * $1.m4 / NULLIF(($1.m2 * $1.m2) - 3.0, 0) END
);
END;
$example_table$
LANGUAGE plpgsql;
CREATE AGGREGATE stats_agg(DOUBLE PRECISION) (
sfunc = _stats_agg_accumulator,
stype = _stats_agg_accum_type,
finalfunc = _stats_agg_finalizer,
initcond = '(0,,, 0, 0, 0, 0)'
);
/**
* 自定義函數
* 以下是根據具體項目需求和需求計算公式進行聚合(非标準聚合公式)
*/
/* 求和 */
CREATE OR REPLACE FUNCTION sum(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS
$function$
declare
ret DOUBLE PRECISION;
item DOUBLE PRECISION;
a DOUBLE PRECISION = 0;
begin
FOREACH item IN ARRAY arr
LOOP
a = a + item;
END LOOP;
ret = a;
RETURN ret;
END;
$function$
;
/* 标準差 */
CREATE OR REPLACE FUNCTION stddev(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS
$function$
DECLARE
ret DOUBLE PRECISION;
item DOUBLE PRECISION;
mean DOUBLE PRECISION;
a DOUBLE PRECISION = 0;
c INT = 0;
BEGIN
c = ARRAY_LENGTH(arr,1);
mean = MEAN(arr);
FOREACH item IN ARRAY arr
LOOP
a = a + POW(item - mean, 2);
END LOOP;
ret = POW(a/(c - 1), 0.5);
RETURN ret;
END;
$function$
;
/* 平均值 */
CREATE OR REPLACE FUNCTION mean(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS
$function$
DECLARE
ret DOUBLE PRECISION;
item DOUBLE PRECISION;
a DOUBLE PRECISION = 0;
c INT = 0;
BEGIN
c = ARRAY_LENGTH(arr, 1);
FOREACH item IN ARRAY arr
LOOP
a = a + item;
END LOOP;
ret = a / c ;
RETURN ret;
END;
$function$
;
/*幾何平均值*/
CREATE OR REPLACE FUNCTION geo_mean(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS
$function$
DECLARE
ret DOUBLE PRECISION;
item DOUBLE PRECISION;
a DOUBLE PRECISION = 0;
c INT;
BEGIN
FOREACH item IN ARRAY arr
LOOP
IF a = 0 THEN
a = item;
ELSE
a = A * ITEM;
END IF;
END LOOP;
c = ARRAY_LENGTH(arr,1);
ret = POW(a, 1.0 / c);
RETURN ret;
END;
$function$
;
/* 幾何标準差 */
CREATE OR REPLACE FUNCTION geo_std(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS
$function$
declare
ret DOUBLE PRECISION;
item DOUBLE PRECISION;
a DOUBLE PRECISION = 0;
b DOUBLE PRECISION = 0;
base NUMERIC = 10.0;
c INT = 0;
BEGIN
FOREACH item IN ARRAY arr
LOOP
a = a + POW(LOG(base, item::NUMERIC), 2.0);
b = b + LOG(base, item::NUMERIC);
END LOOP;
b = POW(b, 2);
c = ARRAY_LENGTH(arr, 1);
ret = 10 ^ (POW(((a - b / c) / (c - 1)), 0.5));
RETURN ret;
END;
$function$
;
/* 偏度系數 */
CREATE OR REPLACE FUNCTION skewness(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS
$function$
DECLARE
ret DOUBLE PRECISION;
item DOUBLE PRECISION;
a DOUBLE PRECISION = 0;
mean DOUBLE PRECISION = 0;
std_val DOUBLE PRECISION = 0;
c INT = 0;
BEGIN
c = ARRAY_LENGTH(arr,1);
mean = MEAN(arr);
std_val = STDDEV(arr);
FOREACH item IN ARRAY arr
LOOP
a = a + (POW(item - mean, 3) / POW(std_val, 3));
END LOOP;
ret = ((1 / (c - 1)::DOUBLE PRECISION) * a);
RETURN ret;
END;
$function$
;
/* 峰度系數 */
CREATE OR REPLACE FUNCTION kurtosis(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS
$function$
DECLARE
ret DOUBLE PRECISION;
item DOUBLE PRECISION;
a DOUBLE PRECISION = 0;
mean DOUBLE PRECISION = 0;
std_val DOUBLE PRECISION = 0;
c INT = 0;
BEGIN
c = ARRAY_LENGTH(arr,1);
mean = MEAN(arr);
std_val = STDDEV(arr);
FOREACH item IN ARRAY arr
LOOP
a = a + (POW(item - mean, 4) / POW(std_val, 4));
END LOOP;
ret = ((1 / (c-1)::DOUBLE PRECISION) * a) - 3;
RETURN ret;
END;
$function$
;
自定義函數中用到的幾個公式
1、 平均值

2、标準差
3、幾何平均值
4、幾何标準差
5、偏度系數
6、峰度系數