diff --git a/pkg/sql/colexec/aggexec/vardev2.go b/pkg/sql/colexec/aggexec/vardev2.go index 12351621b9c6b..da2aa9775095d 100644 --- a/pkg/sql/colexec/aggexec/vardev2.go +++ b/pkg/sql/colexec/aggexec/vardev2.go @@ -220,18 +220,21 @@ func (exec *varStdDevExec[T, A]) Flush() (_ []*vector.Vector, retErr error) { if exec.IsDistinct() { for i := range vecs { for j := 0; j < int(exec.state[i].length); j++ { - if exec.state[i].argCnt[j] == 0 { - if err := vector.AppendNull(vecs[i], exec.mp); err != nil { - return nil, err + cnt := int64(exec.state[i].argCnt[j]) + if cnt <= 1 { + // cnt == 1 && exec is samp + if cnt == 0 || !exec.isPop { + if err := vector.AppendNull(vecs[i], exec.mp); err != nil { + return nil, err + } + continue } - continue - } else if exec.state[i].argCnt[j] == 1 { z, _ := exec.f2t(0, exec.aggInfo.retType.Scale) if err := vector.AppendFixed(vecs[i], z, false, exec.mp); err != nil { return nil, err } + continue } else { - cnt := int64(exec.state[i].argCnt[j]) s := float64(0) s2 := float64(0) err := exec.state[i].iter(uint16(j), func(k []byte) error { @@ -265,10 +268,12 @@ func (exec *varStdDevExec[T, A]) Flush() (_ []*vector.Vector, retErr error) { sums := vector.MustFixedColNoTypeCheck[float64](exec.state[i].vecs[1]) sumsqs := vector.MustFixedColNoTypeCheck[float64](exec.state[i].vecs[2]) for j, cnt := range cnts { - if cnt == 0 { - vector.AppendNull(vecs[i], exec.mp) - continue - } else if cnt == 1 { + if cnt <= 1 { + // cnt == 1 && exec is samp + if cnt == 0 || !exec.isPop { + vector.AppendNull(vecs[i], exec.mp) + continue + } z, _ := exec.f2t(0, exec.aggInfo.retType.Scale) vector.AppendFixed(vecs[i], z, false, exec.mp) } else { diff --git a/pkg/sql/colexec/aggexec/vardev2_test.go b/pkg/sql/colexec/aggexec/vardev2_test.go index 0ecf217744f3a..a9a18e56f0b16 100644 --- a/pkg/sql/colexec/aggexec/vardev2_test.go +++ b/pkg/sql/colexec/aggexec/vardev2_test.go @@ -135,6 +135,45 @@ func TestGetResultVarClampsTinyVarianceToZero(t *testing.T) { require.Equal(t, 
0.0, got) } +func TestVarSampleSingleNonNullValueReturnsNull(t *testing.T) { + tests := []struct { + name string + isDistinct bool + }{ + { + name: "non-distinct", + }, + { + name: "distinct", + isDistinct: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + mp := mpool.MustNewZero() + param := types.T_int32.ToType() + exec := makeVarSampleExec(mp, 0, tc.isDistinct, param) + require.NoError(t, exec.GroupGrow(1)) + + v := vector.NewVec(param) + require.NoError(t, vector.AppendFixed(v, int32(4), false, mp)) + require.NoError(t, exec.Fill(0, 0, []*vector.Vector{v})) + v.Free(mp) + + vecs, err := exec.Flush() + require.NoError(t, err) + require.Len(t, vecs, 1) + require.True(t, vecs[0].IsNull(0)) + + for _, vec := range vecs { + vec.Free(mp) + } + exec.Free() + }) + } +} + func TestNumericToFloat64ViaVarExec(t *testing.T) { mp := mpool.MustNewZero() diff --git a/test/distributed/cases/function/func_aggr_sample.result b/test/distributed/cases/function/func_aggr_sample.result new file mode 100644 index 0000000000000..99ba823589f8c --- /dev/null +++ b/test/distributed/cases/function/func_aggr_sample.result @@ -0,0 +1,27 @@ +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x) t; +➤ stddev_samp(x)[8,54,0] ¦ var_samp(x)[8,54,0] 𝄀 +null ¦ null +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x union select 6 as x) t; +➤ stddev_samp(x)[8,54,0] ¦ var_samp(x)[8,54,0] 𝄀 +0.7071067811865476 ¦ 0.5 +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x union select 5 as x) t; +➤ stddev_samp(x)[8,54,0] ¦ var_samp(x)[8,54,0] 𝄀 +null ¦ null +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x union all select 6 as x) t; +➤ stddev_samp(x)[8,54,0] ¦ var_samp(x)[8,54,0] 𝄀 +0.7071067811865476 ¦ 0.5 +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x union all select 5 as x) t; +➤ stddev_samp(x)[8,54,0] ¦ var_samp(x)[8,54,0] 𝄀 +0.0 ¦ 0.0 +SELECT stddev_samp(DISTINCT x), var_samp(DISTINCT x) FROM (SELECT 5 AS x union select 6 as x) t; +➤ 
stddev_samp(distinct x)[8,54,0] ¦ var_samp(distinct x)[8,54,0] 𝄀 +0.7071067811865476 ¦ 0.5 +SELECT stddev_samp(DISTINCT x), var_samp(DISTINCT x) FROM (SELECT 5 AS x union select 5 as x) t; +➤ stddev_samp(distinct x)[8,54,0] ¦ var_samp(distinct x)[8,54,0] 𝄀 +null ¦ null +SELECT stddev_samp(DISTINCT x), var_samp(DISTINCT x) FROM (SELECT 5 AS x union all select 6 as x) t; +➤ stddev_samp(distinct x)[8,54,0] ¦ var_samp(distinct x)[8,54,0] 𝄀 +0.7071067811865476 ¦ 0.5 +SELECT stddev_samp(DISTINCT x), var_samp(DISTINCT x) FROM (SELECT 5 AS x union all select 5 as x) t; +➤ stddev_samp(distinct x)[8,54,0] ¦ var_samp(distinct x)[8,54,0] 𝄀 +null ¦ null diff --git a/test/distributed/cases/function/func_aggr_sample.sql b/test/distributed/cases/function/func_aggr_sample.sql new file mode 100644 index 0000000000000..f68a49eee7a53 --- /dev/null +++ b/test/distributed/cases/function/func_aggr_sample.sql @@ -0,0 +1,32 @@ +-- !!! results below are the same as PostgreSQL and MySQL +-- !!! +-- input: 5 +-- null, null +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x) t; +-- input: 5,6 +-- not,not +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x union select 6 as x) t; +-- input: 5 +-- null, null +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x union select 5 as x) t; +-- input: 5,6 +-- not,not +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x union all select 6 as x) t; +-- input: 5,5 +-- not, not +SELECT stddev_samp(x), var_samp(x) FROM (SELECT 5 AS x union all select 5 as x) t; +---------------------- +-- Mysql does not support the syntax below +-- input: 5,6 +-- not,not +SELECT stddev_samp(DISTINCT x), var_samp(DISTINCT x) FROM (SELECT 5 AS x union select 6 as x) t; +-- input: 5 +-- null, null +SELECT stddev_samp(DISTINCT x), var_samp(DISTINCT x) FROM (SELECT 5 AS x union select 5 as x) t; +-- input: 5,6 +-- not,not +SELECT stddev_samp(DISTINCT x), var_samp(DISTINCT x) FROM (SELECT 5 AS x union all select 6 as x) t; +-- input: 5 +-- null, null +SELECT 
stddev_samp(DISTINCT x), var_samp(DISTINCT x) FROM (SELECT 5 AS x union all select 5 as x) t; + diff --git a/test/distributed/cases/function/func_aggr_std.result b/test/distributed/cases/function/func_aggr_std.result index 68a74d6e6ada5..4669a70001a46 100644 --- a/test/distributed/cases/function/func_aggr_std.result +++ b/test/distributed/cases/function/func_aggr_std.result @@ -1,11 +1,11 @@ SELECT STD(null); -STD(null) +➤ STD(null)[8,54,0] 𝄀 null SELECT STDDEV(null); -STDDEV(null) +➤ STDDEV(null)[8,54,0] 𝄀 null SELECT STDDEV_POP(null); -STDDEV_POP(null) +➤ STDDEV_POP(null)[8,54,0] 𝄀 null create table t1(a tinyint, b SMALLINT, c BIGINT, d INT, e BIGINT, f FLOAT, g DOUBLE, h decimal(38,19), i DATE, k datetime, l TIMESTAMP, m char(255), n varchar(255)); insert into t1 values(1, 1, 2, 43, 5, 35.5, 31.133, 14.314, "2012-03-10", "2012-03-12 10:03:12", "2022-03-12 13:03:12", "ab23c", "d5cf"); @@ -13,28 +13,28 @@ insert into t1 values(71, 1, 2, 34, 5, 5.5, 341.13, 15.314, "2012-03-22", "2013- insert into t1 values(1, 1, 21, 4, 54, 53.5, 431.13, 14.394, "2011-03-12", "2015-03-12 10:03:12", "2002-03-12 13:03:12", "afbc", "dct5f"); insert into t1 values(1, 71, 2, 34, 5, 5.5, 31.313, 124.314, "2012-01-12", "2019-03-12 10:03:12", "2013-03-12 13:03:12", "3abd1c", "dcvf"); select std(a) from t1; -std(a) +➤ std(a)[8,54,0] 𝄀 30.31088913245535 select std(b) from t1; -std(b) +➤ std(b)[8,54,0] 𝄀 30.31088913245535 select std(c) from t1; -std(c) +➤ std(c)[8,54,0] 𝄀 8.227241335952167 select std(d) from t1; -std(d) +➤ std(d)[8,54,0] 𝄀 14.7542366796795 select std(e) from t1; -std(e) +➤ std(e)[8,54,0] 𝄀 21.21762239271875 select std(f) from t1; -std(f) +➤ std(f)[8,54,0] 𝄀 20.512191496766015 select std(g) from t1; -std(g) +➤ std(g)[8,54,0] 𝄀 180.28380047095192 select std(h) from t1; -std(h) +➤ std(h)[3,38,19] 𝄀 47.4771387090671206400 select std(i) from t1; invalid argument aggregate function std, bad value [DATE] @@ -47,28 +47,28 @@ invalid argument aggregate function std, bad value [CHAR] 
select std(n) from t1; invalid argument aggregate function std, bad value [VARCHAR] select STDDEV_POP(a) from t1; -STDDEV_POP(a) +➤ STDDEV_POP(a)[8,54,0] 𝄀 30.31088913245535 select STDDEV_POP(b) from t1; -STDDEV_POP(b) +➤ STDDEV_POP(b)[8,54,0] 𝄀 30.31088913245535 select STDDEV_POP(c) from t1; -STDDEV_POP(c) +➤ STDDEV_POP(c)[8,54,0] 𝄀 8.227241335952167 select STDDEV_POP(d) from t1; -STDDEV_POP(d) +➤ STDDEV_POP(d)[8,54,0] 𝄀 14.7542366796795 select STDDEV_POP(e) from t1; -STDDEV_POP(e) +➤ STDDEV_POP(e)[8,54,0] 𝄀 21.21762239271875 select STDDEV_POP(f) from t1; -STDDEV_POP(f) +➤ STDDEV_POP(f)[8,54,0] 𝄀 20.512191496766015 select STDDEV_POP(g) from t1; -STDDEV_POP(g) +➤ STDDEV_POP(g)[8,54,0] 𝄀 180.28380047095192 select STDDEV_POP(h) from t1; -STDDEV_POP(h) +➤ STDDEV_POP(h)[3,38,19] 𝄀 47.4771387090671206400 select STDDEV_POP(i) from t1; invalid argument aggregate function stddev_pop, bad value [DATE] @@ -82,165 +82,163 @@ select STDDEV_POP(n) from t1; invalid argument aggregate function stddev_pop, bad value [VARCHAR] drop table t1; select STDDEV_POP(99999999999999999.99999); -STDDEV_POP(99999999999999999.99999) -0E-12 +➤ STDDEV_POP(99999999999999999.99999)[3,38,11] 𝄀 +0E-11 select STDDEV_POP(999999999999999933193939.99999); -STDDEV_POP(999999999999999933193939.99999) -0E-12 +➤ STDDEV_POP(999999999999999933193939.99999)[3,38,11] 𝄀 +0E-11 select STDDEV_POP(9999999999999999999999999999999999.9999999999999); -STDDEV_POP(9999999999999999999999999999999999.9999999999999) -0E-12 +➤ STDDEV_POP(9999999999999999999999999999999999.9999999999999)[3,38,9] 𝄀 +0E-9 select STDDEV_POP(-99999999999999999.99999); -STDDEV_POP(-99999999999999999.99999) -0E-12 +➤ STDDEV_POP(-99999999999999999.99999)[3,38,11] 𝄀 +0E-11 select STDDEV_POP(-999999999999999933193939.99999); -STDDEV_POP(-999999999999999933193939.99999) -0E-12 +➤ STDDEV_POP(-999999999999999933193939.99999)[3,38,11] 𝄀 +0E-11 select STDDEV_POP(-9999999999999999999999999999999999.9999999999999); 
-STDDEV_POP(-9999999999999999999999999999999999.9999999999999) -0E-12 +➤ STDDEV_POP(-9999999999999999999999999999999999.9999999999999)[3,38,9] 𝄀 +0E-9 create table t1(a bigint); select STDDEV_POP(a) from t1; -STDDEV_POP(a) +➤ STDDEV_POP(a)[8,54,0] 𝄀 null insert into t1 values(null),(null),(null),(null); select STDDEV_POP(a) from t1; -STDDEV_POP(a) +➤ STDDEV_POP(a)[8,54,0] 𝄀 null insert into t1 values(12417249128419),(124124125124151),(5124125151415),(124125152651515); select STDDEV_POP(a) from t1; -STDDEV_POP(a) +➤ STDDEV_POP(a)[8,54,0] 𝄀 5.773458455090071E13 drop table t1; create table t1 ( a int not null default 1, big bigint ); insert into t1 (big) values (-1),(1234567890167),(92233720368547),(18446744073709515); select * from t1; -a big -1 -1 -1 1234567890167 -1 92233720368547 -1 18446744073709515 +➤ a[4,32,0] ¦ big[-5,64,0] 𝄀 +1 ¦ -1 𝄀 +1 ¦ 1234567890167 𝄀 +1 ¦ 92233720368547 𝄀 +1 ¦ 18446744073709515 select distinct STDDEV_POP(big),max(big),STDDEV_POP(big)-1 from t1; -STDDEV_POP(big) max(big) STDDEV_POP(big) - 1 -7.974271234688913E15 18446744073709515 7.974271234688912E15 +➤ STDDEV_POP(big)[8,54,0] ¦ max(big)[-5,64,0] ¦ STDDEV_POP(big) - 1[8,54,0] 𝄀 +7.974271234688913E15 ¦ 18446744073709515 ¦ 7.974271234688912E15 select STDDEV_POP(big),max(big),STDDEV_POP(big)-1 from t1 group by a; -STDDEV_POP(big) max(big) STDDEV_POP(big) - 1 -7.974271234688913E15 18446744073709515 7.974271234688912E15 +➤ STDDEV_POP(big)[8,54,0] ¦ max(big)[-5,64,0] ¦ STDDEV_POP(big) - 1[8,54,0] 𝄀 +7.974271234688913E15 ¦ 18446744073709515 ¦ 7.974271234688912E15 insert into t1 (big) values (184467440737615); select * from t1; -a big -1 -1 -1 1234567890167 -1 92233720368547 -1 18446744073709515 -1 184467440737615 +➤ a[4,32,0] ¦ big[-5,64,0] 𝄀 +1 ¦ -1 𝄀 +1 ¦ 1234567890167 𝄀 +1 ¦ 92233720368547 𝄀 +1 ¦ 18446744073709515 𝄀 +1 ¦ 184467440737615 select STDDEV_POP(big),max(big),STDDEV_POP(big)-1 from t1; -STDDEV_POP(big) max(big) STDDEV_POP(big) - 1 -7.351219993962747E15 18446744073709515 
7.351219993962746E15 +➤ STDDEV_POP(big)[8,54,0] ¦ max(big)[-5,64,0] ¦ STDDEV_POP(big) - 1[8,54,0] 𝄀 +7.351219993962747E15 ¦ 18446744073709515 ¦ 7.351219993962746E15 select STDDEV_POP(big),max(big),STDDEV_POP(big)-1 from t1 group by a; -STDDEV_POP(big) max(big) STDDEV_POP(big) - 1 -7.351219993962747E15 18446744073709515 7.351219993962746E15 +➤ STDDEV_POP(big)[8,54,0] ¦ max(big)[-5,64,0] ¦ STDDEV_POP(big) - 1[8,54,0] 𝄀 +7.351219993962747E15 ¦ 18446744073709515 ¦ 7.351219993962746E15 drop table t1; CREATE TABLE t1 (Fld1 int(11) default NULL,Fld2 int(11) default NULL); INSERT INTO t1 VALUES (1,10),(1,20),(2,NULL),(2,NULL),(3,50); select Fld1, STDDEV_POP(Fld2) as q from t1 group by Fld1 having q is not null; -Fld1 q -1 5.0 -3 0.0 +➤ Fld1[4,32,0] ¦ q[8,54,0] 𝄀 +1 ¦ 5.0 𝄀 +3 ¦ 0.0 select Fld1, STDDEV_POP(Fld2) from t1 group by Fld1 having STDDEV_POP(Fld2) is not null; -Fld1 STDDEV_POP(Fld2) -1 5.0 -3 0.0 +➤ Fld1[4,32,0] ¦ STDDEV_POP(Fld2)[8,54,0] 𝄀 +1 ¦ 5.0 𝄀 +3 ¦ 0.0 select Fld1, STDDEV_POP(Fld2) from t1 group by Fld1 having avg(Fld2) is not null; -Fld1 STDDEV_POP(Fld2) -1 5.0 -3 0.0 +➤ Fld1[4,32,0] ¦ STDDEV_POP(Fld2)[8,54,0] 𝄀 +1 ¦ 5.0 𝄀 +3 ¦ 0.0 select Fld1, STDDEV_POP(Fld2) from t1 group by Fld1 having variance(Fld2) is not null; -Fld1 STDDEV_POP(Fld2) -1 5.0 -3 0.0 +➤ Fld1[4,32,0] ¦ STDDEV_POP(Fld2)[8,54,0] 𝄀 +1 ¦ 5.0 𝄀 +3 ¦ 0.0 drop table t1; SELECT STDDEV_POP(1)