diff --git a/pkg/sql/logictest/testdata/logic_test/distinct b/pkg/sql/logictest/testdata/logic_test/distinct index f218db9fb07b..1236ac5c75e8 100644 --- a/pkg/sql/logictest/testdata/logic_test/distinct +++ b/pkg/sql/logictest/testdata/logic_test/distinct @@ -232,3 +232,41 @@ query IT SELECT x, jsonb_agg(DISTINCT jsonb_build_object('y', y, 'z', z)) FROM (SELECT * FROM t ORDER BY i) GROUP BY x ---- 1 [{"y": 2, "z": "hello"}, {"y": 2, "z": "hello there"}] + +# Regression test for #168317. Under null_ordered_last, a SELECT DISTINCT whose +# ORDER BY references select-list columns synthesizes (col IS NULL) ordering +# columns to implement NULLS LAST. These must not raise a spurious "ORDER BY +# expressions must appear in select list" error, and NULLs must sort last. + +statement ok +SET null_ordered_last = on + +statement ok +CREATE TABLE distinct_null_order (a INT, b INT) + +statement ok +INSERT INTO distinct_null_order VALUES (1, 1), (1, 1), (2, NULL), (NULL, 3), (NULL, NULL) + +query II +SELECT DISTINCT a, b FROM distinct_null_order ORDER BY a, b +---- +1 1 +2 NULL +NULL 3 +NULL NULL + +# A column referenced by ORDER BY but absent from the select list must still +# error, even though null_ordered_last synthesizes an IS NULL column for it. +statement error for SELECT DISTINCT, ORDER BY expressions must appear in select list +SELECT DISTINCT a FROM distinct_null_order ORDER BY b + +# The originally reported symptom: DROP USER internally runs a +# SELECT DISTINCT ... ORDER BY against system.privileges, which hit the bug. +statement ok +CREATE USER testuser168317 + +statement ok +DROP USER testuser168317 + +statement ok +RESET null_ordered_last diff --git a/pkg/sql/opt/optbuilder/distinct.go b/pkg/sql/opt/optbuilder/distinct.go index 92bd709b9ffc..7c52451237d0 100644 --- a/pkg/sql/opt/optbuilder/distinct.go +++ b/pkg/sql/opt/optbuilder/distinct.go @@ -28,12 +28,22 @@ func (b *Builder) constructDistinct(inScope *scope) memo.RelExpr { // This will cause an error for queries like: // SELECT DISTINCT a FROM t ORDER BY b // Note: this behavior is consistent with PostgreSQL. + // + // An exception is made for the synthesized (col IS NULL) columns that + // buildOrderBy adds to implement non-default NULLS ordering (e.g. when + // null_ordered_last is set, or NULLS FIRST/LAST is given explicitly). When + // col is itself a projected column, the synthesized column is functionally + // determined by it and does not change the DISTINCT result, so we treat it + // as a grouping column. This mirrors case 3 in buildDistinctOn. for _, col := range inScope.ordering { if !private.GroupingCols.Contains(col.ID()) { - panic(pgerror.Newf( - pgcode.InvalidColumnReference, - "for SELECT DISTINCT, ORDER BY expressions must appear in select list", - )) + if !b.isSynthesizedNullsOrderingCol(inScope, col.ID(), private.GroupingCols) { + panic(pgerror.Newf( + pgcode.InvalidColumnReference, + "for SELECT DISTINCT, ORDER BY expressions must appear in select list", + )) + } + private.GroupingCols.Add(col.ID()) } } @@ -44,6 +54,34 @@ func (b *Builder) constructDistinct(inScope *scope) memo.RelExpr { return b.factory.ConstructDistinctOn(input, memo.EmptyAggregationsExpr, &private) } +// isSynthesizedNullsOrderingCol reports whether the ordering column identified +// by colID is a synthesized "(col IS NULL)" expression where col is one of the +// columns in groupingCols. buildOrderBy synthesizes such columns to implement +// non-default NULLS ordering (e.g. when null_ordered_last is set, or NULLS +// FIRST/LAST is requested explicitly). Because (col IS NULL) is functionally +// determined by col, treating it as a grouping column does not change the +// result of a DISTINCT or DISTINCT ON. +func (b *Builder) isSynthesizedNullsOrderingCol( + inScope *scope, colID opt.ColumnID, groupingCols opt.ColSet, +) bool { + scopeCol := inScope.getColumn(colID) + if scopeCol == nil { + return false + } + isExpr, isIs := scopeCol.scalar.(*memo.IsExpr) + if !isIs { + return false + } + if _, isNull := isExpr.Right.(*memo.NullExpr); !isNull { + return false + } + v, isVar := isExpr.Left.(*memo.VariableExpr) + if !isVar { + return false + } + return groupingCols.Contains(v.Col) +} + // buildDistinctOn builds a set of memo groups that represent a DISTINCT ON // expression. If nullsAreDistinct is true, then construct the UpsertDistinctOn // operator rather than the DistinctOn operator (see the UpsertDistinctOn @@ -84,29 +122,16 @@ func (b *Builder) buildDistinctOn( var seen opt.ColSet for _, col := range inScope.ordering { if !distinctOnCols.Contains(col.ID()) { - colIsValid := false - scopeCol := inScope.getColumn(col.ID()) - if scopeCol != nil { - if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok { - if _, ok := isExpr.Right.(*memo.NullExpr); ok { - if v, ok := isExpr.Left.(*memo.VariableExpr); ok { - if distinctOnCols.Contains(v.Col) { - // We have a col IS NULL expression (case 3 above). - // Add the new column to distinctOnCols, since it doesn't change - // the semantics of the DISTINCT ON. - distinctOnCols.Add(col.ID()) - colIsValid = true - } - } - } - } - } - if !colIsValid { + if !b.isSynthesizedNullsOrderingCol(inScope, col.ID(), distinctOnCols) { panic(pgerror.Newf( pgcode.InvalidColumnReference, "SELECT DISTINCT ON expressions must match initial ORDER BY expressions", )) } + // We have a (col IS NULL) expression (case 3 above). Add the new + // column to distinctOnCols, since it doesn't change the semantics of + // the DISTINCT ON. + distinctOnCols.Add(col.ID()) } seen.Add(col.ID()) if seen.Equals(distinctOnCols) { diff --git a/pkg/sql/opt/optbuilder/testdata/distinct b/pkg/sql/opt/optbuilder/testdata/distinct index 62d1579e39c8..8af447009976 100644 --- a/pkg/sql/opt/optbuilder/testdata/distinct +++ b/pkg/sql/opt/optbuilder/testdata/distinct @@ -317,3 +317,35 @@ sort │ └── columns: a:1!null b:2!null c:3!null d:4!null crdb_internal_mvcc_timestamp:5 tableoid:6 └── projections └── 1 [as=x:7] + +# Regression test for #168317. Under null_ordered_last, a SELECT DISTINCT whose +# ORDER BY references select-list columns synthesizes (col IS NULL) ordering +# columns (to implement non-default NULLS ordering). These synthesized columns +# must not trigger a spurious "ORDER BY expressions must appear in select list" +# error, mirroring the DISTINCT ON handling (case 3 in buildDistinctOn). +build set=null_ordered_last=true +SELECT DISTINCT y, z FROM xyz ORDER BY y, z +---- +distinct-on + ├── columns: y:2 z:3 [hidden: nulls_ordering_y:6!null nulls_ordering_z:7!null] + ├── grouping columns: y:2 z:3 nulls_ordering_y:6!null nulls_ordering_z:7!null + ├── ordering: +6,+2,+7,+3 + └── sort + ├── columns: y:2 z:3 nulls_ordering_y:6!null nulls_ordering_z:7!null + ├── ordering: +6,+2,+7,+3 + └── project + ├── columns: nulls_ordering_y:6!null nulls_ordering_z:7!null y:2 z:3 + ├── project + │ ├── columns: y:2 z:3 + │ └── scan xyz + │ └── columns: x:1!null y:2 z:3 crdb_internal_mvcc_timestamp:4 tableoid:5 + └── projections + ├── y:2 IS NULL [as=nulls_ordering_y:6] + └── z:3 IS NULL [as=nulls_ordering_z:7] + +# A genuinely invalid query (ORDER BY a non-projected column) must still error, +# even though null_ordered_last synthesizes an IS NULL column for it. +build set=null_ordered_last=true +SELECT DISTINCT y FROM xyz ORDER BY z +---- +error (42P10): for SELECT DISTINCT, ORDER BY expressions must appear in select list