From c37311f5c0b712df713881ab6963ae3aca19ac78 Mon Sep 17 00:00:00 2001 From: shafi-VM Date: Thu, 25 Jun 2026 06:48:16 +0530 Subject: [PATCH] sql/opt: allow non-default NULLS ordering with SELECT DISTINCT When null_ordered_last is enabled (or an explicit NULLS FIRST/LAST is given), buildOrderBy synthesizes a (col IS NULL) ordering column ahead of each column with non-default NULLS ordering. For a plain SELECT DISTINCT, constructDistinct rejected any ordering column not in the select list, so these synthesized columns triggered a spurious for SELECT DISTINCT, ORDER BY expressions must appear in select list error. The DISTINCT ON path (buildDistinctOn) already tolerates these columns (case 3): a (col IS NULL) expression is functionally determined by col, so when col is a grouping column the synthesized column can be treated as one too without changing the result. This extracts that check into a shared isSynthesizedNullsOrderingCol helper and uses it from both constructDistinct and buildDistinctOn. It fixes user queries such as SET null_ordered_last = on; SELECT DISTINCT a FROM t ORDER BY a; as well as DROP USER and DROP ROLE, which internally run a SELECT DISTINCT ... ORDER BY against system.privileges. Fixing constructDistinct rather than suppressing the session setting for internal queries is the correct layer, since the error reproduces with the plain user query above. Fixes #168317 Epic: none Release note (bug fix): Fixed a bug that caused SELECT DISTINCT ... ORDER BY queries to fail with a "for SELECT DISTINCT, ORDER BY expressions must appear in select list" error when the null_ordered_last session setting was enabled. The same error caused DROP USER and DROP ROLE statements to fail, since they run such a query internally. --- .../logictest/testdata/logic_test/distinct | 38 ++++++++++ pkg/sql/opt/optbuilder/distinct.go | 69 +++++++++++++------ pkg/sql/opt/optbuilder/testdata/distinct | 32 +++++++++ 3 files changed, 117 insertions(+), 22 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/distinct b/pkg/sql/logictest/testdata/logic_test/distinct index f218db9fb07..1236ac5c75e 100644 --- a/pkg/sql/logictest/testdata/logic_test/distinct +++ b/pkg/sql/logictest/testdata/logic_test/distinct @@ -232,3 +232,41 @@ query IT SELECT x, jsonb_agg(DISTINCT jsonb_build_object('y', y, 'z', z)) FROM (SELECT * FROM t ORDER BY i) GROUP BY x ---- 1 [{"y": 2, "z": "hello"}, {"y": 2, "z": "hello there"}] + +# Regression test for #168317. Under null_ordered_last, a SELECT DISTINCT whose +# ORDER BY references select-list columns synthesizes (col IS NULL) ordering +# columns to implement NULLS LAST. These must not raise a spurious "ORDER BY +# expressions must appear in select list" error, and NULLs must sort last. + +statement ok +SET null_ordered_last = on + +statement ok +CREATE TABLE distinct_null_order (a INT, b INT) + +statement ok +INSERT INTO distinct_null_order VALUES (1, 1), (1, 1), (2, NULL), (NULL, 3), (NULL, NULL) + +query II +SELECT DISTINCT a, b FROM distinct_null_order ORDER BY a, b +---- +1 1 +2 NULL +NULL 3 +NULL NULL + +# A column referenced by ORDER BY but absent from the select list must still +# error, even though null_ordered_last synthesizes an IS NULL column for it. +statement error for SELECT DISTINCT, ORDER BY expressions must appear in select list +SELECT DISTINCT a FROM distinct_null_order ORDER BY b + +# The originally reported symptom: DROP USER internally runs a +# SELECT DISTINCT ... ORDER BY against system.privileges, which hit the bug. +statement ok +CREATE USER testuser168317 + +statement ok +DROP USER testuser168317 + +statement ok +RESET null_ordered_last diff --git a/pkg/sql/opt/optbuilder/distinct.go b/pkg/sql/opt/optbuilder/distinct.go index 92bd709b9ff..7c52451237d 100644 --- a/pkg/sql/opt/optbuilder/distinct.go +++ b/pkg/sql/opt/optbuilder/distinct.go @@ -28,12 +28,22 @@ func (b *Builder) constructDistinct(inScope *scope) memo.RelExpr { // This will cause an error for queries like: // SELECT DISTINCT a FROM t ORDER BY b // Note: this behavior is consistent with PostgreSQL. + // + // An exception is made for the synthesized (col IS NULL) columns that + // buildOrderBy adds to implement non-default NULLS ordering (e.g. when + // null_ordered_last is set, or NULLS FIRST/LAST is given explicitly). When + // col is itself a projected column, the synthesized column is functionally + // determined by it and does not change the DISTINCT result, so we treat it + // as a grouping column. This mirrors case 3 in buildDistinctOn. for _, col := range inScope.ordering { if !private.GroupingCols.Contains(col.ID()) { - panic(pgerror.Newf( - pgcode.InvalidColumnReference, - "for SELECT DISTINCT, ORDER BY expressions must appear in select list", - )) + if !b.isSynthesizedNullsOrderingCol(inScope, col.ID(), private.GroupingCols) { + panic(pgerror.Newf( + pgcode.InvalidColumnReference, + "for SELECT DISTINCT, ORDER BY expressions must appear in select list", + )) + } + private.GroupingCols.Add(col.ID()) } } @@ -44,6 +54,34 @@ func (b *Builder) constructDistinct(inScope *scope) memo.RelExpr { return b.factory.ConstructDistinctOn(input, memo.EmptyAggregationsExpr, &private) } +// isSynthesizedNullsOrderingCol reports whether the ordering column identified +// by colID is a synthesized "(col IS NULL)" expression where col is one of the +// columns in groupingCols. buildOrderBy synthesizes such columns to implement +// non-default NULLS ordering (e.g. when null_ordered_last is set, or NULLS +// FIRST/LAST is requested explicitly). Because (col IS NULL) is functionally +// determined by col, treating it as a grouping column does not change the +// result of a DISTINCT or DISTINCT ON. +func (b *Builder) isSynthesizedNullsOrderingCol( + inScope *scope, colID opt.ColumnID, groupingCols opt.ColSet, +) bool { + scopeCol := inScope.getColumn(colID) + if scopeCol == nil { + return false + } + isExpr, isIs := scopeCol.scalar.(*memo.IsExpr) + if !isIs { + return false + } + if _, isNull := isExpr.Right.(*memo.NullExpr); !isNull { + return false + } + v, isVar := isExpr.Left.(*memo.VariableExpr) + if !isVar { + return false + } + return groupingCols.Contains(v.Col) +} + // buildDistinctOn builds a set of memo groups that represent a DISTINCT ON // expression. If nullsAreDistinct is true, then construct the UpsertDistinctOn // operator rather than the DistinctOn operator (see the UpsertDistinctOn @@ -84,29 +122,16 @@ func (b *Builder) buildDistinctOn( var seen opt.ColSet for _, col := range inScope.ordering { if !distinctOnCols.Contains(col.ID()) { - colIsValid := false - scopeCol := inScope.getColumn(col.ID()) - if scopeCol != nil { - if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok { - if _, ok := isExpr.Right.(*memo.NullExpr); ok { - if v, ok := isExpr.Left.(*memo.VariableExpr); ok { - if distinctOnCols.Contains(v.Col) { - // We have a col IS NULL expression (case 3 above). - // Add the new column to distinctOnCols, since it doesn't change - // the semantics of the DISTINCT ON. - distinctOnCols.Add(col.ID()) - colIsValid = true - } - } - } - } - } - if !colIsValid { + if !b.isSynthesizedNullsOrderingCol(inScope, col.ID(), distinctOnCols) { panic(pgerror.Newf( pgcode.InvalidColumnReference, "SELECT DISTINCT ON expressions must match initial ORDER BY expressions", )) } + // We have a (col IS NULL) expression (case 3 above). Add the new + // column to distinctOnCols, since it doesn't change the semantics of + // the DISTINCT ON. + distinctOnCols.Add(col.ID()) } seen.Add(col.ID()) if seen.Equals(distinctOnCols) { diff --git a/pkg/sql/opt/optbuilder/testdata/distinct b/pkg/sql/opt/optbuilder/testdata/distinct index 62d1579e39c..8af44700997 100644 --- a/pkg/sql/opt/optbuilder/testdata/distinct +++ b/pkg/sql/opt/optbuilder/testdata/distinct @@ -317,3 +317,35 @@ sort │ └── columns: a:1!null b:2!null c:3!null d:4!null crdb_internal_mvcc_timestamp:5 tableoid:6 └── projections └── 1 [as=x:7] + +# Regression test for #168317. Under null_ordered_last, a SELECT DISTINCT whose +# ORDER BY references select-list columns synthesizes (col IS NULL) ordering +# columns (to implement non-default NULLS ordering). These synthesized columns +# must not trigger a spurious "ORDER BY expressions must appear in select list" +# error, mirroring the DISTINCT ON handling (case 3 in buildDistinctOn). +build set=null_ordered_last=true +SELECT DISTINCT y, z FROM xyz ORDER BY y, z +---- +distinct-on + ├── columns: y:2 z:3 [hidden: nulls_ordering_y:6!null nulls_ordering_z:7!null] + ├── grouping columns: y:2 z:3 nulls_ordering_y:6!null nulls_ordering_z:7!null + ├── ordering: +6,+2,+7,+3 + └── sort + ├── columns: y:2 z:3 nulls_ordering_y:6!null nulls_ordering_z:7!null + ├── ordering: +6,+2,+7,+3 + └── project + ├── columns: nulls_ordering_y:6!null nulls_ordering_z:7!null y:2 z:3 + ├── project + │ ├── columns: y:2 z:3 + │ └── scan xyz + │ └── columns: x:1!null y:2 z:3 crdb_internal_mvcc_timestamp:4 tableoid:5 + └── projections + ├── y:2 IS NULL [as=nulls_ordering_y:6] + └── z:3 IS NULL [as=nulls_ordering_z:7] + +# A genuinely invalid query (ORDER BY a non-projected column) must still error, +# even though null_ordered_last synthesizes an IS NULL column for it. +build set=null_ordered_last=true +SELECT DISTINCT y FROM xyz ORDER BY z +---- +error (42P10): for SELECT DISTINCT, ORDER BY expressions must appear in select list