Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distinct
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,41 @@ query IT
SELECT x, jsonb_agg(DISTINCT jsonb_build_object('y', y, 'z', z)) FROM (SELECT * FROM t ORDER BY i) GROUP BY x
----
1 [{"y": 2, "z": "hello"}, {"y": 2, "z": "hello there"}]

# Regression test for #168317. Under null_ordered_last, a SELECT DISTINCT whose
# ORDER BY references select-list columns synthesizes (col IS NULL) ordering
# columns to implement NULLS LAST. These must not raise a spurious "ORDER BY
# expressions must appear in select list" error, and NULLs must sort last.

statement ok
SET null_ordered_last = on

statement ok
CREATE TABLE distinct_null_order (a INT, b INT)

statement ok
INSERT INTO distinct_null_order VALUES (1, 1), (1, 1), (2, NULL), (NULL, 3), (NULL, NULL)

query II
SELECT DISTINCT a, b FROM distinct_null_order ORDER BY a, b
----
1 1
2 NULL
NULL 3
NULL NULL

# A column referenced by ORDER BY but absent from the select list must still
# error, even though null_ordered_last synthesizes an IS NULL column for it.
statement error for SELECT DISTINCT, ORDER BY expressions must appear in select list
SELECT DISTINCT a FROM distinct_null_order ORDER BY b

# The originally reported symptom: DROP USER internally runs a
# SELECT DISTINCT ... ORDER BY against system.privileges, which hit the bug.
statement ok
CREATE USER testuser168317

statement ok
DROP USER testuser168317

statement ok
RESET null_ordered_last
69 changes: 47 additions & 22 deletions pkg/sql/opt/optbuilder/distinct.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,22 @@ func (b *Builder) constructDistinct(inScope *scope) memo.RelExpr {
// This will cause an error for queries like:
// SELECT DISTINCT a FROM t ORDER BY b
// Note: this behavior is consistent with PostgreSQL.
//
// An exception is made for the synthesized (col IS NULL) columns that
// buildOrderBy adds to implement non-default NULLS ordering (e.g. when
// null_ordered_last is set, or NULLS FIRST/LAST is given explicitly). When
// col is itself a projected column, the synthesized column is functionally
// determined by it and does not change the DISTINCT result, so we treat it
// as a grouping column. This mirrors case 3 in buildDistinctOn.
for _, col := range inScope.ordering {
if !private.GroupingCols.Contains(col.ID()) {
panic(pgerror.Newf(
pgcode.InvalidColumnReference,
"for SELECT DISTINCT, ORDER BY expressions must appear in select list",
))
if !b.isSynthesizedNullsOrderingCol(inScope, col.ID(), private.GroupingCols) {
panic(pgerror.Newf(
pgcode.InvalidColumnReference,
"for SELECT DISTINCT, ORDER BY expressions must appear in select list",
))
}
private.GroupingCols.Add(col.ID())
}
}

Expand All @@ -44,6 +54,34 @@ func (b *Builder) constructDistinct(inScope *scope) memo.RelExpr {
return b.factory.ConstructDistinctOn(input, memo.EmptyAggregationsExpr, &private)
}

// isSynthesizedNullsOrderingCol reports whether the ordering column identified
// by colID is a synthesized "(col IS NULL)" expression where col is one of the
// columns in groupingCols. buildOrderBy synthesizes such columns to implement
// non-default NULLS ordering (e.g. when null_ordered_last is set, or NULLS
// FIRST/LAST is requested explicitly). Because (col IS NULL) is functionally
// determined by col, treating it as a grouping column does not change the
// result of a DISTINCT or DISTINCT ON.
func (b *Builder) isSynthesizedNullsOrderingCol(
inScope *scope, colID opt.ColumnID, groupingCols opt.ColSet,
) bool {
scopeCol := inScope.getColumn(colID)
if scopeCol == nil {
return false
}
isExpr, isIs := scopeCol.scalar.(*memo.IsExpr)
if !isIs {
return false
}
if _, isNull := isExpr.Right.(*memo.NullExpr); !isNull {
return false
}
v, isVar := isExpr.Left.(*memo.VariableExpr)
if !isVar {
return false
}
return groupingCols.Contains(v.Col)
}

// buildDistinctOn builds a set of memo groups that represent a DISTINCT ON
// expression. If nullsAreDistinct is true, then construct the UpsertDistinctOn
// operator rather than the DistinctOn operator (see the UpsertDistinctOn
Expand Down Expand Up @@ -84,29 +122,16 @@ func (b *Builder) buildDistinctOn(
var seen opt.ColSet
for _, col := range inScope.ordering {
if !distinctOnCols.Contains(col.ID()) {
colIsValid := false
scopeCol := inScope.getColumn(col.ID())
if scopeCol != nil {
if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok {
if _, ok := isExpr.Right.(*memo.NullExpr); ok {
if v, ok := isExpr.Left.(*memo.VariableExpr); ok {
if distinctOnCols.Contains(v.Col) {
// We have a col IS NULL expression (case 3 above).
// Add the new column to distinctOnCols, since it doesn't change
// the semantics of the DISTINCT ON.
distinctOnCols.Add(col.ID())
colIsValid = true
}
}
}
}
}
if !colIsValid {
if !b.isSynthesizedNullsOrderingCol(inScope, col.ID(), distinctOnCols) {
panic(pgerror.Newf(
pgcode.InvalidColumnReference,
"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
))
}
// We have a (col IS NULL) expression (case 3 above). Add the new
// column to distinctOnCols, since it doesn't change the semantics of
// the DISTINCT ON.
distinctOnCols.Add(col.ID())
}
seen.Add(col.ID())
if seen.Equals(distinctOnCols) {
Expand Down
32 changes: 32 additions & 0 deletions pkg/sql/opt/optbuilder/testdata/distinct
Original file line number Diff line number Diff line change
Expand Up @@ -317,3 +317,35 @@ sort
│ └── columns: a:1!null b:2!null c:3!null d:4!null crdb_internal_mvcc_timestamp:5 tableoid:6
└── projections
└── 1 [as=x:7]

# Regression test for #168317. Under null_ordered_last, a SELECT DISTINCT whose
# ORDER BY references select-list columns synthesizes (col IS NULL) ordering
# columns (to implement non-default NULLS ordering). These synthesized columns
# must not trigger a spurious "ORDER BY expressions must appear in select list"
# error, mirroring the DISTINCT ON handling (case 3 in buildDistinctOn).
build set=null_ordered_last=true
SELECT DISTINCT y, z FROM xyz ORDER BY y, z
----
distinct-on
├── columns: y:2 z:3 [hidden: nulls_ordering_y:6!null nulls_ordering_z:7!null]
├── grouping columns: y:2 z:3 nulls_ordering_y:6!null nulls_ordering_z:7!null
├── ordering: +6,+2,+7,+3
└── sort
├── columns: y:2 z:3 nulls_ordering_y:6!null nulls_ordering_z:7!null
├── ordering: +6,+2,+7,+3
└── project
├── columns: nulls_ordering_y:6!null nulls_ordering_z:7!null y:2 z:3
├── project
│ ├── columns: y:2 z:3
│ └── scan xyz
│ └── columns: x:1!null y:2 z:3 crdb_internal_mvcc_timestamp:4 tableoid:5
└── projections
├── y:2 IS NULL [as=nulls_ordering_y:6]
└── z:3 IS NULL [as=nulls_ordering_z:7]

# A genuinely invalid query (ORDER BY a non-projected column) must still error,
# even though null_ordered_last synthesizes an IS NULL column for it.
build set=null_ordered_last=true
SELECT DISTINCT y FROM xyz ORDER BY z
----
error (42P10): for SELECT DISTINCT, ORDER BY expressions must appear in select list