Fix distinctness check for queries with grouping sets

author Richard Guo <[email protected]>
Tue, 9 Dec 2025 08:09:27 +0000 (17:09 +0900)
committer Richard Guo <[email protected]>
Tue, 9 Dec 2025 08:09:27 +0000 (17:09 +0900)
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index e592e1ac3d127643ff45172c98ba859f754a6119..296a894b8578fc347db29a40880a4598cd0049aa 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -31,6 +31,7 @@
  #include "optimizer/placeholder.h"
  #include "optimizer/planmain.h"
  #include "optimizer/restrictinfo.h"
+#include "parser/parse_agg.h"
  #include "rewrite/rewriteManip.h"
  #include "utils/lsyscache.h"
  
@@ -1175,6 +1176,8 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
     }
     else if (query->groupingSets)
     {
+       List       *gsets;
+
         /*
          * If we have grouping sets with expressions, we probably don't have
          * uniqueness and analysis would be hard. Punt.
@@ -1184,15 +1187,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
  
         /*
          * If we have no groupClause (therefore no grouping expressions), we
-        * might have one or many empty grouping sets. If there's just one,
-        * then we're returning only one row and are certainly unique. But
-        * otherwise, we know we're certainly not unique.
+        * might have one or many empty grouping sets.  If there's just one,
+        * or if the DISTINCT clause is used on the GROUP BY, then we're
+        * returning only one row and are certainly unique.  But otherwise, we
+        * know we're certainly not unique.
          */
-       if (list_length(query->groupingSets) == 1 &&
-           ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
+       if (query->groupDistinct)
             return true;
-       else
-           return false;
+
+       gsets = expand_grouping_sets(query->groupingSets, false, -1);
+
+       return (list_length(gsets) == 1);
     }
     else
     {
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 2a0a457a7d9f896a02e9e699947cdf0ccb524c72..edde9e99893a0eb2700017941c1700d85c9c535c 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6161,6 +6161,40 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
   Seq Scan on d
  (1 row)
  
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique by GROUPING SETS
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by ()) s
+  on d.a = s.x;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets(())) s
+  on d.a = s.x;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets(()), grouping sets(())) s
+  on d.a = s.x;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by distinct grouping sets((), ())) s
+  on d.a = s.x;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
  -- similarly, but keying off a DISTINCT clause
  explain (costs off)
  select d.* from d left join (select distinct * from b) s
@@ -6189,6 +6223,55 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
           ->  Seq Scan on d
  (8 rows)
  
+-- join removal is not possible when the GROUP BY contains non-empty grouping
+-- sets or multiple empty grouping sets
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by rollup(x)) s
+  on d.a = s.x;
+           QUERY PLAN            
+---------------------------------
+ Hash Left Join
+   Hash Cond: (d.a = (1))
+   ->  Seq Scan on d
+   ->  Hash
+         ->  MixedAggregate
+               Hash Key: 1
+               Group Key: ()
+               ->  Seq Scan on b
+(8 rows)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets((), ())) s
+  on d.a = s.x;
+               QUERY PLAN                
+-----------------------------------------
+ Hash Left Join
+   Hash Cond: (d.a = (1))
+   ->  Seq Scan on d
+   ->  Hash
+         ->  Append
+               ->  Result
+                     Replaces: Aggregate
+               ->  Result
+                     Replaces: Aggregate
+(9 rows)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets((), grouping sets(()))) s
+  on d.a = s.x;
+               QUERY PLAN                
+-----------------------------------------
+ Hash Left Join
+   Hash Cond: (d.a = (1))
+   ->  Seq Scan on d
+   ->  Hash
+         ->  Append
+               ->  Result
+                     Replaces: Aggregate
+               ->  Result
+                     Replaces: Aggregate
+(9 rows)
+
  -- similarly, but keying off a DISTINCT clause
  explain (costs off)
  select d.* from d left join (select distinct * from b) s
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c47118e9291a3f6d3e4687bb7deb0b5e528b773b..7ec84f3b143600c7aaf6e20d93df1dc09aff2a65 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2232,6 +2232,24 @@ explain (costs off)
  select d.* from d left join (select * from b group by b.id, b.c_id) s
    on d.a = s.id and d.b = s.c_id;
  
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique by GROUPING SETS
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by ()) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets(())) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets(()), grouping sets(())) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by distinct grouping sets((), ())) s
+  on d.a = s.x;
+
  -- similarly, but keying off a DISTINCT clause
  explain (costs off)
  select d.* from d left join (select distinct * from b) s
@@ -2245,6 +2263,20 @@ explain (costs off)
  select d.* from d left join (select * from b group by b.id, b.c_id) s
    on d.a = s.id;
  
+-- join removal is not possible when the GROUP BY contains non-empty grouping
+-- sets or multiple empty grouping sets
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by rollup(x)) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets((), ())) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets((), grouping sets(()))) s
+  on d.a = s.x;
+
  -- similarly, but keying off a DISTINCT clause
  explain (costs off)
  select d.* from d left join (select distinct * from b) s
author	Richard Guo <[email protected]>
	Tue, 9 Dec 2025 08:09:27 +0000 (17:09 +0900)
committer	Richard Guo <[email protected]>
	Tue, 9 Dec 2025 08:09:27 +0000 (17:09 +0900)
src/backend/optimizer/plan/analyzejoins.c		patch | blob | blame | history
src/test/regress/expected/join.out		patch | blob | blame | history
src/test/regress/sql/join.sql		patch | blob | blame | history