Do not miss the opportunity to FQS certain joins
authorPavan Deolasee <[email protected]>
Mon, 21 Jan 2019 11:53:50 +0000 (17:23 +0530)
committerPavan Deolasee <[email protected]>
Mon, 21 Jan 2019 12:58:34 +0000 (18:28 +0530)
When a query involves a 3-way (or N-way) join between two distributed tables and
a replicated table, the join should be shippable to the datanodes as long as:
(a) the join between the distributed tables is an equi-join on the distribution
column, with the same distribution property, AND (b) the join between the result
of that join and the replicated table is either an INNER JOIN or a LEFT JOIN
where the right side is the replicated table.

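This fix allows such joins to be pushed down to the datanodes. When
pgxc_merge_exec_nodes() copies the node list of one input, it now also carries
over that input's baselocatortype instead of unconditionally setting it to
LOCATOR_TYPE_DISTRIBUTED. A few test cases have been added to cover this
functionality.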

src/backend/optimizer/util/pgxcship.c
src/test/regress/expected/xl_join.out
src/test/regress/sql/xl_join.sql

index 14dd4a833484af2e00df82e4d76a9974a9e76070..1f0d29ee802956f72df65f67e630f77c1b960979 100644 (file)
@@ -1641,7 +1641,7 @@ pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2)
                else
                {
                        merged_en->nodeList = list_copy(en2->nodeList);
-                       merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED;
+                       merged_en->baselocatortype = en2->baselocatortype;
                }
                return merged_en;
        }
@@ -1667,7 +1667,7 @@ pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2)
                else
                {
                        merged_en->nodeList = list_copy(en1->nodeList);
-                       merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED;
+                       merged_en->baselocatortype = en1->baselocatortype;
                }
                return merged_en;
        }
index 463e1baa4c3f973b1ba23e5b08f9d499648651aa..edf3087c099a988e29767fda3ba3a46259f23e5d 100644 (file)
@@ -1,6 +1,8 @@
 CREATE TABLE xl_join_t1 (val1 int, val2 int);
 CREATE TABLE xl_join_t2 (val1 int, val2 int);
 CREATE TABLE xl_join_t3 (val1 int, val2 int);
+CREATE TABLE xl_join_t4 (val1 int, val2 int) DISTRIBUTE BY REPLICATION;
+CREATE TABLE xl_join_t5 (val1 int, val2 int) DISTRIBUTE BY REPLICATION;
 INSERT INTO xl_join_t1 VALUES (1,10),(2,20);
 INSERT INTO xl_join_t2 VALUES (3,30),(4,40);
 INSERT INTO xl_join_t3 VALUES (5,50),(6,60);
@@ -36,6 +38,523 @@ SELECT * FROM xl_join_t1
 ------+------+------+------+------+------
 (0 rows)
 
+-- Join on two replicated tables should get shipped, irrespective of the join
+-- columns.
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t4
+       INNER JOIN xl_join_t5 ON xl_join_t4.val1 = xl_join_t5.val1;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1
+   ->  Merge Join
+         Merge Cond: (xl_join_t4.val1 = xl_join_t5.val1)
+         ->  Sort
+               Sort Key: xl_join_t4.val1
+               ->  Seq Scan on xl_join_t4
+         ->  Sort
+               Sort Key: xl_join_t5.val1
+               ->  Seq Scan on xl_join_t5
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t4
+       INNER JOIN xl_join_t5 ON xl_join_t4.val1 = xl_join_t5.val2;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1
+   ->  Merge Join
+         Merge Cond: (xl_join_t4.val1 = xl_join_t5.val2)
+         ->  Sort
+               Sort Key: xl_join_t4.val1
+               ->  Seq Scan on xl_join_t4
+         ->  Sort
+               Sort Key: xl_join_t5.val2
+               ->  Seq Scan on xl_join_t5
+(10 rows)
+
+-- Join on a distributed and one/more replicated tables should get shipped.
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t3
+       INNER JOIN xl_join_t5 ON xl_join_t3.val1 = xl_join_t5.val1;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Merge Join
+         Merge Cond: (xl_join_t3.val1 = xl_join_t5.val1)
+         ->  Sort
+               Sort Key: xl_join_t3.val1
+               ->  Seq Scan on xl_join_t3
+         ->  Sort
+               Sort Key: xl_join_t5.val1
+               ->  Seq Scan on xl_join_t5
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t3
+       INNER JOIN xl_join_t5 ON xl_join_t3.val1 = xl_join_t5.val2
+       INNER JOIN xl_join_t4 ON xl_join_t3.val1 = xl_join_t4.val2;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Merge Join
+         Merge Cond: (xl_join_t4.val2 = xl_join_t3.val1)
+         ->  Sort
+               Sort Key: xl_join_t4.val2
+               ->  Seq Scan on xl_join_t4
+         ->  Materialize
+               ->  Merge Join
+                     Merge Cond: (xl_join_t3.val1 = xl_join_t5.val2)
+                     ->  Sort
+                           Sort Key: xl_join_t3.val1
+                           ->  Seq Scan on xl_join_t3
+                     ->  Sort
+                           Sort Key: xl_join_t5.val2
+                           ->  Seq Scan on xl_join_t5
+(16 rows)
+
+-- Equi-join on distribution column should get shipped
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Merge Join
+         Merge Cond: (xl_join_t1.val1 = xl_join_t2.val1)
+         ->  Sort
+               Sort Key: xl_join_t1.val1
+               ->  Seq Scan on xl_join_t1
+         ->  Sort
+               Sort Key: xl_join_t2.val1
+               ->  Seq Scan on xl_join_t2
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Merge Join
+         Merge Cond: (xl_join_t3.val1 = xl_join_t1.val1)
+         ->  Sort
+               Sort Key: xl_join_t3.val1
+               ->  Seq Scan on xl_join_t3
+         ->  Materialize
+               ->  Merge Join
+                     Merge Cond: (xl_join_t1.val1 = xl_join_t2.val1)
+                     ->  Sort
+                           Sort Key: xl_join_t1.val1
+                           ->  Seq Scan on xl_join_t1
+                     ->  Sort
+                           Sort Key: xl_join_t2.val1
+                           ->  Seq Scan on xl_join_t2
+(16 rows)
+
+-- Equi-join on non-distribution column should not get shipped
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val2 = xl_join_t2.val2;
+                              QUERY PLAN                               
+-----------------------------------------------------------------------
+ Remote Subquery Scan on all (datanode_1,datanode_2)
+   ->  Merge Join
+         Merge Cond: (xl_join_t1.val2 = xl_join_t2.val2)
+         ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+               Distribute results by H: val2
+               ->  Sort
+                     Sort Key: xl_join_t1.val2
+                     ->  Seq Scan on xl_join_t1
+         ->  Materialize
+               ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+                     Distribute results by H: val2
+                     ->  Sort
+                           Sort Key: xl_join_t2.val2
+                           ->  Seq Scan on xl_join_t2
+(14 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val2 = xl_join_t2.val2
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Remote Subquery Scan on all (datanode_1,datanode_2)
+   ->  Merge Join
+         Merge Cond: (xl_join_t2.val2 = xl_join_t1.val2)
+         ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+               Distribute results by H: val2
+               ->  Sort
+                     Sort Key: xl_join_t2.val2
+                     ->  Seq Scan on xl_join_t2
+         ->  Materialize
+               ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+                     Distribute results by H: val2
+                     ->  Sort
+                           Sort Key: xl_join_t1.val2
+                           ->  Merge Join
+                                 Merge Cond: (xl_join_t1.val1 = xl_join_t3.val1)
+                                 ->  Sort
+                                       Sort Key: xl_join_t1.val1
+                                       ->  Seq Scan on xl_join_t1
+                                 ->  Sort
+                                       Sort Key: xl_join_t3.val1
+                                       ->  Seq Scan on xl_join_t3
+(21 rows)
+
+-- Equi-join on distribution column and replicated table(s) should get shipped.
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Merge Join
+         Merge Cond: (xl_join_t1.val1 = xl_join_t4.val1)
+         ->  Merge Join
+               Merge Cond: (xl_join_t1.val1 = xl_join_t2.val1)
+               ->  Sort
+                     Sort Key: xl_join_t1.val1
+                     ->  Seq Scan on xl_join_t1
+               ->  Sort
+                     Sort Key: xl_join_t2.val1
+                     ->  Seq Scan on xl_join_t2
+         ->  Materialize
+               ->  Merge Join
+                     Merge Cond: (xl_join_t4.val1 = xl_join_t5.val2)
+                     ->  Sort
+                           Sort Key: xl_join_t4.val1
+                           ->  Seq Scan on xl_join_t4
+                     ->  Sort
+                           Sort Key: xl_join_t5.val2
+                           ->  Seq Scan on xl_join_t5
+(21 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Merge Join
+         Merge Cond: (xl_join_t1.val1 = xl_join_t3.val1)
+         ->  Merge Join
+               Merge Cond: (xl_join_t1.val1 = xl_join_t2.val1)
+               ->  Sort
+                     Sort Key: xl_join_t1.val1
+                     ->  Seq Scan on xl_join_t1
+               ->  Sort
+                     Sort Key: xl_join_t2.val1
+                     ->  Seq Scan on xl_join_t2
+         ->  Materialize
+               ->  Merge Join
+                     Merge Cond: (xl_join_t5.val2 = xl_join_t3.val1)
+                     ->  Sort
+                           Sort Key: xl_join_t5.val2
+                           ->  Seq Scan on xl_join_t5
+                     ->  Materialize
+                           ->  Merge Join
+                                 Merge Cond: (xl_join_t3.val1 = xl_join_t4.val1)
+                                 ->  Sort
+                                       Sort Key: xl_join_t3.val1
+                                       ->  Seq Scan on xl_join_t3
+                                 ->  Sort
+                                       Sort Key: xl_join_t4.val1
+                                       ->  Seq Scan on xl_join_t4
+(27 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON (xl_join_t1.val1 = xl_join_t2.val1 AND xl_join_t1.val2 = xl_join_t2.val2)
+       INNER JOIN xl_join_t3 ON (xl_join_t1.val1 = xl_join_t3.val1 AND xl_join_t1.val2 = xl_join_t3.val2)
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2;
+                                                              QUERY PLAN                                                               
+---------------------------------------------------------------------------------------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Hash Join
+         Hash Cond: (xl_join_t5.val2 = xl_join_t1.val1)
+         ->  Seq Scan on xl_join_t5
+         ->  Hash
+               ->  Hash Join
+                     Hash Cond: (xl_join_t4.val1 = xl_join_t1.val1)
+                     ->  Seq Scan on xl_join_t4
+                     ->  Hash
+                           ->  Hash Join
+                                 Hash Cond: ((xl_join_t3.val1 = xl_join_t1.val1) AND (xl_join_t3.val2 = xl_join_t1.val2))
+                                 ->  Seq Scan on xl_join_t3
+                                 ->  Hash
+                                       ->  Merge Join
+                                             Merge Cond: ((xl_join_t1.val1 = xl_join_t2.val1) AND (xl_join_t1.val2 = xl_join_t2.val2))
+                                             ->  Sort
+                                                   Sort Key: xl_join_t1.val1, xl_join_t1.val2
+                                                   ->  Seq Scan on xl_join_t1
+                                             ->  Sort
+                                                   Sort Key: xl_join_t2.val1, xl_join_t2.val2
+                                                   ->  Seq Scan on xl_join_t2
+(22 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON (xl_join_t1.val1 = xl_join_t2.val1 AND xl_join_t1.val2 = xl_join_t2.val2)
+       INNER JOIN xl_join_t5 ON (xl_join_t1.val1 = xl_join_t5.val1 AND xl_join_t1.val2 = xl_join_t5.val2)
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+                                                              QUERY PLAN                                                               
+---------------------------------------------------------------------------------------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Hash Join
+         Hash Cond: (xl_join_t3.val1 = xl_join_t1.val1)
+         ->  Seq Scan on xl_join_t3
+         ->  Hash
+               ->  Hash Join
+                     Hash Cond: (xl_join_t4.val1 = xl_join_t1.val1)
+                     ->  Seq Scan on xl_join_t4
+                     ->  Hash
+                           ->  Hash Join
+                                 Hash Cond: ((xl_join_t5.val1 = xl_join_t1.val1) AND (xl_join_t5.val2 = xl_join_t1.val2))
+                                 ->  Seq Scan on xl_join_t5
+                                 ->  Hash
+                                       ->  Merge Join
+                                             Merge Cond: ((xl_join_t1.val1 = xl_join_t2.val1) AND (xl_join_t1.val2 = xl_join_t2.val2))
+                                             ->  Sort
+                                                   Sort Key: xl_join_t1.val1, xl_join_t1.val2
+                                                   ->  Seq Scan on xl_join_t1
+                                             ->  Sort
+                                                   Sort Key: xl_join_t2.val1, xl_join_t2.val2
+                                                   ->  Seq Scan on xl_join_t2
+(22 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t5 ON (xl_join_t1.val1 = xl_join_t5.val1 AND xl_join_t1.val2 = xl_join_t5.val2)
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+                                                  QUERY PLAN                                                   
+---------------------------------------------------------------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Hash Join
+         Hash Cond: (xl_join_t3.val1 = xl_join_t1.val1)
+         ->  Seq Scan on xl_join_t3
+         ->  Hash
+               ->  Merge Join
+                     Merge Cond: ((xl_join_t1.val1 = xl_join_t5.val1) AND (xl_join_t1.val2 = xl_join_t5.val2))
+                     ->  Sort
+                           Sort Key: xl_join_t1.val1, xl_join_t1.val2
+                           ->  Seq Scan on xl_join_t1
+                     ->  Sort
+                           Sort Key: xl_join_t5.val1, xl_join_t5.val2
+                           ->  Seq Scan on xl_join_t5
+(14 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2
+       WHERE xl_join_t1.val1 = 1;
+                      QUERY PLAN                      
+------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Nested Loop
+         ->  Nested Loop
+               ->  Seq Scan on xl_join_t1
+                     Filter: (val1 = 1)
+               ->  Materialize
+                     ->  Seq Scan on xl_join_t2
+                           Filter: (val1 = 1)
+         ->  Materialize
+               ->  Nested Loop
+                     ->  Seq Scan on xl_join_t4
+                           Filter: (val1 = 1)
+                     ->  Materialize
+                           ->  Seq Scan on xl_join_t5
+                                 Filter: (val2 = 1)
+(16 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2
+       WHERE xl_join_t1.val1 IN (1, 3);
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Merge Join
+         Merge Cond: (xl_join_t1.val1 = xl_join_t5.val2)
+         ->  Merge Join
+               Merge Cond: (xl_join_t1.val1 = xl_join_t4.val1)
+               ->  Sort
+                     Sort Key: xl_join_t1.val1
+                     ->  Hash Join
+                           Hash Cond: (xl_join_t2.val1 = xl_join_t1.val1)
+                           ->  Seq Scan on xl_join_t2
+                           ->  Hash
+                                 ->  Seq Scan on xl_join_t1
+                                       Filter: (val1 = ANY ('{1,3}'::integer[]))
+               ->  Sort
+                     Sort Key: xl_join_t4.val1
+                     ->  Seq Scan on xl_join_t4
+         ->  Sort
+               Sort Key: xl_join_t5.val2
+               ->  Seq Scan on xl_join_t5
+(20 rows)
+
+-- LEFT JOIN should get shipped when the right side is replicated
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       LEFT JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Merge Left Join
+         Merge Cond: (xl_join_t1.val1 = xl_join_t4.val1)
+         ->  Sort
+               Sort Key: xl_join_t1.val1
+               ->  Seq Scan on xl_join_t1
+         ->  Sort
+               Sort Key: xl_join_t4.val1
+               ->  Seq Scan on xl_join_t4
+(10 rows)
+
+-- But not when the left side is replicated
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t4
+       LEFT JOIN xl_join_t1 ON xl_join_t1.val1 = xl_join_t4.val1;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Subquery Scan on all (datanode_1,datanode_2)
+   ->  Merge Left Join
+         Merge Cond: (xl_join_t4.val1 = xl_join_t1.val1)
+         ->  Remote Subquery Scan on all (datanode_1)
+               Distribute results by H: val1
+               ->  Sort
+                     Sort Key: xl_join_t4.val1
+                     ->  Seq Scan on xl_join_t4
+         ->  Sort
+               Sort Key: xl_join_t1.val1
+               ->  Seq Scan on xl_join_t1
+(11 rows)
+
+-- Similarly RIGHT JOIN is not shipped when the right side is replicated
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       RIGHT JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Subquery Scan on all (datanode_1,datanode_2)
+   ->  Merge Left Join
+         Merge Cond: (xl_join_t4.val1 = xl_join_t1.val1)
+         ->  Remote Subquery Scan on all (datanode_1)
+               Distribute results by H: val1
+               ->  Sort
+                     Sort Key: xl_join_t4.val1
+                     ->  Seq Scan on xl_join_t4
+         ->  Sort
+               Sort Key: xl_join_t1.val1
+               ->  Seq Scan on xl_join_t1
+(11 rows)
+
+-- FULL JOIN is shipped only when both sides are replicated or when it's an
+-- equi-join on distribution column
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       FULL JOIN xl_join_t2 ON (xl_join_t1.val1 = xl_join_t2.val1);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1, datanode_2
+   ->  Merge Full Join
+         Merge Cond: (xl_join_t1.val1 = xl_join_t2.val1)
+         ->  Sort
+               Sort Key: xl_join_t1.val1
+               ->  Seq Scan on xl_join_t1
+         ->  Sort
+               Sort Key: xl_join_t2.val1
+               ->  Seq Scan on xl_join_t2
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t4
+       FULL JOIN xl_join_t5 ON (xl_join_t4.val1 = xl_join_t5.val1);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Fast Query Execution
+   Node/s: datanode_1
+   ->  Merge Full Join
+         Merge Cond: (xl_join_t4.val1 = xl_join_t5.val1)
+         ->  Sort
+               Sort Key: xl_join_t4.val1
+               ->  Seq Scan on xl_join_t4
+         ->  Sort
+               Sort Key: xl_join_t5.val1
+               ->  Seq Scan on xl_join_t5
+(10 rows)
+
+-- FULL JOIN on a distributed and replicated table is not shipped
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       FULL JOIN xl_join_t5 ON (xl_join_t1.val1 = xl_join_t5.val1);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Remote Subquery Scan on all (datanode_1,datanode_2)
+   ->  Merge Full Join
+         Merge Cond: (xl_join_t5.val1 = xl_join_t1.val1)
+         ->  Remote Subquery Scan on all (datanode_1)
+               Distribute results by H: val1
+               ->  Sort
+                     Sort Key: xl_join_t5.val1
+                     ->  Seq Scan on xl_join_t5
+         ->  Sort
+               Sort Key: xl_join_t1.val1
+               ->  Seq Scan on xl_join_t1
+(11 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       FULL JOIN xl_join_t5 ON (xl_join_t1.val1 = xl_join_t5.val1)
+       INNER JOIN xl_join_t2 ON (xl_join_t1.val1 = xl_join_t2.val1);
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Remote Subquery Scan on all (datanode_1,datanode_2)
+   ->  Merge Join
+         Merge Cond: (xl_join_t2.val1 = xl_join_t1.val1)
+         ->  Sort
+               Sort Key: xl_join_t2.val1
+               ->  Seq Scan on xl_join_t2
+         ->  Materialize
+               ->  Merge Left Join
+                     Merge Cond: (xl_join_t1.val1 = xl_join_t5.val1)
+                     ->  Sort
+                           Sort Key: xl_join_t1.val1
+                           ->  Seq Scan on xl_join_t1
+                     ->  Sort
+                           Sort Key: xl_join_t5.val1
+                           ->  Seq Scan on xl_join_t5
+(15 rows)
+
 DROP TABLE xl_join_t1;
 DROP TABLE xl_join_t2;
 DROP TABLE xl_join_t3;
+DROP TABLE xl_join_t4;
+DROP TABLE xl_join_t5;
index 1b63d20a074f2aba0e03866017974dc361d7ec17..c7def5db0e9d47fc7077fb1e22ede2415aa9d3b5 100644 (file)
@@ -2,6 +2,8 @@
 CREATE TABLE xl_join_t1 (val1 int, val2 int);
 CREATE TABLE xl_join_t2 (val1 int, val2 int);
 CREATE TABLE xl_join_t3 (val1 int, val2 int);
+CREATE TABLE xl_join_t4 (val1 int, val2 int) DISTRIBUTE BY REPLICATION;
+CREATE TABLE xl_join_t5 (val1 int, val2 int) DISTRIBUTE BY REPLICATION;
 
 INSERT INTO xl_join_t1 VALUES (1,10),(2,20);
 INSERT INTO xl_join_t2 VALUES (3,30),(4,40);
@@ -16,6 +18,128 @@ SELECT * FROM xl_join_t1
        INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val2 
        INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
 
+-- Join on two replicated tables should get shipped, irrespective of the join
+-- columns.
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t4
+       INNER JOIN xl_join_t5 ON xl_join_t4.val1 = xl_join_t5.val1;
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t4
+       INNER JOIN xl_join_t5 ON xl_join_t4.val1 = xl_join_t5.val2;
+
+
+-- Join on a distributed and one/more replicated tables should get shipped.
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t3
+       INNER JOIN xl_join_t5 ON xl_join_t3.val1 = xl_join_t5.val1;
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t3
+       INNER JOIN xl_join_t5 ON xl_join_t3.val1 = xl_join_t5.val2
+       INNER JOIN xl_join_t4 ON xl_join_t3.val1 = xl_join_t4.val2;
+
+-- Equi-join on distribution column should get shipped
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1;
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+
+-- Equi-join on non-distribution column should not get shipped
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val2 = xl_join_t2.val2;
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val2 = xl_join_t2.val2
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+
+-- Equi-join on distribution column and replicated table(s) should get shipped.
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2;
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON (xl_join_t1.val1 = xl_join_t2.val1 AND xl_join_t1.val2 = xl_join_t2.val2)
+       INNER JOIN xl_join_t3 ON (xl_join_t1.val1 = xl_join_t3.val1 AND xl_join_t1.val2 = xl_join_t3.val2)
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON (xl_join_t1.val1 = xl_join_t2.val1 AND xl_join_t1.val2 = xl_join_t2.val2)
+       INNER JOIN xl_join_t5 ON (xl_join_t1.val1 = xl_join_t5.val1 AND xl_join_t1.val2 = xl_join_t5.val2)
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t5 ON (xl_join_t1.val1 = xl_join_t5.val1 AND xl_join_t1.val2 = xl_join_t5.val2)
+       INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2
+       WHERE xl_join_t1.val1 = 1;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val1
+       INNER JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1
+       INNER JOIN xl_join_t5 ON xl_join_t1.val1 = xl_join_t5.val2
+       WHERE xl_join_t1.val1 IN (1, 3);
+
+
+-- LEFT JOIN should get shipped when the right side is replicated
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       LEFT JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1;
+
+-- But not when the left side is replicated
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t4
+       LEFT JOIN xl_join_t1 ON xl_join_t1.val1 = xl_join_t4.val1;
+
+-- Similarly RIGHT JOIN is not shipped when the right side is replicated
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       RIGHT JOIN xl_join_t4 ON xl_join_t1.val1 = xl_join_t4.val1;
+
+-- FULL JOIN is shipped only when both sides are replicated or when it's an
+-- equi-join on distribution column
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       FULL JOIN xl_join_t2 ON (xl_join_t1.val1 = xl_join_t2.val1);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t4
+       FULL JOIN xl_join_t5 ON (xl_join_t4.val1 = xl_join_t5.val1);
+
+-- FULL JOIN on a distributed and replicated table is not shipped
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       FULL JOIN xl_join_t5 ON (xl_join_t1.val1 = xl_join_t5.val1);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM xl_join_t1
+       FULL JOIN xl_join_t5 ON (xl_join_t1.val1 = xl_join_t5.val1)
+       INNER JOIN xl_join_t2 ON (xl_join_t1.val1 = xl_join_t2.val1);
+
 DROP TABLE xl_join_t1;
 DROP TABLE xl_join_t2;
 DROP TABLE xl_join_t3;
+DROP TABLE xl_join_t4;
+DROP TABLE xl_join_t5;