one or more consumers to finish.
We have seen a bunch of cases where a consumer may never bind to the SharedQ,
and rightfully so. For example, in a join between 3 tables that requires
redistribution of tuples, a consumer may not bind to the SharedQ at all
because the top-level outer side did not produce any tuples to join against
the redistributed inner node.
This patch avoids the unnecessary FATAL errors, but what we still do not
handle nicely is the 10s timeout (as currently set for the producer). So while
queries such as the one included in the test case will eventually return
successfully, the response time is unnecessarily inflated by a 10s delay. This
is a TODO.
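
To illustrate the TODO, here is a minimal sketch of the producer-side wait
pattern, assuming PostgreSQL's three-argument WaitLatch() API from
storage/latch.h; count_unfinished_consumers() and PRODUCER_WAIT_TIMEOUT_MS
are hypothetical names introduced for this example only, not symbols from
the patch:

    #include "postgres.h"
    #include "storage/latch.h"

    /* hypothetical constant, mirroring the 10s producer timeout above */
    #define PRODUCER_WAIT_TIMEOUT_MS    (10 * 1000)

    static void
    producer_wait_for_consumers(SharedQueue squeue, SQueueSync *sqsync)
    {
        for (;;)
        {
            int     c_count;
            int     rc;

            /* hypothetical helper: count consumers not yet CONSUMER_DONE */
            c_count = count_unfinished_consumers(squeue);
            if (c_count == 0)
                break;          /* all consumers finished; safe to unbind */

            /*
             * A consumer that never bound (cs_pid == 0) will never set the
             * producer latch, so this wait can only end via the timeout,
             * which is the source of the unnecessary 10s delay noted above.
             */
            rc = WaitLatch(&sqsync->sqs_producer_latch,
                           WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT,
                           PRODUCER_WAIT_TIMEOUT_MS);
            if (rc & WL_TIMEOUT)
                break;          /* give up waiting: WARNING, not FATAL */
        }
    }

One way to address the TODO might be to skip the WaitLatch() call entirely
when every remaining consumer has cs_pid == 0, since a consumer that never
bound can never set the producer latch.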
* consumers.
*/
if (queryDesc->squeue)
- SharedQueueUnBind(queryDesc->squeue);
+ SharedQueueUnBind(queryDesc->squeue, true);
FreeQueryDesc(queryDesc);
}
}
/* wait while consumers are finishing and release shared resources */
if (myState->squeue)
- SharedQueueUnBind(myState->squeue);
+ SharedQueueUnBind(myState->squeue, false);
myState->squeue = NULL;
/* Release workspace if any */
* set the tuplestore parameter to NULL.
*/
void
-SharedQueueUnBind(SharedQueue squeue)
+SharedQueueUnBind(SharedQueue squeue, bool failed)
{
SQueueSync *sqsync = squeue->sq_sync;
int wait_result = 0;
{
int i;
int c_count = 0;
+ int unbound_count = 0;
/* check queue states */
for (i = 0; i < squeue->sq_nconsumers; i++)
ConsState *cstate = &squeue->sq_consumers[i];
LWLockAcquire(sqsync->sqs_consumer_sync[i].cs_lwlock, LW_EXCLUSIVE);
/* is consumer working yet ? */
- if (cstate->cs_status == CONSUMER_ACTIVE)
+ if (cstate->cs_status == CONSUMER_ACTIVE && failed)
cstate->cs_status = CONSUMER_ERROR;
if (cstate->cs_status != CONSUMER_DONE)
{
SetLatch(&sqsync->sqs_consumer_sync[i].cs_latch);
/* producer will continue waiting */
ResetLatch(&sqsync->sqs_producer_latch);
+
+ if (cstate->cs_pid == 0)
+ unbound_count++;
}
LWLockRelease(sqsync->sqs_consumer_sync[i].cs_lwlock);
}
if (c_count == 0)
break;
- elog(DEBUG1, "Wait while %d squeue readers finishing", c_count);
+ elog(DEBUG1, "Wait while %d squeue readers finish, %d squeue readers "
+ "not yet bound", c_count, unbound_count);
/* wait for a notification */
wait_result = WaitLatch(&sqsync->sqs_producer_latch,
WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT,
LWLockRelease(SQueuesLock);
elog(DEBUG1, "Finalized squeue");
if (wait_result & WL_TIMEOUT)
- elog(FATAL, "Timeout while waiting for Consumers finishing");
+ elog(WARNING, "Timeout while waiting for Consumers finishing");
}
PG_CATCH();
{
/* Ensure SharedQueue is released */
- SharedQueueUnBind(queryDesc->squeue);
+ SharedQueueUnBind(queryDesc->squeue, true);
queryDesc->squeue = NULL;
PG_RE_THROW();
}
extern void SharedQueueAcquire(const char *sqname, int ncons);
extern SharedQueue SharedQueueBind(const char *sqname, List *consNodes,
List *distNodes, int *myindex, int *consMap);
-extern void SharedQueueUnBind(SharedQueue squeue);
+extern void SharedQueueUnBind(SharedQueue squeue, bool failed);
extern void SharedQueueRelease(const char *sqname);
extern void SharedQueuesCleanup(int code, Datum arg);
--- /dev/null
+CREATE TABLE xl_join_t1 (val1 int, val2 int);
+CREATE TABLE xl_join_t2 (val1 int, val2 int);
+CREATE TABLE xl_join_t3 (val1 int, val2 int);
+INSERT INTO xl_join_t1 VALUES (1,10),(2,20);
+INSERT INTO xl_join_t2 VALUES (3,30),(4,40);
+INSERT INTO xl_join_t3 VALUES (5,50),(6,60);
+EXPLAIN SELECT * FROM xl_join_t1
+ INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val2
+ INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------
+ Remote Subquery Scan on all (datanode_1,datanode_2) (cost=475.52..5209.87 rows=288579 width=24)
+ -> Merge Join (cost=475.52..5209.87 rows=288579 width=24)
+ Merge Cond: (xl_join_t3.val1 = xl_join_t1.val1)
+ -> Sort (cost=158.51..164.16 rows=2260 width=8)
+ Sort Key: xl_join_t3.val1
+ -> Seq Scan on xl_join_t3 (cost=0.00..32.60 rows=2260 width=8)
+ -> Materialize (cost=317.01..775.23 rows=25538 width=16)
+ -> Merge Join (cost=317.01..711.38 rows=25538 width=16)
+ Merge Cond: (xl_join_t2.val2 = xl_join_t1.val1)
+ -> Remote Subquery Scan on all (datanode_1,datanode_2) (cost=100.00..161.98 rows=2260 width=8)
+ Distribute results by H: val2
+ -> Sort (cost=287.89..293.54 rows=2260 width=8)
+ Sort Key: xl_join_t2.val2
+ -> Seq Scan on xl_join_t2 (cost=0.00..32.60 rows=2260 width=8)
+ -> Sort (cost=158.51..164.16 rows=2260 width=8)
+ Sort Key: xl_join_t1.val1
+ -> Seq Scan on xl_join_t1 (cost=0.00..32.60 rows=2260 width=8)
+(17 rows)
+
+SELECT * FROM xl_join_t1
+ INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val2
+ INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+ val1 | val2 | val1 | val2 | val1 | val2
+------+------+------+------+------+------
+(0 rows)
+
+DROP TABLE xl_join_t1;
+DROP TABLE xl_join_t2;
+DROP TABLE xl_join_t3;
test: xc_notrans_block
# This runs XL specific tests
-test: xl_primary_key xl_foreign_key xl_distribution_column_types xl_alter_table xl_distribution_column_types_modulo xl_plan_pushdown xl_functions xl_limitations xl_user_defined_functions
+test: xl_primary_key xl_foreign_key xl_distribution_column_types xl_alter_table xl_distribution_column_types_modulo xl_plan_pushdown xl_functions xl_limitations xl_user_defined_functions xl_join
#known bugs
test: xl_known_bugs
test: xl_functions
test: xl_limitations
test: xl_user_defined_functions
+test: xl_join
--- /dev/null
+
+CREATE TABLE xl_join_t1 (val1 int, val2 int);
+CREATE TABLE xl_join_t2 (val1 int, val2 int);
+CREATE TABLE xl_join_t3 (val1 int, val2 int);
+
+INSERT INTO xl_join_t1 VALUES (1,10),(2,20);
+INSERT INTO xl_join_t2 VALUES (3,30),(4,40);
+INSERT INTO xl_join_t3 VALUES (5,50),(6,60);
+
+EXPLAIN SELECT * FROM xl_join_t1
+ INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val2
+ INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+
+SELECT * FROM xl_join_t1
+ INNER JOIN xl_join_t2 ON xl_join_t1.val1 = xl_join_t2.val2
+ INNER JOIN xl_join_t3 ON xl_join_t1.val1 = xl_join_t3.val1;
+
+DROP TABLE xl_join_t1;
+DROP TABLE xl_join_t2;
+DROP TABLE xl_join_t3;