Fix a bug where we would pick random nodes for individual replicated tables,
authorPavan Deolasee <[email protected]>
Mon, 25 Jan 2016 09:32:34 +0000 (15:02 +0530)
committerPavan Deolasee <[email protected]>
Mon, 25 Jan 2016 09:35:48 +0000 (15:05 +0530)
thus reducing the chances of a query getting fully shipped to the remote node.

We now remember all nodes that can satisfy a READ request for a replicated table
and then finally choose a node randomly if no preferred datanode is specified.
This will avoid non-deterministic selection of FQS query plans as well as allow
us to send some more queries to the remote node.

src/backend/optimizer/util/pgxcship.c
src/backend/pgxc/locator/locator.c
src/include/pgxc/locator.h

index 6d64434c9e14b213e7f5b1d57ffe0c242210dee6..7453bdc952cc91787f427a6e6c58622e3a67a94b 100644 (file)
@@ -406,7 +406,8 @@ pgxc_FQS_find_datanodes(Query *query)
                 * preferred node choose that one, otherwise choose the first one.
                 */
                if (IsLocatorReplicated(exec_nodes->baselocatortype) &&
-                       exec_nodes->accesstype == RELATION_ACCESS_READ)
+                       (exec_nodes->accesstype == RELATION_ACCESS_READ ||
+                       exec_nodes->accesstype == RELATION_ACCESS_READ_FQS))
                {
                        List *tmp_list = exec_nodes->nodeList;
                        exec_nodes->nodeList = GetPreferredReplicationNode(exec_nodes->nodeList);
@@ -450,7 +451,7 @@ pgxc_FQS_get_relation_nodes(RangeTblEntry *rte, Index varno, Query *query)
                        if (for_update)
                                rel_access = RELATION_ACCESS_READ_FOR_UPDATE;
                        else
-                               rel_access = RELATION_ACCESS_READ;
+                               rel_access = RELATION_ACCESS_READ_FQS;
                        break;
 
                case CMD_UPDATE:
@@ -1560,7 +1561,8 @@ pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2)
        if (en1->primarynodelist || en2->primarynodelist ||
                en1->en_expr || en2->en_expr ||
                OidIsValid(en1->en_relid) || OidIsValid(en2->en_relid) ||
-               en1->accesstype != RELATION_ACCESS_READ || en2->accesstype != RELATION_ACCESS_READ)
+               (en1->accesstype != RELATION_ACCESS_READ && en1->accesstype != RELATION_ACCESS_READ_FQS) ||
+               (en2->accesstype != RELATION_ACCESS_READ && en2->accesstype != RELATION_ACCESS_READ_FQS))
                return NULL;
 
        if (IsExecNodesReplicated(en1) &&
index e9c6cb82730bd5c2e8aa71e5b8c77d8a07620531..2ee9e272957eeca9141b48cb038cc89bcc0d1103 100644 (file)
@@ -203,7 +203,8 @@ GetPreferredReplicationNode(List *relNodes)
                        break;
        }
        if (nodeid < 0)
-               return list_make1_int(linitial_int(relNodes));
+               return list_make1_int(list_nth_int(relNodes,
+                                       ((unsigned int) random()) % list_length(relNodes)));
 
        return list_make1_int(nodeid);
 }
@@ -1005,7 +1006,8 @@ createLocator(char locatorType, RelationAccessType accessType,
        {
                case LOCATOR_TYPE_REPLICATED:
                        if (accessType == RELATION_ACCESS_INSERT ||
-                                       accessType == RELATION_ACCESS_UPDATE)
+                                       accessType == RELATION_ACCESS_UPDATE ||
+                                       accessType == RELATION_ACCESS_READ_FQS)
                        {
                                locator->locatefunc = locate_static;
                                if (nodeMap == NULL)
index a5501ce0d4af229752d8a5bae2d2f4cb7219ef5c..dd4892da02e51b0ce11020999fe9babbc65ff097 100644 (file)
@@ -53,6 +53,7 @@ typedef int PartAttrNumber;
 typedef enum
 {
        RELATION_ACCESS_READ,                           /* SELECT */
+       RELATION_ACCESS_READ_FQS,                               /* SELECT for FQS */
        RELATION_ACCESS_READ_FOR_UPDATE,        /* SELECT FOR UPDATE */
        RELATION_ACCESS_UPDATE,                         /* UPDATE OR DELETE */
        RELATION_ACCESS_INSERT                          /* INSERT */