Add a mechanism to selectively refresh pooler information when only connection
authorPavan Deolasee <[email protected]>
Mon, 9 May 2016 08:39:21 +0000 (14:09 +0530)
committerPavan Deolasee <[email protected]>
Mon, 9 May 2016 08:39:21 +0000 (14:09 +0530)
options, such as hostname/port, change for a node

This allows us to retain connections to all other nodes in the cluster and just
recreate connections to the node whose connection information has changed. This
will be especially handy when dealing with datanode/coordinator failover.

12 files changed:
doc/src/sgml/func.sgml
src/backend/pgxc/pool/pgxcnode.c
src/backend/pgxc/pool/poolmgr.c
src/backend/pgxc/pool/poolutils.c
src/backend/storage/ipc/procarray.c
src/backend/storage/ipc/procsignal.c
src/backend/tcop/postgres.c
src/include/pgxc/pgxcnode.h
src/include/pgxc/poolmgr.h
src/include/pgxc/poolutils.h
src/include/storage/procarray.h
src/include/storage/procsignal.h

index f24ec8cac038922e617820bb4ac1e5b939bbeb94..61bff80ec8c521fba11114ae615296f16557fd92 100644 (file)
@@ -18416,7 +18416,7 @@ SELECT (pg_stat_file('filename')).modification;
         <literal><function>pgxc_pool_reload()</function></literal>
        </entry>
        <entry><type>boolean</type></entry>
-       <entry>Reload connection data cached in pooler and reload sessions in server</entry>
+       <entry>Refresh or reload connection data cached in pooler and reload sessions in server</entry>
       </row>
      </tbody>
     </tgroup>
@@ -18436,11 +18436,18 @@ SELECT (pg_stat_file('filename')).modification;
     <primary>pgxc_pool_reload</primary>
    </indexterm>
    <para>
-    <function>pgxc_pool_reload</> reloads connection data cached in pooler from
+    <function>pgxc_pool_reload</> attempts to identify if metadata about nodes was added,
+    deleted or altered. If nodes were only altered, it refreshes the
+    connection data by selectively cleaning up the altered nodes' connections from the
+    pooler and from all backends. Connections to nodes that were not altered are
+    retained in this case.
+
+    However, if nodes were added or deleted, it resorts to
+    a more destructive reload operation. It reloads connection data cached in pooler from
     <link linkend="catalog-pgxc-node"><structname>pgxc_node</structname></link> catalog
     and reloads all the information info cached in pooler. All the active transactions
     are aborted and all existing pooler connections are dropped. This results in having
-    all the temporary and prepared objects dropped on remote and local node for session.
+    all the temporary and prepared objects dropped on remote and local nodes for the session.
    </para>
 
    <para>
index 7b9b7855abf4b755c5fc9cd245438d16e9ddba3d..40d718c52d3511409e31f1f449dc39c3ab7f4a27 100644 (file)
@@ -34,6 +34,7 @@
 #include "access/gtm.h"
 #include "access/transam.h"
 #include "access/xact.h"
+#include "access/htup_details.h"
 #include "catalog/pg_type.h"
 #include "commands/prepare.h"
 #include "gtm/gtm_c.h"
@@ -55,6 +56,7 @@
 #include "utils/syscache.h"
 #include "utils/lsyscache.h"
 #include "utils/formatting.h"
+#include "utils/tqual.h"
 #include "../interfaces/libpq/libpq-fe.h"
 #ifdef XCP
 #include "miscadmin.h"
@@ -90,6 +92,7 @@ int           NumCoords;
 
 #ifdef XCP
 volatile bool HandlesInvalidatePending = false;
+volatile bool HandlesRefreshPending = false;
 
 /*
  * Session and transaction parameters need to to be set on newly connected
@@ -108,6 +111,7 @@ typedef struct
 
 
 static bool DoInvalidateRemoteHandles(void);
+static bool DoRefreshRemoteHandles(void);
 #endif
 
 #ifdef XCP
@@ -216,6 +220,9 @@ InitMultinodeExecutor(bool is_force)
                dn_handles[count].nodeid = get_pgxc_node_id(dnOids[count]);
                strncpy(dn_handles[count].nodename, get_pgxc_nodename(dnOids[count]),
                                NAMEDATALEN);
+               strncpy(dn_handles[count].nodehost, get_pgxc_nodehost(dnOids[count]),
+                               NAMEDATALEN);
+               dn_handles[count].nodeport = get_pgxc_nodeport(dnOids[count]);
        }
        for (count = 0; count < NumCoords; count++)
        {
@@ -224,6 +231,9 @@ InitMultinodeExecutor(bool is_force)
                co_handles[count].nodeid = get_pgxc_node_id(coOids[count]);
                strncpy(co_handles[count].nodename, get_pgxc_nodename(coOids[count]),
                                NAMEDATALEN);
+               strncpy(co_handles[count].nodehost, get_pgxc_nodehost(coOids[count]),
+                               NAMEDATALEN);
+               co_handles[count].nodeport = get_pgxc_nodeport(coOids[count]);
        }
 
        datanode_count = 0;
@@ -350,7 +360,8 @@ PGXCNodeConnected(NODE_CONNECTION *conn)
 static void
 pgxc_node_free(PGXCNodeHandle *handle)
 {
-       close(handle->sock);
+       if (handle->sock != NO_SOCKET)
+               close(handle->sock);
        handle->sock = NO_SOCKET;
 }
 
@@ -393,6 +404,7 @@ pgxc_node_all_free(void)
        co_handles = NULL;
        dn_handles = NULL;
        HandlesInvalidatePending = false;
+       HandlesRefreshPending = false;
 }
 
 /*
@@ -1960,6 +1972,12 @@ get_any_handle(List *datanodelist)
                                        (errcode(ERRCODE_QUERY_CANCELED),
                                         errmsg("canceling transaction due to cluster configuration reset by administrator command")));
 
+       if (HandlesRefreshPending)
+               if (DoRefreshRemoteHandles())
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_QUERY_CANCELED),
+                                        errmsg("canceling transaction due to cluster configuration reset by administrator command")));
+
        /* loop through local datanode handles */
        for (i = 0, node = load_balancer; i < NumDataNodes; i++, node++)
        {
@@ -2073,6 +2091,12 @@ get_handles(List *datanodelist, List *coordlist, bool is_coord_only_query, bool
                                        (errcode(ERRCODE_QUERY_CANCELED),
                                         errmsg("canceling transaction due to cluster configuration reset by administrator command")));
 
+       if (HandlesRefreshPending)
+               if (DoRefreshRemoteHandles())
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_QUERY_CANCELED),
+                                        errmsg("canceling transaction due to cluster configuration reset by administrator command")));
+
        result = (PGXCNodeAllHandles *) palloc(sizeof(PGXCNodeAllHandles));
        if (!result)
        {
@@ -2394,7 +2418,7 @@ pfree_pgxc_all_handles(PGXCNodeAllHandles *pgxc_handles)
 }
 
 /*
- * PGXCNode_getNodeId
+ * PGXCNodeGetNodeId
  *             Look at the data cached for handles and return node position
  *             If node type is PGXC_NODE_COORDINATOR look only in coordinator list,
  *             if node type is PGXC_NODE_DATANODE look only in datanode list,
@@ -2439,7 +2463,7 @@ PGXCNodeGetNodeId(Oid nodeoid, char *node_type)
 }
 
 /*
- * PGXCNode_getNodeOid
+ * PGXCNodeGetNodeOid
  *             Look at the data cached for handles and return node Oid
  */
 Oid
@@ -2801,6 +2825,20 @@ RequestInvalidateRemoteHandles(void)
        HandlesInvalidatePending = true;
 }
 
+/*
+ * RequestRefreshRemoteHandles
+ *             Flag this backend's remote node handles as needing a refresh.
+ *
+ * Only sets HandlesRefreshPending; no handle is touched here. The flag is
+ * examined later by the code that hands out handles and is cleared when the
+ * refresh (or a full invalidation) is actually carried out.
+ */
+void
+RequestRefreshRemoteHandles(void)
+{
+       HandlesRefreshPending = true;
+}
+
+/*
+ * PoolerMessagesPending
+ *             Tell the caller whether this backend has pooler-related work
+ *             waiting. At present that means a pending handles refresh request.
+ */
+bool
+PoolerMessagesPending(void)
+{
+       return HandlesRefreshPending ? true : false;
+}
 
 /*
  * For all handles, mark as they are not in use and discard pending input/output
@@ -2813,6 +2851,7 @@ DoInvalidateRemoteHandles(void)
        bool                    result = false;
 
        HandlesInvalidatePending = false;
+       HandlesRefreshPending = false;
 
        for (i = 0; i < NumCoords; i++)
        {
@@ -2838,6 +2877,151 @@ DoInvalidateRemoteHandles(void)
        return result;
 }
 
+/*
+ * DoRefreshRemoteHandles
+ *             Diff this backend's cached node handles against the node table in
+ *             shared memory and refresh the handles of ALTERed nodes.
+ *
+ * Every node OID returned by PgxcNodeGetOids() is classified as
+ *   - added:   no local handle exists for it, or
+ *   - altered: a local handle exists but its name, host or port differs.
+ * Local handles whose OID is no longer listed are classified as deleted.
+ * Altered handles are then updated by PgxcNodeRefreshBackendHandlesShmem().
+ *
+ * Clears HandlesRefreshPending. Returns false only when nodes were added or
+ * deleted AND at least one node was altered; returns true in every other
+ * case.
+ *
+ * NOTE(review): the callers that acquire handles raise an ERROR when this
+ * function returns true, and the "no nodes altered" branch below overrides
+ * the false that was set for added/deleted nodes. Confirm the intended
+ * meaning of the result before relying on it.
+ */
+static bool
+DoRefreshRemoteHandles(void)
+{
+       List                    *altered = NIL, *deleted = NIL, *added = NIL;
+       List                    *shmoids = NIL;
+       Oid                             *coOids, *dnOids, nodeoid;
+       Oid                             *allOids = NULL;        /* stays NULL when there are no nodes */
+       int                             numDNodes;
+       int                             i, numCoords, total_nodes;
+       NodeDefinition  *nodeDef;
+       PGXCNodeHandle  *handle;
+       bool                    res = true;
+
+       HandlesRefreshPending = false;
+
+       PgxcNodeGetOids(&coOids, &dnOids, &numCoords, &numDNodes, false);
+
+       /* Build one flat array: coordinators first, datanodes after them */
+       total_nodes = numCoords + numDNodes;
+       if (total_nodes > 0)
+       {
+               allOids = (Oid *)palloc(total_nodes * sizeof(Oid));
+
+               for (i = 0; i < numCoords; i++)
+                       allOids[i] = coOids[i];
+
+               for (i = 0; i + numCoords < total_nodes; i++)
+                       allOids[i + numCoords] = dnOids[i];
+       }
+
+       LWLockAcquire(NodeTableLock, LW_SHARED);
+       for (i = 0; i < total_nodes; i++)
+       {
+               int nid;
+               char ntype = PGXC_NODE_NONE;
+
+               nodeoid = allOids[i];
+               /* remember every shmem OID for the deletion check further down */
+               shmoids = lappend_oid(shmoids, nodeoid);
+
+               /*
+                * NOTE(review): nodeDef is dereferenced without a NULL check;
+                * presumably a definition always exists for an OID that was just
+                * listed -- TODO confirm.
+                */
+               nodeDef = PgxcNodeGetDefinition(nodeoid);
+               /*
+                * Identify the local entry with this nodeoid. If it is found,
+                * compare the name/host/port entries; any difference means the
+                * node was ALTERed. If the entry from shmem does not exist
+                * locally, it's an ADDITION. Local entries missing from shmem
+                * (DELETEs) are detected after this loop.
+                */
+               nid = PGXCNodeGetNodeId(nodeoid, &ntype);
+
+               if (nid == -1)
+               {
+                       /* a new node has been added to the shmem */
+                       added = lappend_oid(added, nodeoid);
+                       elog(LOG, "Node added: name (%s) host (%s) port (%d)",
+                                NameStr(nodeDef->nodename), NameStr(nodeDef->nodehost),
+                                nodeDef->nodeport);
+               }
+               else
+               {
+                       if (ntype == PGXC_NODE_COORDINATOR)
+                               handle = &co_handles[nid];
+                       else if (ntype == PGXC_NODE_DATANODE)
+                               handle = &dn_handles[nid];
+                       else
+                               elog(ERROR, "Node with non-existent node type!");
+
+                       /*
+                        * compare name, host, port to see if this node
+                        * has been ALTERed
+                        */
+                       if (strncmp(handle->nodename, NameStr(nodeDef->nodename), NAMEDATALEN)
+                               != 0 ||
+                               strncmp(handle->nodehost, NameStr(nodeDef->nodehost), NAMEDATALEN)
+                               != 0 ||
+                               handle->nodeport != nodeDef->nodeport)
+                       {
+                               elog(LOG, "Node altered: old name (%s) old host (%s) old port (%d)"
+                                               " new name (%s) new host (%s) new port (%d)",
+                                        handle->nodename, handle->nodehost, handle->nodeport,
+                                        NameStr(nodeDef->nodename), NameStr(nodeDef->nodehost),
+                                        nodeDef->nodeport);
+                               altered = lappend_oid(altered, nodeoid);
+                       }
+                       /* else do nothing */
+               }
+               pfree(nodeDef);
+       }
+
+       /*
+        * Any entry in backend area but not in shmem means that it has
+        * been deleted
+        */
+       for (i = 0; i < NumCoords; i++)
+       {
+               handle = &co_handles[i];
+               nodeoid = handle->nodeoid;
+               if (!list_member_oid(shmoids, nodeoid))
+               {
+                       deleted = lappend_oid(deleted, nodeoid);
+                       elog(LOG, "Node deleted: name (%s) host (%s) port (%d)",
+                                handle->nodename, handle->nodehost, handle->nodeport);
+               }
+       }
+       for (i = 0; i < NumDataNodes; i++)
+       {
+               handle = &dn_handles[i];
+               nodeoid = handle->nodeoid;
+               if (!list_member_oid(shmoids, nodeoid))
+               {
+                       deleted = lappend_oid(deleted, nodeoid);
+                       elog(LOG, "Node deleted: name (%s) host (%s) port (%d)",
+                                handle->nodename, handle->nodehost, handle->nodeport);
+               }
+       }
+       LWLockRelease(NodeTableLock);
+
+       /* Release palloc'ed memory */
+       pfree(coOids);
+       pfree(dnOids);
+       /* allOids is only allocated when at least one node exists */
+       if (allOids != NULL)
+               pfree(allOids);
+
+       if (deleted != NIL || added != NIL)
+       {
+               elog(LOG, "Nodes added/deleted. Reload needed!");
+               res = false;
+       }
+
+       if (altered == NIL)
+       {
+               elog(LOG, "No nodes altered. Returning");
+               res = true;
+       }
+       else
+               PgxcNodeRefreshBackendHandlesShmem(altered);
+
+       list_free(shmoids);
+       list_free(altered);
+       list_free(added);
+       list_free(deleted);
+       return res;
+}
+
 void
 PGXCNodeSetConnectionState(PGXCNodeHandle *handle, DNConnectionState new_state)
 {
@@ -2845,3 +3029,200 @@ PGXCNodeSetConnectionState(PGXCNodeHandle *handle, DNConnectionState new_state)
                        "new state %d", handle->nodename, handle->state, new_state);
        handle->state = new_state;
 }
+
+/*
+ * Do a "Diff" of backend NODE metadata and the one present in catalog
+ *
+ * We do this in order to identify if we should do a destructive
+ * cleanup or just invalidation of some specific handles
+ *
+ * The pgxc_node catalog is scanned and every row is matched against this
+ * backend's cached coordinator/datanode handles. The OIDs of the nodes found
+ * to be altered, deleted or added are handed back as OID lists through
+ * nodes_alter, nodes_delete and nodes_add respectively; each of these
+ * pointers may be NULL when the caller does not want that list.
+ *
+ * Returns false when an altered node is this node itself (its catalog name
+ * matches PGXCNodeName), true otherwise.
+ */
+bool
+PgxcNodeDiffBackendHandles(List **nodes_alter,
+                          List **nodes_delete, List **nodes_add)
+{
+       Relation rel;
+       HeapScanDesc scan;
+       HeapTuple   tuple;
+       int     i;
+       List *altered = NIL, *added = NIL, *deleted = NIL;
+       List *catoids = NIL;
+       PGXCNodeHandle *handle;
+       Oid     nodeoid;
+       bool res = true;
+
+       LWLockAcquire(NodeTableLock, LW_SHARED);
+
+       rel = heap_open(PgxcNodeRelationId, AccessShareLock);
+       scan = heap_beginscan(rel, SnapshotSelf, 0, NULL);
+       while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       {
+               Form_pgxc_node  nodeForm = (Form_pgxc_node) GETSTRUCT(tuple);
+               int nid;
+               Oid nodeoid;
+               char ntype = PGXC_NODE_NONE;
+
+               nodeoid = HeapTupleGetOid(tuple);
+               catoids = lappend_oid(catoids, nodeoid);
+
+               /*
+                * Identify the local entry with this nodeoid. If it is found,
+                * compare the name/host/port entries; any difference means the
+                * node was ALTERed. If the entry from catalog does not exist
+                * locally, it's an ADDITION. Local entries missing from the
+                * catalog (DELETEs) are detected after the scan, using the
+                * OIDs collected in catoids.
+                */
+               nid = PGXCNodeGetNodeId(nodeoid, &ntype);
+
+               if (nid == -1)
+               {
+                       /* a new node has been added to the catalog */
+                       added = lappend_oid(added, nodeoid);
+                       elog(LOG, "Node added: name (%s) host (%s) port (%d)",
+                                NameStr(nodeForm->node_name), NameStr(nodeForm->node_host),
+                                nodeForm->node_port);
+               }
+               else
+               {
+                       if (ntype == PGXC_NODE_COORDINATOR)
+                               handle = &co_handles[nid];
+                       else if (ntype == PGXC_NODE_DATANODE)
+                               handle = &dn_handles[nid];
+                       else
+                               elog(ERROR, "Node with non-existent node type!");
+
+                       /*
+                        * compare name, host, port to see if this node
+                        * has been ALTERed
+                        */
+                       if (strncmp(handle->nodename, NameStr(nodeForm->node_name), NAMEDATALEN)
+                               != 0 ||
+                               strncmp(handle->nodehost, NameStr(nodeForm->node_host), NAMEDATALEN)
+                               != 0 ||
+                               handle->nodeport != nodeForm->node_port)
+                       {
+                               elog(LOG, "Node altered: old name (%s) old host (%s) old port (%d)"
+                                               " new name (%s) new host (%s) new port (%d)",
+                                        handle->nodename, handle->nodehost, handle->nodeport,
+                                        NameStr(nodeForm->node_name), NameStr(nodeForm->node_host),
+                                        nodeForm->node_port);
+                               /*
+                                * If the altered row describes this node itself, a
+                                * selective refresh is not enough: report failure so
+                                * that the caller resorts to a reload. The node is
+                                * still added to the altered list.
+                                */
+                               if (pg_strcasecmp(PGXCNodeName,
+                                                                 NameStr(nodeForm->node_name)) == 0)
+                               {
+                                       res = false;
+                               }
+                               altered = lappend_oid(altered, nodeoid);
+                       }
+                       /* else do nothing */
+               }
+       }
+       heap_endscan(scan);
+
+       /*
+        * Any entry in backend area but not in catalog means that it has
+        * been deleted
+        */
+       for (i = 0; i < NumCoords; i++)
+       {
+               handle = &co_handles[i];
+               nodeoid = handle->nodeoid;
+               if (!list_member_oid(catoids, nodeoid))
+               {
+                       deleted = lappend_oid(deleted, nodeoid);
+                       elog(LOG, "Node deleted: name (%s) host (%s) port (%d)",
+                                handle->nodename, handle->nodehost, handle->nodeport);
+               }
+       }
+       for (i = 0; i < NumDataNodes; i++)
+       {
+               handle = &dn_handles[i];
+               nodeoid = handle->nodeoid;
+               if (!list_member_oid(catoids, nodeoid))
+               {
+                       deleted = lappend_oid(deleted, nodeoid);
+                       elog(LOG, "Node deleted: name (%s) host (%s) port (%d)",
+                                handle->nodename, handle->nodehost, handle->nodeport);
+               }
+       }
+       heap_close(rel, AccessShareLock);
+       LWLockRelease(NodeTableLock);
+
+       /* Hand the result lists to the caller; a NULL pointer means "not wanted" */
+       if (nodes_alter)
+               *nodes_alter = altered;
+       if (nodes_delete)
+               *nodes_delete = deleted;
+       if (nodes_add)
+               *nodes_add = added;
+
+       if (catoids)
+               list_free(catoids);
+
+       return res;
+}
+
+/*
+ * Refresh specific backend handles associated with
+ * nodes in the "nodes_alter" list below
+ *
+ * For every node OID in the list the matching local handle is located, its
+ * socket is released through pgxc_node_free(), and its cached name, host and
+ * port are overwritten with the values returned by PgxcNodeGetDefinition().
+ * An ERROR is raised when an OID has no local handle or an unknown node type.
+ */
+void
+PgxcNodeRefreshBackendHandlesShmem(List *nodes_alter)
+{
+       ListCell   *cell;
+
+       foreach(cell, nodes_alter)
+       {
+               Oid                             oid = lfirst_oid(cell);
+               char                    kind = PGXC_NODE_NONE;
+               int                             pos = PGXCNodeGetNodeId(oid, &kind);
+               PGXCNodeHandle *target = NULL;
+               NodeDefinition *def;
+
+               /* The node must still be known to this backend */
+               if (pos == -1)
+                       elog(ERROR, "Looks like node metadata changed again");
+
+               if (kind == PGXC_NODE_COORDINATOR)
+                       target = &co_handles[pos];
+               else if (kind == PGXC_NODE_DATANODE)
+                       target = &dn_handles[pos];
+               else
+                       elog(ERROR, "Node with non-existent node type!");
+
+               /* Drop the stale connection before rewriting the cached metadata */
+               pgxc_node_free(target);
+               elog(LOG, "Backend (%u), Node (%s) updated locally",
+                        MyBackendId, target->nodename);
+
+               /* Copy the current definition into the local handle */
+               def = PgxcNodeGetDefinition(oid);
+               strncpy(target->nodename, NameStr(def->nodename), NAMEDATALEN);
+               strncpy(target->nodehost, NameStr(def->nodehost), NAMEDATALEN);
+               target->nodeport = def->nodeport;
+               pfree(def);
+       }
+}
+
+/*
+ * HandlePoolerMessages
+ *             Carry out a pending handles refresh, if one was requested.
+ *
+ * The result of DoRefreshRemoteHandles() is deliberately not inspected here;
+ * the refresh is logged once it has been performed.
+ */
+void
+HandlePoolerMessages(void)
+{
+       if (!HandlesRefreshPending)
+               return;
+
+       DoRefreshRemoteHandles();
+
+       elog(LOG, "Backend (%u), doing handles refresh",
+                MyBackendId);
+}
index 11c2774e5922df7b60768d4ea728f9ce53b50a9b..e3ed53ce4beb6d59404257817cc726c75e4a184c 100644 (file)
@@ -126,6 +126,8 @@ static DatabasePool *create_database_pool(const char *database, const char *user
 static void insert_database_pool(DatabasePool *pool);
 static int     destroy_database_pool(const char *database, const char *user_name);
 static void reload_database_pools(PoolAgent *agent);
+static int refresh_database_pools(PoolAgent *agent);
+static bool remove_all_agent_references(Oid nodeoid);
 static DatabasePool *find_database_pool(const char *database, const char *user_name, const char *pgoptions);
 static DatabasePool *remove_database_pool(const char *database, const char *user_name);
 static int *agent_acquire_connections(PoolAgent *agent, List *datanodelist,
@@ -1145,6 +1147,29 @@ PoolManagerReloadConnectionInfo(void)
        pool_flush(&poolHandle->port);
 }
 
+/*
+ * Refresh connection data in pooler and drop connections for those nodes
+ * that have changed. Thus, this operation is less destructive as compared
+ * to PoolManagerReloadConnectionInfo and should typically be called when
+ * NODE ALTER has been performed
+ *
+ * Sends an 'R' message to the pooler and waits for its result code.
+ * Returns true (non-zero) when the pooler reports POOL_REFRESH_SUCCESS and
+ * false otherwise. The declared return type remains int.
+ */
+int
+PoolManagerRefreshConnectionInfo(void)
+{
+       int res;
+
+       Assert(poolHandle);
+       PgxcNodeListAndCount();
+       pool_putmessage(&poolHandle->port, 'R', NULL, 0);
+       pool_flush(&poolHandle->port);
+
+       /*
+        * The agent replies with the value produced by
+        * refresh_database_pools(), i.e. a POOL_REFRESH_* code, so compare
+        * against the refresh status rather than the check status.
+        */
+       res = pool_recvres(&poolHandle->port);
+
+       return (res == POOL_REFRESH_SUCCESS);
+}
 
 /*
  * Handle messages to agent
@@ -1228,7 +1253,7 @@ agent_handle_input(PoolAgent * agent, StringInfo s)
                        case 'f':                       /* CLEAN CONNECTION */
                                pool_getmessage(&agent->port, s, 0);
                                datanodecount = pq_getmsgint(s, 4);
-                               /* It is possible to clean up only Coordinators connections */
+                               /* It is possible to clean up only datanode connections */
                                for (i = 0; i < datanodecount; i++)
                                {
                                        /* Translate index to Oid */
@@ -1237,7 +1262,7 @@ agent_handle_input(PoolAgent * agent, StringInfo s)
                                        nodelist = lappend_oid(nodelist, node);
                                }
                                coordcount = pq_getmsgint(s, 4);
-                               /* It is possible to clean up only Datanode connections */
+                               /* It is possible to clean up only coordinator connections */
                                for (i = 0; i < coordcount; i++)
                                {
                                        /* Translate index to Oid */
@@ -1363,6 +1388,24 @@ agent_handle_input(PoolAgent * agent, StringInfo s)
                                /* First update all the pools */
                                reload_database_pools(agent);
                                break;
+                       case 'R':                       /* Refresh connection info */
+                               /*
+                                * Connection information refresh concerns all the database pools.
+                                * A database pool is refreshed as follows for each remote node:
+                                * - node pool is deleted if its port or host information is changed.
+                                *   Subsequently all its connections are dropped.
+                                *
+                                * If any other type of activity is found, we error out
+                                */
+                               pool_getmessage(&agent->port, s, 4);
+                               pq_getmsgend(s);
+
+                               /* Refresh the pools */
+                               res = refresh_database_pools(agent);
+
+                               /* Send result */
+                               pool_sendres(&agent->port, res);
+                               break;
                        case 'P':                       /* Ping connection info */
                                /*
                                 * Ping unhealthy nodes in the pools. If any of the
@@ -1954,6 +1997,148 @@ reload_database_pools(PoolAgent *agent)
        }
 }
 
+/*
+ * Refresh information of database pools
+ *
+ * First verifies that the agent's coordinator/datanode OID arrays still match
+ * the node table returned by PgxcNodeGetOids(); any mismatch in count or
+ * content fails the refresh. Then, for every node pool of every database
+ * pool, the connection string is rebuilt and compared with the cached one.
+ * A pool whose string changed has its connections released from all agents,
+ * is destroyed and is removed from the hash table.
+ *
+ * Returns POOL_REFRESH_SUCCESS, or POOL_REFRESH_FAILED when the node set
+ * changed, a node pool refers to a deleted node, or an altered node could
+ * not be found in some agent.
+ */
+static int
+refresh_database_pools(PoolAgent *agent)
+{
+       DatabasePool *databasePool;
+       Oid                        *coOids;
+       Oid                        *dnOids;
+       int                             numCo;
+       int                             numDn;
+       int                     res = POOL_REFRESH_SUCCESS;
+
+       elog(LOG, "Refreshing database pools");
+
+       /*
+        * re-check if agent's node information matches current contents of the
+        * shared memory table.
+        */
+       PgxcNodeGetOids(&coOids, &dnOids, &numCo, &numDn, false);
+
+       /* The memcmp calls are only reached once both counts are known to match */
+       if (agent->num_coord_connections != numCo ||
+                       agent->num_dn_connections != numDn ||
+                       memcmp(agent->coord_conn_oids, coOids, numCo * sizeof(Oid)) ||
+                       memcmp(agent->dn_conn_oids, dnOids, numDn * sizeof(Oid)))
+               res = POOL_REFRESH_FAILED;
+
+       /* Release palloc'ed memory */
+       pfree(coOids);
+       pfree(dnOids);
+
+       /*
+        * Scan the list and destroy any altered pool. They will be recreated
+        * upon subsequent connection acquisition.
+        */
+       databasePool = databasePools;
+       while (res == POOL_REFRESH_SUCCESS && databasePool)
+       {
+               HASH_SEQ_STATUS hseq_status;
+               PGXCNodePool   *nodePool;
+
+               hash_seq_init(&hseq_status, databasePool->nodePools);
+               while ((nodePool = (PGXCNodePool *) hash_seq_search(&hseq_status)))
+               {
+                       char *connstr_chk = build_node_conn_str(nodePool->nodeoid, databasePool);
+
+                       /*
+                        * Since we re-checked the numbers above, we should not get
+                        * the case of an ADDED or a DELETED node here..
+                        */
+                       if (connstr_chk == NULL)
+                       {
+                               elog(LOG, "Found a deleted node (%u)", nodePool->nodeoid);
+                               hash_seq_term(&hseq_status);
+                               res = POOL_REFRESH_FAILED;
+                               break;
+                       }
+
+                       if (strcmp(connstr_chk, nodePool->connstr))
+                       {
+                               elog(LOG, "Found an altered node (%u)", nodePool->nodeoid);
+                               /*
+                                * Node has been altered. First remove
+                                * all references to this node from ALL the
+                                * agents before destroying it..
+                                */
+                               if (!remove_all_agent_references(nodePool->nodeoid))
+                               {
+                                       /*
+                                        * The scan is abandoned early: release the
+                                        * rebuilt string and terminate the scan, as the
+                                        * deleted-node branch above does.
+                                        */
+                                       pfree(connstr_chk);
+                                       hash_seq_term(&hseq_status);
+                                       res = POOL_REFRESH_FAILED;
+                                       break;
+                               }
+
+                               destroy_node_pool(nodePool);
+                               hash_search(databasePool->nodePools, &nodePool->nodeoid,
+                                                       HASH_REMOVE, NULL);
+                       }
+
+                       /* connstr_chk is known to be non-NULL at this point */
+                       pfree(connstr_chk);
+               }
+
+               databasePool = databasePool->next;
+       }
+       return res;
+}
+
+/*
+ * remove_all_agent_references
+ *             Make every pool agent let go of its connection to the given node.
+ *
+ * For each agent the node is looked up among the datanode OIDs first and
+ * among the coordinator OIDs second. When found, the slot held for it (if
+ * any) is released and the agent's slot pointer is reset to NULL. Returns
+ * false if at least one agent knows the node in neither list; the remaining
+ * agents are still processed in that case.
+ */
+static bool
+remove_all_agent_references(Oid nodeoid)
+{
+       bool            all_found = true;
+       int                     a;
+
+       for (a = 0; a < agentCount; a++)
+       {
+               PoolAgent  *agent = poolAgents[a];
+               int                     k;
+
+               /* Datanode list: stop at the first matching OID */
+               for (k = 0; k < agent->num_dn_connections; k++)
+               {
+                       if (agent->dn_conn_oids[k] == nodeoid)
+                               break;
+               }
+               if (k < agent->num_dn_connections)
+               {
+                       PGXCNodePoolSlot *slot = agent->dn_connections[k];
+
+                       if (slot)
+                               release_connection(agent->pool, slot, agent->dn_conn_oids[k], false);
+                       agent->dn_connections[k] = NULL;
+                       continue;
+               }
+
+               /* Not a datanode for this agent: try the coordinator list */
+               for (k = 0; k < agent->num_coord_connections; k++)
+               {
+                       if (agent->coord_conn_oids[k] == nodeoid)
+                               break;
+               }
+               if (k < agent->num_coord_connections)
+               {
+                       PGXCNodePoolSlot *slot = agent->coord_connections[k];
+
+                       if (slot)
+                               release_connection(agent->pool, slot, agent->coord_conn_oids[k], true);
+                       agent->coord_connections[k] = NULL;
+                       continue;
+               }
+
+               /* Unknown to this agent in both lists */
+               elog(LOG, "Node not found! (%u)", nodeoid);
+               all_found = false;
+       }
+       return all_found;
+}
 
 /*
  * Find pool for specified database and username in the list
index 35227be7d58fc6c05be187e8dbad08aaa64e7709..ad361a0812197edcd7ce282dc326ff473e03d462 100644 (file)
@@ -33,6 +33,7 @@
 #include "commands/prepare.h"
 #include "storage/ipc.h"
 #include "storage/procarray.h"
+#include "storage/latch.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
@@ -64,6 +65,10 @@ pgxc_pool_check(PG_FUNCTION_ARGS)
 /*
  * pgxc_pool_reload
  *
+ * This function checks if a refresh should be carried out first. A refresh
+ * is carried out if NODEs have only been ALTERed in the catalog. Otherwise
+ * reload is performed as below.
+ *
  * Reload data cached in pooler and reload node connection
  * information in all the server sessions. This aborts all
  * the existing transactions on this node and reinitializes pooler.
@@ -97,6 +102,13 @@ pgxc_pool_reload(PG_FUNCTION_ARGS)
                                (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
                                 errmsg("pgxc_pool_reload cannot run inside a transaction block")));
 
+       /*
+        * Always check if we can get away with a LESS destructive refresh
+        * operation.
+        */
+       if (PgxcNodeRefresh())
+               PG_RETURN_BOOL(true);
+
        /* Session is being reloaded, drop prepared and temporary objects */
        DropAllPreparedStatements();
 
@@ -112,11 +124,61 @@ pgxc_pool_reload(PG_FUNCTION_ARGS)
 
        /* Signal other sessions to reconnect to pooler if have privileges */
        if (superuser())
-               ReloadConnInfoOnBackends();
+               ReloadConnInfoOnBackends(false);
 
        PG_RETURN_BOOL(true);
 }
 
+/*
+ * PgxcNodeRefresh
+ *
+ * Try to deal with a change of node metadata by a refresh rather than by a
+ * full reload.
+ *
+ * Returns true when the caller has nothing more to do: either no node was
+ * reported as altered, or the refresh steps below were carried out for the
+ * altered nodes. Returns false when the caller has to fall back to a
+ * reload, i.e. when PgxcNodeDiffBackendHandles() returns false or reports
+ * added/deleted nodes.
+ *
+ * The three work lists are released on every exit path.
+ */
+bool
+PgxcNodeRefresh(void)
+{
+       List *nodes_alter = NIL, *nodes_delete = NIL, *nodes_add = NIL;
+       bool refreshed = false;
+
+       /*
+        * Check if NODE metadata has been ALTERed only. If there are DELETIONs
+        * or ADDITIONs of NODEs, then we tell the caller to use reload
+        * instead
+        */
+       if (!PgxcNodeDiffBackendHandles(&nodes_alter, &nodes_delete, &nodes_add))
+       {
+               elog(LOG, "Self node altered. Performing reload"
+                        " to re-create connections!");
+               goto cleanup;
+       }
+
+       if (nodes_delete != NIL || nodes_add != NIL)
+       {
+               elog(LOG, "Nodes added/deleted. Performing reload"
+                        " to re-create connections!");
+               goto cleanup;
+       }
+
+       if (nodes_alter == NIL)
+       {
+               /* Nothing changed, so there is nothing to refresh or reload */
+               elog(LOG, "No nodes altered. Returning");
+               refreshed = true;
+               goto cleanup;
+       }
+
+       /*
+        * Be sure it is done consistently: repeat until the pooler reports
+        * that its connection information checks out.
+        *
+        * NOTE(review): PoolManagerRefreshConnectionInfo() is declared as
+        * returning int and POOL_REFRESH_FAILED exists, but the result is
+        * ignored here - confirm that a persistent failure cannot make this
+        * loop spin forever.
+        */
+       while (!PoolManagerCheckConnectionInfo())
+       {
+               /* Refresh connection information in pooler */
+               PoolManagerRefreshConnectionInfo();
+       }
+
+       PgxcNodeRefreshBackendHandlesShmem(nodes_alter);
+
+       /* Signal other sessions to refresh, but only if we have privileges */
+       if (superuser())
+               ReloadConnInfoOnBackends(true);
+
+       refreshed = true;
+
+cleanup:
+       /* list_free() accepts NIL, so no need to test the lists first */
+       list_free(nodes_alter);
+       list_free(nodes_add);
+       list_free(nodes_delete);
+
+       return refreshed;
+}
+
 /*
  * CleanConnection()
  *
@@ -349,3 +411,24 @@ HandlePoolerReload(void)
        /* Prevent using of cached connections to remote nodes */
        RequestInvalidateRemoteHandles();
 }
+
+/*
+ * HandlePoolerRefresh
+ *
+ * Invoked when PROCSIG_PGXCPOOL_REFRESH has been signalled. Requests that
+ * the local backend connection info be reconciled with the one in shared
+ * memory. Nothing is done once process exit is already in progress.
+ */
+void
+HandlePoolerRefresh(void)
+{
+       if (!proc_exit_inprogress)
+       {
+               /* record that there is pending work and which kind it is */
+               InterruptPending = true;
+               RequestRefreshRemoteHandles();
+
+               /* make sure the event is processed in due course */
+               SetLatch(MyLatch);
+       }
+}
index cfc995de3729135a7cf305b008faa8fffaaf034b..885a92909e0ae009a7539358f7855fae21fb47ba 100644 (file)
@@ -2919,10 +2919,13 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
 
 #ifdef PGXC
 /*
- * ReloadConnInfoOnBackends -- reload connection information for all the backends
+ * ReloadConnInfoOnBackends -- reload/refresh connection information
+ * for all the backends
+ *
+ * "refresh" is less destructive than "reload"
  */
 void
-ReloadConnInfoOnBackends(void)
+ReloadConnInfoOnBackends(bool refresh_only)
 {
        ProcArrayStruct *arrayP = procArray;
        int                     index;
@@ -2954,7 +2957,9 @@ ReloadConnInfoOnBackends(void)
                /*
                 * Send the reload signal if backend still exists
                 */
-               (void) SendProcSignal(pid, PROCSIG_PGXCPOOL_RELOAD, vxid.backendId);
+               (void) SendProcSignal(pid, refresh_only?
+                                         PROCSIG_PGXCPOOL_REFRESH:PROCSIG_PGXCPOOL_RELOAD,
+                                         vxid.backendId);
        }
 
        LWLockRelease(ProcArrayLock);
index f067a039bb1857bb7fee3a1ad48ef57a376374bf..82f1ae837a592e43004c3b25bd197d5f635bddc5 100644 (file)
@@ -281,6 +281,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
 #ifdef PGXC
        if (CheckProcSignal(PROCSIG_PGXCPOOL_RELOAD))
                HandlePoolerReload();
+
+       if (CheckProcSignal(PROCSIG_PGXCPOOL_REFRESH))
+               HandlePoolerRefresh();
 #endif
        if (CheckProcSignal(PROCSIG_PARALLEL_MESSAGE))
                HandleParallelMessageInterrupt();
index 581e694d8dc4281e0d2650435f4e92c153cd43c6..42c0077f191458f5488501911197152a651c3ca5 100644 (file)
@@ -3419,6 +3419,9 @@ ProcessInterrupts(void)
 
        if (ParallelMessagePending)
                HandleParallelMessages();
+
+       if (PoolerMessagesPending())
+               HandlePoolerMessages();
 }
 
 
index 93cc925c70c418b2487710f4d718d685373301eb..f40120671fa2443e7fa63f3c48178a7a1fc4bfa2 100644 (file)
@@ -60,6 +60,8 @@ struct pgxc_node_handle
        Oid                     nodeoid;
        int                     nodeid;
        char            nodename[NAMEDATALEN];
+       char            nodehost[NAMEDATALEN];
+       int                     nodeport;
 
        /* fd of the connection */
        int             sock;
@@ -191,7 +193,12 @@ extern char *PGXCNodeGetSessionParamStr(void);
 extern char *PGXCNodeGetTransactionParamStr(void);
 extern void pgxc_node_set_query(PGXCNodeHandle *handle, const char *set_query);
 extern void RequestInvalidateRemoteHandles(void);
+extern void RequestRefreshRemoteHandles(void);
+extern bool PoolerMessagesPending(void);
 extern void PGXCNodeSetConnectionState(PGXCNodeHandle *handle,
                DNConnectionState new_state);
-
+extern bool PgxcNodeDiffBackendHandles(List **nodes_alter,
+                          List **nodes_delete, List **nodes_add);
+extern void PgxcNodeRefreshBackendHandlesShmem(List *nodes_alter);
+extern void HandlePoolerMessages(void);
 #endif /* PGXCNODE_H */
index 31690983d10ace57e143ec1335946cbeea863540..47a54c67b2ac2b92c42792ab55f133f2200d0d30 100644 (file)
@@ -125,6 +125,9 @@ extern bool PoolManagerCheckConnectionInfo(void);
 /* Reload connection data in pooler and drop all the existing connections of pooler */
 extern void PoolManagerReloadConnectionInfo(void);
 
+/* Refresh connection data in pooler and drop connections of altered nodes in pooler */
+extern int PoolManagerRefreshConnectionInfo(void);
+
 /* Send Abort signal to transactions being run */
 extern int     PoolManagerAbortTransactions(char *dbname, char *username, int **proc_pids);
 
index 1b88b34e3542c1c1b61a393aa2da52e9398664e5..0570b70d6dea65446b7800f928d78fae9f53c88b 100644 (file)
 #define POOL_CHECK_SUCCESS                                     0
 #define POOL_CHECK_FAILED                                      1
 
+/* Results for pooler connection info refresh */
+#define POOL_REFRESH_SUCCESS                           0
+#define POOL_REFRESH_FAILED                                    1
+
 void CleanConnection(CleanConnStmt *stmt);
 void DropDBCleanConnection(char *dbname);
 
-/* Handle pooler connection reload when signaled by SIGUSR1 */
+/* Handle pooler connection reload/refresh when signaled by SIGUSR1 */
 void HandlePoolerReload(void);
+void HandlePoolerRefresh(void);
+bool PgxcNodeRefresh(void);
 #endif
index 1afbe777eacaa56a514501552c4cfefd2b7521e8..1872559532eb1ae11cd0f614067e5a83d988a0f4 100644 (file)
@@ -51,7 +51,7 @@ extern void SetGlobalSnapshotData(TransactionId xmin, TransactionId xmax, int xc
                TransactionId *xip,
                SnapshotSource source);
 extern void UnsetGlobalSnapshotData(void);
-extern void ReloadConnInfoOnBackends(void);
+extern void ReloadConnInfoOnBackends(bool refresh_only);
 #endif /* PGXC */
 extern void ProcArrayInitRecovery(TransactionId initializedUptoXID);
 extern void ProcArrayApplyRecoveryInfo(RunningTransactions running);
index c73cab47064968acb9d96dd22721ea983db03129..89692e2500e075c9d34dd434742cdd1003d78bc6 100644 (file)
@@ -43,6 +43,7 @@ typedef enum
        PROCSIG_NOTIFY_INTERRUPT,       /* listen/notify interrupt */
 #ifdef PGXC
        PROCSIG_PGXCPOOL_RELOAD,        /* abort current transaction and reconnect to pooler */
+       PROCSIG_PGXCPOOL_REFRESH,       /* refresh local view of connection handles */
 #endif
        PROCSIG_PARALLEL_MESSAGE,       /* message from cooperating parallel backend */