Do not turn hot_standby in coordinator/datanode slaves since its not supported.
authorPavan Deolasee <[email protected]>
Fri, 25 Mar 2016 11:23:31 +0000 (16:53 +0530)
committerPavan Deolasee <[email protected]>
Fri, 25 Mar 2016 11:23:31 +0000 (16:53 +0530)
We'd earlier turned that on so that PQping() can check status of standbys. But
that clearly creates bigger trouble and standbys may just stop working. So add
a new mechanism to ping slave nodes by using pg_ctl

contrib/pgxc_ctl/coord_cmd.c
contrib/pgxc_ctl/datanode_cmd.c
contrib/pgxc_ctl/monitor.c
contrib/pgxc_ctl/utils.c
contrib/pgxc_ctl/utils.h

index 78af7fb3cf57516c55bef1d4c2c37582358bebd8..7e59070e95668b8181b42f8c11e947cfa7507d35 100644 (file)
@@ -158,7 +158,7 @@ cmd_t *prepare_initCoordinatorMaster(char *nodeName)
                fprintf(f, 
                                "#========================================\n"
                                "# Addition for log shipping, %s\n"
-                               "wal_level = hot_standby\n"
+                               "wal_level = archive\n"
                                "archive_mode = on\n"
                                "archive_command = 'rsync %%p %s@%s:%s/%%f'\n"
                                "max_wal_senders = %s\n"
@@ -325,10 +325,10 @@ cmd_t *prepare_initCoordinatorSlave(char *nodeName)
        fprintf(f,
                        "#==========================================\n"
                        "# Added to initialize the slave, %s\n"
-                       "hot_standby = on\n"
+                       "hot_standby = off\n"
                        "port = %s\n"
                        "pooler_port = %s\n"
-                       "wal_level = minimal\n"
+                       "wal_level = archive\n"
                        "archive_mode = off\n"
                        "archive_command = ''\n"
                        "max_wal_senders = 0\n"
@@ -1305,7 +1305,7 @@ int add_coordinatorSlave(char *name, char *host, int port, int pooler_port, char
        fprintf(f, 
                        "#========================================\n"
                        "# Addition for log shipping, %s\n"
-                       "wal_level = hot_standby\n"
+                       "wal_level = archive\n"
                        "archive_mode = on\n"
                        "archive_command = 'rsync %%p %s@%s:%s/%%f'\n"
                        "max_wal_senders = %d\n"
@@ -1405,10 +1405,10 @@ int add_coordinatorSlave(char *name, char *host, int port, int pooler_port, char
        fprintf(f,
                        "#==========================================\n"
                        "# Added to initialize the slave, %s\n"
-                       "hot_standby = on\n"
+                       "hot_standby = off\n"
                        "port = %d\n"
                        "pooler_port = %d\n"
-                       "wal_level = minimal\n"         /* WAL level --- minimal.   No cascade slave so far. */
+                       "wal_level = archive\n"
                        "archive_mode = off\n"          /* No archive mode */
                        "archive_command = ''\n"        /* No archive mode */
                        "max_wal_senders = 0\n"         /* Minimum WAL senders */
@@ -1641,7 +1641,7 @@ int remove_coordinatorSlave(char *name, int clean_opt)
                return 1;
        }
        AddMember(nodelist, name);
-       if (pingNode(aval(VAR_coordSlaveServers)[idx], aval(VAR_coordSlavePorts)[idx]) == 0)
+       if (pingNodeSlave(aval(VAR_coordSlaveServers)[idx], aval(VAR_coordSlaveDirs)[idx]) == 0)
                stop_coordinator_slave(nodelist, "immediate");
        {
                FILE *f;
index 18180bd896d3c673458d81130fbf8bfe08217303..c9d4731ba2c854126d487ff5bc178470c77aef24 100644 (file)
@@ -175,7 +175,7 @@ cmd_t *prepare_initDatanodeMaster(char *nodeName)
                        return(NULL);
                }
                fprintf(f,
-                               "wal_level = hot_standby\n"
+                               "wal_level = archive\n"
                                "archive_mode = on\n"
                                "archive_command = 'rsync %%p %s@%s:%s/%%f'\n"
                                "max_wal_senders = %s\n"
@@ -349,7 +349,7 @@ cmd_t *prepare_initDatanodeSlave(char *nodeName)
        fprintf(f,
                        "#==========================================\n"
                        "# Added to startup the slave, %s\n"
-                       "hot_standby = on\n"
+                       "hot_standby = off\n"
                        "port = %s\n"
                        "pooler_port = %s\n"
                        "# End of addition\n",
@@ -646,7 +646,8 @@ cmd_t *prepare_stopDatanodeSlave(char *nodeName, char *immediate)
        /* The next step might need improvement.  When GTM is dead, the following may
         * fail even though the master is running.
         */
-       if (pingNode(aval(VAR_datanodeSlaveServers)[idx], aval(VAR_datanodeSlavePorts)[idx]) == 0)
+       if (pingNodeSlave(aval(VAR_datanodeSlaveServers)[idx],
+                               aval(VAR_datanodeSlaveDirs)[idx]) == 0)
        {
                cmd_t *cmdReloadMaster;
 
@@ -1360,7 +1361,7 @@ int add_datanodeSlave(char *name, char *host, int port, int pooler, char *dir,
        fprintf(f, 
                        "#========================================\n"
                        "# Addition for log shipping, %s\n"
-                       "wal_level = hot_standby\n"
+                       "wal_level = archive\n"
                        "archive_mode = on\n"
                        "archive_command = 'rsync %%p %s@%s:%s/%%f'\n"
                        "max_wal_senders = %d\n"
@@ -1472,10 +1473,10 @@ int add_datanodeSlave(char *name, char *host, int port, int pooler, char *dir,
        fprintf(f,
                        "#==========================================\n"
                        "# Added to initialize the slave, %s\n"
-                       "hot_standby = on\n"
+                       "hot_standby = off\n"
                        "port = %s\n"
                        "pooler_port = %s\n"
-                       "wal_level = minimal\n"         /* WAL level --- minimal.   No cascade slave so far. */
+                       "wal_level = archive\n"
                        "archive_mode = off\n"          /* No archive mode */
                        "archive_command = ''\n"        /* No archive mode */
                        "max_wal_senders = 0\n"         /* Minimum WAL senders */
@@ -1728,7 +1729,8 @@ int remove_datanodeSlave(char *name, int clean_opt)
                return 1;
        }
        AddMember(nodelist, name);
-       if (pingNode(aval(VAR_datanodeSlaveServers)[idx], aval(VAR_datanodeSlavePorts)[idx]) == 0)
+       if (pingNodeSlave(aval(VAR_datanodeSlaveServers)[idx],
+                               aval(VAR_datanodeSlaveDirs)[idx]) == 0)
                stop_datanode_slave(nodelist, "immediate");
        {
                FILE *f;
index 148350426980c4cf2646d87e12a7759dee935354..f14c24bb4773ea59fffe69cf378dda46180d1f5a 100644 (file)
@@ -155,7 +155,8 @@ static void monitor_coordinator(char **nodeList)
                printResult(pingNode(aval(VAR_coordMasterServers)[idx], aval(VAR_coordPorts)[idx]), 
                                        "coordinator master", actualNodeList[ii]);
                if (doesExist(VAR_coordSlaveServers, idx) && !is_none(aval(VAR_coordSlaveServers)[idx]))
-                       printResult(pingNode(aval(VAR_coordSlaveServers)[idx], aval(VAR_coordSlavePorts)[idx]),
+                       printResult(pingNodeSlave(aval(VAR_coordSlaveServers)[idx],
+                                               aval(VAR_coordSlaveDirs)[idx]),
                                                "coordinator slave", actualNodeList[ii]);
        }
 }
@@ -198,7 +199,8 @@ static void monitor_datanode_slave(char **nodeList)
                        continue;
                }
                if (doesExist(VAR_datanodeSlaveServers, idx) && !is_none(aval(VAR_datanodeSlaveServers)[idx]))
-                       printResult(pingNode(aval(VAR_datanodeSlaveServers)[idx], aval(VAR_datanodeSlavePorts)[idx]), 
+                       printResult(pingNodeSlave(aval(VAR_datanodeSlaveServers)[idx],
+                                               aval(VAR_datanodeSlaveDirs)[idx]), 
                                                "datanode slave", actualNodeList[ii]);
                else
                        elog(ERROR, "ERROR: datanode slave %s is not configured.\n", actualNodeList[ii]);
@@ -222,7 +224,8 @@ static void monitor_datanode(char **nodeList)
                printResult(pingNode(aval(VAR_datanodeMasterServers)[idx], aval(VAR_datanodePorts)[idx]), 
                                        "datanode master", actualNodeList[ii]);
                if (doesExist(VAR_datanodeSlaveServers, idx) && !is_none(aval(VAR_datanodeSlaveServers)[idx]))
-                       printResult(pingNode(aval(VAR_datanodeSlaveServers)[idx], aval(VAR_datanodeSlavePorts)[idx]),
+                       printResult(pingNodeSlave(aval(VAR_datanodeSlaveServers)[idx],
+                                               aval(VAR_datanodeSlaveDirs)[idx]),
                                                "datanode slave", actualNodeList[ii]);
        }
 }
index dd79fa10dc75f65e58fa95e1fd6c3c5da9ccc6ae..96c487f20cf60b5b7734fab1d3bc2ad3dcc73121 100644 (file)
@@ -324,6 +324,35 @@ int pingNode(char *host, char *port)
                return -1;
 }
 
+/*
+ * A different mechanism to ping datanode and coordinator slaves since these
+ * nodes currently do not accept connections and hence won't respond to PQping
+ * requests. Instead we rely on "pg_ctl status", which must be run via ssh on
+ * the remote machine
+ */
+int pingNodeSlave(char *host, char *datadir)
+{
+       FILE *wkf;
+       char cmd[MAXLINE+1];
+       char line[MAXLINE+1];
+       int      rv;
+
+       snprintf(cmd, MAXLINE, "ssh %s@%s pg_ctl -D %s status > /dev/null 2>&1; echo $?",
+                        sval(VAR_pgxcUser), host, datadir);
+       wkf = popen(cmd, "r");
+       if (wkf == NULL)
+               return -1;
+       if (fgets(line, MAXLINE, wkf))
+       {
+               trimNl(line);
+               rv = atoi(line);
+       }
+       else
+               rv = -1;
+       pclose(wkf);
+       return rv;
+}
+
 void trimNl(char *s)
 {
        for (;*s && *s != '\n'; s++);
index 6d0fb9d691e2fd064e7abf5fdfdc277a2b723311..18cc0ec9fe170f4078d623645f325d91049ec8ab 100644 (file)
@@ -30,6 +30,7 @@ extern int datanodeIdx(char *datanodeName);
 extern int getEffectiveGtmProxyIdxFromServerName(char *serverName);
 extern pid_t get_prog_pid(char *host, char *pidfile, char *dir);
 extern int pingNode(char *host, char *port);
+extern int pingNodeSlave(char *host, char *datadir);
 extern void trimNl(char *s);
 extern char *getChPidList(char *host, pid_t ppid);
 extern char *getIpAddress(char *hostName);