Some misc fixes and documentation updates for pgxc_ctl

author Pavan Deolasee <[email protected]>

Wed, 10 Feb 2016 10:14:00 +0000 (15:44 +0530)

committer Pavan Deolasee <[email protected]>

Wed, 10 Feb 2016 10:14:00 +0000 (15:44 +0530)
author Pavan Deolasee <[email protected]>
Wed, 10 Feb 2016 10:14:00 +0000 (15:44 +0530)
committer Pavan Deolasee <[email protected]>
Wed, 10 Feb 2016 10:14:00 +0000 (15:44 +0530)
diff --git a/contrib/pgxc_ctl/config.c b/contrib/pgxc_ctl/config.c

index 2098fd786d5a43775a61ca06804a28376135b55b..b36fa2bc820328a936792e7eb1b08dedb679d83a 100644 (file)
--- a/contrib/pgxc_ctl/config.c
+++ b/contrib/pgxc_ctl/config.c
@@ -1208,5 +1208,6 @@ int getDefaultWalSender(int isCoord)
                 if (doesExist(names, ii) && !is_none(aval(names)[ii]) && (atoi(aval(walSender)[ii]) >= 0))
                         return atoi(aval(walSender)[ii]);
         }
-       return 0;
+       /* If none found, return 5 as the default value. */
+       return 5;
  }
diff --git a/contrib/pgxc_ctl/datanode_cmd.c b/contrib/pgxc_ctl/datanode_cmd.c

index a60f53c68a2c407123221b4289dbef433c3012d4..18180bd896d3c673458d81130fbf8bfe08217303 100644 (file)
--- a/contrib/pgxc_ctl/datanode_cmd.c
+++ b/contrib/pgxc_ctl/datanode_cmd.c
@@ -1772,7 +1772,7 @@ int remove_datanodeSlave(char *name, int clean_opt)
         }
         fprintf(f, 
                         "#================================================================\n"
-                       "# pgxc configuration file updated due to coodinator slave removal\n"
+                       "# pgxc configuration file updated due to datanode slave removal\n"
                         "#        %s\n",
                         timeStampString(date, MAXTOKEN+1));
         fprintSval(f, VAR_datanodeSlave);
diff --git a/contrib/pgxc_ctl/do_shell.c b/contrib/pgxc_ctl/do_shell.c

index 7dd126c851b761a5eabc6393252e07e60b7f45fb..8c112651fca67e70706aa0deb0012c23ffbc8ba5 100644 (file)
--- a/contrib/pgxc_ctl/do_shell.c
+++ b/contrib/pgxc_ctl/do_shell.c
@@ -715,7 +715,7 @@ void dump_cmdList(cmdList_t *cmdList)
                  "allocated = %d, used = %d\n", cmdList->allocated, cmdList->used);
         if (cmdList->cmds == NULL)
         {
-               elog(DEBUG1, "=== No command dfined. ===\n");
+               elog(DEBUG1, "=== No command defined. ===\n");
                 return;
         }
         for (ii = 0; cmdList->cmds[ii]; ii++)
diff --git a/contrib/pgxc_ctl/gtm_cmd.c b/contrib/pgxc_ctl/gtm_cmd.c

index 5f92fb7965a2d3e540f18cbc55745bbd9fc5c776..7fc77ba28cfa6d534f14b631cf15246d06e4f603 100644 (file)
--- a/contrib/pgxc_ctl/gtm_cmd.c
+++ b/contrib/pgxc_ctl/gtm_cmd.c
@@ -124,11 +124,17 @@ cmd_t *prepare_initGtmMaster(bool stop)
   
  int init_gtm_master(bool stop)
  {
-       int rc;
+       int rc = 0;
         cmdList_t *cmdList;
         cmd_t *cmd;
  
         elog(INFO, "Initialize GTM master\n");
+       if (is_none(sval(VAR_gtmMasterServer)))
+       {
+               elog(INFO, "No GTM master specified, exiting!\n");
+               return rc;
+       }
+
         cmdList = initCmdList();
  
         /* Kill current gtm, build work directory and run initgtm */
@@ -482,9 +488,14 @@ cmd_t *prepare_startGtmMaster(void)
  int start_gtm_master(void)
  {
         cmdList_t *cmdList;
-       int rc;
+       int rc = 0;
  
         elog(INFO, "Start GTM master\n");
+       if (is_none(sval(VAR_gtmMasterServer)))
+       {
+               elog(INFO, "No GTM master specified, cannot start. Exiting!\n");
+               return rc;
+       }
         cmdList = initCmdList();
         addCmd(cmdList, prepare_startGtmMaster());
         rc = doCmdList(cmdList);
diff --git a/contrib/pgxc_ctl/pgxc_ctl.c b/contrib/pgxc_ctl/pgxc_ctl.c

index eaad8a0b8a23b944aa4ac3d6c8f22a8d2b10546e..cb59eed1add1557cf8f888ab7725dd967ba44495 100644 (file)
--- a/contrib/pgxc_ctl/pgxc_ctl.c
+++ b/contrib/pgxc_ctl/pgxc_ctl.c
@@ -266,7 +266,7 @@ static void read_configuration(void)
         read_vars(conf);
         pclose(conf);
         uninstall_pgxc_ctl_bash(pgxc_ctl_bash_path);
-       elog(INFO, "Finished to read configuration.\n");
+       elog(INFO, "Finished reading configuration.\n");
  }
  
  static void prepare_pgxc_ctl_bash(char *path)
diff --git a/doc/src/sgml/pgxc_ctl-ref.sgml b/doc/src/sgml/pgxc_ctl-ref.sgml

index 50c094c060178b46297a6a4a476d1c1b3164e5d8..9405a3086f3a422ba574bff581c78b910013c674 100644 (file)
--- a/doc/src/sgml/pgxc_ctl-ref.sgml
+++ b/doc/src/sgml/pgxc_ctl-ref.sgml
@@ -212,6 +212,14 @@ You may also generate a template configuration file suitable for testing
  <programlisting>
  PGXC$ prepare config minimal
  PGXC$ prepare config minimal my_minimal_pgxc.conf
+</programlisting>
+
+Alternatively, you may want to start off with a completely empty cluster and
+add all the nodes one-by-one. Use the <literal>empty</literal> option to generate
+an empty template configuration file.
+<programlisting>
+PGXC$ prepare config empty 
+PGXC$ prepare config empty my_empty_pgxc.conf
  </programlisting>
  
     A more detailed syntax of the command will be described in a later section.
@@ -222,6 +230,16 @@ PGXC$ prepare config minimal my_minimal_pgxc.conf
   <sect2>
    <title>Make your configuration</title>
  
+  <para>
+  If you are starting with an <literal>empty</literal> configuration file, then
+  there is no real need to provide values for most of the variables. However, if
+  you want to provide custom values for <filename>pg_hba.conf</filename> entries
+  or additional parameters to be added to your <filename>postgresql.conf</filename>
+  file, then you will need to do so before going ahead with your cluster creation.
+  You can skip the rest of this section if you are going ahead with an
+  <literal>empty</literal> configuration.
+  </para>
+
    <para>
     Please take a look at the template of the configuration file you
     created in the previous section.
@@ -726,36 +744,45 @@ $
     As described in the previous section, you can configure your
     <application>Postgres-XL</application> cluster by editing
     <filename>pgxc_ctl.conf</filename> or other configuration files manually.
-   But editing the file from the scratch can be a mess.  It is much better to
-   have a separate configuration file.  You can create a configuration file
-   template by typing
+   But editing the file can be a bit of work. A better way would be to
+   start off with an empty configuration file. The pgxc_ctl utility supports
+   three types of templates as shown below.
  
+<programlisting>
+PGXC$ prepare config empty
+</programlisting>
+or
+<programlisting>
+PGXC$ prepare config minimal 
+</programlisting>
+or
  <programlisting>
  PGXC$ prepare config
-PGXC$ 
  </programlisting>
    </para>
  
    <para>
-   You have your <filename>pgxc_ctl.conf</filename> file at
-   <filename>$HOME/pgxc_ctl</filename>.
+   The default <filename>pgxc_ctl.conf</filename> file can be found inside the 
+   <filename>$HOME/pgxc_ctl</filename> location.
     You can edit it to configure your
-   <application>Postgres-XL</application> cluster.
-   When it messes up, you can again create the template with
+   <application>Postgres-XL</application> cluster or you can choose
+   to start with an empty cluster and add components one-by-one.
+   When the configuration is messed up, you can again create a specific template
+   of your choice with the proper 
     <command>prepare config</command> command.
-   If you want to use other file name, specify the names
-   <command>prepare config</command> command option like:
+   You can choose to specify your own custom name for the configuration file
+   like below:
  
  <programlisting>
-PGXC$ prepare config my_config.conf
+PGXC$ prepare config empty my_config.conf
  </programlisting>
    </para>
    <para>
-   Then you can edit this file to configure your
-   <application>postgres-XL</application> cluster.  This file is actually a
-   bash script file defining many variables to define the cluster
-   configuration.  With template values and comments, it will be easy to
-   understand what they mean.
+   Then you can edit this file to configure and customize your
+   <application>postgres-XL</application> cluster. This configuration file is basically a
+   bash script file which declares many variables to define the cluster
+   configuration. Although it might seem confusing at first, with the template values and comments,
+   one can easily understand what each of these variables means.
     </para>
     <para>
     You can also generate a minimal configuration file, good enough to test
@@ -766,8 +793,8 @@ PGXC$ prepare config minimal
  PGXC$ prepare config minimal my_minimal_config.conf
  </programlisting>
  
-   The following describes each variable in the order you find in the
-   configuration template. 
+   Given below is the description of the various variables in the order that they
+   appear in the configuration file.
    </para>
  
    <sect3>
@@ -1431,7 +1458,7 @@ PGXC$ prepare config minimal my_minimal_config.conf
        <term><option>datanodeNames</option></term>
        <listitem>
         <para>
-        Array to specify Coordinator names.
+        Array to specify Datanode names.
         </para>
        </listitem>
       </varlistentry>
@@ -1594,16 +1621,17 @@ PGXC$ prepare config minimal my_minimal_config.conf
     <variablelist>
  
     <varlistentry>
+    <term><literal>add gtm master <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">dir</replaceable></literal></term>
      <term><literal>add gtm slave <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">dir</replaceable></literal></term>
      <term><literal>add gtm_proxy <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">dir</replaceable></literal></term>
      <term><literal>add coordinator master <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable> <replaceable class="parameter">dir</replaceable>< <replaceable class="parameter">extraServerConf</replaceable> <replaceable class="parameter">extraPgHbaConf</replaceable></literal></term>
      <term><literal>add coordinator slave <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable> <replaceable class="parameter">dir</replaceable> <replaceable class="parameter">archDir</replaceable></literal></term>
-    <term><literal>add datanode master <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable>  <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable>  <replaceable class="parameter">dir</replaceable> <replaceable class="parameter">xlogdir</replaceable>  <replaceable class="parameter">restoreDatanode</replaceable> <replaceable class="parameter">extraServerConf</replaceable> <replaceable class="parameter">extraPgHbaConf</replaceable></literal></term>
+    <term><literal>add datanode master <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable>  <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable>  <replaceable class="parameter">dir</replaceable> <replaceable class="parameter">xlogdir</replaceable>  <replaceable class="parameter">extraServerConf</replaceable> <replaceable class="parameter">extraPgHbaConf</replaceable></literal></term>
      <term><literal>add datanode slave <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable>  <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable>  <replaceable class="parameter">dir</replaceable> <replaceable class="parameter">xlogdir</replaceable> <replaceable class="parameter">archDir</replaceable></literal></term>
      <listitem>
       <para>
        Add the specified node to your Postgres-XL cluster.  Each node needs a
-      host name and its work directory.  GTM slave, GTM proxy, Coordinator
+      host name and its work directory. GTM master, GTM slave, GTM proxy, Coordinator
        master/slave and Datanode master/slave need its own port to listen to.
        Coordinators and Datanodes also need a pooler port to pool connections to
        Datanodes.  Coordinator and Datanode slaves need a directory to receive
@@ -1620,6 +1648,16 @@ PGXC$ prepare config minimal my_minimal_config.conf
       <para>
        You cannot add slaves without master.
       </para>
+     <para>
+     Typically, when you start with an empty configuration file, first you will add your
+     GTM node. Then you will add your first Coordinator master and then the first Datanode master.
+     When you add a Coordinator master and it is the first Coordinator in the cluster, then it
+     starts up on its own with empty node metadata. Otherwise the new Coordinator master connects to any
+     existing Coordinator and gets the existing node metadata of the cluster.
+     When you add a Datanode master and it is the first Datanode, then it connects
+     to any existing Coordinator to get the node metadata. Otherwise the Datanode master
+     connects to any existing Datanode and gets the current metadata from it.
+     </para>
      </listitem>
     </varlistentry>
  
@@ -1789,7 +1827,8 @@ PGXC$ prepare config minimal my_minimal_config.conf
     </varlistentry>
  
     <varlistentry>
-    <term><literal>remove gtm slave</literal></term>
+    <term><literal>remove gtm master [ clean ]</literal></term>
+    <term><literal>remove gtm slave [ clean ]</literal></term>
      <term><literal>remove gtm_proxy <replaceable class="parameter">nodename</replaceable> [ clean ]</literal></term>
      <term><literal>remove coordinator [ master| slave ] <replaceable class="parameter">nodename</replaceable> [ clean ]</literal></term>
      <term><literal>remove datanode [ master| slave ] <replaceable class="parameter">nodename</replaceable> [ clean ]</literal></term>
diff --git a/doc/src/sgml/start.sgml b/doc/src/sgml/start.sgml

index cf285f8777297de3ad19d29d830ce68cb40c5241..1bd9c47885a561b40d67054c403799be118b3ca3 100644 (file)
--- a/doc/src/sgml/start.sgml
+++ b/doc/src/sgml/start.sgml
@@ -187,6 +187,604 @@
     </para>
    </sect1>
  
+  <sect1 id="tutorial-createcluster">
+   <title>Creating a Postgres-XL cluster</title>
+
+   <para>
+    As mentioned in the architectural fundamentals, <productname>Postgres-XL</productname>
+    is a collection of multiple components. It can be a bit of work to come up with your
+    initial working setup. In this tutorial, we will show how one can start with
+    an <literal>empty</literal> configuration file and use the <application>pgxc_ctl</application>
+    utility to create your <productname>Postgres-XL</productname> cluster from scratch.
+   </para>
+
+   <para>
+    A few pre-requisites are necessary on each node that is going to be a part of the
+    <productname>Postgres-XL</productname> setup.
+
+    <itemizedlist>
+     <listitem>
+      <para>
+       Password-less ssh access is required from the node that is going to run the
+       <application>pgxc_ctl</application> utility.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+      The PATH environment variable should have the correct <productname>Postgres-XL</productname>
+      binaries on all nodes, especially while running a command via ssh.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+      The <filename>pg_hba.conf</filename> entries must be updated to allow remote access. Variables
+      like <option>coordPgHbaEntries</option> and <option>datanodePgHbaEntries</option>
+      in the <filename>pgxc_ctl.conf</filename> configuration file may need appropriate changes.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+      Firewalls and iptables may need to be updated to allow access to ports.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </para>
+
+  <para>
+  The <application>pgxc_ctl</application> utility should be present in your PATH. If it is
+  not there, it can be compiled from source.
+<screen>
+<prompt>$</prompt> <userinput>cd $XLSRC/contrib/pgxc_ctl</userinput>
+<prompt>$</prompt> <userinput>make install</userinput>
+</screen>
+
+  We are now ready to prepare our template configuration file. The <application>pgxc_ctl</application>
+  utility allows you to create three types of configuration. We will choose the <literal>empty</literal>
+  configuration which will allow us to create our <productname>Postgres-XL</productname> setup from
+  scratch. Note that we also need to set up the <option>dataDirRoot</option> environment
+  variable properly for all future invocations of <application>pgxc_ctl</application>.
+<screen>
+<prompt>$</prompt> <userinput>export dataDirRoot=$HOME/DATA/pgxl/nodes</userinput>
+<prompt>$</prompt> <userinput>mkdir $HOME/pgxc_ctl</userinput>
+<prompt>$</prompt> <userinput>pgxc_ctl</userinput>
+Installing pgxc_ctl_bash script as /Users/nikhils/pgxc_ctl/pgxc_ctl_bash.
+Installing pgxc_ctl_bash script as /Users/nikhils/pgxc_ctl/pgxc_ctl_bash.
+Reading configuration using /Users/nikhils/pgxc_ctl/pgxc_ctl_bash --home
+/Users/nikhils/pgxc_ctl --configuration
+/Users/nikhils/pgxc_ctl/pgxc_ctl.conf
+Finished reading configuration.
+   ******** PGXC_CTL START ***************
+
+   Current directory: /Users/nikhils/pgxc_ctl
+<prompt>PGXC$ </prompt> <userinput>prepare config empty</userinput>
+<prompt>PGXC$ </prompt> <userinput>exit</userinput>
+</screen>
+
+   The <literal>empty</literal> configuration file is now ready. You should now make changes
+   to the <filename>pgxc_ctl.conf</filename>. At a minimum, <option>pgxcOwner</option>
+   should be set correctly. The configuration file uses the <envar>USER</> and <envar>HOME</>
+   environment variables to provide easy defaults for the current user.
+   </para>
+
+   <para>
+   The next step is to add the GTM master to the setup. 
+<screen>
+<prompt>$</prompt> <userinput>pgxc_ctl</userinput>
+<prompt>PGXC$ </prompt> <userinput>add gtm master gtm localhost 20001 $dataDirRoot/gtm</userinput>
+</screen>
+
+    Use the "monitor" command to check the status of the cluster.
+<screen>
+<prompt>$</prompt> <userinput>pgxc_ctl</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+</screen>
+   </para>
+
+
+   <para>
+   Let us now add a couple of coordinators. When the first coordinator is added, it just
+starts up. When another coordinator is added, it connects to any existing coordinator node
+to fetch the metadata about objects. 
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add coordinator master coord1 localhost 30001 30011 $dataDirRoot/coord_master.1 none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+<prompt>PGXC$ </prompt> <userinput>add coordinator master coord2 localhost 30002 30012 $dataDirRoot/coord_master.2 none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+</screen>
+   </para>
+
+   <para>
+   Let us now move on to adding a couple of datanodes. When the first datanode is added,
+it connects to any existing coordinator node to fetch global metadata. When a subsequent
+datanode is added, it connects to any existing datanode for the metadata.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add datanode master dn1 localhost 40001 40011 $dataDirRoot/dn_master.1 none none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+<prompt>PGXC$ </prompt> <userinput>add datanode master dn2 localhost 40002 40012 $dataDirRoot/dn_master.2 none none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+</screen>
+</para>
+
+<para>
+  Your <productname>Postgres-XL</productname> setup is ready now and you can move on to the next
+  "Getting Started" topic. 
+  </para>
+  <para>
+  Read on further, only if you want a quick crash course on the various commands you can
+  try out with <productname>Postgres-XL</productname>. It is strongly recommended to go through
+  the entire documentation for more details on each and every command that we will touch upon
+  below.
+  </para>
+
+  <para>
+  Connect to one of the coordinators and create a test database.
+<screen>
+<prompt>$ </prompt> <userinput>psql -p 30001 postgres</userinput>
+postgres=# CREATE DATABASE testdb;
+CREATE DATABASE
+postgres=# \q
+</screen>
+
+Look at the pgxc_node catalog. It should show all the configured nodes. It is normal to have
+negative node id values. This will be fixed soon.
+<screen>
+<prompt>$ </prompt> <userinput>psql -p 30001 testdb</userinput>
+testdb=# SELECT * FROM pgxc_node;
+ node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred |   node_id   
+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ coord1    | C         |     30001 | localhost | f              | f                |  1885696643
+ coord2    | C         |     30002 | localhost | f              | f                | -1197102633
+ dn1       | D         |     40001 | localhost | t              | t                |  -560021589
+ dn2       | D         |     40002 | localhost | f              | t                |   352366662
+(4 rows)
+
+</screen>
+
+Let us now create a distributed table, distributed on first column by HASH.
+
+<screen>
+testdb=# CREATE TABLE disttab(col1 int, col2 int, col3 text) DISTRIBUTE BY HASH(col1);
+CREATE TABLE
+testdb=# \d+ disttab
+                        Table "public.disttab"
+ Column |  Type   | Modifiers | Storage  | Stats target | Description 
+--------+---------+-----------+----------+--------------+-------------
+ col1   | integer |           | plain    |              | 
+ col2   | integer |           | plain    |              | 
+ col3   | text    |           | extended |              | 
+Has OIDs: no
+Distribute By: HASH(col1)
+Location Nodes: ALL DATANODES
+
+</screen>
+
+Also create a replicated table.
+
+<screen>
+testdb=# CREATE TABLE repltab (col1 int, col2 int) DISTRIBUTE BY
+REPLICATION;
+CREATE TABLE
+testdb=# \d+ repltab
+                       Table "public.repltab"
+ Column |  Type   | Modifiers | Storage | Stats target | Description 
+--------+---------+-----------+---------+--------------+-------------
+ col1   | integer |           | plain   |              | 
+ col2   | integer |           | plain   |              | 
+Has OIDs: no
+Distribute By: REPLICATION
+Location Nodes: ALL DATANODES
+
+</screen>
+
+Now insert some sample data in these tables.
+<screen>
+testdb=# INSERT INTO disttab VALUES (generate_series(1,100), generate_series(101, 200), 'foo');
+INSERT 0 100
+testdb=# INSERT INTO repltab VALUES (generate_series(1,100), generate_series(101, 200));
+INSERT 0 100
+
+</screen>
+Ok. So the distributed table should have 100 rows
+
+<screen>
+testdb=# SELECT count(*) FROM disttab;
+ count 
+-------
+   100
+(1 row)
+
+
+</screen>
+
+And the rows should not all be on the same node. <literal>xc_node_id</> is a system
+column which shows the originating datanode for each row.
+
+Note that the distribution can be slightly uneven because of the HASH
+function.
+
+<screen>
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+ -560021589 |    42
+  352366662 |    58
+(2 rows)
+
+
+</screen>
+For replicated tables, we expect all rows to come from a single
+datanode (even though the other node has a copy too).
+
+<screen>
+testdb=# SELECT count(*) FROM repltab;
+ count 
+-------
+   100
+(1 row)
+
+testdb=# SELECT xc_node_id, count(*) FROM repltab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+ -560021589 |   100
+(1 row)
+
+</screen>
+
+Now add a new datanode to the cluster.
+
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add datanode master dn3 localhost 40003 40013 $dataDirRoot/dn_master.3 none none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+Running: datanode master dn3
+</screen>
+
+
+Note that during cluster reconfiguration, all outstanding transactions
+are aborted and sessions are reset. So you would typically see errors
+like these on open sessions:
+
+<screen>
+testdb=# SELECT * FROM pgxc_node;
+ERROR:  canceling statement due to user request             <==== pgxc_pool_reload() resets all sessions and aborts all open transactions
+
+testdb=# SELECT * FROM pgxc_node;
+ node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred |   node_id   
+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ coord1    | C         |     30001 | localhost | f              | f                |  1885696643
+ coord2    | C         |     30002 | localhost | f              | f                | -1197102633
+ dn1       | D         |     40001 | localhost | t              | t                |  -560021589
+ dn2       | D         |     40002 | localhost | f              | t                |   352366662
+ dn3       | D         |     40003 | localhost | f              | f                |  -700122826
+(5 rows)
+</screen>
+
+Note that existing tables are not affected by the addition of a new datanode. The distribution information now
+explicitly shows the older datanodes.
+<screen>
+testdb=# \d+ disttab
+                        Table "public.disttab"
+ Column |  Type   | Modifiers | Storage  | Stats target | Description 
+--------+---------+-----------+----------+--------------+-------------
+ col1   | integer |           | plain    |              | 
+ col2   | integer |           | plain    |              | 
+ col3   | text    |           | extended |              | 
+Has OIDs: no
+Distribute By: HASH(col1)
+Location Nodes: dn1, dn2
+
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+ -560021589 |    42
+  352366662 |    58
+(2 rows)
+
+testdb=# \d+ repltab
+                       Table "public.repltab"
+ Column |  Type   | Modifiers | Storage | Stats target | Description 
+--------+---------+-----------+---------+--------------+-------------
+ col1   | integer |           | plain   |              | 
+ col2   | integer |           | plain   |              | 
+Has OIDs: no
+Distribute By: REPLICATION
+Location Nodes: dn1, dn2
+</screen>
+
+Let us now try to redistribute tables so that they can take advantage
+of the new datanode.
+
+<screen>
+testdb=# ALTER TABLE disttab ADD NODE (dn3);
+ALTER TABLE
+testdb=# \d+ disttab
+                        Table "public.disttab"
+ Column |  Type   | Modifiers | Storage  | Stats target | Description 
+--------+---------+-----------+----------+--------------+-------------
+ col1   | integer |           | plain    |              | 
+ col2   | integer |           | plain    |              | 
+ col3   | text    |           | extended |              | 
+Has OIDs: no
+Distribute By: HASH(col1)
+Location Nodes: ALL DATANODES
+
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+ -700122826 |    32
+  352366662 |    32
+ -560021589 |    36
+(3 rows)
+
+</screen>
+
+Let us now add a third coordinator.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add coordinator master coord3 localhost 30003 30013 $dataDirRoot/coord_master.3 none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: coordinator master coord3
+Running: datanode master dn1
+Running: datanode master dn2
+Running: datanode master dn3
+
+testdb=# SELECT * FROM pgxc_node;
+ERROR:  canceling statement due to user request
+testdb=# SELECT * FROM pgxc_node;
+ node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred |   node_id   
+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ coord1    | C         |     30001 | localhost | f              | f                |  1885696643
+ coord2    | C         |     30002 | localhost | f              | f                | -1197102633
+ dn1       | D         |     40001 | localhost | t              | t                |  -560021589
+ dn2       | D         |     40002 | localhost | f              | t                |   352366662
+ dn3       | D         |     40003 | localhost | f              | f                |  -700122826
+ coord3    | C         |     30003 | localhost | f              | f                |  1638403545
+(6 rows)
+
+</screen>
+
+We can try some more ALTER TABLE commands to delete a node from a table's
+distribution and then add it back.
+
+<screen>
+testdb=# ALTER TABLE disttab DELETE NODE (dn1);
+ALTER TABLE
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+  352366662 |    42
+ -700122826 |    58
+(2 rows)
+
+testdb=# ALTER TABLE disttab ADD NODE (dn1);
+ALTER TABLE
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+ -700122826 |    32
+  352366662 |    32
+ -560021589 |    36
+(3 rows)
+</screen>
+
+
+You could also alter a replicated table to make it a distributed table.
+Note that even though the cluster has 3 datanodes now, the table will continue
+to use only the 2 datanodes on which it was originally replicated.
+
+<screen>
+testdb=# ALTER TABLE repltab DISTRIBUTE BY HASH(col1);
+ALTER TABLE
+testdb=# SELECT xc_node_id, count(*) FROM repltab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+ -560021589 |    42
+  352366662 |    58
+(2 rows)
+
+testdb=# ALTER TABLE repltab DISTRIBUTE BY REPLICATION;
+ALTER TABLE
+testdb=# SELECT xc_node_id, count(*) FROM repltab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+ -560021589 |   100
+(1 row)
+</screen>
+
+Now remove the coordinator that was added previously. You can use the "clean" option
+to remove the corresponding data directory as well.
+
+<screen>
+<prompt>PGXC$ </prompt> <userinput>remove coordinator master coord3 clean</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+Running: datanode master dn3
+
+testdb=# SELECT oid, * FROM pgxc_node;
+ERROR:  canceling statement due to user request
+testdb=# SELECT oid, * FROM pgxc_node;
+  oid  | node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred |   node_id   
+-------+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ 11197 | coord1    | C         |     30001 | localhost | f              | f                |  1885696643
+ 16384 | coord2    | C         |     30002 | localhost | f              | f                | -1197102633
+ 16385 | dn1       | D         |     40001 | localhost | t              | t                |  -560021589
+ 16386 | dn2       | D         |     40002 | localhost | f              | t                |   352366662
+ 16397 | dn3       | D         |     40003 | localhost | f              | f                |  -700122826
+(5 rows)
+
+</screen>
+
+Let us try to remove a datanode now. NOTE: <productname>Postgres-XL</productname> does not
+employ any additional checks to ascertain if the datanode being dropped has data from tables
+that are replicated/distributed. It is the responsibility of the user to ensure that it's
+safe to remove a datanode.
+
+You can use the below query to find out if the datanode being removed has any data on it.
+Do note that this only shows tables from the current database. You might want to ensure
+the same for all databases before going ahead with the datanode removal. Use the OID of the
+datanode that is to be removed in the below query: 
+ 
+<screen>
+testdb=# SELECT * FROM pgxc_class WHERE nodeoids::integer[] @> ARRAY[16397];
+ pcrelid | pclocatortype | pcattnum | pchashalgorithm | pchashbuckets |     nodeoids      
+---------+---------------+----------+-----------------+---------------+-------------------
+   16388 | H             |        1 |               1 |          4096 | 16385 16386 16397
+(1 row)
+
+
+testdb=# ALTER TABLE disttab DELETE NODE (dn3);
+ALTER TABLE
+testdb=# SELECT * FROM pgxc_class WHERE nodeoids::integer[] @> ARRAY[16397];
+ pcrelid | pclocatortype | pcattnum | pchashalgorithm | pchashbuckets | nodeoids 
+---------+---------------+----------+-----------------+---------------+----------
+(0 rows)
+</screen>
+
+Ok, it is safe to remove datanode "dn3" now.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>remove datanode master dn3 clean</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+
+testdb=# SELECT oid, * FROM pgxc_node;
+ERROR:  canceling statement due to user request
+testdb=# SELECT oid, * FROM pgxc_node;
+  oid  | node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred |   node_id   
+-------+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ 11197 | coord1    | C         |     30001 | localhost | f              | f                |  1885696643
+ 16384 | coord2    | C         |     30002 | localhost | f              | f                | -1197102633
+ 16385 | dn1       | D         |     40001 | localhost | t              | t                |  -560021589
+ 16386 | dn2       | D         |     40002 | localhost | f              | t                |   352366662
+(4 rows)
+
+</screen>
+
+The <application>pgxc_ctl</application> utility can also help in setting up slaves for
+datanodes and coordinators. Let us set up a slave for a datanode and see how failover can
+be performed in case the master datanode goes down.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add datanode slave dn1 localhost 40101 40111 $dataDirRoot/dn_slave.1 none $dataDirRoot/datanode_archlog.1</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode slave dn1
+Running: datanode master dn2
+
+testdb=# EXECUTE DIRECT ON(dn1) 'SELECT client_hostname, state, sync_state FROM pg_stat_replication';
+ client_hostname |   state   | sync_state 
+-----------------+-----------+------------
+                 | streaming | async
+(1 row)
+</screen>
+
+Add some more rows to test failover now.
+
+<screen>
+testdb=# INSERT INTO disttab VALUES (generate_series(1001,1100), generate_series(1101, 1200), 'foo');
+INSERT 0 100
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+ -560021589 |    94
+  352366662 |   106
+(2 rows)
+</screen>
+
+Let us simulate datanode failover now. We will first stop the datanode master "dn1" for
+which we configured a slave above. Note that since the slave is connected to the master,
+we will use "immediate" mode for stopping the master.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>stop -m immediate datanode master dn1</userinput>
+</screen>
+
+Since a datanode is down, queries will fail. However, a few queries may still work if
+the failed node is not required to run the query; that is determined by the
+distribution of the data and the WHERE clause being used.
+
+<screen>
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ERROR:  Failed to get pooled connections
+
+testdb=# SELECT xc_node_id, * FROM disttab WHERE col1 = 3;
+ xc_node_id | col1 | col2 | col3 
+------------+------+------+------
+  352366662 |    3 |  103 | foo
+(1 row)
+</screen>
+
+We will now perform the failover and check that everything is working fine afterwards.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>failover datanode dn1</userinput>
+
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ERROR:  canceling statement due to user request
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count 
+------------+-------
+ -560021589 |    94
+  352366662 |   106
+(2 rows)
+</screen>
+
+
+The pgxc_node catalog should now have updated entries. In particular, the
+node_host and node_port of the failed-over datanode should have been replaced
+with the slave's host and port values.
+
+<screen>
+testdb=# SELECT oid, * FROM pgxc_node;
+  oid  | node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred |   node_id   
+-------+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ 11197 | coord1    | C         |     30001 | localhost | f              | f                |  1885696643
+ 16384 | coord2    | C         |     30002 | localhost | f              | f                | -1197102633
+ 16386 | dn2       | D         |     40002 | localhost | f              | t                |   352366662
+ 16385 | dn1       | D         |     40101 | localhost | t              | t                |  -560021589
+(4 rows)
+
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+</screen>
+</para>
+
+  </sect1>
  
    <sect1 id="tutorial-createdb">
     <title>Creating a Database</title>
diff --git a/src/backend/pgxc/pool/poolutils.c b/src/backend/pgxc/pool/poolutils.c

index fad39cda9e8dc78d61219685de8c9a2891589171..9dbf9d81cb104d458a44c842545666e4ac0c4d84 100644 (file)
--- a/src/backend/pgxc/pool/poolutils.c
+++ b/src/backend/pgxc/pool/poolutils.c
@@ -90,7 +90,7 @@ pgxc_pool_check(PG_FUNCTION_ARGS)
   *    to remote nodes. This results in losing prepared and temporary objects
   *    in all the sessions of server. All the existing transactions are aborted
   *    and a WARNING message is sent back to client.
- *    Session that invocated the reload does the same process, but no WARNING
+ *    Session that invoked the reload does the same process, but no WARNING
   *    message is sent back to client.
   */
  Datum
author	Pavan Deolasee <[email protected]>
	Wed, 10 Feb 2016 10:14:00 +0000 (15:44 +0530)
committer	Pavan Deolasee <[email protected]>
	Wed, 10 Feb 2016 10:14:00 +0000 (15:44 +0530)
contrib/pgxc_ctl/config.c		patch \| blob \| blame \| history
contrib/pgxc_ctl/datanode_cmd.c		patch \| blob \| blame \| history
contrib/pgxc_ctl/do_shell.c		patch \| blob \| blame \| history
contrib/pgxc_ctl/gtm_cmd.c		patch \| blob \| blame \| history
contrib/pgxc_ctl/pgxc_ctl.c		patch \| blob \| blame \| history
doc/src/sgml/pgxc_ctl-ref.sgml		patch \| blob \| blame \| history
doc/src/sgml/start.sgml		patch \| blob \| blame \| history
src/backend/pgxc/pool/poolutils.c		patch \| blob \| blame \| history