From: Pavan Deolasee Date: Wed, 10 Feb 2016 10:14:00 +0000 (+0530) Subject: Some misc fixes and documentation updates for pgxc_ctl X-Git-Tag: XL9_5_R1BETA1~28 X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=c05d28fee2ac95f8dbda0bed37d6cd2a1f6eeeb6;p=postgres-xl.git Some misc fixes and documentation updates for pgxc_ctl --- diff --git a/contrib/pgxc_ctl/config.c b/contrib/pgxc_ctl/config.c index 2098fd786d..b36fa2bc82 100644 --- a/contrib/pgxc_ctl/config.c +++ b/contrib/pgxc_ctl/config.c @@ -1208,5 +1208,6 @@ int getDefaultWalSender(int isCoord) if (doesExist(names, ii) && !is_none(aval(names)[ii]) && (atoi(aval(walSender)[ii]) >= 0)) return atoi(aval(walSender)[ii]); } - return 0; + /* If none found, return 5 as the default value.. */ + return 5; } diff --git a/contrib/pgxc_ctl/datanode_cmd.c b/contrib/pgxc_ctl/datanode_cmd.c index a60f53c68a..18180bd896 100644 --- a/contrib/pgxc_ctl/datanode_cmd.c +++ b/contrib/pgxc_ctl/datanode_cmd.c @@ -1772,7 +1772,7 @@ int remove_datanodeSlave(char *name, int clean_opt) } fprintf(f, "#================================================================\n" - "# pgxc configuration file updated due to coodinator slave removal\n" + "# pgxc configuration file updated due to datanode slave removal\n" "# %s\n", timeStampString(date, MAXTOKEN+1)); fprintSval(f, VAR_datanodeSlave); diff --git a/contrib/pgxc_ctl/do_shell.c b/contrib/pgxc_ctl/do_shell.c index 7dd126c851..8c112651fc 100644 --- a/contrib/pgxc_ctl/do_shell.c +++ b/contrib/pgxc_ctl/do_shell.c @@ -715,7 +715,7 @@ void dump_cmdList(cmdList_t *cmdList) "allocated = %d, used = %d\n", cmdList->allocated, cmdList->used); if (cmdList->cmds == NULL) { - elog(DEBUG1, "=== No command dfined. ===\n"); + elog(DEBUG1, "=== No command defined. 
===\n"); return; } for (ii = 0; cmdList->cmds[ii]; ii++) diff --git a/contrib/pgxc_ctl/gtm_cmd.c b/contrib/pgxc_ctl/gtm_cmd.c index 5f92fb7965..7fc77ba28c 100644 --- a/contrib/pgxc_ctl/gtm_cmd.c +++ b/contrib/pgxc_ctl/gtm_cmd.c @@ -124,11 +124,17 @@ cmd_t *prepare_initGtmMaster(bool stop) int init_gtm_master(bool stop) { - int rc; + int rc = 0; cmdList_t *cmdList; cmd_t *cmd; elog(INFO, "Initialize GTM master\n"); + if (is_none(sval(VAR_gtmMasterServer))) + { + elog(INFO, "No GTM master specified, exiting!\n"); + return rc; + } + cmdList = initCmdList(); /* Kill current gtm, build work directory and run initgtm */ @@ -482,9 +488,14 @@ cmd_t *prepare_startGtmMaster(void) int start_gtm_master(void) { cmdList_t *cmdList; - int rc; + int rc = 0; elog(INFO, "Start GTM master\n"); + if (is_none(sval(VAR_gtmMasterServer))) + { + elog(INFO, "No GTM master specified, cannot start. Exiting!\n"); + return rc; + } cmdList = initCmdList(); addCmd(cmdList, prepare_startGtmMaster()); rc = doCmdList(cmdList); diff --git a/contrib/pgxc_ctl/pgxc_ctl.c b/contrib/pgxc_ctl/pgxc_ctl.c index eaad8a0b8a..cb59eed1ad 100644 --- a/contrib/pgxc_ctl/pgxc_ctl.c +++ b/contrib/pgxc_ctl/pgxc_ctl.c @@ -266,7 +266,7 @@ static void read_configuration(void) read_vars(conf); pclose(conf); uninstall_pgxc_ctl_bash(pgxc_ctl_bash_path); - elog(INFO, "Finished to read configuration.\n"); + elog(INFO, "Finished reading configuration.\n"); } static void prepare_pgxc_ctl_bash(char *path) diff --git a/doc/src/sgml/pgxc_ctl-ref.sgml b/doc/src/sgml/pgxc_ctl-ref.sgml index 50c094c060..9405a3086f 100644 --- a/doc/src/sgml/pgxc_ctl-ref.sgml +++ b/doc/src/sgml/pgxc_ctl-ref.sgml @@ -212,6 +212,14 @@ You may also generate a template configuration file suitable for testing PGXC$ prepare config minimal PGXC$ prepare config minimal my_minimal_pgxc.conf + + +If you want, you may want to start off with a completely empty cluster to +add all the nodes one-by-one. 
Use option empty to generate +an empty template configuration file. + +PGXC$ prepare config empty +PGXC$ prepare config empty my_empty_pgxc.conf A more detailed syntax of the command will be described in a later section. @@ -222,6 +230,16 @@ PGXC$ prepare config minimal my_minimal_pgxc.conf Make your configuration + + If you are starting with an empty configuration file, then + there is no real need to provide values for most of the variables. However, if + you want to provide custom values for pg_hba.conf entries + or additional parameters to be added to your postgresql.conf + file, then you will need to do so before going ahead with your cluster creation. + You can skip the rest of this section if you are going ahead with an + empty configuration. + + Please take a look at the template of the configuration file you created in the previous section. @@ -726,36 +744,45 @@ $ As described in the previous section, you can configure your Postgres-XL cluster by editing pgxc_ctl.conf or other configuration files manually. - But editing the file from the scratch can be a mess. It is much better to - have a separate configuration file. You can create a configuration file - template by typing + But editing the file can be a bit of work. A better way would be to + start off with an empty configuration file. The pgxc_ctl utility supports + three types of templates as shown below. + +PGXC$ prepare config empty + +or + +PGXC$ prepare config minimal + +or PGXC$ prepare config -PGXC$ - You have your pgxc_ctl.conf file at - $HOME/pgxc_ctl. + The default pgxc_ctl.conf file can be found inside the + $HOME/pgxc_ctl location. You can edit it to configure your - Postgres-XL cluster. - When it messes up, you can again create the template with + Postgres-XL cluster or you can choose + to start with an empty cluster and add components one-by-one. + When the configuration is messed up, you can again create a specific template + of your choice with the proper prepare config command. 
- If you want to use other file name, specify the names - prepare config command option like: + You can choose to specify your own custom name for the configuration file + like below: -PGXC$ prepare config my_config.conf +PGXC$ prepare config empty my_config.conf - Then you can edit this file to configure your - postgres-XL cluster. This file is actually a - bash script file defining many variables to define the cluster - configuration. With template values and comments, it will be easy to - understand what they mean. + Then you can edit this file to configure and customize your + Postgres-XL cluster. This configuration file is basically a + bash script file which declares many variables to define the cluster + configuration. Although it might seem confusing at first, with the template values and comments, + one can easily understand what each of these variables means. You can also generate a minimal configuration file, good enough to test @@ -766,8 +793,8 @@ PGXC$ prepare config minimal PGXC$ prepare config minimal my_minimal_config.conf - The following describes each variable in the order you find in the - configuration template. + Given below is the description of the various variables in the order that they + appear in the configuration file. @@ -1431,7 +1458,7 @@ PGXC$ prepare config minimal my_minimal_config.conf - Array to specify Coordinator names. + Array to specify Datanode names. 
@@ -1594,16 +1621,17 @@ PGXC$ prepare config minimal my_minimal_config.conf + add gtm master name host port dir add gtm slave name host port dir add gtm_proxy name host port dir add coordinator master name host port pooler dir extraServerConf extraPgHbaConf add coordinator slave name host port pooler dir archDir - add datanode master name host port pooler dir xlogdir restoreDatanode extraServerConf extraPgHbaConf + add datanode master name host port pooler dir xlogdir extraServerConf extraPgHbaConf add datanode slave name host port pooler dir xlogdir archDir Add the specified node to your Postgres-XL cluster. Each node needs a - host name and its work directory. GTM slave, GTM proxy, Coordinator + host name and its work directory. GTM master, GTM slave, GTM proxy, Coordinator master/slave and Datanode master/slave each need their own port to listen on. Coordinators and Datanodes also need a pooler port to pool connections to Datanodes. Coordinator and Datanode slaves need a directory to receive @@ -1620,6 +1648,16 @@ PGXC$ prepare config minimal my_minimal_config.conf You cannot add slaves without master. + + Typically, when you start with an empty configuration file, first you will add your + GTM node. Then you will add your first Coordinator master and then the first Datanode master. + When you add a Coordinator master and it is the first Coordinator in the cluster, then it + starts up on its own with empty node metadata. Otherwise the new Coordinator master connects to any + existing Coordinator and gets the existing node metadata of the cluster. + When you add a Datanode master and it is the first Datanode, then it connects + to any existing Coordinator to get the node metadata. Otherwise the Datanode master + connects to any existing Datanode and gets the current metadata from it. 
+ @@ -1789,7 +1827,8 @@ PGXC$ prepare config minimal my_minimal_config.conf - remove gtm slave + remove gtm master [ clean ] + remove gtm slave [ clean ] remove gtm_proxy nodename [ clean ] remove coordinator [ master| slave ] nodename [ clean ] remove datanode [ master| slave ] nodename [ clean ] diff --git a/doc/src/sgml/start.sgml b/doc/src/sgml/start.sgml index cf285f8777..1bd9c47885 100644 --- a/doc/src/sgml/start.sgml +++ b/doc/src/sgml/start.sgml @@ -187,6 +187,604 @@ + + Creating a Postgres-XL cluster + + + As mentioned in the architectural fundamentals, Postgres-XL + is a collection of multiple components. It can be a bit of work to come up with your + initial working setup. In this tutorial, we will show how one can start with + an empty configuration file and use the pgxc_ctl + utility to create your Postgres-XL cluster from scratch. + + + + A few pre-requisites are necessary on each node that is going to be a part of the + Postgres-XL setup. + + + + + Password-less ssh access is required from the node that is going to run the + pgxc_ctl utility. + + + + + + The PATH environment variable should have the correct Postgres-XL + binaries on all nodes, especially while running a command via ssh. + + + + + + The pg_hba.conf entries must be updated to allow remote access. Variables + like and + in the pgxc_ctl.conf configuration file may need appropriate changes. + + + + + + Firewalls and iptables may need to be updated to allow access to ports. + + + + + + + The pgxc_ctl utility should be present in your PATH. If it is + not there, it can be compiled from source. + +$ cd $XLSRC/contrib/pgxc_ctl +$ make install + + + We are now ready to prepare our template configuration file. The pgxc_ctl + utility allows you to create three types of configuration. We will choose the empty + configuration which will allow us to create our Postgres-XL setup from + scratch. 
Note that we also need to set up the environment + variable properly for all future invocations of pgxc_ctl. + +$ export dataDirRoot=$HOME/DATA/pgxl/nodes +$ mkdir $HOME/pgxc_ctl +$ pgxc_ctl +Installing pgxc_ctl_bash script as /Users/nikhils/pgxc_ctl/pgxc_ctl_bash. +Installing pgxc_ctl_bash script as /Users/nikhils/pgxc_ctl/pgxc_ctl_bash. +Reading configuration using /Users/nikhils/pgxc_ctl/pgxc_ctl_bash --home +/Users/nikhils/pgxc_ctl --configuration +/Users/nikhils/pgxc_ctl/pgxc_ctl.conf +Finished reading configuration. + ******** PGXC_CTL START *************** + + Current directory: /Users/nikhils/pgxc_ctl +PGXC$ prepare config empty +PGXC$ exit + + + The empty configuration file is now ready. You should now make changes + to the pgxc_ctl.conf. At a minimum, + should be set correctly. The configuration file does contain USERi and HOME + environment variables to allow easy defaults for the current user. + + + + The next step is to add the GTM master to the setup. + +$ pgxc_ctl +PGXC$ add gtm master gtm localhost 20001 $dataDirRoot/gtm + + + Use the "monitor" command to check the status of the cluster. + +$ pgxc_ctl +PGXC$ monitor all +Running: gtm master + + + + + + Let us now add a couple of coordinators. When the first coordinator is added, it just +starts up. When another coordinator is added, it connects to any existing coordinator node +to fetch the metadata about objects. + +PGXC$ add coordinator master coord1 localhost 30001 30011 $dataDirRoot/coord_master.1 none none +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +PGXC$ add coordinator master coord2 localhost 30002 30012 $dataDirRoot/coord_master.2 none none +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +Running: coordinator master coord2 + + + + + Moving on to the addition of a couple of datanodes, now. When the first datanode is added, +it connects to any existing coordinator node to fetch global metadata. 
When a subsequent +datanode is added, it connects to any existing datanode for the metadata. + +PGXC$ add datanode master dn1 localhost 40001 40011 $dataDirRoot/dn_master.1 none none none +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +Running: coordinator master coord2 +Running: datanode master dn1 +PGXC$ add datanode master dn2 localhost 40002 40012 $dataDirRoot/dn_master.2 none none none +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +Running: coordinator master coord2 +Running: datanode master dn1 +Running: datanode master dn2 + + + + + Your Postgres-XL setup is ready now and you can move on to the next + "Getting Started" topic. + + + Read on further, only if you want a quick crash course on the various commands you can + try out with Postgres-XL. It is strongly recommended to go through + the entire documentation for more details on each and every command that we will touch upon + below. + + + + Connect to one of the coordinators and create a test database. + +$ psql -p 30001 postgres +postgres=# CREATE DATABASE testdb; +CREATE DATABASE +postgres=# \q + + +Look at pgxc_node catalog. It should show all the configured nodes. It is normal to have +negative node id values. This will be fixed soon. + +$ psql -p 30001 testdb +testdb=# SELECT * FROM pgxc_node; + node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id +-----------+-----------+-----------+-----------+----------------+------------------+------------- + coord1 | C | 30001 | localhost | f | f | 1885696643 + coord2 | C | 30002 | localhost | f | f | -1197102633 + dn1 | D | 40001 | localhost | t | t | -560021589 + dn2 | D | 40002 | localhost | f | t | 352366662 +(4 rows) + + + +Let us now create a distributed table, distributed on first column by HASH. 
+ + +testdb=# CREATE TABLE disttab(col1 int, col2 int, col3 text) DISTRIBUTE BY HASH(col1); +CREATE TABLE +testdb=# \d+ disttab + Table "public.disttab" + Column | Type | Modifiers | Storage | Stats target | Description +--------+---------+-----------+----------+--------------+------------- + col1 | integer | | plain | | + col2 | integer | | plain | | + col3 | text | | extended | | +Has OIDs: no +Distribute By: HASH(col1) +Location Nodes: ALL DATANODES + + + +Also create a replicated table. + + +testdb=# CREATE TABLE repltab (col1 int, col2 int) DISTRIBUTE BY +REPLICATION; +CREATE TABLE +testdb=# \d+ repltab + Table "public.repltab" + Column | Type | Modifiers | Storage | Stats target | Description +--------+---------+-----------+---------+--------------+------------- + col1 | integer | | plain | | + col2 | integer | | plain | | +Has OIDs: no +Distribute By: REPLICATION +Location Nodes: ALL DATANODES + + + +Now insert some sample data in these tables. + +testdb=# INSERT INTO disttab VALUES (generate_series(1,100), generate_series(101, 200), 'foo'); +INSERT 0 100 +testdb=# INSERT INTO repltab VALUES (generate_series(1,100), generate_series(101, 200)); +INSERT 0 100 + + +Ok. So the distributed table should have 100 rows + + +testdb=# SELECT count(*) FROM disttab; + count +------- + 100 +(1 row) + + + + +And they must not be all on the same node. xc_node_id is a system +column which shows the originating datanode for each row. + +Note that the distribution can be slightly uneven because of the HASH +function + + +testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + -560021589 | 42 + 352366662 | 58 +(2 rows) + + + +For replicated tables, we expect all rows to come from a single +datanode (even though the other node has a copy too). 
+ + +testdb=# SELECT count(*) FROM repltab; + count +------- + 100 +(1 row) + +testdb=# SELECT xc_node_id, count(*) FROM repltab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + -560021589 | 100 +(1 row) + + + +Now add a new datanode to the cluster. + + +PGXC$ add datanode master dn3 localhost 40003 40013 $dataDirRoot/dn_master.3 none none none +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +Running: coordinator master coord2 +Running: datanode master dn1 +Running: datanode master dn2 +Running: datanode master dn3 + + + +Note that during cluster reconfiguration, all outstanding transactions +are aborted and sessions are reset. So you would typically see errors +like these on open sessions + + +testdb=# SELECT * FROM pgxc_node; +ERROR: canceling statement due to user request <==== pgxc_pool_reload() resets all sessions and aborts all open transactions + +testdb=# SELECT * FROM pgxc_node; + node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id +-----------+-----------+-----------+-----------+----------------+------------------+------------- + coord1 | C | 30001 | localhost | f | f | 1885696643 + coord2 | C | 30002 | localhost | f | f | -1197102633 + dn1 | D | 40001 | localhost | t | t | -560021589 + dn2 | D | 40002 | localhost | f | t | 352366662 + dn3 | D | 40003 | localhost | f | f | -700122826 +(5 rows) + + +Note that with new datanode addition, Existing tables are not affected. 
The distribution information now +explicitly shows the older datanodes + +testdb=# \d+ disttab + Table "public.disttab" + Column | Type | Modifiers | Storage | Stats target | Description +--------+---------+-----------+----------+--------------+------------- + col1 | integer | | plain | | + col2 | integer | | plain | | + col3 | text | | extended | | +Has OIDs: no +Distribute By: HASH(col1) +Location Nodes: dn1, dn2 + +testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + -560021589 | 42 + 352366662 | 58 +(2 rows) + +testdb=# \d+ repltab + Table "public.repltab" + Column | Type | Modifiers | Storage | Stats target | Description +--------+---------+-----------+---------+--------------+------------- + col1 | integer | | plain | | + col2 | integer | | plain | | +Has OIDs: no +Distribute By: REPLICATION +Location Nodes: dn1, dn2 + + +Let us now try to redistribute tables so that they can take advantage +of the new datanode + + +testdb=# ALTER TABLE disttab ADD NODE (dn3); +ALTER TABLE +testdb=# \d+ disttab + Table "public.disttab" + Column | Type | Modifiers | Storage | Stats target | Description +--------+---------+-----------+----------+--------------+------------- + col1 | integer | | plain | | + col2 | integer | | plain | | + col3 | text | | extended | | +Has OIDs: no +Distribute By: HASH(col1) +Location Nodes: ALL DATANODES + +testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + -700122826 | 32 + 352366662 | 32 + -560021589 | 36 +(3 rows) + + + +Let us now add a third coordinator. 
+ +PGXC$ add coordinator master coord3 localhost 30003 30013 $dataDirRoot/coord_master.3 none none +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +Running: coordinator master coord2 +Running: coordinator master coord3 +Running: datanode master dn1 +Running: datanode master dn2 +Running: datanode master dn3 + +testdb=# SELECT * FROM pgxc_node; +ERROR: canceling statement due to user request +testdb=# SELECT * FROM pgxc_node; + node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id +-----------+-----------+-----------+-----------+----------------+------------------+------------- + coord1 | C | 30001 | localhost | f | f | 1885696643 + coord2 | C | 30002 | localhost | f | f | -1197102633 + dn1 | D | 40001 | localhost | t | t | -560021589 + dn2 | D | 40002 | localhost | f | t | 352366662 + dn3 | D | 40003 | localhost | f | f | -700122826 + coord3 | C | 30003 | localhost | f | f | 1638403545 +(6 rows) + + + +We can try some more ALTER TABLE so as to delete a node from a table +distribution and add it back + + +testdb=# ALTER TABLE disttab DELETE NODE (dn1); +ALTER TABLE +testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + 352366662 | 42 + -700122826 | 58 +(2 rows) + +testdb=# ALTER TABLE disttab ADD NODE (dn1); +ALTER TABLE +testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + -700122826 | 32 + 352366662 | 32 + -560021589 | 36 +(3 rows) + + + +You could also alter a replicated table to make it a distributed table. +Note that even though the cluster has 3 datanodes now, the table will continue +to use only 2 datanodes where the table was originally replicated on. 
+ + +testdb=# ALTER TABLE repltab DISTRIBUTE BY HASH(col1); +ALTER TABLE +testdb=# SELECT xc_node_id, count(*) FROM repltab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + -560021589 | 42 + 352366662 | 58 +(2 rows) + +testdb=# ALTER TABLE repltab DISTRIBUTE BY REPLICATION; +ALTER TABLE +testdb=# SELECT xc_node_id, count(*) FROM repltab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + -560021589 | 100 +(1 row) + + +Remove the coordinator added previously now. You can use the "clean" option +to remove the corresponding data directory as well. + + +PGXC$ remove coordinator master coord3 clean +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +Running: coordinator master coord2 +Running: datanode master dn1 +Running: datanode master dn2 +Running: datanode master dn3 + +testdb=# SELECT oid, * FROM pgxc_node; +ERROR: canceling statement due to user request +testdb=# SELECT oid, * FROM pgxc_node; + oid | node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id +-------+-----------+-----------+-----------+-----------+----------------+------------------+------------- + 11197 | coord1 | C | 30001 | localhost | f | f | 1885696643 + 16384 | coord2 | C | 30002 | localhost | f | f | -1197102633 + 16385 | dn1 | D | 40001 | localhost | t | t | -560021589 + 16386 | dn2 | D | 40002 | localhost | f | t | 352366662 + 16397 | dn3 | D | 40003 | localhost | f | f | -700122826 +(5 rows) + + + +Let us try to remove a datanode now. NOTE: Postgres-XL does not +employ any additional checks to ascertain if the datanode being dropped has data from tables +that are replicated/distributed. It is the responsibility of the user to ensure that it's +safe to remove a datanode. + +You can use the below query to find out if the datanode being removed has any data on it. +Do note that this only shows tables from the current database. 
You might want to ensure +the same for all databases before going ahead with the datanode removal. Use the OID of the +datanode that is to be removed in the below query: + + +testdb=# SELECT * FROM pgxc_class WHERE nodeoids::integer[] @> ARRAY[16397]; + pcrelid | pclocatortype | pcattnum | pchashalgorithm | pchashbuckets | nodeoids +---------+---------------+----------+-----------------+---------------+------------------- + 16388 | H | 1 | 1 | 4096 | 16385 16386 16397 +(1 row) + + +testdb=# ALTER TABLE disttab DELETE NODE (dn3); +ALTER TABLE +testdb=# SELECT * FROM pgxc_class WHERE nodeoids::integer[] @> ARRAY[16397]; + pcrelid | pclocatortype | pcattnum | pchashalgorithm | pchashbuckets | nodeoids +---------+---------------+----------+-----------------+---------------+---------- +(0 rows) + + +Ok, it is safe to remove datanode "dn3" now. + +PGXC$ remove datanode master dn3 clean +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +Running: coordinator master coord2 +Running: datanode master dn1 +Running: datanode master dn2 + +testdb=# SELECT oid, * FROM pgxc_node; +ERROR: canceling statement due to user request +testdb=# SELECT oid, * FROM pgxc_node; + oid | node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id +-------+-----------+-----------+-----------+-----------+----------------+------------------+------------- + 11197 | coord1 | C | 30001 | localhost | f | f | 1885696643 + 16384 | coord2 | C | 30002 | localhost | f | f | -1197102633 + 16385 | dn1 | D | 40001 | localhost | t | t | -560021589 + 16386 | dn2 | D | 40002 | localhost | f | t | 352366662 +(4 rows) + + + +The pgxc_ctl utility can also help in setting up slaves for +datanodes and coordinators. Let us setup a slave for a datanode and see how failover can +be performed in case the master datanode goes down. 
+ +PGXC$ add datanode slave dn1 localhost 40101 40111 $dataDirRoot/dn_slave.1 none $dataDirRoot/datanode_archlog.1 +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +Running: coordinator master coord2 +Running: datanode master dn1 +Running: datanode slave dn1 +Running: datanode master dn2 + +testdb=# EXECUTE DIRECT ON(dn1) 'SELECT client_hostname, state, sync_state FROM pg_stat_replication'; + client_hostname | state | sync_state +-----------------+-----------+------------ + | streaming | async +(1 row) + + +Add some more rows to test failover now. + + +testdb=# INSERT INTO disttab VALUES (generate_series(1001,1100), generate_series(1101, 1200), 'foo'); +INSERT 0 100 +testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + -560021589 | 94 + 352366662 | 106 +(2 rows) + + +Let us simulate datanode failover now. We will first stop the datanode master "dn1" for +which we configured a slave above. Note that since the slave is connected to the master +we will use "immediate" mode for stopping it. + +PGXC$ stop -m immediate datanode master dn1 + + +Since a datanode is down, queries will fail. Though a few queries may still work if +the failed node is not required to run the query, and that is determined by the +distribution of the data and the WHERE clause being used. + + +testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id; +ERROR: Failed to get pooled connections + +testdb=# SELECT xc_node_id, * FROM disttab WHERE col1 = 3; + xc_node_id | col1 | col2 | col3 +------------+------+------+------ + 352366662 | 3 | 103 | foo +(1 row) + + +We will now perform the failover and check that everything is working fine post that. 
+ +PGXC$ failover datanode dn1 + +testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id; +ERROR: canceling statement due to user request +testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id; + xc_node_id | count +------------+------- + -560021589 | 94 + 352366662 | 106 +(2 rows) + + + +The pgxc_node catalog now should have updated entries. Especially, the +failed over datanode node_host and node_port should have been replaced +with the slave's host and port values. + + +testdb=# SELECT oid, * FROM pgxc_node; + oid | node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id +-------+-----------+-----------+-----------+-----------+----------------+------------------+------------- + 11197 | coord1 | C | 30001 | localhost | f | f | 1885696643 + 16384 | coord2 | C | 30002 | localhost | f | f | -1197102633 + 16386 | dn2 | D | 40002 | localhost | f | t | 352366662 + 16385 | dn1 | D | 40101 | localhost | t | t | -560021589 +(4 rows) + +PGXC$ monitor all +Running: gtm master +Running: coordinator master coord1 +Running: coordinator master coord2 +Running: datanode master dn1 +Running: datanode master dn2 + + + + Creating a Database diff --git a/src/backend/pgxc/pool/poolutils.c b/src/backend/pgxc/pool/poolutils.c index fad39cda9e..9dbf9d81cb 100644 --- a/src/backend/pgxc/pool/poolutils.c +++ b/src/backend/pgxc/pool/poolutils.c @@ -90,7 +90,7 @@ pgxc_pool_check(PG_FUNCTION_ARGS) * to remote nodes. This results in losing prepared and temporary objects * in all the sessions of server. All the existing transactions are aborted * and a WARNING message is sent back to client. - * Session that invocated the reload does the same process, but no WARNING + * Session that invoked the reload does the same process, but no WARNING * message is sent back to client. */ Datum