bdr: Initial framework for choosing the correct data encoding based on the client.
author Andres Freund <[email protected]>
Sun, 16 Mar 2014 21:59:34 +0000 (22:59 +0100)
committer Alvaro Herrera <[email protected]>
Wed, 14 May 2014 18:46:03 +0000 (14:46 -0400)
contrib/bdr/bdr.c
contrib/bdr/bdr.h
contrib/bdr/bdr_compat.c [new file with mode: 0644]
contrib/bdr/bdr_output.c
contrib/bdr/output.mk
contrib/bdr/worker.mk

index b1cd860dab6fe39c0552324d71125d02a3bd3d49..c6e2a4e586f296d94b10fc04a185a3e677153cf1 100644 (file)
 #include "access/xact.h"
 #include "catalog/pg_extension.h"
 #include "catalog/pg_index.h"
+#include "catalog/catversion.h"
 #include "commands/extension.h"
 #include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
 #include "replication/replication_identifier.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
@@ -204,7 +206,7 @@ bdr_apply_main(Datum main_arg)
    Oid         remote_dboid_i;
    char        local_sysid[32];
    char        remote_ident[256];
-   char        query[256];
+   StringInfoData query;
    char        conninfo_repl[MAXCONNINFO + 75];
    XLogRecPtr  last_received = InvalidXLogRecPtr;
    char       *sqlstate;
@@ -213,6 +215,8 @@ bdr_apply_main(Datum main_arg)
    XLogRecPtr  start_from;
    NameData    slot_name;
 
+   initStringInfo(&query);
+
    bdr_apply_con = (BDRWorkerCon *) DatumGetPointer(main_arg);
 
    NameStr(replication_name)[0] = '\0';
@@ -338,14 +342,15 @@ bdr_apply_main(Datum main_arg)
        ForceSyncCommit();
 
        /* acquire remote decoding slot */
-       snprintf(query, sizeof(query), "CREATE_REPLICATION_SLOT \"%s\" LOGICAL %s",
-                NameStr(slot_name), "bdr_output");
-       res = PQexec(streamConn, query);
+       resetStringInfo(&query);
+       appendStringInfo(&query, "CREATE_REPLICATION_SLOT \"%s\" LOGICAL %s",
+                        NameStr(slot_name), "bdr_output");
+       res = PQexec(streamConn, query.data);
 
        if (PQresultStatus(res) != PGRES_TUPLES_OK)
        {
            elog(FATAL, "could not send replication command \"%s\": status %s: %s\n",
-                query, PQresStatus(PQresultStatus(res)), PQresultErrorMessage(res));
+                query.data, PQresStatus(PQresultStatus(res)), PQresultErrorMessage(res));
        }
        PQclear(res);
 
@@ -387,16 +392,32 @@ bdr_apply_main(Datum main_arg)
         replication_identifier,
         (uint32) (start_from >> 32), (uint32) start_from);
 
-   snprintf(query, sizeof(query), "START_REPLICATION SLOT \"%s\" LOGICAL %X/%X",
-      NameStr(slot_name), (uint32) (start_from >> 32), (uint32) start_from);
-   res = PQexec(streamConn, query);
+   resetStringInfo(&query);
+   appendStringInfo(&query, "START_REPLICATION SLOT \"%s\" LOGICAL %X/%X (",
+                    NameStr(slot_name), (uint32) (start_from >> 32),
+                    (uint32) start_from);
+   appendStringInfo(&query, "pg_version '%u'", PG_VERSION_NUM);
+   appendStringInfo(&query, ", pg_catversion '%u'", CATALOG_VERSION_NO);
+   appendStringInfo(&query, ", bdr_version '%u'", BDR_VERSION_NUM);
+   appendStringInfo(&query, ", sizeof_int '%zu'", sizeof(int));
+   appendStringInfo(&query, ", sizeof_long '%zu'", sizeof(long));
+   appendStringInfo(&query, ", sizeof_datum '%zu'", sizeof(Datum));
+   appendStringInfo(&query, ", maxalign '%d'", MAXIMUM_ALIGNOF);
+   appendStringInfo(&query, ", float4_byval '%d'", bdr_get_float4byval());
+   appendStringInfo(&query, ", float8_byval '%d'", bdr_get_float8byval());
+   appendStringInfo(&query, ", integer_datetimes '%d'", bdr_get_integer_timestamps());
+   appendStringInfo(&query, ", bigendian '%d'", bdr_get_bigendian());
+   appendStringInfo(&query, ", db_encoding '%s'", GetDatabaseEncodingName());
+
+   appendStringInfoChar(&query, ')');
+   res = PQexec(streamConn, query.data);
 
    sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
 
    if (PQresultStatus(res) != PGRES_COPY_BOTH)
    {
        elog(FATAL, "could not send replication command \"%s\": %s\n, sqlstate: %s",
-            query, PQresultErrorMessage(res), sqlstate);
+            query.data, PQresultErrorMessage(res), sqlstate);
    }
    PQclear(res);
 
index 5103b250471394e3ecf5de39eaa6f7f94a04ab42..0b836577502f79ef93e8fe274eeccb1ccc2f4483 100644 (file)
@@ -14,6 +14,7 @@
 #include "access/xlogdefs.h"
 #include "utils/resowner.h"
 
+#define BDR_VERSION_NUM 500
 
 typedef struct BDRWorkerCon
 {
@@ -84,5 +85,11 @@ extern void bdr_count_delete(void);
 extern void bdr_count_delete_conflict(void);
 extern void bdr_count_disconnect(void);
 
+/* compat check functions */
+extern bool bdr_get_float4byval(void);
+extern bool bdr_get_float8byval(void);
+extern bool bdr_get_integer_timestamps(void);
+extern bool bdr_get_bigendian(void);
+
 
 #endif /* BDR_H */
diff --git a/contrib/bdr/bdr_compat.c b/contrib/bdr/bdr_compat.c
new file mode 100644 (file)
index 0000000..7fe79f8
--- /dev/null
@@ -0,0 +1,55 @@
+/* -------------------------------------------------------------------------
+ *
+ * bdr_compat.c
+ *     Checks for cross version/arch/settings compatibility
+ *
+ * Copyright (C) 2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *     contrib/bdr/bdr_compat.c
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "bdr.h"
+
+/*
+ * Tell the caller whether this build was compiled with USE_FLOAT4_BYVAL
+ * defined.
+ */
+bool
+bdr_get_float4byval(void)
+{
+   bool        byval = false;
+
+#ifdef USE_FLOAT4_BYVAL
+   byval = true;
+#endif
+
+   return byval;
+}
+
+/*
+ * Tell the caller whether this build was compiled with USE_FLOAT8_BYVAL
+ * defined.
+ */
+bool
+bdr_get_float8byval(void)
+{
+   bool        byval = false;
+
+#ifdef USE_FLOAT8_BYVAL
+   byval = true;
+#endif
+
+   return byval;
+}
+
+/*
+ * Tell the caller whether this build was compiled with
+ * USE_INTEGER_DATETIMES defined.
+ */
+bool
+bdr_get_integer_timestamps(void)
+{
+   bool        int_datetimes = false;
+
+#ifdef USE_INTEGER_DATETIMES
+   int_datetimes = true;
+#endif
+
+   return int_datetimes;
+}
+
+/*
+ * Tell the caller whether this build was compiled with WORDS_BIGENDIAN
+ * defined.
+ */
+bool
+bdr_get_bigendian(void)
+{
+   bool        bigendian = false;
+
+#ifdef WORDS_BIGENDIAN
+   bigendian = true;
+#endif
+
+   return bigendian;
+}
index 7360ef24e5da3721dac0a79cc050cfff58e3df53..ab40ed757c5701148271ca78845b1dda894bf7f7 100644 (file)
@@ -15,6 +15,8 @@
 #include "access/sysattr.h"
 #include "access/tuptoaster.h"
 
+#include "bdr.h"
+
 #include "catalog/pg_class.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_type.h"
 
 #include "libpq/pqformat.h"
 
+#include "mb/pg_wchar.h"
+
 #include "nodes/parsenodes.h"
 
 #include "replication/output_plugin.h"
 #include "replication/logical.h"
 
+#include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
@@ -43,8 +48,24 @@ extern void      _PG_output_plugin_init(OutputPluginCallbacks *cb);
 typedef struct
 {
    MemoryContext context;
-   bool        include_xids;
-} TestDecodingData;
+   /*
+    * State of the bdr output plugin; pg_decode_startup() allocates it and
+    * stores it in ctx->output_plugin_private.
+    *
+    * The three flags below are computed by pg_decode_startup() from the
+    * client-reported values and consulted by decide_datum_transfer().
+    */
+   bool allow_binary_protocol;     /* cleared on sizeof, endianness or float byval differences */
+   bool allow_sendrecv_protocol;   /* result of comparing the client's pg_version with ours */
+   bool int_datetime_mismatch;     /* client's integer_datetimes differs from this build */
+   /*
+    * Values parsed from the output plugin options sent by the client; the
+    * struct is zero-allocated, so an option that was not sent stays 0/NULL.
+    */
+   uint32 client_pg_version;       /* option "pg_version" */
+   uint32 client_pg_catversion;    /* option "pg_catversion" */
+   uint32 client_bdr_version;      /* option "bdr_version" */
+   size_t client_sizeof_int;       /* option "sizeof_int" */
+   size_t client_sizeof_long;      /* option "sizeof_long" */
+   size_t client_sizeof_datum;     /* option "sizeof_datum" */
+   size_t client_maxalign;         /* option "maxalign" */
+   bool client_bigendian;          /* option "bigendian" */
+   bool client_float4_byval;       /* option "float4_byval" */
+   bool client_float8_byval;       /* option "float8_byval" */
+   bool client_int_datetime;       /* option "integer_datetimes" */
+   char *client_db_encoding;       /* option "db_encoding", pstrdup()ed copy */
+} BdrOutputData;
 
 /* These must be available to pg_dlsym() */
 static void pg_decode_startup(LogicalDecodingContext * ctx, OutputPluginOptions *opt,
@@ -59,7 +80,8 @@ static void pg_decode_change(LogicalDecodingContext *ctx,
 
 /* private prototypes */
 static void write_rel(StringInfo out, Relation rel);
-static void write_tuple(StringInfo out, Relation rel, HeapTuple tuple);
+static void write_tuple(BdrOutputData *data, StringInfo out, Relation rel,
+                       HeapTuple tuple);
 
 void
 _PG_init(void)
@@ -80,13 +102,60 @@ _PG_output_plugin_init(OutputPluginCallbacks *cb)
 }
 
 
+/*
+ * Parse the value of option "elem" as an unsigned 32 bit integer into *res.
+ *
+ * strtoul() is called with base 0, so decimal, octal ("0" prefix) and hex
+ * ("0x" prefix) notations are accepted.  Raises an ERROR if the option has
+ * no value, if the value is not entirely numeric, or if it does not fit
+ * into a uint32.  *res is only written on success.
+ */
+static void
+bdr_parse_uint32(DefElem *elem, uint32 *res)
+{
+   char       *str;
+   char       *endptr;
+   unsigned long val;
+
+   /* options may be given without a value; strVal() would crash on that */
+   if (elem->arg == NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("missing value for parameter \"%s\"",
+                       elem->defname)));
+
+   str = strVal(elem->arg);
+
+   errno = 0;
+   val = strtoul(str, &endptr, 0);
+
+   if (errno != 0)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("could not parse uint32 value \"%s\" for parameter \"%s\": %m",
+                       str, elem->defname)));
+
+   /*
+    * strtoul() does not reliably report empty input or trailing garbage
+    * via errno, and unsigned long can be wider than 32 bits, so check
+    * those cases by hand instead of silently storing 0 or a truncated value.
+    */
+   if (endptr == str || *endptr != '\0' ||
+       val != (unsigned long) (uint32) val)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("could not parse uint32 value \"%s\" for parameter \"%s\"",
+                       str, elem->defname)));
+
+   *res = (uint32) val;
+}
+
+/*
+ * Parse the value of option "elem" as a size_t into *res.
+ *
+ * strtoull() is called with base 0, so decimal, octal ("0" prefix) and hex
+ * ("0x" prefix) notations are accepted.  Raises an ERROR if the option has
+ * no value, if the value is not entirely numeric, or if it does not fit
+ * into a size_t.  *res is only written on success.
+ */
+static void
+bdr_parse_size_t(DefElem *elem, size_t *res)
+{
+   char       *str;
+   char       *endptr;
+   unsigned long long val;
+
+   /* options may be given without a value; strVal() would crash on that */
+   if (elem->arg == NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("missing value for parameter \"%s\"",
+                       elem->defname)));
+
+   str = strVal(elem->arg);
+
+   errno = 0;
+   val = strtoull(str, &endptr, 0);
+
+   if (errno != 0)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("could not parse size_t value \"%s\" for parameter \"%s\": %m",
+                       str, elem->defname)));
+
+   /*
+    * strtoull() does not reliably report empty input or trailing garbage
+    * via errno, and size_t can be narrower than unsigned long long, so
+    * check those cases by hand instead of silently storing a wrong value.
+    */
+   if (endptr == str || *endptr != '\0' ||
+       val != (unsigned long long) (size_t) val)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("could not parse size_t value \"%s\" for parameter \"%s\"",
+                       str, elem->defname)));
+
+   *res = (size_t) val;
+}
+
+/*
+ * Parse the value of option "elem" as a boolean into *res, using
+ * parse_bool().
+ *
+ * Raises an ERROR if the option has no value or parse_bool() rejects it.
+ */
+static void
+bdr_parse_bool(DefElem *elem, bool *res)
+{
+   /* options may be given without a value; strVal() would crash on that */
+   if (elem->arg == NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("missing value for parameter \"%s\"",
+                       elem->defname)));
+
+   /*
+    * No "%m" here: a parse_bool() failure is not an errno-style failure, so
+    * appending strerror(errno) would print unrelated text.
+    */
+   if (!parse_bool(strVal(elem->arg), res))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("could not parse boolean value \"%s\" for parameter \"%s\"",
+                       strVal(elem->arg), elem->defname)));
+}
+
+/*
+ * Complain that the required option "param" was not supplied.
+ *
+ * Always raises an ERROR; callers rely on ereport(ERROR) not returning.
+ */
+static void
+bdr_req_param(const char *param)
+{
+   ereport(ERROR,
+           (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+            errmsg("missing value for parameter \"%s\"",
+                   param)));
+}
+
+
 /* initialize this plugin */
 static void
 pg_decode_startup(LogicalDecodingContext * ctx, OutputPluginOptions *opt, bool is_init)
 {
-   TestDecodingData *data;
+   ListCell   *option;
+   BdrOutputData *data;
 
-   data = palloc(sizeof(TestDecodingData));
+   data = palloc0(sizeof(BdrOutputData));
    data->context = AllocSetContextCreate(TopMemoryContext,
                                          "bdr conversion context",
                                          ALLOCSET_DEFAULT_MINSIZE,
@@ -96,6 +165,130 @@ pg_decode_startup(LogicalDecodingContext * ctx, OutputPluginOptions *opt, bool i
    ctx->output_plugin_private = data;
 
    opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT;
+
+   /* parse options passed in by the client */
+
+   foreach(option, ctx->output_plugin_options)
+   {
+       DefElem    *elem = lfirst(option);
+
+       Assert(elem->arg == NULL || IsA(elem->arg, String));
+
+       if (strcmp(elem->defname, "pg_version") == 0)
+           bdr_parse_uint32(elem, &data->client_pg_version);
+       else if (strcmp(elem->defname, "pg_catversion") == 0)
+           bdr_parse_uint32(elem, &data->client_pg_catversion);
+       else if (strcmp(elem->defname, "bdr_version") == 0)
+           bdr_parse_uint32(elem, &data->client_bdr_version);
+       else if (strcmp(elem->defname, "sizeof_int") == 0)
+           bdr_parse_size_t(elem, &data->client_sizeof_int);
+       else if (strcmp(elem->defname, "sizeof_long") == 0)
+           bdr_parse_size_t(elem, &data->client_sizeof_long);
+       else if (strcmp(elem->defname, "sizeof_datum") == 0)
+           bdr_parse_size_t(elem, &data->client_sizeof_datum);
+       else if (strcmp(elem->defname, "maxalign") == 0)
+           bdr_parse_size_t(elem, &data->client_maxalign);
+       else if (strcmp(elem->defname, "bigendian") == 0)
+           bdr_parse_bool(elem, &data->client_bigendian);
+       else if (strcmp(elem->defname, "float4_byval") == 0)
+           bdr_parse_bool(elem, &data->client_float4_byval);
+       else if (strcmp(elem->defname, "float8_byval") == 0)
+           bdr_parse_bool(elem, &data->client_float8_byval);
+       else if (strcmp(elem->defname, "integer_datetimes") == 0)
+           bdr_parse_bool(elem, &data->client_int_datetime);
+       else if (strcmp(elem->defname, "db_encoding") == 0)
+           data->client_db_encoding = pstrdup(strVal(elem->arg));
+       else
+       {
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                    errmsg("option \"%s\" = \"%s\" is unknown",
+                           elem->defname,
+                           elem->arg ? strVal(elem->arg) : "(null)")));
+       }
+   }
+
+   /* no options are passed in during initialization, so don't complain there */
+   if (!is_init)
+   {
+       if (data->client_pg_version == 0)
+           bdr_req_param("pg_version");
+       if (data->client_pg_catversion == 0)
+           bdr_req_param("pg_catversion");
+       if (data->client_bdr_version == 0)
+           bdr_req_param("bdr_version");
+       if (data->client_sizeof_int == 0)
+           bdr_req_param("sizeof_int");
+       if (data->client_sizeof_long == 0)
+           bdr_req_param("sizeof_long");
+       if (data->client_sizeof_datum == 0)
+           bdr_req_param("sizeof_datum");
+       if (data->client_maxalign == 0)
+           bdr_req_param("maxalign");
+       /* XXX: can't check for boolean values this way */
+       if (data->client_db_encoding == NULL)
+           bdr_req_param("db_encoding");
+
+       /* check incompatibilities we cannot work around */
+       if (strcmp(data->client_db_encoding, GetDatabaseEncodingName()) != 0)
+           elog(ERROR, "mismatching encodings are not yet supported");
+
+       if (data->client_bdr_version != BDR_VERSION_NUM)
+           elog(ERROR, "bdr versions currently have to match on both sides");
+
+       data->allow_binary_protocol = true;
+       data->allow_sendrecv_protocol = true;
+
+       /*
+        * Now use the passed in information to determine how to encode the
+        * data sent by the output plugin. We don't make datatype specific
+        * decisions here, just generic decisions about using binary and/or
+        * send/recv protocols.
+        */
+
+       /*
+        * Don't use the binary protocol if there are fundamental arch
+        * differences.
+        */
+       if (data->client_sizeof_int != sizeof(int) ||
+           data->client_sizeof_long != sizeof(long) ||
+           data->client_sizeof_datum != sizeof(Datum))
+       {
+           data->allow_binary_protocol = false;
+           elog(LOG, "disabling binary protocol because of sizeof differences");
+       }
+       else if (data->client_bigendian != bdr_get_bigendian())
+       {
+           data->allow_binary_protocol = false;
+           elog(LOG, "disabling binary protocol because of endianess difference");
+       }
+
+       /*
+        * We also can't use the binary protocol if there are critical
+        * differences in compile time settings.
+        */
+       if (data->client_float4_byval != bdr_get_float4byval() ||
+           data->client_float8_byval != bdr_get_float8byval())
+           data->allow_binary_protocol = false;
+
+       if (data->client_int_datetime != bdr_get_integer_timestamps())
+           data->int_datetime_mismatch = true;
+       else
+           data->int_datetime_mismatch = false;
+
+
+       /*
+        * Don't use the send/recv protocol if there are version
+        * differences. There currently isn't any guarantee for cross version
+        * compatibility of the send/recv representations. But there actually
+        * *is* a compat. guarantee for architecture differences...
+        *
+        * XXX: We could easily do better by doing per datatype considerations
+        * if there are known incompatibilities.
+        */
+       if (data->client_pg_version / 100 == PG_VERSION_NUM / 100)
+           data->allow_sendrecv_protocol = false;
+   }
 }
 
 /* BEGIN callback */
@@ -103,7 +296,7 @@ void
 pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
 {
 #ifdef NOT_YET
-   TestDecodingData *data = ctx->output_plugin_private;
+   BdrOutputData *data = ctx->output_plugin_private;
 #endif
    AssertVariableIsOfType(&pg_decode_begin_txn, LogicalDecodeBeginCB);
 
@@ -124,7 +317,7 @@ pg_decode_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
                     XLogRecPtr commit_lsn)
 {
 #ifdef NOT_YET
-   TestDecodingData *data = ctx->output_plugin_private;
+   BdrOutputData *data = ctx->output_plugin_private;
 #endif
 
    if (txn->origin_id != InvalidRepNodeId)
@@ -142,7 +335,7 @@ void
 pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
                 Relation relation, ReorderBufferChange *change)
 {
-   TestDecodingData *data;
+   BdrOutputData *data;
    MemoryContext old;
 
    data = ctx->output_plugin_private;
@@ -162,7 +355,7 @@ pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
            appendStringInfoChar(ctx->out, 'I');        /* action INSERT */
            write_rel(ctx->out, relation);
            appendStringInfoChar(ctx->out, 'N');        /* new tuple follows */
-           write_tuple(ctx->out, relation, &change->data.tp.newtuple->tuple);
+           write_tuple(data, ctx->out, relation, &change->data.tp.newtuple->tuple);
            break;
        case REORDER_BUFFER_CHANGE_UPDATE:
            appendStringInfoChar(ctx->out, 'U');        /* action UPDATE */
@@ -170,11 +363,11 @@ pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
            if (change->data.tp.oldtuple != NULL)
            {
                appendStringInfoChar(ctx->out, 'K');    /* old key follows */
-               write_tuple(ctx->out, relation,
+               write_tuple(data, ctx->out, relation,
                            &change->data.tp.oldtuple->tuple);
            }
            appendStringInfoChar(ctx->out, 'N');        /* new tuple follows */
-           write_tuple(ctx->out, relation,
+           write_tuple(data, ctx->out, relation,
                        &change->data.tp.newtuple->tuple);
            break;
        case REORDER_BUFFER_CHANGE_DELETE:
@@ -183,7 +376,7 @@ pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
            if (change->data.tp.oldtuple != NULL)
            {
                appendStringInfoChar(ctx->out, 'K');    /* old key follows */
-               write_tuple(ctx->out, relation,
+               write_tuple(data, ctx->out, relation,
                            &change->data.tp.oldtuple->tuple);
            }
            else
@@ -225,11 +418,42 @@ write_rel(StringInfo out, Relation rel)
    appendBinaryStringInfo(out, relname, relnamelen);
 }
 
+/*
+ * Make the executive decision about which protocol to use.
+ */
+/*
+ * Decide which transfer format to use for one attribute.
+ *
+ * "data" carries the per-connection decisions made in pg_decode_startup();
+ * "att" and "typclass" describe the column and its type.  Both output flags
+ * are always written and at most one of them ends up true.  If both are
+ * false, neither the binary nor the send/recv format may be used.
+ */
+static void
+decide_datum_transfer(BdrOutputData *data,
+                     Form_pg_attribute att, Form_pg_type typclass,
+                     bool *use_binary, bool *use_sendrecv)
+{
+   /* start from "neither" so the result never depends on the caller's state */
+   *use_binary = false;
+   *use_sendrecv = false;
+
+   /*
+    * Date/time types whose binary and send/recv representations depend on
+    * the integer datetimes compile time setting can use neither format
+    * when that setting differs between both sides.
+    *
+    * NOTE(review): container types (arrays, ranges) over these types are
+    * not caught here -- confirm whether they need the same treatment.
+    */
+   if (data->int_datetime_mismatch &&
+       (att->atttypid == TIMESTAMPOID || att->atttypid == TIMESTAMPTZOID ||
+        att->atttypid == TIMEOID || att->atttypid == TIMETZOID ||
+        att->atttypid == INTERVALOID))
+       return;
+
+   /* builtin base type without an element type */
+   if (data->allow_binary_protocol &&
+       typclass->typtype == 'b' &&
+       att->atttypid < FirstNormalObjectId &&
+       typclass->typelem == InvalidOid)
+   {
+       *use_binary = true;
+   }
+   /* otherwise fall back to send/recv if the type has a receive function */
+   else if (data->allow_sendrecv_protocol &&
+            OidIsValid(typclass->typreceive))
+   {
+       *use_sendrecv = true;
+   }
+}
+
 /*
  * Write a tuple to the outputstream, in the most efficient format possible.
  */
 static void
-write_tuple(StringInfo out, Relation rel, HeapTuple tuple)
+write_tuple(BdrOutputData *data, StringInfo out, Relation rel,
+           HeapTuple tuple)
 {
    TupleDesc   desc;
    Datum       values[MaxTupleAttributeNumber];
@@ -279,13 +503,7 @@ write_tuple(StringInfo out, Relation rel, HeapTuple tuple)
            elog(ERROR, "cache lookup failed for type %u", att->atttypid);
        typclass = (Form_pg_type) GETSTRUCT(typtup);
 
-       /* builtin type */
-       if (typclass->typtype == 'b' &&
-           att->atttypid < FirstNormalObjectId &&
-           typclass->typelem == InvalidOid)
-           use_binary = true;
-       else if (OidIsValid(typclass->typreceive))
-           use_sendrecv = true;
+       decide_datum_transfer(data, att, typclass, &use_binary, &use_sendrecv);
 
        if (use_binary)
        {
index 5a33339c07b31c77f79c7b107be68de3c3df144e..b5a6fb6271728068e0b095948102ca5ca6938b20 100644 (file)
@@ -1,7 +1,7 @@
 # contrib/bdr/output.mk
 
 MODULE_big = bdr_output
-OBJS = bdr_output.o
+OBJS = bdr_output.o bdr_compat.o
 
 PG_CPPFLAGS = -I$(libpq_srcdir)
 SHLIB_LINK = $(libpq)
index 7520b42caa1fd2efa4a133c44e680e8e6495174b..4020123fa5146e30c3f230816d34c4da490da695 100644 (file)
@@ -1,7 +1,7 @@
 # contrib/bdr/worker.mk
 
 MODULE_big = bdr
-OBJS = bdr.o bdr_apply.o bdr_count.o bdr_seq.o
+OBJS = bdr.o bdr_apply.o bdr_count.o bdr_seq.o bdr_compat.o
 
 EXTENSION = bdr
 DATA = bdr--0.5.sql