From: Tomas Vondra Date: Sun, 11 Jun 2017 16:00:37 +0000 (+0200) Subject: Add support for MODULO distribution on BIGINT X-Git-Tag: XL_10_R1BETA1~265^2~4 X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=b11c6ff119ffce5174b221deb7e65c2720da6623;p=postgres-xl.git Add support for MODULO distribution on BIGINT Until now the BIGINT data type was not supported by MODULO distribution, and attempts to create such tables failed. This patch removes the limitation. The compute_modulo() function originally used an optimized algorithm from http://www.graphics.stanford.edu/~seander/bithacks.html (namely the one described in section "Compute modulus division by (1 << s) - 1 in parallel without a division operator") to compute the modulo. But that version of the algorithm only supported 32-bit values, and so would require changes to support 64-bit values. Instead, I've decided to simply drop that code and use the simple % operator, which should translate to an IDIV instruction. Judging by benchmarks (MODULO on INTEGER column), switching to plain modulo (%) might result in about a 1% slowdown, but it might easily be just noise caused by a different binary layout due to code changes. In fact, the simplified algorithm is much less noisy in this respect. 
--- diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c index c45d7e7d14..2ecffe45ac 100644 --- a/src/backend/pgxc/locator/locator.c +++ b/src/backend/pgxc/locator/locator.c @@ -107,68 +107,6 @@ static Expr * pgxc_find_distcol_expr(Index varno, Node *quals); #endif -static const unsigned int xc_mod_m[] = -{ - 0x00000000, 0x55555555, 0x33333333, 0xc71c71c7, - 0x0f0f0f0f, 0xc1f07c1f, 0x3f03f03f, 0xf01fc07f, - 0x00ff00ff, 0x07fc01ff, 0x3ff003ff, 0xffc007ff, - 0xff000fff, 0xfc001fff, 0xf0003fff, 0xc0007fff, - 0x0000ffff, 0x0001ffff, 0x0003ffff, 0x0007ffff, - 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, - 0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff, - 0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff -}; - -static const unsigned int xc_mod_q[][6] = -{ - { 0, 0, 0, 0, 0, 0}, {16, 8, 4, 2, 1, 1}, {16, 8, 4, 2, 2, 2}, - {15, 6, 3, 3, 3, 3}, {16, 8, 4, 4, 4, 4}, {15, 5, 5, 5, 5, 5}, - {12, 6, 6, 6 , 6, 6}, {14, 7, 7, 7, 7, 7}, {16, 8, 8, 8, 8, 8}, - { 9, 9, 9, 9, 9, 9}, {10, 10, 10, 10, 10, 10}, {11, 11, 11, 11, 11, 11}, - {12, 12, 12, 12, 12, 12}, {13, 13, 13, 13, 13, 13}, {14, 14, 14, 14, 14, 14}, - {15, 15, 15, 15, 15, 15}, {16, 16, 16, 16, 16, 16}, {17, 17, 17, 17, 17, 17}, - {18, 18, 18, 18, 18, 18}, {19, 19, 19, 19, 19, 19}, {20, 20, 20, 20, 20, 20}, - {21, 21, 21, 21, 21, 21}, {22, 22, 22, 22, 22, 22}, {23, 23, 23, 23, 23, 23}, - {24, 24, 24, 24, 24, 24}, {25, 25, 25, 25, 25, 25}, {26, 26, 26, 26, 26, 26}, - {27, 27, 27, 27, 27, 27}, {28, 28, 28, 28, 28, 28}, {29, 29, 29, 29, 29, 29}, - {30, 30, 30, 30, 30, 30}, {31, 31, 31, 31, 31, 31} -}; - -static const unsigned int xc_mod_r[][6] = -{ - {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x0000ffff, 0x000000ff, 0x0000000f, 0x00000003, 0x00000001, 0x00000001}, - {0x0000ffff, 0x000000ff, 0x0000000f, 0x00000003, 0x00000003, 0x00000003}, - {0x00007fff, 0x0000003f, 0x00000007, 0x00000007, 0x00000007, 0x00000007}, - {0x0000ffff, 0x000000ff, 0x0000000f, 
0x0000000f, 0x0000000f, 0x0000000f}, - {0x00007fff, 0x0000001f, 0x0000001f, 0x0000001f, 0x0000001f, 0x0000001f}, - {0x00000fff, 0x0000003f, 0x0000003f, 0x0000003f, 0x0000003f, 0x0000003f}, - {0x00003fff, 0x0000007f, 0x0000007f, 0x0000007f, 0x0000007f, 0x0000007f}, - {0x0000ffff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff}, - {0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff}, - {0x000003ff, 0x000003ff, 0x000003ff, 0x000003ff, 0x000003ff, 0x000003ff}, - {0x000007ff, 0x000007ff, 0x000007ff, 0x000007ff, 0x000007ff, 0x000007ff}, - {0x00000fff, 0x00000fff, 0x00000fff, 0x00000fff, 0x00000fff, 0x00000fff}, - {0x00001fff, 0x00001fff, 0x00001fff, 0x00001fff, 0x00001fff, 0x00001fff}, - {0x00003fff, 0x00003fff, 0x00003fff, 0x00003fff, 0x00003fff, 0x00003fff}, - {0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff}, - {0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff}, - {0x0001ffff, 0x0001ffff, 0x0001ffff, 0x0001ffff, 0x0001ffff, 0x0001ffff}, - {0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff}, - {0x0007ffff, 0x0007ffff, 0x0007ffff, 0x0007ffff, 0x0007ffff, 0x0007ffff}, - {0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff}, - {0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff}, - {0x003fffff, 0x003fffff, 0x003fffff, 0x003fffff, 0x003fffff, 0x003fffff}, - {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}, - {0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff}, - {0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}, - {0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff}, - {0x07ffffff, 0x07ffffff, 0x07ffffff, 0x07ffffff, 0x07ffffff, 0x07ffffff}, - {0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff}, - {0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff}, - {0x3fffffff, 0x3fffffff, 0x3fffffff, 0x3fffffff, 0x3fffffff, 
0x3fffffff}, - {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff} -}; /* * GetPreferredReplicationNode @@ -258,54 +196,13 @@ GetAnyDataNode(Bitmapset *nodes) /* * compute_modulo - * This function performs modulo in an optimized way - * It optimizes modulo of any positive number by - * 1,2,3,4,7,8,15,16,31,32,63,64 and so on - * for the rest of the denominators it uses % operator - * The optimized algos have been taken from - * http://www-graphics.stanford.edu/~seander/bithacks.html + * Computes modulo of two 64-bit unsigned values. */ static int -compute_modulo(unsigned int numerator, unsigned int denominator) +compute_modulo(uint64 numerator, uint64 denominator) { - unsigned int d; - unsigned int m; - unsigned int s; - unsigned int mask; - int k; - unsigned int q, r; - - if (numerator == 0) - return 0; - - /* Check if denominator is a power of 2 */ - if ((denominator & (denominator - 1)) == 0) - return numerator & (denominator - 1); - - /* Check if (denominator+1) is a power of 2 */ - d = denominator + 1; - if ((d & (d - 1)) == 0) - { - /* Which power of 2 is this number */ - s = 0; - mask = 0x01; - for (k = 0; k < 32; k++) - { - if ((d & mask) == mask) - break; - s++; - mask = mask << 1; - } - - m = (numerator & xc_mod_m[s]) + ((numerator >> s) & xc_mod_m[s]); - - for (q = 0, r = 0; m > denominator; q++, r++) - m = (m >> xc_mod_q[s][q]) + (m & xc_mod_r[s][r]); + Assert(denominator > 0); - m = m == denominator ? 
0 : m; - - return m; - } return numerator % denominator; } @@ -861,6 +758,8 @@ modulo_value_len(Oid dataType) case RELTIMEOID: case DATEOID: return 4; + case INT8OID: + return 8; default: return -1; } @@ -1430,18 +1329,20 @@ locate_modulo_insert(Locator *self, Datum value, bool isnull, index = 0; else { - unsigned int mod32; + uint64 val; - if (self->valuelen == 4) - mod32 = (unsigned int) (GET_4_BYTES(value)); + if (self->valuelen == 8) + val = (uint64) (GET_8_BYTES(value)); + else if (self->valuelen == 4) + val = (uint64) (GET_4_BYTES(value)); else if (self->valuelen == 2) - mod32 = (unsigned int) (GET_2_BYTES(value)); + val = (uint64) (GET_2_BYTES(value)); else if (self->valuelen == 1) - mod32 = (unsigned int) (GET_1_BYTE(value)); + val = (uint64) (GET_1_BYTE(value)); else - mod32 = 0; + val = 0; - index = compute_modulo(mod32, self->nodeCount); + index = compute_modulo(val, self->nodeCount); } switch (self->listType) { @@ -1506,19 +1407,21 @@ locate_modulo_select(Locator *self, Datum value, bool isnull, } else { - unsigned int mod32; - int index; + uint64 val; + int index; - if (self->valuelen == 4) - mod32 = (unsigned int) (GET_4_BYTES(value)); + if (self->valuelen == 8) + val = (uint64) (GET_8_BYTES(value)); + else if (self->valuelen == 4) + val = (unsigned int) (GET_4_BYTES(value)); else if (self->valuelen == 2) - mod32 = (unsigned int) (GET_2_BYTES(value)); + val = (unsigned int) (GET_2_BYTES(value)); else if (self->valuelen == 1) - mod32 = (unsigned int) (GET_1_BYTE(value)); + val = (unsigned int) (GET_1_BYTE(value)); else - mod32 = 0; + val = 0; - index = compute_modulo(mod32, self->nodeCount); + index = compute_modulo(val, self->nodeCount); switch (self->listType) { diff --git a/src/test/regress/expected/xc_distkey.out b/src/test/regress/expected/xc_distkey.out index 33f284f263..9e456e2d5c 100644 --- a/src/test/regress/expected/xc_distkey.out +++ b/src/test/regress/expected/xc_distkey.out @@ -201,39 +201,41 @@ ERROR: relation "f4_tab" does not exist LINE 
1: select * from f4_tab where a = 10.987654::float4; ^ create table i8_tab(a int8) distribute by modulo(a); -ERROR: Column a is not modulo distributable data type insert into i8_tab values(8446744073709551359); -ERROR: relation "i8_tab" does not exist -LINE 1: insert into i8_tab values(8446744073709551359); - ^ insert into i8_tab values(78902); -ERROR: relation "i8_tab" does not exist -LINE 1: insert into i8_tab values(78902); - ^ insert into i8_tab values(NULL); -ERROR: relation "i8_tab" does not exist -LINE 1: insert into i8_tab values(NULL); - ^ select * from i8_tab order by a; -ERROR: relation "i8_tab" does not exist -LINE 1: select * from i8_tab order by a; - ^ + a +--------------------- + 78902 + 8446744073709551359 + +(3 rows) + select * from i8_tab where a = 8446744073709551359::int8; -ERROR: relation "i8_tab" does not exist -LINE 1: select * from i8_tab where a = 8446744073709551359::int8; - ^ + a +--------------------- + 8446744073709551359 +(1 row) + select * from i8_tab where a = 8446744073709551359; -ERROR: relation "i8_tab" does not exist -LINE 1: select * from i8_tab where a = 8446744073709551359; - ^ + a +--------------------- + 8446744073709551359 +(1 row) + select * from i8_tab where a = 78902::int8; -ERROR: relation "i8_tab" does not exist -LINE 1: select * from i8_tab where a = 78902::int8; - ^ + a +------- + 78902 +(1 row) + select * from i8_tab where a = 78902; -ERROR: relation "i8_tab" does not exist -LINE 1: select * from i8_tab where a = 78902; - ^ + a +------- + 78902 +(1 row) + create table i2_tab(a int2) distribute by modulo(a); insert into i2_tab values(123); insert into i2_tab values(456); diff --git a/src/test/regress/expected/xl_distribution_column_types_modulo.out b/src/test/regress/expected/xl_distribution_column_types_modulo.out index ddd3ff5887..ad171f26a6 100755 --- a/src/test/regress/expected/xl_distribution_column_types_modulo.out +++ b/src/test/regress/expected/xl_distribution_column_types_modulo.out @@ -7,9 +7,8 @@ CREATE 
TABLE xl_dcm ( name text, price numeric ) DISTRIBUTE BY MODULO (product_id); -ERROR: Column product_id is not modulo distributable data type --integer -CREATE TABLE xl_dcm ( +CREATE TABLE xl_dcm0 ( product_no integer, product_id integer PRIMARY KEY, name text, diff --git a/src/test/regress/sql/xl_distribution_column_types_modulo.sql b/src/test/regress/sql/xl_distribution_column_types_modulo.sql index 316a8e4bf6..52c03aad80 100755 --- a/src/test/regress/sql/xl_distribution_column_types_modulo.sql +++ b/src/test/regress/sql/xl_distribution_column_types_modulo.sql @@ -12,7 +12,7 @@ CREATE TABLE xl_dcm ( ) DISTRIBUTE BY MODULO (product_id); --integer -CREATE TABLE xl_dcm ( +CREATE TABLE xl_dcm0 ( product_no integer, product_id integer PRIMARY KEY, name text,