From: Peter Geoghegan Date: Wed, 19 Mar 2014 02:55:29 +0000 (-0700) Subject: Failing GIN type confusion test X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=264db561217bd75a4461f23f7fd6e1eb7334002a;p=users%2Fandresfreund%2Fpostgres.git Failing GIN type confusion test Currently, the default GIN opclass thinks that the exists operator can be used to find Jsonb datums with JSON arrays that have only non-string elements, on the basis of the textual storage format of those non-string elements accidentally matching (i.e. being bitwise equal to) what would be expected for an actual string element. This is bogus, in that it does not comport with the general definition of Jsonb existence (that is, the behavior of the ?/exists operator, and the numerous slight variants of same). The likely fix here is to represent JsonbValue type directly in the first byte of the serialized text representation of the opclass (i.e. in addition to key/element and value flags, there'd be a JsonbValue type flag), but just commit this test for now. --- diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c index 852c55c360..57b4cd0b57 100644 --- a/src/backend/utils/adt/jsonb_gin.c +++ b/src/backend/utils/adt/jsonb_gin.c @@ -27,7 +27,7 @@ typedef struct PathHashStack } PathHashStack; static text *make_text_key(const char *str, int len, char flag); -static text *make_scalar_text_key(const JsonbValue * v, char flag); +static text *make_scalar_key(const JsonbValue * v, char flag); /* * @@ -93,22 +93,30 @@ gin_extract_jsonb(PG_FUNCTION_ARGS) /* * Serialize keys and elements as one. Array elements are indexed as * keys, for the benefit of JsonbContainsStrategyNumber (i.e. so that - * the structure of the index comports with the general Jsonb notion of - * containment). + * the structure of the index comports with the general Jsonb + * definition of containment). 
* * See remarks above findJsonbValueFromSuperHeader() for information on * our definition of containment as it relates to elements and - * key/value pairs. Note also that the recheck flag is set for - * JsonbContainsStrategyNumber. + * key/value pairs. We do not treat "raw scalar" pseudo arrays + * specially here, which is consistent with that definition. Note also + * that the recheck flag is set for JsonbContainsStrategyNumber. */ switch (r) { + /* + * FIXME: Figure out a way of making checking the existence of a + * key or text element work, without spuriously returning on + * non-string elements just because they happen to have the same + * textual representation. We can't just avoid storing non-string + * elements, because we need those for testing containment. + */ case WJB_KEY: case WJB_ELEM: - entries[i++] = PointerGetDatum(make_scalar_text_key(&v, KEYELEMFLAG)); + entries[i++] = PointerGetDatum(make_scalar_key(&v, JKEYELEM)); break; case WJB_VALUE: - entries[i++] = PointerGetDatum(make_scalar_text_key(&v, VALFLAG)); + entries[i++] = PointerGetDatum(make_scalar_key(&v, JVAL)); break; default: break; @@ -147,7 +155,7 @@ gin_extract_jsonb_query(PG_FUNCTION_ARGS) *nentries = 1; entries = (Datum *) palloc(sizeof(Datum)); item = make_text_key(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query), - KEYELEMFLAG); + JKEYELEM); entries[0] = PointerGetDatum(item); } else if (strategy == JsonbExistsAnyStrategyNumber || @@ -174,7 +182,7 @@ gin_extract_jsonb_query(PG_FUNCTION_ARGS) continue; item = make_text_key(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ, - KEYELEMFLAG); + JKEYELEM); entries[j++] = PointerGetDatum(item); } @@ -403,9 +411,9 @@ gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS) bool has_maybe = false; /* - * All extracted keys must be present. Combination of GIN_MAYBE and - * GIN_TRUE gives GIN_MAYBE result because all keys may be present in - * this situation. + * All extracted keys must be present. 
A combination of GIN_MAYBE and + * GIN_TRUE induces a GIN_MAYBE result, because all keys may be present + * in this situation. */ for (i = 0; i < nkeys; i++) { @@ -422,8 +430,8 @@ gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS) } /* * Index doesn't have information about correspondence of Jsonb keys - * and values (as distinct from GIN keys, which both are stored as). So - * invariably we recheck. In thiconsistent function we reflect it as + * and values (as distinct from GIN keys, which both are stored as), so + * invariably we recheck. In the consistent function we reflect it as * GIN_MAYBE in response to no GIN_MAYBE at input. */ if (!has_maybe && res == GIN_TRUE) @@ -571,7 +579,7 @@ make_text_key(const char *str, int len, char flag) * Create a textual representation of a jsonbValue for GIN storage. */ static text * -make_scalar_text_key(const JsonbValue * v, char flag) +make_scalar_key(const JsonbValue * v, char flag) { text *item; char *cstr; @@ -579,7 +587,7 @@ make_scalar_text_key(const JsonbValue * v, char flag) switch (v->type) { case jbvNull: - item = make_text_key(NULL, 0, NULLFLAG); + item = make_text_key(NULL, 0, JNULL); break; case jbvBool: item = make_text_key(v->boolean ? "t" : "f", 1, flag); diff --git a/src/backend/utils/adt/jsonb_op.c b/src/backend/utils/adt/jsonb_op.c index 88bebb2658..6ad42c7b35 100644 --- a/src/backend/utils/adt/jsonb_op.c +++ b/src/backend/utils/adt/jsonb_op.c @@ -25,6 +25,11 @@ jsonb_exists(PG_FUNCTION_ARGS) JsonbValue kval; + /* + * We only match Object keys (which are naturally always Strings), or + * string elements in arrays. In particular, we do not match non-string + * scalar elements. 
+ */ kval.type = jbvString; kval.string.val = VARDATA_ANY(key); kval.string.len = VARSIZE_ANY_EXHDR(key); diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h index e2aa6e4ad7..bc99b985d3 100644 --- a/src/include/utils/jsonb.h +++ b/src/include/utils/jsonb.h @@ -81,9 +81,9 @@ * keys (or only values), but for now it's always keys and values together, or * just array elements. */ -#define KEYELEMFLAG 'K' -#define VALFLAG 'V' -#define NULLFLAG 'N' +#define JKEYELEM 'K' +#define JVAL 'V' +#define JNULL 'N' #define JsonbContainsStrategyNumber 7 #define JsonbExistsStrategyNumber 9 diff --git a/src/test/regress/data/jsonb.data b/src/test/regress/data/jsonb.data index 90bdc6cf6b..1352ebe3ac 100644 --- a/src/test/regress/data/jsonb.data +++ b/src/test/regress/data/jsonb.data @@ -988,6 +988,7 @@ {"world":"CAB", "org":21, "indexed":true, "line":988, "abstract":"ABC"} {"title":"CBC", "status":66, "line":989} {} +{"array":[5]} {"array":["foo", "bar", "baz"]} {"array":["bar", "baz", "foo"]} {"array":["bar", "baz"]} diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index b425307755..d96459750c 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -779,6 +779,21 @@ SELECT count(*) from testjsonb WHERE j->'array' ? 'bar'; 3 (1 row) +-- type sensitive array exists - should return no rows (since "exists" only +-- matches strings that are either object keys or array elements) +SELECT count(*) from testjsonb WHERE j->'array' ? 
'5'::text; + count +------- + 0 +(1 row) + +-- However, a raw scalar is *contained* within the array +SELECT count(*) from testjsonb WHERE j->'array' @> '5'::jsonb; + count +------- + 1 +(1 row) + SELECT jsonb_exists_any('{"a":null, "b":"qq"}', ARRAY['a','b']); jsonb_exists_any ------------------ @@ -1468,7 +1483,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"array":["bar"]}'; SELECT count(*) FROM testjsonb WHERE j @> '{}'; count ------- - 1008 + 1009 (1 row) SELECT count(*) FROM testjsonb WHERE j ? 'public'; @@ -1497,11 +1512,26 @@ SELECT count(*) from testjsonb WHERE j->'array' ? 'bar'; 3 (1 row) +-- type sensitive array exists - should return no rows (since "exists" only +-- matches strings that are either object keys or array elements) +SELECT count(*) from testjsonb WHERE j->'array' ? '5'::text; + count +------- + 0 +(1 row) + +-- However, a raw scalar is *contained* within the array +SELECT count(*) from testjsonb WHERE j->'array' @> '5'::jsonb; + count +------- + 1 +(1 row) + RESET enable_seqscan; SELECT count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow; count ------- - 4787 + 4788 (1 row) SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GROUP BY key ORDER BY count DESC, key; @@ -1529,7 +1559,7 @@ SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GRO subtitle | 169 auth | 168 abstract | 161 - array | 4 + array | 5 age | 2 (24 rows) @@ -1537,14 +1567,14 @@ SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GRO SELECT count(distinct j) FROM testjsonb; count ------- - 890 + 891 (1 row) SET enable_hashagg = off; SELECT count(*) FROM (SELECT j FROM (SELECT * FROM testjsonb UNION ALL SELECT * FROM testjsonb) js GROUP BY j) js2; count ------- - 890 + 891 (1 row) SET enable_hashagg = on; @@ -1552,7 +1582,7 @@ SET enable_sort = off; SELECT count(*) FROM (SELECT j FROM (SELECT * FROM testjsonb UNION ALL SELECT * FROM testjsonb) js GROUP BY j) js2; count ------- - 890 + 
891 (1 row) SELECT distinct * FROM (values (jsonb '{}' || ''),('{}')) v(j); @@ -1619,7 +1649,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"age":25.0}'; SELECT count(*) FROM testjsonb WHERE j @> '{}'; count ------- - 1008 + 1009 (1 row) RESET enable_seqscan; diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out index 65b68a5a80..d8bfec5d57 100644 --- a/src/test/regress/expected/jsonb_1.out +++ b/src/test/regress/expected/jsonb_1.out @@ -779,6 +779,21 @@ SELECT count(*) from testjsonb WHERE j->'array' ? 'bar'; 3 (1 row) +-- type sensitive array exists - should return no rows (since "exists" only +-- matches strings that are either object keys or array elements) +SELECT count(*) from testjsonb WHERE j->'array' ? '5'::text; + count +------- + 0 +(1 row) + +-- However, a raw scalar is *contained* within the array +SELECT count(*) from testjsonb WHERE j->'array' @> '5'::jsonb; + count +------- + 1 +(1 row) + SELECT jsonb_exists_any('{"a":null, "b":"qq"}', ARRAY['a','b']); jsonb_exists_any ------------------ @@ -1468,7 +1483,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"array":["bar"]}'; SELECT count(*) FROM testjsonb WHERE j @> '{}'; count ------- - 1008 + 1009 (1 row) SELECT count(*) FROM testjsonb WHERE j ? 'public'; @@ -1497,11 +1512,26 @@ SELECT count(*) from testjsonb WHERE j->'array' ? 'bar'; 3 (1 row) +-- type sensitive array exists - should return no rows (since "exists" only +-- matches strings that are either object keys or array elements) +SELECT count(*) from testjsonb WHERE j->'array' ? 
'5'::text; + count +------- + 0 +(1 row) + +-- However, a raw scalar is *contained* within the array +SELECT count(*) from testjsonb WHERE j->'array' @> '5'::jsonb; + count +------- + 1 +(1 row) + RESET enable_seqscan; SELECT count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow; count ------- - 4787 + 4788 (1 row) SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GROUP BY key ORDER BY count DESC, key; @@ -1529,7 +1559,7 @@ SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GRO subtitle | 169 auth | 168 abstract | 161 - array | 4 + array | 5 age | 2 (24 rows) @@ -1537,14 +1567,14 @@ SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GRO SELECT count(distinct j) FROM testjsonb; count ------- - 890 + 891 (1 row) SET enable_hashagg = off; SELECT count(*) FROM (SELECT j FROM (SELECT * FROM testjsonb UNION ALL SELECT * FROM testjsonb) js GROUP BY j) js2; count ------- - 890 + 891 (1 row) SET enable_hashagg = on; @@ -1552,7 +1582,7 @@ SET enable_sort = off; SELECT count(*) FROM (SELECT j FROM (SELECT * FROM testjsonb UNION ALL SELECT * FROM testjsonb) js GROUP BY j) js2; count ------- - 890 + 891 (1 row) SELECT distinct * FROM (values (jsonb '{}' || ''),('{}')) v(j); @@ -1619,7 +1649,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"age":25.0}'; SELECT count(*) FROM testjsonb WHERE j @> '{}'; count ------- - 1008 + 1009 (1 row) RESET enable_seqscan; diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 847aa7a78e..8cd4b3b102 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -177,6 +177,11 @@ SELECT jsonb '{"a":null, "b":"qq"}' ? 'c'; SELECT jsonb '{"a":"null", "b":"qq"}' ? 'a'; -- array exists - array elements should behave as keys SELECT count(*) from testjsonb WHERE j->'array' ? 
'bar'; +-- type sensitive array exists - should return no rows (since "exists" only +-- matches strings that are either object keys or array elements) +SELECT count(*) from testjsonb WHERE j->'array' ? '5'::text; +-- However, a raw scalar is *contained* within the array +SELECT count(*) from testjsonb WHERE j->'array' @> '5'::jsonb; SELECT jsonb_exists_any('{"a":null, "b":"qq"}', ARRAY['a','b']); SELECT jsonb_exists_any('{"a":null, "b":"qq"}', ARRAY['b','a']); @@ -335,6 +340,11 @@ SELECT count(*) FROM testjsonb WHERE j ?& ARRAY['public','disabled']; -- array exists - array elements should behave as keys (for GIN index scans too) CREATE INDEX jidx_array ON testjsonb USING gin((j->'array')); SELECT count(*) from testjsonb WHERE j->'array' ? 'bar'; +-- type sensitive array exists - should return no rows (since "exists" only +-- matches strings that are either object keys or array elements) +SELECT count(*) from testjsonb WHERE j->'array' ? '5'::text; +-- However, a raw scalar is *contained* within the array +SELECT count(*) from testjsonb WHERE j->'array' @> '5'::jsonb; RESET enable_seqscan;