From 1e493158d3d25771ed066028c00cbbdb41573496 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 10 Dec 2025 11:55:59 -0800 Subject: [PATCH] Remove char_tolower() API. It's only useful for an ILIKE optimization for the libc provider using a single-byte encoding and a non-C locale, but it creates significant internal complexity. Reviewed-by: Chao Li Reviewed-by: Peter Eisentraut Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com --- src/backend/utils/adt/like.c | 46 ++++++++++---------------- src/backend/utils/adt/like_match.c | 18 ++++++---- src/backend/utils/adt/pg_locale.c | 26 --------------- src/backend/utils/adt/pg_locale_libc.c | 10 ------ src/include/utils/pg_locale.h | 9 ----- 5 files changed, 28 insertions(+), 81 deletions(-) diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 4216ac17f43..28980264307 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -43,8 +43,8 @@ static text *MB_do_like_escape(text *pat, text *esc); static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale); -static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, - pg_locale_t locale); +static int C_IMatchText(const char *t, int tlen, const char *p, int plen, + pg_locale_t locale); static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation); static int Generic_Text_IC_like(text *str, text *pat, Oid collation); @@ -84,22 +84,10 @@ wchareq(const char *p1, const char *p2) * of getting a single character transformed to the system's wchar_t format. * So now, we just downcase the strings using lower() and apply regular LIKE * comparison. This should be revisited when we install better locale support. 
- */ - -/* - * We do handle case-insensitive matching for single-byte encodings using + * + * We do handle case-insensitive matching for the C locale using * fold-on-the-fly processing, however. */ -static char -SB_lower_char(unsigned char c, pg_locale_t locale) -{ - if (locale->ctype_is_c) - return pg_ascii_tolower(c); - else if (locale->is_default) - return pg_tolower(c); - else - return char_tolower(c, locale); -} #define NextByte(p, plen) ((p)++, (plen)--) @@ -130,10 +118,10 @@ SB_lower_char(unsigned char c, pg_locale_t locale) #include "like_match.c" -/* setup to compile like_match.c for single byte case insensitive matches */ -#define MATCH_LOWER(t, locale) SB_lower_char((unsigned char) (t), locale) +/* setup to compile like_match.c for case-insensitive matches in C locale */ +#define MATCH_LOWER #define NextChar(p, plen) NextByte((p), (plen)) -#define MatchText SB_IMatchText +#define MatchText C_IMatchText #include "like_match.c" @@ -202,22 +190,19 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) errmsg("nondeterministic collations are not supported for ILIKE"))); /* - * For efficiency reasons, in the single byte case we don't call lower() - * on the pattern and text, but instead call SB_lower_char on each - * character. In the multi-byte case we don't have much choice :-(. Also, - * ICU does not support single-character case folding, so we go the long - * way. + * For efficiency reasons, in the C locale we don't call lower() on the + * pattern and text, but instead lowercase each character lazily. + * + * XXX: use casefolding instead? 
*/ - if (locale->ctype_is_c || - (char_tolower_enabled(locale) && - pg_database_encoding_max_length() == 1)) + if (locale->ctype_is_c) { p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); - return SB_IMatchText(s, slen, p, plen, locale); + return C_IMatchText(s, slen, p, plen, locale); } else { @@ -229,10 +214,13 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) PointerGetDatum(str))); s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); + if (GetDatabaseEncoding() == PG_UTF8) return UTF8_MatchText(s, slen, p, plen, 0); - else + else if (pg_database_encoding_max_length() > 1) return MB_MatchText(s, slen, p, plen, 0); + else + return SB_MatchText(s, slen, p, plen, 0); } } diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c index 892f8a745ea..54846c9541d 100644 --- a/src/backend/utils/adt/like_match.c +++ b/src/backend/utils/adt/like_match.c @@ -70,10 +70,14 @@ *-------------------- */ +/* + * MATCH_LOWER is defined for ILIKE in the C locale as an optimization. Other + * locales must casefold the inputs before matching. 
+ */ #ifdef MATCH_LOWER -#define GETCHAR(t, locale) MATCH_LOWER(t, locale) +#define GETCHAR(t) pg_ascii_tolower(t) #else -#define GETCHAR(t, locale) (t) +#define GETCHAR(t) (t) #endif static int @@ -105,7 +109,7 @@ MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale) ereport(ERROR, (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), errmsg("LIKE pattern must not end with escape character"))); - if (GETCHAR(*p, locale) != GETCHAR(*t, locale)) + if (GETCHAR(*p) != GETCHAR(*t)) return LIKE_FALSE; } else if (*p == '%') @@ -167,14 +171,14 @@ MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale) ereport(ERROR, (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), errmsg("LIKE pattern must not end with escape character"))); - firstpat = GETCHAR(p[1], locale); + firstpat = GETCHAR(p[1]); } else - firstpat = GETCHAR(*p, locale); + firstpat = GETCHAR(*p); while (tlen > 0) { - if (GETCHAR(*t, locale) == firstpat || (locale && !locale->deterministic)) + if (GETCHAR(*t) == firstpat || (locale && !locale->deterministic)) { int matched = MatchText(t, tlen, p, plen, locale); @@ -342,7 +346,7 @@ MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale) NextChar(t1, t1len); } } - else if (GETCHAR(*p, locale) != GETCHAR(*t, locale)) + else if (GETCHAR(*p) != GETCHAR(*t)) { /* non-wildcard pattern char fails to match text char */ return LIKE_FALSE; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index d73bab97c15..b00663b9585 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1629,32 +1629,6 @@ char_is_cased(char ch, pg_locale_t locale) return locale->ctype->char_is_cased(ch, locale); } -/* - * char_tolower_enabled() - * - * Does the provider support char_tolower()? 
- */ -bool -char_tolower_enabled(pg_locale_t locale) -{ - if (locale->ctype == NULL) - return true; - return (locale->ctype->char_tolower != NULL); -} - -/* - * char_tolower() - * - * Convert char (single-byte encoding) to lowercase. - */ -char -char_tolower(unsigned char ch, pg_locale_t locale) -{ - if (locale->ctype == NULL) - return pg_ascii_tolower(ch); - return locale->ctype->char_tolower(ch, locale); -} - /* * Return required encoding ID for the given locale, or -1 if any encoding is * valid for the locale. diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index b125b5da3a6..fa871690e0c 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -248,13 +248,6 @@ wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale) #endif } -static char -char_tolower_libc(unsigned char ch, pg_locale_t locale) -{ - Assert(pg_database_encoding_max_length() == 1); - return tolower_l(ch, locale->lt); -} - static bool char_is_cased_libc(char ch, pg_locale_t locale) { @@ -339,7 +332,6 @@ static const struct ctype_methods ctype_methods_libc_sb = { .wc_isspace = wc_isspace_libc_sb, .wc_isxdigit = wc_isxdigit_libc_sb, .char_is_cased = char_is_cased_libc, - .char_tolower = char_tolower_libc, .wc_toupper = toupper_libc_sb, .wc_tolower = tolower_libc_sb, }; @@ -365,7 +357,6 @@ static const struct ctype_methods ctype_methods_libc_other_mb = { .wc_isspace = wc_isspace_libc_sb, .wc_isxdigit = wc_isxdigit_libc_sb, .char_is_cased = char_is_cased_libc, - .char_tolower = char_tolower_libc, .wc_toupper = toupper_libc_sb, .wc_tolower = tolower_libc_sb, }; @@ -387,7 +378,6 @@ static const struct ctype_methods ctype_methods_libc_utf8 = { .wc_isspace = wc_isspace_libc_mb, .wc_isxdigit = wc_isxdigit_libc_mb, .char_is_cased = char_is_cased_libc, - .char_tolower = char_tolower_libc, .wc_toupper = toupper_libc_mb, .wc_tolower = tolower_libc_mb, }; diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h 
index 42e21e7fb8a..50520e50127 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -127,13 +127,6 @@ struct ctype_methods /* required */ bool (*char_is_cased) (char ch, pg_locale_t locale); - - /* - * Optional. If defined, will only be called for single-byte encodings. If - * not defined, or if the encoding is multibyte, will fall back to - * pg_strlower(). - */ - char (*char_tolower) (unsigned char ch, pg_locale_t locale); }; /* @@ -185,8 +178,6 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid); extern char *get_collation_actual_version(char collprovider, const char *collcollate); extern bool char_is_cased(char ch, pg_locale_t locale); -extern bool char_tolower_enabled(pg_locale_t locale); -extern char char_tolower(unsigned char ch, pg_locale_t locale); extern size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale); -- 2.39.5