From: Jeff Davis Date: Tue, 16 Dec 2025 23:32:57 +0000 (-0800) Subject: Avoid global LC_CTYPE dependency in pg_locale_icu.c. X-Git-Url: http://git.postgresql.org/gitweb/static/documentation.html?a=commitdiff_plain;h=0a90df58cf38cf68d59c6841513be98aeeff250e;p=postgresql.git Avoid global LC_CTYPE dependency in pg_locale_icu.c. ICU still depends on libc for compatibility with certain historical behavior for single-byte encodings. Make the dependency explicit by holding a locale_t object when required. We should consider a better solution in the future, such as decoding the text to UTF-32 and using u_tolower(). That would be a behavior change and require additional infrastructure though; so for now, just avoid the global LC_CTYPE dependency. Reviewed-by: Chao Li Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com --- diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index 69f22b47a68..43d44fe43bd 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -244,6 +244,29 @@ static const struct ctype_methods ctype_methods_icu = { .wc_toupper = toupper_icu, .wc_tolower = tolower_icu, }; + +/* + * ICU still depends on libc for compatibility with certain historical + * behavior for single-byte encodings. See downcase_ident_icu(). + * + * XXX: consider fixing by decoding the single byte into a code point, and + * using u_tolower(). 
+ */ +static locale_t +make_libc_ctype_locale(const char *ctype) +{ + locale_t loc; + +#ifndef WIN32 + loc = newlocale(LC_CTYPE_MASK, ctype, NULL); +#else + loc = _create_locale(LC_ALL, ctype); +#endif + if (!loc) + report_newlocale_failure(ctype); + + return loc; +} #endif pg_locale_t @@ -254,6 +277,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) const char *iculocstr; const char *icurules = NULL; UCollator *collator; + locale_t loc = (locale_t) 0; pg_locale_t result; if (collid == DEFAULT_COLLATION_OID) @@ -276,6 +300,18 @@ create_pg_locale_icu(Oid collid, MemoryContext context) if (!isnull) icurules = TextDatumGetCString(datum); + /* libc only needed for default locale and single-byte encoding */ + if (pg_database_encoding_max_length() == 1) + { + const char *ctype; + + datum = SysCacheGetAttrNotNull(DATABASEOID, tp, + Anum_pg_database_datctype); + ctype = TextDatumGetCString(datum); + + loc = make_libc_ctype_locale(ctype); + } + ReleaseSysCache(tp); } else @@ -306,6 +342,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result->icu.locale = MemoryContextStrdup(context, iculocstr); result->icu.ucol = collator; + result->icu.lt = loc; result->deterministic = deterministic; result->collate_is_c = false; result->ctype_is_c = false; @@ -578,17 +615,19 @@ downcase_ident_icu(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { int i; - bool enc_is_single_byte; + bool libc_lower; + locale_t lt = locale->icu.lt; + + libc_lower = lt && (pg_database_encoding_max_length() == 1); - enc_is_single_byte = pg_database_encoding_max_length() == 1; for (i = 0; i < srclen && i < dstsize; i++) { unsigned char ch = (unsigned char) src[i]; if (ch >= 'A' && ch <= 'Z') ch = pg_ascii_tolower(ch); - else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch)) - ch = tolower(ch); + else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt)) + ch = 
tolower_l(ch, lt); dst[i] = (char) ch; } diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 1e584819c5e..86016b9344e 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -167,6 +167,7 @@ struct pg_locale_struct { const char *locale; UCollator *ucol; + locale_t lt; } icu; #endif };