From: Jeff Davis <jdavis@postgresql.org>
Date: Tue, 16 Dec 2025 23:32:57 +0000 (-0800)
Subject: Avoid global LC_CTYPE dependency in pg_locale_icu.c.
X-Git-Url: http://git.postgresql.org/gitweb/static/documentation.html?a=commitdiff_plain;h=0a90df58cf38cf68d59c6841513be98aeeff250e;p=postgresql.git

Avoid global LC_CTYPE dependency in pg_locale_icu.c.

ICU still depends on libc for compatibility with certain historical
behavior for single-byte encodings. Make the dependency explicit by
holding a locale_t object when required.

We should consider a better solution in the future, such as decoding
the text to UTF-32 and using u_tolower(). That would be a behavior
change and require additional infrastructure though; so for now, just
avoid the global LC_CTYPE dependency.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
---

diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 69f22b47a68..43d44fe43bd 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -244,6 +244,29 @@ static const struct ctype_methods ctype_methods_icu = {
 	.wc_toupper = toupper_icu,
 	.wc_tolower = tolower_icu,
 };
+
+/*
+ * ICU still depends on libc for compatibility with certain historical
+ * behavior for single-byte encodings.  See downcase_ident_icu().
+ *
+ * XXX: consider fixing by decoding the single byte into a code point, and
+ * using u_tolower().
+ */
+static locale_t
+make_libc_ctype_locale(const char *ctype)
+{
+	locale_t	loc;
+
+#ifndef WIN32
+	loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
+#else
+	loc = _create_locale(LC_ALL, ctype);
+#endif
+	if (!loc)
+		report_newlocale_failure(ctype);
+
+	return loc;
+}
 #endif
 
 pg_locale_t
@@ -254,6 +277,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	const char *iculocstr;
 	const char *icurules = NULL;
 	UCollator  *collator;
+	locale_t	loc = (locale_t) 0;
 	pg_locale_t result;
 
 	if (collid == DEFAULT_COLLATION_OID)
@@ -276,6 +300,18 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 		if (!isnull)
 			icurules = TextDatumGetCString(datum);
 
+		/* libc only needed for default locale and single-byte encoding */
+		if (pg_database_encoding_max_length() == 1)
+		{
+			const char *ctype;
+
+			datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
+										   Anum_pg_database_datctype);
+			ctype = TextDatumGetCString(datum);
+
+			loc = make_libc_ctype_locale(ctype);
+		}
+
 		ReleaseSysCache(tp);
 	}
 	else
@@ -306,6 +342,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
 	result->icu.locale = MemoryContextStrdup(context, iculocstr);
 	result->icu.ucol = collator;
+	result->icu.lt = loc;
 	result->deterministic = deterministic;
 	result->collate_is_c = false;
 	result->ctype_is_c = false;
@@ -578,17 +615,19 @@ downcase_ident_icu(char *dst, size_t dstsize, const char *src,
 				   ssize_t srclen, pg_locale_t locale)
 {
 	int			i;
-	bool		enc_is_single_byte;
+	bool		libc_lower;
+	locale_t	lt = locale->icu.lt;
+
+	libc_lower = lt && (pg_database_encoding_max_length() == 1);
 
-	enc_is_single_byte = pg_database_encoding_max_length() == 1;
 	for (i = 0; i < srclen && i < dstsize; i++)
 	{
 		unsigned char ch = (unsigned char) src[i];
 
 		if (ch >= 'A' && ch <= 'Z')
 			ch = pg_ascii_tolower(ch);
-		else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
-			ch = tolower(ch);
+		else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
+			ch = tolower_l(ch, lt);
 		dst[i] = (char) ch;
 	}
 
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 1e584819c5e..86016b9344e 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -167,6 +167,7 @@ struct pg_locale_struct
 		{
 			const char *locale;
 			UCollator  *ucol;
+			locale_t	lt;
 		}			icu;
 #endif
 	};