From 1e493158d3d25771ed066028c00cbbdb41573496 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jdavis@postgresql.org>
Date: Wed, 10 Dec 2025 11:55:59 -0800
Subject: [PATCH] Remove char_tolower() API.

It's only useful for an ILIKE optimization for the libc provider using
a single-byte encoding and a non-C locale, but it creates significant
internal complexity.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
---
 src/backend/utils/adt/like.c           | 46 ++++++++++----------------
 src/backend/utils/adt/like_match.c     | 18 ++++++----
 src/backend/utils/adt/pg_locale.c      | 26 ---------------
 src/backend/utils/adt/pg_locale_libc.c | 10 ------
 src/include/utils/pg_locale.h          |  9 -----
 5 files changed, 28 insertions(+), 81 deletions(-)

diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 4216ac17f43..28980264307 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -43,8 +43,8 @@ static text *MB_do_like_escape(text *pat, text *esc);
 static int	UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
 						   pg_locale_t locale);
 
-static int	SB_IMatchText(const char *t, int tlen, const char *p, int plen,
-						  pg_locale_t locale);
+static int	C_IMatchText(const char *t, int tlen, const char *p, int plen,
+						 pg_locale_t locale);
 
 static int	GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
 static int	Generic_Text_IC_like(text *str, text *pat, Oid collation);
@@ -84,22 +84,10 @@ wchareq(const char *p1, const char *p2)
  * of getting a single character transformed to the system's wchar_t format.
  * So now, we just downcase the strings using lower() and apply regular LIKE
  * comparison.  This should be revisited when we install better locale support.
- */
-
-/*
- * We do handle case-insensitive matching for single-byte encodings using
+ *
+ * We do handle case-insensitive matching for the C locale using
  * fold-on-the-fly processing, however.
  */
-static char
-SB_lower_char(unsigned char c, pg_locale_t locale)
-{
-	if (locale->ctype_is_c)
-		return pg_ascii_tolower(c);
-	else if (locale->is_default)
-		return pg_tolower(c);
-	else
-		return char_tolower(c, locale);
-}
 
 
 #define NextByte(p, plen)	((p)++, (plen)--)
@@ -130,10 +118,10 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
 
 #include "like_match.c"
 
-/* setup to compile like_match.c for single byte case insensitive matches */
-#define MATCH_LOWER(t, locale) SB_lower_char((unsigned char) (t), locale)
+/* setup to compile like_match.c for case-insensitive matches in C locale */
+#define MATCH_LOWER
 #define NextChar(p, plen) NextByte((p), (plen))
-#define MatchText SB_IMatchText
+#define MatchText C_IMatchText
 
 #include "like_match.c"
 
@@ -202,22 +190,19 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 				 errmsg("nondeterministic collations are not supported for ILIKE")));
 
 	/*
-	 * For efficiency reasons, in the single byte case we don't call lower()
-	 * on the pattern and text, but instead call SB_lower_char on each
-	 * character.  In the multi-byte case we don't have much choice :-(. Also,
-	 * ICU does not support single-character case folding, so we go the long
-	 * way.
+	 * For efficiency reasons, in the C locale we don't call lower() on the
+	 * pattern and text, but instead lowercase each character lazily.
+	 *
+	 * XXX: use casefolding instead?
 	 */
 
-	if (locale->ctype_is_c ||
-		(char_tolower_enabled(locale) &&
-		 pg_database_encoding_max_length() == 1))
+	if (locale->ctype_is_c)
 	{
 		p = VARDATA_ANY(pat);
 		plen = VARSIZE_ANY_EXHDR(pat);
 		s = VARDATA_ANY(str);
 		slen = VARSIZE_ANY_EXHDR(str);
-		return SB_IMatchText(s, slen, p, plen, locale);
+		return C_IMatchText(s, slen, p, plen, locale);
 	}
 	else
 	{
@@ -229,10 +214,13 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 													 PointerGetDatum(str)));
 		s = VARDATA_ANY(str);
 		slen = VARSIZE_ANY_EXHDR(str);
+
 		if (GetDatabaseEncoding() == PG_UTF8)
 			return UTF8_MatchText(s, slen, p, plen, 0);
-		else
+		else if (pg_database_encoding_max_length() > 1)
 			return MB_MatchText(s, slen, p, plen, 0);
+		else
+			return SB_MatchText(s, slen, p, plen, 0);
 	}
 }
 
diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c
index 892f8a745ea..54846c9541d 100644
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -70,10 +70,14 @@
  *--------------------
  */
 
+/*
+ * MATCH_LOWER is defined for ILIKE in the C locale as an optimization. Other
+ * locales must casefold the inputs before matching.
+ */
 #ifdef MATCH_LOWER
-#define GETCHAR(t, locale) MATCH_LOWER(t, locale)
+#define GETCHAR(t) pg_ascii_tolower(t)
 #else
-#define GETCHAR(t, locale) (t)
+#define GETCHAR(t) (t)
 #endif
 
 static int
@@ -105,7 +109,7 @@ MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
 						 errmsg("LIKE pattern must not end with escape character")));
-			if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
+			if (GETCHAR(*p) != GETCHAR(*t))
 				return LIKE_FALSE;
 		}
 		else if (*p == '%')
@@ -167,14 +171,14 @@ MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
 					ereport(ERROR,
 							(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
 							 errmsg("LIKE pattern must not end with escape character")));
-				firstpat = GETCHAR(p[1], locale);
+				firstpat = GETCHAR(p[1]);
 			}
 			else
-				firstpat = GETCHAR(*p, locale);
+				firstpat = GETCHAR(*p);
 
 			while (tlen > 0)
 			{
-				if (GETCHAR(*t, locale) == firstpat || (locale && !locale->deterministic))
+				if (GETCHAR(*t) == firstpat || (locale && !locale->deterministic))
 				{
 					int			matched = MatchText(t, tlen, p, plen, locale);
 
@@ -342,7 +346,7 @@ MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
 					NextChar(t1, t1len);
 			}
 		}
-		else if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
+		else if (GETCHAR(*p) != GETCHAR(*t))
 		{
 			/* non-wildcard pattern char fails to match text char */
 			return LIKE_FALSE;
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index d73bab97c15..b00663b9585 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1629,32 +1629,6 @@ char_is_cased(char ch, pg_locale_t locale)
 	return locale->ctype->char_is_cased(ch, locale);
 }
 
-/*
- * char_tolower_enabled()
- *
- * Does the provider support char_tolower()?
- */
-bool
-char_tolower_enabled(pg_locale_t locale)
-{
-	if (locale->ctype == NULL)
-		return true;
-	return (locale->ctype->char_tolower != NULL);
-}
-
-/*
- * char_tolower()
- *
- * Convert char (single-byte encoding) to lowercase.
- */
-char
-char_tolower(unsigned char ch, pg_locale_t locale)
-{
-	if (locale->ctype == NULL)
-		return pg_ascii_tolower(ch);
-	return locale->ctype->char_tolower(ch, locale);
-}
-
 /*
  * Return required encoding ID for the given locale, or -1 if any encoding is
  * valid for the locale.
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index b125b5da3a6..fa871690e0c 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -248,13 +248,6 @@ wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
 #endif
 }
 
-static char
-char_tolower_libc(unsigned char ch, pg_locale_t locale)
-{
-	Assert(pg_database_encoding_max_length() == 1);
-	return tolower_l(ch, locale->lt);
-}
-
 static bool
 char_is_cased_libc(char ch, pg_locale_t locale)
 {
@@ -339,7 +332,6 @@ static const struct ctype_methods ctype_methods_libc_sb = {
 	.wc_isspace = wc_isspace_libc_sb,
 	.wc_isxdigit = wc_isxdigit_libc_sb,
 	.char_is_cased = char_is_cased_libc,
-	.char_tolower = char_tolower_libc,
 	.wc_toupper = toupper_libc_sb,
 	.wc_tolower = tolower_libc_sb,
 };
@@ -365,7 +357,6 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
 	.wc_isspace = wc_isspace_libc_sb,
 	.wc_isxdigit = wc_isxdigit_libc_sb,
 	.char_is_cased = char_is_cased_libc,
-	.char_tolower = char_tolower_libc,
 	.wc_toupper = toupper_libc_sb,
 	.wc_tolower = tolower_libc_sb,
 };
@@ -387,7 +378,6 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
 	.wc_isspace = wc_isspace_libc_mb,
 	.wc_isxdigit = wc_isxdigit_libc_mb,
 	.char_is_cased = char_is_cased_libc,
-	.char_tolower = char_tolower_libc,
 	.wc_toupper = toupper_libc_mb,
 	.wc_tolower = tolower_libc_mb,
 };
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 42e21e7fb8a..50520e50127 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -127,13 +127,6 @@ struct ctype_methods
 
 	/* required */
 	bool		(*char_is_cased) (char ch, pg_locale_t locale);
-
-	/*
-	 * Optional. If defined, will only be called for single-byte encodings. If
-	 * not defined, or if the encoding is multibyte, will fall back to
-	 * pg_strlower().
-	 */
-	char		(*char_tolower) (unsigned char ch, pg_locale_t locale);
 };
 
 /*
@@ -185,8 +178,6 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 extern char *get_collation_actual_version(char collprovider, const char *collcollate);
 
 extern bool char_is_cased(char ch, pg_locale_t locale);
-extern bool char_tolower_enabled(pg_locale_t locale);
-extern char char_tolower(unsigned char ch, pg_locale_t locale);
 extern size_t pg_strlower(char *dst, size_t dstsize,
 						  const char *src, ssize_t srclen,
 						  pg_locale_t locale);
-- 
2.39.5