static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
pg_locale_t locale);
-static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale);
+static int C_IMatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale);
static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
* of getting a single character transformed to the system's wchar_t format.
* So now, we just downcase the strings using lower() and apply regular LIKE
* comparison. This should be revisited when we install better locale support.
- */
-
-/*
- * We do handle case-insensitive matching for single-byte encodings using
+ *
+ * We do handle case-insensitive matching for the C locale using
* fold-on-the-fly processing, however.
*/
-static char
-SB_lower_char(unsigned char c, pg_locale_t locale)
-{
- if (locale->ctype_is_c)
- return pg_ascii_tolower(c);
- else if (locale->is_default)
- return pg_tolower(c);
- else
- return char_tolower(c, locale);
-}
#define NextByte(p, plen) ((p)++, (plen)--)
#include "like_match.c"
-/* setup to compile like_match.c for single byte case insensitive matches */
-#define MATCH_LOWER(t, locale) SB_lower_char((unsigned char) (t), locale)
+/* setup to compile like_match.c for case-insensitive matches in C locale */
+#define MATCH_LOWER
#define NextChar(p, plen) NextByte((p), (plen))
-#define MatchText SB_IMatchText
+#define MatchText C_IMatchText
#include "like_match.c"
errmsg("nondeterministic collations are not supported for ILIKE")));
/*
- * For efficiency reasons, in the single byte case we don't call lower()
- * on the pattern and text, but instead call SB_lower_char on each
- * character. In the multi-byte case we don't have much choice :-(. Also,
- * ICU does not support single-character case folding, so we go the long
- * way.
+ * For efficiency reasons, in the C locale we don't call lower() on the
+ * pattern and text up front; instead, C_IMatchText lowercases each byte
+ * as it is compared.
+ *
+ * XXX: use casefolding instead?
*/
- if (locale->ctype_is_c ||
- (char_tolower_enabled(locale) &&
- pg_database_encoding_max_length() == 1))
+ if (locale->ctype_is_c)
{
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
- return SB_IMatchText(s, slen, p, plen, locale);
+ return C_IMatchText(s, slen, p, plen, locale);
}
else
{
PointerGetDatum(str)));
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
+
if (GetDatabaseEncoding() == PG_UTF8)
return UTF8_MatchText(s, slen, p, plen, 0);
- else
+ else if (pg_database_encoding_max_length() > 1)
return MB_MatchText(s, slen, p, plen, 0);
+ else
+ return SB_MatchText(s, slen, p, plen, 0);
}
}
*--------------------
*/
+/*
+ * MATCH_LOWER is defined for ILIKE in the C locale as an optimization. Other
+ * locales must casefold the inputs before matching.
+ *
+ * GETCHAR() must yield the same type in both variants.  pg_ascii_tolower()
+ * works in terms of unsigned char, so its result is cast back to char;
+ * otherwise a byte with the high bit set that has been stored in a plain
+ * char variable would no longer compare equal to a freshly fetched
+ * GETCHAR() value on platforms where char is signed.
+ */
#ifdef MATCH_LOWER
-#define GETCHAR(t, locale) MATCH_LOWER(t, locale)
+#define GETCHAR(t) ((char) pg_ascii_tolower((unsigned char) (t)))
#else
-#define GETCHAR(t, locale) (t)
+#define GETCHAR(t) (t)
#endif
static int
ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("LIKE pattern must not end with escape character")));
- if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
+ if (GETCHAR(*p) != GETCHAR(*t))
return LIKE_FALSE;
}
else if (*p == '%')
ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("LIKE pattern must not end with escape character")));
- firstpat = GETCHAR(p[1], locale);
+ firstpat = GETCHAR(p[1]);
}
else
- firstpat = GETCHAR(*p, locale);
+ firstpat = GETCHAR(*p);
while (tlen > 0)
{
- if (GETCHAR(*t, locale) == firstpat || (locale && !locale->deterministic))
+ if (GETCHAR(*t) == firstpat || (locale && !locale->deterministic))
{
int matched = MatchText(t, tlen, p, plen, locale);
NextChar(t1, t1len);
}
}
- else if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
+ else if (GETCHAR(*p) != GETCHAR(*t))
{
/* non-wildcard pattern char fails to match text char */
return LIKE_FALSE;
return locale->ctype->char_is_cased(ch, locale);
}
-/*
- * char_tolower_enabled()
- *
- * Does the provider support char_tolower()?
- */
-bool
-char_tolower_enabled(pg_locale_t locale)
-{
- if (locale->ctype == NULL)
- return true;
- return (locale->ctype->char_tolower != NULL);
-}
-
-/*
- * char_tolower()
- *
- * Convert char (single-byte encoding) to lowercase.
- */
-char
-char_tolower(unsigned char ch, pg_locale_t locale)
-{
- if (locale->ctype == NULL)
- return pg_ascii_tolower(ch);
- return locale->ctype->char_tolower(ch, locale);
-}
-
/*
* Return required encoding ID for the given locale, or -1 if any encoding is
* valid for the locale.
#endif
}
-static char
-char_tolower_libc(unsigned char ch, pg_locale_t locale)
-{
- Assert(pg_database_encoding_max_length() == 1);
- return tolower_l(ch, locale->lt);
-}
-
static bool
char_is_cased_libc(char ch, pg_locale_t locale)
{
.wc_isspace = wc_isspace_libc_sb,
.wc_isxdigit = wc_isxdigit_libc_sb,
.char_is_cased = char_is_cased_libc,
- .char_tolower = char_tolower_libc,
.wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb,
};
.wc_isspace = wc_isspace_libc_sb,
.wc_isxdigit = wc_isxdigit_libc_sb,
.char_is_cased = char_is_cased_libc,
- .char_tolower = char_tolower_libc,
.wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb,
};
.wc_isspace = wc_isspace_libc_mb,
.wc_isxdigit = wc_isxdigit_libc_mb,
.char_is_cased = char_is_cased_libc,
- .char_tolower = char_tolower_libc,
.wc_toupper = toupper_libc_mb,
.wc_tolower = tolower_libc_mb,
};
/* required */
bool (*char_is_cased) (char ch, pg_locale_t locale);
-
- /*
- * Optional. If defined, will only be called for single-byte encodings. If
- * not defined, or if the encoding is multibyte, will fall back to
- * pg_strlower().
- */
- char (*char_tolower) (unsigned char ch, pg_locale_t locale);
};
/*
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
extern bool char_is_cased(char ch, pg_locale_t locale);
-extern bool char_tolower_enabled(pg_locale_t locale);
-extern char char_tolower(unsigned char ch, pg_locale_t locale);
extern size_t pg_strlower(char *dst, size_t dstsize,
const char *src, ssize_t srclen,
pg_locale_t locale);