summary refs log tree commit diff stats
path: root/src/backend/utils/adt/like.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/like.c')
-rw-r--r-- src/backend/utils/adt/like.c 456
1 files changed, 456 insertions, 0 deletions
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
new file mode 100644
index 0000000..eed183c
--- /dev/null
+++ b/src/backend/utils/adt/like.c
@@ -0,0 +1,456 @@
+/*-------------------------------------------------------------------------
+ *
+ * like.c
+ * like expression handling code.
+ *
+ * NOTES
+ * A big hack of the regexp.c code!! Contributed by
+ * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/like.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "catalog/pg_collation.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/pg_locale.h"
+
+
+/*
+ * Result codes for the MatchText family of functions.  The fmgr entry points
+ * in this file only ever compare a result against LIKE_TRUE.
+ * NOTE(review): LIKE_ABORT is not referenced in this file; presumably it is
+ * used inside like_match.c -- confirm there.
+ */
+#define LIKE_TRUE 1
+#define LIKE_FALSE 0
+#define LIKE_ABORT (-1)
+
+
+/*
+ * Forward declarations.
+ *
+ * The SB_ (single-byte), MB_ (multibyte), UTF8_ and SB_I (single-byte,
+ * case-insensitive) functions are not written out in this file; each one
+ * comes from a separate #include of like_match.c further down, with
+ * MatchText / do_like_escape #defined to the wanted name beforehand.
+ */
+static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale, bool locale_is_c);
+static text *SB_do_like_escape(text *, text *);
+
+static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale, bool locale_is_c);
+static text *MB_do_like_escape(text *, text *);
+
+static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale, bool locale_is_c);
+
+static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale, bool locale_is_c);
+
+/* Dispatchers defined in this file; they select one of the matchers above. */
+static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
+static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
+
+/*--------------------
+ * wchareq --- byte-wise equality test for two multibyte characters.
+ *
+ * p1 and p2 each point at the first byte of one character.  Returns 1 when
+ * both characters have the same byte length (as reported by pg_mblen) and
+ * identical bytes, otherwise returns 0.
+ *--------------------
+ */
+static inline int
+wchareq(const char *p1, const char *p2)
+{
+	int			nbytes;
+	int			i;
+
+	/* Cheap rejection: characters with different lead bytes cannot match. */
+	if (*p1 != *p2)
+		return 0;
+
+	nbytes = pg_mblen(p1);
+	if (nbytes != pg_mblen(p2))
+		return 0;
+
+	/* Lengths agree, so compare the two characters byte by byte. */
+	for (i = 0; i < nbytes; i++)
+	{
+		if (p1[i] != p2[i])
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Formerly we had a routine iwchareq() here that tried to do case-insensitive
+ * comparison of multibyte characters. It did not work at all, however,
+ * because it relied on tolower() which has a single-byte API ... and
+ * towlower() wouldn't be much better since we have no suitably cheap way
+ * of getting a single character transformed to the system's wchar_t format.
+ * So now, we just downcase the strings using lower() and apply regular LIKE
+ * comparison. This should be revisited when we install better locale support.
+ */
+
+/*
+ * SB_lower_char --- fold one single-byte character to lower case.
+ *
+ * This is the fold-on-the-fly helper used for case-insensitive matching in
+ * single-byte encodings.  When locale_is_c is set the ASCII-only folding is
+ * used; otherwise, if a locale is supplied and HAVE_LOCALE_T is defined,
+ * tolower_l() is applied with that locale; in every remaining case
+ * pg_tolower() does the work.
+ */
+static char
+SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
+{
+	if (locale_is_c)
+		return pg_ascii_tolower(c);
+
+#ifdef HAVE_LOCALE_T
+	if (locale)
+		return tolower_l(c, locale->info.lt);
+#endif
+
+	return pg_tolower(c);
+}
+
+
+/* Step p forward by one byte and reduce the remaining length plen by one. */
+#define NextByte(p, plen) ((p)++, (plen)--)
+
+/* Set up to compile like_match.c for multibyte characters */
+/* CHAREQ: compare the characters at p1 and p2 with wchareq() */
+#define CHAREQ(p1, p2) wchareq((p1), (p2))
+/* NextChar: advance p by pg_mblen(p) bytes and shrink plen by the same amount */
+#define NextChar(p, plen) \
+ do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
+/*
+ * CopyAdvChar: copy pg_mblen(src) bytes from src to dst, advancing both
+ * pointers, and reduce srclen by that many bytes.
+ */
+#define CopyAdvChar(dst, src, srclen) \
+ do { int __l = pg_mblen(src); \
+ (srclen) -= __l; \
+ while (__l-- > 0) \
+ *(dst)++ = *(src)++; \
+ } while (0)
+
+/* Names under which this inclusion's functions are emitted */
+#define MatchText MB_MatchText
+#define do_like_escape MB_do_like_escape
+
+/*
+ * NOTE(review): the macros above are defined again further down without an
+ * intervening #undef, so like_match.c presumably #undefs them itself --
+ * confirm against like_match.c.
+ */
+#include "like_match.c"
+
+/* Set up to compile like_match.c for single-byte characters */
+/* CHAREQ: a character is one byte, so compare the bytes directly */
+#define CHAREQ(p1, p2) (*(p1) == *(p2))
+/* NextChar: advancing one character is the same as advancing one byte */
+#define NextChar(p, plen) NextByte((p), (plen))
+/* CopyAdvChar: copy one byte from src to dst, advance both, decrement srclen */
+#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
+
+/* Names under which this inclusion's functions are emitted */
+#define MatchText SB_MatchText
+#define do_like_escape SB_do_like_escape
+
+#include "like_match.c"
+
+/* setup to compile like_match.c for single byte case insensitive matches */
+/*
+ * MATCH_LOWER: fold one byte with SB_lower_char().  The expansion refers to
+ * variables named locale and locale_is_c, so both must be in scope wherever
+ * the macro is used.
+ *
+ * NOTE(review): CHAREQ, CopyAdvChar and do_like_escape are not defined for
+ * this inclusion; presumably like_match.c adapts itself when MATCH_LOWER is
+ * defined -- confirm against like_match.c.
+ */
+#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
+#define NextChar(p, plen) NextByte((p), (plen))
+#define MatchText SB_IMatchText
+
+#include "like_match.c"
+
+/* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
+
+/*
+ * NextChar here avoids calling pg_mblen(): it steps one byte, then keeps
+ * stepping for as long as bytes remain and the byte at p has the bit pattern
+ * 10xxxxxx (a UTF-8 continuation byte).
+ */
+#define NextChar(p, plen) \
+ do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
+#define MatchText UTF8_MatchText
+
+#include "like_match.c"
+
+/*
+ * GenericMatchText --- LIKE matching for all cases that need no inline
+ * case-folding.
+ *
+ * s/slen describe the subject string and p/plen the pattern.  A
+ * nondeterministic collation is rejected with an error.  The work is then
+ * handed to the matcher for the database encoding (single-byte, UTF8 or
+ * generic multibyte) and that matcher's result is returned unchanged.
+ */
+static inline int
+GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
+{
+	/* An unset, C-ctype or default collation skips the locale lookup. */
+	if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID)
+	{
+		pg_locale_t coll_locale = pg_newlocale_from_collation(collation);
+
+		if (coll_locale && !coll_locale->deterministic)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("nondeterministic collations are not supported for LIKE")));
+	}
+
+	if (pg_database_encoding_max_length() == 1)
+		return SB_MatchText(s, slen, p, plen, 0, true);
+
+	if (GetDatabaseEncoding() == PG_UTF8)
+		return UTF8_MatchText(s, slen, p, plen, 0, true);
+
+	return MB_MatchText(s, slen, p, plen, 0, true);
+}
+
+/*
+ * Generic_Text_IC_like --- case-insensitive LIKE on two text values.
+ *
+ * The collation is resolved first; an invalid or nondeterministic collation
+ * raises an error.  After that, either both operands are lower-cased as whole
+ * strings and matched with the ordinary matcher, or the single-byte matcher
+ * that folds case character by character is used.  The matcher's result code
+ * is returned unchanged.
+ */
+static inline int
+Generic_Text_IC_like(text *str, text *pat, Oid collation)
+{
+	pg_locale_t locale = 0;
+	bool		locale_is_c = false;
+	bool		fold_whole_strings;
+	char	   *subject;
+	char	   *pattern;
+	int			subject_len;
+	int			pattern_len;
+
+	if (lc_ctype_is_c(collation))
+	{
+		locale_is_c = true;
+	}
+	else if (collation != DEFAULT_COLLATION_OID)
+	{
+		/*
+		 * An invalid collation typically means that the parser could not
+		 * resolve a conflict of implicit collations, so report it that way.
+		 */
+		if (!OidIsValid(collation))
+			ereport(ERROR,
+					(errcode(ERRCODE_INDETERMINATE_COLLATION),
+					 errmsg("could not determine which collation to use for ILIKE"),
+					 errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+		locale = pg_newlocale_from_collation(collation);
+
+		if (locale && !locale->deterministic)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("nondeterministic collations are not supported for ILIKE")));
+	}
+
+	/*
+	 * Single-byte encodings can fold each character on the fly with
+	 * SB_lower_char, which is cheaper than calling lower() on both strings.
+	 * Multibyte encodings leave no such choice, and ICU does not support
+	 * single-character case folding, so those cases go the long way.
+	 */
+	fold_whole_strings = (pg_database_encoding_max_length() > 1 ||
+						  (locale && locale->provider == COLLPROVIDER_ICU));
+
+	if (!fold_whole_strings)
+	{
+		pattern = VARDATA_ANY(pat);
+		pattern_len = VARSIZE_ANY_EXHDR(pat);
+		subject = VARDATA_ANY(str);
+		subject_len = VARSIZE_ANY_EXHDR(str);
+
+		return SB_IMatchText(subject, subject_len, pattern, pattern_len,
+							 locale, locale_is_c);
+	}
+
+	/* Lower-case the pattern first, then the subject. */
+	pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
+												 PointerGetDatum(pat)));
+	pattern = VARDATA_ANY(pat);
+	pattern_len = VARSIZE_ANY_EXHDR(pat);
+
+	str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
+												 PointerGetDatum(str)));
+	subject = VARDATA_ANY(str);
+	subject_len = VARSIZE_ANY_EXHDR(str);
+
+	if (GetDatabaseEncoding() == PG_UTF8)
+		return UTF8_MatchText(subject, subject_len, pattern, pattern_len, 0, true);
+
+	return MB_MatchText(subject, subject_len, pattern, pattern_len, 0, true);
+}
+
+/*
+ * interface routines called by the function manager
+ */
+
+/*
+ * namelike --- fmgr entry point: true when the name argument matches the
+ * LIKE pattern under the call's collation.
+ */
+Datum
+namelike(PG_FUNCTION_ARGS)
+{
+	Name		name = PG_GETARG_NAME(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	char	   *name_str = NameStr(*name);
+	int			match;
+
+	/* The name's length is taken with strlen(). */
+	match = GenericMatchText(name_str, strlen(name_str),
+							 VARDATA_ANY(pattern), VARSIZE_ANY_EXHDR(pattern),
+							 PG_GET_COLLATION());
+
+	PG_RETURN_BOOL(match == LIKE_TRUE);
+}
+
+/*
+ * namenlike --- fmgr entry point: true when the name argument does NOT match
+ * the LIKE pattern under the call's collation.
+ */
+Datum
+namenlike(PG_FUNCTION_ARGS)
+{
+	Name		name = PG_GETARG_NAME(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	char	   *name_str = NameStr(*name);
+	int			match;
+
+	/* The name's length is taken with strlen(). */
+	match = GenericMatchText(name_str, strlen(name_str),
+							 VARDATA_ANY(pattern), VARSIZE_ANY_EXHDR(pattern),
+							 PG_GET_COLLATION());
+
+	/* Any result other than LIKE_TRUE counts as "not like". */
+	PG_RETURN_BOOL(match != LIKE_TRUE);
+}
+
+/*
+ * textlike --- fmgr entry point: true when the text argument matches the
+ * LIKE pattern under the call's collation.
+ */
+Datum
+textlike(PG_FUNCTION_ARGS)
+{
+	text	   *subject = PG_GETARG_TEXT_PP(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	int			match;
+
+	match = GenericMatchText(VARDATA_ANY(subject), VARSIZE_ANY_EXHDR(subject),
+							 VARDATA_ANY(pattern), VARSIZE_ANY_EXHDR(pattern),
+							 PG_GET_COLLATION());
+
+	PG_RETURN_BOOL(match == LIKE_TRUE);
+}
+
+/*
+ * textnlike --- fmgr entry point: true when the text argument does NOT match
+ * the LIKE pattern under the call's collation.
+ */
+Datum
+textnlike(PG_FUNCTION_ARGS)
+{
+	text	   *subject = PG_GETARG_TEXT_PP(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	int			match;
+
+	match = GenericMatchText(VARDATA_ANY(subject), VARSIZE_ANY_EXHDR(subject),
+							 VARDATA_ANY(pattern), VARSIZE_ANY_EXHDR(pattern),
+							 PG_GET_COLLATION());
+
+	/* Any result other than LIKE_TRUE counts as "not like". */
+	PG_RETURN_BOOL(match != LIKE_TRUE);
+}
+
+/*
+ * bytealike --- fmgr entry point: true when the bytea argument matches the
+ * bytea pattern.  The single-byte matcher is always used, with no locale.
+ */
+Datum
+bytealike(PG_FUNCTION_ARGS)
+{
+	bytea	   *subject = PG_GETARG_BYTEA_PP(0);
+	bytea	   *pattern = PG_GETARG_BYTEA_PP(1);
+	int			match;
+
+	match = SB_MatchText(VARDATA_ANY(subject), VARSIZE_ANY_EXHDR(subject),
+						 VARDATA_ANY(pattern), VARSIZE_ANY_EXHDR(pattern),
+						 0, true);
+
+	PG_RETURN_BOOL(match == LIKE_TRUE);
+}
+
+/*
+ * byteanlike --- fmgr entry point: true when the bytea argument does NOT
+ * match the bytea pattern.  The single-byte matcher is always used, with no
+ * locale.
+ */
+Datum
+byteanlike(PG_FUNCTION_ARGS)
+{
+	bytea	   *subject = PG_GETARG_BYTEA_PP(0);
+	bytea	   *pattern = PG_GETARG_BYTEA_PP(1);
+	int			match;
+
+	match = SB_MatchText(VARDATA_ANY(subject), VARSIZE_ANY_EXHDR(subject),
+						 VARDATA_ANY(pattern), VARSIZE_ANY_EXHDR(pattern),
+						 0, true);
+
+	/* Any result other than LIKE_TRUE counts as "not like". */
+	PG_RETURN_BOOL(match != LIKE_TRUE);
+}
+
+/*
+ * Case-insensitive versions
+ */
+
+/*
+ * nameiclike --- fmgr entry point: case-insensitive LIKE for a name value.
+ * The name is converted to text with name_text before matching.
+ */
+Datum
+nameiclike(PG_FUNCTION_ARGS)
+{
+	Name		name = PG_GETARG_NAME(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	text	   *name_as_text;
+	int			match;
+
+	name_as_text = DatumGetTextPP(DirectFunctionCall1(name_text,
+													  NameGetDatum(name)));
+	match = Generic_Text_IC_like(name_as_text, pattern, PG_GET_COLLATION());
+
+	PG_RETURN_BOOL(match == LIKE_TRUE);
+}
+
+/*
+ * nameicnlike --- fmgr entry point: negated case-insensitive LIKE for a name
+ * value.  The name is converted to text with name_text before matching.
+ */
+Datum
+nameicnlike(PG_FUNCTION_ARGS)
+{
+	Name		name = PG_GETARG_NAME(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	text	   *name_as_text;
+	int			match;
+
+	name_as_text = DatumGetTextPP(DirectFunctionCall1(name_text,
+													  NameGetDatum(name)));
+	match = Generic_Text_IC_like(name_as_text, pattern, PG_GET_COLLATION());
+
+	/* Any result other than LIKE_TRUE counts as "not like". */
+	PG_RETURN_BOOL(match != LIKE_TRUE);
+}
+
+/*
+ * texticlike --- fmgr entry point: case-insensitive LIKE for text values.
+ */
+Datum
+texticlike(PG_FUNCTION_ARGS)
+{
+	text	   *subject = PG_GETARG_TEXT_PP(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	int			match = Generic_Text_IC_like(subject, pattern, PG_GET_COLLATION());
+
+	PG_RETURN_BOOL(match == LIKE_TRUE);
+}
+
+/*
+ * texticnlike --- fmgr entry point: negated case-insensitive LIKE for text
+ * values.
+ */
+Datum
+texticnlike(PG_FUNCTION_ARGS)
+{
+	text	   *subject = PG_GETARG_TEXT_PP(0);
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	int			match = Generic_Text_IC_like(subject, pattern, PG_GET_COLLATION());
+
+	/* Any result other than LIKE_TRUE counts as "not like". */
+	PG_RETURN_BOOL(match != LIKE_TRUE);
+}
+
+/*
+ * like_escape() --- rewrite a LIKE pattern that uses a caller-chosen ESCAPE
+ * string so that it follows Postgres' standard backslash escape convention.
+ *
+ * Argument 0 is the pattern and argument 1 the ESCAPE string.  The
+ * single-byte or the multibyte converter is chosen from the database
+ * encoding's maximum character length.
+ */
+Datum
+like_escape(PG_FUNCTION_ARGS)
+{
+	text	   *pat = PG_GETARG_TEXT_PP(0);
+	text	   *esc = PG_GETARG_TEXT_PP(1);
+	text	   *converted;
+
+	converted = (pg_database_encoding_max_length() == 1)
+		? SB_do_like_escape(pat, esc)
+		: MB_do_like_escape(pat, esc);
+
+	PG_RETURN_TEXT_P(converted);
+}
+
+/*
+ * like_escape_bytea() --- bytea flavour of like_escape(): rewrite a pattern
+ * that uses a caller-chosen ESCAPE string so that it follows Postgres'
+ * standard backslash escape convention.
+ *
+ * The single-byte converter is always used; the bytea arguments are simply
+ * cast to text pointers for the call.
+ */
+Datum
+like_escape_bytea(PG_FUNCTION_ARGS)
+{
+	bytea	   *pat = PG_GETARG_BYTEA_PP(0);
+	bytea	   *esc = PG_GETARG_BYTEA_PP(1);
+	bytea	   *converted;
+
+	converted = SB_do_like_escape((text *) pat, (text *) esc);
+
+	PG_RETURN_BYTEA_P((bytea *) converted);
+}