summary | refs | log | tree | commit | diff | stats
path: root/src/backend/tsearch/regis.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/tsearch/regis.c')
-rw-r--r-- src/backend/tsearch/regis.c 257
1 files changed, 257 insertions, 0 deletions
diff --git a/src/backend/tsearch/regis.c b/src/backend/tsearch/regis.c
new file mode 100644
index 0000000..43cab72
--- /dev/null
+++ b/src/backend/tsearch/regis.c
@@ -0,0 +1,257 @@
+/*-------------------------------------------------------------------------
+ *
+ * regis.c
+ * Fast regex subset
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/tsearch/regis.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "tsearch/dicts/regis.h"
+#include "tsearch/ts_locale.h"
+
+#define RS_IN_ONEOF 1
+#define RS_IN_ONEOF_IN 2
+#define RS_IN_NONEOF 3
+#define RS_IN_WAIT 4
+
+
+/*
+ * RS_isRegis
+ *      Report whether a pattern lies within the regex subset handled here.
+ *
+ * A pattern is accepted when it is a (possibly empty) sequence of items,
+ * each of which is one of:
+ *      - a single character for which t_isalpha() is true;
+ *      - "[...]" enclosing one or more such characters;
+ *      - "[^...]" enclosing zero or more such characters.
+ * Anything else, or a pattern that ends inside an unclosed '[', is rejected.
+ *
+ * The state machine must accept exactly what RS_compile can build, so
+ * keep the two in sync!
+ */
+bool
+RS_isRegis(const char *str)
+{
+    const char *cur;
+    int         state = RS_IN_WAIT;
+
+    /* Advance one (possibly multibyte) character per iteration. */
+    for (cur = str; *cur; cur += pg_mblen(cur))
+    {
+        switch (state)
+        {
+            case RS_IN_WAIT:
+                /* Outside brackets: a letter, or the start of a set. */
+                if (t_isalpha(cur))
+                    break;
+                if (!t_iseq(cur, '['))
+                    return false;
+                state = RS_IN_ONEOF;
+                break;
+
+            case RS_IN_ONEOF:
+                /* Just after '[': either '^' or the set's first letter. */
+                if (t_iseq(cur, '^'))
+                    state = RS_IN_NONEOF;
+                else if (t_isalpha(cur))
+                    state = RS_IN_ONEOF_IN;
+                else
+                    return false;
+                break;
+
+            case RS_IN_ONEOF_IN:
+            case RS_IN_NONEOF:
+                /* Inside a set: more letters, or the closing ']'. */
+                if (t_isalpha(cur))
+                    break;
+                if (!t_iseq(cur, ']'))
+                    return false;
+                state = RS_IN_WAIT;
+                break;
+
+            default:
+                elog(ERROR, "internal error in RS_isRegis: state %d", state);
+                break;
+        }
+    }
+
+    /* Valid only if we are not left inside an open bracket. */
+    return (state == RS_IN_WAIT);
+}
+
+/*
+ * newRegisNode
+ *      Allocate a RegisNode with palloc0, sized as RNHDRSZ plus len + 1
+ *      bytes, and, when prev is not NULL, attach it as prev->next.
+ *
+ * Returns the newly allocated node.
+ */
+static RegisNode *
+newRegisNode(RegisNode *prev, int len)
+{
+    RegisNode  *node = (RegisNode *) palloc0(RNHDRSZ + len + 1);
+
+    if (prev != NULL)
+        prev->next = node;
+
+    return node;
+}
+
+/*
+ * RS_compile
+ *      Translate pattern str into a linked list of RegisNodes stored in *r.
+ *
+ * *r is zeroed first, then r->issuffix is set to 1 or 0 from issuffix.
+ * One node is produced per pattern item: a bare letter becomes an
+ * RSF_ONEOF node holding that letter, "[...]" becomes an RSF_ONEOF node
+ * holding the listed letters, and "[^...]" becomes an RSF_NONEOF node.
+ * On return r->nchar holds the number of nodes.
+ *
+ * Every node's data area is sized from the length of the whole pattern,
+ * so appending the letters of a set cannot run past the allocation.
+ *
+ * Patterns outside the supported subset raise an error via elog(ERROR);
+ * callers are expected to have screened the pattern with RS_isRegis.
+ */
+void
+RS_compile(Regis *r, bool issuffix, const char *str)
+{
+    int         patlen = strlen(str);
+    int         state = RS_IN_WAIT;
+    const char *cur;
+    RegisNode  *node = NULL;
+    RegisNode  *walk;
+
+    memset(r, 0, sizeof(Regis));
+    r->issuffix = (issuffix) ? 1 : 0;
+
+    /* Advance one (possibly multibyte) character per iteration. */
+    for (cur = str; *cur; cur += pg_mblen(cur))
+    {
+        switch (state)
+        {
+            case RS_IN_WAIT:
+                {
+                    bool        is_literal = t_isalpha(cur);
+
+                    if (!is_literal && !t_iseq(cur, '['))   /* shouldn't get here */
+                        elog(ERROR, "invalid regis pattern: \"%s\"", str);
+
+                    /* Either way a new node starts here; append it. */
+                    if (node)
+                        node = newRegisNode(node, patlen);
+                    else
+                        node = r->node = newRegisNode(NULL, patlen);
+                    node->type = RSF_ONEOF;
+
+                    if (is_literal)
+                    {
+                        /* A bare letter is a one-element set. */
+                        COPYCHAR(node->data, cur);
+                        node->len = pg_mblen(cur);
+                    }
+                    else
+                        state = RS_IN_ONEOF;
+                }
+                break;
+
+            case RS_IN_ONEOF:
+                /* Just after '[': either '^' or the set's first letter. */
+                if (t_iseq(cur, '^'))
+                {
+                    node->type = RSF_NONEOF;
+                    state = RS_IN_NONEOF;
+                }
+                else if (t_isalpha(cur))
+                {
+                    COPYCHAR(node->data, cur);
+                    node->len = pg_mblen(cur);
+                    state = RS_IN_ONEOF_IN;
+                }
+                else            /* shouldn't get here */
+                    elog(ERROR, "invalid regis pattern: \"%s\"", str);
+                break;
+
+            case RS_IN_ONEOF_IN:
+            case RS_IN_NONEOF:
+                /* Inside a set: append letters until the closing ']'. */
+                if (t_isalpha(cur))
+                {
+                    COPYCHAR(node->data + node->len, cur);
+                    node->len += pg_mblen(cur);
+                }
+                else if (t_iseq(cur, ']'))
+                    state = RS_IN_WAIT;
+                else            /* shouldn't get here */
+                    elog(ERROR, "invalid regis pattern: \"%s\"", str);
+                break;
+
+            default:
+                elog(ERROR, "internal error in RS_compile: state %d", state);
+                break;
+        }
+    }
+
+    /* The pattern must not end inside an open bracket. */
+    if (state != RS_IN_WAIT)    /* shouldn't get here */
+        elog(ERROR, "invalid regis pattern: \"%s\"", str);
+
+    /* Record how many nodes were built. */
+    for (walk = r->node; walk; walk = walk->next)
+        r->nchar++;
+}
+
+/*
+ * RS_free
+ *      Release every node on r's list with pfree and reset r->node to
+ *      NULL.  The other fields of *r are left as they were.
+ */
+void
+RS_free(Regis *r)
+{
+    RegisNode  *node = r->node;
+
+    while (node != NULL)
+    {
+        /* Fetch the successor before the current node is freed. */
+        RegisNode  *following = node->next;
+
+        pfree(node);
+        node = following;
+    }
+
+    r->node = NULL;
+}
+
+/*
+ * mb_strchr
+ *      Multibyte-aware membership test: does the NUL-terminated string
+ *      str contain the character that c points at?
+ *
+ * Character boundaries in str, and the length of the character at c, are
+ * taken from pg_mblen().  Two characters are considered equal when they
+ * have the same byte length and identical bytes.  Only the first
+ * character at c is examined.  Neither argument is modified.
+ *
+ * Returns true if a matching character is found, false otherwise.
+ */
+static bool
+mb_strchr(const char *str, const char *c)
+{
+    int         clen = pg_mblen(c);
+    const char *p = str;
+
+    while (*p)
+    {
+        int         plen = pg_mblen(p);
+
+        /* Differing byte lengths can never match; skip the compare. */
+        if (plen == clen && memcmp(p, c, clen) == 0)
+            return true;
+
+        p += plen;
+    }
+
+    return false;
+}
+
+/*
+ * RS_execute
+ *      Test the string str against the compiled pattern r.
+ *
+ * Each node of r is matched against exactly one (possibly multibyte)
+ * character of str.  When r->issuffix is set, the nodes are aligned with
+ * the last r->nchar characters of str; otherwise with the first r->nchar
+ * characters.  Characters outside that window are not examined.
+ *
+ * Returns true if every node accepts its character.  Returns false if
+ * any node rejects, or if str has fewer than r->nchar characters.
+ * An unknown node type raises an error via elog(ERROR).
+ */
+bool
+RS_execute(Regis *r, char *str)
+{
+    RegisNode  *ptr = r->node;
+    char       *c = str;
+    int         len = 0;
+
+    /* Count characters (not bytes) in str. */
+    while (*c)
+    {
+        len++;
+        c += pg_mblen(c);
+    }
+
+    /* A string shorter than the pattern cannot match. */
+    if (len < r->nchar)
+        return false;
+
+    c = str;
+    if (r->issuffix)
+    {
+        /* Skip leading characters so that exactly nchar remain. */
+        len -= r->nchar;
+        while (len-- > 0)
+            c += pg_mblen(c);
+    }
+
+    /* Walk nodes and characters in lockstep. */
+    while (ptr)
+    {
+        switch (ptr->type)
+        {
+            case RSF_ONEOF:
+                /* Character must be in the node's set. */
+                if (!mb_strchr((char *) ptr->data, c))
+                    return false;
+                break;
+            case RSF_NONEOF:
+                /* Character must not be in the node's set. */
+                if (mb_strchr((char *) ptr->data, c))
+                    return false;
+                break;
+            default:
+                elog(ERROR, "unrecognized regis node type: %d", ptr->type);
+        }
+        ptr = ptr->next;
+        c += pg_mblen(c);
+    }
+
+    return true;
+}