diff options
Diffstat (limited to 'src/backend/tsearch/regis.c')
-rw-r--r-- | src/backend/tsearch/regis.c | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/src/backend/tsearch/regis.c b/src/backend/tsearch/regis.c new file mode 100644 index 0000000..8001717 --- /dev/null +++ b/src/backend/tsearch/regis.c @@ -0,0 +1,257 @@ +/*------------------------------------------------------------------------- + * + * regis.c + * Fast regex subset + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/tsearch/regis.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "tsearch/dicts/regis.h" +#include "tsearch/ts_locale.h" + +#define RS_IN_ONEOF 1 +#define RS_IN_ONEOF_IN 2 +#define RS_IN_NONEOF 3 +#define RS_IN_WAIT 4 + + +/* + * Test whether a regex is of the subset supported here. + * Keep this in sync with RS_compile! + */ +bool +RS_isRegis(const char *str) +{ + int state = RS_IN_WAIT; + const char *c = str; + + while (*c) + { + if (state == RS_IN_WAIT) + { + if (t_isalpha(c)) + /* okay */ ; + else if (t_iseq(c, '[')) + state = RS_IN_ONEOF; + else + return false; + } + else if (state == RS_IN_ONEOF) + { + if (t_iseq(c, '^')) + state = RS_IN_NONEOF; + else if (t_isalpha(c)) + state = RS_IN_ONEOF_IN; + else + return false; + } + else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) + { + if (t_isalpha(c)) + /* okay */ ; + else if (t_iseq(c, ']')) + state = RS_IN_WAIT; + else + return false; + } + else + elog(ERROR, "internal error in RS_isRegis: state %d", state); + c += pg_mblen(c); + } + + return (state == RS_IN_WAIT); +} + +static RegisNode * +newRegisNode(RegisNode *prev, int len) +{ + RegisNode *ptr; + + ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1); + if (prev) + prev->next = ptr; + return ptr; +} + +void +RS_compile(Regis *r, bool issuffix, const char *str) +{ + int len = strlen(str); + int state = RS_IN_WAIT; + const char *c = str; + RegisNode *ptr = NULL; + + memset(r, 0, sizeof(Regis)); + r->issuffix = (issuffix) ? 1 : 0; + + while (*c) + { + if (state == RS_IN_WAIT) + { + if (t_isalpha(c)) + { + if (ptr) + ptr = newRegisNode(ptr, len); + else + ptr = r->node = newRegisNode(NULL, len); + COPYCHAR(ptr->data, c); + ptr->type = RSF_ONEOF; + ptr->len = pg_mblen(c); + } + else if (t_iseq(c, '[')) + { + if (ptr) + ptr = newRegisNode(ptr, len); + else + ptr = r->node = newRegisNode(NULL, len); + ptr->type = RSF_ONEOF; + state = RS_IN_ONEOF; + } + else /* shouldn't get here */ + elog(ERROR, "invalid regis pattern: \"%s\"", str); + } + else if (state == RS_IN_ONEOF) + { + if (t_iseq(c, '^')) + { + ptr->type = RSF_NONEOF; + state = RS_IN_NONEOF; + } + else if (t_isalpha(c)) + { + COPYCHAR(ptr->data, c); + ptr->len = pg_mblen(c); + state = RS_IN_ONEOF_IN; + } + else /* shouldn't get here */ + elog(ERROR, "invalid regis pattern: \"%s\"", str); + } + else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) + { + if (t_isalpha(c)) + { + COPYCHAR(ptr->data + ptr->len, c); + ptr->len += pg_mblen(c); + } + else if (t_iseq(c, ']')) + state = RS_IN_WAIT; + else /* shouldn't get here */ + elog(ERROR, "invalid regis pattern: \"%s\"", str); + } + else + elog(ERROR, "internal error in RS_compile: state %d", state); + c += pg_mblen(c); + } + + if (state != RS_IN_WAIT) /* shouldn't get here */ + elog(ERROR, "invalid regis pattern: \"%s\"", str); + + ptr = r->node; + while (ptr) + { + r->nchar++; + ptr = ptr->next; + } +} + +void +RS_free(Regis *r) +{ + RegisNode *ptr = r->node, + *tmp; + + while (ptr) + { + tmp = ptr->next; + pfree(ptr); + ptr = tmp; + } + + r->node = NULL; +} + +static bool +mb_strchr(char *str, char *c) +{ + int clen, + plen, + i; + char *ptr = str; + bool res = false; + + clen = pg_mblen(c); + while (*ptr && !res) + { + plen = pg_mblen(ptr); + if (plen == clen) + { + i = plen; + res = true; + while (i--) + if (*(ptr + i) != *(c + i)) + { + res = false; + break; + } + } + + ptr += plen; + } + + return res; +} + +bool +RS_execute(Regis *r, char *str) +{ + RegisNode *ptr = r->node; + char *c = str; + int len = 0; + + while (*c) + { + len++; + c += pg_mblen(c); + } + + if (len < r->nchar) + return 0; + + c = str; + if (r->issuffix) + { + len -= r->nchar; + while (len-- > 0) + c += pg_mblen(c); + } + + + while (ptr) + { + switch (ptr->type) + { + case RSF_ONEOF: + if (!mb_strchr((char *) ptr->data, c)) + return false; + break; + case RSF_NONEOF: + if (mb_strchr((char *) ptr->data, c)) + return false; + break; + default: + elog(ERROR, "unrecognized regis node type: %d", ptr->type); + } + ptr = ptr->next; + c += pg_mblen(c); + } + + return true; +} |