summaryrefslogtreecommitdiffstats
path: root/contrib/ltree/ltxtquery_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/ltree/ltxtquery_io.c')
-rw-r--r--contrib/ltree/ltxtquery_io.c599
1 files changed, 599 insertions, 0 deletions
diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c
new file mode 100644
index 0000000..3eca5cb
--- /dev/null
+++ b/contrib/ltree/ltxtquery_io.c
@@ -0,0 +1,599 @@
+/*
+ * txtquery io
+ * Teodor Sigaev <teodor@stack.net>
+ * contrib/ltree/ltxtquery_io.c
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "crc32.h"
+#include "libpq/pqformat.h"
+#include "ltree.h"
+#include "miscadmin.h"
+
+
+/* parser's states */
+#define WAITOPERAND 1
+#define INOPERAND 2
+#define WAITOPERATOR 3
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ */
+typedef struct NODE
+{
+ int32 type;
+ int32 val;
+ int16 distance;
+ int16 length;
+ uint16 flag;
+ struct NODE *next;
+} NODE;
+
+typedef struct
+{
+ char *buf;
+ int32 state;
+ int32 count;
+ /* reverse polish notation in list (for temporary usage) */
+ NODE *str;
+ /* number in str */
+ int32 num;
+
+ /* user-friendly operand */
+ int32 lenop;
+ int32 sumlen;
+ char *op;
+ char *curop;
+} QPRS_STATE;
+
+/*
+ * get token from query string
+ */
+static int32
+gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
+{
+ int charlen;
+
+ for (;;)
+ {
+ charlen = pg_mblen(state->buf);
+
+ switch (state->state)
+ {
+ case WAITOPERAND:
+ if (charlen == 1 && t_iseq(state->buf, '!'))
+ {
+ (state->buf)++;
+ *val = (int32) '!';
+ return OPR;
+ }
+ else if (charlen == 1 && t_iseq(state->buf, '('))
+ {
+ state->count++;
+ (state->buf)++;
+ return OPEN;
+ }
+ else if (ISALNUM(state->buf))
+ {
+ state->state = INOPERAND;
+ *strval = state->buf;
+ *lenval = charlen;
+ *flag = 0;
+ }
+ else if (!t_isspace(state->buf))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("operand syntax error")));
+ break;
+ case INOPERAND:
+ if (ISALNUM(state->buf))
+ {
+ if (*flag)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("modifiers syntax error")));
+ *lenval += charlen;
+ }
+ else if (charlen == 1 && t_iseq(state->buf, '%'))
+ *flag |= LVAR_SUBLEXEME;
+ else if (charlen == 1 && t_iseq(state->buf, '@'))
+ *flag |= LVAR_INCASE;
+ else if (charlen == 1 && t_iseq(state->buf, '*'))
+ *flag |= LVAR_ANYEND;
+ else
+ {
+ state->state = WAITOPERATOR;
+ return VAL;
+ }
+ break;
+ case WAITOPERATOR:
+ if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
+ {
+ state->state = WAITOPERAND;
+ *val = (int32) *(state->buf);
+ (state->buf)++;
+ return OPR;
+ }
+ else if (charlen == 1 && t_iseq(state->buf, ')'))
+ {
+ (state->buf)++;
+ state->count--;
+ return (state->count < 0) ? ERR : CLOSE;
+ }
+ else if (*(state->buf) == '\0')
+ return (state->count) ? ERR : END;
+ else if (charlen == 1 && !t_iseq(state->buf, ' '))
+ return ERR;
+ break;
+ default:
+ return ERR;
+ break;
+ }
+
+ state->buf += charlen;
+ }
+}
+
+/*
+ * push new one in polish notation reverse view
+ */
+static void
+pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
+{
+ NODE *tmp = (NODE *) palloc(sizeof(NODE));
+
+ tmp->type = type;
+ tmp->val = val;
+ tmp->flag = flag;
+ if (distance > 0xffff)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("value is too big")));
+ if (lenval > 0xff)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("operand is too long")));
+ tmp->distance = distance;
+ tmp->length = lenval;
+ tmp->next = state->str;
+ state->str = tmp;
+ state->num++;
+}
+
+/*
+ * This function is used for query text parsing
+ */
+static void
+pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
+{
+ if (lenval > 0xffff)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("word is too long")));
+
+ pushquery(state, type, ltree_crc32_sz(strval, lenval),
+ state->curop - state->op, lenval, flag);
+
+ while (state->curop - state->op + lenval + 1 >= state->lenop)
+ {
+ int32 tmp = state->curop - state->op;
+
+ state->lenop *= 2;
+ state->op = (char *) repalloc((void *) state->op, state->lenop);
+ state->curop = state->op + tmp;
+ }
+ memcpy((void *) state->curop, (void *) strval, lenval);
+ state->curop += lenval;
+ *(state->curop) = '\0';
+ state->curop++;
+ state->sumlen += lenval + 1;
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ */
+static int32
+makepol(QPRS_STATE *state)
+{
+ int32 val = 0,
+ type;
+ int32 lenval = 0;
+ char *strval = NULL;
+ int32 stack[STACKDEPTH];
+ int32 lenstack = 0;
+ uint16 flag = 0;
+
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
+ {
+ switch (type)
+ {
+ case VAL:
+ pushval_asis(state, VAL, strval, lenval, flag);
+ while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
+ stack[lenstack - 1] == (int32) '!'))
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ }
+ break;
+ case OPR:
+ if (lenstack && val == (int32) '|')
+ pushquery(state, OPR, val, 0, 0, 0);
+ else
+ {
+ if (lenstack == STACKDEPTH)
+ /* internal error */
+ elog(ERROR, "stack too short");
+ stack[lenstack] = val;
+ lenstack++;
+ }
+ break;
+ case OPEN:
+ if (makepol(state) == ERR)
+ return ERR;
+ while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
+ stack[lenstack - 1] == (int32) '!'))
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ }
+ break;
+ case CLOSE:
+ while (lenstack)
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ };
+ return END;
+ break;
+ case ERR:
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error")));
+
+ return ERR;
+ }
+ }
+ while (lenstack)
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ };
+ return END;
+}
+
+static void
+findoprnd(ITEM *ptr, int32 *pos)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+ {
+ ptr[*pos].left = 0;
+ (*pos)++;
+ }
+ else if (ptr[*pos].val == (int32) '!')
+ {
+ ptr[*pos].left = 1;
+ (*pos)++;
+ findoprnd(ptr, pos);
+ }
+ else
+ {
+ ITEM *curitem = &ptr[*pos];
+ int32 tmp = *pos;
+
+ (*pos)++;
+ findoprnd(ptr, pos);
+ curitem->left = *pos - tmp;
+ findoprnd(ptr, pos);
+ }
+}
+
+
+/*
+ * input
+ */
+static ltxtquery *
+queryin(char *buf)
+{
+ QPRS_STATE state;
+ int32 i;
+ ltxtquery *query;
+ int32 commonlen;
+ ITEM *ptr;
+ NODE *tmp;
+ int32 pos = 0;
+
+#ifdef BS_DEBUG
+ char pbuf[16384],
+ *cur;
+#endif
+
+ /* init state */
+ state.buf = buf;
+ state.state = WAITOPERAND;
+ state.count = 0;
+ state.num = 0;
+ state.str = NULL;
+
+ /* init list of operand */
+ state.sumlen = 0;
+ state.lenop = 64;
+ state.curop = state.op = (char *) palloc(state.lenop);
+ *(state.curop) = '\0';
+
+ /* parse query & make polish notation (postfix, but in reverse order) */
+ makepol(&state);
+ if (!state.num)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error"),
+ errdetail("Empty query.")));
+
+ if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("ltxtquery is too large")));
+ commonlen = COMPUTESIZE(state.num, state.sumlen);
+
+ query = (ltxtquery *) palloc0(commonlen);
+ SET_VARSIZE(query, commonlen);
+ query->size = state.num;
+ ptr = GETQUERY(query);
+
+ /* set item in polish notation */
+ for (i = 0; i < state.num; i++)
+ {
+ ptr[i].type = state.str->type;
+ ptr[i].val = state.str->val;
+ ptr[i].distance = state.str->distance;
+ ptr[i].length = state.str->length;
+ ptr[i].flag = state.str->flag;
+ tmp = state.str->next;
+ pfree(state.str);
+ state.str = tmp;
+ }
+
+ /* set user-friendly operand view */
+ memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+ pfree(state.op);
+
+ /* set left operand's position for every operator */
+ pos = 0;
+ findoprnd(ptr, &pos);
+
+ return query;
+}
+
+/*
+ * in without morphology
+ */
+PG_FUNCTION_INFO_V1(ltxtq_in);
+Datum
+ltxtq_in(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0)));
+}
+
+/*
+ * ltxtquery type recv function
+ *
+ * The type is sent as text in binary mode, so this is almost the same
+ * as the input function, but it's prefixed with a version number so we
+ * can change the binary format sent in future if necessary. For now,
+ * only version 1 is supported.
+ */
+PG_FUNCTION_INFO_V1(ltxtq_recv);
+Datum
+ltxtq_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ int version = pq_getmsgint(buf, 1);
+ char *str;
+ int nbytes;
+ ltxtquery *res;
+
+ if (version != 1)
+ elog(ERROR, "unsupported ltxtquery version number %d", version);
+
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+ res = queryin(str);
+ pfree(str);
+
+ PG_RETURN_POINTER(res);
+}
+
+/*
+ * out function
+ */
+typedef struct
+{
+ ITEM *curpol;
+ char *buf;
+ char *cur;
+ char *op;
+ int32 buflen;
+} INFIX;
+
+#define RESIZEBUF(inf,addsize) \
+while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
+{ \
+ int32 len = (inf)->cur - (inf)->buf; \
+ (inf)->buflen *= 2; \
+ (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
+ (inf)->cur = (inf)->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ */
+static void
+infix(INFIX *in, bool first)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->curpol->type == VAL)
+ {
+ char *op = in->op + in->curpol->distance;
+
+ RESIZEBUF(in, in->curpol->length * 2 + 5);
+ while (*op)
+ {
+ *(in->cur) = *op;
+ op++;
+ in->cur++;
+ }
+ if (in->curpol->flag & LVAR_SUBLEXEME)
+ {
+ *(in->cur) = '%';
+ in->cur++;
+ }
+ if (in->curpol->flag & LVAR_INCASE)
+ {
+ *(in->cur) = '@';
+ in->cur++;
+ }
+ if (in->curpol->flag & LVAR_ANYEND)
+ {
+ *(in->cur) = '*';
+ in->cur++;
+ }
+ *(in->cur) = '\0';
+ in->curpol++;
+ }
+ else if (in->curpol->val == (int32) '!')
+ {
+ bool isopr = false;
+
+ RESIZEBUF(in, 1);
+ *(in->cur) = '!';
+ in->cur++;
+ *(in->cur) = '\0';
+ in->curpol++;
+ if (in->curpol->type == OPR)
+ {
+ isopr = true;
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+ infix(in, isopr);
+ if (isopr)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+ else
+ {
+ int32 op = in->curpol->val;
+ INFIX nrm;
+
+ in->curpol++;
+ if (op == (int32) '|' && !first)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+
+ nrm.curpol = in->curpol;
+ nrm.op = in->op;
+ nrm.buflen = 16;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+ /* get right operand */
+ infix(&nrm, false);
+
+ /* get & print left operand */
+ in->curpol = nrm.curpol;
+ infix(in, false);
+
+ /* print operator & right operand */
+ RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+ sprintf(in->cur, " %c %s", op, nrm.buf);
+ in->cur = strchr(in->cur, '\0');
+ pfree(nrm.buf);
+
+ if (op == (int32) '|' && !first)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+}
+
+PG_FUNCTION_INFO_V1(ltxtq_out);
+Datum
+ltxtq_out(PG_FUNCTION_ARGS)
+{
+ ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
+ INFIX nrm;
+
+ if (query->size == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error"),
+ errdetail("Empty query.")));
+
+ nrm.curpol = GETQUERY(query);
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, true);
+
+ PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * ltxtquery type send function
+ *
+ * The type is sent as text in binary mode, so this is almost the same
+ * as the output function, but it's prefixed with a version number so we
+ * can change the binary format sent in future if necessary. For now,
+ * only version 1 is supported.
+ */
+PG_FUNCTION_INFO_V1(ltxtq_send);
+Datum
+ltxtq_send(PG_FUNCTION_ARGS)
+{
+ ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
+ StringInfoData buf;
+ int version = 1;
+ INFIX nrm;
+
+ if (query->size == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error"),
+ errdetail("Empty query.")));
+
+ nrm.curpol = GETQUERY(query);
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, true);
+
+ pq_begintypsend(&buf);
+ pq_sendint8(&buf, version);
+ pq_sendtext(&buf, nrm.buf, strlen(nrm.buf));
+ pfree(nrm.buf);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}