diff options
Diffstat (limited to 'contrib/ltree/ltxtquery_io.c')
-rw-r--r-- | contrib/ltree/ltxtquery_io.c | 599 |
1 files changed, 599 insertions, 0 deletions
diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c new file mode 100644 index 0000000..3eca5cb --- /dev/null +++ b/contrib/ltree/ltxtquery_io.c @@ -0,0 +1,599 @@ +/* + * txtquery io + * Teodor Sigaev <teodor@stack.net> + * contrib/ltree/ltxtquery_io.c + */ +#include "postgres.h" + +#include <ctype.h> + +#include "crc32.h" +#include "libpq/pqformat.h" +#include "ltree.h" +#include "miscadmin.h" + + +/* parser's states */ +#define WAITOPERAND 1 +#define INOPERAND 2 +#define WAITOPERATOR 3 + +/* + * node of query tree, also used + * for storing polish notation in parser + */ +typedef struct NODE +{ + int32 type; + int32 val; + int16 distance; + int16 length; + uint16 flag; + struct NODE *next; +} NODE; + +typedef struct +{ + char *buf; + int32 state; + int32 count; + /* reverse polish notation in list (for temporary usage) */ + NODE *str; + /* number in str */ + int32 num; + + /* user-friendly operand */ + int32 lenop; + int32 sumlen; + char *op; + char *curop; +} QPRS_STATE; + +/* + * get token from query string + */ +static int32 +gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag) +{ + int charlen; + + for (;;) + { + charlen = pg_mblen(state->buf); + + switch (state->state) + { + case WAITOPERAND: + if (charlen == 1 && t_iseq(state->buf, '!')) + { + (state->buf)++; + *val = (int32) '!'; + return OPR; + } + else if (charlen == 1 && t_iseq(state->buf, '(')) + { + state->count++; + (state->buf)++; + return OPEN; + } + else if (ISALNUM(state->buf)) + { + state->state = INOPERAND; + *strval = state->buf; + *lenval = charlen; + *flag = 0; + } + else if (!t_isspace(state->buf)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("operand syntax error"))); + break; + case INOPERAND: + if (ISALNUM(state->buf)) + { + if (*flag) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("modifiers syntax error"))); + *lenval += charlen; + } + else if (charlen == 1 && t_iseq(state->buf, '%')) + *flag |= LVAR_SUBLEXEME; + else if (charlen == 1 && t_iseq(state->buf, '@')) + *flag |= LVAR_INCASE; + else if (charlen == 1 && t_iseq(state->buf, '*')) + *flag |= LVAR_ANYEND; + else + { + state->state = WAITOPERATOR; + return VAL; + } + break; + case WAITOPERATOR: + if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))) + { + state->state = WAITOPERAND; + *val = (int32) *(state->buf); + (state->buf)++; + return OPR; + } + else if (charlen == 1 && t_iseq(state->buf, ')')) + { + (state->buf)++; + state->count--; + return (state->count < 0) ? ERR : CLOSE; + } + else if (*(state->buf) == '\0') + return (state->count) ? ERR : END; + else if (charlen == 1 && !t_iseq(state->buf, ' ')) + return ERR; + break; + default: + return ERR; + break; + } + + state->buf += charlen; + } +} + +/* + * push new one in polish notation reverse view + */ +static void +pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag) +{ + NODE *tmp = (NODE *) palloc(sizeof(NODE)); + + tmp->type = type; + tmp->val = val; + tmp->flag = flag; + if (distance > 0xffff) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("value is too big"))); + if (lenval > 0xff) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("operand is too long"))); + tmp->distance = distance; + tmp->length = lenval; + tmp->next = state->str; + state->str = tmp; + state->num++; +} + +/* + * This function is used for query text parsing + */ +static void +pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag) +{ + if (lenval > 0xffff) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("word is too long"))); + + pushquery(state, type, ltree_crc32_sz(strval, lenval), + state->curop - state->op, lenval, flag); + + while (state->curop - state->op + lenval + 1 >= state->lenop) + { + int32 tmp = state->curop - state->op; + + state->lenop *= 2; + state->op = (char *) repalloc((void *) state->op, state->lenop); + state->curop = state->op + tmp; + } + memcpy((void *) state->curop, (void *) strval, lenval); + state->curop += lenval; + *(state->curop) = '\0'; + state->curop++; + state->sumlen += lenval + 1; +} + +#define STACKDEPTH 32 +/* + * make polish notation of query + */ +static int32 +makepol(QPRS_STATE *state) +{ + int32 val = 0, + type; + int32 lenval = 0; + char *strval = NULL; + int32 stack[STACKDEPTH]; + int32 lenstack = 0; + uint16 flag = 0; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END) + { + switch (type) + { + case VAL: + pushval_asis(state, VAL, strval, lenval, flag); + while (lenstack && (stack[lenstack - 1] == (int32) '&' || + stack[lenstack - 1] == (int32) '!')) + { + lenstack--; + pushquery(state, OPR, stack[lenstack], 0, 0, 0); + } + break; + case OPR: + if (lenstack && val == (int32) '|') + pushquery(state, OPR, val, 0, 0, 0); + else + { + if (lenstack == STACKDEPTH) + /* internal error */ + elog(ERROR, "stack too short"); + stack[lenstack] = val; + lenstack++; + } + break; + case OPEN: + if (makepol(state) == ERR) + return ERR; + while (lenstack && (stack[lenstack - 1] == (int32) '&' || + stack[lenstack - 1] == (int32) '!')) + { + lenstack--; + pushquery(state, OPR, stack[lenstack], 0, 0, 0); + } + break; + case CLOSE: + while (lenstack) + { + lenstack--; + pushquery(state, OPR, stack[lenstack], 0, 0, 0); + }; + return END; + break; + case ERR: + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error"))); + + return ERR; + } + } + while (lenstack) + { + lenstack--; + pushquery(state, OPR, stack[lenstack], 0, 0, 0); + }; + return END; +} + +static void +findoprnd(ITEM *ptr, int32 *pos) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE) + { + ptr[*pos].left = 0; + (*pos)++; + } + else if (ptr[*pos].val == (int32) '!') + { + ptr[*pos].left = 1; + (*pos)++; + findoprnd(ptr, pos); + } + else + { + ITEM *curitem = &ptr[*pos]; + int32 tmp = *pos; + + (*pos)++; + findoprnd(ptr, pos); + curitem->left = *pos - tmp; + findoprnd(ptr, pos); + } +} + + +/* + * input + */ +static ltxtquery * +queryin(char *buf) +{ + QPRS_STATE state; + int32 i; + ltxtquery *query; + int32 commonlen; + ITEM *ptr; + NODE *tmp; + int32 pos = 0; + +#ifdef BS_DEBUG + char pbuf[16384], + *cur; +#endif + + /* init state */ + state.buf = buf; + state.state = WAITOPERAND; + state.count = 0; + state.num = 0; + state.str = NULL; + + /* init list of operand */ + state.sumlen = 0; + state.lenop = 64; + state.curop = state.op = (char *) palloc(state.lenop); + *(state.curop) = '\0'; + + /* parse query & make polish notation (postfix, but in reverse order) */ + makepol(&state); + if (!state.num) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error"), + errdetail("Empty query."))); + + if (LTXTQUERY_TOO_BIG(state.num, state.sumlen)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("ltxtquery is too large"))); + commonlen = COMPUTESIZE(state.num, state.sumlen); + + query = (ltxtquery *) palloc0(commonlen); + SET_VARSIZE(query, commonlen); + query->size = state.num; + ptr = GETQUERY(query); + + /* set item in polish notation */ + for (i = 0; i < state.num; i++) + { + ptr[i].type = state.str->type; + ptr[i].val = state.str->val; + ptr[i].distance = state.str->distance; + ptr[i].length = state.str->length; + ptr[i].flag = state.str->flag; + tmp = state.str->next; + pfree(state.str); + state.str = tmp; + } + + /* set user-friendly operand view */ + memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); + pfree(state.op); + + /* set left operand's position for every operator */ + pos = 0; + findoprnd(ptr, &pos); + + return query; +} + +/* + * in without morphology + */ +PG_FUNCTION_INFO_V1(ltxtq_in); +Datum +ltxtq_in(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0))); +} + +/* + * ltxtquery type recv function + * + * The type is sent as text in binary mode, so this is almost the same + * as the input function, but it's prefixed with a version number so we + * can change the binary format sent in future if necessary. For now, + * only version 1 is supported. + */ +PG_FUNCTION_INFO_V1(ltxtq_recv); +Datum +ltxtq_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int version = pq_getmsgint(buf, 1); + char *str; + int nbytes; + ltxtquery *res; + + if (version != 1) + elog(ERROR, "unsupported ltxtquery version number %d", version); + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + res = queryin(str); + pfree(str); + + PG_RETURN_POINTER(res); +} + +/* + * out function + */ +typedef struct +{ + ITEM *curpol; + char *buf; + char *cur; + char *op; + int32 buflen; +} INFIX; + +#define RESIZEBUF(inf,addsize) \ +while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ +{ \ + int32 len = (inf)->cur - (inf)->buf; \ + (inf)->buflen *= 2; \ + (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \ + (inf)->cur = (inf)->buf + len; \ +} + +/* + * recursive walk on tree and print it in + * infix (human-readable) view + */ +static void +infix(INFIX *in, bool first) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + if (in->curpol->type == VAL) + { + char *op = in->op + in->curpol->distance; + + RESIZEBUF(in, in->curpol->length * 2 + 5); + while (*op) + { + *(in->cur) = *op; + op++; + in->cur++; + } + if (in->curpol->flag & LVAR_SUBLEXEME) + { + *(in->cur) = '%'; + in->cur++; + } + if (in->curpol->flag & LVAR_INCASE) + { + *(in->cur) = '@'; + in->cur++; + } + if (in->curpol->flag & LVAR_ANYEND) + { + *(in->cur) = '*'; + in->cur++; + } + *(in->cur) = '\0'; + in->curpol++; + } + else if (in->curpol->val == (int32) '!') + { + bool isopr = false; + + RESIZEBUF(in, 1); + *(in->cur) = '!'; + in->cur++; + *(in->cur) = '\0'; + in->curpol++; + if (in->curpol->type == OPR) + { + isopr = true; + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr(in->cur, '\0'); + } + infix(in, isopr); + if (isopr) + { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr(in->cur, '\0'); + } + } + else + { + int32 op = in->curpol->val; + INFIX nrm; + + in->curpol++; + if (op == (int32) '|' && !first) + { + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr(in->cur, '\0'); + } + + nrm.curpol = in->curpol; + nrm.op = in->op; + nrm.buflen = 16; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + + /* get right operand */ + infix(&nrm, false); + + /* get & print left operand */ + in->curpol = nrm.curpol; + infix(in, false); + + /* print operator & right operand */ + RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); + sprintf(in->cur, " %c %s", op, nrm.buf); + in->cur = strchr(in->cur, '\0'); + pfree(nrm.buf); + + if (op == (int32) '|' && !first) + { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr(in->cur, '\0'); + } + } +} + +PG_FUNCTION_INFO_V1(ltxtq_out); +Datum +ltxtq_out(PG_FUNCTION_ARGS) +{ + ltxtquery *query = PG_GETARG_LTXTQUERY_P(0); + INFIX nrm; + + if (query->size == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error"), + errdetail("Empty query."))); + + nrm.curpol = GETQUERY(query); + nrm.buflen = 32; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + *(nrm.cur) = '\0'; + nrm.op = GETOPERAND(query); + infix(&nrm, true); + + PG_RETURN_POINTER(nrm.buf); +} + +/* + * ltxtquery type send function + * + * The type is sent as text in binary mode, so this is almost the same + * as the output function, but it's prefixed with a version number so we + * can change the binary format sent in future if necessary. For now, + * only version 1 is supported. + */ +PG_FUNCTION_INFO_V1(ltxtq_send); +Datum +ltxtq_send(PG_FUNCTION_ARGS) +{ + ltxtquery *query = PG_GETARG_LTXTQUERY_P(0); + StringInfoData buf; + int version = 1; + INFIX nrm; + + if (query->size == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error"), + errdetail("Empty query."))); + + nrm.curpol = GETQUERY(query); + nrm.buflen = 32; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + *(nrm.cur) = '\0'; + nrm.op = GETOPERAND(query); + infix(&nrm, true); + + pq_begintypsend(&buf); + pq_sendint8(&buf, version); + pq_sendtext(&buf, nrm.buf, strlen(nrm.buf)); + pfree(nrm.buf); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} |