summaryrefslogtreecommitdiffstats
path: root/storage/mroonga/vendor/groonga/lib/str.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/str.c')
-rw-r--r--storage/mroonga/vendor/groonga/lib/str.c3276
1 files changed, 3276 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/str.c b/storage/mroonga/vendor/groonga/lib/str.c
new file mode 100644
index 00000000..4f0a3a98
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/lib/str.c
@@ -0,0 +1,3276 @@
+/* -*- c-basic-offset: 2 -*- */
+/* Copyright(C) 2009-2016 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+#include "grn.h"
+#include <limits.h>
+#include <stdarg.h>
+#include <string.h>
+#include "grn_db.h"
+#include "grn_str.h"
+#include "grn_nfkc.h"
+
+#ifndef _ISOC99_SOURCE
+#define _ISOC99_SOURCE
+#endif /* _ISOC99_SOURCE */
+#include <math.h>
+
+#if defined(HAVE__GMTIME64_S) && defined(__GNUC__)
+# ifdef _WIN64
+# define gmtime_s(tm, time) _gmtime64_s(tm, time)
+# else /* _WIN64 */
+# define gmtime_s(tm, time) _gmtime32_s(tm, time)
+# endif /* _WIN64 */
+#endif /* defined(HAVE__GMTIME64_S) && defined(__GNUC__) */
+
+inline static int
+grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char *end)
+{
+ /* MEMO: This function allows non-null-terminated string as str. */
+ /* But requires the end of string. */
+ if (end <= str || !*str) {
+ return 0;
+ }
+ if (*str & 0x80) {
+ int i;
+ int len;
+ GRN_BIT_SCAN_REV(~(((uint) *str) << 24), len);
+ len = 31 - len;
+ if ((unsigned int)(len - 2) >= 3) { /* (len == 1 || len >= 5) */
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "grn_str_charlen_utf8(): first byte is invalid");
+ return 0;
+ }
+ if (str + len > end) {
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "grn_str_charlen_utf8(): incomplete character");
+ return 0;
+ }
+ for (i = 1; i < len; ++i) {
+ if ((str[i] & 0xc0) != 0x80) {
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "grn_str_charlen_utf8(): <%d>th byte is invalid",
+ i + 1);
+ return 0;
+ }
+ }
+ return len;
+ } else {
+ return 1;
+ }
+}
+
+unsigned int
+grn_str_charlen(grn_ctx *ctx, const char *str, grn_encoding encoding)
+{
+ /* MEMO: This function requires null-terminated string as str.*/
+ unsigned char *p = (unsigned char *) str;
+ if (!*p) { return 0; }
+ switch (encoding) {
+ case GRN_ENC_EUC_JP :
+ if (*p & 0x80) {
+ if (*(p + 1)) {
+ return 2;
+ } else {
+ /* This is invalid character */
+ GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_str_charlen");
+ return 0;
+ }
+ }
+ return 1;
+ case GRN_ENC_UTF8 :
+ if (*p & 0x80) {
+ int b, w;
+ size_t size;
+ for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++);
+ if (!w) {
+ GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(1) on grn_str_charlen");
+ return 0;
+ }
+ for (size = 1; w--; size++) {
+ if (!*++p || (*p & 0xc0) != 0x80) {
+ GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen");
+ return 0;
+ }
+ }
+ return size;
+ } else {
+ return 1;
+ }
+ case GRN_ENC_SJIS :
+ if (*p & 0x80) {
+ /* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */
+ if (0xa0 <= *p && *p <= 0xdf) {
+ /* hankaku-kana */
+ return 1;
+ } else if (!(*(p + 1))) {
+ /* This is invalid character */
+ GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_str_charlen");
+ return 0;
+ } else {
+ return 2;
+ }
+ } else {
+ return 1;
+ }
+ default :
+ return 1;
+ }
+ return 0;
+}
+
+int
+grn_charlen_(grn_ctx *ctx, const char *str, const char *end, grn_encoding encoding)
+{
+ /* MEMO: This function allows non-null-terminated string as str. */
+ /* But requires the end of string. */
+ unsigned char *p = (unsigned char *) str;
+ if (p >= (unsigned char *)end) { return 0; }
+ switch (encoding) {
+ case GRN_ENC_EUC_JP :
+ if (*p & 0x80) {
+ if ((p + 1) < (unsigned char *)end) {
+ return 2;
+ } else {
+ /* This is invalid character */
+ GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_charlen");
+ return 0;
+ }
+ }
+ return 1;
+ case GRN_ENC_UTF8 :
+ return grn_str_charlen_utf8(ctx, p, (unsigned char *)end);
+ case GRN_ENC_SJIS :
+ if (*p & 0x80) {
+ /* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */
+ if (0xa0 <= *p && *p <= 0xdf) {
+ /* hankaku-kana */
+ return 1;
+ } else if (++p >= (unsigned char *)end) {
+ /* This is invalid character */
+ GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_charlen");
+ return 0;
+ } else {
+ return 2;
+ }
+ } else {
+ return 1;
+ }
+ default :
+ return 1;
+ }
+ return 0;
+}
+
+int
+grn_charlen(grn_ctx *ctx, const char *str, const char *end)
+{
+ return grn_charlen_(ctx, str, end, ctx->encoding);
+}
+
+static unsigned char symbol[] = {
+ ',', '.', 0, ':', ';', '?', '!', 0, 0, 0, '`', 0, '^', '~', '_', 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, '-', '-', '/', '\\', 0, 0, '|', 0, 0, 0, '\'', 0,
+ '"', '(', ')', 0, 0, '[', ']', '{', '}', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ '+', '-', 0, 0, 0, '=', 0, '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ '$', 0, 0, '%', '#', '&', '*', '@', 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+inline static grn_rc
+normalize_euc(grn_ctx *ctx, grn_str *nstr)
+{
+ static uint16_t hankana[] = {
+ 0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
+ 0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
+ 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
+ 0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
+ 0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
+ 0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
+ 0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
+ 0xa1eb
+ };
+ static unsigned char dakuten[] = {
+ 0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
+ 0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
+ 0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
+ 0, 0xdc
+ };
+ static unsigned char handaku[] = {
+ 0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
+ };
+ int16_t *ch;
+ const unsigned char *s, *s_, *e;
+ unsigned char *d, *d0, *d_, b;
+ uint_least8_t *cp, *ctypes, ctype;
+ size_t size = nstr->orig_blen, length = 0;
+ int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+ if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ d0 = (unsigned char *) nstr->norm;
+ if (nstr->flags & GRN_STR_WITH_CHECKS) {
+ if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
+ GRN_FREE(nstr->norm);
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ ch = nstr->checks;
+ if (nstr->flags & GRN_STR_WITH_CTYPES) {
+ if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+ GRN_FREE(nstr->checks);
+ GRN_FREE(nstr->norm);
+ nstr->checks = NULL;
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ cp = ctypes = nstr->ctypes;
+ e = (unsigned char *)nstr->orig + size;
+ for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+ if ((*s & 0x80)) {
+ if (((s + 1) < e) && (*(s + 1) & 0x80)) {
+ unsigned char c1 = *s++, c2 = *s, c3 = 0;
+ switch (c1 >> 4) {
+ case 0x08 :
+ if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
+ uint16_t c = hankana[c2 - 0xa0];
+ switch (c) {
+ case 0xa1ab :
+ if (d > d0 + 1 && d[-2] == 0xa5
+ && 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
+ *(d - 1) = b;
+ if (ch) { ch[-1] += 2; s_ += 2; }
+ continue;
+ } else {
+ *d++ = c >> 8; *d = c & 0xff;
+ }
+ break;
+ case 0xa1eb :
+ if (d > d0 + 1 && d[-2] == 0xa5
+ && 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
+ *(d - 1) = b;
+ if (ch) { ch[-1] += 2; s_ += 2; }
+ continue;
+ } else {
+ *d++ = c >> 8; *d = c & 0xff;
+ }
+ break;
+ default :
+ *d++ = c >> 8; *d = c & 0xff;
+ break;
+ }
+ ctype = GRN_CHAR_KATAKANA;
+ } else {
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_OTHERS;
+ }
+ break;
+ case 0x09 :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_OTHERS;
+ break;
+ case 0x0a :
+ switch (c1 & 0x0f) {
+ case 1 :
+ switch (c2) {
+ case 0xbc :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_KATAKANA;
+ break;
+ case 0xb9 :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_KANJI;
+ break;
+ case 0xa1 :
+ if (removeblankp) {
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ } else {
+ *d = ' ';
+ ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
+ }
+ break;
+ default :
+ if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
+ *d = c3;
+ ctype = GRN_CHAR_SYMBOL;
+ } else {
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_OTHERS;
+ }
+ break;
+ }
+ break;
+ case 2 :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_SYMBOL;
+ break;
+ case 3 :
+ c3 = c2 - 0x80;
+ if ('a' <= c3 && c3 <= 'z') {
+ ctype = GRN_CHAR_ALPHA;
+ *d = c3;
+ } else if ('A' <= c3 && c3 <= 'Z') {
+ ctype = GRN_CHAR_ALPHA;
+ *d = c3 + 0x20;
+ } else if ('0' <= c3 && c3 <= '9') {
+ ctype = GRN_CHAR_DIGIT;
+ *d = c3;
+ } else {
+ ctype = GRN_CHAR_OTHERS;
+ *d++ = c1; *d = c2;
+ }
+ break;
+ case 4 :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_HIRAGANA;
+ break;
+ case 5 :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_KATAKANA;
+ break;
+ case 6 :
+ case 7 :
+ case 8 :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_SYMBOL;
+ break;
+ default :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_OTHERS;
+ break;
+ }
+ break;
+ default :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_KANJI;
+ break;
+ }
+ } else {
+ /* skip invalid character */
+ continue;
+ }
+ } else {
+ unsigned char c = *s;
+ switch (c >> 4) {
+ case 0 :
+ case 1 :
+ /* skip unprintable ascii */
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ case 2 :
+ if (c == 0x20) {
+ if (removeblankp) {
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ } else {
+ *d = ' ';
+ ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
+ }
+ } else {
+ *d = c;
+ ctype = GRN_CHAR_SYMBOL;
+ }
+ break;
+ case 3 :
+ *d = c;
+ ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
+ break;
+ case 4 :
+ *d = ('A' <= c) ? c + 0x20 : c;
+ ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 5 :
+ *d = (c <= 'Z') ? c + 0x20 : c;
+ ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
+ break;
+ case 6 :
+ *d = c;
+ ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 7 :
+ *d = c;
+ ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
+ break;
+ default :
+ *d = c;
+ ctype = GRN_CHAR_OTHERS;
+ break;
+ }
+ }
+ d++;
+ length++;
+ if (cp) { *cp++ = ctype; }
+ if (ch) {
+ *ch++ = (int16_t)(s + 1 - s_);
+ s_ = s + 1;
+ while (++d_ < d) { *ch++ = 0; }
+ }
+ }
+ if (cp) { *cp = GRN_CHAR_NULL; }
+ *d = '\0';
+ nstr->length = length;
+ nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+ return GRN_SUCCESS;
+}
+
+#ifdef GRN_WITH_NFKC
+inline static grn_rc
+normalize_utf8(grn_ctx *ctx, grn_str *nstr)
+{
+ int16_t *ch;
+ const unsigned char *s, *s_, *s__ = NULL, *p, *p2, *pe, *e;
+ unsigned char *d, *d_, *de;
+ uint_least8_t *cp;
+ size_t length = 0, ls, lp, size = nstr->orig_blen, ds = size * 3;
+ int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+ if (!(nstr->norm = GRN_MALLOC(ds + 1))) {
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ if (nstr->flags & GRN_STR_WITH_CHECKS) {
+ if (!(nstr->checks = GRN_MALLOC(ds * sizeof(int16_t) + 1))) {
+ GRN_FREE(nstr->norm); nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ ch = nstr->checks;
+ if (nstr->flags & GRN_STR_WITH_CTYPES) {
+ if (!(nstr->ctypes = GRN_MALLOC(ds + 1))) {
+ if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
+ GRN_FREE(nstr->norm); nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ cp = nstr->ctypes;
+ d = (unsigned char *)nstr->norm;
+ de = d + ds;
+ d_ = NULL;
+ e = (unsigned char *)nstr->orig + size;
+ for (s = s_ = (unsigned char *)nstr->orig; ; s += ls) {
+ if (!(ls = grn_str_charlen_utf8(ctx, s, e))) {
+ break;
+ }
+ if ((p = (unsigned char *)grn_nfkc_decompose(s))) {
+ pe = p + strlen((char *)p);
+ } else {
+ p = s;
+ pe = p + ls;
+ }
+ if (d_ && (p2 = (unsigned char *)grn_nfkc_compose(d_, p))) {
+ p = p2;
+ pe = p + strlen((char *)p);
+ if (cp) { cp--; }
+ if (ch) {
+ ch -= (d - d_);
+ s_ = s__;
+ }
+ d = d_;
+ length--;
+ }
+ for (; ; p += lp) {
+ if (!(lp = grn_str_charlen_utf8(ctx, p, pe))) {
+ break;
+ }
+ if ((*p == ' ' && removeblankp) || *p < 0x20 /* skip unprintable ascii */ ) {
+ if (cp > nstr->ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ } else {
+ if (de <= d + lp) {
+ unsigned char *norm;
+ ds += (ds >> 1) + lp;
+ if (!(norm = GRN_REALLOC(nstr->norm, ds + 1))) {
+ if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
+ if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
+ GRN_FREE(nstr->norm); nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ de = norm + ds;
+ d = norm + (d - (unsigned char *)nstr->norm);
+ nstr->norm = (char *)norm;
+ if (ch) {
+ int16_t *checks;
+ if (!(checks = GRN_REALLOC(nstr->checks, ds * sizeof(int16_t)+ 1))) {
+ if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
+ GRN_FREE(nstr->checks); nstr->checks = NULL;
+ GRN_FREE(nstr->norm); nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ ch = checks + (ch - nstr->checks);
+ nstr->checks = checks;
+ }
+ if (cp) {
+ uint_least8_t *ctypes;
+ if (!(ctypes = GRN_REALLOC(nstr->ctypes, ds + 1))) {
+ GRN_FREE(nstr->ctypes); nstr->ctypes = NULL;
+ if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
+ GRN_FREE(nstr->norm); nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ cp = ctypes + (cp - nstr->ctypes);
+ nstr->ctypes = ctypes;
+ }
+ }
+ grn_memcpy(d, p, lp);
+ d_ = d;
+ d += lp;
+ length++;
+ if (cp) { *cp++ = grn_nfkc_char_type(p); }
+ if (ch) {
+ size_t i;
+ if (s_ == s + ls) {
+ *ch++ = -1;
+ } else {
+ *ch++ = (int16_t)(s + ls - s_);
+ s__ = s_;
+ s_ = s + ls;
+ }
+ for (i = lp; i > 1; i--) { *ch++ = 0; }
+ }
+ }
+ }
+ }
+ if (cp) { *cp = GRN_CHAR_NULL; }
+ *d = '\0';
+ nstr->length = length;
+ nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+ return GRN_SUCCESS;
+}
+#endif /* GRN_WITH_NFKC */
+
+inline static grn_rc
+normalize_sjis(grn_ctx *ctx, grn_str *nstr)
+{
+ static uint16_t hankana[] = {
+ 0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342,
+ 0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 0x815b, 0x8341,
+ 0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 0x834e, 0x8350, 0x8352,
+ 0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 0x835e, 0x8360, 0x8363, 0x8365,
+ 0x8367, 0x8369, 0x836a, 0x836b, 0x836c, 0x836d, 0x836e, 0x8371, 0x8374,
+ 0x8377, 0x837a, 0x837d, 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386,
+ 0x8388, 0x8389, 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, 0x814a,
+ 0x814b
+ };
+ static unsigned char dakuten[] = {
+ 0x94, 0, 0, 0, 0, 0x4b, 0, 0x4d, 0, 0x4f, 0, 0x51, 0, 0x53, 0, 0x55, 0,
+ 0x57, 0, 0x59, 0, 0x5b, 0, 0x5d, 0, 0x5f, 0, 0x61, 0, 0, 0x64, 0, 0x66,
+ 0, 0x68, 0, 0, 0, 0, 0, 0, 0x6f, 0, 0, 0x72, 0, 0, 0x75, 0, 0, 0x78, 0,
+ 0, 0x7b
+ };
+ static unsigned char handaku[] = {
+ 0x70, 0, 0, 0x73, 0, 0, 0x76, 0, 0, 0x79, 0, 0, 0x7c
+ };
+ int16_t *ch;
+ const unsigned char *s, *s_;
+ unsigned char *d, *d0, *d_, b, *e;
+ uint_least8_t *cp, *ctypes, ctype;
+ size_t size = nstr->orig_blen, length = 0;
+ int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+ if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ d0 = (unsigned char *) nstr->norm;
+ if (nstr->flags & GRN_STR_WITH_CHECKS) {
+ if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
+ GRN_FREE(nstr->norm);
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ ch = nstr->checks;
+ if (nstr->flags & GRN_STR_WITH_CTYPES) {
+ if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+ GRN_FREE(nstr->checks);
+ GRN_FREE(nstr->norm);
+ nstr->checks = NULL;
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ cp = ctypes = nstr->ctypes;
+ e = (unsigned char *)nstr->orig + size;
+ for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+ if ((*s & 0x80)) {
+ if (0xa0 <= *s && *s <= 0xdf) {
+ uint16_t c = hankana[*s - 0xa0];
+ switch (c) {
+ case 0x814a :
+ if (d > d0 + 1 && d[-2] == 0x83
+ && 0x45 <= d[-1] && d[-1] <= 0x7a && (b = dakuten[d[-1] - 0x45])) {
+ *(d - 1) = b;
+ if (ch) { ch[-1]++; s_++; }
+ continue;
+ } else {
+ *d++ = c >> 8; *d = c & 0xff;
+ }
+ break;
+ case 0x814b :
+ if (d > d0 + 1 && d[-2] == 0x83
+ && 0x6e <= d[-1] && d[-1] <= 0x7a && (b = handaku[d[-1] - 0x6e])) {
+ *(d - 1) = b;
+ if (ch) { ch[-1]++; s_++; }
+ continue;
+ } else {
+ *d++ = c >> 8; *d = c & 0xff;
+ }
+ break;
+ default :
+ *d++ = c >> 8; *d = c & 0xff;
+ break;
+ }
+ ctype = GRN_CHAR_KATAKANA;
+ } else {
+ if ((s + 1) < e && 0x40 <= *(s + 1) && *(s + 1) <= 0xfc) {
+ unsigned char c1 = *s++, c2 = *s, c3 = 0;
+ if (0x81 <= c1 && c1 <= 0x87) {
+ switch (c1 & 0x0f) {
+ case 1 :
+ switch (c2) {
+ case 0x5b :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_KATAKANA;
+ break;
+ case 0x58 :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_KANJI;
+ break;
+ case 0x40 :
+ if (removeblankp) {
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ } else {
+ *d = ' ';
+ ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
+ }
+ break;
+ default :
+ if (0x43 <= c2 && c2 <= 0x7e && (c3 = symbol[c2 - 0x43])) {
+ *d = c3;
+ ctype = GRN_CHAR_SYMBOL;
+ } else if (0x7f <= c2 && c2 <= 0x97 && (c3 = symbol[c2 - 0x44])) {
+ *d = c3;
+ ctype = GRN_CHAR_SYMBOL;
+ } else {
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_OTHERS;
+ }
+ break;
+ }
+ break;
+ case 2 :
+ c3 = c2 - 0x1f;
+ if (0x4f <= c2 && c2 <= 0x58) {
+ ctype = GRN_CHAR_DIGIT;
+ *d = c2 - 0x1f;
+ } else if (0x60 <= c2 && c2 <= 0x79) {
+ ctype = GRN_CHAR_ALPHA;
+ *d = c2 + 0x01;
+ } else if (0x81 <= c2 && c2 <= 0x9a) {
+ ctype = GRN_CHAR_ALPHA;
+ *d = c2 - 0x20;
+ } else if (0x9f <= c2 && c2 <= 0xf1) {
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_HIRAGANA;
+ } else {
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_OTHERS;
+ }
+ break;
+ case 3 :
+ if (0x40 <= c2 && c2 <= 0x96) {
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_KATAKANA;
+ } else {
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_SYMBOL;
+ }
+ break;
+ case 4 :
+ case 7 :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_SYMBOL;
+ break;
+ default :
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_OTHERS;
+ break;
+ }
+ } else {
+ *d++ = c1; *d = c2;
+ ctype = GRN_CHAR_KANJI;
+ }
+ } else {
+ /* skip invalid character */
+ continue;
+ }
+ }
+ } else {
+ unsigned char c = *s;
+ switch (c >> 4) {
+ case 0 :
+ case 1 :
+ /* skip unprintable ascii */
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ case 2 :
+ if (c == 0x20) {
+ if (removeblankp) {
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ } else {
+ *d = ' ';
+ ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
+ }
+ } else {
+ *d = c;
+ ctype = GRN_CHAR_SYMBOL;
+ }
+ break;
+ case 3 :
+ *d = c;
+ ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
+ break;
+ case 4 :
+ *d = ('A' <= c) ? c + 0x20 : c;
+ ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 5 :
+ *d = (c <= 'Z') ? c + 0x20 : c;
+ ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
+ break;
+ case 6 :
+ *d = c;
+ ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 7 :
+ *d = c;
+ ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
+ break;
+ default :
+ *d = c;
+ ctype = GRN_CHAR_OTHERS;
+ break;
+ }
+ }
+ d++;
+ length++;
+ if (cp) { *cp++ = ctype; }
+ if (ch) {
+ *ch++ = (int16_t)(s + 1 - s_);
+ s_ = s + 1;
+ while (++d_ < d) { *ch++ = 0; }
+ }
+ }
+ if (cp) { *cp = GRN_CHAR_NULL; }
+ *d = '\0';
+ nstr->length = length;
+ nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+ return GRN_SUCCESS;
+}
+
+inline static grn_rc
+normalize_none(grn_ctx *ctx, grn_str *nstr)
+{
+ int16_t *ch;
+ const unsigned char *s, *s_, *e;
+ unsigned char *d, *d0, *d_;
+ uint_least8_t *cp, *ctypes, ctype;
+ size_t size = nstr->orig_blen, length = 0;
+ int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+ if (!(nstr->norm = GRN_MALLOC(size + 1))) {
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ d0 = (unsigned char *) nstr->norm;
+ if (nstr->flags & GRN_STR_WITH_CHECKS) {
+ if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
+ GRN_FREE(nstr->norm);
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ ch = nstr->checks;
+ if (nstr->flags & GRN_STR_WITH_CTYPES) {
+ if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+ GRN_FREE(nstr->checks);
+ GRN_FREE(nstr->norm);
+ nstr->checks = NULL;
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ cp = ctypes = nstr->ctypes;
+ e = (unsigned char *)nstr->orig + size;
+ for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+ unsigned char c = *s;
+ switch (c >> 4) {
+ case 0 :
+ case 1 :
+ /* skip unprintable ascii */
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ case 2 :
+ if (c == 0x20) {
+ if (removeblankp) {
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ } else {
+ *d = ' ';
+ ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
+ }
+ } else {
+ *d = c;
+ ctype = GRN_CHAR_SYMBOL;
+ }
+ break;
+ case 3 :
+ *d = c;
+ ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
+ break;
+ case 4 :
+ *d = ('A' <= c) ? c + 0x20 : c;
+ ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 5 :
+ *d = (c <= 'Z') ? c + 0x20 : c;
+ ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
+ break;
+ case 6 :
+ *d = c;
+ ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 7 :
+ *d = c;
+ ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
+ break;
+ default :
+ *d = c;
+ ctype = GRN_CHAR_OTHERS;
+ break;
+ }
+ d++;
+ length++;
+ if (cp) { *cp++ = ctype; }
+ if (ch) {
+ *ch++ = (int16_t)(s + 1 - s_);
+ s_ = s + 1;
+ while (++d_ < d) { *ch++ = 0; }
+ }
+ }
+ if (cp) { *cp = GRN_CHAR_NULL; }
+ *d = '\0';
+ nstr->length = length;
+ nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+ return GRN_SUCCESS;
+}
+
+/* use cp1252 as latin1 */
+inline static grn_rc
+normalize_latin1(grn_ctx *ctx, grn_str *nstr)
+{
+ int16_t *ch;
+ const unsigned char *s, *s_, *e;
+ unsigned char *d, *d0, *d_;
+ uint_least8_t *cp, *ctypes, ctype;
+ size_t size = nstr->orig_blen, length = 0;
+ int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+ if (!(nstr->norm = GRN_MALLOC(size + 1))) {
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ d0 = (unsigned char *) nstr->norm;
+ if (nstr->flags & GRN_STR_WITH_CHECKS) {
+ if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
+ GRN_FREE(nstr->norm);
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ ch = nstr->checks;
+ if (nstr->flags & GRN_STR_WITH_CTYPES) {
+ if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+ GRN_FREE(nstr->checks);
+ GRN_FREE(nstr->norm);
+ nstr->checks = NULL;
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ cp = ctypes = nstr->ctypes;
+ e = (unsigned char *)nstr->orig + size;
+ for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+ unsigned char c = *s;
+ switch (c >> 4) {
+ case 0 :
+ case 1 :
+ /* skip unprintable ascii */
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ case 2 :
+ if (c == 0x20) {
+ if (removeblankp) {
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ } else {
+ *d = ' ';
+ ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
+ }
+ } else {
+ *d = c;
+ ctype = GRN_CHAR_SYMBOL;
+ }
+ break;
+ case 3 :
+ *d = c;
+ ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
+ break;
+ case 4 :
+ *d = ('A' <= c) ? c + 0x20 : c;
+ ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 5 :
+ *d = (c <= 'Z') ? c + 0x20 : c;
+ ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
+ break;
+ case 6 :
+ *d = c;
+ ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 7 :
+ *d = c;
+ ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
+ break;
+ case 8 :
+ if (c == 0x8a || c == 0x8c || c == 0x8e) {
+ *d = c + 0x10;
+ ctype = GRN_CHAR_ALPHA;
+ } else {
+ *d = c;
+ ctype = GRN_CHAR_SYMBOL;
+ }
+ break;
+ case 9 :
+ if (c == 0x9a || c == 0x9c || c == 0x9e || c == 0x9f) {
+ *d = (c == 0x9f) ? c + 0x60 : c;
+ ctype = GRN_CHAR_ALPHA;
+ } else {
+ *d = c;
+ ctype = GRN_CHAR_SYMBOL;
+ }
+ break;
+ case 0x0c :
+ *d = c + 0x20;
+ ctype = GRN_CHAR_ALPHA;
+ break;
+ case 0x0d :
+ *d = (c == 0xd7 || c == 0xdf) ? c : c + 0x20;
+ ctype = (c == 0xd7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 0x0e :
+ *d = c;
+ ctype = GRN_CHAR_ALPHA;
+ break;
+ case 0x0f :
+ *d = c;
+ ctype = (c == 0xf7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ default :
+ *d = c;
+ ctype = GRN_CHAR_OTHERS;
+ break;
+ }
+ d++;
+ length++;
+ if (cp) { *cp++ = ctype; }
+ if (ch) {
+ *ch++ = (int16_t)(s + 1 - s_);
+ s_ = s + 1;
+ while (++d_ < d) { *ch++ = 0; }
+ }
+ }
+ if (cp) { *cp = GRN_CHAR_NULL; }
+ *d = '\0';
+ nstr->length = length;
+ nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+ return GRN_SUCCESS;
+}
+
+inline static grn_rc
+normalize_koi8r(grn_ctx *ctx, grn_str *nstr)
+{
+ int16_t *ch;
+ const unsigned char *s, *s_, *e;
+ unsigned char *d, *d0, *d_;
+ uint_least8_t *cp, *ctypes, ctype;
+ size_t size = strlen(nstr->orig), length = 0;
+ int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+ if (!(nstr->norm = GRN_MALLOC(size + 1))) {
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ d0 = (unsigned char *) nstr->norm;
+ if (nstr->flags & GRN_STR_WITH_CHECKS) {
+ if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
+ GRN_FREE(nstr->norm);
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ ch = nstr->checks;
+ if (nstr->flags & GRN_STR_WITH_CTYPES) {
+ if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+ GRN_FREE(nstr->checks);
+ GRN_FREE(nstr->norm);
+ nstr->checks = NULL;
+ nstr->norm = NULL;
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ cp = ctypes = nstr->ctypes;
+ e = (unsigned char *)nstr->orig + size;
+ for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+ unsigned char c = *s;
+ switch (c >> 4) {
+ case 0 :
+ case 1 :
+ /* skip unprintable ascii */
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ case 2 :
+ if (c == 0x20) {
+ if (removeblankp) {
+ if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+ continue;
+ } else {
+ *d = ' ';
+ ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
+ }
+ } else {
+ *d = c;
+ ctype = GRN_CHAR_SYMBOL;
+ }
+ break;
+ case 3 :
+ *d = c;
+ ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
+ break;
+ case 4 :
+ *d = ('A' <= c) ? c + 0x20 : c;
+ ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 5 :
+ *d = (c <= 'Z') ? c + 0x20 : c;
+ ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
+ break;
+ case 6 :
+ *d = c;
+ ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
+ break;
+ case 7 :
+ *d = c;
+ ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
+ break;
+ case 0x0a :
+ *d = c;
+ ctype = (c == 0xa3) ? GRN_CHAR_ALPHA : GRN_CHAR_OTHERS;
+ break;
+ case 0x0b :
+ if (c == 0xb3) {
+ *d = c - 0x10;
+ ctype = GRN_CHAR_ALPHA;
+ } else {
+ *d = c;
+ ctype = GRN_CHAR_OTHERS;
+ }
+ break;
+ case 0x0c :
+ case 0x0d :
+ *d = c;
+ ctype = GRN_CHAR_ALPHA;
+ break;
+ case 0x0e :
+ case 0x0f :
+ *d = c - 0x20;
+ ctype = GRN_CHAR_ALPHA;
+ break;
+ default :
+ *d = c;
+ ctype = GRN_CHAR_OTHERS;
+ break;
+ }
+ d++;
+ length++;
+ if (cp) { *cp++ = ctype; }
+ if (ch) {
+ *ch++ = (int16_t)(s + 1 - s_);
+ s_ = s + 1;
+ while (++d_ < d) { *ch++ = 0; }
+ }
+ }
+ if (cp) { *cp = GRN_CHAR_NULL; }
+ *d = '\0';
+ nstr->length = length;
+ nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+ return GRN_SUCCESS;
+}
+
+static grn_str *
+grn_fakenstr_open(grn_ctx *ctx, const char *str, size_t str_len, grn_encoding encoding, int flags)
+{
+ /* TODO: support GRN_STR_REMOVEBLANK flag and ctypes */
+ grn_str *nstr;
+ if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
+ GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_fakenstr_open failed !");
+ return NULL;
+ }
+ if (!(nstr->norm = GRN_MALLOC(str_len + 1))) {
+ GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation for keyword on grn_snip_add_cond failed !");
+ GRN_FREE(nstr);
+ return NULL;
+ }
+ nstr->orig = str;
+ nstr->orig_blen = str_len;
+ grn_memcpy(nstr->norm, str, str_len);
+ nstr->norm[str_len] = '\0';
+ nstr->norm_blen = str_len;
+ nstr->ctypes = NULL;
+ nstr->flags = flags;
+
+ if (flags & GRN_STR_WITH_CHECKS) {
+ int16_t f = 0;
+ unsigned char c;
+ size_t i;
+ if (!(nstr->checks = (int16_t *) GRN_MALLOC(sizeof(int16_t) * str_len))) {
+ GRN_FREE(nstr->norm);
+ GRN_FREE(nstr);
+ return NULL;
+ }
+ switch (encoding) {
+ case GRN_ENC_EUC_JP:
+ for (i = 0; i < str_len; i++) {
+ if (!f) {
+ c = (unsigned char) str[i];
+ f = ((c >= 0xa1U && c <= 0xfeU) || c == 0x8eU ? 2 : (c == 0x8fU ? 3 : 1)
+ );
+ nstr->checks[i] = f;
+ } else {
+ nstr->checks[i] = 0;
+ }
+ f--;
+ }
+ break;
+ case GRN_ENC_SJIS:
+ for (i = 0; i < str_len; i++) {
+ if (!f) {
+ c = (unsigned char) str[i];
+ f = (c >= 0x81U && ((c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU)) ? 2 : 1);
+ nstr->checks[i] = f;
+ } else {
+ nstr->checks[i] = 0;
+ }
+ f--;
+ }
+ break;
+ case GRN_ENC_UTF8:
+ for (i = 0; i < str_len; i++) {
+ if (!f) {
+ c = (unsigned char) str[i];
+ f = (c & 0x80U ? (c & 0x20U ? (c & 0x10U ? 4 : 3)
+ : 2)
+ : 1);
+ nstr->checks[i] = f;
+ } else {
+ nstr->checks[i] = 0;
+ }
+ f--;
+ }
+ break;
+ default:
+ for (i = 0; i < str_len; i++) {
+ nstr->checks[i] = 1;
+ }
+ break;
+ }
+ } else {
+ nstr->checks = NULL;
+ }
+ return nstr;
+}
+
+grn_str *
+grn_str_open_(grn_ctx *ctx, const char *str, unsigned int str_len, int flags, grn_encoding encoding)
+{
+ grn_rc rc;
+ grn_str *nstr;
+ if (!str || !str_len) { return NULL; }
+
+ if (!(flags & GRN_STR_NORMALIZE)) {
+ return grn_fakenstr_open(ctx, str, str_len, encoding, flags);
+ }
+
+ if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
+ GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_str_open failed !");
+ return NULL;
+ }
+ nstr->orig = str;
+ nstr->orig_blen = str_len;
+ nstr->norm = NULL;
+ nstr->norm_blen = 0;
+ nstr->checks = NULL;
+ nstr->ctypes = NULL;
+ nstr->encoding = encoding;
+ nstr->flags = flags;
+ switch (encoding) {
+ case GRN_ENC_EUC_JP :
+ rc = normalize_euc(ctx, nstr);
+ break;
+ case GRN_ENC_UTF8 :
+#ifdef GRN_WITH_NFKC
+ rc = normalize_utf8(ctx, nstr);
+#else /* GRN_WITH_NFKC */
+ rc = normalize_none(ctx, nstr);
+#endif /* GRN_WITH_NFKC */
+ break;
+ case GRN_ENC_SJIS :
+ rc = normalize_sjis(ctx, nstr);
+ break;
+ case GRN_ENC_LATIN1 :
+ rc = normalize_latin1(ctx, nstr);
+ break;
+ case GRN_ENC_KOI8R :
+ rc = normalize_koi8r(ctx, nstr);
+ break;
+ default :
+ rc = normalize_none(ctx, nstr);
+ break;
+ }
+ if (rc) {
+ grn_str_close(ctx, nstr);
+ return NULL;
+ }
+ return nstr;
+}
+
+grn_str *
+grn_str_open(grn_ctx *ctx, const char *str, unsigned int str_len, int flags)
+{
+ return grn_str_open_(ctx, str, str_len, flags, ctx->encoding);
+}
+
+grn_rc
+grn_str_close(grn_ctx *ctx, grn_str *nstr)
+{
+ if (nstr) {
+ if (nstr->norm) { GRN_FREE(nstr->norm); }
+ if (nstr->ctypes) { GRN_FREE(nstr->ctypes); }
+ if (nstr->checks) { GRN_FREE(nstr->checks); }
+ GRN_FREE(nstr);
+ return GRN_SUCCESS;
+ } else {
+ return GRN_INVALID_ARGUMENT;
+ }
+}
+
+static const char *grn_enc_string[] = {
+ "default",
+ "none",
+ "euc_jp",
+ "utf8",
+ "sjis",
+ "latin1",
+ "koi8r"
+};
+
+const char *
+grn_encoding_to_string(grn_encoding enc)
+{
+ if (enc < (sizeof(grn_enc_string) / sizeof(char *))) {
+ return grn_enc_string[enc];
+ } else {
+ return "unknown";
+ }
+}
+
+grn_encoding
+grn_encoding_parse(const char *str)
+{
+ grn_encoding e = GRN_ENC_UTF8;
+ int i = sizeof(grn_enc_string) / sizeof(grn_enc_string[0]);
+ while (i--) {
+ if (!strcmp(str, grn_enc_string[i])) {
+ e = (grn_encoding)i;
+ }
+ }
+ return e;
+}
+
+size_t
+grn_str_len(grn_ctx *ctx, const char *str, grn_encoding encoding, const char **last)
+{
+ size_t len, tlen;
+ const char *p = NULL;
+ for (len = 0; ; len++) {
+ p = str;
+ if (!(tlen = grn_str_charlen(ctx, str, encoding))) {
+ break;
+ }
+ str += tlen;
+ }
+ if (last) { *last = p; }
+ return len;
+}
+
+int
+grn_isspace(const char *str, grn_encoding encoding)
+{
+ const unsigned char *s = (const unsigned char *) str;
+ if (!s) { return 0; }
+ switch (s[0]) {
+ case ' ' :
+ case '\f' :
+ case '\n' :
+ case '\r' :
+ case '\t' :
+ case '\v' :
+ return 1;
+ case 0x81 :
+ if (encoding == GRN_ENC_SJIS && s[1] == 0x40) { return 2; }
+ break;
+ case 0xA1 :
+ if (encoding == GRN_ENC_EUC_JP && s[1] == 0xA1) { return 2; }
+ break;
+ case 0xE3 :
+ if (encoding == GRN_ENC_UTF8 && s[1] == 0x80 && s[2] == 0x80) { return 3; }
+ break;
+ default :
+ break;
+ }
+ return 0;
+}
+
+int8_t
+grn_atoi8(const char *nptr, const char *end, const char **rest)
+{
+ const char *p = nptr;
+ int8_t v = 0, t, n = 0, o = 0;
+ if (p < end && *p == '-') {
+ p++;
+ n = 1;
+ o = 1;
+ }
+ while (p < end && *p >= '0' && *p <= '9') {
+ t = v * 10 - (*p - '0');
+ if (t > v || (!n && t == INT8_MIN)) { v = 0; break; }
+ v = t;
+ o = 0;
+ p++;
+ }
+ if (rest) { *rest = o ? nptr : p; }
+ return n ? v : -v;
+}
+
+uint8_t
+grn_atoui8(const char *nptr, const char *end, const char **rest)
+{
+ uint8_t v = 0, t;
+ while (nptr < end && *nptr >= '0' && *nptr <= '9') {
+ t = v * 10 + (*nptr - '0');
+ if (t < v) { v = 0; break; }
+ v = t;
+ nptr++;
+ }
+ if (rest) { *rest = nptr; }
+ return v;
+}
+
+int16_t
+grn_atoi16(const char *nptr, const char *end, const char **rest)
+{
+ const char *p = nptr;
+ int16_t v = 0, t, n = 0, o = 0;
+ if (p < end && *p == '-') {
+ p++;
+ n = 1;
+ o = 1;
+ }
+ while (p < end && *p >= '0' && *p <= '9') {
+ t = v * 10 - (*p - '0');
+ if (t > v || (!n && t == INT16_MIN)) { v = 0; break; }
+ v = t;
+ o = 0;
+ p++;
+ }
+ if (rest) { *rest = o ? nptr : p; }
+ return n ? v : -v;
+}
+
+uint16_t
+grn_atoui16(const char *nptr, const char *end, const char **rest)
+{
+ uint16_t v = 0, t;
+ while (nptr < end && *nptr >= '0' && *nptr <= '9') {
+ t = v * 10 + (*nptr - '0');
+ if (t < v) { v = 0; break; }
+ v = t;
+ nptr++;
+ }
+ if (rest) { *rest = nptr; }
+ return v;
+}
+
+int
+grn_atoi(const char *nptr, const char *end, const char **rest)
+{
+ const char *p = nptr;
+ int v = 0, t, n = 0, o = 0;
+ if (p < end && *p == '-') {
+ p++;
+ n = 1;
+ o = 1;
+ }
+ while (p < end && *p >= '0' && *p <= '9') {
+ t = v * 10 - (*p - '0');
+ if (t > v || (!n && t == INT32_MIN)) { v = 0; break; }
+ v = t;
+ o = 0;
+ p++;
+ }
+ if (rest) { *rest = o ? nptr : p; }
+ return n ? v : -v;
+}
+
+unsigned int
+grn_atoui(const char *nptr, const char *end, const char **rest)
+{
+ unsigned int v = 0, t;
+ while (nptr < end && *nptr >= '0' && *nptr <= '9') {
+ t = v * 10 + (*nptr - '0');
+ if (t < v) { v = 0; break; }
+ v = t;
+ nptr++;
+ }
+ if (rest) { *rest = nptr; }
+ return v;
+}
+
+int64_t
+grn_atoll(const char *nptr, const char *end, const char **rest)
+{
+ const char *p = nptr;
+ int o = 0;
+ int64_t v = 0;
+ if (p < end && *p == '-') {
+ p++;
+ o = 1;
+ while (p < end && *p >= '0' && *p <= '9') {
+ int64_t t = v * 10 - (*p - '0');
+ if (t > v) { v = 0; break; }
+ v = t;
+ o = 0;
+ p++;
+ }
+ } else {
+ while (p < end && *p >= '0' && *p <= '9') {
+ int64_t t = v * 10 + (*p - '0');
+ if (t < v) { v = 0; break; }
+ v = t;
+ p++;
+ }
+ }
+ if (rest) { *rest = o ? nptr : p; }
+ return v;
+}
+
+uint64_t
+grn_atoull(const char *nptr, const char *end, const char **rest)
+{
+ uint64_t v = 0, t;
+ while (nptr < end && *nptr >= '0' && *nptr <= '9') {
+ t = v * 10 + (*nptr - '0');
+ if (t < v) { v = 0; break; }
+ v = t;
+ nptr++;
+ }
+ if (rest) { *rest = nptr; }
+ return v;
+}
+
+unsigned int
+grn_htoui(const char *nptr, const char *end, const char **rest)
+{
+ unsigned int v = 0, t;
+ while (nptr < end) {
+ switch (*nptr) {
+ case '0' :
+ case '1' :
+ case '2' :
+ case '3' :
+ case '4' :
+ case '5' :
+ case '6' :
+ case '7' :
+ case '8' :
+ case '9' :
+ t = v * 16 + (*nptr++ - '0');
+ break;
+ case 'a' :
+ case 'b' :
+ case 'c' :
+ case 'd' :
+ case 'e' :
+ case 'f' :
+ t = v * 16 + (*nptr++ - 'a') + 10;
+ break;
+ case 'A' :
+ case 'B' :
+ case 'C' :
+ case 'D' :
+ case 'E' :
+ case 'F' :
+ t = v * 16 + (*nptr++ - 'A') + 10;
+ break;
+ default :
+ v = 0; goto exit;
+ }
+ if (t < v) { v = 0; goto exit; }
+ v = t;
+ }
+exit :
+ if (rest) { *rest = nptr; }
+ return v;
+}
+
+void
+grn_itoh(unsigned int i, char *p, unsigned int len)
+{
+ static const char *hex = "0123456789ABCDEF";
+ p += len - 1;
+ while (len--) {
+ *p-- = hex[i & 0xf];
+ i >>= 4;
+ }
+}
+
+grn_rc
+grn_itoa(int i, char *p, char *end, char **rest)
+{
+ char *q;
+ if (p >= end) { return GRN_INVALID_ARGUMENT; }
+ q = p;
+ if (i < 0) {
+ *p++ = '-';
+ q = p;
+ if (i == INT_MIN) {
+ if (p >= end) { return GRN_INVALID_ARGUMENT; }
+ *p++ = (-(i % 10)) + '0';
+ i /= 10;
+ }
+ i = -i;
+ }
+ do {
+ if (p >= end) { return GRN_INVALID_ARGUMENT; }
+ *p++ = i % 10 + '0';
+ } while ((i /= 10) > 0);
+ if (rest) { *rest = p; }
+ for (p--; q < p; q++, p--) {
+ char t = *q;
+ *q = *p;
+ *p = t;
+ }
+ return GRN_SUCCESS;
+}
+
+grn_rc
+grn_itoa_padded(int i, char *p, char *end, char ch)
+{
+ char *q;
+ if (p >= end) { return GRN_INVALID_ARGUMENT; }
+ if (i < 0) {
+ *p++ = '-';
+ if (i == INT_MIN) {
+ if (p >= end) { return GRN_INVALID_ARGUMENT; }
+ *p++ = (-(i % 10)) + '0';
+ i /= 10;
+ }
+ i = -i;
+ }
+ q = end - 1;
+ do {
+ if (q < p) { return GRN_INVALID_ARGUMENT; }
+ *q-- = i % 10 + '0';
+ } while ((i /= 10) > 0);
+ while (q >= p) {
+ *q-- = ch;
+ }
+ return GRN_SUCCESS;
+}
+
+grn_rc
+grn_lltoa(int64_t i, char *p, char *end, char **rest)
+{
+ char *q;
+ if (p >= end) { return GRN_INVALID_ARGUMENT; }
+ q = p;
+ if (i < 0) {
+ *p++ = '-';
+ q = p;
+ if (i == INT64_MIN) {
+ *p++ = (-(i % 10)) + '0';
+ i /= 10;
+ }
+ i = -i;
+ }
+ do {
+ if (p >= end) { return GRN_INVALID_ARGUMENT; }
+ *p++ = i % 10 + '0';
+ } while ((i /= 10) > 0);
+ if (rest) { *rest = p; }
+ for (p--; q < p; q++, p--) {
+ char t = *q;
+ *q = *p;
+ *p = t;
+ }
+ return GRN_SUCCESS;
+}
+
+grn_rc
+grn_ulltoa(uint64_t i, char *p, char *end, char **rest)
+{
+ char *q;
+ if (p >= end) { return GRN_INVALID_ARGUMENT; }
+ q = p;
+ do {
+ if (p >= end) { return GRN_INVALID_ARGUMENT; }
+ *p++ = i % 10 + '0';
+ } while ((i /= 10) > 0);
+ if (rest) { *rest = p; }
+ for (p--; q < p; q++, p--) {
+ char t = *q;
+ *q = *p;
+ *p = t;
+ }
+ return GRN_SUCCESS;
+}
+
+#define I2B(i) \
+ ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(i) & 0x3f])
+
+#define B2I(b) \
+ (((b) < '+' || 'z' < (b)) ? 0xff : "\x3e\xff\xff\xff\x3f\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\xff\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33"[(b) - '+'])
+
+#define MASK 0x34d34d34
+
+char *
+grn_itob(grn_id id, char *p)
+{
+ id ^= MASK;
+ *p++ = I2B(id >> 24);
+ *p++ = I2B(id >> 18);
+ *p++ = I2B(id >> 12);
+ *p++ = I2B(id >> 6);
+ *p++ = I2B(id);
+ return p;
+}
+
+grn_id
+grn_btoi(char *b)
+{
+ uint8_t i;
+ grn_id id = 0;
+ int len = 5;
+ while (len--) {
+ char c = *b++;
+ if ((i = B2I(c)) == 0xff) { return 0; }
+ id = (id << 6) + i;
+ }
+ return id ^ MASK;
+}
+
+#define I2B32H(i) ("0123456789ABCDEFGHIJKLMNOPQRSTUV"[(i) & 0x1f])
+
+char *
+grn_lltob32h(int64_t i, char *p)
+{
+ uint64_t u = (uint64_t)i + 0x8000000000000000ULL;
+ *p++ = I2B32H(u >> 60);
+ *p++ = I2B32H(u >> 55);
+ *p++ = I2B32H(u >> 50);
+ *p++ = I2B32H(u >> 45);
+ *p++ = I2B32H(u >> 40);
+ *p++ = I2B32H(u >> 35);
+ *p++ = I2B32H(u >> 30);
+ *p++ = I2B32H(u >> 25);
+ *p++ = I2B32H(u >> 20);
+ *p++ = I2B32H(u >> 15);
+ *p++ = I2B32H(u >> 10);
+ *p++ = I2B32H(u >> 5);
+ *p++ = I2B32H(u);
+ return p;
+}
+
+char *
+grn_ulltob32h(uint64_t i, char *p)
+{
+ char lb = (i >> 59) & 0x10;
+ i += 0x8000000000000000ULL;
+ *p++ = lb + I2B32H(i >> 60);
+ *p++ = I2B32H(i >> 55);
+ *p++ = I2B32H(i >> 50);
+ *p++ = I2B32H(i >> 45);
+ *p++ = I2B32H(i >> 40);
+ *p++ = I2B32H(i >> 35);
+ *p++ = I2B32H(i >> 30);
+ *p++ = I2B32H(i >> 25);
+ *p++ = I2B32H(i >> 20);
+ *p++ = I2B32H(i >> 15);
+ *p++ = I2B32H(i >> 10);
+ *p++ = I2B32H(i >> 5);
+ *p++ = I2B32H(i);
+ return p;
+}
+
+grn_rc
+grn_aton(grn_ctx *ctx, const char *p, const char *end, const char **rest,
+ grn_obj *res)
+{
+ if (*p == '+') {
+ p++;
+ }
+
+ switch (*p) {
+ case '-' :
+ case '0' : case '1' : case '2' : case '3' : case '4' :
+ case '5' : case '6' : case '7' : case '8' : case '9' :
+ {
+ int64_t int64;
+ char rest_char;
+ int64 = grn_atoll(p, end, rest);
+ rest_char = **rest;
+ if (end == *rest) {
+ if ((int64_t)INT32_MIN <= int64 && int64 <= (int64_t)INT32_MAX) {
+ grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
+ GRN_INT32_SET(ctx, res, int64);
+ } else if ((int64_t)INT32_MAX < int64 && int64 <= (int64_t)UINT32_MAX) {
+ grn_obj_reinit(ctx, res, GRN_DB_UINT32, 0);
+ GRN_UINT32_SET(ctx, res, int64);
+ } else {
+ grn_obj_reinit(ctx, res, GRN_DB_INT64, 0);
+ GRN_INT64_SET(ctx, res, int64);
+ }
+ } else {
+ if (*p != '-' && rest_char >= '0' && rest_char <= '9') {
+ uint64_t uint64 = grn_atoull(p, end, rest);
+ if (end == *rest) {
+ grn_obj_reinit(ctx, res, GRN_DB_UINT64, 0);
+ GRN_UINT64_SET(ctx, res, uint64);
+ }
+ }
+ if (end != *rest) {
+ if (rest_char == '.' || rest_char == 'e' || rest_char == 'E' ||
+ (rest_char >= '0' && rest_char <= '9')) {
+ char *rest_float;
+ double d;
+ errno = 0;
+ d = strtod(p, &rest_float);
+ if (!errno && rest_float == end) {
+ grn_obj_reinit(ctx, res, GRN_DB_FLOAT, 0);
+ GRN_FLOAT_SET(ctx, res, d);
+ *rest = rest_float;
+ } else {
+ return GRN_INVALID_ARGUMENT;
+ }
+ }
+ }
+ }
+ }
+ break;
+ default :
+ return GRN_INVALID_ARGUMENT;
+ }
+
+ return GRN_SUCCESS;
+}
+
+int
+grn_str_tok(const char *str, size_t str_len, char delim, const char **tokbuf, int buf_size, const char **rest)
+{
+ const char **tok = tokbuf, **tok_end = tokbuf + buf_size;
+ if (buf_size > 0) {
+ const char *str_end = str + str_len;
+ for (;;str++) {
+ if (str == str_end) {
+ *tok++ = str;
+ break;
+ }
+ if (delim == *str) {
+ // *str = '\0';
+ *tok++ = str;
+ if (tok == tok_end) { break; }
+ }
+ }
+ }
+ if (rest) { *rest = str; }
+ return tok - tokbuf;
+}
+
+inline static int
+op_getopt_flag(int *flags, const grn_str_getopt_opt *o,
+ int argc, char * const argv[], int i, const char *optvalue)
+{
+ switch (o->op) {
+ case GETOPT_OP_NONE:
+ break;
+ case GETOPT_OP_ON:
+ *flags |= o->flag;
+ break;
+ case GETOPT_OP_OFF:
+ *flags &= ~o->flag;
+ break;
+ case GETOPT_OP_UPDATE:
+ *flags = o->flag;
+ break;
+ default:
+ return i;
+ }
+ if (o->arg) {
+ if (optvalue) {
+ *o->arg = (char *)optvalue;
+ } else if (++i < argc) {
+ *o->arg = argv[i];
+ } else {
+ return -1;
+ }
+ }
+ return i;
+}
+
+int
+grn_str_getopt(int argc, char * const argv[], const grn_str_getopt_opt *opts,
+ int *flags)
+{
+ int i;
+ for (i = 1; i < argc; i++) {
+ const char * v = argv[i];
+ if (*v == '-') {
+ const grn_str_getopt_opt *o;
+ int found;
+ if (*++v == '-') {
+ const char *eq;
+ size_t len;
+ found = 0;
+ v++;
+ for (eq = v; *eq != '\0' && *eq != '='; eq++) {}
+ len = eq - v;
+ for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
+ if (o->longopt && strlen(o->longopt) == len &&
+ !memcmp(v, o->longopt, len)) {
+ i = op_getopt_flag(flags, o, argc, argv, i,
+ (*eq == '\0' ? NULL : eq + 1));
+ if (i < 0) {
+ fprintf(stderr, "%s: option '--%s' needs argument.\n", argv[0], o->longopt);
+ return -1;
+ }
+ found = 1;
+ break;
+ }
+ }
+ if (!found) { goto exit; }
+ } else {
+ const char *p;
+ for (p = v; *p; p++) {
+ found = 0;
+ for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
+ if (o->opt && *p == o->opt) {
+ i = op_getopt_flag(flags, o, argc, argv, i, NULL);
+ if (i < 0) {
+ fprintf(stderr, "%s: option '-%c' needs argument.\n", argv[0], *p);
+ return -1;
+ }
+ found = 1;
+ break;
+ }
+ }
+ if (!found) { goto exit; }
+ }
+ }
+ } else {
+ break;
+ }
+ }
+ return i;
+exit:
+ fprintf(stderr, "%s: cannot recognize option '%s'.\n", argv[0], argv[i]);
+ return -1;
+}
+
+#define UNIT_SIZE (1 << 12)
+#define UNIT_MASK (UNIT_SIZE - 1)
+
+int grn_bulk_margin_size = 0;
+
+grn_rc
+grn_bulk_resize(grn_ctx *ctx, grn_obj *buf, unsigned int newsize)
+{
+ char *head;
+ unsigned int rounded_newsize;
+ newsize += grn_bulk_margin_size + 1;
+ if (GRN_BULK_OUTP(buf)) {
+ rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK;
+ if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; }
+ newsize = rounded_newsize;
+ head = buf->u.b.head - (buf->u.b.head ? grn_bulk_margin_size : 0);
+ if (!(head = GRN_REALLOC(head, newsize))) { return GRN_NO_MEMORY_AVAILABLE; }
+ buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf);
+ buf->u.b.head = head + grn_bulk_margin_size;
+ buf->u.b.tail = head + newsize;
+ } else {
+ if (newsize > GRN_BULK_BUFSIZE) {
+ rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK;
+ if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; }
+ newsize = rounded_newsize;
+ if (!(head = GRN_MALLOC(newsize))) { return GRN_NO_MEMORY_AVAILABLE; }
+ grn_memcpy(head, GRN_BULK_HEAD(buf), GRN_BULK_VSIZE(buf));
+ buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf);
+ buf->u.b.head = head + grn_bulk_margin_size;
+ buf->u.b.tail = head + newsize;
+ buf->header.impl_flags |= GRN_OBJ_OUTPLACE;
+ }
+ }
+ return GRN_SUCCESS;
+}
+
+grn_rc
+grn_bulk_reinit(grn_ctx *ctx, grn_obj *buf, unsigned int size)
+{
+ GRN_BULK_REWIND(buf);
+ return grn_bulk_resize(ctx, buf, size);
+}
+
+grn_rc
+grn_bulk_write(grn_ctx *ctx, grn_obj *buf, const char *str, unsigned int len)
+{
+ grn_rc rc = GRN_SUCCESS;
+ char *curr;
+ if (GRN_BULK_REST(buf) < len) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
+ }
+ curr = GRN_BULK_CURR(buf);
+ if (str)
+ grn_memcpy(curr, str, len);
+ GRN_BULK_INCR_LEN(buf, len);
+ return rc;
+}
+
+grn_rc
+grn_bulk_write_from(grn_ctx *ctx, grn_obj *bulk,
+ const char *str, unsigned int from, unsigned int len)
+{
+ grn_rc rc = grn_bulk_truncate(ctx, bulk, from);
+ if (!rc) { rc = grn_bulk_write(ctx, bulk, str, len); }
+ return rc;
+}
+
+grn_rc
+grn_bulk_reserve(grn_ctx *ctx, grn_obj *buf, unsigned int len)
+{
+ grn_rc rc = GRN_SUCCESS;
+ if (GRN_BULK_REST(buf) < len) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
+ }
+ return rc;
+}
+
+grn_rc
+grn_bulk_space(grn_ctx *ctx, grn_obj *buf, unsigned int len)
+{
+ grn_rc rc = grn_bulk_reserve(ctx, buf, len);
+ if (!rc) {
+ GRN_BULK_INCR_LEN(buf, len);
+ }
+ return rc;
+}
+
+static grn_rc
+grn_bulk_space_clear(grn_ctx *ctx, grn_obj *buf, unsigned int len)
+{
+ grn_rc rc = grn_bulk_reserve(ctx, buf, len);
+ if (!rc) {
+ memset(GRN_BULK_CURR(buf), 0, len);
+ GRN_BULK_INCR_LEN(buf, len);
+ }
+ return rc;
+}
+
+grn_rc
+grn_bulk_truncate(grn_ctx *ctx, grn_obj *bulk, unsigned int len)
+{
+ if (GRN_BULK_OUTP(bulk)) {
+ if ((bulk->u.b.tail - bulk->u.b.head) < len) {
+ return grn_bulk_space_clear(ctx, bulk, len);
+ } else {
+ bulk->u.b.curr = bulk->u.b.head + len;
+ }
+ } else {
+ if (GRN_BULK_BUFSIZE < len) {
+ return grn_bulk_space_clear(ctx, bulk, len);
+ } else {
+ bulk->header.flags &= ~GRN_BULK_BUFSIZE_MAX;
+ bulk->header.flags += len;
+ }
+ }
+ return GRN_SUCCESS;
+}
+
+grn_rc
+grn_text_itoa(grn_ctx *ctx, grn_obj *buf, int i)
+{
+ grn_rc rc = GRN_SUCCESS;
+ for (;;) {
+ char *curr = GRN_BULK_CURR(buf);
+ char *tail = GRN_BULK_TAIL(buf);
+ if (grn_itoa(i, curr, tail, &curr)) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; }
+ } else {
+ GRN_BULK_SET_CURR(buf, curr);
+ break;
+ }
+ }
+ return rc;
+}
+
+grn_rc
+grn_text_itoa_padded(grn_ctx *ctx, grn_obj *buf, int i, char ch, unsigned int len)
+{
+ grn_rc rc = GRN_SUCCESS;
+ char *curr;
+ if ((rc = grn_bulk_reserve(ctx, buf, len))) { return rc; }
+ curr = GRN_BULK_CURR(buf);
+ if (!grn_itoa_padded(i, curr, curr + len, ch)) {
+ GRN_BULK_SET_CURR(buf, curr + len);
+ }
+ return rc;
+}
+
+grn_rc
+grn_text_lltoa(grn_ctx *ctx, grn_obj *buf, long long int i)
+{
+ grn_rc rc = GRN_SUCCESS;
+ for (;;) {
+ char *curr = GRN_BULK_CURR(buf);
+ char *tail = GRN_BULK_TAIL(buf);
+ if (grn_lltoa(i, curr, tail, &curr)) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; }
+ } else {
+ GRN_BULK_SET_CURR(buf, curr);
+ break;
+ }
+ }
+ return rc;
+}
+
+grn_rc
+grn_text_ulltoa(grn_ctx *ctx, grn_obj *buf, unsigned long long int i)
+{
+ grn_rc rc = GRN_SUCCESS;
+ for (;;) {
+ char *curr = GRN_BULK_CURR(buf);
+ char *tail = GRN_BULK_TAIL(buf);
+ if (grn_ulltoa(i, curr, tail, &curr)) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; }
+ } else {
+ GRN_BULK_SET_CURR(buf, curr);
+ break;
+ }
+ }
+ return rc;
+}
+
+inline static void
+ftoa_(grn_ctx *ctx, grn_obj *buf, double d)
+{
+ char *start;
+ size_t before_size;
+ size_t len;
+#define DIGIT_NUMBER 16
+#define FIRST_BUFFER_SIZE (DIGIT_NUMBER + 4)
+ before_size = GRN_BULK_VSIZE(buf);
+ grn_bulk_reserve(ctx, buf, FIRST_BUFFER_SIZE);
+ grn_text_printf(ctx, buf, "%#.*g", DIGIT_NUMBER, d);
+ len = GRN_BULK_VSIZE(buf) - before_size;
+ start = GRN_BULK_CURR(buf) - len;
+#undef FIRST_BUFFER_SIZE
+#undef DIGIT_NUMBER
+ if (start[len - 1] == '.') {
+ GRN_TEXT_PUTC(ctx, buf, '0');
+ } else {
+ char *p, *q;
+ start[len] = '\0';
+ if ((p = strchr(start, 'e'))) {
+ for (q = p; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
+ grn_memmove(q, p, start + len - q);
+ } else {
+ for (q = start + len; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
+ }
+ grn_bulk_truncate(ctx, buf, before_size + len);
+ }
+}
+
+grn_rc
+grn_text_ftoa(grn_ctx *ctx, grn_obj *buf, double d)
+{
+ grn_rc rc = GRN_SUCCESS;
+ if (GRN_BULK_REST(buf) < 32) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 32))) { return rc; }
+ }
+#ifdef HAVE_FPCLASSIFY
+ switch (fpclassify(d)) {
+ case FP_NAN :
+ GRN_TEXT_PUTS(ctx, buf, "#<nan>");
+ break;
+ case FP_INFINITE :
+ GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0");
+ break;
+ default :
+ ftoa_(ctx, buf, d);
+ break;
+ }
+#else /* HAVE_FPCLASSIFY */
+ if (d == d) {
+ if (d != 0 && ((d / 2.0) == d)) {
+ GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0");
+ } else {
+ ftoa_(ctx, buf, d);
+ }
+ } else {
+ GRN_TEXT_PUTS(ctx, buf, "#<nan>");
+ }
+#endif /* HAVE_FPCLASSIFY */
+ return rc;
+}
+
+grn_rc
+grn_text_itoh(grn_ctx *ctx, grn_obj *buf, int i, unsigned int len)
+{
+ grn_rc rc = GRN_SUCCESS;
+ if (GRN_BULK_REST(buf) < len) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
+ }
+ grn_itoh(i, GRN_BULK_CURR(buf), len);
+ GRN_BULK_INCR_LEN(buf, len);
+ return rc;
+}
+
+grn_rc
+grn_text_itob(grn_ctx *ctx, grn_obj *buf, grn_id id)
+{
+ size_t len = 5;
+ grn_rc rc = GRN_SUCCESS;
+ if (GRN_BULK_REST(buf) < len) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
+ }
+ grn_itob(id, GRN_BULK_CURR(buf));
+ GRN_BULK_INCR_LEN(buf, len);
+ return rc;
+}
+
+grn_rc
+grn_text_lltob32h(grn_ctx *ctx, grn_obj *buf, long long int i)
+{
+ size_t len = 13;
+ grn_rc rc = GRN_SUCCESS;
+ if (GRN_BULK_REST(buf) < len) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
+ }
+ grn_lltob32h(i, GRN_BULK_CURR(buf));
+ GRN_BULK_INCR_LEN(buf, len);
+ return rc;
+}
+
+grn_rc
+grn_text_esc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
+{
+ const char *e;
+ unsigned int l;
+ grn_rc rc = GRN_SUCCESS;
+
+ GRN_TEXT_PUTC(ctx, buf, '"');
+ for (e = s + len; s < e; s += l) {
+ if (!(l = grn_charlen(ctx, s, e))) { break; }
+ if (l == 1) {
+ switch (*s) {
+ case '"' :
+ grn_bulk_write(ctx, buf, "\\\"", 2);
+ break;
+ case '\\' :
+ grn_bulk_write(ctx, buf, "\\\\", 2);
+ break;
+ case '\b' :
+ grn_bulk_write(ctx, buf, "\\b", 2);
+ break;
+ case '\f' :
+ grn_bulk_write(ctx, buf, "\\f", 2);
+ break;
+ case '\n' :
+ grn_bulk_write(ctx, buf, "\\n", 2);
+ break;
+ case '\r' :
+ grn_bulk_write(ctx, buf, "\\r", 2);
+ break;
+ case '\t' :
+ grn_bulk_write(ctx, buf, "\\t", 2);
+ break;
+ case '\x00': case '\x01': case '\x02': case '\x03': case '\x04': case '\x05':
+ case '\x06': case '\x07': case '\x0b': case '\x0e': case '\x0f': case '\x10':
+ case '\x11': case '\x12': case '\x13': case '\x14': case '\x15': case '\x16':
+ case '\x17': case '\x18': case '\x19': case '\x1a': case '\x1b': case '\x1c':
+ case '\x1d': case '\x1e': case '\x1f': case '\x7f':
+ if (!(rc = grn_bulk_write(ctx, buf, "\\u", 2))) {
+ if ((rc = grn_text_itoh(ctx, buf, *s, 4))) {
+ GRN_BULK_INCR_LEN(buf, -2);
+ return rc;
+ }
+ } else {
+ return rc;
+ }
+ break;
+ default :
+ GRN_TEXT_PUTC(ctx, buf, *s);
+ }
+ } else if (l == 3) {
+ if (*s == '\xe2' && *(s + 1) == '\x80') {
+ switch (*(s + 2)) {
+ case '\xa8': /* \u2028 */
+ grn_bulk_write(ctx, buf, "\\u2028", 6);
+ break;
+ case '\xa9': /* \u2029 */
+ grn_bulk_write(ctx, buf, "\\u2029", 6);
+ break;
+ default:
+ grn_bulk_write(ctx, buf, s, l);
+ }
+ } else {
+ grn_bulk_write(ctx, buf, s, l);
+ }
+ } else {
+ grn_bulk_write(ctx, buf, s, l);
+ }
+ }
+ GRN_TEXT_PUTC(ctx, buf, '"');
+ return rc;
+}
+
+grn_rc
+grn_text_escape_xml(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
+{
+ const char *e;
+ unsigned int l;
+ grn_rc rc = GRN_SUCCESS;
+
+ for (e = s + len; s < e; s += l) {
+ if (!(l = grn_charlen(ctx, s, e))) { break; }
+ if (l == 1) {
+ switch (*s) {
+ case '"' :
+ grn_bulk_write(ctx, buf, "&quot;", 6);
+ break;
+ case '<' :
+ grn_bulk_write(ctx, buf, "&lt;", 4);
+ break;
+ case '>' :
+ grn_bulk_write(ctx, buf, "&gt;", 4);
+ break;
+ case '&' :
+ grn_bulk_write(ctx, buf, "&amp;", 5);
+ break;
+ default :
+ GRN_TEXT_PUTC(ctx, buf, *s);
+ }
+ } else {
+ grn_bulk_write(ctx, buf, s, l);
+ }
+ }
+ return rc;
+}
+
+#define TOK_ESC (0x80)
+
+const char *
+grn_text_unesc_tok(grn_ctx *ctx, grn_obj *buf, const char *s, const char *e, char *tok_type)
+{
+ const char *p;
+ unsigned int len;
+ uint8_t stat = GRN_TOK_VOID;
+ for (p = s; p < e; p += len) {
+ if (!(len = grn_charlen(ctx, p, e))) {
+ p = e;
+ stat &= ~TOK_ESC;
+ goto exit;
+ }
+ switch (stat) {
+ case GRN_TOK_VOID :
+ if (*p == ' ') { continue; }
+ switch (*p) {
+ case '"' :
+ stat = GRN_TOK_STRING;
+ break;
+ case '\'' :
+ stat = GRN_TOK_QUOTE;
+ break;
+ case ')' :
+ case '(' :
+ GRN_TEXT_PUT(ctx, buf, p, len);
+ p += len;
+ stat = GRN_TOK_SYMBOL;
+ goto exit;
+ case '\\' :
+ stat = GRN_TOK_SYMBOL|TOK_ESC;
+ break;
+ default :
+ stat = GRN_TOK_SYMBOL;
+ GRN_TEXT_PUT(ctx, buf, p, len);
+ break;
+ }
+ break;
+ case GRN_TOK_SYMBOL :
+ if (*p == ' ') { goto exit; }
+ switch (*p) {
+ case '\'' :
+ case '"' :
+ case ')' :
+ case '(' :
+ goto exit;
+ case '\\' :
+ stat |= TOK_ESC;
+ break;
+ default :
+ GRN_TEXT_PUT(ctx, buf, p, len);
+ break;
+ }
+ break;
+ case GRN_TOK_STRING :
+ switch (*p) {
+ case '"' :
+ p += len;
+ goto exit;
+ case '\\' :
+ stat |= TOK_ESC;
+ break;
+ default :
+ GRN_TEXT_PUT(ctx, buf, p, len);
+ break;
+ }
+ break;
+ case GRN_TOK_QUOTE :
+ switch (*p) {
+ case '\'' :
+ p += len;
+ goto exit;
+ case '\\' :
+ stat |= TOK_ESC;
+ break;
+ default :
+ GRN_TEXT_PUT(ctx, buf, p, len);
+ break;
+ }
+ break;
+ case GRN_TOK_SYMBOL|TOK_ESC :
+ case GRN_TOK_STRING|TOK_ESC :
+ case GRN_TOK_QUOTE|TOK_ESC :
+ switch (*p) {
+ case 'b' :
+ GRN_TEXT_PUTC(ctx, buf, '\b');
+ break;
+ case 'f' :
+ GRN_TEXT_PUTC(ctx, buf, '\f');
+ break;
+ case 'n' :
+ GRN_TEXT_PUTC(ctx, buf, '\n');
+ break;
+ case 'r' :
+ GRN_TEXT_PUTC(ctx, buf, '\r');
+ break;
+ case 't' :
+ GRN_TEXT_PUTC(ctx, buf, '\t');
+ break;
+ default :
+ GRN_TEXT_PUT(ctx, buf, p, len);
+ break;
+ }
+ stat &= ~TOK_ESC;
+ break;
+ }
+ }
+exit :
+ *tok_type = stat;
+ return p;
+}
+
+grn_rc
+grn_text_benc(grn_ctx *ctx, grn_obj *buf, unsigned int v)
+{
+ grn_rc rc = GRN_SUCCESS;
+ uint8_t *p;
+ if (GRN_BULK_REST(buf) < 5) {
+ if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 5))) { return rc; }
+ }
+ p = (uint8_t *)GRN_BULK_CURR(buf);
+ GRN_B_ENC(v, p);
+ GRN_BULK_SET_CURR(buf, (char *)p);
+ return rc;
+}
+
+/* 0x00 - 0x7f */
+static const int_least8_t urlenc_tbl[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+};
+
+grn_rc
+grn_text_urlenc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
+{
+ const char *e, c = '%';
+ for (e = s + len; s < e; s++) {
+ if ((signed char)*s < 0 || urlenc_tbl[(int)*s]) {
+ if (!grn_bulk_write(ctx, buf, &c, 1)) {
+ if (grn_text_itoh(ctx, buf, *s, 2)) {
+ GRN_BULK_INCR_LEN(buf, -1);
+ }
+ }
+ } else {
+ GRN_TEXT_PUTC(ctx, buf, *s);
+ }
+ }
+ return GRN_SUCCESS;
+}
+
+static const char *weekdays[7] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
+static const char *months[12] = {
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
+
+grn_rc
+grn_text_time2rfc1123(grn_ctx *ctx, grn_obj *bulk, int sec)
+{
+ time_t tsec;
+ struct tm *t;
+#ifdef HAVE__GMTIME64_S
+ struct tm tm;
+ tsec = (time_t)sec;
+ t = (gmtime_s(&tm, &tsec) == 0) ? &tm : NULL;
+#else /* HAVE__GMTIME64_S */
+# ifdef HAVE_GMTIME_R
+ struct tm tm;
+ tsec = (time_t)sec;
+ t = gmtime_r(&tsec, &tm);
+# else /* HAVE_GMTIME_R */
+ tsec = (time_t)sec;
+ t = gmtime(&tsec);
+# endif /* HAVE_GMTIME_R */
+#endif /* HAVE__GMTIME64_S */
+ if (t) {
+ GRN_TEXT_SET(ctx, bulk, weekdays[t->tm_wday], 3);
+ GRN_TEXT_PUTS(ctx, bulk, ", ");
+ grn_text_itoa_padded(ctx, bulk, t->tm_mday, '0', 2);
+ GRN_TEXT_PUTS(ctx, bulk, " ");
+ GRN_TEXT_PUT(ctx, bulk, months[t->tm_mon], 3);
+ GRN_TEXT_PUTS(ctx, bulk, " ");
+ grn_text_itoa(ctx, bulk, t->tm_year + 1900);
+ GRN_TEXT_PUTS(ctx, bulk, " ");
+ grn_text_itoa_padded(ctx, bulk, t->tm_hour, '0', 2);
+ GRN_TEXT_PUTS(ctx, bulk, ":");
+ grn_text_itoa_padded(ctx, bulk, t->tm_min, '0', 2);
+ GRN_TEXT_PUTS(ctx, bulk, ":");
+ grn_text_itoa_padded(ctx, bulk, t->tm_sec, '0', 2);
+ GRN_TEXT_PUTS(ctx, bulk, " GMT");
+ } else {
+ GRN_TEXT_SETS(ctx, bulk, "Mon, 16 Mar 1980 20:40:00 GMT");
+ }
+ return GRN_SUCCESS;
+}
+
+grn_rc
+grn_text_printf(grn_ctx *ctx, grn_obj *bulk, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ grn_text_vprintf(ctx, bulk, format, args);
+ va_end(args);
+
+ return GRN_SUCCESS;
+}
+
+grn_rc
+grn_text_vprintf(grn_ctx *ctx, grn_obj *bulk, const char *format, va_list args)
+{
+ grn_bool is_written = GRN_FALSE;
+ int written_size;
+
+ {
+ int rest_size;
+ va_list copied_args;
+
+ rest_size = GRN_BULK_REST(bulk);
+ va_copy(copied_args, args);
+ written_size = vsnprintf(GRN_BULK_CURR(bulk), rest_size,
+ format, copied_args);
+ va_end(copied_args);
+
+ if (0 <= written_size && written_size < rest_size) {
+ is_written = GRN_TRUE;
+ }
+ }
+
+ if (!is_written) {
+#ifdef WIN32
+# define N_NEW_SIZES 3
+ int i;
+ int new_sizes[N_NEW_SIZES];
+
+ new_sizes[0] = GRN_BULK_REST(bulk) + strlen(format) * 2;
+ new_sizes[1] = new_sizes[0] + 4096;
+ new_sizes[2] = new_sizes[0] + 65536;
+
+ for (i = 0; i < N_NEW_SIZES; i++) {
+ grn_rc rc;
+ int new_size = new_sizes[i];
+ va_list copied_args;
+
+ rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + new_size);
+ if (rc) {
+ return rc;
+ }
+ va_copy(copied_args, args);
+ written_size = vsnprintf(GRN_BULK_CURR(bulk), new_size,
+ format, copied_args);
+ va_end(copied_args);
+ if (written_size != -1) {
+ break;
+ }
+ }
+# undef N_NEW_SIZES
+#else /* WIN32 */
+ grn_rc rc;
+ int required_size = written_size + 1; /* "+ 1" for terminate '\0'. */
+
+ rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + required_size);
+ if (rc) {
+ return rc;
+ }
+ written_size = vsnprintf(GRN_BULK_CURR(bulk), required_size,
+ format, args);
+#endif /* WIN32 */
+ }
+
+ if (written_size < 0) {
+ return GRN_INVALID_ARGUMENT;
+ }
+
+ GRN_BULK_INCR_LEN(bulk, written_size);
+ return GRN_SUCCESS;
+}
+
+grn_rc
+grn_bulk_fin(grn_ctx *ctx, grn_obj *buf)
+{
+ if (!(buf->header.impl_flags & GRN_OBJ_REFER)) {
+ if (GRN_BULK_OUTP(buf) && buf->u.b.head) {
+ GRN_REALLOC(buf->u.b.head - grn_bulk_margin_size, 0);
+ }
+ }
+ buf->header.flags = 0;
+ buf->header.impl_flags &= ~GRN_OBJ_DO_SHALLOW_COPY;
+ buf->u.b.head = NULL;
+ buf->u.b.curr = NULL;
+ buf->u.b.tail = NULL;
+ return GRN_SUCCESS;
+}
+
+grn_rc
+grn_substring(grn_ctx *ctx, char **str, char **str_end, int start, int end, grn_encoding encoding)
+{
+ int i;
+ size_t l;
+ char *s = *str, *e = *str_end;
+ for (i = 0; s < e; i++, s += l) {
+ if (i == start) { *str = s; }
+ if (!(l = grn_charlen(ctx, s, e))) {
+ return GRN_INVALID_ARGUMENT;
+ }
+ if (i == end) {
+ *str_end = s;
+ break;
+ }
+ }
+ return GRN_SUCCESS;
+}
+
+static void
+grn_text_atoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_id id)
+{
+ uint32_t vs;
+ grn_obj buf;
+ if (obj->header.type == GRN_ACCESSOR) {
+ grn_accessor *a = (grn_accessor *)obj;
+ GRN_TEXT_INIT(&buf, 0);
+ for (;;) {
+ GRN_BULK_REWIND(&buf);
+ switch (a->action) {
+ case GRN_ACCESSOR_GET_ID :
+ GRN_UINT32_PUT(ctx, &buf, id);
+ buf.header.domain = GRN_DB_UINT32;
+ break;
+ case GRN_ACCESSOR_GET_KEY :
+ grn_table_get_key2(ctx, a->obj, id, &buf);
+ buf.header.domain = DB_OBJ(a->obj)->header.domain;
+ break;
+ case GRN_ACCESSOR_GET_VALUE :
+ grn_obj_get_value(ctx, a->obj, id, &buf);
+ buf.header.domain = GRN_DB_INT32; /* fix me */
+ break;
+ case GRN_ACCESSOR_GET_SCORE :
+ {
+ grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
+ int32_t int32_score = ri->score;
+ GRN_INT32_PUT(ctx, &buf, int32_score);
+ }
+ buf.header.domain = GRN_DB_INT32;
+ break;
+ case GRN_ACCESSOR_GET_NSUBRECS :
+ {
+ grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
+ GRN_INT32_PUT(ctx, &buf, ri->n_subrecs);
+ }
+ buf.header.domain = GRN_DB_INT32;
+ break;
+ case GRN_ACCESSOR_GET_COLUMN_VALUE :
+ if ((a->obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) {
+ if (a->next) {
+ grn_id *idp;
+ grn_obj_get_value(ctx, a->obj, id, &buf);
+ idp = (grn_id *)GRN_BULK_HEAD(&buf);
+ GRN_TEXT_PUTC(ctx, bulk, '[');
+ for (vs = GRN_BULK_VSIZE(&buf) / sizeof(grn_id); vs--; idp++) {
+ grn_text_atoj(ctx, bulk, (grn_obj *)a->next, *idp);
+ if (vs) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ } else {
+ grn_text_atoj(ctx, bulk, a->obj, id);
+ }
+ goto exit;
+ } else {
+ grn_obj_get_value(ctx, a->obj, id, &buf);
+ }
+ break;
+ case GRN_ACCESSOR_GET_DB_OBJ :
+ /* todo */
+ break;
+ case GRN_ACCESSOR_LOOKUP :
+ /* todo */
+ break;
+ case GRN_ACCESSOR_FUNCALL :
+ /* todo */
+ break;
+ }
+ if (a->next) {
+ a = a->next;
+ id = *((grn_id *)GRN_BULK_HEAD(&buf));
+ } else {
+ break;
+ }
+ }
+ } else {
+ switch (obj->header.type) {
+ case GRN_COLUMN_FIX_SIZE :
+ GRN_VALUE_FIX_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
+ break;
+ case GRN_COLUMN_VAR_SIZE :
+ if ((obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) {
+ grn_obj *range = grn_ctx_at(ctx, DB_OBJ(obj)->range);
+ if (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) {
+ GRN_VALUE_VAR_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
+ } else {
+ GRN_VALUE_FIX_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
+ }
+ } else {
+ GRN_VALUE_VAR_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
+ }
+ break;
+ case GRN_COLUMN_INDEX :
+ GRN_UINT32_INIT(&buf, 0);
+ break;
+ default:
+ GRN_TEXT_INIT(&buf, 0);
+ break;
+ }
+ grn_obj_get_value(ctx, obj, id, &buf);
+ }
+ grn_text_otoj(ctx, bulk, &buf, NULL);
+exit :
+ grn_obj_close(ctx, &buf);
+}
+
+grn_rc
+grn_text_otoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_obj_format *format)
+{
+ grn_obj buf;
+ GRN_TEXT_INIT(&buf, 0);
+ switch (obj->header.type) {
+ case GRN_BULK :
+ switch (obj->header.domain) {
+ case GRN_DB_VOID :
+ case GRN_DB_SHORT_TEXT :
+ case GRN_DB_TEXT :
+ case GRN_DB_LONG_TEXT :
+ grn_text_esc(ctx, bulk, GRN_BULK_HEAD(obj), GRN_BULK_VSIZE(obj));
+ break;
+ case GRN_DB_BOOL :
+ if (*((unsigned char *)GRN_BULK_HEAD(obj))) {
+ GRN_TEXT_PUTS(ctx, bulk, "true");
+ } else {
+ GRN_TEXT_PUTS(ctx, bulk, "false");
+ }
+ break;
+ case GRN_DB_INT8 :
+ grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT8_VALUE(obj) : 0);
+ break;
+ case GRN_DB_UINT8 :
+ grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT8_VALUE(obj) : 0);
+ break;
+ case GRN_DB_INT16 :
+ grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT16_VALUE(obj) : 0);
+ break;
+ case GRN_DB_UINT16 :
+ grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT16_VALUE(obj) : 0);
+ break;
+ case GRN_DB_INT32 :
+ grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT32_VALUE(obj) : 0);
+ break;
+ case GRN_DB_UINT32 :
+ grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT32_VALUE(obj) : 0);
+ break;
+ case GRN_DB_INT64 :
+ grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT64_VALUE(obj) : 0);
+ break;
+ case GRN_DB_UINT64 :
+ grn_text_ulltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT64_VALUE(obj) : 0);
+ break;
+ case GRN_DB_FLOAT :
+ grn_text_ftoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_FLOAT_VALUE(obj) : 0);
+ break;
+ case GRN_DB_TIME :
+ {
+ double dv = *((int64_t *)GRN_BULK_HEAD(obj));
+ dv /= 1000000.0;
+ grn_text_ftoa(ctx, bulk, dv);
+ }
+ break;
+ case GRN_DB_TOKYO_GEO_POINT :
+ case GRN_DB_WGS84_GEO_POINT :
+ if (GRN_BULK_VSIZE(obj) == sizeof(grn_geo_point)) {
+ grn_geo_point *gp = (grn_geo_point *)GRN_BULK_HEAD(obj);
+ GRN_TEXT_PUTC(ctx, bulk, '"');
+ grn_text_itoa(ctx, bulk, gp->latitude);
+ GRN_TEXT_PUTC(ctx, bulk, 'x');
+ grn_text_itoa(ctx, bulk, gp->longitude);
+ GRN_TEXT_PUTC(ctx, bulk, '"');
+ } else {
+ GRN_TEXT_PUTS(ctx, bulk, "\"\"");
+ }
+ break;
+ default :
+ if (format) {
+ int j;
+ int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
+ grn_id id = GRN_RECORD_VALUE(obj);
+ grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
+ if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
+ GRN_TEXT_PUTS(ctx, bulk, "[");
+ for (j = 0; j < ncolumns; j++) {
+ grn_id range_id;
+ if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+ GRN_TEXT_PUTS(ctx, bulk, "[");
+ GRN_BULK_REWIND(&buf);
+ grn_column_name_(ctx, columns[j], &buf);
+ grn_text_otoj(ctx, bulk, &buf, NULL);
+ GRN_TEXT_PUTC(ctx, bulk, ',');
+ /* column range */
+ range_id = grn_obj_get_range(ctx, columns[j]);
+ if (range_id == GRN_ID_NIL) {
+ GRN_TEXT_PUTS(ctx, bulk, "null");
+ } else {
+ int name_len;
+ grn_obj *range_obj;
+ char name_buf[GRN_TABLE_MAX_KEY_SIZE];
+
+ range_obj = grn_ctx_at(ctx, range_id);
+ name_len = grn_obj_name(ctx, range_obj, name_buf,
+ GRN_TABLE_MAX_KEY_SIZE);
+ GRN_BULK_REWIND(&buf);
+ GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
+ grn_text_otoj(ctx, bulk, &buf, NULL);
+ }
+ GRN_TEXT_PUTS(ctx, bulk, "]");
+ }
+ GRN_TEXT_PUTS(ctx, bulk, "],");
+ }
+ GRN_TEXT_PUTC(ctx, bulk, '[');
+ for (j = 0; j < ncolumns; j++) {
+ if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+ grn_text_atoj(ctx, bulk, columns[j], id);
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ } else {
+ if (GRN_BULK_VSIZE(obj) == 0) {
+ GRN_TEXT_PUTS(ctx, bulk, "null");
+ } else {
+ grn_obj *table = grn_ctx_at(ctx, obj->header.domain);
+ grn_id id = GRN_RECORD_VALUE(obj);
+ if (table && table->header.type != GRN_TABLE_NO_KEY) {
+ /* todo : temporal patch. grn_table_at() is kinda costful... */
+ if (grn_table_at(ctx, table, id)) {
+ grn_obj *accessor = grn_obj_column(ctx, table,
+ GRN_COLUMN_NAME_KEY,
+ GRN_COLUMN_NAME_KEY_LEN);
+ if (accessor) {
+ grn_obj_get_value(ctx, accessor, id, &buf);
+ grn_obj_unlink(ctx, accessor);
+ }
+ }
+ grn_text_otoj(ctx, bulk, &buf, format);
+ } else {
+ grn_text_lltoa(ctx, bulk, id);
+ }
+ }
+ }
+ }
+ break;
+ case GRN_UVECTOR :
+ if (format) {
+ if (format->flags & GRN_OBJ_FORMAT_WITH_WEIGHT) {
+ int i, n;
+ grn_obj *domain;
+
+ n = grn_uvector_size(ctx, obj);
+ domain = grn_ctx_at(ctx, obj->header.domain);
+ GRN_TEXT_PUTS(ctx, bulk, "{");
+ for (i = 0; i < n; i++) {
+ grn_id id;
+ unsigned int weight;
+
+ if (i > 0) {
+ GRN_TEXT_PUTC(ctx, bulk, ',');
+ }
+ id = grn_uvector_get_element(ctx, obj, i, &weight);
+ if (domain) {
+ if (domain->header.type == GRN_TABLE_NO_KEY) {
+ GRN_TEXT_PUTC(ctx, bulk, '"');
+ grn_text_ulltoa(ctx, bulk, id);
+ GRN_TEXT_PUTC(ctx, bulk, '"');
+ } else {
+ GRN_BULK_REWIND(&buf);
+ grn_table_get_key2(ctx, domain, id, &buf);
+ grn_text_otoj(ctx, bulk, &buf, NULL);
+ }
+ } else {
+ GRN_TEXT_PUTC(ctx, bulk, '"');
+ grn_text_ulltoa(ctx, bulk, id);
+ GRN_TEXT_PUTC(ctx, bulk, '"');
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ':');
+ grn_text_ulltoa(ctx, bulk, weight);
+ }
+ GRN_TEXT_PUTS(ctx, bulk, "}");
+ } else {
+ /* TODO: Does we still need this code? If we don't need this, we should
+ remove this. */
+ int i, j;
+ grn_id *v = (grn_id *)GRN_BULK_HEAD(obj), *ve = (grn_id *)GRN_BULK_CURR(obj);
+ int ncolumns = GRN_BULK_VSIZE(&format->columns) / sizeof(grn_obj *);
+ grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
+ GRN_TEXT_PUTS(ctx, bulk, "[[");
+ grn_text_itoa(ctx, bulk, ve - v);
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ if (v < ve) {
+ if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
+ GRN_TEXT_PUTS(ctx, bulk, ",[");
+ for (j = 0; j < ncolumns; j++) {
+ grn_id range_id;
+ if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+ GRN_TEXT_PUTS(ctx, bulk, "[");
+ GRN_BULK_REWIND(&buf);
+ grn_column_name_(ctx, columns[j], &buf);
+ grn_text_otoj(ctx, bulk, &buf, NULL);
+ GRN_TEXT_PUTC(ctx, bulk, ',');
+ /* column range */
+ range_id = grn_obj_get_range(ctx, columns[j]);
+ if (range_id == GRN_ID_NIL) {
+ GRN_TEXT_PUTS(ctx, bulk, "null");
+ } else {
+ int name_len;
+ grn_obj *range_obj;
+ char name_buf[GRN_TABLE_MAX_KEY_SIZE];
+
+ range_obj = grn_ctx_at(ctx, range_id);
+ name_len = grn_obj_name(ctx, range_obj, name_buf,
+ GRN_TABLE_MAX_KEY_SIZE);
+ GRN_BULK_REWIND(&buf);
+ GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
+ grn_text_otoj(ctx, bulk, &buf, NULL);
+ }
+ GRN_TEXT_PUTS(ctx, bulk, "]");
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ }
+ for (i = 0;; i++) {
+ GRN_TEXT_PUTS(ctx, bulk, ",[");
+ for (j = 0; j < ncolumns; j++) {
+ if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+ GRN_BULK_REWIND(&buf);
+ grn_obj_get_value(ctx, columns[j], *v, &buf);
+ grn_text_otoj(ctx, bulk, &buf, NULL);
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ v++;
+ if (v < ve) {
+ GRN_TEXT_PUTC(ctx, bulk, ',');
+ } else {
+ break;
+ }
+ }
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ }
+ } else {
+ grn_obj *range = grn_ctx_at(ctx, obj->header.domain);
+ if (range && range->header.type == GRN_TYPE) {
+ grn_id value_size = ((struct _grn_type *)range)->obj.range;
+ char *v = (char *)GRN_BULK_HEAD(obj),
+ *ve = (char *)GRN_BULK_CURR(obj);
+ GRN_TEXT_PUTC(ctx, bulk, '[');
+ if (v < ve) {
+ for (;;) {
+ grn_obj value;
+ GRN_OBJ_INIT(&value, GRN_BULK, 0, obj->header.domain);
+ grn_bulk_write_from(ctx, &value, v, 0, value_size);
+ grn_text_otoj(ctx, bulk, &value, NULL);
+
+ v += value_size;
+ if (v < ve) {
+ GRN_TEXT_PUTC(ctx, bulk, ',');
+ } else {
+ break;
+ }
+ }
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ } else {
+ grn_id *v = (grn_id *)GRN_BULK_HEAD(obj),
+ *ve = (grn_id *)GRN_BULK_CURR(obj);
+ GRN_TEXT_PUTC(ctx, bulk, '[');
+ if (v < ve) {
+ for (;;) {
+ if (range->header.type != GRN_TABLE_NO_KEY) {
+ grn_obj key;
+ GRN_OBJ_INIT(&key, GRN_BULK, 0, range->header.domain);
+ grn_table_get_key2(ctx, range, *v, &key);
+ grn_text_otoj(ctx, bulk, &key, NULL);
+ GRN_OBJ_FIN(ctx, &key);
+ } else {
+ grn_text_lltoa(ctx, bulk, *v);
+ }
+ v++;
+ if (v < ve) {
+ GRN_TEXT_PUTC(ctx, bulk, ',');
+ } else {
+ break;
+ }
+ }
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ }
+ }
+ break;
+ case GRN_VECTOR :
+ if (obj->header.domain == GRN_DB_VOID) {
+ ERR(GRN_INVALID_ARGUMENT, "invalid obj->header.domain");
+ } else {
+ unsigned int i, n;
+ grn_obj value;
+ grn_obj weight;
+ grn_bool with_weight;
+
+ GRN_VOID_INIT(&value);
+ GRN_UINT32_INIT(&weight, 0);
+ with_weight = (format && format->flags & GRN_OBJ_FORMAT_WITH_WEIGHT);
+ n = grn_vector_size(ctx, obj);
+ if (with_weight) {
+ GRN_TEXT_PUTC(ctx, bulk, '{');
+ } else {
+ GRN_TEXT_PUTC(ctx, bulk, '[');
+ }
+ for (i = 0; i < n; i++) {
+ const char *_value;
+ unsigned int _weight, length;
+ grn_id domain;
+ if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+
+ length = grn_vector_get_element(ctx, obj, i,
+ &_value, &_weight, &domain);
+ if (domain != GRN_DB_VOID) {
+ grn_obj_reinit(ctx, &value, domain, 0);
+ } else {
+ grn_obj_reinit(ctx, &value, obj->header.domain, 0);
+ }
+ grn_bulk_write(ctx, &value, _value, length);
+ grn_text_otoj(ctx, bulk, &value, NULL);
+ if (with_weight) {
+ GRN_TEXT_PUTC(ctx, bulk, ':');
+ GRN_UINT32_SET(ctx, &weight, _weight);
+ grn_text_otoj(ctx, bulk, &weight, NULL);
+ }
+ }
+ if (with_weight) {
+ GRN_TEXT_PUTC(ctx, bulk, '}');
+ } else {
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ }
+ GRN_OBJ_FIN(ctx, &value);
+ GRN_OBJ_FIN(ctx, &weight);
+ }
+ break;
+ case GRN_PVECTOR :
+ if (format) {
+ ERR(GRN_FUNCTION_NOT_IMPLEMENTED,
+ "cannot print GRN_PVECTOR using grn_obj_format");
+ } else {
+ unsigned int i, n;
+ GRN_TEXT_PUTC(ctx, bulk, '[');
+ n = GRN_BULK_VSIZE(obj) / sizeof(grn_obj *);
+ for (i = 0; i < n; i++) {
+ grn_obj *value;
+
+ if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+ value = GRN_PTR_VALUE_AT(obj, i);
+ grn_text_otoj(ctx, bulk, value, NULL);
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ }
+ break;
+ case GRN_TABLE_HASH_KEY :
+ case GRN_TABLE_PAT_KEY :
+ case GRN_TABLE_NO_KEY :
+ if (format) {
+ int i, j;
+ int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
+ grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
+ grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
+ format->offset, format->limit,
+ GRN_CURSOR_ASCENDING);
+ if (!tc) { ERRCLR(ctx); }
+ GRN_TEXT_PUTS(ctx, bulk, "[[");
+ grn_text_itoa(ctx, bulk, format->nhits);
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
+ GRN_TEXT_PUTS(ctx, bulk, ",[");
+ for (j = 0; j < ncolumns; j++) {
+ grn_id range_id;
+ if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+ GRN_TEXT_PUTS(ctx, bulk, "[");
+ GRN_BULK_REWIND(&buf);
+ grn_column_name_(ctx, columns[j], &buf);
+ grn_text_otoj(ctx, bulk, &buf, NULL);
+ GRN_TEXT_PUTC(ctx, bulk, ',');
+ /* column range */
+ range_id = grn_obj_get_range(ctx, columns[j]);
+ if (range_id == GRN_ID_NIL) {
+ GRN_TEXT_PUTS(ctx, bulk, "null");
+ } else {
+ int name_len;
+ grn_obj *range_obj;
+ char name_buf[GRN_TABLE_MAX_KEY_SIZE];
+
+ range_obj = grn_ctx_at(ctx, range_id);
+ name_len = grn_obj_name(ctx, range_obj, name_buf,
+ GRN_TABLE_MAX_KEY_SIZE);
+ GRN_BULK_REWIND(&buf);
+ GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
+ grn_text_otoj(ctx, bulk, &buf, NULL);
+ }
+ GRN_TEXT_PUTS(ctx, bulk, "]");
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ }
+ if (tc) {
+ grn_id id;
+ for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) {
+ GRN_TEXT_PUTS(ctx, bulk, ",[");
+ for (j = 0; j < ncolumns; j++) {
+ if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+ grn_text_atoj(ctx, bulk, columns[j], id);
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ }
+ grn_table_cursor_close(ctx, tc);
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ } else {
+ int i;
+ grn_id id;
+ grn_obj *column = grn_obj_column(ctx, obj,
+ GRN_COLUMN_NAME_KEY,
+ GRN_COLUMN_NAME_KEY_LEN);
+ grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
+ 0, -1, GRN_CURSOR_ASCENDING);
+ GRN_TEXT_PUTC(ctx, bulk, '[');
+ if (tc) {
+ for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) {
+ if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
+ GRN_BULK_REWIND(&buf);
+ grn_obj_get_value(ctx, column, id, &buf);
+ grn_text_esc(ctx, bulk, GRN_BULK_HEAD(&buf), GRN_BULK_VSIZE(&buf));
+ }
+ grn_table_cursor_close(ctx, tc);
+ }
+ GRN_TEXT_PUTC(ctx, bulk, ']');
+ grn_obj_unlink(ctx, column);
+ }
+ break;
+ }
+ grn_obj_close(ctx, &buf);
+ return GRN_SUCCESS;
+}
+
+const char *
+grn_text_urldec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e, char d)
+{
+ while (p < e) {
+ if (*p == d) {
+ p++; break;
+ } else if (*p == '%' && p + 3 <= e) {
+ const char *r;
+ unsigned int c = grn_htoui(p + 1, p + 3, &r);
+ if (p + 3 == r) {
+ GRN_TEXT_PUTC(ctx, buf, c);
+ p += 3;
+ } else {
+ GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)", p[1], p[2]);
+ GRN_TEXT_PUTC(ctx, buf, '%');
+ p += 1;
+ }
+ } else {
+ GRN_TEXT_PUTC(ctx, buf, *p);
+ p++;
+ }
+ }
+ return p;
+}
+
+const char *
+grn_text_cgidec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e,
+ const char *delimiters)
+{
+ while (p < e) {
+ grn_bool found_delimiter = GRN_FALSE;
+ const char *delimiter;
+ for (delimiter = delimiters; *delimiter; delimiter++) {
+ if (*p == *delimiter) {
+ found_delimiter = GRN_TRUE;
+ break;
+ }
+ }
+ if (found_delimiter) {
+ p++;
+ break;
+ }
+
+ if (*p == '+') {
+ GRN_TEXT_PUTC(ctx, buf, ' ');
+ p++;
+ } else if (*p == '%' && p + 3 <= e) {
+ const char *r;
+ unsigned int c = grn_htoui(p + 1, p + 3, &r);
+ if (p + 3 == r) {
+ GRN_TEXT_PUTC(ctx, buf, c);
+ p += 3;
+ } else {
+ GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)", p[1], p[2]);
+ GRN_TEXT_PUTC(ctx, buf, '%');
+ p += 1;
+ }
+ } else {
+ GRN_TEXT_PUTC(ctx, buf, *p);
+ p++;
+ }
+ }
+ return p;
+}
+
+void
+grn_str_url_path_normalize(grn_ctx *ctx, const char *path, size_t path_len,
+ char *buf, size_t buf_len)
+{
+ char *b = buf, *be = buf + buf_len - 1;
+ const char *p = path, *pe = path + path_len, *pc;
+
+ if (buf_len < 2) { return; }
+
+ while (p < pe) {
+ for (pc = p; pc < pe && *pc != '/'; pc++) {}
+ if (*p == '.') {
+ if (pc == p + 2 && *(p + 1) == '.') {
+ /* '..' */
+ if (b - buf >= 2) {
+ for (b -= 2; *b != '/' && b >= buf; b--) {}
+ }
+ if (*b == '/') {
+ b++;
+ ERR(GRN_INVALID_ARGUMENT, "parent path doesn't exist.");
+ }
+ p = pc + 1;
+ continue;
+ } else if (pc == p + 1) {
+ /* '.' */
+ p = pc + 1;
+ continue;
+ }
+ }
+ if (be - b >= pc - p) {
+ grn_memcpy(b, p, (pc - p));
+ b += pc - p;
+ p = pc;
+ if (p < pe && *pc == '/' && be > b) {
+ *b++ = '/';
+ p++;
+ }
+ }
+ }
+ *b = '\0';
+}
+
+grn_bool
+grn_bulk_is_zero(grn_ctx *ctx, grn_obj *obj)
+{
+ const char *v = GRN_BULK_HEAD(obj);
+ unsigned int s = GRN_BULK_VSIZE(obj);
+ for (; s; s--, v++) {
+ if (*v) { return GRN_FALSE; }
+ }
+ return GRN_TRUE;
+}
+