summaryrefslogtreecommitdiffstats
path: root/third_party/heimdal/lib/wind/normalize.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/heimdal/lib/wind/normalize.c')
-rw-r--r--third_party/heimdal/lib/wind/normalize.c325
1 files changed, 325 insertions, 0 deletions
diff --git a/third_party/heimdal/lib/wind/normalize.c b/third_party/heimdal/lib/wind/normalize.c
new file mode 100644
index 0000000..8f3991d
--- /dev/null
+++ b/third_party/heimdal/lib/wind/normalize.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2004 Kungliga Tekniska Högskolan
+ * (Royal Institute of Technology, Stockholm, Sweden).
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the Institute nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include "windlocl.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "roken.h"
+
+#include "normalize_table.h"
+
+/*
+ * bsearch() comparator for _wind_normalize_table.
+ *
+ * Both arguments are read as `struct translation' and ordered by their
+ * `key' member only.  Returns a negative value, zero or a positive
+ * value when the key of `key' is less than, equal to or greater than
+ * the key of `data'.
+ *
+ * The keys are compared instead of subtracted: the search key is filled
+ * in from an arbitrary uint32_t supplied by the caller, so a difference
+ * could wrap or fail to fit in an int and report the wrong ordering.
+ */
+static int
+translation_cmp(const void *key, const void *data)
+{
+    const struct translation *t1 = (const struct translation *)key;
+    const struct translation *t2 = (const struct translation *)data;
+
+    return (t1->key > t2->key) - (t1->key < t2->key);
+}
+
+/*
+ * Constants used by hangul_decomp() and hangul_composition() for the
+ * arithmetic mapping between precomposed Hangul syllables and their
+ * conjoining jamo:
+ *
+ *   s_base / s_count  first syllable code point and number of syllables
+ *   l_base / l_count  first leading-consonant code point and their number
+ *   v_base / v_count  first vowel code point and their number
+ *   t_base / t_count  base of the trailing consonants and their number;
+ *                     an offset of 0 from t_base stands for "no trailing
+ *                     consonant" in hangul_decomp()
+ *   n_count           number of syllables sharing one leading consonant
+ *                     (v_count * t_count)
+ */
+enum { s_base = 0xAC00};
+enum { s_count = 11172};
+enum { l_base = 0x1100};
+enum { l_count = 19};
+enum { v_base = 0x1161};
+enum { v_count = 21};
+enum { t_base = 0x11A7};
+enum { t_count = 28};
+enum { n_count = v_count * t_count};
+
+/*
+ * Decompose the precomposed Hangul syllable at in[0] into its jamo.
+ *
+ * in       points at the code point to examine (only in[0] is read)
+ * in_len   number of code points available at `in' (not used)
+ * out      receives two or three jamo code points
+ * out_len  on entry the capacity of `out'; on a return of 1 the number
+ *          of code points written
+ *
+ * Returns 0 when in[0] is not in [s_base, s_base + s_count) (nothing is
+ * written), WIND_ERR_OVERRUN when `out' is too small, and 1 when the
+ * syllable was decomposed.
+ */
+static int
+hangul_decomp(const uint32_t *in, size_t in_len,
+              uint32_t *out, size_t *out_len)
+{
+    const uint32_t cp = in[0];
+    unsigned syllable;
+    unsigned lead, vowel, trail;
+    unsigned needed;
+
+    if (cp < s_base || cp >= s_base + s_count)
+        return 0;
+
+    syllable = cp - s_base;
+    lead = l_base + syllable / n_count;
+    vowel = v_base + (syllable % n_count) / t_count;
+    trail = t_base + syllable % t_count;
+
+    /* A trailing offset of zero means the syllable has no third jamo. */
+    needed = (trail != t_base) ? 3 : 2;
+    if (*out_len < needed)
+        return WIND_ERR_OVERRUN;
+
+    out[0] = lead;
+    out[1] = vowel;
+    if (needed == 3)
+        out[2] = trail;
+    *out_len = needed;
+    return 1;
+}
+
+/*
+ * Algorithmic composition of a pair of Hangul code points.
+ *
+ * in      the pair to examine (in[0] and in[1] are read)
+ * in_len  number of code points at `in'; fewer than 2 never composes
+ *
+ * Two cases compose:
+ *   - leading consonant + vowel          -> syllable without trailer
+ *   - syllable without trailer + trailer -> syllable with trailer
+ *
+ * Returns the composed syllable, or 0 when the pair does not compose.
+ */
+static uint32_t
+hangul_composition(const uint32_t *in, size_t in_len)
+{
+    if (in_len < 2)
+        return 0;
+    if (in[0] >= l_base && in[0] < l_base + l_count) {
+        unsigned l_index = in[0] - l_base;
+        unsigned v_index;
+
+        if (in[1] < v_base || in[1] >= v_base + v_count)
+            return 0;
+        v_index = in[1] - v_base;
+        return (l_index * v_count + v_index) * t_count + s_base;
+    } else if (in[0] >= s_base && in[0] < s_base + s_count) {
+        unsigned s_index = in[0] - s_base;
+        unsigned t_index;
+
+        /* Only a syllable that has no trailing consonant yet can take one. */
+        if (s_index % t_count != 0)
+            return 0;
+        /*
+         * The lower bound is exclusive: t_base itself is offset 0, which
+         * means "no trailing consonant".  Accepting it would return in[0]
+         * unchanged as a successful composition and make the caller drop
+         * the code point t_base from the string.
+         */
+        if (in[1] <= t_base || in[1] >= t_base + t_count)
+            return 0;
+        t_index = in[1] - t_base;
+        return in[0] + t_index;
+    }
+    return 0;
+}
+
+/*
+ * Recursively decompose `in' into `out'.
+ *
+ * Each input code point is handled in this order:
+ *   1. hangul_decomp() is tried first;
+ *   2. otherwise the code point is looked up in _wind_normalize_table
+ *      and, if found, its replacement sequence from
+ *      _wind_normalize_val_table is itself decomposed recursively;
+ *   3. otherwise the code point is copied unchanged.
+ *
+ * in, in_len  input code points
+ * out         output buffer
+ * out_len     on entry the capacity of `out'; on success the number of
+ *             code points written
+ *
+ * Returns 0 on success, WIND_ERR_OVERRUN when `out' is too small, or
+ * whatever non-zero value a recursive call returned.
+ */
+static int
+compat_decomp(const uint32_t *in, size_t in_len,
+              uint32_t *out, size_t *out_len)
+{
+    unsigned pos;
+    unsigned written = 0;
+
+    for (pos = 0; pos < in_len; ++pos) {
+        struct translation probe = {in[pos], 0, 0};
+        size_t room = *out_len - written;
+        const struct translation *hit;
+        int ret;
+
+        ret = hangul_decomp(in + pos, in_len - pos,
+                            out + written, &room);
+        if (ret) {
+            if (ret == WIND_ERR_OVERRUN)
+                return ret;
+            /* hangul_decomp() stored the number of jamo in `room'. */
+            written += room;
+            continue;
+        }
+
+        hit = bsearch(&probe,
+                      _wind_normalize_table,
+                      _wind_normalize_table_size,
+                      sizeof(_wind_normalize_table[0]),
+                      translation_cmp);
+        if (hit == NULL) {
+            /* No mapping: the code point decomposes to itself. */
+            if (written >= *out_len)
+                return WIND_ERR_OVERRUN;
+            out[written++] = in[pos];
+            continue;
+        }
+
+        ret = compat_decomp(_wind_normalize_val_table + hit->val_offset,
+                            hit->val_len,
+                            out + written, &room);
+        if (ret)
+            return ret;
+        written += room;
+    }
+    *out_len = written;
+    return 0;
+}
+
+/* Exchange the two code points that `a' and `b' point at. */
+static void
+swap_char(uint32_t * a, uint32_t * b)
+{
+    const uint32_t first = *a;
+
+    *a = *b;
+    *b = first;
+}
+
+/*
+ * Canonical Ordering (Unicode 5.2.0, D109) of a run of code points that
+ * all have Canonical_Combining_Class > 0.
+ *
+ * Sorts a[0..len-1] in place by ascending _wind_combining_class() using
+ * an insertion sort; an element is only moved past a neighbour with a
+ * strictly greater class, so elements of equal class keep their
+ * relative order.
+ */
+static void
+canonical_reorder_sequence(uint32_t * a, size_t len)
+{
+    size_t sorted;
+
+    for (sorted = 1; sorted < len; sorted++) {
+        size_t pos = sorted;
+
+        while (pos > 0 &&
+               _wind_combining_class(a[pos]) <
+               _wind_combining_class(a[pos - 1])) {
+            swap_char(&a[pos], &a[pos - 1]);
+            pos--;
+        }
+    }
+}
+
+/*
+ * Put every maximal run of code points with a non-zero combining class
+ * in tmp[0..tmp_len-1] into canonical order, in place.  Code points
+ * with combining class 0 are left where they are.
+ */
+static void
+canonical_reorder(uint32_t *tmp, size_t tmp_len)
+{
+    size_t start = 0;
+
+    while (start < tmp_len) {
+        size_t end;
+
+        if (_wind_combining_class(tmp[start]) == 0) {
+            start++;
+            continue;
+        }
+
+        /* Find the end of the run that begins at `start'. */
+        end = start + 1;
+        while (end < tmp_len && _wind_combining_class(tmp[end]) != 0)
+            end++;
+
+        canonical_reorder_sequence(&tmp[start], end - start);
+
+        /* tmp[end] is either past the buffer or has class 0: skip it. */
+        start = end + 1;
+    }
+}
+
+/*
+ * Look up the composition of the code point sequence in[0..in_len-1].
+ *
+ * hangul_composition() is tried first.  Otherwise the sequence is used
+ * to walk _wind_canon_table, starting at node 0 and following
+ * _wind_canon_next_table four bits at a time.
+ *
+ * Returns the Hangul composition if there is one, the `val' of the node
+ * reached once all input has been consumed, or 0 when the walk hits a
+ * missing transition.
+ * NOTE(review): presumably `val' is 0 for nodes that do not represent a
+ * complete composition -- confirm against the table generator.
+ */
+static uint32_t
+find_composition(const uint32_t *in, unsigned in_len)
+{
+ unsigned short canon_index = 0;
+ uint32_t cur;
+ unsigned n = 0;
+
+ cur = hangul_composition(in, in_len);
+ if (cur)
+ return cur;
+
+ do {
+ const struct canon_node *c = &_wind_canon_table[canon_index];
+ unsigned i;
+
+ /* Every five successful steps (5 * 4 = 20 bits) fetch the next
+  * input code point; when none is left the walk is complete. */
+ if (n % 5 == 0) {
+ if (in_len-- == 0)
+ return c->val;
+ cur = *in++;
+ }
+
+ /* Index by the bits from bit 16 upwards; indices outside
+  * [next_start, next_end) have no transition. */
+ i = cur >> 16;
+ if (i < c->next_start || i >= c->next_end)
+ canon_index = 0;
+ else
+ canon_index =
+ _wind_canon_next_table[c->next_offset + i - c->next_start];
+ if (canon_index != 0) {
+ /* Move the next four bits into position, keeping 20 bits. */
+ cur = (cur << 4) & 0xFFFFF;
+ ++n;
+ }
+ } while (canon_index != 0);
+ return 0;
+}
+
+/*
+ * Canonical composition of an already decomposed and canonically
+ * ordered sequence.
+ *
+ * in, in_len  input code points
+ * out         output buffer
+ * out_len     on entry the capacity of `out'; on success the number of
+ *             code points written
+ *
+ * Each code point with combining class 0 (a starter) is copied to `out'
+ * and the code points that follow are tried against it with
+ * find_composition(); a successful composition replaces the starter in
+ * place, anything else is copied through.
+ *
+ * Returns 0 on success or WIND_ERR_OVERRUN when `out' is too small.
+ */
+static int
+combine(const uint32_t *in, size_t in_len,
+        uint32_t *out, size_t *out_len)
+{
+    size_t i;
+    size_t ostarter;
+    size_t o = 0;
+    int old_cc;
+
+    for (i = 0; i < in_len;) {
+        /*
+         * Combining marks with no preceding starter are copied
+         * unchanged.  The capacity has to be checked here as well,
+         * otherwise a string that begins with combining marks could be
+         * written past the end of `out'.
+         */
+        while (i < in_len && _wind_combining_class(in[i]) != 0) {
+            if (o >= *out_len)
+                return WIND_ERR_OVERRUN;
+            out[o++] = in[i++];
+        }
+        if (i < in_len) {
+            if (o >= *out_len)
+                return WIND_ERR_OVERRUN;
+            ostarter = o;
+            out[o++] = in[i++];
+            /* -1: nothing stands between the starter and in[i] yet. */
+            old_cc = -1;
+
+            while (i < in_len) {
+                uint32_t comb;
+                uint32_t v[2];
+                int cc;
+
+                v[0] = out[ostarter];
+                v[1] = in[i];
+
+                cc = _wind_combining_class(in[i]);
+                /*
+                 * in[i] is blocked from the starter when a code point
+                 * kept between them has a combining class greater than
+                 * or equal to cc.  For cc == 0 this means only a
+                 * directly adjacent starter may compose (old_cc == -1).
+                 */
+                if (old_cc < cc && (comb = find_composition(v, 2))) {
+                    out[ostarter] = comb;
+                } else if (cc == 0) {
+                    /* A new starter: let the outer loop handle it. */
+                    break;
+                } else {
+                    if (o >= *out_len)
+                        return WIND_ERR_OVERRUN;
+                    out[o++] = in[i];
+                    old_cc = cc;
+                }
+                ++i;
+            }
+        }
+    }
+    *out_len = o;
+    return 0;
+}
+
+/*
+ * Normalize a sequence of code points: compatibility decomposition
+ * (compat_decomp), canonical reordering (canonical_reorder) and
+ * canonical composition (combine), in that order.
+ *
+ * in, in_len  input code points
+ * out         output buffer
+ * out_len     on entry the capacity of `out'; on success the number of
+ *             code points written (set to 0 for empty input)
+ *
+ * Returns 0 on success, ENOMEM when the scratch buffer cannot be
+ * allocated (including when its size would overflow size_t), or the
+ * error returned by compat_decomp() or combine(), e.g. WIND_ERR_OVERRUN.
+ */
+int
+_wind_stringprep_normalize(const uint32_t *in, size_t in_len,
+                           uint32_t *out, size_t *out_len)
+{
+    size_t tmp_len;
+    uint32_t *tmp;
+    int ret;
+
+    if (in_len == 0) {
+        *out_len = 0;
+        return 0;
+    }
+
+    /* The scratch buffer holds four code points per input code point;
+     * refuse lengths for which that count would wrap around. */
+    if (in_len > (size_t)-1 / 4)
+        return ENOMEM;
+    tmp_len = in_len * 4;
+    if (tmp_len < MAX_LENGTH_CANON)
+        tmp_len = MAX_LENGTH_CANON;
+    /* Likewise for the size in bytes handed to malloc(). */
+    if (tmp_len > (size_t)-1 / sizeof(*tmp))
+        return ENOMEM;
+    tmp = malloc(tmp_len * sizeof(*tmp));
+    if (tmp == NULL)
+        return ENOMEM;
+
+    ret = compat_decomp(in, in_len, tmp, &tmp_len);
+    if (ret) {
+        free(tmp);
+        return ret;
+    }
+    canonical_reorder(tmp, tmp_len);
+    ret = combine(tmp, tmp_len, out, out_len);
+    free(tmp);
+    return ret;
+}