diff options
Diffstat (limited to 'libraries/liblunicode/ucdata/ucpgba.c')
-rw-r--r-- | libraries/liblunicode/ucdata/ucpgba.c | 750 |
1 files changed, 750 insertions, 0 deletions
diff --git a/libraries/liblunicode/ucdata/ucpgba.c b/libraries/liblunicode/ucdata/ucpgba.c new file mode 100644 index 0000000..04446ee --- /dev/null +++ b/libraries/liblunicode/ucdata/ucpgba.c @@ -0,0 +1,750 @@ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software <http://www.openldap.org/>. + * + * Copyright 1998-2018 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +/* Copyright 2001 Computing Research Labs, New Mexico State University + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT + * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +/* $Id: ucpgba.c,v 1.5 2001/01/02 18:46:20 mleisher Exp $ */ + +#include "portable.h" + +#include <stdio.h> +#include <stdlib.h> + +#include "ucdata.h" +#include "ucpgba.h" + +/* + * These macros are used while reordering of RTL runs of text for the + * special case of non-spacing characters being in runs of weakly + * directional text. They check for weak and non-spacing, and digits and + * non-spacing. + */ +#define ISWEAKSPECIAL(cc) ucisprop(cc, UC_EN|UC_ES|UC_MN, UC_ET|UC_AN|UC_CS) +#define ISDIGITSPECIAL(cc) ucisprop(cc, UC_ND|UC_MN, 0) + +/* + * These macros are used while breaking a string into runs of text in + * different directions. Descriptions: + * + * ISLTR_LTR - Test for members of an LTR run in an LTR context. This looks + * for characters with ltr, non-spacing, weak, and neutral + * properties. + * + * ISRTL_RTL - Test for members of an RTL run in an RTL context. This looks + * for characters with rtl, non-spacing, weak, and neutral + * properties. + * + * ISRTL_NEUTRAL - Test for RTL or neutral characters. + * + * ISWEAK_NEUTRAL - Test for weak or neutral characters. + */ +#define ISLTR_LTR(cc) ucisprop(cc, UC_L|UC_MN|UC_EN|UC_ES,\ + UC_ET|UC_CS|UC_B|UC_S|UC_WS|UC_ON) + +#define ISRTL_RTL(cc) ucisprop(cc, UC_R|UC_MN|UC_EN|UC_ES,\ + UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON) + +#define ISRTL_NEUTRAL(cc) ucisprop(cc, UC_R, UC_B|UC_S|UC_WS|UC_ON) +#define ISWEAK_NEUTRAL(cc) ucisprop(cc, UC_EN|UC_ES, \ + UC_B|UC_S|UC_WS|UC_ON|UC_ET|UC_AN|UC_CS) + +/* + * This table is temporarily hard-coded here until it can be constructed + * automatically somehow. + */ +static unsigned long _symmetric_pairs[] = { + 0x0028, 0x0029, 0x0029, 0x0028, 0x003C, 0x003E, 0x003E, 0x003C, + 0x005B, 0x005D, 0x005D, 0x005B, 0x007B, 0x007D, 0x007D, 0x007B, + 0x2045, 0x2046, 0x2046, 0x2045, 0x207D, 0x207E, 0x207E, 0x207D, + 0x208D, 0x208E, 0x208E, 0x208D, 0x3008, 0x3009, 0x3009, 0x3008, + 0x300A, 0x300B, 0x300B, 0x300A, 0x300C, 0x300D, 0x300D, 0x300C, + 0x300E, 0x300F, 0x300F, 0x300E, 0x3010, 0x3011, 0x3011, 0x3010, + 0x3014, 0x3015, 0x3015, 0x3014, 0x3016, 0x3017, 0x3017, 0x3016, + 0x3018, 0x3019, 0x3019, 0x3018, 0x301A, 0x301B, 0x301B, 0x301A, + 0xFD3E, 0xFD3F, 0xFD3F, 0xFD3E, 0xFE59, 0xFE5A, 0xFE5A, 0xFE59, + 0xFE5B, 0xFE5C, 0xFE5C, 0xFE5B, 0xFE5D, 0xFE5E, 0xFE5E, 0xFE5D, + 0xFF08, 0xFF09, 0xFF09, 0xFF08, 0xFF3B, 0xFF3D, 0xFF3D, 0xFF3B, + 0xFF5B, 0xFF5D, 0xFF5D, 0xFF5B, 0xFF62, 0xFF63, 0xFF63, 0xFF62, +}; + +static int _symmetric_pairs_size = +sizeof(_symmetric_pairs)/sizeof(_symmetric_pairs[0]); + +/* + * This routine looks up the other form of a symmetric pair. + */ +static unsigned long +_ucsymmetric_pair(unsigned long c) +{ + int i; + + for (i = 0; i < _symmetric_pairs_size; i += 2) { + if (_symmetric_pairs[i] == c) + return _symmetric_pairs[i+1]; + } + return c; +} + +/* + * This routine creates a new run, copies the text into it, links it into the + * logical text order chain and returns it to the caller to be linked into + * the visual text order chain. + */ +static ucrun_t * +_add_run(ucstring_t *str, unsigned long *src, + unsigned long start, unsigned long end, int direction) +{ + long i, t; + ucrun_t *run; + + run = (ucrun_t *) malloc(sizeof(ucrun_t)); + run->visual_next = run->visual_prev = 0; + run->direction = direction; + + run->cursor = ~0; + + run->chars = (unsigned long *) + malloc(sizeof(unsigned long) * ((end - start) << 1)); + run->positions = run->chars + (end - start); + + run->source = src; + run->start = start; + run->end = end; + + if (direction == UCPGBA_RTL) { + /* + * Copy the source text into the run in reverse order and select + * replacements for the pairwise punctuation and the <> characters. + */ + for (i = 0, t = end - 1; start < end; start++, t--, i++) { + run->positions[i] = t; + if (ucissymmetric(src[t]) || src[t] == '<' || src[t] == '>') + run->chars[i] = _ucsymmetric_pair(src[t]); + else + run->chars[i] = src[t]; + } + } else { + /* + * Copy the source text into the run directly. + */ + for (i = start; i < end; i++) { + run->positions[i - start] = i; + run->chars[i - start] = src[i]; + } + } + + /* + * Add the run to the logical list for cursor traversal. + */ + if (str->logical_first == 0) + str->logical_first = str->logical_last = run; + else { + run->logical_prev = str->logical_last; + str->logical_last->logical_next = run; + str->logical_last = run; + } + + return run; +} + +static void +_ucadd_rtl_segment(ucstring_t *str, unsigned long *source, unsigned long start, + unsigned long end) +{ + unsigned long s, e; + ucrun_t *run, *lrun; + + /* + * This is used to splice runs into strings with overall LTR direction. + * The `lrun' variable will never be NULL because at least one LTR run was + * added before this RTL run. + */ + lrun = str->visual_last; + + for (e = s = start; s < end;) { + for (; e < end && ISRTL_NEUTRAL(source[e]); e++) ; + + if (e > s) { + run = _add_run(str, source, s, e, UCPGBA_RTL); + + /* + * Add the run to the visual list for cursor traversal. + */ + if (str->visual_first != 0) { + if (str->direction == UCPGBA_LTR) { + run->visual_prev = lrun; + run->visual_next = lrun->visual_next; + if (lrun->visual_next != 0) + lrun->visual_next->visual_prev = run; + lrun->visual_next = run; + if (lrun == str->visual_last) + str->visual_last = run; + } else { + run->visual_next = str->visual_first; + str->visual_first->visual_prev = run; + str->visual_first = run; + } + } else + str->visual_first = str->visual_last = run; + } + + /* + * Handle digits in a special way. This makes sure the weakly + * directional characters appear on the expected sides of a number + * depending on whether that number is Arabic or not. + */ + for (s = e; e < end && ISWEAKSPECIAL(source[e]); e++) { + if (!ISDIGITSPECIAL(source[e]) && + (e + 1 == end || !ISDIGITSPECIAL(source[e + 1]))) + break; + } + + if (e > s) { + run = _add_run(str, source, s, e, UCPGBA_LTR); + + /* + * Add the run to the visual list for cursor traversal. + */ + if (str->visual_first != 0) { + if (str->direction == UCPGBA_LTR) { + run->visual_prev = lrun; + run->visual_next = lrun->visual_next; + if (lrun->visual_next != 0) + lrun->visual_next->visual_prev = run; + lrun->visual_next = run; + if (lrun == str->visual_last) + str->visual_last = run; + } else { + run->visual_next = str->visual_first; + str->visual_first->visual_prev = run; + str->visual_first = run; + } + } else + str->visual_first = str->visual_last = run; + } + + /* + * Collect all weak non-digit sequences for an RTL segment. These + * will appear as part of the next RTL segment or will be added as + * an RTL segment by themselves. + */ + for (s = e; e < end && ucisweak(source[e]) && !ucisdigit(source[e]); + e++) ; + } + + /* + * Capture any weak non-digit sequences that occur at the end of the RTL + * run. + */ + if (e > s) { + run = _add_run(str, source, s, e, UCPGBA_RTL); + + /* + * Add the run to the visual list for cursor traversal. + */ + if (str->visual_first != 0) { + if (str->direction == UCPGBA_LTR) { + run->visual_prev = lrun; + run->visual_next = lrun->visual_next; + if (lrun->visual_next != 0) + lrun->visual_next->visual_prev = run; + lrun->visual_next = run; + if (lrun == str->visual_last) + str->visual_last = run; + } else { + run->visual_next = str->visual_first; + str->visual_first->visual_prev = run; + str->visual_first = run; + } + } else + str->visual_first = str->visual_last = run; + } +} + +static void +_ucadd_ltr_segment(ucstring_t *str, unsigned long *source, unsigned long start, + unsigned long end) +{ + ucrun_t *run; + + run = _add_run(str, source, start, end, UCPGBA_LTR); + + /* + * Add the run to the visual list for cursor traversal. + */ + if (str->visual_first != 0) { + if (str->direction == UCPGBA_LTR) { + run->visual_prev = str->visual_last; + str->visual_last->visual_next = run; + str->visual_last = run; + } else { + run->visual_next = str->visual_first; + str->visual_first->visual_prev = run; + str->visual_first = run; + } + } else + str->visual_first = str->visual_last = run; +} + +ucstring_t * +ucstring_create(unsigned long *source, unsigned long start, unsigned long end, + int default_direction, int cursor_motion) +{ + int rtl_first; + unsigned long s, e, ld; + ucstring_t *str; + + str = (ucstring_t *) malloc(sizeof(ucstring_t)); + + /* + * Set the initial values. + */ + str->cursor_motion = cursor_motion; + str->logical_first = str->logical_last = 0; + str->visual_first = str->visual_last = str->cursor = 0; + str->source = source; + str->start = start; + str->end = end; + + /* + * If the length of the string is 0, then just return it at this point. + */ + if (start == end) + return str; + + /* + * This flag indicates whether the collection loop for RTL is called + * before the LTR loop the first time. + */ + rtl_first = 0; + + /* + * Look for the first character in the string that has strong + * directionality. + */ + for (s = start; s < end && !ucisstrong(source[s]); s++) ; + + if (s == end) + /* + * If the string contains no characters with strong directionality, use + * the default direction. + */ + str->direction = default_direction; + else + str->direction = ucisrtl(source[s]) ? UCPGBA_RTL : UCPGBA_LTR; + + if (str->direction == UCPGBA_RTL) + /* + * Set the flag that causes the RTL collection loop to run first. + */ + rtl_first = 1; + + /* + * This loop now separates the string into runs based on directionality. + */ + for (s = e = 0; s < end; s = e) { + if (!rtl_first) { + /* + * Determine the next run of LTR text. + */ + + ld = s; + while (e < end && ISLTR_LTR(source[e])) { + if (ucisdigit(source[e]) && + !(0x660 <= source[e] && source[e] <= 0x669)) + ld = e; + e++; + } + if (str->direction != UCPGBA_LTR) { + while (e > ld && ISWEAK_NEUTRAL(source[e - 1])) + e--; + } + + /* + * Add the LTR segment to the string. + */ + if (e > s) + _ucadd_ltr_segment(str, source, s, e); + } + + /* + * Determine the next run of RTL text. + */ + ld = s = e; + while (e < end && ISRTL_RTL(source[e])) { + if (ucisdigit(source[e]) && + !(0x660 <= source[e] && source[e] <= 0x669)) + ld = e; + e++; + } + if (str->direction != UCPGBA_RTL) { + while (e > ld && ISWEAK_NEUTRAL(source[e - 1])) + e--; + } + + /* + * Add the RTL segment to the string. + */ + if (e > s) + _ucadd_rtl_segment(str, source, s, e); + + /* + * Clear the flag that allowed the RTL collection loop to run first + * for strings with overall RTL directionality. + */ + rtl_first = 0; + } + + /* + * Set up the initial cursor run. + */ + str->cursor = str->logical_first; + if (str != 0) + str->cursor->cursor = (str->cursor->direction == UCPGBA_RTL) ? + str->cursor->end - str->cursor->start : 0; + + return str; +} + +void +ucstring_free(ucstring_t *s) +{ + ucrun_t *l, *r; + + if (s == 0) + return; + + for (l = 0, r = s->visual_first; r != 0; r = r->visual_next) { + if (r->end > r->start) + free((char *) r->chars); + if (l) + free((char *) l); + l = r; + } + if (l) + free((char *) l); + + free((char *) s); +} + +int +ucstring_set_cursor_motion(ucstring_t *str, int cursor_motion) +{ + int n; + + if (str == 0) + return -1; + + n = str->cursor_motion; + str->cursor_motion = cursor_motion; + return n; +} + +static int +_ucstring_visual_cursor_right(ucstring_t *str, int count) +{ + int cnt = count; + unsigned long size; + ucrun_t *cursor; + + if (str == 0) + return 0; + + cursor = str->cursor; + while (cnt > 0) { + size = cursor->end - cursor->start; + if ((cursor->direction == UCPGBA_RTL && cursor->cursor + 1 == size) || + cursor->cursor + 1 > size) { + /* + * If the next run is NULL, then the cursor is already on the + * far right end already. + */ + if (cursor->visual_next == 0) + /* + * If movement occured, then report it. + */ + return (cnt != count); + + /* + * Move to the next run. + */ + str->cursor = cursor = cursor->visual_next; + cursor->cursor = (cursor->direction == UCPGBA_RTL) ? -1 : 0; + size = cursor->end - cursor->start; + } else + cursor->cursor++; + cnt--; + } + return 1; +} + +static int +_ucstring_logical_cursor_right(ucstring_t *str, int count) +{ + int cnt = count; + unsigned long size; + ucrun_t *cursor; + + if (str == 0) + return 0; + + cursor = str->cursor; + while (cnt > 0) { + size = cursor->end - cursor->start; + if (str->direction == UCPGBA_RTL) { + if (cursor->direction == UCPGBA_RTL) { + if (cursor->cursor + 1 == size) { + if (cursor == str->logical_first) + /* + * Already at the beginning of the string. + */ + return (cnt != count); + + str->cursor = cursor = cursor->logical_prev; + size = cursor->end - cursor->start; + cursor->cursor = (cursor->direction == UCPGBA_LTR) ? + size : 0; + } else + cursor->cursor++; + } else { + if (cursor->cursor == 0) { + if (cursor == str->logical_first) + /* + * At the beginning of the string already. + */ + return (cnt != count); + + str->cursor = cursor = cursor->logical_prev; + size = cursor->end - cursor->start; + cursor->cursor = (cursor->direction == UCPGBA_LTR) ? + size : 0; + } else + cursor->cursor--; + } + } else { + if (cursor->direction == UCPGBA_RTL) { + if (cursor->cursor == 0) { + if (cursor == str->logical_last) + /* + * Already at the end of the string. + */ + return (cnt != count); + + str->cursor = cursor = cursor->logical_next; + size = cursor->end - cursor->start; + cursor->cursor = (cursor->direction == UCPGBA_LTR) ? + 0 : size - 1; + } else + cursor->cursor--; + } else { + if (cursor->cursor + 1 > size) { + if (cursor == str->logical_last) + /* + * Already at the end of the string. + */ + return (cnt != count); + + str->cursor = cursor = cursor->logical_next; + cursor->cursor = (cursor->direction == UCPGBA_LTR) ? + 0 : size - 1; + } else + cursor->cursor++; + } + } + cnt--; + } + return 1; +} + +int +ucstring_cursor_right(ucstring_t *str, int count) +{ + if (str == 0) + return 0; + return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ? + _ucstring_visual_cursor_right(str, count) : + _ucstring_logical_cursor_right(str, count); +} + +static int +_ucstring_visual_cursor_left(ucstring_t *str, int count) +{ + int cnt = count; + unsigned long size; + ucrun_t *cursor; + + if (str == 0) + return 0; + + cursor = str->cursor; + while (cnt > 0) { + size = cursor->end - cursor->start; + if ((cursor->direction == UCPGBA_LTR && cursor->cursor == 0) || + cursor->cursor - 1 < -1) { + /* + * If the preceding run is NULL, then the cursor is already on the + * far left end already. + */ + if (cursor->visual_prev == 0) + /* + * If movement occured, then report it. + */ + return (cnt != count); + + /* + * Move to the previous run. + */ + str->cursor = cursor = cursor->visual_prev; + size = cursor->end - cursor->start; + cursor->cursor = (cursor->direction == UCPGBA_RTL) ? + size : size - 1; + } else + cursor->cursor--; + cnt--; + } + return 1; +} + +static int +_ucstring_logical_cursor_left(ucstring_t *str, int count) +{ + int cnt = count; + unsigned long size; + ucrun_t *cursor; + + if (str == 0) + return 0; + + cursor = str->cursor; + while (cnt > 0) { + size = cursor->end - cursor->start; + if (str->direction == UCPGBA_RTL) { + if (cursor->direction == UCPGBA_RTL) { + if (cursor->cursor == -1) { + if (cursor == str->logical_last) + /* + * Already at the end of the string. + */ + return (cnt != count); + + str->cursor = cursor = cursor->logical_next; + size = cursor->end - cursor->start; + cursor->cursor = (cursor->direction == UCPGBA_LTR) ? + 0 : size - 1; + } else + cursor->cursor--; + } else { + if (cursor->cursor + 1 > size) { + if (cursor == str->logical_last) + /* + * At the end of the string already. + */ + return (cnt != count); + + str->cursor = cursor = cursor->logical_next; + size = cursor->end - cursor->start; + cursor->cursor = (cursor->direction == UCPGBA_LTR) ? + 0 : size - 1; + } else + cursor->cursor++; + } + } else { + if (cursor->direction == UCPGBA_RTL) { + if (cursor->cursor + 1 == size) { + if (cursor == str->logical_first) + /* + * Already at the beginning of the string. + */ + return (cnt != count); + + str->cursor = cursor = cursor->logical_prev; + size = cursor->end - cursor->start; + cursor->cursor = (cursor->direction == UCPGBA_LTR) ? + size : 0; + } else + cursor->cursor++; + } else { + if (cursor->cursor == 0) { + if (cursor == str->logical_first) + /* + * Already at the beginning of the string. + */ + return (cnt != count); + + str->cursor = cursor = cursor->logical_prev; + cursor->cursor = (cursor->direction == UCPGBA_LTR) ? + size : 0; + } else + cursor->cursor--; + } + } + cnt--; + } + return 1; +} + +int +ucstring_cursor_left(ucstring_t *str, int count) +{ + if (str == 0) + return 0; + return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ? + _ucstring_visual_cursor_left(str, count) : + _ucstring_logical_cursor_left(str, count); +} + +void +ucstring_cursor_info(ucstring_t *str, int *direction, unsigned long *position) +{ + long c; + unsigned long size; + ucrun_t *cursor; + + if (str == 0 || direction == 0 || position == 0) + return; + + cursor = str->cursor; + + *direction = cursor->direction; + + c = cursor->cursor; + size = cursor->end - cursor->start; + + if (c == size) + *position = (cursor->direction == UCPGBA_RTL) ? + cursor->start : cursor->positions[c - 1]; + else if (c == -1) + *position = (cursor->direction == UCPGBA_RTL) ? + cursor->end : cursor->start; + else + *position = cursor->positions[c]; +} |