diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/liblunicode/ucdata/api.txt | 401 |
1 files changed, 401 insertions, 0 deletions
diff --git a/libraries/liblunicode/ucdata/api.txt b/libraries/liblunicode/ucdata/api.txt new file mode 100644 index 0000000..f4be819 --- /dev/null +++ b/libraries/liblunicode/ucdata/api.txt @@ -0,0 +1,401 @@ +# +# $Id: api.txt,v 1.3 2001/01/02 18:46:20 mleisher Exp $ +# + + The MUTT UCData API + ------------------- + + +#### +NOTE: This library has been customized for use with OpenLDAP. The character +data tables are hardcoded into the library and the load/unload/reload +functions are no-ops. Also, the MUTT API claimed to be compatible with +John Cowan's library but its ucnumber behavior was broken. This has been +fixed in the OpenLDAP release. + +By default, the implementation specific properties in MUTTUCData.txt are +not incorporated into the OpenLDAP build. You can supply them to ucgendat +and recreate uctable.h if you need them. + -- hyc@openldap.org +#### + + +----------------------------------------------------------------------------- + +Macros that combine to select data tables for ucdata_load(), ucdata_unload(), +and ucdata_reload(). + +#define UCDATA_CASE 0x01 +#define UCDATA_CTYPE 0x02 +#define UCDATA_DECOMP 0x04 +#define UCDATA_CMBCL 0x08 +#define UCDATA_NUM 0x10 +#define UCDATA_COMP 0x20 +#define UCATA_ALL (UCDATA_CASE|UCDATA_CTYPE|UCDATA_DECOMP|\ + UCDATA_CMBCL|UCDATA_NUM|UCDATA_COMP) +----------------------------------------------------------------------------- + +void ucdata_load(char *paths, int masks) + + This function initializes the UCData library by locating the data files in + one of the colon-separated directories in the `paths' parameter. The data + files to be loaded are specified in the `masks' parameter as a bitwise + combination of the macros listed above. + + This should be called before using any of the other functions. + + NOTE: the ucdata_setup(char *paths) function is now a macro that expands + into this function at compile time. + +----------------------------------------------------------------------------- + +void ucdata_unload(int masks) + + This function unloads the data tables specified in the `masks' parameter. + + This function should be called when the application is done using the UCData + package. + + NOTE: the ucdata_cleanup() function is now a macro that expands into this + function at compile time. + +----------------------------------------------------------------------------- + +void ucdata_reload(char *paths, int masks) + + This function reloads the data files from one of the colon-separated + directories in the `paths' parameter. The data files to be reloaded are + specified in the `masks' parameter as a bitwise combination of the macros + listed above. + + If the data files have already been loaded, they are unloaded before the + data files are loaded again. + +----------------------------------------------------------------------------- + +int ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) + + This function determines if a character has a decomposition and returns the + decomposition information if it exists. + + If a zero is returned, there is no decomposition. If a non-zero is + returned, then the `num' and `decomp' variables are filled in with the + appropriate values. + + Example call: + + unsigned long i, num, *decomp; + + if (ucdecomp(0x1d5, &num, &decomp) != 0) { + for (i = 0; i < num; i++) + printf("0x%08lX,", decomp[i]); + putchar('\n'); + } + +int uccanondecomp(const unsigned long *in, int inlen, unsigned long **out, + int *outlen) + + This function decomposes an input string and does canonical reordering of + the characters at the same time. + + If a -1 is returned, memory allocation was not successful. If a zero is + returned, no decomposition occurred. Any other value means the output string + contains the fully decomposed string in canonical order. + + If the "outlen" parameter comes back with a value > 0, then the string + returned in the "out" parameter needs to be deallocated by the caller. + +----------------------------------------------------------------------------- + +int ucdecomp_hangul(unsigned long code, unsigned long *num, + unsigned long decomp[]) + + This function determines if a Hangul syllable has a decomposition and + returns the decomposition information. + + An array of at least size 3 should be passed to the function for the + decomposition of the syllable. + + If a zero is returned, the character is not a Hangul syllable. If a + non-zero is returned, the `num' field will be 2 or 3 and the syllable will + be decomposed into the `decomp' array arithmetically. + + Example call: + + unsigned long i, num, decomp[3]; + + if (ucdecomp_hangul(0xb1ba, &num, &decomp) != 0) { + for (i = 0; i < num; i++) + printf("0x%08lX,", decomp[i]); + putchar('\n'); + } + +----------------------------------------------------------------------------- + +int uccomp(unsigned long ch1, unsigned long ch2, unsigned long *comp) + + This function takes a pair of characters and determines if they combine to + form another character. + + If a zero is returned, no composition is formed by the character pair. Any + other value indicates the "comp" parameter has a value. + +int uccomp_hangul(unsigned long *str, int len) + + This function composes the Hangul Jamo in the string. The composition is + done in-place. + + The return value provides the new length of the string. This will be + smaller than "len" if compositions occurred. + +int uccanoncomp(unsigned long *str, int len) + + This function does a canonical composition of characters in the string. + + The return value is the new length of the string. + +----------------------------------------------------------------------------- + +struct ucnumber { + int numerator; + int denominator; +}; + +int ucnumber_lookup(unsigned long code, struct ucnumber *num) + + This function determines if the code is a number and fills in the `num' + field with the numerator and denominator. If the code happens to be a + single digit, the denominator field will be 1. + +#### +The original code would set numerator = denominator for regular digits. +However, the Readme also claimed to be compatible with John Cowan's uctype +library, but this behavior is both nonsensical and incompatible with the +Cowan library. As such, it has been fixed here as described above. + -- hyc@openldap.org +#### + + If the function returns 0, the code is not a number. Any other return + value means the code is a number. + +int ucdigit_lookup(unsigned long code, int *digit) + + This function determines if the code is a digit and fills in the `digit' + field with the digit value. + + If the function returns 0, the code is not a number. Any other return + value means the code is a number. + +struct ucnumber ucgetnumber(unsigned long code) + + This is a compatibility function with John Cowan's "uctype" package. It + uses ucnumber_lookup(). + +int ucgetdigit(unsigned long code) + + This is a compatibility function with John Cowan's "uctype" package. It + uses ucdigit_lookup(). + +----------------------------------------------------------------------------- + +unsigned long uctoupper(unsigned long code) + + This function returns the code unchanged if it is already upper case or has + no upper case equivalent. Otherwise the upper case equivalent is returned. + +----------------------------------------------------------------------------- + +unsigned long uctolower(unsigned long code) + + This function returns the code unchanged if it is already lower case or has + no lower case equivalent. Otherwise the lower case equivalent is returned. + +----------------------------------------------------------------------------- + +unsigned long uctotitle(unsigned long code) + + This function returns the code unchanged if it is already title case or has + no title case equivalent. Otherwise the title case equivalent is returned. + +----------------------------------------------------------------------------- + +int ucisalpha(unsigned long code) +int ucisalnum(unsigned long code) +int ucisdigit(unsigned long code) +int uciscntrl(unsigned long code) +int ucisspace(unsigned long code) +int ucisblank(unsigned long code) +int ucispunct(unsigned long code) +int ucisgraph(unsigned long code) +int ucisprint(unsigned long code) +int ucisxdigit(unsigned long code) + +int ucisupper(unsigned long code) +int ucislower(unsigned long code) +int ucistitle(unsigned long code) + + These functions (actually macros) determine if a character has these + properties. These behave in a fashion very similar to the venerable ctype + package. + +----------------------------------------------------------------------------- + +int ucisisocntrl(unsigned long code) + + Is the character a C0 control character (< 32) ? + +int ucisfmtcntrl(unsigned long code) + + Is the character a format control character? + +int ucissymbol(unsigned long code) + + Is the character a symbol? + +int ucisnumber(unsigned long code) + + Is the character a number or digit? + +int ucisnonspacing(unsigned long code) + + Is the character non-spacing? + +int ucisopenpunct(unsigned long code) + + Is the character an open/left punctuation (i.e. '[') + +int ucisclosepunct(unsigned long code) + + Is the character an close/right punctuation (i.e. ']') + +int ucisinitialpunct(unsigned long code) + + Is the character an initial punctuation (i.e. U+2018 LEFT SINGLE QUOTATION + MARK) + +int ucisfinalpunct(unsigned long code) + + Is the character a final punctuation (i.e. U+2019 RIGHT SINGLE QUOTATION + MARK) + +int uciscomposite(unsigned long code) + + Can the character be decomposed into a set of other characters? + +int ucisquote(unsigned long code) + + Is the character one of the many quotation marks? + +int ucissymmetric(unsigned long code) + + Is the character one that has an opposite form (i.e. <>) + +int ucismirroring(unsigned long code) + + Is the character mirroring (superset of symmetric)? + +int ucisnonbreaking(unsigned long code) + + Is the character non-breaking (i.e. non-breaking space)? + +int ucisrtl(unsigned long code) + + Does the character have strong right-to-left directionality (i.e. Arabic + letters)? + +int ucisltr(unsigned long code) + + Does the character have strong left-to-right directionality (i.e. Latin + letters)? + +int ucisstrong(unsigned long code) + + Does the character have strong directionality? + +int ucisweak(unsigned long code) + + Does the character have weak directionality (i.e. numbers)? + +int ucisneutral(unsigned long code) + + Does the character have neutral directionality (i.e. whitespace)? + +int ucisseparator(unsigned long code) + + Is the character a block or segment separator? + +int ucislsep(unsigned long code) + + Is the character a line separator? + +int ucispsep(unsigned long code) + + Is the character a paragraph separator? + +int ucismark(unsigned long code) + + Is the character a mark of some kind? + +int ucisnsmark(unsigned long code) + + Is the character a non-spacing mark? + +int ucisspmark(unsigned long code) + + Is the character a spacing mark? + +int ucismodif(unsigned long code) + + Is the character a modifier letter? + +int ucismodifsymbol(unsigned long code) + + Is the character a modifier symbol? + +int ucisletnum(unsigned long code) + + Is the character a number represented by a letter? + +int ucisconnect(unsigned long code) + + Is the character connecting punctuation? + +int ucisdash(unsigned long code) + + Is the character dash punctuation? + +int ucismath(unsigned long code) + + Is the character a math character? + +int uciscurrency(unsigned long code) + + Is the character a currency character? + +int ucisenclosing(unsigned long code) + + Is the character enclosing (i.e. enclosing box)? + +int ucisprivate(unsigned long code) + + Is the character from the Private Use Area? + +int ucissurrogate(unsigned long code) + + Is the character one of the surrogate codes? + +int ucisdefined(unsigned long code) + + Is the character defined (appeared in one of the data files)? + +int ucisundefined(unsigned long code) + + Is the character not defined (non-Unicode)? + +int ucishan(unsigned long code) + + Is the character a Han ideograph? + +int ucishangul(unsigned long code) + + Is the character a pre-composed Hangul syllable? |