summaryrefslogtreecommitdiffstats
path: root/libraries/liblunicode/ucdata/api.txt
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--libraries/liblunicode/ucdata/api.txt401
1 files changed, 401 insertions, 0 deletions
diff --git a/libraries/liblunicode/ucdata/api.txt b/libraries/liblunicode/ucdata/api.txt
new file mode 100644
index 0000000..f4be819
--- /dev/null
+++ b/libraries/liblunicode/ucdata/api.txt
@@ -0,0 +1,401 @@
+#
+# $Id: api.txt,v 1.3 2001/01/02 18:46:20 mleisher Exp $
+#
+
+ The MUTT UCData API
+ -------------------
+
+
+####
+NOTE: This library has been customized for use with OpenLDAP. The character
+data tables are hardcoded into the library and the load/unload/reload
+functions are no-ops. Also, the MUTT API claimed to be compatible with
+John Cowan's library but its ucnumber behavior was broken. This has been
+fixed in the OpenLDAP release.
+
+By default, the implementation specific properties in MUTTUCData.txt are
+not incorporated into the OpenLDAP build. You can supply them to ucgendat
+and recreate uctable.h if you need them.
+ -- hyc@openldap.org
+####
+
+
+-----------------------------------------------------------------------------
+
+Macros that combine to select data tables for ucdata_load(), ucdata_unload(),
+and ucdata_reload().
+
+#define UCDATA_CASE 0x01
+#define UCDATA_CTYPE 0x02
+#define UCDATA_DECOMP 0x04
+#define UCDATA_CMBCL 0x08
+#define UCDATA_NUM 0x10
+#define UCDATA_COMP 0x20
+#define UCDATA_ALL (UCDATA_CASE|UCDATA_CTYPE|UCDATA_DECOMP|\
+ UCDATA_CMBCL|UCDATA_NUM|UCDATA_COMP)
+-----------------------------------------------------------------------------
+
+void ucdata_load(char *paths, int masks)
+
+ This function initializes the UCData library by locating the data files in
+ one of the colon-separated directories in the `paths' parameter. The data
+ files to be loaded are specified in the `masks' parameter as a bitwise
+ combination of the macros listed above.
+
+ This should be called before using any of the other functions.
+
+ NOTE: the ucdata_setup(char *paths) function is now a macro that expands
+ into this function at compile time.
+
+-----------------------------------------------------------------------------
+
+void ucdata_unload(int masks)
+
+ This function unloads the data tables specified in the `masks' parameter.
+
+ This function should be called when the application is done using the UCData
+ package.
+
+ NOTE: the ucdata_cleanup() function is now a macro that expands into this
+ function at compile time.
+
+-----------------------------------------------------------------------------
+
+void ucdata_reload(char *paths, int masks)
+
+ This function reloads the data files from one of the colon-separated
+ directories in the `paths' parameter. The data files to be reloaded are
+ specified in the `masks' parameter as a bitwise combination of the macros
+ listed above.
+
+ If the data files have already been loaded, they are unloaded before the
+ data files are loaded again.
+
+-----------------------------------------------------------------------------
+
+int ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
+
+ This function determines if a character has a decomposition and returns the
+ decomposition information if it exists.
+
+ If a zero is returned, there is no decomposition. If a non-zero is
+ returned, then the `num' and `decomp' variables are filled in with the
+ appropriate values.
+
+ Example call:
+
+ unsigned long i, num, *decomp;
+
+ if (ucdecomp(0x1d5, &num, &decomp) != 0) {
+ for (i = 0; i < num; i++)
+ printf("0x%08lX,", decomp[i]);
+ putchar('\n');
+ }
+
+int uccanondecomp(const unsigned long *in, int inlen, unsigned long **out,
+ int *outlen)
+
+ This function decomposes an input string and does canonical reordering of
+ the characters at the same time.
+
+ If a -1 is returned, memory allocation was not successful. If a zero is
+ returned, no decomposition occurred. Any other value means the output string
+ contains the fully decomposed string in canonical order.
+
+ If the "outlen" parameter comes back with a value > 0, then the string
+ returned in the "out" parameter needs to be deallocated by the caller.
+
+-----------------------------------------------------------------------------
+
+int ucdecomp_hangul(unsigned long code, unsigned long *num,
+ unsigned long decomp[])
+
+ This function determines if a Hangul syllable has a decomposition and
+ returns the decomposition information.
+
+ An array of at least size 3 should be passed to the function for the
+ decomposition of the syllable.
+
+ If a zero is returned, the character is not a Hangul syllable. If a
+ non-zero is returned, the `num' field will be 2 or 3 and the syllable will
+ be decomposed into the `decomp' array arithmetically.
+
+ Example call:
+
+ unsigned long i, num, decomp[3];
+
+ if (ucdecomp_hangul(0xb1ba, &num, decomp) != 0) {
+ for (i = 0; i < num; i++)
+ printf("0x%08lX,", decomp[i]);
+ putchar('\n');
+ }
+
+-----------------------------------------------------------------------------
+
+int uccomp(unsigned long ch1, unsigned long ch2, unsigned long *comp)
+
+ This function takes a pair of characters and determines if they combine to
+ form another character.
+
+ If a zero is returned, no composition is formed by the character pair. Any
+ other value indicates the "comp" parameter has a value.
+
+int uccomp_hangul(unsigned long *str, int len)
+
+ This function composes the Hangul Jamo in the string. The composition is
+ done in-place.
+
+ The return value provides the new length of the string. This will be
+ smaller than "len" if compositions occurred.
+
+int uccanoncomp(unsigned long *str, int len)
+
+ This function does a canonical composition of characters in the string.
+
+ The return value is the new length of the string.
+
+-----------------------------------------------------------------------------
+
+struct ucnumber {
+ int numerator;
+ int denominator;
+};
+
+int ucnumber_lookup(unsigned long code, struct ucnumber *num)
+
+ This function determines if the code is a number and fills in the `num'
+ field with the numerator and denominator. If the code happens to be a
+ single digit, the denominator field will be 1.
+
+####
+The original code would set numerator = denominator for regular digits.
+However, the Readme also claimed to be compatible with John Cowan's uctype
+library, but this behavior is both nonsensical and incompatible with the
+Cowan library. As such, it has been fixed here as described above.
+ -- hyc@openldap.org
+####
+
+ If the function returns 0, the code is not a number. Any other return
+ value means the code is a number.
+
+int ucdigit_lookup(unsigned long code, int *digit)
+
+ This function determines if the code is a digit and fills in the `digit'
+ field with the digit value.
+
+ If the function returns 0, the code is not a digit. Any other return
+ value means the code is a digit.
+
+struct ucnumber ucgetnumber(unsigned long code)
+
+ This is a compatibility function with John Cowan's "uctype" package. It
+ uses ucnumber_lookup().
+
+int ucgetdigit(unsigned long code)
+
+ This is a compatibility function with John Cowan's "uctype" package. It
+ uses ucdigit_lookup().
+
+-----------------------------------------------------------------------------
+
+unsigned long uctoupper(unsigned long code)
+
+ This function returns the code unchanged if it is already upper case or has
+ no upper case equivalent. Otherwise the upper case equivalent is returned.
+
+-----------------------------------------------------------------------------
+
+unsigned long uctolower(unsigned long code)
+
+ This function returns the code unchanged if it is already lower case or has
+ no lower case equivalent. Otherwise the lower case equivalent is returned.
+
+-----------------------------------------------------------------------------
+
+unsigned long uctotitle(unsigned long code)
+
+ This function returns the code unchanged if it is already title case or has
+ no title case equivalent. Otherwise the title case equivalent is returned.
+
+-----------------------------------------------------------------------------
+
+int ucisalpha(unsigned long code)
+int ucisalnum(unsigned long code)
+int ucisdigit(unsigned long code)
+int uciscntrl(unsigned long code)
+int ucisspace(unsigned long code)
+int ucisblank(unsigned long code)
+int ucispunct(unsigned long code)
+int ucisgraph(unsigned long code)
+int ucisprint(unsigned long code)
+int ucisxdigit(unsigned long code)
+
+int ucisupper(unsigned long code)
+int ucislower(unsigned long code)
+int ucistitle(unsigned long code)
+
+ These functions (actually macros) determine if a character has these
+ properties. These behave in a fashion very similar to the venerable ctype
+ package.
+
+-----------------------------------------------------------------------------
+
+int ucisisocntrl(unsigned long code)
+
+ Is the character a C0 control character (< 32) ?
+
+int ucisfmtcntrl(unsigned long code)
+
+ Is the character a format control character?
+
+int ucissymbol(unsigned long code)
+
+ Is the character a symbol?
+
+int ucisnumber(unsigned long code)
+
+ Is the character a number or digit?
+
+int ucisnonspacing(unsigned long code)
+
+ Is the character non-spacing?
+
+int ucisopenpunct(unsigned long code)
+
+ Is the character an open/left punctuation (e.g. '[')?
+
+int ucisclosepunct(unsigned long code)
+
+ Is the character a close/right punctuation (e.g. ']')?
+
+int ucisinitialpunct(unsigned long code)
+
+ Is the character an initial punctuation (e.g. U+2018 LEFT SINGLE QUOTATION
+ MARK)?
+
+int ucisfinalpunct(unsigned long code)
+
+ Is the character a final punctuation (e.g. U+2019 RIGHT SINGLE QUOTATION
+ MARK)?
+
+int uciscomposite(unsigned long code)
+
+ Can the character be decomposed into a set of other characters?
+
+int ucisquote(unsigned long code)
+
+ Is the character one of the many quotation marks?
+
+int ucissymmetric(unsigned long code)
+
+ Is the character one that has an opposite form (e.g. '<' and '>')?
+
+int ucismirroring(unsigned long code)
+
+ Is the character mirroring (superset of symmetric)?
+
+int ucisnonbreaking(unsigned long code)
+
+ Is the character non-breaking (e.g. non-breaking space)?
+
+int ucisrtl(unsigned long code)
+
+ Does the character have strong right-to-left directionality (e.g. Arabic
+ letters)?
+
+int ucisltr(unsigned long code)
+
+ Does the character have strong left-to-right directionality (e.g. Latin
+ letters)?
+
+int ucisstrong(unsigned long code)
+
+ Does the character have strong directionality?
+
+int ucisweak(unsigned long code)
+
+ Does the character have weak directionality (e.g. numbers)?
+
+int ucisneutral(unsigned long code)
+
+ Does the character have neutral directionality (e.g. whitespace)?
+
+int ucisseparator(unsigned long code)
+
+ Is the character a block or segment separator?
+
+int ucislsep(unsigned long code)
+
+ Is the character a line separator?
+
+int ucispsep(unsigned long code)
+
+ Is the character a paragraph separator?
+
+int ucismark(unsigned long code)
+
+ Is the character a mark of some kind?
+
+int ucisnsmark(unsigned long code)
+
+ Is the character a non-spacing mark?
+
+int ucisspmark(unsigned long code)
+
+ Is the character a spacing mark?
+
+int ucismodif(unsigned long code)
+
+ Is the character a modifier letter?
+
+int ucismodifsymbol(unsigned long code)
+
+ Is the character a modifier symbol?
+
+int ucisletnum(unsigned long code)
+
+ Is the character a number represented by a letter?
+
+int ucisconnect(unsigned long code)
+
+ Is the character connecting punctuation?
+
+int ucisdash(unsigned long code)
+
+ Is the character dash punctuation?
+
+int ucismath(unsigned long code)
+
+ Is the character a math character?
+
+int uciscurrency(unsigned long code)
+
+ Is the character a currency character?
+
+int ucisenclosing(unsigned long code)
+
+ Is the character enclosing (e.g. enclosing box)?
+
+int ucisprivate(unsigned long code)
+
+ Is the character from the Private Use Area?
+
+int ucissurrogate(unsigned long code)
+
+ Is the character one of the surrogate codes?
+
+int ucisdefined(unsigned long code)
+
+ Is the character defined (appeared in one of the data files)?
+
+int ucisundefined(unsigned long code)
+
+ Is the character not defined (non-Unicode)?
+
+int ucishan(unsigned long code)
+
+ Is the character a Han ideograph?
+
+int ucishangul(unsigned long code)
+
+ Is the character a pre-composed Hangul syllable?