diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/liblunicode/ucdata/ucdata.man | 504 |
1 files changed, 504 insertions, 0 deletions
diff --git a/libraries/liblunicode/ucdata/ucdata.man b/libraries/liblunicode/ucdata/ucdata.man new file mode 100644 index 0000000..54df484 --- /dev/null +++ b/libraries/liblunicode/ucdata/ucdata.man @@ -0,0 +1,504 @@ +.\" +.\" $Id: ucdata.man,v 1.5 2001/01/02 18:46:20 mleisher Exp $ +.\" +.TH ucdata 3 "03 January 2001" +.SH NAME +ucdata \- package for providing Unicode/ISO10646 character information + +.SH SYNOPSIS +#include <ucdata.h> +.sp +void ucdata_load(char * paths, int masks) +.sp +void ucdata_unload(int masks) +.sp +void ucdata_reload(char * paths, int masks) +.sp +int ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) +.sp +int uccanondecomp(const unsigned long *in, int inlen, unsigned long **out, +int *outlen) +.sp +int ucdecomp_hangul(unsigned long code, unsigned long *num, +unsigned long decomp[]) +.sp +int uccomp(unsigned long ch1, unsigned long ch2, unsigned long *comp) +.sp +int uccomp_hangul(unsigned long *str, int len) +.sp +int uccanoncomp(unsiged long *str, int len) +.nf +struct ucnumber { + int numerator; + int denominator; +}; +.sp +int ucnumber_lookup(unsigned long code, struct ucnumber *num) +.sp +int ucdigit_lookup(unsigned long code, int *digit) +.sp +struct ucnumber ucgetnumber(unsigned long code) +.sp +int ucgetdigit(unsigned long code) +.sp +unsigned long uctoupper(unsigned long code) +.sp +unsigned long uctolower(unsigned long code) +.sp +unsigned long uctotitle(unsigned long code) +.sp +int ucisalpha(unsigned long code) +.sp +int ucisalnum(unsigned long code) +.sp +int ucisdigit(unsigned long code) +.sp +int uciscntrl(unsigned long code) +.sp +int ucisspace(unsigned long code) +.sp +int ucisblank(unsigned long code) +.sp +int ucispunct(unsigned long code) +.sp +int ucisgraph(unsigned long code) +.sp +int ucisprint(unsigned long code) +.sp +int ucisxdigit(unsigned long code) +.sp +int ucisupper(unsigned long code) +.sp +int ucislower(unsigned long code) +.sp +int ucistitle(unsigned long code) +.sp +int ucisisocntrl(unsigned long code) +.sp +int ucisfmtcntrl(unsigned long code) +.sp +int ucissymbol(unsigned long code) +.sp +int ucisnumber(unsigned long code) +.sp +int ucisnonspacing(unsigned long code) +.sp +int ucisopenpunct(unsigned long code) +.sp +int ucisclosepunct(unsigned long code) +.sp +int ucisinitialpunct(unsigned long code) +.sp +int ucisfinalpunct(unsigned long code) +.sp +int uciscomposite(unsigned long code) +.sp +int ucisquote(unsigned long code) +.sp +int ucissymmetric(unsigned long code) +.sp +int ucismirroring(unsigned long code) +.sp +int ucisnonbreaking(unsigned long code) +.sp +int ucisrtl(unsigned long code) +.sp +int ucisltr(unsigned long code) +.sp +int ucisstrong(unsigned long code) +.sp +int ucisweak(unsigned long code) +.sp +int ucisneutral(unsigned long code) +.sp +int ucisseparator(unsigned long code) +.sp +int ucislsep(unsigned long code) +.sp +int ucispsep(unsigned long code) +.sp +int ucismark(unsigned long code) +.sp +int ucisnsmark(unsigned long code) +.sp +int ucisspmark(unsigned long code) +.sp +int ucismodif(unsigned long code) +.sp +int ucismodifsymbol(unsigned long code) +.sp +int ucisletnum(unsigned long code) +.sp +int ucisconnect(unsigned long code) +.sp +int ucisdash(unsigned long code) +.sp +int ucismath(unsigned long code) +.sp +int uciscurrency(unsigned long code) +.sp +int ucisenclosing(unsigned long code) +.sp +int ucisprivate(unsigned long code) +.sp +int ucissurrogate(unsigned long code) +.sp +int ucisidentstart(unsigned long code) +.sp +int ucisidentpart(unsigned long code) +.sp +int ucisdefined(unsigned long code) +.sp +int ucisundefined(unsigned long code) +.sp +int ucishan(unsigned long code) +.sp +int ucishangul(unsigned long code) + +.SH DESCRIPTION +.TP 4 +.BR Macros +.br +UCDATA_CASE +.br +UCDATA_CTYPE +.br +UCDATA_DECOMP +.br +UCDATA_CMBCL +.br +UCDATA_NUM +.br +UCDATA_ALL +.br +.TP 4 +.BR ucdata_load() +This function initializes the UCData library by locating the data files in one +of the colon-separated directories in the `paths' parameter. The data files +to be loaded are specified in the `masks' parameter as a bitwise combination +of the macros listed above. +.sp +This should be called before using any of the other functions. +.TP 4 +.BR ucdata_unload() +This function unloads the data tables specified in the `masks' parameter. +.sp +This function should be called when the application is done using the UCData +package. +.TP 4 +.BR ucdata_reload() +This function reloads the data files from one of the colon-separated +directories in the `paths' parameter. The data files to be reloaded are +specified in the `masks' parameter as a bitwise combination of the macros +listed above. +.TP 4 +.BR ucdecomp() +This function determines if a character has a decomposition and returns the +decomposition information if it exists. +.sp +If a zero is returned, there is no decomposition. If a non-zero is +returned, then the `num' and `decomp' variables are filled in with the +appropriate values. +.sp +Example call: +.sp +.nf + unsigned long i, num, *decomp; + + if (ucdecomp(0x1d5, &num, &decomp) != 0) { + for (i = 0; i < num; i++) + printf("0x%08lX,", decomp[i]); + putchar('\n'); + } +.TP 4 +.BR uccanondecomp() +This function will decompose a string, insuring the characters are in +canonical order for comparison. +.sp +If a decomposed string is returned, the caller is responsible for deallocating +the string. +.sp +If a -1 is returned, memory allocation failed. If a zero is returned, no +decomposition was done. Any other value means a decomposition string was +created and the values returned in the `out' and `outlen' parameters. +.TP 4 +.BR ucdecomp_hangul() +This function determines if a Hangul syllable has a +decomposition and returns the decomposition information. +.sp +An array of at least size 3 should be passed to the function +for the decomposition of the syllable. +.sp +If a zero is returned, the character is not a Hangul +syllable. If a non-zero is returned, the `num' field +will be 2 or 3 and the syllable will be decomposed into +the `decomp' array arithmetically. +.sp +Example call: +.sp +.nf + unsigned long i, num, decomp[3]; + + if (ucdecomp_hangul(0xb1ba, &num, &decomp) != 0) { + for (i = 0; i < num; i++) + printf("0x%08lX,", decomp[i]); + putchar('\n'); + } +.TP 4 +.BR uccomp() +This function determines if a pair of characters have a composition, and +returns that composition if one exists. +.sp +A zero is returned is no composition exists for the character pair. Any other +value indicates the `comp' field holds the character code representing the +composition of the two character codes. +.TP 4 +.BR uccomp_hangul() +This composes the Hangul Jamo in-place in the string. +.sp +The returned value is the new length of the string. +.TP 4 +.BR uccanoncomp() +This function does a full composition in-place in the string, including the +Hangul composition. +.sp +The returned value is the new length of the string. +.TP 4 +.BR ucnumber_lookup() +This function determines if the code is a number and +fills in the `num' field with the numerator and +denominator. If the code happens to be a single digit, +the numerator and denominator fields will be the same. +.sp +If the function returns 0, the code is not a number. +Any other return value means the code is a number. +.TP 4 +.BR ucdigit_lookup() +This function determines if the code is a digit and +fills in the `digit' field with the digit value. +.sp +If the function returns 0, the code is not a number. +Any other return value means the code is a number. +.TP 4 +.BR ucgetnumber() +This is a compatibility function with John Cowan's +"uctype" package. It uses ucnumber_lookup(). +.TP 4 +.BR ucgetdigit() +This is a compatibility function with John Cowan's +"uctype" package. It uses ucdigit_lookup(). +.TP 4 +.BR uctoupper() +This function returns the code unchanged if it is +already upper case or has no upper case equivalent. +Otherwise the upper case equivalent is returned. +.TP 4 +.BR uctolower() +This function returns the code unchanged if it is +already lower case or has no lower case equivalent. +Otherwise the lower case equivalent is returned. +.TP 4 +.BR uctotitle() +This function returns the code unchanged if it is +already title case or has no title case equivalent. +Otherwise the title case equivalent is returned. +.TP 4 +.BR ucisalpha() +Test if \fIcode\fR is an alpha character. +.TP 4 +.BR ucisalnum() +Test if \fIcode\fR is an alpha or digit character. +.TP 4 +.BR ucisdigit() +Test if \fIcode\fR is a digit character. +.TP 4 +.BR uciscntrl() +Test if \fIcode\fR is a control character. +.TP 4 +.BR ucisspace() +Test if \fIcode\fR is a space character. +.TP 4 +.BR ucisblank() +Test if \fIcode\fR is a blank character. +.TP 4 +.BR ucispunct() +Test if \fIcode\fR is a punctuation character. +.TP 4 +.BR ucisgraph() +Test if \fIcode\fR is a graphical (visible) character. +.TP 4 +.BR ucisprint() +Test if \fIcode\fR is a printable character. +.TP 4 +.BR ucisxdigit() +Test if \fIcode\fR is a hexadecimal digit character. +.TP 4 +.BR ucisupper() +Test if \fIcode\fR is an upper case character. +.TP 4 +.BR ucislower() +Test if \fIcode\fR is a lower case character. +.TP 4 +.BR ucistitle() +Test if \fIcode\fR is a title case character. +.TP 4 +.BR ucisisocntrl() +Is the character a C0 control character (< 32)? +.TP 4 +.BR ucisfmtcntrl() +Is the character a format control character? +.TP 4 +.BR ucissymbol() +Is the character a symbol? +.TP 4 +.BR ucisnumber() +Is the character a number or digit? +.TP 4 +.BR ucisnonspacing() +Is the character non-spacing? +.TP 4 +.BR ucisopenpunct() +Is the character an open/left punctuation (i.e. '[') +.TP 4 +.BR ucisclosepunct() +Is the character an close/right punctuation (i.e. ']') +.TP 4 +.BR ucisinitialpunct() +Is the character an initial punctuation (i.e. U+2018 LEFT +SINGLE QUOTATION MARK) +.TP 4 +.BR ucisfinalpunct() +Is the character a final punctuation (i.e. U+2019 RIGHT +SINGLE QUOTATION MARK) +.TP 4 +.BR uciscomposite() +Can the character be decomposed into a set of other +characters? +.TP 4 +.BR ucisquote() +Is the character one of the many quotation marks? +.TP 4 +.BR ucissymmetric() +Is the character one that has an opposite form +(i.e. <>) +.TP 4 +.BR ucismirroring() +Is the character mirroring (superset of symmetric)? +.TP 4 +.BR ucisnonbreaking() +Is the character non-breaking (i.e. non-breaking +space)? +.TP 4 +.BR ucisrtl() +Does the character have strong right-to-left +directionality (i.e. Arabic letters)? +.TP 4 +.BR ucisltr() +Does the character have strong left-to-right +directionality (i.e. Latin letters)? +.TP 4 +.BR ucisstrong() +Does the character have strong directionality? +.TP 4 +.BR ucisweak() +Does the character have weak directionality +(i.e. numbers)? +.TP 4 +.BR ucisneutral() +Does the character have neutral directionality +(i.e. whitespace)? +.TP 4 +.BR ucisseparator() +Is the character a block or segment separator? +.TP 4 +.BR ucislsep() +Is the character a line separator? +.TP 4 +.BR ucispsep() +Is the character a paragraph separator? +.TP 4 +.BR ucismark() +Is the character a mark of some kind? +.TP 4 +.BR ucisnsmark() +Is the character a non-spacing mark? +.TP 4 +.BR ucisspmark() +Is the character a spacing mark? +.TP 4 +.BR ucismodif() +Is the character a modifier letter? +.TP 4 +.BR ucismodifsymbol() +Is the character a modifier symbol? +.TP 4 +.BR ucisletnum() +Is the character a number represented by a letter? +.TP 4 +.BR ucisconnect() +Is the character connecting punctuation? +.TP 4 +.BR ucisdash() +Is the character dash punctuation? +.TP 4 +.BR ucismath() +Is the character a math character? +.TP 4 +.BR uciscurrency() +Is the character a currency character? +.TP 4 +.BR ucisenclosing() +Is the character enclosing (i.e. enclosing box)? +.TP 4 +.BR ucisprivate() +Is the character from the Private Use Area? +.TP 4 +.BR ucissurrogate() +Is the character one of the surrogate codes? +.TP 4 +.BR ucisidentstart() +Is the character a legal initial character of an identifier? +.TP 4 +.BR ucisidentpart() +Is the character a legal identifier character? +.TP 4 +.BR ucisdefined() +Is the character defined (appeared in one of the data +files)? +.TP 4 +.BR ucisundefined() +Is the character not defined (non-Unicode)? +.TP 4 +.BR ucishan() +Is the character a Han ideograph? +.TP 4 +.BR ucishangul() +Is the character a pre-composed Hangul syllable? + +.SH "SEE ALSO" +ctype(3) + +.SH ACKNOWLEDGMENTS +These are people who have helped with patches or +alerted me about problems. +.sp +John Cowan <cowan@locke.ccil.org> +.br +Bob Verbrugge <bob_verbrugge@nl.compuware.com> +.br +Christophe Pierret <cpierret@businessobjects.com> +.br +Kent Johnson <kent@pondview.mv.com> +.br +Valeriy E. Ushakov <uwe@ptc.spbu.ru> +.br +Stig Venaas <Stig.Venaas@uninett.no> + +.SH AUTHOR +Mark Leisher +.br +Computing Research Lab +.br +New Mexico State University +.br +Email: mleisher@crl.nmsu.edu |