summaryrefslogtreecommitdiffstats
path: root/libraries/liblunicode/utbm/README
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/liblunicode/utbm/README')
-rw-r--r--libraries/liblunicode/utbm/README121
1 files changed, 121 insertions, 0 deletions
diff --git a/libraries/liblunicode/utbm/README b/libraries/liblunicode/utbm/README
new file mode 100644
index 0000000..2a62d3c
--- /dev/null
+++ b/libraries/liblunicode/utbm/README
@@ -0,0 +1,121 @@
+#
+# $Id: README,v 1.1 1999/09/21 15:45:17 mleisher Exp $
+#
+# Copyright 1997, 1998, 1999 Computing Research Labs,
+# New Mexico State University
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+ Unicode and Boyer-Moore Searching
+ Version 0.2
+
+UTBM (Unicode Tuned Boyer-Moore) is a simple package that provides tuned
+Boyer-Moore searches on Unicode UCS2 text (handles high and low surrogates).
+
+---------------------------------------------------------------------------
+
+Assumptions:
+
+ o Search pattern and text already normalized in some fashion.
+
+ o Upper, lower, and title case conversions are one-to-one.
+
+ o For conversions between upper, lower, and title case, UCS2 characters
+ always convert to other UCS2 characters, and UTF-16 characters always
+ convert to other UTF-16 characters.
+
+Flags:
+
+ UTBM provides three processing flags:
+
+ o UTBM_CASEFOLD - search in a case-insensitive manner.
+
+ o UTBM_IGNORE_NONSPACING - ignore non-spacing characters in the pattern and
+ the text.
+
+ o UTBM_SPACE_COMPRESS - treat sequential groups of U+2028, U+2029,
+ '\n', '\r', '\t', and any character identified
+ as a space by the Unicode support on the
+ platform as a *single space*.
+
+ This flag also causes all characters identified
+ as control by the Unicode support on the
+ platform to be ignored (except for '\n', '\r',
+ and '\t').
+
+---------------------------------------------------------------------------
+
+Before using UTBM
+-----------------
+Before UTBM can be used, some platform-specific functions need to be
+implemented. The "utbmstub.c" file contains stubs that need to be rewritten
+so they work with the Unicode support on the platform on which this package
+is being used.
+
+Using UTBM
+----------
+
+Sample pseudo-code fragment.
+
+ utbm_pattern_t pat;
+ ucs2_t *pattern, *text;
+ unsigned long patternlen, textlen;
+ unsigned long flags, match_start, match_end;
+
+ /*
+ * Allocate the dynamic storage needed for a search pattern.
+ */
+ pat = utbm_create_pattern();
+
+ /*
+ * Set the search flags desired.
+ */
+ flags = UTBM_CASEFOLD|UTBM_IGNORE_NONSPACING;
+
+ /*
+ * Compile the search pattern.
+ */
+ utbm_compile(pattern, patternlen, flags, pat);
+
+ /*
+ * Find the first occurrence of the search pattern in the text.
+ */
+ if (utbm_exec(pat, text, textlen, &match_start, &match_end))
+ printf("MATCH: %lu %lu\n", match_start, match_end);
+
+ /*
+ * Free the dynamic storage used for the search pattern.
+ */
+ utbm_free_pattern(pat);
+
+---------------------------------------------------------------------------
+
+Mark Leisher <mleisher@crl.nmsu.edu>
+2 May 1997
+
+===========================================================================
+
+CHANGES
+-------
+
+Version: 0.2
+Date : 21 September 1999
+==========================
+ 1. Added copyright stuff and put in CVS.
+