1 files changed, 212 insertions, 0 deletions
diff --git a/libraries/liblunicode/ure/README b/libraries/liblunicode/ure/README
new file mode 100644
index 0000000..c9918f5
--- /dev/null
+++ b/libraries/liblunicode/ure/README
@@ -0,0 +1,212 @@
+#
+# $Id: README,v 1.3 1999/09/21 15:47:43 mleisher Exp $
+#
+# Copyright 1997, 1998, 1999 Computing Research Labs,
+# New Mexico State University
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+
+                       Unicode and Regular Expressions
+                                 Version 0.5
+
+This is a simple regular expression package for matching against Unicode text
+in UCS2 form.  The implementation of this URE package is a variation on the
+RE->DFA algorithm done by Mark Hopkins (markh@csd4.csd.uwm.edu).  Mark
+Hopkins' algorithm had the virtue of being very simple, so it was used as a
+model.
+
+---------------------------------------------------------------------------
+
+Assumptions:
+
+  o  Regular expression and text already normalized.
+
+  o  Conversion to lower case assumes a 1-1 mapping.
+
+Definitions:
+
+  Separator - any one of U+2028, U+2029, '\n', '\r'.
+
+Operators:
+  .   - match any character.
+  *   - match zero or more of the last subexpression.
+  +   - match one or more of the last subexpression.
+  ?   - match zero or one of the last subexpression.
+  ()  - subexpression grouping.
+
+  Notes:
+
+    o  The "." operator normally does not match separators, but a flag is
+       available for the ure_exec() function that will allow this operator to
+       match a separator.
+
+Literals and Constants:
+
+  c       - literal UCS2 character.
+  \x....  - hexadecimal number of up to 4 digits.
+  \X....  - hexadecimal number of up to 4 digits.
+  \u....  - hexadecimal number of up to 4 digits.
+  \U....  - hexadecimal number of up to 4 digits.
+
+Character classes:
+
+  [...]           - Character class.
+  [^...]          - Negated character class.
+  \pN1,N2,...,Nn  - Character properties class.
+  \PN1,N2,...,Nn  - Negated character properties class.
+
+  POSIX character classes recognized:
+
+    :alnum:
+    :alpha:
+    :cntrl:
+    :digit:
+    :graph:
+    :lower:
+    :print:
+    :punct:
+    :space:
+    :upper:
+    :xdigit:
+
+  Notes:
+
+    o  Character property classes are \p or \P followed by a comma separated
+       list of integers between 1 and 32.  These integers are references to
+       the following character properties:
+
+        N	Character Property
+        --------------------------
+        1	_URE_NONSPACING
+        2	_URE_COMBINING
+        3	_URE_NUMDIGIT
+        4	_URE_NUMOTHER
+        5	_URE_SPACESEP
+        6	_URE_LINESEP
+        7	_URE_PARASEP
+        8	_URE_CNTRL
+        9	_URE_PUA
+        10	_URE_UPPER
+        11	_URE_LOWER
+        12	_URE_TITLE
+        13	_URE_MODIFIER
+        14	_URE_OTHERLETTER
+        15	_URE_DASHPUNCT
+        16	_URE_OPENPUNCT
+        17	_URE_CLOSEPUNCT
+        18	_URE_OTHERPUNCT
+        19	_URE_MATHSYM
+        20	_URE_CURRENCYSYM
+        21	_URE_OTHERSYM
+        22	_URE_LTR
+        23	_URE_RTL
+        24	_URE_EURONUM
+        25	_URE_EURONUMSEP
+        26	_URE_EURONUMTERM
+        27	_URE_ARABNUM
+        28	_URE_COMMONSEP
+        29	_URE_BLOCKSEP
+        30	_URE_SEGMENTSEP
+        31	_URE_WHITESPACE
+        32	_URE_OTHERNEUT
+
+    o  Character classes can contain literals, constants, and character
+       property classes. Example:
+
+       [abc\U10A\p1,3,4]
+
+---------------------------------------------------------------------------
+
+Before using URE
+----------------
+Before URE is used, two functions need to be created.  One to check if a
+character matches a set of URE character properties, and one to convert a
+character to lower case.
+
+Stubs for these function are located in the urestubs.c file.
+
+Using URE
+---------
+
+Sample pseudo-code fragment.
+
+  ure_buffer_t rebuf;
+  ure_dfa_t dfa;
+  ucs2_t *re, *text;
+  unsigned long relen, textlen;
+  unsigned long match_start, match_end;
+
+  /*
+   * Allocate the dynamic storage needed to compile regular expressions.
+   */
+  rebuf = ure_buffer_create();
+
+  for each regular expression in a list {
+      re = next regular expression;
+      relen = length(re);
+
+      /*
+       * Compile the regular expression with the case insensitive flag
+       * turned on.
+       */
+      dfa = ure_compile(re, relen, 1, rebuf);
+
+      /*
+       * Look for the first match in some text.  The matching will be done
+       * in a case insensitive manner because the expression was compiled
+       * with the case insensitive flag on.
+       */
+      if (ure_exec(dfa, 0, text, textlen, &match_start, &match_end))
+        printf("MATCH: %ld %ld\n", match_start, match_end);
+
+      /*
+       * Look for the first match in some text, ignoring non-spacing
+       * characters.
+       */
+      if (ure_exec(dfa, URE_IGNORE_NONSPACING, text, textlen,
+                   &match_start, &match_end))
+        printf("MATCH: %ld %ld\n", match_start, match_end);
+
+      /*
+       * Free the DFA.
+       */
+      ure_free_dfa(dfa);
+  }
+
+  /*
+   * Free the dynamic storage used for compiling the expressions.
+   */
+  ure_free_buffer(rebuf);
+
+---------------------------------------------------------------------------
+
+Mark Leisher <mleisher@crl.nmsu.edu>
+29 March 1997
+
+===========================================================================
+
+CHANGES
+-------
+
+Version: 0.5
+Date   : 21 September 1999
+==========================
+  1. Added copyright stuff and put in CVS.