25 files changed, 4483 insertions, 0 deletions
diff --git a/intl/lwbrk/LineBreaker.cpp b/intl/lwbrk/LineBreaker.cpp
new file mode 100644
index 0000000000..d4c78c789e
--- /dev/null
+++ b/intl/lwbrk/LineBreaker.cpp
@@ -0,0 +1,1169 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/LineBreaker.h"
+
+#include "jisx4051class.h"
+#include "nsComplexBreaker.h"
+#include "nsTArray.h"
+#include "nsUnicodeProperties.h"
+#include "mozilla/ArrayUtils.h"
+
+using namespace mozilla::unicode;
+using namespace mozilla::intl;
+
+already_AddRefed<LineBreaker> LineBreaker::Create() {  // static
+  RefPtr<LineBreaker> breaker(new LineBreaker());
+  return breaker.forget();
+}
+
+/*
+
+   Simplification of Pair Table in JIS X 4051
+
+   1. The Original Table - in 4.1.3
+
+   In JIS X 4051, the pair table is defined as below
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char
+
+              1  2  3  4  5  6  7  8  9 10 11 12 13 13 14 14 15 16 17 18 19 20
+                                                 *  #  *  #
+        1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  E
+        2        X  X  X  X  X                                               X
+        3        X  X  X  X  X                                               X
+        4        X  X  X  X  X                                               X
+        5        X  X  X  X  X                                               X
+        6        X  X  X  X  X                                               X
+        7        X  X  X  X  X  X                                            X
+        8        X  X  X  X  X                                X              E
+        9        X  X  X  X  X                                               X
+       10        X  X  X  X  X                                               X
+       11        X  X  X  X  X                                               X
+       12        X  X  X  X  X                                               X
+       13        X  X  X  X  X                    X                          X
+       14        X  X  X  X  X                          X                    X
+       15        X  X  X  X  X        X                       X        X     X
+       16        X  X  X  X  X                                   X     X     X
+       17        X  X  X  X  X                                               E
+       18        X  X  X  X  X                                X  X     X     X
+       19     X  E  E  E  E  E  X  X  X  X  X  X  X  X  X  X  X  X  E  X  E  E
+       20        X  X  X  X  X                                               E
+
+   * Same Char
+   # Other Char
+
+   X Cannot Break
+
+   The classes mean:
+      1: Open parenthesis
+      2: Close parenthesis
+      3: Prohibit a line break before
+      4: Punctuation for sentence end (except Full stop, e.g., "!" and "?")
+      5: Middle dot (e.g., U+30FB KATAKANA MIDDLE DOT)
+      6: Full stop
+      7: Non-breakable between same characters
+      8: Prefix (e.g., "$", "NO.")
+      9: Postfix (e.g., "%")
+     10: Ideographic space
+     11: Hiragana
+     12: Japanese characters (except class 11)
+     13: Subscript
+     14: Ruby
+     15: Numeric
+     16: Alphabet
+     17: Space for Western language
+     18: Western characters (except class 17)
+     19: Split line note (Warichu) begin quote
+     20: Split line note (Warichu) end quote
+
+   2. Simplified by removing the classes which we do not care about
+
+   However, since we do not care about class 13 (Subscript), 14 (Ruby),
+   16 (Alphabet), 19 (split line note begin quote), and 20 (split line note
+   end quote), we can simplify this pair table into the following
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char
+
+              1  2  3  4  5  6  7  8  9 10 11 12 15 17 18
+
+        1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X
+        2        X  X  X  X  X
+        3        X  X  X  X  X
+        4        X  X  X  X  X
+        5        X  X  X  X  X
+        6        X  X  X  X  X
+        7        X  X  X  X  X  X
+        8        X  X  X  X  X                    X
+        9        X  X  X  X  X
+       10        X  X  X  X  X
+       11        X  X  X  X  X
+       12        X  X  X  X  X
+       15        X  X  X  X  X        X           X     X
+       17        X  X  X  X  X
+       18        X  X  X  X  X                    X     X
+
+   3. Simplified by merging classes
+
+   After simplification step 2, the pair table has some duplication
+   a. classes 2, 3, 4, 5, 6 are the same - we can merge them
+   b. classes 10, 11, 12, 17 are the same - we can merge them
+
+   We introduce an extra non-breaking pair at [b]/7 to better match
+   the expectations of CSS line-breaking as tested by WPT tests.
+   This added entry is marked as * in the tables below.
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char
+
+              1 [a] 7  8  9 [b]15 18
+
+        1     X  X  X  X  X  X  X  X
+      [a]        X
+        7        X  X
+        8        X              X
+        9        X
+      [b]        X  *
+       15        X        X     X  X
+       18        X              X  X
+
+
+   4. We add COMPLEX characters and make them breakable with all other classes
+      except after class 1 and before class [a]
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char
+
+              1 [a] 7  8  9 [b]15 18 COMPLEX
+
+        1     X  X  X  X  X  X  X  X  X
+      [a]        X
+        7        X  X
+        8        X              X
+        9        X
+      [b]        X  *
+       15        X        X     X  X
+       18        X              X  X
+  COMPLEX        X                    T
+
+     T : need special handling
+
+
+   5. However, we need two special classes for some punctuation/parentheses
+      whose breaking rules are like the character class (18); see bug 389056.
+      We also need character-like punctuation that behaves the same as 18,
+      even though the characters are not letters in any language (e.g., '_').
+      [c]. Based on open parenthesis class (1), but it is not breakable after
+           character class (18) or numeric class (15).
+      [d]. Based on close parenthesis (or punctuation) class (2), but it is not
+           breakable before character class (18) or numeric class (15).
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char
+
+              1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d]
+
+        1     X  X  X  X  X  X  X  X  X       X    X
+      [a]        X                            X    X
+        7        X  X
+        8        X              X
+        9        X
+      [b]        X  *                              X
+       15        X        X     X  X          X    X
+       18        X              X  X          X    X
+  COMPLEX        X                    T
+      [c]     X  X  X  X  X  X  X  X  X       X    X
+      [d]        X              X  X               X
+
+
+   6. Unicode also has "NON-BREAK" characters. Lines should not be broken
+      around them. JIS X 4051 has no such class, so we create [e].
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char
+
+              1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d] [e]
+
+        1     X  X  X  X  X  X  X  X  X       X    X   X
+      [a]        X                                 X   X
+        7        X  X                                  X
+        8        X              X                      X
+        9        X                                     X
+      [b]        X  *                              X   X
+       15        X        X     X  X          X    X   X
+       18        X              X  X          X    X   X
+  COMPLEX        X                    T                X
+      [c]     X  X  X  X  X  X  X  X  X       X    X   X
+      [d]        X              X  X               X   X
+      [e]     X  X  X  X  X  X  X  X  X       X    X   X
+
+
+   7. Now we use one bit to encode whether it is breakable (1 = cannot break),
+      and use 2 bytes for one row, then the bit table will look like:
+
+                 18    <-   1
+
+       1  0000 1111 1111 1111  = 0x0FFF
+      [a] 0000 1100 0000 0010  = 0x0C02
+       7  0000 1000 0000 0110  = 0x0806
+       8  0000 1000 0100 0010  = 0x0842
+       9  0000 1000 0000 0010  = 0x0802
+      [b] 0000 1100 0000 0110  = 0x0C06
+      15  0000 1110 1101 0010  = 0x0ED2
+      18  0000 1110 1100 0010  = 0x0EC2
+ COMPLEX  0000 1001 0000 0010  = 0x0902
+      [c] 0000 1111 1111 1111  = 0x0FFF
+      [d] 0000 1100 1100 0010  = 0x0CC2
+      [e] 0000 1111 1111 1111  = 0x0FFF
+*/
+
+#define MAX_CLASSES 12
+
+static const uint16_t gPair[MAX_CLASSES] = {
+    0x0FFF, 0x0C02, 0x0806, 0x0842, 0x0802, 0x0C06,   // 1 [a] 7 8 9 [b]
+    0x0ED2, 0x0EC2, 0x0902, 0x0FFF, 0x0CC2, 0x0FFF};  // 15 18 COMPLEX [c]-[e]
+
+/*
+
+   8. And if the character is not far enough from the word start, the word end
+      or another break point, we should not break in non-CJK languages.
+      I.e., don't break around 15, 18, [c] and [d], but don't change
+      that if they are related to [b].
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char
+
+              1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d] [e]
+
+        1     X  X  X  X  X  X  X  X  X       X    X   X
+      [a]        X              X  X          X    X   X
+        7        X  X           X  X          X    X   X
+        8        X              X  X          X    X   X
+        9        X              X  X          X    X   X
+      [b]        X  *                              X   X
+       15     X  X  X  X  X     X  X  X       X    X   X
+       18     X  X  X  X  X     X  X  X       X    X   X
+  COMPLEX        X              X  X  T       X    X   X
+      [c]     X  X  X  X  X  X  X  X  X       X    X   X
+      [d]     X  X  X  X  X     X  X  X       X    X   X
+      [e]     X  X  X  X  X  X  X  X  X       X    X   X
+
+                 18    <-   1
+
+       1  0000 1111 1111 1111  = 0x0FFF
+      [a] 0000 1110 1100 0010  = 0x0EC2
+       7  0000 1110 1100 0110  = 0x0EC6
+       8  0000 1110 1100 0010  = 0x0EC2
+       9  0000 1110 1100 0010  = 0x0EC2
+      [b] 0000 1100 0000 0110  = 0x0C06
+      15  0000 1111 1101 1111  = 0x0FDF
+      18  0000 1111 1101 1111  = 0x0FDF
+ COMPLEX  0000 1111 1100 0010  = 0x0FC2
+      [c] 0000 1111 1111 1111  = 0x0FFF
+      [d] 0000 1111 1101 1111  = 0x0FDF
+      [e] 0000 1111 1111 1111  = 0x0FFF
+*/
+
+static const uint16_t gPairConservative[MAX_CLASSES] = {
+    0x0FFF, 0x0EC2, 0x0EC6, 0x0EC2, 0x0EC2, 0x0C06,   // 1 [a] 7 8 9 [b]
+    0x0FDF, 0x0FDF, 0x0FC2, 0x0FFF, 0x0FDF, 0x0FFF};  // 15 18 COMPLEX [c]-[e]
+
+/*
+
+   9. Now we map the class to number
+
+      0: 1
+      1: [a]- 2, 3, 4, 5, 6
+      2: 7
+      3: 8
+      4: 9
+      5: [b]- 10, 11, 12, 17
+      6: 15
+      7: 18
+      8: COMPLEX
+      9: [c]
+      A: [d]
+      B: [e]
+
+    and they mean:
+      0: Open parenthesis
+      1: Punctuation that prohibits break before
+      2: Non-breakable between same classes
+      3: Prefix
+      4: Postfix
+      5: Breakable character (Spaces and Most Japanese characters)
+      6: Numeric
+      7: Characters
+      8: Need special handling characters (E.g., Thai)
+      9: Open parentheses like Character (See bug 389056)
+      A: Close parentheses (or punctuation) like Character (See bug 389056)
+      B: Non breakable (See bug 390920)
+
+*/
+
+#define CLASS_NONE INT8_MAX  // value outside the 0x00..0x0B class range
+
+#define CLASS_OPEN 0x00                              // Open parenthesis
+#define CLASS_CLOSE 0x01                             // No break before
+#define CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS 0x02  // No break within class
+#define CLASS_PREFIX 0x03                            // Prefix
+#define CLASS_POSTFFIX 0x04                          // Postfix (name is sic)
+#define CLASS_BREAKABLE 0x05                         // Breakable character
+#define CLASS_NUMERIC 0x06                           // Numeric
+#define CLASS_CHARACTER 0x07                         // Characters
+#define CLASS_COMPLEX 0x08                           // Needs special handling
+#define CLASS_OPEN_LIKE_CHARACTER 0x09               // [c], see bug 389056
+#define CLASS_CLOSE_LIKE_CHARACTER 0x0A              // [d], see bug 389056
+#define CLASS_NON_BREAKABLE 0x0B                     // [e], see bug 390920
+
+#define U_NULL char16_t(0x0000)
+#define U_SLASH char16_t('/')
+#define U_SPACE char16_t(' ')
+#define U_HYPHEN char16_t('-')
+#define U_EQUAL char16_t('=')
+#define U_PERCENT char16_t('%')
+#define U_AMPERSAND char16_t('&')
+#define U_SEMICOLON char16_t(';')
+#define U_BACKSLASH char16_t('\\')
+#define U_OPEN_SINGLE_QUOTE char16_t(0x2018)
+#define U_OPEN_DOUBLE_QUOTE char16_t(0x201C)
+#define U_OPEN_GUILLEMET char16_t(0x00AB)
+// True for hyphens, slash, percent, ampersand, semicolon, backslash, quotes.
+#define NEED_CONTEXTUAL_ANALYSIS(c)                                            \
+  (IS_HYPHEN(c) || (c) == U_SLASH || (c) == U_PERCENT || (c) == U_AMPERSAND || \
+   (c) == U_SEMICOLON || (c) == U_BACKSLASH || (c) == U_OPEN_SINGLE_QUOTE ||   \
+   (c) == U_OPEN_DOUBLE_QUOTE || (c) == U_OPEN_GUILLEMET)
+
+#define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039)  // '0'..'9'
+
+static inline int GETCLASSFROMTABLE(const uint32_t* t, uint16_t l) {
+  return (t[l / 8] >> (4 * (l % 8))) & 0xF;  // eight 4-bit classes per word
+}
+
+static inline int IS_HALFWIDTH_IN_JISx4051_CLASS3(char16_t u) {
+  return u >= 0xff66 && u <= 0xff70;  // U+FF66..U+FF70 inclusive
+}
+
+static inline int IS_CJK_CHAR(char32_t u) {
+  // True when |u| lies in any of the inclusive ranges treated as CJK here.
+  auto within = [u](char32_t lo, char32_t hi) { return lo <= u && u <= hi; };
+  return within(0x1100, 0x11ff) || within(0x2e80, 0xd7ff) ||
+         within(0xf900, 0xfaff) || within(0xff00, 0xffef) ||
+         within(0x20000, 0x2fffd);
+}
+
+static inline bool IS_NONBREAKABLE_SPACE(char32_t u) {  // full code point
+  return u == 0x00A0 || u == 0x2007;  // NO-BREAK SPACE, FIGURE SPACE
+}
+
+static inline bool IS_HYPHEN(char32_t u) {  // full code point, no truncation
+  return (u == U_HYPHEN || u == 0x2010 ||  // HYPHEN-MINUS, HYPHEN
+          u == 0x2012 ||                   // FIGURE DASH
+          u == 0x2013 ||                   // EN DASH
+#if ANDROID
+          /* Bug 1647377: On Android, we don't have a "platform" backend
+           * that supports Tibetan (nsRuleBreaker.cpp only knows about
+           * Thai), so instead we just treat the TSHEG like a hyphen to
+           * provide basic line-breaking possibilities.
+           */
+          u == 0x0F0B ||  // TIBETAN MARK INTERSYLLABIC TSHEG
+#endif
+          u == 0x058A);  // ARMENIAN HYPHEN
+}
+
+static int8_t GetClass(uint32_t u, LineBreaker::Strictness aLevel,
+                       bool aIsChineseOrJapanese) {
+  // Returns the simplified CLASS_* class for |u|, after the CSS line-break
+  // overrides for |aLevel| (some only if |aIsChineseOrJapanese|). The table
+  // below is the fallback mapping from Unicode LineBreak.txt classes.
+  // XXX The mappings were derived by comparing the Unicode LineBreak values
+  //     of BMP characters to the classes our existing GetClass returns for
+  //     the same codepoints; where one LineBreak class mapped to various
+  //     classes here, I picked what seemed the most prevalent equivalence.
+  //     Some are unclear to me, but they are ONLY used for characters not
+  //     handled by the old code below, so the JIS X 4051 special cases
+  //     should already be accounted for.
+  static const int8_t sUnicodeLineBreakToClass[] = {
+      /* UNKNOWN = 0,                       [XX] */ CLASS_CHARACTER,
+      /* AMBIGUOUS = 1,                     [AI] */ CLASS_CHARACTER,
+      /* ALPHABETIC = 2,                    [AL] */ CLASS_CHARACTER,
+      /* BREAK_BOTH = 3,                    [B2] */ CLASS_CHARACTER,
+      /* BREAK_AFTER = 4,                   [BA] */ CLASS_CHARACTER,
+      /* BREAK_BEFORE = 5,                  [BB] */ CLASS_OPEN_LIKE_CHARACTER,
+      /* MANDATORY_BREAK = 6,               [BK] */ CLASS_CHARACTER,
+      /* CONTINGENT_BREAK = 7,              [CB] */ CLASS_CHARACTER,
+      /* CLOSE_PUNCTUATION = 8,             [CL] */ CLASS_CHARACTER,
+      /* COMBINING_MARK = 9,                [CM] */ CLASS_CHARACTER,
+      /* CARRIAGE_RETURN = 10,              [CR] */ CLASS_BREAKABLE,
+      /* EXCLAMATION = 11,                  [EX] */ CLASS_CHARACTER,
+      /* GLUE = 12,                         [GL] */ CLASS_NON_BREAKABLE,
+      /* HYPHEN = 13,                       [HY] */ CLASS_CHARACTER,
+      /* IDEOGRAPHIC = 14,                  [ID] */ CLASS_BREAKABLE,
+      /* INSEPARABLE = 15,                  [IN] */ CLASS_CLOSE_LIKE_CHARACTER,
+      /* INFIX_NUMERIC = 16,                [IS] */ CLASS_CHARACTER,
+      /* LINE_FEED = 17,                    [LF] */ CLASS_BREAKABLE,
+      /* NONSTARTER = 18,                   [NS] */ CLASS_CLOSE_LIKE_CHARACTER,
+      /* NUMERIC = 19,                      [NU] */ CLASS_NUMERIC,
+      /* OPEN_PUNCTUATION = 20,             [OP] */ CLASS_CHARACTER,
+      /* POSTFIX_NUMERIC = 21,              [PO] */ CLASS_CHARACTER,
+      /* PREFIX_NUMERIC = 22,               [PR] */ CLASS_CHARACTER,
+      /* QUOTATION = 23,                    [QU] */ CLASS_CHARACTER,
+      /* COMPLEX_CONTEXT = 24,              [SA] */ CLASS_CHARACTER,
+      /* SURROGATE = 25,                    [SG] */ CLASS_CHARACTER,
+      /* SPACE = 26,                        [SP] */ CLASS_BREAKABLE,
+      /* BREAK_SYMBOLS = 27,                [SY] */ CLASS_CHARACTER,
+      /* ZWSPACE = 28,                      [ZW] */ CLASS_BREAKABLE,
+      /* NEXT_LINE = 29,                    [NL] */ CLASS_CHARACTER,
+      /* WORD_JOINER = 30,                  [WJ] */ CLASS_NON_BREAKABLE,
+      /* H2 = 31,                           [H2] */ CLASS_BREAKABLE,
+      /* H3 = 32,                           [H3] */ CLASS_BREAKABLE,
+      /* JL = 33,                           [JL] */ CLASS_CHARACTER,
+      /* JT = 34,                           [JT] */ CLASS_CHARACTER,
+      /* JV = 35,                           [JV] */ CLASS_CHARACTER,
+      /* CLOSE_PARENTHESIS = 36,            [CP] */ CLASS_CLOSE_LIKE_CHARACTER,
+      /* CONDITIONAL_JAPANESE_STARTER = 37, [CJ] */ CLASS_CLOSE,
+      /* HEBREW_LETTER = 38,                [HL] */ CLASS_CHARACTER,
+      /* REGIONAL_INDICATOR = 39,           [RI] */ CLASS_CHARACTER,
+      /* E_BASE = 40,                       [EB] */ CLASS_BREAKABLE,
+      /* E_MODIFIER = 41,                   [EM] */ CLASS_CHARACTER,
+      /* ZWJ = 42,                          [ZWJ]*/ CLASS_CHARACTER};
+
+  static_assert(U_LB_COUNT == mozilla::ArrayLength(sUnicodeLineBreakToClass),
+                "Gecko vs ICU LineBreak class mismatch");
+
+  auto cls = GetLineBreakClass(u);
+  MOZ_ASSERT(cls < mozilla::ArrayLength(sUnicodeLineBreakToClass));
+
+  // Overrides based on rules for the different line-break values given in
+  // https://drafts.csswg.org/css-text-3/#line-break-property
+  switch (aLevel) {
+    case LineBreaker::Strictness::Auto:
+      // For now, just use legacy Gecko behavior.
+      // XXX Possible enhancement - vary strictness according to line width
+      // or other criteria.
+      break;
+    case LineBreaker::Strictness::Strict:
+      if (cls == U_LB_CONDITIONAL_JAPANESE_STARTER ||
+          (u == 0x3095 || u == 0x3096 || u == 0x30f5 || u == 0x30f6)) {
+        return CLASS_CLOSE;
+      }
+      if (cls == U_LB_INSEPARABLE) {
+        return CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS;
+      }
+      if (u == 0x3005 || u == 0x303B || u == 0x309D || u == 0x309E ||
+          u == 0x30FD || u == 0x30FE) {
+        return CLASS_CLOSE_LIKE_CHARACTER;
+      }
+      if (aIsChineseOrJapanese) {
+        if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_CLOSE_LIKE_CHARACTER;
+        }
+        if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_OPEN_LIKE_CHARACTER;
+        }
+        if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
+          return CLASS_CLOSE_LIKE_CHARACTER;
+        }
+      }
+      break;
+    case LineBreaker::Strictness::Normal:
+      if (cls == U_LB_CONDITIONAL_JAPANESE_STARTER) {
+        return CLASS_BREAKABLE;
+      }
+      if (cls == U_LB_INSEPARABLE) {
+        return CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS;
+      }
+      if (u == 0x3005 || u == 0x303B || u == 0x309D || u == 0x309E ||
+          u == 0x30FD || u == 0x30FE) {
+        return CLASS_CLOSE_LIKE_CHARACTER;
+      }
+      if (aIsChineseOrJapanese) {
+        if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_CLOSE_LIKE_CHARACTER;
+        }
+        if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_OPEN_LIKE_CHARACTER;
+        }
+        if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
+          return CLASS_BREAKABLE;
+        }
+      }
+      break;
+    case LineBreaker::Strictness::Loose:
+      if (cls == U_LB_CONDITIONAL_JAPANESE_STARTER) {
+        return CLASS_BREAKABLE;
+      }
+      if (u == 0x3005 || u == 0x303B || u == 0x309D || u == 0x309E ||
+          u == 0x30FD || u == 0x30FE) {
+        return CLASS_BREAKABLE;
+      }
+      if (cls == U_LB_INSEPARABLE) {
+        return CLASS_BREAKABLE;
+      }
+      if (aIsChineseOrJapanese) {
+        if (u == 0x30FB || u == 0xFF1A || u == 0xFF1B || u == 0xFF65 ||
+            u == 0x203C || u == 0x2047 || u == 0x2048 || u == 0x2049 ||
+            u == 0xFF01 || u == 0xFF1F) {
+          return CLASS_BREAKABLE;
+        }
+        if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_BREAKABLE;
+        }
+        if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_BREAKABLE;
+        }
+        if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
+          return CLASS_BREAKABLE;
+        }
+      }
+      break;
+    case LineBreaker::Strictness::Anywhere:
+      MOZ_ASSERT_UNREACHABLE("should have been handled already");
+      break;
+  }
+
+  if (u < 0x10000) {
+    uint16_t h = u & 0xFF00;
+    uint16_t l = u & 0x00ff;
+
+    // Handle the U+00xx and U+17xx tables first, before the platform check
+    if (0x0000 == h) {
+      return GETCLASSFROMTABLE(gLBClass00, l);
+    }
+    if (0x1700 == h) {
+      return GETCLASSFROMTABLE(gLBClass17, l);
+    }
+    if (NS_NeedsPlatformNativeHandling(u)) {
+      return CLASS_COMPLEX;
+    }
+    if (0x0E00 == h) {
+      return GETCLASSFROMTABLE(gLBClass0E, l);
+    }
+    if (0x2000 == h) {
+      return GETCLASSFROMTABLE(gLBClass20, l);
+    }
+    if (0x2100 == h) {
+      return GETCLASSFROMTABLE(gLBClass21, l);
+    }
+    if (0x3000 == h) {
+      return GETCLASSFROMTABLE(gLBClass30, l);
+    }
+    if (0xff00 == h) {
+      if (l <= 0x0060) {  // Fullwidth ASCII variant
+        // Fullwidth comma and period are exceptions to our map-to-ASCII
+        // behavior: https://bugzilla.mozilla.org/show_bug.cgi?id=1595428
+        if (l + 0x20 == ',' || l + 0x20 == '.') {
+          return CLASS_CLOSE;
+        }
+        // Also special-case fullwidth left/right white parenthesis,
+        // which do not fit the pattern of mapping to the ASCII block
+        if (l == 0x005f) {
+          return CLASS_OPEN;
+        }
+        if (l == 0x0060) {
+          return CLASS_CLOSE;
+        }
+        return GETCLASSFROMTABLE(gLBClass00, (l + 0x20));
+      }
+      if (l < 0x00a0) {  // Halfwidth Katakana variants
+        switch (l) {
+          case 0x61:
+            return GetClass(0x3002, aLevel, aIsChineseOrJapanese);
+          case 0x62:
+            return GetClass(0x300c, aLevel, aIsChineseOrJapanese);
+          case 0x63:
+            return GetClass(0x300d, aLevel, aIsChineseOrJapanese);
+          case 0x64:
+            return GetClass(0x3001, aLevel, aIsChineseOrJapanese);
+          case 0x65:
+            return GetClass(0x30fb, aLevel, aIsChineseOrJapanese);
+          case 0x9e:
+            return GetClass(0x309b, aLevel, aIsChineseOrJapanese);
+          case 0x9f:
+            return GetClass(0x309c, aLevel, aIsChineseOrJapanese);
+          default:
+            if (IS_HALFWIDTH_IN_JISx4051_CLASS3(u)) {
+              return CLASS_CLOSE;  // jis x4051 class 3
+            }
+            return CLASS_BREAKABLE;  // jis x4051 class 11
+        }
+      }
+      if (l < 0x00e0) {
+        return CLASS_CHARACTER;  // Halfwidth Hangul variants
+      }
+      if (l < 0x00f0) {
+        static const char16_t NarrowFFEx[16] = {
+            0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0x0000,
+            0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0x0000};
+        return GetClass(NarrowFFEx[l - 0x00e0], aLevel, aIsChineseOrJapanese);
+      }
+    } else if (0x3100 == h) {
+      if (l <= 0xbf) {  // Hangul Compatibility Jamo, Bopomofo, Kanbun
+                        // XXX: This is per UAX #14, but UAX #14 may change
+                        // the line breaking rules about Kanbun and Bopomofo.
+        return CLASS_BREAKABLE;
+      }
+      if (l >= 0xf0) {  // Katakana small letters for Ainu
+        return CLASS_CLOSE;
+      }
+    } else if (0x0300 == h) {
+      if (0x4F == l || (0x5C <= l && l <= 0x62)) {
+        return CLASS_NON_BREAKABLE;
+      }
+    } else if (0x0500 == h) {
+      // ARMENIAN HYPHEN (for "Breaking Hyphens" of UAX#14)
+      if (l == 0x8A) {
+        return GETCLASSFROMTABLE(gLBClass00, uint16_t(U_HYPHEN));
+      }
+    } else if (0x0F00 == h) {
+      // Tibetan chars with class = BA
+      if (0x34 == l || 0x7f == l || 0x85 == l || 0xbe == l || 0xbf == l ||
+          0xd2 == l) {
+        return CLASS_BREAKABLE;
+      }
+    } else if (0x1800 == h) {
+      if (0x0E == l) {
+        return CLASS_NON_BREAKABLE;
+      }
+    } else if (0x1600 == h) {
+      if (0x80 == l) {  // U+1680 OGHAM SPACE MARK
+        return CLASS_BREAKABLE;
+      }
+    } else if (u == 0xfeff) {
+      return CLASS_NON_BREAKABLE;
+    }
+  }
+
+  return sUnicodeLineBreakToClass[cls];
+}
+
+static bool GetPair(int8_t c1, int8_t c2) {
+  NS_ASSERTION(0 <= c1 && c1 < MAX_CLASSES, "illegal classes 1");
+  NS_ASSERTION(0 <= c2 && c2 < MAX_CLASSES, "illegal classes 2");
+  // Bit c2 of row gPair[c1] is set when no break is allowed between c1, c2.
+  return (0 == ((gPair[c1] >> c2) & 0x0001));
+}
+
+static bool GetPairConservative(int8_t c1, int8_t c2) {
+  NS_ASSERTION(0 <= c1 && c1 < MAX_CLASSES, "illegal classes 1");
+  NS_ASSERTION(0 <= c2 && c2 < MAX_CLASSES, "illegal classes 2");
+  // Same lookup as GetPair, but against the gPairConservative bit table.
+  return (0 == ((gPairConservative[c1] >> c2) & 0x0001));
+}
+
+class ContextState {
+ public:
+  ContextState(const char16_t* aText, uint32_t aLength)
+      : mUniText(aText), mText(nullptr), mLength(aLength) {
+    Init();
+  }
+
+  ContextState(const uint8_t* aText, uint32_t aLength)
+      : mUniText(nullptr), mText(aText), mLength(aLength) {
+    Init();
+  }
+
+  uint32_t Length() const { return mLength; }
+  uint32_t Index() const { return mIndex; }
+
+  // This gets a single code unit of the text, without checking for surrogates
+  // (in the case of a 16-bit text buffer). That's OK if we're only checking for
+  // specific characters that are known to be BMP values.
+  char16_t GetCodeUnitAt(uint32_t aIndex) const {
+    MOZ_ASSERT(aIndex < mLength, "Out of range!");
+    return mUniText ? mUniText[aIndex] : char16_t(mText[aIndex]);
+  }
+
+  // This gets a 32-bit Unicode character (codepoint), handling surrogate pairs
+  // as necessary. It must ONLY be called for 16-bit text, not 8-bit.
+  char32_t GetUnicodeCharAt(uint32_t aIndex) const {
+    MOZ_ASSERT(mUniText, "Only for 16-bit text!");
+    MOZ_ASSERT(aIndex < mLength, "Out of range!");
+    char32_t c = mUniText[aIndex];
+    if (aIndex + 1 < mLength && NS_IS_SURROGATE_PAIR(c, mUniText[aIndex + 1])) {
+      c = SURROGATE_TO_UCS4(c, mUniText[aIndex + 1]);
+    }
+    return c;
+  }
+
+  void AdvanceIndex() { ++mIndex; }
+
+  void NotifyBreakBefore() { mLastBreakIndex = mIndex; }
+
+  // A word of western language should not be broken. But even if the word has
+  // only ASCII characters, non-natural context words should be broken, e.g.,
+  // URL and file path. For protecting the natural words, we should use
+  // conservative breaking rules at following conditions:
+  //   1. at near the start of word
+  //   2. at near the end of word
+  //   3. at near the latest broken point
+  // CONSERVATIVE_RANGE_{LETTER,OTHER} define the 'near' in characters,
+  // which varies depending whether we are looking at a letter or a non-letter
+  // character: for non-letters, we use an extended "conservative" range.
+
+#define CONSERVATIVE_RANGE_LETTER 2
+#define CONSERVATIVE_RANGE_OTHER 6
+
+  bool UseConservativeBreaking(uint32_t aOffset = 0) const {
+    if (mHasCJKChar) return false;
+    uint32_t index = mIndex + aOffset;
+
+    // If the character at index is a letter (rather than various punctuation
+    // characters, etc) then we want a shorter "conservative" range
+    uint32_t conservativeRangeStart, conservativeRangeEnd;
+    if (index < mLength &&
+        nsUGenCategory::kLetter ==
+            (mText ? GetGenCategory(mText[index])
+                   : GetGenCategory(GetUnicodeCharAt(index)))) {
+      // Primarily for hyphenated word prefixes/suffixes; we add 1 to Start
+      // to get more balanced behavior (if we break off a 2-letter prefix,
+      // that means the break will actually be three letters from start of
+      // word, to include the hyphen; whereas a 2-letter suffix will be
+      // broken only two letters from end of word).
+      conservativeRangeEnd = CONSERVATIVE_RANGE_LETTER;
+      conservativeRangeStart = CONSERVATIVE_RANGE_LETTER + 1;
+    } else {
+      conservativeRangeEnd = conservativeRangeStart = CONSERVATIVE_RANGE_OTHER;
+    }
+
+    bool result = (index < conservativeRangeStart ||
+                   mLength - index < conservativeRangeEnd ||
+                   index - mLastBreakIndex < conservativeRangeStart);
+    if (result || !mHasNonbreakableSpace) return result;
+
+    // This text has no-breakable space, we need to check whether the index
+    // is near it.
+
+    // Note that index is always larger than conservativeRange here.
+    for (uint32_t i = index; index - conservativeRangeStart < i; --i) {
+      if (IS_NONBREAKABLE_SPACE(GetCodeUnitAt(i - 1))) return true;
+    }
+    // Note that index is always less than mLength - conservativeRange.
+    for (uint32_t i = index + 1; i < index + conservativeRangeEnd; ++i) {
+      if (IS_NONBREAKABLE_SPACE(GetCodeUnitAt(i))) return true;
+    }
+    return false;
+  }
+
+  bool HasPreviousEqualsSign() const { return mHasPreviousEqualsSign; }
+  void NotifySeenEqualsSign() { mHasPreviousEqualsSign = true; }
+
+  bool HasPreviousSlash() const { return mHasPreviousSlash; }
+  void NotifySeenSlash() { mHasPreviousSlash = true; }
+
+  bool HasPreviousBackslash() const { return mHasPreviousBackslash; }
+  void NotifySeenBackslash() { mHasPreviousBackslash = true; }
+
+  uint32_t GetPreviousNonHyphenCharacter() const {
+    return mPreviousNonHyphenCharacter;
+  }
+  void NotifyNonHyphenCharacter(uint32_t ch) {
+    mPreviousNonHyphenCharacter = ch;
+  }
+
+ private:
+  void Init() {
+    mIndex = 0;
+    mLastBreakIndex = 0;
+    mPreviousNonHyphenCharacter = U_NULL;
+    mHasCJKChar = false;
+    mHasNonbreakableSpace = false;
+    mHasPreviousEqualsSign = false;
+    mHasPreviousSlash = false;
+    mHasPreviousBackslash = false;
+
+    if (mText) {
+      // 8-bit text: we only need to check for &nbsp;
+      for (uint32_t i = 0; i < mLength; ++i) {
+        if (IS_NONBREAKABLE_SPACE(mText[i])) {
+          mHasNonbreakableSpace = true;
+          break;
+        }
+      }
+    } else {
+      // 16-bit text: handle surrogates and check for CJK as well as &nbsp;
+      for (uint32_t i = 0; i < mLength; ++i) {
+        char32_t u = GetUnicodeCharAt(i);
+        if (!mHasNonbreakableSpace && IS_NONBREAKABLE_SPACE(u)) {
+          mHasNonbreakableSpace = true;
+          if (mHasCJKChar) {
+            break;
+          }
+        } else if (!mHasCJKChar && IS_CJK_CHAR(u)) {
+          mHasCJKChar = true;
+          if (mHasNonbreakableSpace) {
+            break;
+          }
+        }
+        if (u > 0xFFFFu) {
+          ++i;  // step over trailing low surrogate
+        }
+      }
+    }
+  }
+
+  const char16_t* const mUniText;
+  const uint8_t* const mText;
+
+  uint32_t mIndex;
+  const uint32_t mLength;  // length of text
+  uint32_t mLastBreakIndex;
+  char32_t mPreviousNonHyphenCharacter;  // The last character we have seen
+                                         // which is not U_HYPHEN
+  bool mHasCJKChar;             // if the text has CJK character, this is true.
+  bool mHasNonbreakableSpace;   // if the text has a non-breakable space,
+                                // this is true.
+  bool mHasPreviousEqualsSign;  // True if we have seen a U_EQUAL
+  bool mHasPreviousSlash;       // True if we have seen a U_SLASH
+  bool mHasPreviousBackslash;   // True if we have seen a U_BACKSLASH
+};
+
+// Resolves the line-break class of |cur| for characters whose class depends
+// on their surroundings (hyphens, slashes/backslashes, percent signs,
+// ampersands/semicolons and opening quotes).
+//
+// prev / next: the characters immediately before / after |cur| (0 when there
+//              is none).
+// aState:      running scan state; this function both reads it and updates it
+//              (last non-hyphen character, "seen slash/backslash" flags).
+// aLevel / aIsChineseOrJapanese: forwarded unchanged to GetClass().
+//
+// Returns one of the CLASS_* values; when no contextual rule applies, the
+// result is simply GetClass(cur, ...).
+static int8_t ContextualAnalysis(char32_t prev, char32_t cur, char32_t next,
+                                 ContextState& aState,
+                                 LineBreaker::Strictness aLevel,
+                                 bool aIsChineseOrJapanese) {
+  // The contextual overrides below (CLASS_OPEN / CLASS_CLOSE) are only
+  // returned when aState.UseConservativeBreaking() is false.
+
+  if (IS_HYPHEN(cur)) {
+    // If next character is hyphen, we don't need to break between them.
+    if (IS_HYPHEN(next)) return CLASS_CHARACTER;
+    // If prev and next characters are numeric, it may be in Math context.
+    // So, we should not break here.
+    bool prevIsNum = IS_ASCII_DIGIT(prev);
+    bool nextIsNum = IS_ASCII_DIGIT(next);
+    if (prevIsNum && nextIsNum) return CLASS_NUMERIC;
+    // If one side is numeric and the other is a character, or if both sides are
+    // characters, the hyphen should be breakable.
+    if (!aState.UseConservativeBreaking(1)) {
+      // For a run of hyphens, look at the character before the whole run
+      // rather than at |prev| (which may itself be a hyphen).
+      char32_t prevOfHyphen = aState.GetPreviousNonHyphenCharacter();
+      if (prevOfHyphen && next) {
+        int8_t prevClass = GetClass(prevOfHyphen, aLevel, aIsChineseOrJapanese);
+        int8_t nextClass = GetClass(next, aLevel, aIsChineseOrJapanese);
+        bool prevIsNumOrCharOrClose =
+            prevIsNum ||
+            (prevClass == CLASS_CHARACTER &&
+             !NEED_CONTEXTUAL_ANALYSIS(prevOfHyphen)) ||
+            prevClass == CLASS_CLOSE || prevClass == CLASS_CLOSE_LIKE_CHARACTER;
+        bool nextIsNumOrCharOrOpen =
+            nextIsNum ||
+            (nextClass == CLASS_CHARACTER && !NEED_CONTEXTUAL_ANALYSIS(next)) ||
+            nextClass == CLASS_OPEN || nextClass == CLASS_OPEN_LIKE_CHARACTER ||
+            next == U_OPEN_SINGLE_QUOTE || next == U_OPEN_DOUBLE_QUOTE ||
+            next == U_OPEN_GUILLEMET;
+        if (prevIsNumOrCharOrClose && nextIsNumOrCharOrOpen) {
+          // Treat the hyphen as a closing character.
+          return CLASS_CLOSE;
+        }
+      }
+    }
+  } else {
+    aState.NotifyNonHyphenCharacter(cur);
+    if (cur == U_SLASH || cur == U_BACKSLASH) {
+      // If this is immediately after same char, we should not break here.
+      if (prev == cur) return CLASS_CHARACTER;
+      // If this text has two or more (BACK)SLASHs, this may be file path or
+      // URL. Make sure to compute shouldReturn before we notify on this slash.
+      bool shouldReturn = !aState.UseConservativeBreaking() &&
+                          (cur == U_SLASH ? aState.HasPreviousSlash()
+                                          : aState.HasPreviousBackslash());
+
+      if (cur == U_SLASH) {
+        aState.NotifySeenSlash();
+      } else {
+        aState.NotifySeenBackslash();
+      }
+
+      if (shouldReturn) return CLASS_OPEN;
+    } else if (cur == U_PERCENT) {
+      // If this is a part of the param of URL, we should break before.
+      // Another '%' exactly three code units before or after the current index
+      // is taken as the sign of a percent-encoded sequence.
+      if (!aState.UseConservativeBreaking()) {
+        if (aState.Index() >= 3 &&
+            aState.GetCodeUnitAt(aState.Index() - 3) == U_PERCENT)
+          return CLASS_OPEN;
+        if (aState.Index() + 3 < aState.Length() &&
+            aState.GetCodeUnitAt(aState.Index() + 3) == U_PERCENT)
+          return CLASS_OPEN;
+      }
+    } else if (cur == U_AMPERSAND || cur == U_SEMICOLON) {
+      // If this may be a separator of params of URL, we should break after.
+      if (!aState.UseConservativeBreaking(1) && aState.HasPreviousEqualsSign())
+        return CLASS_CLOSE;
+    } else if (cur == U_OPEN_SINGLE_QUOTE || cur == U_OPEN_DOUBLE_QUOTE ||
+               cur == U_OPEN_GUILLEMET) {
+      // for CJK usage, we treat these as openers to allow a break before them,
+      // but otherwise treat them as normal characters because quote mark usage
+      // in various Western languages varies too much; see bug #450088
+      // discussion.
+      if (!aState.UseConservativeBreaking() && IS_CJK_CHAR(next))
+        return CLASS_OPEN;
+    } else {
+      // Reaching here means a character that needs contextual analysis has no
+      // matching branch above.
+      NS_ERROR("Forgot to handle the current character!");
+    }
+  }
+  // No contextual rule applied: fall back to the context-free class.
+  return GetClass(cur, aLevel, aIsChineseOrJapanese);
+}
+
+// Shared implementation of Next() and Prev(): finds the nearest break position
+// after (aDirection > 0) or before (aDirection < 0) aPos.
+//
+// The search is limited to the whitespace-delimited run [begin, end) that
+// contains aPos. If that run has no CJK or platform-native-handled characters,
+// the run boundaries themselves are returned; otherwise the JIS X 4051 breaker
+// is run over the run and the nearest break opportunity in the requested
+// direction is returned. The result may lie outside (0, aLen); Next()/Prev()
+// translate such values into NS_LINEBREAKER_NEED_MORE_TEXT.
+int32_t LineBreaker::WordMove(const char16_t* aText, uint32_t aLen,
+                              uint32_t aPos, int8_t aDirection) {
+  bool textNeedsJISx4051 = false;
+  const int32_t len = int32_t(aLen);
+  int32_t begin, end;
+
+  // Prev() accepts aPos == aLen, in which case aText[begin] would initially
+  // be one past the end of the buffer; only inspect in-range code units.
+  for (begin = aPos; begin > 0 && !NS_IsSpace(aText[begin - 1]); --begin) {
+    if (begin < len && (IS_CJK_CHAR(aText[begin]) ||
+                        NS_NeedsPlatformNativeHandling(aText[begin]))) {
+      textNeedsJISx4051 = true;
+    }
+  }
+  for (end = aPos + 1; end < len && !NS_IsSpace(aText[end]); ++end) {
+    if (IS_CJK_CHAR(aText[end]) || NS_NeedsPlatformNativeHandling(aText[end])) {
+      textNeedsJISx4051 = true;
+    }
+  }
+  // When aPos == aLen the loop above leaves |end| at aLen + 1. Clamp it so
+  // that the run handed to GetJISx4051Breaks() never extends past the buffer.
+  if (end > len) {
+    end = len;
+  }
+
+  int32_t ret;
+  AutoTArray<uint8_t, 2000> breakState;
+  if (!textNeedsJISx4051) {
+    // No complex text character, do not try to do complex line break.
+    // (This is required for serializers. See Bug #344816.)
+    if (aDirection < 0) {
+      ret = (begin == int32_t(aPos)) ? begin - 1 : begin;
+    } else {
+      ret = end;
+    }
+  } else {
+    // XXX(Bug 1631371) Check if this should use a fallible operation as it
+    // pretended earlier.
+    breakState.AppendElements(end - begin);
+    GetJISx4051Breaks(aText + begin, end - begin, WordBreak::Normal,
+                      Strictness::Auto, false, breakState.Elements());
+
+    // Step from aPos in the requested direction until we either leave the
+    // run or land on a position that allows a break before it.
+    ret = aPos;
+    do {
+      ret += aDirection;
+    } while (begin < ret && ret < end && !breakState[ret - begin]);
+  }
+
+  return ret;
+}
+
+// Returns the position of the next line-break opportunity after aPos, or
+// NS_LINEBREAKER_NEED_MORE_TEXT if there is none before the end of the text.
+// aPos must be less than aLen; an invalid call asserts in debug builds and
+// reports NS_LINEBREAKER_NEED_MORE_TEXT instead of reading out of bounds.
+int32_t LineBreaker::Next(const char16_t* aText, uint32_t aLen, uint32_t aPos) {
+  NS_ASSERTION(aText, "aText shouldn't be null");
+  NS_ASSERTION(aLen > aPos, "Bad position passed to LineBreaker::Next");
+  if (!aText || aPos >= aLen) {
+    return NS_LINEBREAKER_NEED_MORE_TEXT;
+  }
+
+  int32_t nextPos = WordMove(aText, aLen, aPos, 1);
+  return nextPos < int32_t(aLen) ? nextPos : NS_LINEBREAKER_NEED_MORE_TEXT;
+}
+
+// Returns the position of the nearest line-break opportunity before aPos, or
+// NS_LINEBREAKER_NEED_MORE_TEXT if there is none after the start of the text.
+// aPos must satisfy 0 < aPos <= aLen; an invalid call asserts in debug builds
+// and reports NS_LINEBREAKER_NEED_MORE_TEXT instead of reading out of bounds.
+int32_t LineBreaker::Prev(const char16_t* aText, uint32_t aLen, uint32_t aPos) {
+  NS_ASSERTION(aText, "aText shouldn't be null");
+  NS_ASSERTION(aLen >= aPos && aPos > 0,
+               "Bad position passed to LineBreaker::Prev");
+  if (!aText || aPos == 0 || aPos > aLen) {
+    return NS_LINEBREAKER_NEED_MORE_TEXT;
+  }
+
+  int32_t prevPos = WordMove(aText, aLen, aPos, -1);
+  return prevPos > 0 ? prevPos : NS_LINEBREAKER_NEED_MORE_TEXT;
+}
+
+// Returns true if word-break:keep-all must suppress a break between aPrev and
+// aCh, which is the case when the Unicode line-break classes of BOTH
+// characters belong to the set affected by keep-all.
+static bool SuppressBreakForKeepAll(uint32_t aPrev, uint32_t aCh) {
+  const auto isKeepAllClass = [](uint8_t aLBClass) -> bool {
+    // Per https://drafts.csswg.org/css-text-3/#valdef-word-break-keep-all:
+    // "implicit soft wrap opportunities between typographic letter units
+    // (or other typographic character units belonging to the NU, AL, AI,
+    // or ID Unicode line breaking classes [UAX14]) are suppressed..."
+    if (aLBClass == U_LB_ALPHABETIC || aLBClass == U_LB_AMBIGUOUS ||
+        aLBClass == U_LB_NUMERIC || aLBClass == U_LB_IDEOGRAPHIC) {
+      return true;
+    }
+    // Additional classes that should be treated similarly, but have been
+    // broken out as separate classes in newer Unicode versions:
+    return aLBClass == U_LB_H2 || aLBClass == U_LB_H3 ||
+           aLBClass == U_LB_JL || aLBClass == U_LB_JV ||
+           aLBClass == U_LB_JT ||
+           aLBClass == U_LB_CONDITIONAL_JAPANESE_STARTER;
+  };
+
+  if (!isKeepAllClass(GetLineBreakClass(aPrev))) {
+    return false;
+  }
+  return isKeepAllClass(GetLineBreakClass(aCh));
+}
+
+// Computes break-before opportunities for a run of UTF-16 text.
+//
+// aChars / aLength: the text, as UTF-16 code units.
+// aWordBreak:       word-break mode (Normal, BreakAll or KeepAll).
+// aLevel / aIsChineseOrJapanese: forwarded to the character classifier.
+// aBreakBefore:     output array of aLength entries; entry i is non-zero if a
+//                   break is allowed before code unit i. Entry 0 is always
+//                   false, as is the entry for the low surrogate of a pair
+//                   handled outside a complex run.
+//
+// Runs of CLASS_COMPLEX characters are delegated as a whole either to the
+// grapheme cluster iterator (BreakAll) or to NS_GetComplexLineBreaks().
+//
+// NOTE(review): |state| is advanced once per loop iteration (plus once per
+// surrogate pair), so after a multi-unit complex run its index presumably
+// lags behind |cur| — confirm whether the U_PERCENT look-around in
+// ContextualAnalysis() depends on the two staying in sync.
+void LineBreaker::GetJISx4051Breaks(const char16_t* aChars, uint32_t aLength,
+                                    WordBreak aWordBreak, Strictness aLevel,
+                                    bool aIsChineseOrJapanese,
+                                    uint8_t* aBreakBefore) {
+  uint32_t cur;
+  int8_t lastClass = CLASS_NONE;
+  ContextState state(aChars, aLength);
+
+  for (cur = 0; cur < aLength; ++cur, state.AdvanceIndex()) {
+    char32_t ch = state.GetUnicodeCharAt(cur);
+    uint32_t chLen = ch > 0xFFFFu ? 2 : 1;
+    int8_t cl;
+
+    // Decodes the character that ends just before |cur|; only valid when
+    // cur > 0. Not using state.GetUnicodeCharAt() here because we're looking
+    // back rather than forward for possible surrogates.
+    auto prevChar = [=]() -> char32_t {
+      char32_t c = aChars[cur - 1];
+      if (cur > 1 && NS_IS_SURROGATE_PAIR(aChars[cur - 2], c)) {
+        c = SURROGATE_TO_UCS4(aChars[cur - 2], c);
+      }
+      return c;
+    };
+
+    if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
+      char32_t prev = cur > 0 ? prevChar() : char32_t(0);
+      char32_t next = cur + chLen < aLength
+                          ? state.GetUnicodeCharAt(cur + chLen)
+                          : char32_t(0);
+      cl = ContextualAnalysis(prev, ch, next, state, aLevel,
+                              aIsChineseOrJapanese);
+    } else {
+      if (ch == U_EQUAL) state.NotifySeenEqualsSign();
+      state.NotifyNonHyphenCharacter(ch);
+      cl = GetClass(ch, aLevel, aIsChineseOrJapanese);
+    }
+
+    // To implement word-break:break-all, we overwrite the line-break class of
+    // alphanumeric characters so they are treated the same as ideographic.
+    // The relevant characters will have been assigned CLASS_CHARACTER, _CLOSE,
+    // _CLOSE_LIKE_CHARACTER, or _NUMERIC by GetClass(), but those classes also
+    // include others that we don't want to touch here, so we re-check the
+    // Unicode line-break class to determine which ones to modify.
+    if (aWordBreak == WordBreak::BreakAll &&
+        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE ||
+         cl == CLASS_CLOSE_LIKE_CHARACTER || cl == CLASS_NUMERIC)) {
+      auto cls = GetLineBreakClass(ch);
+      if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
+          cls == U_LB_AMBIGUOUS || cls == U_LB_COMPLEX_CONTEXT ||
+          /* Additional Japanese and Korean LB classes; CSS Text spec doesn't
+             explicitly mention these, but this appears to give expected
+             behavior (spec issue?) */
+          cls == U_LB_CONDITIONAL_JAPANESE_STARTER ||
+          (cls >= U_LB_H2 && cls <= U_LB_JV)) {
+        cl = CLASS_BREAKABLE;
+      }
+    }
+
+    bool allowBreak = false;
+    if (cur > 0) {
+      NS_ASSERTION(CLASS_COMPLEX != lastClass || CLASS_COMPLEX != cl,
+                   "Loop should have prevented adjacent complex chars here");
+      allowBreak =
+          (state.UseConservativeBreaking() ? GetPairConservative(lastClass, cl)
+                                           : GetPair(lastClass, cl)) &&
+          (aWordBreak != WordBreak::KeepAll ||
+           !SuppressBreakForKeepAll(prevChar(), ch));
+    }
+    aBreakBefore[cur] = allowBreak;
+    if (allowBreak) state.NotifyBreakBefore();
+    lastClass = cl;
+    if (CLASS_COMPLEX == cl) {
+      // Extend |end| over the whole run of consecutive complex characters.
+      uint32_t end = cur + chLen;
+
+      while (end < aLength) {
+        char32_t c = state.GetUnicodeCharAt(end);
+        if (CLASS_COMPLEX != GetClass(c, aLevel, false)) {
+          break;
+        }
+        ++end;
+        if (c > 0xFFFFU) {  // it was a surrogate pair
+          ++end;
+        }
+      }
+
+      if (aWordBreak == WordBreak::BreakAll) {
+        // For break-all, we don't need to run a dictionary-based breaking
+        // algorithm, we just allow breaks between all grapheme clusters.
+        // The iterator only reports boundaries, so clear the interior of the
+        // run first to give every entry a defined value.
+        for (uint32_t i = cur + 1; i < end; ++i) {
+          aBreakBefore[i] = false;
+        }
+        ClusterIterator ci(aChars + cur, end - cur);
+        while (!ci.AtEnd()) {
+          ci.Next();
+          // Once the iterator is at the end of the run its position is |end|,
+          // which is either the next character (classified by the main loop)
+          // or one past the output array; never write there.
+          if (!ci.AtEnd()) {
+            aBreakBefore[ci - aChars] = true;
+          }
+        }
+      } else {
+        NS_GetComplexLineBreaks(aChars + cur, end - cur, aBreakBefore + cur);
+        // restore breakability at chunk begin, which was always set to false
+        // by the complex line breaker
+        aBreakBefore[cur] = allowBreak;
+      }
+
+      // The loop increment moves |cur| on to |end|.
+      cur = end - 1;
+    } else if (chLen == 2) {
+      // Supplementary-plane character: mark that we cannot break before the
+      // trailing low surrogate, and advance past it. This must not run after
+      // a complex run, which has already consumed all of its code units;
+      // doing so would skip the character at |end| (or write past the output
+      // array when the run ends the text).
+      ++cur;
+      aBreakBefore[cur] = false;
+      state.AdvanceIndex();
+    }
+  }
+}
+
+// Computes break-before opportunities for a run of 8-bit text, where each
+// byte is treated as the code point of the same value.
+//
+// aChars / aLength: the text.
+// aWordBreak:       word-break mode (Normal, BreakAll or KeepAll).
+// aLevel / aIsChineseOrJapanese: forwarded to the character classifier.
+// aBreakBefore:     output array of aLength entries; entry i is non-zero if a
+//                   break is allowed before character i. Entry 0 is always
+//                   false.
+void LineBreaker::GetJISx4051Breaks(const uint8_t* aChars, uint32_t aLength,
+                                    WordBreak aWordBreak, Strictness aLevel,
+                                    bool aIsChineseOrJapanese,
+                                    uint8_t* aBreakBefore) {
+  uint32_t cur;
+  int8_t lastClass = CLASS_NONE;
+  ContextState state(aChars, aLength);
+
+  for (cur = 0; cur < aLength; ++cur, state.AdvanceIndex()) {
+    char32_t ch = aChars[cur];
+    int8_t cl;
+
+    if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
+      cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL, ch,
+                              cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
+                              state, aLevel, aIsChineseOrJapanese);
+    } else {
+      if (ch == U_EQUAL) state.NotifySeenEqualsSign();
+      state.NotifyNonHyphenCharacter(ch);
+      cl = GetClass(ch, aLevel, aIsChineseOrJapanese);
+    }
+    // word-break:break-all: reclassify alphanumeric characters as breakable,
+    // using the same Unicode line-break classes as the 16-bit overload so
+    // that a string breaks identically whichever way it is stored.
+    if (aWordBreak == WordBreak::BreakAll &&
+        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE ||
+         cl == CLASS_CLOSE_LIKE_CHARACTER || cl == CLASS_NUMERIC)) {
+      auto cls = GetLineBreakClass(ch);
+      // Don't need to check additional Japanese/Korean classes in 8-bit, but
+      // U_LB_AMBIGUOUS must be included: several characters in the
+      // U+00A0..U+00FF range have that class.
+      if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
+          cls == U_LB_AMBIGUOUS || cls == U_LB_COMPLEX_CONTEXT) {
+        cl = CLASS_BREAKABLE;
+      }
+    }
+
+    bool allowBreak = false;
+    if (cur > 0) {
+      allowBreak =
+          (state.UseConservativeBreaking() ? GetPairConservative(lastClass, cl)
+                                           : GetPair(lastClass, cl)) &&
+          (aWordBreak != WordBreak::KeepAll ||
+           !SuppressBreakForKeepAll(aChars[cur - 1], ch));
+    }
+    aBreakBefore[cur] = allowBreak;
+    if (allowBreak) state.NotifyBreakBefore();
+    lastClass = cl;
+  }
+}
diff --git a/intl/lwbrk/LineBreaker.h b/intl/lwbrk/LineBreaker.h
new file mode 100644
index 0000000000..eaea8e36cc
--- /dev/null
+++ b/intl/lwbrk/LineBreaker.h
@@ -0,0 +1,88 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef mozilla_intl_LineBreaker_h__
+#define mozilla_intl_LineBreaker_h__
+
+#include "nscore.h"
+#include "nsISupports.h"
+
+#define NS_LINEBREAKER_NEED_MORE_TEXT -1
+
+namespace mozilla {
+namespace intl {
+
+// Reference-counted line breaker. Instances are obtained through Create();
+// the destructor is private, so lifetime is managed by the reference count.
+class LineBreaker {
+ public:
+  NS_INLINE_DECL_REFCOUNTING(LineBreaker)
+
+  // Word-break mode passed to GetJISx4051Breaks().
+  enum class WordBreak : uint8_t {
+    Normal = 0,    // default
+    BreakAll = 1,  // break all
+    KeepAll = 2    // always keep
+  };
+
+  // Line-break strictness level passed to GetJISx4051Breaks(); it is
+  // forwarded to the character classifier.
+  enum class Strictness : uint8_t {
+    Auto = 0,
+    Loose = 1,
+    Normal = 2,
+    Strict = 3,
+    Anywhere = 4
+  };
+
+  // Creates a new LineBreaker and returns an owning reference to it.
+  static already_AddRefed<LineBreaker> Create();
+
+  // Returns the position of the next break opportunity after aPos in
+  // aText[0..aLen), or NS_LINEBREAKER_NEED_MORE_TEXT if there is none before
+  // the end of the text. Requires aPos < aLen.
+  int32_t Next(const char16_t* aText, uint32_t aLen, uint32_t aPos);
+
+  // Returns the position of the nearest break opportunity before aPos in
+  // aText[0..aLen), or NS_LINEBREAKER_NEED_MORE_TEXT if there is none after
+  // the start of the text. Requires 0 < aPos <= aLen.
+  int32_t Prev(const char16_t* aText, uint32_t aLen, uint32_t aPos);
+
+  // Call this on a word with whitespace at either end. We will apply JISx4051
+  // rules to find breaks inside the word. aBreakBefore is set to the break-
+  // before status of each character; aBreakBefore[0] will always be false
+  // because we never return a break before the first character.
+  // aLength is the length of the aText array and also the length of the
+  // aBreakBefore output array.
+  // The two overloads handle 16-bit (UTF-16) and 8-bit text respectively.
+  void GetJISx4051Breaks(const char16_t* aText, uint32_t aLength,
+                         WordBreak aWordBreak, Strictness aLevel,
+                         bool aIsChineseOrJapanese, uint8_t* aBreakBefore);
+  void GetJISx4051Breaks(const uint8_t* aText, uint32_t aLength,
+                         WordBreak aWordBreak, Strictness aLevel,
+                         bool aIsChineseOrJapanese, uint8_t* aBreakBefore);
+
+ private:
+  ~LineBreaker() = default;
+
+  // Shared implementation of Next() (aDirection > 0) and Prev()
+  // (aDirection < 0).
+  int32_t WordMove(const char16_t* aText, uint32_t aLen, uint32_t aPos,
+                   int8_t aDirection);
+};
+
+// Returns true if |u| is one of the space characters this line breaker treats
+// as a word delimiter. LINE FEED, NO-BREAK SPACE and U+2007 FIGURE SPACE are
+// not in the set.
+static inline bool NS_IsSpace(char16_t u) {
+  switch (u) {
+    case 0x0020:  // SPACE
+    case 0x0009:  // CHARACTER TABULATION
+    case 0x000D:  // CARRIAGE RETURN
+    case 0x1361:  // ETHIOPIC WORDSPACE
+    case 0x1680:  // OGHAM SPACE MARK
+    case 0x205F:  // MEDIUM MATHEMATICAL SPACE
+      return true;
+    default:
+      break;
+  }
+  // U+2000..U+2006: EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE,
+  //                 FOUR-PER-EM SPACE, SIX-PER-EM SPACE
+  // U+2008..U+200B: PUNCTUATION SPACE, THIN SPACE, HAIR SPACE,
+  //                 ZERO WIDTH SPACE
+  return u >= 0x2000 && u <= 0x200B && u != 0x2007;
+}
+
+// Returns true if aChar lies in one of the code unit ranges that are handed
+// to the platform-native ("complex") breaker: Thai, Lao, Khmer and, except on
+// Android, Tibetan.
+static inline bool NS_NeedsPlatformNativeHandling(char16_t aChar) {
+#if ANDROID  // Bug 1647377: no "platform native" support for Tibetan;
+             // better to just use our class-based breaker.
+  constexpr char16_t kLastOfFirstRange = 0x0eff;  // Thai, Lao
+#else
+  constexpr char16_t kLastOfFirstRange = 0x0fff;  // Thai, Lao, Tibetan
+#endif
+  if (aChar >= 0x0e01 && aChar <= kLastOfFirstRange) {
+    return true;
+  }
+  return aChar >= 0x1780 && aChar <= 0x17ff;  // Khmer
+}
+
+}  // namespace intl
+}  // namespace mozilla
+
+#endif /* mozilla_intl_LineBreaker_h__ */
diff --git a/intl/lwbrk/WordBreaker.cpp b/intl/lwbrk/WordBreaker.cpp
new file mode 100644
index 0000000000..269d084d93
--- /dev/null
+++ b/intl/lwbrk/WordBreaker.cpp
@@ -0,0 +1,218 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/WordBreaker.h"
+#include "mozilla/StaticPrefs_layout.h"
+#include "nsComplexBreaker.h"
+#include "nsUnicodeProperties.h"
+
+using mozilla::intl::WordBreakClass;
+using mozilla::intl::WordBreaker;
+using mozilla::intl::WordRange;
+using mozilla::unicode::GetScriptCode;
+
+// Creates a new WordBreaker and hands the caller an owning reference.
+/*static*/
+already_AddRefed<WordBreaker> WordBreaker::Create() {
+  RefPtr<WordBreaker> breaker = new WordBreaker();
+  return breaker.forget();
+}
+
+// Returns true if there is a word boundary between the end of aText1 and the
+// start of aText2. Only the last code unit of aText1 and the first code unit
+// of aText2 are classified; when both are scriptio-continua characters, the
+// two texts are joined and the complex breaker decides.
+// Returns false if either text is null or empty.
+bool WordBreaker::BreakInBetween(const char16_t* aText1, uint32_t aTextLen1,
+                                 const char16_t* aText2, uint32_t aTextLen2) {
+  MOZ_ASSERT(nullptr != aText1, "null ptr");
+  MOZ_ASSERT(nullptr != aText2, "null ptr");
+
+  const bool haveBothTexts =
+      aText1 && aText2 && aTextLen1 != 0 && aTextLen2 != 0;
+  if (!haveBothTexts) {
+    return false;
+  }
+
+  const uint8_t lastClass = GetClass(aText1[aTextLen1 - 1]);
+  const uint8_t firstClass = GetClass(aText2[0]);
+
+  // Different classes always form a boundary.
+  if (lastClass != firstClass) {
+    return true;
+  }
+  // Same class: only scriptio-continua text can still contain a boundary.
+  if (lastClass != kWbClassScriptioContinua) {
+    return false;
+  }
+
+  nsAutoString joined(aText1, aTextLen1);
+  joined.Append(aText2, aTextLen2);
+  AutoTArray<uint8_t, 256> breakBefore;
+  breakBefore.SetLength(aTextLen1 + aTextLen2);
+  NS_GetComplexLineBreaks(joined.get(), joined.Length(),
+                          breakBefore.Elements());
+  // The entry at aTextLen1 is the break-before status of aText2's first unit.
+  return breakBefore[aTextLen1];
+}
+
+// Character-range predicates used by WordBreaker::GetClass(). Every macro
+// expands to a single fully parenthesized expression, so it can be combined
+// safely with operators such as '!' or '&&'. Arguments may be evaluated more
+// than once, so do not pass expressions with side effects.
+#define IS_ASCII(c) (0 == (0xFF80 & (c)))
+#define ASCII_IS_ALPHA(c) \
+  ((('a' <= (c)) && ((c) <= 'z')) || (('A' <= (c)) && ((c) <= 'Z')))
+#define ASCII_IS_DIGIT(c) (('0' <= (c)) && ((c) <= '9'))
+#define ASCII_IS_SPACE(c) \
+  ((' ' == (c)) || ('\t' == (c)) || ('\r' == (c)) || ('\n' == (c)))
+#define IS_ALPHABETICAL_SCRIPT(c) ((c) < 0x2E80)
+
+// we change the beginning of IS_HAN from 0x4e00 to 0x3400 to reflect
+// Unicode 3.0
+#define IS_HAN(c)                              \
+  (((0x3400 <= (c)) && ((c) <= 0x9fff)) ||     \
+   ((0xf900 <= (c)) && ((c) <= 0xfaff)))
+#define IS_KATAKANA(c) ((0x30A0 <= (c)) && ((c) <= 0x30FF))
+#define IS_HIRAGANA(c) ((0x3040 <= (c)) && ((c) <= 0x309F))
+#define IS_HALFWIDTHKATAKANA(c) ((0xFF60 <= (c)) && ((c) <= 0xFF9F))
+
+// Returns true if aChar belongs to a Southeast Asian script that is written
+// without spaces between words (https://en.wikipedia.org/wiki/Scriptio_continua),
+// so that word boundaries have to be found with the "complex breaker".
+// How well that works depends on the level of platform support for finding
+// possible line breaks - or possible word boundaries - in the particular
+// script. Thai, at least, works pretty well on the major desktop OSes. If
+// the script is not supported by the platform, we just won't find any useful
+// boundaries.
+static bool IsScriptioContinua(char16_t aChar) {
+  switch (GetScriptCode(aChar)) {
+    case Script::THAI:
+    case Script::MYANMAR:
+    case Script::KHMER:
+    case Script::JAVANESE:
+    case Script::BALINESE:
+    case Script::SUNDANESE:
+    case Script::LAO:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Maps a UTF-16 code unit to its word-break class. The classification is a
+// range-based heuristic ("hack"): ASCII and U+00A0 are special-cased, Han and
+// the kana blocks are recognised by range, and everything else is classified
+// as punctuation, scriptio continua or (by default) an alphabetic letter.
+/* static */
+WordBreakClass WordBreaker::GetClass(char16_t c) {
+  if (IS_ALPHABETICAL_SCRIPT(c)) {
+    if (IS_ASCII(c)) {
+      if (ASCII_IS_SPACE(c)) {
+        return kWbClassSpace;
+      }
+      // '_' counts as part of a word unless the pref asks selection to stop
+      // at underscores.
+      const bool isWordChar =
+          ASCII_IS_ALPHA(c) || ASCII_IS_DIGIT(c) ||
+          (c == '_' && !StaticPrefs::layout_word_select_stop_at_underscore());
+      return isWordChar ? kWbClassAlphaLetter : kWbClassPunct;
+    }
+    if (c == 0x00A0 /*NBSP*/) {
+      return kWbClassSpace;
+    }
+  } else if (IS_HAN(c)) {
+    return kWbClassHanLetter;
+  } else if (IS_KATAKANA(c)) {
+    return kWbClassKatakanaLetter;
+  } else if (IS_HIRAGANA(c)) {
+    return kWbClassHiraganaLetter;
+  } else if (IS_HALFWIDTHKATAKANA(c)) {
+    return kWbClassHWKatakanaLetter;
+  }
+
+  // Common tail for every character not classified by range above.
+  if (GetGenCategory(c) == nsUGenCategory::kPunctuation) {
+    return kWbClassPunct;
+  }
+  return IsScriptioContinua(c) ? kWbClassScriptioContinua
+                               : kWbClassAlphaLetter;
+}
+
+// Returns the range [mBegin, mEnd) of the word that contains aOffset, where
+// a "word" is a maximal run of code units sharing the word-break class of
+// aText[aOffset]. For scriptio-continua runs the range is narrowed further
+// using the complex breaker.
+// If aText is null or aOffset > aTextLen, both fields are aTextLen + 1.
+WordRange WordBreaker::FindWord(const char16_t* aText, uint32_t aTextLen,
+                                uint32_t aOffset) {
+  WordRange range;
+  MOZ_ASSERT(nullptr != aText, "null ptr");
+  MOZ_ASSERT(0 != aTextLen, "len = 0");
+  MOZ_ASSERT(aOffset <= aTextLen, "aOffset > aTextLen");
+
+  range.mBegin = aTextLen + 1;
+  range.mEnd = aTextLen + 1;
+
+  if (!aText || aOffset > aTextLen) return range;
+
+  // NOTE(review): aOffset == aTextLen is accepted, in which case this reads
+  // aText[aTextLen]; callers presumably pass a buffer that is readable there
+  // (e.g. NUL-terminated) — confirm.
+  WordBreakClass c = GetClass(aText[aOffset]);
+  uint32_t i;
+  // Scan forward. The word ends at aTextLen unless a class change is found
+  // earlier, so there is no need to inspect aText[aTextLen] itself (which
+  // lies outside the text).
+  range.mEnd = aTextLen;
+  for (i = aOffset + 1; i < aTextLen; i++) {
+    if (c != GetClass(aText[i])) {
+      range.mEnd = i;
+      break;
+    }
+  }
+
+  // Scan backward
+  range.mBegin = 0;
+  for (i = aOffset; i > 0; i--) {
+    if (c != GetClass(aText[i - 1])) {
+      range.mBegin = i;
+      break;
+    }
+  }
+
+  if (kWbClassScriptioContinua == c) {
+    // we pass the whole text segment to the complex word breaker to find a
+    // shorter answer
+    AutoTArray<uint8_t, 256> breakBefore;
+    breakBefore.SetLength(range.mEnd - range.mBegin);
+    NS_GetComplexLineBreaks(aText + range.mBegin, range.mEnd - range.mBegin,
+                            breakBefore.Elements());
+
+    // Scan forward
+    for (i = aOffset + 1; i < range.mEnd; i++) {
+      if (breakBefore[i - range.mBegin]) {
+        range.mEnd = i;
+        break;
+      }
+    }
+
+    // Scan backward
+    for (i = aOffset; i > range.mBegin; i--) {
+      if (breakBefore[i - range.mBegin]) {
+        range.mBegin = i;
+        break;
+      }
+    }
+  }
+  return range;
+}
+
+// Returns the offset of the first word boundary after aPos, i.e. the first
+// position at which the word-break class changes (or, within a
+// scriptio-continua run, the first break reported by the complex breaker).
+// Returns NS_WORDBREAKER_NEED_MORE_TEXT if no boundary is found before aLen,
+// or if aPos is not a valid index into the text.
+int32_t WordBreaker::NextWord(const char16_t* aText, uint32_t aLen,
+                              uint32_t aPos) {
+  WordBreakClass c1, c2;
+  uint32_t cur = aPos;
+  // '>=' rather than '==' so that an out-of-range aPos cannot lead to a read
+  // past the end of the buffer.
+  if (cur >= aLen) {
+    return NS_WORDBREAKER_NEED_MORE_TEXT;
+  }
+  c1 = GetClass(aText[cur]);
+
+  // Advance to the first code unit whose class differs from the one at aPos.
+  for (cur++; cur < aLen; cur++) {
+    c2 = GetClass(aText[cur]);
+    if (c2 != c1) {
+      break;
+    }
+  }
+
+  if (kWbClassScriptioContinua == c1) {
+    // we pass the whole text segment to the complex word breaker to find a
+    // shorter answer
+    AutoTArray<uint8_t, 256> breakBefore;
+    breakBefore.SetLength(aLen - aPos);
+    NS_GetComplexLineBreaks(aText + aPos, aLen - aPos, breakBefore.Elements());
+    // Look for a break strictly inside the run [aPos, cur).
+    uint32_t i = 1;
+    while (i < cur - aPos && !breakBefore[i]) {
+      i++;
+    }
+    if (i < cur - aPos) {
+      return aPos + i;
+    }
+  }
+
+  if (cur == aLen) {
+    return NS_WORDBREAKER_NEED_MORE_TEXT;
+  }
+
+  MOZ_ASSERT(cur != aPos);
+  return cur;
+}
diff --git a/intl/lwbrk/WordBreaker.h b/intl/lwbrk/WordBreaker.h
new file mode 100644
index 0000000000..57cb4b18b7
--- /dev/null
+++ b/intl/lwbrk/WordBreaker.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef mozilla_intl_WordBreaker_h__
+#define mozilla_intl_WordBreaker_h__
+
+#include "nscore.h"
+#include "nsISupports.h"
+
+#define NS_WORDBREAKER_NEED_MORE_TEXT -1
+
+namespace mozilla {
+namespace intl {
+
+// Half-open range [mBegin, mEnd) of code unit offsets delimiting a word.
+struct WordRange {
+  uint32_t mBegin;
+  uint32_t mEnd;
+};
+
+// Character classes used for word segmentation; a word boundary falls
+// wherever two adjacent characters have different classes.
+enum WordBreakClass : uint8_t {
+  kWbClassSpace = 0,
+  kWbClassAlphaLetter = 1,
+  kWbClassPunct = 2,
+  kWbClassHanLetter = 3,
+  kWbClassKatakanaLetter = 4,
+  kWbClassHiraganaLetter = 5,
+  kWbClassHWKatakanaLetter = 6,
+  kWbClassScriptioContinua = 7
+};
+
+// Reference-counted word breaker. Instances are obtained through Create();
+// the destructor is private, so lifetime is managed by the reference count.
+class WordBreaker {
+ public:
+  NS_INLINE_DECL_REFCOUNTING(WordBreaker)
+
+  // Creates a new WordBreaker and returns an owning reference to it.
+  static already_AddRefed<WordBreaker> Create();
+
+  // Returns true if there is a word boundary between the end of aText1 and
+  // the start of aText2; false if either text is null or empty.
+  bool BreakInBetween(const char16_t* aText1, uint32_t aTextLen1,
+                      const char16_t* aText2, uint32_t aTextLen2);
+  // Returns the range of the word in aText[0..aTextLen) that contains
+  // aOffset. If aText is null or aOffset > aTextLen, both fields of the
+  // result are aTextLen + 1.
+  WordRange FindWord(const char16_t* aText, uint32_t aTextLen,
+                     uint32_t aOffset);
+  // Returns the offset of the first word boundary after aPos, or
+  // NS_WORDBREAKER_NEED_MORE_TEXT if none is found before aLen.
+  int32_t NextWord(const char16_t* aText, uint32_t aLen, uint32_t aPos);
+
+  // Returns the word-break class of a single UTF-16 code unit.
+  static WordBreakClass GetClass(char16_t aChar);
+
+ private:
+  ~WordBreaker() = default;
+};
+
+}  // namespace intl
+}  // namespace mozilla
+
+#endif /* mozilla_intl_WordBreaker_h__ */
diff --git a/intl/lwbrk/crashtests/416721.html b/intl/lwbrk/crashtests/416721.html
new file mode 100644
index 0000000000..0a6625ba8a
--- /dev/null
+++ b/intl/lwbrk/crashtests/416721.html
@@ -0,0 +1,11 @@
+<!DOCTYPE html> 
+<html>
+ <head>
+  <title>Testcase for bug 416721</title>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ </head>
+ <body>
+  <p>กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛</p>
+ </body>
+</html>
+
diff --git a/intl/lwbrk/crashtests/crashtests.list b/intl/lwbrk/crashtests/crashtests.list
new file mode 100644
index 0000000000..a7cb7a173b
--- /dev/null
+++ b/intl/lwbrk/crashtests/crashtests.list
@@ -0,0 +1 @@
+load 416721.html
diff --git a/intl/lwbrk/gtest/TestLineBreak.cpp b/intl/lwbrk/gtest/TestLineBreak.cpp
new file mode 100644
index 0000000000..5c3215c228
--- /dev/null
+++ b/intl/lwbrk/gtest/TestLineBreak.cpp
@@ -0,0 +1,283 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include "nsXPCOM.h"
+#include "nsISupports.h"
+#include "nsServiceManagerUtils.h"
+#include "nsString.h"
+#include "gtest/gtest.h"
+
+#include "mozilla/intl/LineBreaker.h"
+#include "mozilla/intl/WordBreaker.h"
+
+static char teng1[] =
+    //          1         2         3         4         5         6         7
+    // 01234567890123456789012345678901234567890123456789012345678901234567890123456789
+    "This is a test to test(reasonable) line    break. This 0.01123 = 45 x 48.";
+// lexpN: offsets the LineBreaker test expects for tengN (via TestASCIILB).
+static uint32_t lexp1[] = {4,  7,  9,  14, 17, 34, 39, 40, 41,
+                           42, 49, 54, 62, 64, 67, 69, 73};
+// wexpN: offsets the WordBreaker test expects for tengN (via TestASCIIWB).
+static uint32_t wexp1[] = {4,  5,  7,  8,  9,  10, 14, 15, 17, 18, 22,
+                           23, 33, 34, 35, 39, 43, 48, 49, 50, 54, 55,
+                           56, 57, 62, 63, 64, 65, 67, 68, 69, 70, 72};
+
+static char teng2[] =
+    //          1         2         3         4         5         6         7
+    // 01234567890123456789012345678901234567890123456789012345678901234567890123456789
+    "()((reasonab(l)e) line  break. .01123=45x48.";
+// Expected line-break offsets for teng2.
+static uint32_t lexp2[] = {17, 22, 23, 30, 44};
+// Expected word-break offsets for teng2.
+static uint32_t wexp2[] = {4,  12, 13, 14, 15, 16, 17, 18, 22,
+                           24, 29, 30, 31, 32, 37, 38, 43};
+
+static char teng3[] =
+    //          1         2         3         4         5         6         7
+    // 01234567890123456789012345678901234567890123456789012345678901234567890123456789
+    "It's a test to test(ronae ) line break....";
+// Expected line-break offsets for teng3.
+static uint32_t lexp3[] = {4, 6, 11, 14, 25, 27, 32, 42};
+// Expected word-break offsets for teng3.
+static uint32_t wexp3[] = {2,  3,  4,  5,  6,  7,  11, 12, 14, 15,
+                           19, 20, 25, 26, 27, 28, 32, 33, 38};
+// Column rulers that Check() prints beneath a failing input string.
+static char ruler1[] =
+    "          1         2         3         4         5         6         7  ";
+static char ruler2[] =
+    "0123456789012345678901234567890123456789012345678901234567890123456789012";
+
+// Compares the first |i| offsets in |res| against the |outlen| expected
+// offsets in |out|; on any mismatch dumps both lists under a column ruler.
+// Returns true only if the counts and every offset agree.
+bool Check(const char* in, const uint32_t* out, uint32_t outlen, uint32_t i,
+           uint32_t res[256]) {
+  bool ok = true;
+
+  // All values printed below are uint32_t, so the conversions are %u, not %d.
+  if (i != outlen) {
+    ok = false;
+    printf("WARNING!!! return size wrong, expect %u but got %u \n", outlen, i);
+  }
+
+  for (uint32_t j = 0; j < i; j++) {
+    if (j >= outlen) {
+      ok = false;
+      printf("[%u] additional %u\n", j, res[j]);
+    } else if (res[j] != out[j]) {
+      ok = false;
+      printf("[%u] expect %u but got %u\n", j, out[j], res[j]);
+    }
+  }
+
+  if (!ok) {
+    printf("string  = \n%s\n", in);
+    printf("%s\n", ruler1);
+    printf("%s\n", ruler2);
+    printf("Expect = \n");
+    for (uint32_t j = 0; j < outlen; j++) {
+      printf("%u,", out[j]);
+    }
+    printf("\nResult = \n");
+    for (uint32_t j = 0; j < i; j++) {
+      printf("%u,", res[j]);
+    }
+    printf("\n");
+  }
+
+  return ok;
+}
+
+// Collects every offset LineBreaker::Next() reports for the ASCII text |in|
+// (the text length stands in for NEED_MORE_TEXT) and checks them via Check().
+bool TestASCIILB(mozilla::intl::LineBreaker* lb, const char* in,
+                 const uint32_t* out, uint32_t outlen) {
+  NS_ConvertASCIItoUTF16 text(in);
+  uint32_t found[256];
+  uint32_t count = 0;
+  int32_t pos = 0;
+  while (pos != NS_LINEBREAKER_NEED_MORE_TEXT && count < 256) {
+    pos = lb->Next(text.get(), text.Length(), pos);
+    found[count++] = pos == NS_LINEBREAKER_NEED_MORE_TEXT ? text.Length() : pos;
+  }
+  return Check(in, out, outlen, count, found);
+}
+
+// Collects the offsets WordBreaker::NextWord() reports for the ASCII text
+// |in| until NS_WORDBREAKER_NEED_MORE_TEXT and verifies them with Check().
+bool TestASCIIWB(mozilla::intl::WordBreaker* lb, const char* in,
+                 const uint32_t* out, uint32_t outlen) {
+  NS_ConvertASCIItoUTF16 text(in);
+  uint32_t found[256];
+  uint32_t count = 0;
+
+  int32_t pos = lb->NextWord(text.get(), text.Length(), 0);
+  while (pos != NS_WORDBREAKER_NEED_MORE_TEXT && count < 256) {
+    found[count++] = pos;  // the loop guard already excludes the sentinel
+    pos = lb->NextWord(text.get(), text.Length(), pos);
+  }
+
+  return Check(in, out, outlen, count, found);
+}
+
+// Runs the three ASCII samples through LineBreaker and compares the offsets.
+TEST(LineBreak, LineBreaker)
+{
+  RefPtr<mozilla::intl::LineBreaker> lb = mozilla::intl::LineBreaker::Create();
+  ASSERT_TRUE(lb);
+
+  ASSERT_TRUE(TestASCIILB(lb, teng1, lexp1, sizeof(lexp1) / sizeof(lexp1[0])));
+  ASSERT_TRUE(TestASCIILB(lb, teng2, lexp2, sizeof(lexp2) / sizeof(lexp2[0])));
+  ASSERT_TRUE(TestASCIILB(lb, teng3, lexp3, sizeof(lexp3) / sizeof(lexp3[0])));
+}
+
+// Runs the three ASCII samples through WordBreaker and compares the offsets.
+TEST(LineBreak, WordBreaker)
+{
+  RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();
+  ASSERT_TRUE(wbk);
+  ASSERT_TRUE(TestASCIIWB(wbk, teng1, wexp1, sizeof(wexp1) / sizeof(wexp1[0])));
+  ASSERT_TRUE(TestASCIIWB(wbk, teng2, wexp2, sizeof(wexp2) / sizeof(wexp2[0])));
+  ASSERT_TRUE(TestASCIIWB(wbk, teng3, wexp3, sizeof(wexp3) / sizeof(wexp3[0])));
+}
+
+//                         012345678901234
+static const char wb0[] = "T";
+static const char wb1[] = "h";
+static const char wb2[] = "is   is a int";
+static const char wb3[] = "ernationali";
+static const char wb4[] = "zation work.";
+// Joined in order these read "This   is a internationalization work."
+static const char* wb[] = {wb0, wb1, wb2, wb3, wb4};
+
+// Runs WordBreaker::NextWord() over each fragment in |wb|, appending every
+// delimited piece plus '^' to |result|, then compares it to a fixed string.
+void TestPrintWordWithBreak() {
+  uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
+  RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();
+  nsAutoString result;
+
+  for (uint32_t i = 0; i < numOfFragment; i++) {
+    NS_ConvertASCIItoUTF16 fragText(wb[i]);
+    int32_t cur = 0;
+    cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
+    uint32_t start = 0;
+    for (uint32_t j = 0; cur != NS_WORDBREAKER_NEED_MORE_TEXT; j++) {
+      result.Append(Substring(fragText, start, cur - start));
+      result.Append('^');
+      start = (cur >= 0 ? cur : cur - start);
+      cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
+    }
+    // NOTE(review): the tail starts at index Length() - start, not at start;
+    // the expected string at the bottom encodes that output - confirm intent.
+    result.Append(Substring(fragText, fragText.Length() - start));
+    if (i != numOfFragment - 1) {
+      NS_ConvertASCIItoUTF16 nextFragText(wb[i + 1]);
+      // Mark a break when BreakInBetween() reports one between the fragments.
+      bool canBreak = true;
+      canBreak = wbk->BreakInBetween(fragText.get(), fragText.Length(),
+                                     nextFragText.get(), nextFragText.Length());
+      if (canBreak) {
+        result.Append('^');
+      }
+      fragText.Assign(nextFragText);  // no effect: fragText is loop-local
+    }
+  }
+  ASSERT_STREQ("is^   ^is^ ^a^ ^  is a intzation^ ^work^ation work.",
+               NS_ConvertUTF16toUTF8(result).get());
+}
+
+// Looks up the word at |offset| in fragment wb[fragN]; when that word touches
+// a fragment edge it is extended through the neighbouring fragments until
+// BreakInBetween() reports a boundary. The outcome must equal |expected|.
+void TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
+                                   const char* expected) {
+  uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
+  RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();
+  NS_ConvertASCIItoUTF16 fragText(wb[fragN]);
+  mozilla::intl::WordRange res =
+      wbk->FindWord(fragText.get(), fragText.Length(), offset);
+
+  bool canBreak;
+  nsAutoString result(Substring(fragText, res.mBegin, res.mEnd - res.mBegin));
+
+  if ((uint32_t)fragText.Length() == res.mEnd) {
+    // if we hit the end of the fragment
+    nsAutoString curFragText = fragText;
+    for (uint32_t p = fragN + 1; p < numOfFragment; p++) {
+      NS_ConvertASCIItoUTF16 nextFragText(wb[p]);
+      canBreak = wbk->BreakInBetween(curFragText.get(), curFragText.Length(),
+                                     nextFragText.get(), nextFragText.Length());
+      if (canBreak) {
+        break;
+      }
+      mozilla::intl::WordRange r =
+          wbk->FindWord(nextFragText.get(), nextFragText.Length(), 0);
+      result.Append(Substring(nextFragText, r.mBegin, r.mEnd - r.mBegin));
+      if ((uint32_t)nextFragText.Length() != r.mEnd) {
+        break;
+      }
+      // Advance the left-hand fragment so the next BreakInBetween() call
+      // compares adjacent fragments instead of always starting from wb[fragN].
+      curFragText.Assign(nextFragText);
+    }
+  }
+
+  if (0 == res.mBegin) {
+    // if we hit the beginning of the fragment
+    nsAutoString curFragText = fragText;
+    for (uint32_t p = fragN; p > 0; p--) {
+      NS_ConvertASCIItoUTF16 prevFragText(wb[p - 1]);
+      canBreak = wbk->BreakInBetween(prevFragText.get(), prevFragText.Length(),
+                                     curFragText.get(), curFragText.Length());
+      if (canBreak) {
+        break;
+      }
+      mozilla::intl::WordRange r = wbk->FindWord(
+          prevFragText.get(), prevFragText.Length(), prevFragText.Length());
+      result.Insert(Substring(prevFragText, r.mBegin, r.mEnd - r.mBegin), 0);
+      if (0 != r.mBegin) {
+        break;
+      }
+      // Likewise move the right-hand fragment back to the one just consumed.
+      curFragText.Assign(prevFragText);
+    }
+  }
+
+  ASSERT_STREQ(expected, NS_ConvertUTF16toUTF8(result).get())
+      << "FindWordBreakFromPosition(" << fragN << ", " << offset << ")";
+}
+
+// Over a Thai-script sample, every NextWord() call must return an offset that
+// differs from the one passed in, until NS_WORDBREAKER_NEED_MORE_TEXT comes back.
+void TestNextWordBreakWithComplexLanguage() {
+  RefPtr<mozilla::intl::WordBreaker> wbk = mozilla::intl::WordBreaker::Create();
+  nsString fragText(u"\u0e40\u0e1b\u0e47\u0e19\u0e19\u0e31\u0e01");
+  int32_t pos = 0;
+  do {
+    const int32_t next = wbk->NextWord(fragText.get(), fragText.Length(), pos);
+    ASSERT_NE(pos, next);
+    pos = next;
+  } while (pos != NS_WORDBREAKER_NEED_MORE_TEXT);
+  ASSERT_TRUE(true);
+}
+
+TEST(LineBreak, WordBreakUsage)
+{
+  static const struct {  // one TestFindWordBreakFromPosition() call per row
+    uint32_t mFragN, mOffset;
+    const char* mExpected;
+  } kCases[] = {
+      {0, 0, "This"}, {1, 0, "his"}, {2, 0, "is"}, {2, 1, "is"}, {2, 9, " "},
+      {2, 10, "internationalization"}, {3, 4, "ernationalization"},
+      {3, 8, "ernationalization"}, {4, 6, " "}, {4, 7, "work"}};
+  TestPrintWordWithBreak();
+  for (const auto& c : kCases) {
+    TestFindWordBreakFromPosition(c.mFragN, c.mOffset, c.mExpected);
+  }
+  TestNextWordBreakWithComplexLanguage();
+}
diff --git a/intl/lwbrk/gtest/moz.build b/intl/lwbrk/gtest/moz.build
new file mode 100644
index 0000000000..c9fbab8e76
--- /dev/null
+++ b/intl/lwbrk/gtest/moz.build
@@ -0,0 +1,11 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+UNIFIED_SOURCES += [
+    "TestLineBreak.cpp",
+]
+
+FINAL_LIBRARY = "xul-gtest"
diff --git a/intl/lwbrk/jisx4051class.h b/intl/lwbrk/jisx4051class.h
new file mode 100644
index 0000000000..3140cf63a7
--- /dev/null
+++ b/intl/lwbrk/jisx4051class.h
@@ -0,0 +1,217 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
+    mozilla/intl/lwbrk/tools/anzx4051.pl
+ */
+static const uint32_t gLBClass00[32] = {
+    0x55555555,  // U+0000 - U+0007
+    0x55555555,  // U+0008 - U+000F
+    0x55555555,  // U+0010 - U+0017
+    0x55555555,  // U+0018 - U+001F
+    0x7AABAAA5,  // U+0020 - U+0027
+    0x7A7AAAA9,  // U+0028 - U+002F
+    0x66666666,  // U+0030 - U+0037
+    0xAAA9AA66,  // U+0038 - U+003F
+    0x77777777,  // U+0040 - U+0047
+    0x77777777,  // U+0048 - U+004F
+    0x77777777,  // U+0050 - U+0057
+    0x77AA9777,  // U+0058 - U+005F
+    0x77777777,  // U+0060 - U+0067
+    0x77777777,  // U+0068 - U+006F
+    0x77777777,  // U+0070 - U+0077
+    0x7AAA9777,  // U+0078 - U+007F
+    0x77777777,  // U+0080 - U+0087
+    0x77777777,  // U+0088 - U+008F
+    0x77777777,  // U+0090 - U+0097
+    0x77777777,  // U+0098 - U+009F
+    0xAA9A9AAB,  // U+00A0 - U+00A7
+    0x77A9777A,  // U+00A8 - U+00AF
+    0xAAAAAAAA,  // U+00B0 - U+00B7
+    0xAAAAAAAA,  // U+00B8 - U+00BF
+    0x77777777,  // U+00C0 - U+00C7
+    0x77777777,  // U+00C8 - U+00CF
+    0x77777777,  // U+00D0 - U+00D7
+    0x77777777,  // U+00D8 - U+00DF
+    0x77777777,  // U+00E0 - U+00E7
+    0x77777777,  // U+00E8 - U+00EF
+    0xA7777777,  // U+00F0 - U+00F7
+    0x77777777,  // U+00F8 - U+00FF
+};
+
+static const uint32_t gLBClass20[32] = {
+    0xB5555555,  // U+2000 - U+2007
+    0x77775555,  // U+2008 - U+200F
+    0x777277B7,  // U+2010 - U+2017
+    0x77A777A7,  // U+2018 - U+201F
+    0xA1117777,  // U+2020 - U+2027
+    0xB7777777,  // U+2028 - U+202F
+    0x77744444,  // U+2030 - U+2037
+    0x7A115107,  // U+2038 - U+203F
+    0x11017777,  // U+2040 - U+2047
+    0x77777711,  // U+2048 - U+204F
+    0x77777777,  // U+2050 - U+2057
+    0x57777777,  // U+2058 - U+205F
+    0x7777777B,  // U+2060 - U+2067
+    0x77777777,  // U+2068 - U+206F
+    0x77777777,  // U+2070 - U+2077
+    0x77777777,  // U+2078 - U+207F
+    0x77777777,  // U+2080 - U+2087
+    0x77777777,  // U+2088 - U+208F
+    0x77777777,  // U+2090 - U+2097
+    0x77777777,  // U+2098 - U+209F
+    0x77777777,  // U+20A0 - U+20A7
+    0x77777777,  // U+20A8 - U+20AF
+    0x77777777,  // U+20B0 - U+20B7
+    0x77777777,  // U+20B8 - U+20BF
+    0x77777777,  // U+20C0 - U+20C7
+    0x77777777,  // U+20C8 - U+20CF
+    0x77777777,  // U+20D0 - U+20D7
+    0x77777777,  // U+20D8 - U+20DF
+    0x77777777,  // U+20E0 - U+20E7
+    0x77777777,  // U+20E8 - U+20EF
+    0x77777777,  // U+20F0 - U+20F7
+    0x77777777,  // U+20F8 - U+20FF
+};
+
+static const uint32_t gLBClass21[32] = {
+    0x77777777,  // U+2100 - U+2107
+    0x77777777,  // U+2108 - U+210F
+    0x73777777,  // U+2110 - U+2117
+    0x77777777,  // U+2118 - U+211F
+    0x77777777,  // U+2120 - U+2127
+    0x77777777,  // U+2128 - U+212F
+    0x77777777,  // U+2130 - U+2137
+    0x77777777,  // U+2138 - U+213F
+    0x77777777,  // U+2140 - U+2147
+    0x77777777,  // U+2148 - U+214F
+    0x77777777,  // U+2150 - U+2157
+    0x77777777,  // U+2158 - U+215F
+    0x55555555,  // U+2160 - U+2167
+    0x55555555,  // U+2168 - U+216F
+    0x55555555,  // U+2170 - U+2177
+    0x55555555,  // U+2178 - U+217F
+    0x77777777,  // U+2180 - U+2187
+    0x77777777,  // U+2188 - U+218F
+    0x77777777,  // U+2190 - U+2197
+    0x77777777,  // U+2198 - U+219F
+    0x77777777,  // U+21A0 - U+21A7
+    0x77777777,  // U+21A8 - U+21AF
+    0x77777777,  // U+21B0 - U+21B7
+    0x77777777,  // U+21B8 - U+21BF
+    0x77777777,  // U+21C0 - U+21C7
+    0x77777777,  // U+21C8 - U+21CF
+    0x77777777,  // U+21D0 - U+21D7
+    0x77777777,  // U+21D8 - U+21DF
+    0x77777777,  // U+21E0 - U+21E7
+    0x77777777,  // U+21E8 - U+21EF
+    0x77777777,  // U+21F0 - U+21F7
+    0x77777777,  // U+21F8 - U+21FF
+};
+
+static const uint32_t gLBClass30[32] = {
+    0x55155115,  // U+3000 - U+3007
+    0x10101010,  // U+3008 - U+300F
+    0x10105510,  // U+3010 - U+3017
+    0x11011010,  // U+3018 - U+301F
+    0x55555555,  // U+3020 - U+3027
+    0x55555555,  // U+3028 - U+302F
+    0x55555555,  // U+3030 - U+3037
+    0x55555555,  // U+3038 - U+303F
+    0x15151515,  // U+3040 - U+3047
+    0x55555515,  // U+3048 - U+304F
+    0x55555555,  // U+3050 - U+3057
+    0x55555555,  // U+3058 - U+305F
+    0x55551555,  // U+3060 - U+3067
+    0x55555555,  // U+3068 - U+306F
+    0x55555555,  // U+3070 - U+3077
+    0x55555555,  // U+3078 - U+307F
+    0x15151555,  // U+3080 - U+3087
+    0x51555555,  // U+3088 - U+308F
+    0x55555555,  // U+3090 - U+3097
+    0x51111115,  // U+3098 - U+309F
+    0x15151515,  // U+30A0 - U+30A7
+    0x55555515,  // U+30A8 - U+30AF
+    0x55555555,  // U+30B0 - U+30B7
+    0x55555555,  // U+30B8 - U+30BF
+    0x55551555,  // U+30C0 - U+30C7
+    0x55555555,  // U+30C8 - U+30CF
+    0x55555555,  // U+30D0 - U+30D7
+    0x55555555,  // U+30D8 - U+30DF
+    0x15151555,  // U+30E0 - U+30E7
+    0x51555555,  // U+30E8 - U+30EF
+    0x51155555,  // U+30F0 - U+30F7
+    0x51111555,  // U+30F8 - U+30FF
+};
+
+static const uint32_t gLBClass0E[32] = {
+    0x88888888,  // U+0E00 - U+0E07
+    0x88888888,  // U+0E08 - U+0E0F
+    0x88888888,  // U+0E10 - U+0E17
+    0x88888888,  // U+0E18 - U+0E1F
+    0x88888888,  // U+0E20 - U+0E27
+    0x18888888,  // U+0E28 - U+0E2F
+    0x88888888,  // U+0E30 - U+0E37
+    0x08888888,  // U+0E38 - U+0E3F
+    0x81888888,  // U+0E40 - U+0E47
+    0x78888888,  // U+0E48 - U+0E4F
+    0x66666666,  // U+0E50 - U+0E57
+    0x88881166,  // U+0E58 - U+0E5F
+    0x88888888,  // U+0E60 - U+0E67
+    0x88888888,  // U+0E68 - U+0E6F
+    0x88888888,  // U+0E70 - U+0E77
+    0x88888888,  // U+0E78 - U+0E7F
+    0x88888888,  // U+0E80 - U+0E87
+    0x88888888,  // U+0E88 - U+0E8F
+    0x88888888,  // U+0E90 - U+0E97
+    0x88888888,  // U+0E98 - U+0E9F
+    0x88888888,  // U+0EA0 - U+0EA7
+    0x18888888,  // U+0EA8 - U+0EAF
+    0x88888888,  // U+0EB0 - U+0EB7
+    0x88888888,  // U+0EB8 - U+0EBF
+    0x81888888,  // U+0EC0 - U+0EC7
+    0x88888888,  // U+0EC8 - U+0ECF
+    0x66666666,  // U+0ED0 - U+0ED7
+    0x88888866,  // U+0ED8 - U+0EDF
+    0x88888888,  // U+0EE0 - U+0EE7
+    0x88888888,  // U+0EE8 - U+0EEF
+    0x88888888,  // U+0EF0 - U+0EF7
+    0x88888888,  // U+0EF8 - U+0EFF
+};
+
+static const uint32_t gLBClass17[32] = {
+    0x77777777,  // U+1700 - U+1707
+    0x77777777,  // U+1708 - U+170F
+    0x77777777,  // U+1710 - U+1717
+    0x77777777,  // U+1718 - U+171F
+    0x77777777,  // U+1720 - U+1727
+    0x77777777,  // U+1728 - U+172F
+    0x70077777,  // U+1730 - U+1737
+    0x77777777,  // U+1738 - U+173F
+    0x77777777,  // U+1740 - U+1747
+    0x77777777,  // U+1748 - U+174F
+    0x77777777,  // U+1750 - U+1757
+    0x77777777,  // U+1758 - U+175F
+    0x77777777,  // U+1760 - U+1767
+    0x77777777,  // U+1768 - U+176F
+    0x77777777,  // U+1770 - U+1777
+    0x77777777,  // U+1778 - U+177F
+    0x88888888,  // U+1780 - U+1787
+    0x88888888,  // U+1788 - U+178F
+    0x88888888,  // U+1790 - U+1797
+    0x88888888,  // U+1798 - U+179F
+    0x88888888,  // U+17A0 - U+17A7
+    0x88888888,  // U+17A8 - U+17AF
+    0x88888888,  // U+17B0 - U+17B7
+    0x88888888,  // U+17B8 - U+17BF
+    0x88888888,  // U+17C0 - U+17C7
+    0x88888888,  // U+17C8 - U+17CF
+    0x88118888,  // U+17D0 - U+17D7
+    0x77888181,  // U+17D8 - U+17DF
+    0x88888888,  // U+17E0 - U+17E7
+    0x77777788,  // U+17E8 - U+17EF
+    0x88888888,  // U+17F0 - U+17F7
+    0x77777788,  // U+17F8 - U+17FF
+};
diff --git a/intl/lwbrk/jisx4051pairtable.txt b/intl/lwbrk/jisx4051pairtable.txt
new file mode 100644
index 0000000000..2bae1b18fe
--- /dev/null
+++ b/intl/lwbrk/jisx4051pairtable.txt
@@ -0,0 +1,286 @@
+
+
+
+/* 
+
+   Simplification of Pair Table in JIS X 4051
+
+   1. The Original Table - in 4.1.3
+
+   In JIS X 4051, the pair table is defined as below
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char        
+
+              1  2  3  4  5  6  7  8  9 10 11 12 13 13 14 14 15 16 17 18 19 20
+                                                 *  #  *  #
+        1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  E
+        2        X  X  X  X  X                                               X
+        3        X  X  X  X  X                                               X
+        4        X  X  X  X  X                                               X
+        5        X  X  X  X  X                                               X
+        6        X  X  X  X  X                                               X
+        7        X  X  X  X  X  X                                            X 
+        8        X  X  X  X  X                                X              E 
+        9        X  X  X  X  X                                               X
+       10        X  X  X  X  X                                               X
+       11        X  X  X  X  X                                               X
+       12        X  X  X  X  X                                               X  
+       13        X  X  X  X  X                    X                          X
+       14        X  X  X  X  X                          X                    X
+       15        X  X  X  X  X        X                       X        X     X 
+       16        X  X  X  X  X                                   X     X     X
+       17        X  X  X  X  X                                               E 
+       18        X  X  X  X  X                                X  X     X     X 
+       19     X  E  E  E  E  E  X  X  X  X  X  X  X  X  X  X  X  X  E  X  E  E
+       20        X  X  X  X  X                                               E
+
+   * Same Char
+   # Other Char
+
+   2. Simplified by removing the classes which we do not care about
+
+   However, since we do not care about class 13(Subscript), 14(Ruby),
+   19(split line note begin quote), and 20(split line note end quote),
+   we can simplify this pair table into the following
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char        
+
+              1  2  3  4  5  6  7  8  9 10 11 12 15 16 17 18 
+                                                 
+        1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X
+        2        X  X  X  X  X                             
+        3        X  X  X  X  X                            
+        4        X  X  X  X  X                           
+        5        X  X  X  X  X                          
+        6        X  X  X  X  X                         
+        7        X  X  X  X  X  X                      
+        8        X  X  X  X  X                    X    
+        9        X  X  X  X  X                                   
+       10        X  X  X  X  X                                  
+       11        X  X  X  X  X                                 
+       12        X  X  X  X  X                                
+       15        X  X  X  X  X        X           X        X    
+       16        X  X  X  X  X                       X     X    
+       17        X  X  X  X  X                                  
+       18        X  X  X  X  X                    X  X     X    
+
+   3. Simplified by merging classes
+
+   After the second simplification, the pair table has some duplication:
+   a. classes 2, 3, 4, 5, 6 are the same - we can merge them
+   b. classes 10, 11, 12, 17 are the same - we can merge them
+
+
+   Class of
+   Leading    Class of Trailing Char Class
+   Char        
+
+              1 [a] 7  8  9 [b]15 16 18 
+                                     
+        1     X  X  X  X  X  X  X  X  X
+      [a]        X                             
+        7        X  X                      
+        8        X              X    
+        9        X                                   
+      [b]        X                                  
+       15        X        X     X     X    
+       16        X                 X  X    
+       18        X              X  X  X    
+
+
+   4. Now we use one bit to encode whether it is breakable, and use 2 bytes
+      for one row, then the bit table will look like:
+
+                 18    <-   1
+            
+       1  0000 0001 1111 1111  = 0x01FF
+      [a] 0000 0000 0000 0010  = 0x0002
+       7  0000 0000 0000 0110  = 0x0006
+       8  0000 0000 0100 0010  = 0x0042
+       9  0000 0000 0000 0010  = 0x0002
+      [b] 0000 0000 0000 0010  = 0x0042
+      15  0000 0001 0101 0010  = 0x0152
+      16  0000 0001 1000 0010  = 0x0182
+      18  0000 0001 1100 0010  = 0x01C2
+
+*/
+
+static uint16_t gJISx4051SimplifiedPair[9] = {
+  0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0042, 0x0152, 0x0182, 0x01C2
+};
+
+PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls2)
+{  // tests bit aCls2 of row aCls1 in gJISx4051SimplifiedPair
+  NS_ASSERTION( (aCls1 < 9), "invalid class");
+  NS_ASSERTION( (aCls2 < 9), "invalid class");
+  return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) ));
+}
+
+
+#define X4051_IS_DIGIT(u) ((0x0030 <= (u)) && ((u) <= 0x0039))  // '0'..'9'
+
+nsJISx4051Cls XXXX::GetClass(
+   PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0)
+{
+   // special case in cls 15: ',' (0x2C) or '.' (0x2E) between two digits
+   if( ((0x2C == aChar) || (0x2E == aChar)) &&
+       (X4051_IS_DIGIT(aBefore)) && (X4051_IS_DIGIT(aAfter)))
+   {
+     return kJISx4051Cls_15;
+   }
+   // otherwise try gSingle, then gRange; anything unmatched is cls 15
+   nsJISx4051Cls cls;
+   if(gSingle->Lookup(aChar, &cls))
+     return cls;
+
+   if(gRange->Lookup(aChar, &cls))
+     return cls;
+
+   return kJISx4051Cls_15;
+}
+
+
+typedef enum {
+  kJISx4051Cls_1 = 0,
+  kJISx4051Cls_2 = 1,
+  kJISx4051Cls_3 = 1,
+  kJISx4051Cls_4 = 1,
+  kJISx4051Cls_5 = 1,
+  kJISx4051Cls_6 = 1,
+  kJISx4051Cls_7 = 2,
+  kJISx4051Cls_8 = 3,
+  kJISx4051Cls_9 = 4,
+  kJISx4051Cls_10 = 5,
+  kJISx4051Cls_11 = 5,
+  kJISx4051Cls_12 = 5,
+  // kJISx4051Cls_13 = 0,
+  // kJISx4051Cls_14 = 0,
+  kJISx4051Cls_15 = 6,
+  kJISx4051Cls_16 = 7,
+  kJISx4051Cls_17 = 5,
+  kJISx4051Cls_18 = 8,
+  // kJISx4051Cls_19 = 0,
+  // kJISx4051Cls_20 = 0
+} nsJISx4051Cls;
+
+
+  // Table 2
+  YYYY(kJISx4051Cls_1 , 0x0028),
+  YYYY(kJISx4051Cls_1 , 0x005B),
+  YYYY(kJISx4051Cls_1 , 0x007B),
+  YYYY(kJISx4051Cls_1 , 0x2018),
+  YYYY(kJISx4051Cls_1 , 0x201B),
+  YYYY(kJISx4051Cls_1 , 0x201C),
+  YYYY(kJISx4051Cls_1 , 0x201F),
+  YYYY(kJISx4051Cls_1 , 0x3008),
+  YYYY(kJISx4051Cls_1 , 0x300A),
+  YYYY(kJISx4051Cls_1 , 0x300C),
+  YYYY(kJISx4051Cls_1 , 0x300E),
+  YYYY(kJISx4051Cls_1 , 0x3010),
+  YYYY(kJISx4051Cls_1 , 0x3014),
+  YYYY(kJISx4051Cls_1 , 0x3016),
+  YYYY(kJISx4051Cls_1 , 0x3018),
+  YYYY(kJISx4051Cls_1 , 0x301A),
+  YYYY(kJISx4051Cls_1 , 0x301D),
+
+  // Table 3
+  YYYY(kJISx4051Cls_2 , 0x0029),
+  YYYY(kJISx4051Cls_2 , 0x002C),
+  YYYY(kJISx4051Cls_2 , 0x005D),
+  YYYY(kJISx4051Cls_2 , 0x007D),
+  YYYY(kJISx4051Cls_2 , 0x2019),
+  YYYY(kJISx4051Cls_2 , 0x201A),
+  YYYY(kJISx4051Cls_2 , 0x201D),
+  YYYY(kJISx4051Cls_2 , 0x201E),
+  YYYY(kJISx4051Cls_2 , 0x3001),
+  YYYY(kJISx4051Cls_2 , 0x3009),
+  YYYY(kJISx4051Cls_2 , 0x300B),
+  YYYY(kJISx4051Cls_2 , 0x300D),
+  YYYY(kJISx4051Cls_2 , 0x300F),
+  YYYY(kJISx4051Cls_2 , 0x3011),
+  YYYY(kJISx4051Cls_2 , 0x3015),
+  YYYY(kJISx4051Cls_2 , 0x3017),
+  YYYY(kJISx4051Cls_2 , 0x3019),
+  YYYY(kJISx4051Cls_2 , 0x301B),
+  YYYY(kJISx4051Cls_2 , 0x301E),
+  YYYY(kJISx4051Cls_2 , 0x301F),
+
+  // Table 4
+  YYYY(kJISx4051Cls_3 , 0x203C),
+  YYYY(kJISx4051Cls_3 , 0x2044),
+  YYYY(kJISx4051Cls_3 , 0x301C),
+  YYYY(kJISx4051Cls_3 , 0x3041),
+  YYYY(kJISx4051Cls_3 , 0x3043),
+  YYYY(kJISx4051Cls_3 , 0x3045),
+  YYYY(kJISx4051Cls_3 , 0x3047),
+  YYYY(kJISx4051Cls_3 , 0x3049),
+  YYYY(kJISx4051Cls_3 , 0x3063),
+  YYYY(kJISx4051Cls_3 , 0x3083),
+  YYYY(kJISx4051Cls_3 , 0x3085),
+  YYYY(kJISx4051Cls_3 , 0x3087),
+  YYYY(kJISx4051Cls_3 , 0x308E),
+  YYYY(kJISx4051Cls_3 , 0x309D),
+  YYYY(kJISx4051Cls_3 , 0x309E),
+  YYYY(kJISx4051Cls_3 , 0x30A1),
+  YYYY(kJISx4051Cls_3 , 0x30A3),
+  YYYY(kJISx4051Cls_3 , 0x30A5),
+  YYYY(kJISx4051Cls_3 , 0x30A7),
+  YYYY(kJISx4051Cls_3 , 0x30A9),
+  YYYY(kJISx4051Cls_3 , 0x30C3),
+  YYYY(kJISx4051Cls_3 , 0x30E3),
+  YYYY(kJISx4051Cls_3 , 0x30E5),
+  YYYY(kJISx4051Cls_3 , 0x30E7),
+  YYYY(kJISx4051Cls_3 , 0x30EE),
+  YYYY(kJISx4051Cls_3 , 0x30F5),
+  YYYY(kJISx4051Cls_3 , 0x30F6),
+  YYYY(kJISx4051Cls_3 , 0x30FC),
+  YYYY(kJISx4051Cls_3 , 0x30FD),
+  YYYY(kJISx4051Cls_3 , 0x30FE),
+
+  // Table 5
+  YYYY(kJISx4051Cls_4 , 0x0021),
+  YYYY(kJISx4051Cls_4 , 0x003F),
+   
+  // Table 6
+  YYYY(kJISx4051Cls_5 , 0x003A),
+  YYYY(kJISx4051Cls_5 , 0x003B),
+  YYYY(kJISx4051Cls_5 , 0x30FB),
+
+  // Table 7
+  YYYY(kJISx4051Cls_6 , 0x002E),
+  YYYY(kJISx4051Cls_6 , 0x3002),
+
+  // Table 8
+  YYYY(kJISx4051Cls_7 , 0x2014),
+  YYYY(kJISx4051Cls_7 , 0x2024),
+  YYYY(kJISx4051Cls_7 , 0x2025),
+  YYYY(kJISx4051Cls_7 , 0x2026),
+
+  // Table 9
+  YYYY(kJISx4051Cls_8 , 0x0024),
+  YYYY(kJISx4051Cls_8 , 0x00A3),
+  YYYY(kJISx4051Cls_8 , 0x00A5),
+  YYYY(kJISx4051Cls_8 , 0x2116),
+
+  // Table 10
+  YYYY(kJISx4051Cls_9 , 0x0025),
+  YYYY(kJISx4051Cls_9 , 0x00A2),
+  YYYY(kJISx4051Cls_9 , 0x00B0),
+  YYYY(kJISx4051Cls_9 , 0x2030),
+  YYYY(kJISx4051Cls_9 , 0x2031),
+  YYYY(kJISx4051Cls_9 , 0x2032),
+  YYYY(kJISx4051Cls_9 , 0x2033),
+
+  // Table 1
+  YYYY(kJISx4051Cls_10, 0x3000),
+
+  // Table 1
+  ZZZZ(kJISx4051Cls_11, 0x3000),
+
+
+
+
diff --git a/intl/lwbrk/moz.build b/intl/lwbrk/moz.build
new file mode 100644
index 0000000000..b47a49e279
--- /dev/null
+++ b/intl/lwbrk/moz.build
@@ -0,0 +1,40 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+TEST_DIRS += ["gtest"]
+
+EXPORTS.mozilla.intl += [
+    "LineBreaker.h",
+    "WordBreaker.h",
+]
+
+UNIFIED_SOURCES += [
+    "LineBreaker.cpp",
+    "WordBreaker.cpp",
+]
+
+# Pick the breaker backend sources that match the widget toolkit.
+if CONFIG["MOZ_WIDGET_TOOLKIT"] == "gtk":
+    SOURCES += [
+        "nsPangoBreaker.cpp",
+    ]
+    CXXFLAGS += CONFIG["MOZ_PANGO_CFLAGS"]
+elif CONFIG["MOZ_WIDGET_TOOLKIT"] == "windows":
+    SOURCES += [
+        "nsUniscribeBreaker.cpp",
+    ]
+elif CONFIG["MOZ_WIDGET_TOOLKIT"] == "cocoa":
+    UNIFIED_SOURCES += [
+        "nsCarbonBreaker.cpp",
+    ]
+else:
+    # Any other toolkit gets both the C++ and the C fallback source.
+    SOURCES += [
+        "nsRuleBreaker.cpp",
+        "rulebrk.c",
+    ]
+
+FINAL_LIBRARY = "xul"
diff --git a/intl/lwbrk/nsCarbonBreaker.cpp b/intl/lwbrk/nsCarbonBreaker.cpp
new file mode 100644
index 0000000000..d1d81b2578
--- /dev/null
+++ b/intl/lwbrk/nsCarbonBreaker.cpp
@@ -0,0 +1,43 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <CoreFoundation/CoreFoundation.h>
+#include <stdint.h>
+#include "nsDebug.h"
+#include "nscore.h"
+
+// Marks in aBreakBefore[] every offset of aText at which CoreFoundation's
+// line-break tokenizer starts a new token; all other entries are cleared.
+void NS_GetComplexLineBreaks(const char16_t* aText, uint32_t aLength,
+                             uint8_t* aBreakBefore) {
+  NS_ASSERTION(aText, "aText shouldn't be null");
+
+  memset(aBreakBefore, 0, aLength * sizeof(uint8_t));
+
+  // The CFString wraps aText in place (NoCopy + kCFAllocatorNull deallocator).
+  CFStringRef text = ::CFStringCreateWithCharactersNoCopy(
+      kCFAllocatorDefault, reinterpret_cast<const UniChar*>(aText), aLength,
+      kCFAllocatorNull);
+  if (!text) {
+    return;
+  }
+
+  CFStringTokenizerRef tokenizer = ::CFStringTokenizerCreate(
+      kCFAllocatorDefault, text, ::CFRangeMake(0, aLength),
+      kCFStringTokenizerUnitLineBreak, nullptr);
+  // A null tokenizer simply leaves aBreakBefore[] cleared.
+  if (tokenizer) {
+    while (::CFStringTokenizerAdvanceToNextToken(tokenizer) !=
+           kCFStringTokenizerTokenNone) {
+      const CFRange token = ::CFStringTokenizerGetCurrentTokenRange(tokenizer);
+      if (token.location != 0) {  // Ignore leading edge
+        aBreakBefore[token.location] = true;
+      }
+    }
+    ::CFRelease(tokenizer);
+  }
+
+  ::CFRelease(text);
+}
diff --git a/intl/lwbrk/nsComplexBreaker.h b/intl/lwbrk/nsComplexBreaker.h
new file mode 100644
index 0000000000..0b508a4645
--- /dev/null
+++ b/intl/lwbrk/nsComplexBreaker.h
@@ -0,0 +1,18 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsComplexBreaker_h__
+#define nsComplexBreaker_h__
+
+#include "nsString.h"
+
+/**
+ * Find line break opportunities in aText[] of aLength characters,
+ * filling boolean values indicating line break opportunities for
+ * corresponding characters in aBreakBefore[] on return.
+ */
+void NS_GetComplexLineBreaks(const char16_t* aText, uint32_t aLength,
+                             uint8_t* aBreakBefore);
+
+#endif /* nsComplexBreaker_h__ */
diff --git a/intl/lwbrk/nsLWBrkCIID.h b/intl/lwbrk/nsLWBrkCIID.h
new file mode 100644
index 0000000000..b612155ef0
--- /dev/null
+++ b/intl/lwbrk/nsLWBrkCIID.h
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsLWBrkCIID_h__
+#define nsLWBrkCIID_h__
+
+// Class ID paired with NS_LBRK_CONTRACTID below. The macro expands to a
+// brace initializer holding the same value as the GUID spelled out here
+// (presumably the line-breaker component, judging by the "lbrk" contract
+// ID -- confirm against the component registration).
+// {2BF64764-997F-450D-AF96-3028D1A902B0}
+#define NS_LBRK_CID                                 \
+  {                                                 \
+    0x2bf64764, 0x997f, 0x450d, {                   \
+      0xaf, 0x96, 0x30, 0x28, 0xd1, 0xa9, 0x2, 0xb0 \
+    }                                               \
+  }
+
+// Contract ID string associated with NS_LBRK_CID.
+#define NS_LBRK_CONTRACTID "@mozilla.org/intl/lbrk;1"
+
+// Class ID paired with NS_WBRK_CONTRACTID below; it differs from
+// NS_LBRK_CID only in the last digit of the first field (presumably the
+// word-breaker component, judging by the "wbrk" contract ID -- confirm).
+// {2BF64765-997F-450D-AF96-3028D1A902B0}
+#define NS_WBRK_CID                                 \
+  {                                                 \
+    0x2bf64765, 0x997f, 0x450d, {                   \
+      0xaf, 0x96, 0x30, 0x28, 0xd1, 0xa9, 0x2, 0xb0 \
+    }                                               \
+  }
+
+// Contract ID string associated with NS_WBRK_CID.
+#define NS_WBRK_CONTRACTID "@mozilla.org/intl/wbrk;1"
+
+#endif
diff --git a/intl/lwbrk/nsPangoBreaker.cpp b/intl/lwbrk/nsPangoBreaker.cpp
new file mode 100644
index 0000000000..ca3d3d54c9
--- /dev/null
+++ b/intl/lwbrk/nsPangoBreaker.cpp
@@ -0,0 +1,58 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsComplexBreaker.h"
+
+#include <pango/pango-break.h>
+#include "nsUTF8Utils.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+/**
+ * Pango-backed implementation of NS_GetComplexLineBreaks.
+ *
+ * The UTF-16 input is converted to UTF-8, pango_get_log_attrs() computes one
+ * PangoLogAttr per character, and each character's is_line_break flag is
+ * stored in aBreakBefore[] at the UTF-16 offset where that character starts.
+ * Entries that are never written (for example the second code unit of a
+ * surrogate pair) stay 0 from the initial memset.
+ *
+ * @param aText        UTF-16 text to analyse; must not be null.
+ * @param aLength      number of UTF-16 code units in aText.
+ * @param aBreakBefore output array of at least aLength entries.
+ */
+void NS_GetComplexLineBreaks(const char16_t* aText, uint32_t aLength,
+                             uint8_t* aBreakBefore) {
+  NS_ASSERTION(aText, "aText shouldn't be null");
+
+  memset(aBreakBefore, false, aLength * sizeof(uint8_t));
+
+  AutoTArray<PangoLogAttr, 2000> attrBuffer;
+  // XXX(Bug 1631371) Check if this should use a fallible operation as it
+  // pretended earlier.
+  attrBuffer.AppendElements(aLength + 1);
+
+  NS_ConvertUTF16toUTF8 aUTF8(aText, aLength);
+
+  const gchar* p = aUTF8.Data();
+  const gchar* end = p + aUTF8.Length();
+  uint32_t u16Offset = 0;
+
+  static PangoLanguage* language = pango_language_from_string("en");
+
+  // Both loops are additionally bounded by aLength so that a mismatch
+  // between the UTF-8 and UTF-16 cursors can never write past the end of
+  // aBreakBefore[].
+  while (p < end && u16Offset < aLength) {
+    PangoLogAttr* attr = attrBuffer.Elements();
+    pango_get_log_attrs(p, end - p, -1, language, attr, attrBuffer.Length());
+
+    while (p < end && u16Offset < aLength) {
+      aBreakBefore[u16Offset] = attr->is_line_break;
+      ++u16Offset;
+
+      // We're iterating over text obtained from NS_ConvertUTF16toUTF8,
+      // so we know we have valid UTF-8 and don't need to check for
+      // errors.
+      uint32_t ch = UTF8CharEnumerator::NextChar(&p, end);
+      ++attr;
+
+      if (ch > 0xFFFF) {
+        // A supplementary-plane character occupies two UTF-16 code units.
+        // Step over its second (low-surrogate) unit, whose entry is
+        // already 0, so that u16Offset stays in sync with the UTF-8
+        // cursor. (The previous check tested the *first* unit for being a
+        // low surrogate, which never matches a well-formed pair.)
+        ++u16Offset;
+      }
+
+      if (!ch) {
+        // pango_break (pango 1.16.2) only analyses text before the
+        // first NUL (but sets one extra attr). Workaround loop to call
+        // pango_break again to analyse after the NUL is done somewhere else
+        // (gfx/thebes/gfxFontconfigFonts.cpp: SetupClusterBoundaries()).
+        // So, we do the same here for pango_get_log_attrs.
+        break;
+      }
+    }
+  }
+}
diff --git a/intl/lwbrk/nsRuleBreaker.cpp b/intl/lwbrk/nsRuleBreaker.cpp
new file mode 100644
index 0000000000..4c1c9aff90
--- /dev/null
+++ b/intl/lwbrk/nsRuleBreaker.cpp
@@ -0,0 +1,17 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsComplexBreaker.h"
+
+#define TH_UNICODE
+#include "rulebrk.h"
+
+/**
+ * Rule-based implementation of NS_GetComplexLineBreaks.
+ *
+ * Every offset of aText is passed to TrbWordBreakPos(); aBreakBefore[offset]
+ * is set to true exactly when that call returns 0 and to false otherwise.
+ *
+ * @param aText        UTF-16 text to analyse; must not be null.
+ * @param aLength      number of UTF-16 code units in aText.
+ * @param aBreakBefore output array of at least aLength entries.
+ */
+void NS_GetComplexLineBreaks(const char16_t* aText, uint32_t aLength,
+                             uint8_t* aBreakBefore) {
+  NS_ASSERTION(aText, "aText shouldn't be null");
+
+  uint32_t offset = 0;
+  while (offset < aLength) {
+    // Left context is everything before `offset`; right context is the rest.
+    const int breakPos =
+        TrbWordBreakPos(aText, offset, aText + offset, aLength - offset);
+    aBreakBefore[offset] = (breakPos == 0);
+    ++offset;
+  }
+}
diff --git a/intl/lwbrk/nsUniscribeBreaker.cpp b/intl/lwbrk/nsUniscribeBreaker.cpp
new file mode 100644
index 0000000000..503b756b61
--- /dev/null
+++ b/intl/lwbrk/nsUniscribeBreaker.cpp
@@ -0,0 +1,60 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsComplexBreaker.h"
+
+#include <windows.h>
+
+#include <usp10.h>
+
+#include "nsUTF8Utils.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+/**
+ * Uniscribe-backed implementation of NS_GetComplexLineBreaks.
+ *
+ * The text is split into script items with ScriptItemize(); ScriptBreak()
+ * is then run on each item and every character's fSoftBreak flag is copied
+ * into aBreakBefore[]. The entry for offset 0 is always left as false.
+ * If ScriptItemize() fails, all entries remain false; if ScriptBreak()
+ * fails, entries of the failing and later items remain false.
+ *
+ * @param aText        UTF-16 text to analyse; must not be null.
+ * @param aLength      number of UTF-16 code units in aText.
+ * @param aBreakBefore output array of at least aLength entries.
+ */
+void NS_GetComplexLineBreaks(const char16_t* aText, uint32_t aLength,
+                             uint8_t* aBreakBefore) {
+  NS_ASSERTION(aText, "aText shouldn't be null");
+
+  int outItems = 0;
+  HRESULT result;
+  AutoTArray<SCRIPT_ITEM, 64> items;
+  char16ptr_t text = aText;
+
+  memset(aBreakBefore, false, aLength);
+
+  items.AppendElements(64);
+
+  do {
+    // ScriptItemize() appends a terminal item, so the buffer must have room
+    // for cMaxItems + 1 entries: advertise one fewer than we allocated.
+    result = ScriptItemize(text, aLength, items.Length() - 1, nullptr, nullptr,
+                           items.Elements(), &outItems);
+
+    if (result == E_OUTOFMEMORY) {
+      // The item buffer was too small: double it and retry.
+      // XXX(Bug 1631371) Check if this should use a fallible operation as it
+      // pretended earlier.
+      items.AppendElements(items.Length());
+    }
+  } while (result == E_OUTOFMEMORY);
+
+  if (result < 0) {
+    // Itemization failed for a reason other than buffer size; the contents
+    // of items/outItems cannot be trusted, so report no breaks at all.
+    return;
+  }
+
+  for (int iItem = 0; iItem < outItems; ++iItem) {
+    // An item extends up to the start of the next one (or the end of text).
+    uint32_t endOffset =
+        (iItem + 1 == outItems ? aLength : items[iItem + 1].iCharPos);
+    uint32_t startOffset = items[iItem].iCharPos;
+    AutoTArray<SCRIPT_LOGATTR, 64> sla;
+
+    // XXX(Bug 1631371) Check if this should use a fallible operation as it
+    // pretended earlier.
+    sla.AppendElements(endOffset - startOffset);
+
+    if (ScriptBreak(text + startOffset, endOffset - startOffset,
+                    &items[iItem].a, sla.Elements()) < 0)
+      return;
+
+    // We don't want to set a potential break position at the start of text;
+    // that's the responsibility of a higher level.
+    for (uint32_t j = startOffset ? 0 : 1; j + startOffset < endOffset; ++j) {
+      aBreakBefore[j + startOffset] = sla[j].fSoftBreak;
+    }
+  }
+}
diff --git a/intl/lwbrk/rulebrk.c b/intl/lwbrk/rulebrk.c
new file mode 100644
index 0000000000..d7574b929f
--- /dev/null
+++ b/intl/lwbrk/rulebrk.c
@@ -0,0 +1,388 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#define TH_UNICODE
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include "th_char.h"
+#define th_isalpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
+#define th_isspace(c) ((c) == ' ' || (c) == '\t')
+
+/*
+/////////////////////////////////////////////////
+// Thai character type array
+*/
+
+/* Set of the class flags defined below, describing one character. */
+typedef unsigned short twb_t;
+/* Per-character flag table; defined at the end of this file. */
+extern const twb_t _TwbType[0x100 - 0xa0];
+
+/*
+// bit definition
+//
+// NOTE(review): the linguistic meaning of the individual flags is not
+// spelled out in this file. The notes below are inferred from how the flags
+// are assigned in _TwbType and tested in TrbWordBreakPos() -- confirm
+// before relying on them.
+*/
+
+/* VR*: flags carried by table entries d0..d9 and e5 (presumably vowels
+   written after a consonant). */
+#define VRS 0x0001
+#define VRE 0x0002
+#define VRX 0x0004
+
+/* Extra flag set on some of the VR* entries (d0, d2, d4, d5, d7). */
+#define VRA 0x0008
+
+/* VL*: flags carried by table entries e0..e4 (presumably leading vowels). */
+#define VLA 0x0010
+#define VLO 0x0020
+#define VLI 0x0040
+
+/* Flag on entries c4, c6, c7 and cd. */
+#define VC 0x0080
+
+/* CC / CS: the two flags used for the consonant entries a1..ce. */
+#define CC 0x0100
+#define CS 0x0200
+
+/* Additional consonant attributes tested by the break rules. */
+#define C2 0x0400
+#define CHB 0x0800
+#define CHE 0x1000
+
+/* Flag set, together with M, on entries e8..eb. */
+#define MT 0x2000
+/*
+//_#define me 0x2000
+*/
+/* Flag set on entries e6..ec. */
+#define M 0x4000
+
+/* Flag on entries that are not part of A (cf, da, df, ed..fb). */
+#define T 0x8000
+
+/* Composite masks. */
+#define VL (VLA | VLO | VLI)
+#define VR (VRS | VRE | VRX)
+/* TrbWordBreakPos() reports no break when the previous character has NE. */
+#define NE (VL | VRS)
+/* TrbWordBreakPos() reports no break when the current character has NB. */
+#define NB (VR | M)
+#define V (VL | VR)
+#define CX (CC | CS)
+#define C (CX | VC)
+/* Characters the break rules operate on; anything else ends the context. */
+#define A (C | V | M)
+
+/* Looks up the flags of c; c must satisfy th_isthai(c) so that the index
+   th_zcode(c) stays inside the table. */
+#define twbtype(c) (_TwbType[th_zcode(c)])
+
+#ifndef TRUE
+#  define TRUE 1
+#  define FALSE 0
+#endif
+#define RETURN(b) return (b)
+
+/*
+/////////////////////////////////////////////////
+*/
+
+/*
+ * Applies the rule-based Thai word-break heuristics at one position.
+ *
+ * pstr  - start of the text buffer.
+ * left  - number of characters available before the position; the
+ *         position examined is pstr + left.
+ * rstr  - pointer to the character at the position (callers in this patch
+ *         pass pstr + left).
+ * right - number of characters available starting at rstr.
+ *
+ * A window of up to three characters before and three characters from the
+ * position is classified through _TwbType and matched against the rules
+ * below, in order.
+ *
+ * Returns -1 when no break is reported for this position (including when
+ * left < 0, right < 1, or rstr[0] is not a Thai character in class A),
+ * 0 when a break is reported directly before rstr[0], and a positive value
+ * (1 or 2) that TrbFollowing() treats as a number of characters to advance
+ * past rstr.
+ */
+int TrbWordBreakPos(const th_char* pstr, int left, const th_char* rstr,
+                    int right)
+/*                 const ThBreakIterator *it, const th_char **p)*/
+{
+  /*
+  //int left, right;
+  //const th_char *s = *p;
+  */
+  const th_char* lstr = pstr + left;
+  /* Character window and its flags; c(i)/t(i) accept i in -3..2, where
+     index 0 is the character at the position. */
+  th_char _c[6];
+  twb_t _t[6];
+#define c(i) (_c[(i) + 3])
+#define t(i) (_t[(i) + 3])
+  int i, j;
+
+  /*
+  //left = s - it->begin;
+  */
+  if (left < 0) return -1;
+  /*
+  //right = (it->end == NULL) ? 4 : it->begin - s;
+  */
+  if (right < 1) return -1;
+
+  /*
+  // get c(0), t(0)
+  */
+  c(0) = rstr[0]; /* may be '\0' */
+  if (!th_isthai(c(0))) return -1;
+  t(0) = twbtype(c(0));
+  if (!(t(0) & A)) return -1;
+
+  /*
+  // get c(-1), t(-1)
+  */
+  if (left >= 1) {
+    c(-1) = lstr[-1];
+    if (!th_isthai(c(-1))) return 0;
+    t(-1) = twbtype(c(-1));
+    if (!(t(-1) & A)) return 0; /* handle punctuation marks here */
+  } else {
+    c(-1) = 0;
+    t(-1) = 0;
+  }
+
+  /*
+  // get c(1..2), t(1..2)
+  */
+  for (i = 1; i <= 2; i++) {
+    if (i >= right) {
+      c(i) = 0;
+      t(i) = 0;
+    } else {
+      c(i) = rstr[i]; /* may be '\0'; */
+      /* On a character outside class A, shrink `right` to i and redo this
+         iteration so that slot i (and any later slot) is zero-filled. */
+      if (!th_isthai(c(i)))
+        right = i--;
+      else {
+        t(i) = twbtype(c(i));
+        if (!(t(i) & A)) right = i--;
+      }
+    }
+  }
+  /*
+  // get c(-2..-3), t(-2..-3)
+  */
+  /* i is the window slot being filled, j the text offset relative to lstr.
+     j moves on every iteration; i only moves once its slot is settled. */
+  for (i = -2, j = -2; i >= -3; j--) {
+    if (j < -left) {
+      c(i) = 0;
+      t(i) = 0;
+      i--;
+    } else {
+      c(i) = lstr[j];
+      /* Setting left = 0 makes every later iteration take the zero-fill
+         branch above. */
+      if (!th_isthai(c(i)))
+        left = 0;
+      else {
+        /* th_isthai(c(i)) is already known to hold in this branch. */
+        t(i) = (twb_t)(th_isthai(c(i)) ? twbtype(c(i)) : 0);
+        if (!(t(i) & A))
+          left = 0;
+        else {
+          /* When slot i+1 holds an MT character next to a VR character,
+             overwrite it with the character just read and keep filling the
+             same slot i from the next offset. */
+          if ((t(i + 1) & MT) && ((t(i) & VR) || (t(i + 2) & VR))) {
+            c(i + 1) = c(i);
+            t(i + 1) = t(i);
+          } else
+            i--;
+        }
+      }
+    }
+  }
+
+  /*
+  // prohibit the unlikely
+  */
+  if ((t(-1) & C) && (t(0) & C)) {
+    if ((t(-1) & CHE) || (t(0) & CHB)) return -1;
+  }
+  /*
+  // special case : vlao, C/ sara_a|aa, !sara_a
+  */
+  /* NOTE(review): c(-0) is the same slot as c(0). Going by the comment
+     above, c(-1) == TH_SARA_AA may have been intended -- confirm before
+     changing, since this alters break results. */
+  if ((t(-3) & (VLA | VLO)) && (t(-2) & C) && (c(0) != TH_SARA_A) &&
+      (c(-1) == TH_SARA_A || c(-0) == TH_SARA_AA))
+    return 0;
+
+  /*
+  // prohibit break
+  */
+  if (t(0) & NB) return -1;
+  if (t(-1) & NE) return -1;
+
+  /*
+        // apply 100% rules
+  */
+  if (t(-1) & VRE) {
+    if (c(-2) == TH_SARA_AA && c(-1) == TH_SARA_A) return 0;
+    return -1; /* usually too short syllable, part of word */
+  }
+
+  if (t(-2) & VRE) return -1;
+
+  if ((t(0) & C) && (t(1) & (VR | MT)) &&
+      (c(2) != TH_THANTHAKHAT)) {                              /*?C, NB */
+    if ((t(-1) & (VRS | VRX)) && c(1) == TH_SARA_I) return -1; /* exception */
+    if (t(-1) & (V | M)) return 0;                             /* !C/ C, NB */
+    if (t(-2) & VRS) return 0;               /* VRS, C / C, NB */
+    if (!(t(0) & C2) && c(1) == TH_SARA_I) { /* / !C2 or /c, sara_i */
+      if (t(-2) & VRX) return 0;             /* VRX, C / C, NB ? 100%? */
+      if (t(-2) & VC) return 0;              /* VC, C / C, NB ? 100% */
+    }
+  }
+  if ((t(-1) & VRX) && (t(0) & CC)) return 0; /* VRX/ CC */
+  if ((t(-2) & VRS) && (t(-1) & C) && (t(0) & (V | M)))
+    return 0; /* VRS, C/ !C */
+
+  if ((t(0) & CX) && (t(1) & C2) && (c(2) != TH_THANTHAKHAT)) {
+    if ((t(-2) & A) && (t(-1) & CX)) return 0;  /* A, CX / CX, C2 */
+    if ((t(-2) & CX) && (t(-1) & MT)) return 0; /* CX, MT / CX, C2 */
+  }
+  /*
+  // apply 90% rules
+  */
+  if (t(0) & VL) return 0;
+  if (t(1) & VL) return -1;
+  if (c(-1) == TH_THANTHAKHAT && c(-2) != TH_RORUA && c(-2) != TH_LOLING)
+    return 0;
+
+  /*
+  //return -1;
+  // apply 80% rules
+  */
+  if (t(0) & CHE) {
+    if ((t(-2) & VRS) && (t(-1) & C)) return 0; /* VRS, C/ CHE */
+    /*if(t(-1) & VRX) return 0; // VRX/ CHE */
+    if (t(-1) & VC) return 0; /* VC/ CHE */
+  }
+  if (t(-1) & CHB) {
+    if ((t(0) & C) && (t(1) & VR)) return 0; /* CHB/ CC, VR */
+    if (t(0) & VC) return 0;                 /* CHB/ VC */
+  }
+
+  if ((t(-2) & VL) && (t(1) & VR)) { /* VL, C? C, VR */
+    if (t(-2) & VLI)
+      return 0;                        /* VLI,C/C,VR .*/
+    else {                             /* vlao, C ? C , VR */
+      /* Positive result: the break is reported two characters ahead. */
+      if (c(1) == TH_SARA_A) return 2; /* vlao, C, C, sara_a/ */
+      if (t(-2) & VLO) return 0;       /* VLO, C/ C, !sara_a */
+      if (!(t(1) & VRA)) return 0;     /* VLA, C/ C, !vca */
+    }
+  }
+  /* C,MT,C */
+  if ((t(-2) & C) && (t(-1) & MT) && (t(0) & CX)) return 1;
+
+  /* No rule matched: report no break at this position. */
+  return -1;
+}
+
+/*
+ * Scans forward from begin + offset and returns the offset (relative to
+ * begin) at which the scan stops.
+ *
+ * begin  - start of the text buffer.
+ * length - number of characters in the buffer.
+ * offset - position at which the scan starts.
+ *
+ * The scan skips leading spaces/tabs, then either consumes a run of
+ * non-Thai characters or walks through Thai characters until
+ * TrbWordBreakPos() reports a non-negative result, and finally swallows any
+ * directly following non-Thai characters that are neither ASCII letters nor
+ * spaces/tabs. A NUL character stops every scanning step.
+ */
+int TrbFollowing(const th_char* begin, int length, int offset)
+/*
+//(ThBreakIterator *this, int offset)
+*/
+{
+  const th_char* const limit = begin + length;
+  const th_char* cur = begin + offset;
+
+  /* Step over leading non-Thai whitespace. */
+  for (; cur < limit && *cur && !th_isthai(*cur) && th_isspace(*cur); cur++) {
+  }
+
+  if (cur < limit && *cur && !th_isthai(*cur)) {
+    /* Consume a run of non-Thai, non-space characters, remembering whether
+       it contained an ASCII letter. */
+    int sawLetter = FALSE;
+    for (; cur < limit && *cur && !th_isthai(*cur) && !th_isspace(*cur);
+         cur++) {
+      if (th_isalpha(*cur)) {
+        sawLetter = TRUE;
+      }
+    }
+    if (sawLetter || cur == limit || (!th_isthai(*cur) && th_isspace(*cur))) {
+      return cur - begin;
+    }
+  }
+
+  if (cur == limit || *cur == 0 || !th_isthai(*cur)) {
+    return cur - begin;
+  }
+
+  /* cur is on a Thai character; look for a break after it. */
+  cur++;
+  if (cur < limit && *cur && th_isthai(*cur)) {
+    int step;
+    for (;;) {
+      step = TrbWordBreakPos(begin, cur - begin, cur, limit - cur);
+      if (step >= 0) {
+        break;
+      }
+      cur++;
+      if (cur == limit || *cur == 0 || !th_isthai(*cur)) {
+        break;
+      }
+    }
+    /* A positive result places the break that many characters ahead. */
+    if (step > 0) {
+      cur += step;
+    }
+  }
+
+  /* Attach directly following non-Thai characters that are neither ASCII
+     letters nor spaces/tabs. */
+  while (cur < limit && *cur && !th_isthai(*cur) && !th_isalpha(*cur) &&
+         !th_isspace(*cur)) {
+    cur++;
+  }
+  return cur - begin;
+}
+
+/*
+/////////////////////////////////////////////////
+*/
+/*
+ * Class flags for each character, indexed by th_zcode(c) (see twbtype()).
+ * Each entry is labelled with the two-digit code used by the original
+ * table (a0..ff) followed by the matching Unicode code point, which is what
+ * the index corresponds to when TH_UNICODE is defined, as it is at the top
+ * of this file (index = c - 0x0e00).
+ */
+const twb_t _TwbType[0x100 - 0xa0] = {
+#if 0
+/* 80  */	T,
+/* 81-8f */	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+/* 90  */	T,
+/* 91-9f */	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+#endif
+    /* a0 U+0E00 */ 0,
+    /* a1 U+0E01 */ CS,
+    /* a2 U+0E02 */ CS | CHE,
+    /* a3 U+0E03 */ CC | CHE,
+    /* a4 U+0E04 */ CS | CHE,
+    /* a5 U+0E05 */ CC | CHE,
+    /* a6 U+0E06 */ CS,
+    /* a7 U+0E07 */ CS | CHB,
+    /* a8 U+0E08 */ CS,
+    /* a9 U+0E09 */ CC | CHE,
+    /* aa U+0E0A */ CS,
+    /* ab U+0E0B */ CC | CHE,
+    /* ac U+0E0C */ CC | CHB | CHE,
+    /* ad U+0E0D */ CS | CHB,
+    /* ae U+0E0E */ CS | CHB,
+    /* af U+0E0F */ CS | CHB,
+    /* b0 U+0E10 */ CS,
+    /* b1 U+0E11 */ CS | CHB | CHE,
+    /* b2 U+0E12 */ CS | CHB | CHE,
+    /* b3 U+0E13 */ CS | CHB,
+    /* b4 U+0E14 */ CS,
+    /* b5 U+0E15 */ CS,
+    /* b6 U+0E16 */ CS,
+    /* b7 U+0E17 */ CS,
+    /* b8 U+0E18 */ CS,
+    /* b9 U+0E19 */ CS,
+    /* ba U+0E1A */ CS,
+    /* bb U+0E1B */ CS,
+    /* bc U+0E1C */ CC | CHE,
+    /* bd U+0E1D */ CC | CHE,
+    /* be U+0E1E */ CS,
+    /* bf U+0E1F */ CS,
+    /* c0 U+0E20 */ CS | CHE,
+    /* c1 U+0E21 */ CS,
+    /* c2 U+0E22 */ CS,
+    /* c3 U+0E23 */ CS | C2 | CHE, /* ? add CHE  */
+    /* c4 U+0E24 */ VC | CHE,
+    /* c5 U+0E25 */ CS | C2,
+    /* c6 U+0E26 */ VC | CHE,
+    /* c7 U+0E27 */ VC | C2,
+    /* c8 U+0E28 */ CS,
+    /* c9 U+0E29 */ CS | CHB,
+    /* ca U+0E2A */ CS | CHE,
+    /* cb U+0E2B */ CC | CHE,
+    /* cc U+0E2C */ CS | CHB | CHE,
+    /* cd U+0E2D */ VC,
+    /* ce U+0E2E */ CC | CHE,
+    /* cf U+0E2F */ T,
+    /* d0 U+0E30 */ VRE | VRA,
+    /* d1 U+0E31 */ VRS,
+    /* d2 U+0E32 */ VRX | VRA,
+    /* d3 U+0E33 */ VRE,
+    /* d4 U+0E34 */ VRX | VRA,
+    /* d5 U+0E35 */ VRX | VRA,
+    /* d6 U+0E36 */ VRS,
+    /* d7 U+0E37 */ VRS | VRA,
+    /* d8 U+0E38 */ VRX,
+    /* d9 U+0E39 */ VRX,
+    /* da U+0E3A */ T,
+    /* db U+0E3B */ 0,
+    /* dc U+0E3C */ 0,
+    /* dd U+0E3D */ 0,
+    /* de U+0E3E */ 0,
+    /* df U+0E3F */ T,
+    /* e0 U+0E40 */ VLA,
+    /* e1 U+0E41 */ VLO,
+    /* e2 U+0E42 */ VLO,
+    /* e3 U+0E43 */ VLI,
+    /* e4 U+0E44 */ VLI,
+    /* e5 U+0E45 */ VRE,
+    /* e6 U+0E46 */ M,
+    /* e7 U+0E47 */ M,
+    /* e8 U+0E48 */ M | MT,
+    /* e9 U+0E49 */ M | MT,
+    /* ea U+0E4A */ M | MT,
+    /* eb U+0E4B */ M | MT,
+    /* ec U+0E4C */ M,
+    /* ed U+0E4D */ T,
+    /* ee U+0E4E */ T,
+    /* ef U+0E4F */ T,
+    /* f0 U+0E50 */ T,
+    /* f1 U+0E51 */ T,
+    /* f2 U+0E52 */ T,
+    /* f3 U+0E53 */ T,
+    /* f4 U+0E54 */ T,
+    /* f5 U+0E55 */ T,
+    /* f6 U+0E56 */ T,
+    /* f7 U+0E57 */ T,
+    /* f8 U+0E58 */ T,
+    /* f9 U+0E59 */ T,
+    /* fa U+0E5A */ T,
+    /* fb U+0E5B */ T,
+    /* fc U+0E5C */ 0,
+    /* fd U+0E5D */ 0,
+    /* fe U+0E5E */ 0,
+    /* ff U+0E5F */ 0};
diff --git a/intl/lwbrk/rulebrk.h b/intl/lwbrk/rulebrk.h
new file mode 100644
index 0000000000..c1f2e0957b
--- /dev/null
+++ b/intl/lwbrk/rulebrk.h
@@ -0,0 +1,26 @@
+/*
+Copyright (c) 1999 Samphan Raruenrom <samphan@thai.com>
+Permission to use, copy, modify, distribute and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear
+in supporting documentation.  Samphan Raruenrom makes no
+representations about the suitability of this software for any
+purpose.  It is provided "as is" without express or implied warranty.
+*/
+#ifndef __RULEBRK_H__
+#define __RULEBRK_H__
+#include "th_char.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Applies the Thai word-break rules at the position pstr + left.
+ *   pstr  - start of the text buffer
+ *   left  - number of characters available before the position
+ *   rstr  - pointer to the character at the position
+ *   right - number of characters available starting at rstr
+ * Returns -1 when no break is reported for the position, 0 when a break is
+ * reported directly before rstr[0], or a positive number of characters
+ * which TrbFollowing() adds to the position to reach the break.
+ */
+int TrbWordBreakPos(const th_char* pstr, int left, const th_char* rstr,
+                    int right);
+/*
+ * Scans forward from begin + offset within a buffer of `length` characters
+ * and returns the offset, relative to begin, at which the scan stops.
+ */
+int TrbFollowing(const th_char* begin, int length, int offset);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/intl/lwbrk/th_char.h b/intl/lwbrk/th_char.h
new file mode 100644
index 0000000000..a088228fff
--- /dev/null
+++ b/intl/lwbrk/th_char.h
@@ -0,0 +1,133 @@
+/*
+Copyright (c) 1999 Samphan Raruenrom <samphan@thai.com>
+Permission to use, copy, modify, distribute and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear
+in supporting documentation.  Samphan Raruenrom makes no
+representations about the suitability of this software for any
+purpose.  It is provided "as is" without express or implied warranty.
+*/
+#ifndef __TH_CHAR_H__
+#define __TH_CHAR_H__
+
+typedef unsigned char tis_char;
+
+#ifdef TH_UNICODE
+/*
+ * The char16_t type is only usable in C++ code, so we need this ugly hack to
+ * select a binary-compatible C type (uint16_t) for C code that includes this
+ * header, such as rulebrk.c.
+ */
+#  ifdef __cplusplus
+typedef char16_t th_char;
+#  else
+typedef uint16_t th_char;
+#  endif
+#  define TH_THAIBEGIN_ 0x0e00
+#  define th_isthai(c) (0x0e00 <= (c) && (c) <= 0x0e5f)
+#else
+typedef tis_char th_char;
+#  define TH_THAIBEGIN_ 0xa0
+#  define th_isthai(c) ((c) >= 0xa0)
+#endif
+#define th_zcode(c) ((c)-TH_THAIBEGIN_)
+
+/*
+ * Symbolic names for the characters handled by the Thai breaker. The
+ * enumerators are consecutive, starting at TH_THAIBEGIN_ (0x0e00 when
+ * TH_UNICODE is defined, 0xa0 otherwise), so each name's value is
+ * TH_THAIBEGIN_ plus its position in this list. The offsets noted on a few
+ * entries below are relative to TH_THAIBEGIN_.
+ */
+enum TH_CHARNAME {
+  TH_THAIBEGIN = TH_THAIBEGIN_, /* +0x00 */
+  TH_KOKAI,
+  TH_KHOKHAI,
+  TH_KHOKHUAT,
+  TH_KHOKHWAI,
+  TH_KHOKHON,
+  TH_KHORAKHANG,
+  TH_NGONGU,
+  TH_CHOCHAN,
+  TH_CHOCHING,
+  TH_CHOCHANG,
+  TH_SOSO,
+  TH_CHOCHOE,
+  TH_YOYING,
+  TH_DOCHADA,
+  TH_TOPATAK,
+  TH_THOTHAN, /* +0x10 */
+  TH_THONANGMONTHO,
+  TH_THOPHUTHAO,
+  TH_NONEN,
+  TH_DODEK,
+  TH_TOTAO,
+  TH_THOTHUNG,
+  TH_THOTHAHAN,
+  TH_THOTHONG,
+  TH_NONU,
+  TH_BOBAIMAI,
+  TH_POPLA,
+  TH_PHOPHUNG,
+  TH_FOFA,
+  TH_PHOPHAN,
+  TH_FOFAN,
+  TH_PHOSAMPHAO, /* +0x20 */
+  TH_MOMA,
+  TH_YOYAK,
+  TH_RORUA,
+  TH_RU,
+  TH_LOLING,
+  TH_LU,
+  TH_WOWAEN,
+  TH_SOSALA,
+  TH_SORUSI,
+  TH_SOSUA,
+  TH_HOHIP,
+  TH_LOCHULA,
+  TH_OANG,
+  TH_HONOKHUK,
+  TH_PAIYANNOI,
+  TH_SARA_A, /* +0x30 */
+  TH_MAIHANAKAT,
+  TH_SARA_AA,
+  TH_SARA_AM,
+  TH_SARA_I,
+  TH_SARA_II,
+  TH_SARA_UE,
+  TH_SARA_UEE,
+  TH_SARA_U,
+  TH_SARA_UU,
+  TH_PHINTHU,
+  TH_REM_CHERNG_,
+  TH_TAC_WBRK_,
+  TH_UNDEF_DD,
+  TH_UNDEF_DE,
+  TH_BAHT,
+  TH_SARA_E, /* +0x40 */
+  TH_SARA_AE,
+  TH_SARA_O,
+  TH_MAIMUAN,
+  TH_MAIMALAI,
+  TH_LAKKHANGYAO,
+  TH_MAIYAMOK,
+  TH_MAITAIKHU,
+  TH_MAIEK,
+  TH_MAITHO,
+  TH_MAITRI,
+  TH_MAICHATTAWA,
+  TH_THANTHAKHAT,
+  TH_NIKHAHIT,
+  TH_YAMAKKAN,
+  TH_FONGMAN,
+  TH_THAIZERO, /* +0x50 */
+  TH_THAIONE,
+  TH_THAITWO,
+  TH_THAITHREE,
+  TH_THAIFOUR,
+  TH_THAIFIVE,
+  TH_THAISIX,
+  TH_THAISEVEN,
+  TH_THAIEIGHT,
+  TH_THAININE,
+  TH_ANGKHANKHU,
+  TH_KHOMUT,
+  TH_UNDEF_FC,
+  TH_UNDEF_FD,
+  TH_UNDEF_FE,
+  TH_THAIEND /* +0x5f */
+};
+#endif
diff --git a/intl/lwbrk/tools/anzx4051.html b/intl/lwbrk/tools/anzx4051.html
new file mode 100644
index 0000000000..295f8741e0
--- /dev/null
+++ b/intl/lwbrk/tools/anzx4051.html
@@ -0,0 +1,669 @@
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<HTML>
+<HEAD>
+<TITLE>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</TITLE>
+</HEAD>
+<BODY>
+<H1>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</H1>
+<TABLE BORDER=3>
+<TR BGCOLOR=blue><TH><TH>
+<TD BGCOLOR=red>C</TD>
+<TD BGCOLOR=red>L</TD>
+<TD BGCOLOR=red>M</TD>
+<TD BGCOLOR=red>N</TD>
+<TD BGCOLOR=red>P</TD>
+<TD BGCOLOR=red>S</TD>
+<TD BGCOLOR=red>Z</TD>
+<TD BGCOLOR=white>Total</TD>
+<TD BGCOLOR=yellow>Cc</TD>
+<TD BGCOLOR=yellow>Cf</TD>
+<TD BGCOLOR=yellow>Co</TD>
+<TD BGCOLOR=yellow>Cs</TD>
+<TD BGCOLOR=yellow>Ll</TD>
+<TD BGCOLOR=yellow>Lm</TD>
+<TD BGCOLOR=yellow>Lo</TD>
+<TD BGCOLOR=yellow>Lt</TD>
+<TD BGCOLOR=yellow>Lu</TD>
+<TD BGCOLOR=yellow>Mc</TD>
+<TD BGCOLOR=yellow>Me</TD>
+<TD BGCOLOR=yellow>Mn</TD>
+<TD BGCOLOR=yellow>Nd</TD>
+<TD BGCOLOR=yellow>Nl</TD>
+<TD BGCOLOR=yellow>No</TD>
+<TD BGCOLOR=yellow>Pc</TD>
+<TD BGCOLOR=yellow>Pd</TD>
+<TD BGCOLOR=yellow>Pe</TD>
+<TD BGCOLOR=yellow>Pf</TD>
+<TD BGCOLOR=yellow>Pi</TD>
+<TD BGCOLOR=yellow>Po</TD>
+<TD BGCOLOR=yellow>Ps</TD>
+<TD BGCOLOR=yellow>Sc</TD>
+<TD BGCOLOR=yellow>Sk</TD>
+<TD BGCOLOR=yellow>Sm</TD>
+<TD BGCOLOR=yellow>So</TD>
+<TD BGCOLOR=yellow>Zl</TD>
+<TD BGCOLOR=yellow>Zp</TD>
+<TD BGCOLOR=yellow>Zs</TD>
+</TR>
+<TR><TH>00_1<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>14</TD>
+<TD>1</TD>
+<TD></TD>
+<TD BGCOLOR=white>15</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD>2</TD>
+<TD>11</TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>01_[a]<TH>
+<TD></TD>
+<TD>32</TD>
+<TD>2</TD>
+<TD></TD>
+<TD>31</TD>
+<TD>3</TD>
+<TD></TD>
+<TD BGCOLOR=white>68</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>8</TD>
+<TD>24</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>2</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD>12</TD>
+<TD>1</TD>
+<TD></TD>
+<TD>17</TD>
+<TD></TD>
+<TD></TD>
+<TD>2</TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>02_7<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD BGCOLOR=white>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>03_8<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD BGCOLOR=white>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>04_9<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>5</TD>
+<TD></TD>
+<TD></TD>
+<TD BGCOLOR=white>5</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>5</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>05_[b]<TH>
+<TD>33</TD>
+<TD>153</TD>
+<TD></TD>
+<TD>33</TD>
+<TD>2</TD>
+<TD>5</TD>
+<TD>13</TD>
+<TD BGCOLOR=white>239</TD>
+<TD>32</TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>153</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>33</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>2</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>5</TD>
+<TD></TD>
+<TD></TD>
+<TD>13</TD>
+</TR>
+<TR><TH>06_15<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>30</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD BGCOLOR=white>30</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>30</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>07_18<TH>
+<TD>18</TD>
+<TD>157</TD>
+<TD></TD>
+<TD>33</TD>
+<TD>56</TD>
+<TD>125</TD>
+<TD>2</TD>
+<TD BGCOLOR=white>391</TD>
+<TD></TD>
+<TD>18</TD>
+<TD></TD>
+<TD></TD>
+<TD>64</TD>
+<TD>7</TD>
+<TD>5</TD>
+<TD></TD>
+<TD>81</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+<TD>30</TD>
+<TD>4</TD>
+<TD>5</TD>
+<TD>2</TD>
+<TD></TD>
+<TD>5</TD>
+<TD>36</TD>
+<TD>4</TD>
+<TD></TD>
+<TD>3</TD>
+<TD>24</TD>
+<TD>98</TD>
+<TD>1</TD>
+<TD>1</TD>
+<TD></TD>
+</TR>
+<TR><TH>08_COMPLEX<TH>
+<TD></TD>
+<TD>54</TD>
+<TD>33</TD>
+<TD>20</TD>
+<TD>2</TD>
+<TD>1</TD>
+<TD></TD>
+<TD BGCOLOR=white>110</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD>53</TD>
+<TD></TD>
+<TD></TD>
+<TD>11</TD>
+<TD></TD>
+<TD>22</TD>
+<TD>10</TD>
+<TD></TD>
+<TD>10</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>2</TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>09_[c]<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+<TD>4</TD>
+<TD></TD>
+<TD BGCOLOR=white>7</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+<TD>2</TD>
+<TD></TD>
+<TD>2</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>0A_[d]<TH>
+<TD>1</TD>
+<TD>2</TD>
+<TD></TD>
+<TD>6</TD>
+<TD>25</TD>
+<TD>14</TD>
+<TD></TD>
+<TD BGCOLOR=white>48</TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>6</TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+<TD>3</TD>
+<TD></TD>
+<TD>19</TD>
+<TD></TD>
+<TD>2</TD>
+<TD>3</TD>
+<TD>7</TD>
+<TD>2</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>0B_[e]<TH>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD>1</TD>
+<TD>3</TD>
+<TD BGCOLOR=white>6</TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+</TR>
+<TR><TH>X<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD BGCOLOR=white>0</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+</TABLE>
+<TABLE BORDER=3>
+<TR BGCOLOR=blue><TH><TH>
+<TD BGCOLOR=red>00_1</TD>
+<TD BGCOLOR=red>01_[a]</TD>
+<TD BGCOLOR=red>02_7</TD>
+<TD BGCOLOR=red>03_8</TD>
+<TD BGCOLOR=red>04_9</TD>
+<TD BGCOLOR=red>05_[b]</TD>
+<TD BGCOLOR=red>06_15</TD>
+<TD BGCOLOR=red>07_18</TD>
+<TD BGCOLOR=red>08_COMPLEX</TD>
+<TD BGCOLOR=red>09_[c]</TD>
+<TD BGCOLOR=red>0A_[d]</TD>
+<TD BGCOLOR=red>0B_[e]</TD>
+<TD BGCOLOR=red>X</TD>
+</TR>
+<TR><TH>00<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>33</TD>
+<TD>10</TD>
+<TD>127</TD>
+<TD></TD>
+<TD>7</TD>
+<TD>44</TD>
+<TD>2</TD>
+<TD></TD>
+</TR>
+<TR><TH>0E<TH>
+<TD>1</TD>
+<TD>6</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>20</TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>17<TH>
+<TD>2</TD>
+<TD>4</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>110</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>20<TH>
+<TD>2</TD>
+<TD>11</TD>
+<TD>1</TD>
+<TD></TD>
+<TD>5</TD>
+<TD>13</TD>
+<TD></TD>
+<TD>100</TD>
+<TD></TD>
+<TD></TD>
+<TD>4</TD>
+<TD>4</TD>
+<TD></TD>
+</TR>
+<TR><TH>21<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD>32</TD>
+<TD></TD>
+<TD>163</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>30<TH>
+<TD>10</TD>
+<TD>47</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>161</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+</TABLE>
diff --git a/intl/lwbrk/tools/anzx4051.pl b/intl/lwbrk/tools/anzx4051.pl
new file mode 100644
index 0000000000..e76eac6207
--- /dev/null
+++ b/intl/lwbrk/tools/anzx4051.pl
@@ -0,0 +1,356 @@
+#!/usr/bin/perl 
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+######################################################################
+#
+# Initial global variable
+#
+######################################################################
+%utot = ();
+$ui=0;
+$li=0;
+
+######################################################################
+#
+# Open the unicode database file
+#
+######################################################################
+open ( UNICODATA , "< ../../unicharutil/tools/UnicodeData-Latest.txt") 
+   || die "cannot find UnicodeData-Latest.txt";
+
+######################################################################
+#
+# Open the JIS X 4051 Class file
+#
+######################################################################
+open ( CLASS , "< jisx4051class.txt") 
+   || die "cannot find jisx4051class.txt";
+
+######################################################################
+#
+# Open the JIS X 4051 Class simplified mapping
+#
+######################################################################
+open ( SIMP , "< jisx4051simp.txt") 
+   || die "cannot find jisx4051simp.txt";
+
+######################################################################
+#
+# Open the output file
+#
+######################################################################
+open ( OUT , "> anzx4051.html") 
+  || die "cannot open output anzx4051.html file";
+
+######################################################################
+#
+# Open the generated C header output file
+#
+######################################################################
+open ( HEADER , "> ../jisx4051class.h")
+  || die "cannot open output ../jisx4051class.h file";
+
+######################################################################
+#
+# Generate license and page header for the HTML report
+#
+######################################################################
+$hthmlheader = <<END_OF_HTML;
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<HTML>
+<HEAD>
+<TITLE>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</TITLE>
+</HEAD>
+<BODY>
+<H1>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</H1>
+END_OF_HTML
+print OUT $hthmlheader;
+
+######################################################################
+#
+# Generate license and do-not-edit notice for the C header
+#
+######################################################################
+$npl = <<END_OF_NPL;
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
+    mozilla/intl/lwbrk/tools/anzx4051.pl
+ */
+END_OF_NPL
+print HEADER $npl;
+
+%occ = ();
+%gcat = ();
+%dcat = ();
+%simp = ();
+%gcount = ();
+%dcount = ();
+%sccount = ();
+%rangecount = ();
+
+######################################################################
+#
+# Process the file line by line
+#
+######################################################################
+while(<UNICODATA>) {
+   chop;
+   ######################################################################
+   #
+   # Get value from fields 0 and 2 of each ';'-separated record
+   #
+   ######################################################################
+   @f = split(/;/ , $_); 
+   $c = $f[0];   # The unicode value (hex string, used as the hash key)
+   $g = $f[2];   # General Category of that code point
+   $d = substr($g, 0, 1);   # first letter of the category
+
+   $gcat{$c} = $g;
+   $dcat{$c} = $d;
+   $gcount{$g}++;   # how many records carry each category
+   $dcount{$d}++;   # how many records carry each first letter
+}
+close(UNICODATA);   # close the handle this loop actually read from
+
+while(<SIMP>) {
+   chop;
+   ######################################################################
+   #
+   # Each record is "class;simplified-class"; split it into its fields
+   #
+   ######################################################################
+   @f = split(/;/ , $_); 
+
+   $simp{$f[0]} = $f[1];   # class -> simplified class
+   $sccount{$f[1]}++;      # number of classes mapped onto each simplified class
+}
+close(SIMP);
+
+# Return the General Category recorded in %gcat for code point $u.
+# Code points without a record fall back to the hard-coded ranges below;
+# anything else prints a warning and yields "".
+sub GetClass{
+  my ($u) = @_;
+  my $hex = DecToHex($u);
+  # Lexical, so the package-global $g is left untouched.
+  my $cat = $gcat{$hex};
+  if(defined($cat) && $cat ne "") {
+    return $cat;
+  } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 )  ) {
+    return "Han";
+  } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 )  ) {
+    return "Lo";
+  } elsif (( 0xd800 <= $u) && ( $u <= 0xdfff )  ) {
+    return "Cs";   # D800-DB7F, DB80-DBFF and DC00-DFFF all map to "Cs"
+  } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff )  ) {
+    return "Co";
+  }
+  printf "WARNING !!!! Cannot find General Category for U+%s \n" , $hex;
+  return "";   # explicit, so printf's return value never reaches the caller
+}
+# Return the value recorded in %dcat for code point $u (the first letter of
+# its General Category). Code points without a record fall back to the
+# hard-coded ranges below; anything else prints a warning and yields "".
+sub GetDClass{
+  my ($u) = @_;
+  my $hex = DecToHex($u);
+  # Lexical, so the package-global $g is left untouched.
+  my $major = $dcat{$hex};
+  if(defined($major) && $major ne "") {
+    return $major;
+  } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 )  ) {
+    return "Han";
+  } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 )  ) {
+    return "L";
+  } elsif (( 0xd800 <= $u) && ( $u <= 0xdfff )  ) {
+    return "C";   # D800-DB7F, DB80-DBFF and DC00-DFFF all map to "C"
+  } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff )  ) {
+    return "C";
+  }
+  printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex;
+  return "";   # explicit, so printf's return value never reaches the caller
+}
+sub DecToHex{
+     # Format a number as upper-case hex, zero-padded to at least 4 digits.
+     return sprintf("%04X", $_[0]);
+}
+%gtotal = ();
+%dtotal = ();
+while(<CLASS>) {
+   chop;
+   ######################################################################
+   #
+   # Each record is "first;last;class"; an empty "last" means one code point
+   #
+   ######################################################################
+   @f = split(/;/ , $_); 
+   # Records whose class starts with "a" are skipped entirely.
+   if( substr($f[2], 0, 1) ne "a")
+   {
+     $sc = $simp{$f[2]};   # simplified class for this record's class
+     $l = hex($f[0]);      # first code point of the range
+     if($f[1] eq "")
+     {
+       $h = $l;
+     } else {
+       $h = hex($f[1]);
+     }
+     for($k = $l; $k <= $h ; $k++)
+     {
+       if( exists($occ{$k}))
+       {
+          #  The first record that claims a code point wins; later ones are ignored.
+          #  printf "WARNING !! Conflict definition!!! U+%s -> [%s] [%s | %s]\n", DecToHex($k),  $occ{$k} , $f[2] , $sc;
+       }
+       else
+       {
+           $occ{$k} = $sc . " | " . $f[2];
+           $gclass = GetClass($k); 
+           $dclass = GetDClass($k);
+           $gtotal{$sc . $gclass}++;
+           $dtotal{$sc . $dclass}++;
+           $u = DecToHex($k);
+           $rk = " " . substr($u,0,2) . ":" . $sc;   # key: first two hex digits + simplified class
+           $rangecount{$rk}++;
+       }
+     }
+  }
+}
+
+#print %gtotal;
+#print %dtotal;
+
+sub printreport 
+{
+    print OUT "<TABLE BORDER=3>\n";
+    print OUT "<TR BGCOLOR=blue><TH><TH>\n";
+    # First table, header row: one red cell per %dcount key ...
+    foreach $d (sort(keys %dcount)) {
+       print OUT "<TD BGCOLOR=red>$d</TD>\n";
+    }
+    # ... then a "Total" cell and one yellow cell per %gcount key.
+    print OUT "<TD BGCOLOR=white>Total</TD>\n";
+    foreach $g (sort(keys %gcount)) {
+       print OUT "<TD BGCOLOR=yellow>$g</TD>\n";
+    }
+    print OUT "</TR>\n";
+    foreach $sc (sort(keys %sccount)) {
+       # One row per %sccount key, labelled with that key.
+       print OUT "<TR><TH>$sc<TH>\n";
+    
+       $total = 0; 
+       foreach $d (sort (keys %dcount)) {
+         $count = $dtotal{$sc . $d};
+         $total += $count;
+         print OUT "<TD>$count</TD>\n";
+       }
+       # $total is the sum of the %dtotal cells just printed for this row.
+       print OUT "<TD BGCOLOR=white>$total</TD>\n";
+    
+       foreach $g (sort(keys %gcount)) {
+         $count = $gtotal{$sc . $g};
+         print OUT "<TD>$count</TD>\n";
+       }
+    
+    
+       print OUT "</TR>\n";
+    }
+    print OUT "</TABLE>\n";
+    
+    # Second table: %rangecount, one column per %sccount key.
+    print OUT "<TABLE BORDER=3>\n";
+    print OUT "<TR BGCOLOR=blue><TH><TH>\n";
+    
+    foreach $sc (sort(keys %sccount)) 
+    {
+       print OUT "<TD BGCOLOR=red>$sc</TD>\n";
+    }
+    
+    print OUT "</TR>\n";
+    
+    # Candidate rows are the two-hex-digit labels 00 .. 4E.
+    for($rr = 0; $rr < 0x4f; $rr++)
+    {
+       $empty = 0;
+       $r = sprintf("%02X" , $rr) ;
+       $tmp = "<TR><TH>" . $r . "<TH>\n";
+       # Build the row in $tmp while summing its counts into $empty.
+       foreach $sc (sort(keys %sccount)) {
+         $count = $rangecount{ " " .$r . ":" .$sc};
+         $tmp .= sprintf("<TD>%s</TD>\n", $count);
+         $empty += $count;
+       }
+    
+       $tmp .=  "</TR>\n";
+       # Rows whose counts sum to zero are not written.
+       if($empty ne 0) 
+       {
+          print OUT $tmp;
+       }
+    }
+    print OUT "</TABLE>\n";
+    
+}
+printreport();
+
+sub printarray   # write the gLBClass<$r>[32] table to HEADER
+{
+   my($r, $def) = @_;   # $r: two hex digits naming the table; $def: digit for code points not in %occ
+printf "[%s || %s]\n", $r, $def;   # trace of the arguments on STDOUT
+   $k = hex($r) * 256;   # first code point covered by this table
+   printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r;
+   for($i = 0 ; $i < 256; $i+= 8)   # eight code points per array element
+   {  
+      for($j = 7 ; $j >= 0; $j-- )   # highest code point first, so it is the leading hex digit
+      {  
+          $v = $k + $i + $j;
+          if( exists($occ{$v})) 
+	  {
+             $p = substr($occ{$v}, 1,1);   # second character of the stored %occ string
+          } else {
+             $p = $def;
+          }
+          # Only the first digit of each element gets the "0x" prefix.
+          if($j eq 7 ) 
+          {
+             printf HEADER "0x%s" , $p;
+          } else {
+             printf HEADER "%s", $p ;
+          }
+      }
+      printf HEADER ", // U+%04X - U+%04X\n", $k + $i ,( $k + $i + 7);
+   }
+   print HEADER "};\n\n";
+}
+printarray("00", "7");
+printarray("20", "7");
+printarray("21", "7");
+printarray("30", "5");
+printarray("0E", "8");
+printarray("17", "7");
+
+#print %rangecount;
+
+######################################################################
+#
+# Close files
+#
+######################################################################
+close(HEADER);
+close(CLASS);
+close(OUT);
+
diff --git a/intl/lwbrk/tools/jisx4051class.txt b/intl/lwbrk/tools/jisx4051class.txt
new file mode 100644
index 0000000000..c435c1ae55
--- /dev/null
+++ b/intl/lwbrk/tools/jisx4051class.txt
@@ -0,0 +1,159 @@
+0000;001f;17
+0020;;17
+0024;;24
+0027;;18
+0028;;22
+002D;;18
+002F;;18
+0021;002F;23
+0030;0039;15
+003C;;22
+003A;003F;23
+0040;;18
+0041;005A;18
+005B;;22
+005E;;18
+005F;;18
+005B;005F;23
+0060;;18
+0061;007A;18
+007B;;22
+007B;007E;23
+00A0;;24
+00A3;;22
+00A5;;22
+00A9;;18
+00AA;;18
+00AB;;18
+00AC;;22
+00AE;;18
+00AF;;18
+00A1;00BF;23
+00B0;;18
+00F7;;23
+00C0;00FF;18
+0E3F;;1
+0E2F;;4
+0E46;;4
+0E5A;0E5B;4
+0E50;0E59;15
+0E4F;;18
+0EAF;;4
+0EC6;;4
+0ED0;0ED9;15
+1735;1736;1
+17D4;17D5;4
+17D8;;4
+17DA;;4
+1780;17DD;21
+17E0;17E9;21
+17F0;17F9;21
+2007;;24
+2000;200B;17
+200C;200F;18
+2010;;18
+2011;;24
+2012;2013;18
+2014;;7
+2015;;18
+2016;2017;18
+2019;;23
+201D;;23
+2018;201F;18
+2020;2023;18
+2024;2026;2
+2027;;23
+2028;202E;18
+202F;;24
+2030;2034;9
+2035;2038;18
+2039;;1
+203A;;2
+203B;;12
+203C;203D;3
+203E;;23
+203F;2043;18
+2044;;3
+2045;;1
+2046;;2
+2047;2049;3
+204A;205E;18
+205F;;17
+2060;;24
+2061;2063;18
+206A;206F;18
+2070;2071;18
+2074;208E;18
+2090;2094;18
+2116;;8
+2160;217F;12
+2190;21EA;a12
+2126;;18
+2100;2138;18
+2153;2182;18
+2190;21EA;18
+3008;;1
+300A;;1
+300C;;1
+300E;;1
+3010;;1
+3014;;1
+3016;;1
+3018;;1
+301A;;1
+301D;;1
+3001;;2
+3009;;2
+300B;;2
+300D;;2
+300F;;2
+3011;;2
+3015;;2
+3017;;2
+3019;;2
+301B;;2
+301E;;2
+301F;;2
+3005;;3
+301C;;3
+3041;;3
+3043;;3
+3045;;3
+3047;;3
+3049;;3
+3063;;3
+3083;;3
+3085;;3
+3087;;3
+308E;;3
+309D;;3
+309E;;3
+30A1;;3
+30A3;;3
+30A5;;3
+30A7;;3
+30A9;;3
+30C3;;3
+30E3;;3
+30E5;;3
+30E7;;3
+30EE;;3
+30F5;;3
+30F6;;3
+30FC;;3
+30FD;;3
+30FE;;3
+30FB;;5
+3002;;6
+3000;;10
+3042;3094;11
+3099;309E;3
+3003;;12
+3004;;12
+3006;;12
+3007;;12
+3012;;12
+3013;;12
+3020;;12
+3036;;12
+30A2;30FA;12
diff --git a/intl/lwbrk/tools/jisx4051simp.txt b/intl/lwbrk/tools/jisx4051simp.txt
new file mode 100644
index 0000000000..e12a7fd805
--- /dev/null
+++ b/intl/lwbrk/tools/jisx4051simp.txt
@@ -0,0 +1,24 @@
+1;00_1
+2;01_[a]
+3;01_[a]
+4;01_[a]
+5;01_[a]
+6;01_[a]
+7;02_7
+8;03_8
+9;04_9
+10;05_[b]
+11;05_[b]
+12;05_[b]
+13;X
+14;X
+15;06_15
+16;X
+17;05_[b]
+18;07_18
+19;X
+20;X
+21;08_COMPLEX
+22;09_[c]
+23;0A_[d]
+24;0B_[e]
diff --git a/intl/lwbrk/tools/spec_table.html b/intl/lwbrk/tools/spec_table.html
new file mode 100644
index 0000000000..519f98c534
--- /dev/null
+++ b/intl/lwbrk/tools/spec_table.html
@@ -0,0 +1,127 @@
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title></title>
+<style type="text/css">
+table {
+	border: solid 1px;
+	border-collapse: collapse;
+}
+tbody, tfoot {
+	border-top: solid 2px;
+}
+td, th {
+	border: solid 1px;
+}
+td {
+	text-align: center;
+}
+</style>
+</head>
+<body>
+<p>This is a specification table for line breaking.</p>
+<p>The values of IE7 and Opera9: 'A' means that the line is breakable After the character, and 'B' means Before. 'BA' means Before and After.</p>
+<p>The (C) suffix on the IE7 and Opera9 column headings means Character, and (N) means Numeric.
+It indicates what surrounds the tested character in the testcase. E.g., "a$a" is a testcase for (C), "0$0" is a testcase for (N).</p>
+<p>Gecko does not break lines in most western-language contexts. But for file paths, URLs and very long words connected by hyphens,
+some characters might be breakable. They are marked 'breakable' in the table. However, they are not always breakable;
+that <em>depends on the context</em> in the word.</p>
+<table border="1">
+<thead>
+<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
+</thead>
+<tfoot>
+<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
+</tfoot>
+<tbody>
+<tr><th>0x21</th><th>&#x21;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x22</th><th>&#x22;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x23</th><th>&#x23;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x24</th><th>&#x24;</th><td></td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0x25</th><th>&#x25;</th><td>breakable</td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x26</th><th>&#x26;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x27</th><th>&#x27;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x28</th><th>&#x28;</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
+<tr><th>0x29</th><th>&#x29;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x2A</th><th>&#x2A;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2B</th><th>&#x2B;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2C</th><th>&#x2C;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2D</th><th>&#x2D;</th><td>breakable</td><td>BA</td><td>BA</td><td>A</td><td>A</td></tr>
+<tr><th>0x2E</th><th>&#x2E;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2F</th><th>&#x2F;</th><td>breakable</td><td></td><td></td><td>A</td><td>A</td></tr>
+</tbody>
+<tbody>
+<tr><th>0x3A</th><th>&#x3A;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3B</th><th>&#x3B;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3C</th><th>&#x3C;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3D</th><th>&#x3D;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3E</th><th>&#x3E;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3F</th><th>&#x3F;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x40</th><th>&#x40;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x5B</th><th>&#x5B;</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
+<tr><th>0x5C</th><th>&#x5C;</th><td>breakable</td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0x5D</th><th>&#x5D;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x5E</th><th>&#x5E;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x5F</th><th>&#x5F;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x60</th><th>&#x60;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x7B</th><th>&#x7B;</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
+<tr><th>0x7C</th><th>&#x7C;</th><td></td><td></td><td></td><td>A</td><td>A</td></tr>
+<tr><th>0x7D</th><th>&#x7D;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x7E</th><th>&#x7E;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0xA1</th><th>&#xA1;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA2</th><th>&#xA2;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0xA3</th><th>&#xA3;</th><td></td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0xA4</th><th>&#xA4;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA5</th><th>&#xA5;</th><td></td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0xA6</th><th>&#xA6;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA7</th><th>&#xA7;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA8</th><th>&#xA8;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA9</th><th>&#xA9;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAA</th><th>&#xAA;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAB</th><th>&#xAB;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAC</th><th>&#xAC;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAE</th><th>&#xAE;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAF</th><th>&#xAF;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0xB0</th><th>&#xB0;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0xB1</th><th>&#xB1;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB2</th><th>&#xB2;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB3</th><th>&#xB3;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB4</th><th>&#xB4;</th><td></td><td></td><td></td><td>B</td><td>B</td></tr>
+<tr><th>0xB5</th><th>&#xB5;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB6</th><th>&#xB6;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB7</th><th>&#xB7;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB8</th><th>&#xB8;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB9</th><th>&#xB9;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBA</th><th>&#xBA;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBB</th><th>&#xBB;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBC</th><th>&#xBC;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBD</th><th>&#xBD;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBE</th><th>&#xBE;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBF</th><th>&#xBF;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0xD7</th><th>&#xD7;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0xF7</th><th>&#xF7;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+</table>
+</body>
+</html>