From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- intl/icu/source/i18n/regexcst.h | 570 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 570 insertions(+) create mode 100644 intl/icu/source/i18n/regexcst.h (limited to 'intl/icu/source/i18n/regexcst.h') diff --git a/intl/icu/source/i18n/regexcst.h b/intl/icu/source/i18n/regexcst.h new file mode 100644 index 0000000000..a475b6b363 --- /dev/null +++ b/intl/icu/source/i18n/regexcst.h @@ -0,0 +1,570 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +//--------------------------------------------------------------------------------- +// +// Generated Header File. Do not edit by hand. +// This file contains the state table for the ICU Regular Expression Pattern Parser +// It is generated by the Perl script "regexcst.pl" from +// the rule parser state definitions file "regexcst.txt". +// +// Copyright (C) 2002-2016 International Business Machines Corporation +// and others. All rights reserved. +// +//--------------------------------------------------------------------------------- +#ifndef RBBIRPT_H +#define RBBIRPT_H + +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN +// +// Character classes for regex pattern scanning. +// + static const uint8_t kRuleSet_digit_char = 128; + static const uint8_t kRuleSet_ascii_letter = 129; + static const uint8_t kRuleSet_rule_char = 130; + constexpr uint32_t kRuleSet_count = 131-128; + +enum Regex_PatternParseAction { + doSetBackslash_D, + doBackslashh, + doBackslashH, + doSetLiteralEscaped, + doOpenLookAheadNeg, + doCompleteNamedBackRef, + doPatStart, + doBackslashS, + doBackslashD, + doNGStar, + doNOP, + doBackslashX, + doSetLiteral, + doContinueNamedCapture, + doBackslashG, + doBackslashR, + doSetBegin, + doSetBackslash_v, + doPossessivePlus, + doPerlInline, + doBackslashZ, + doSetAddAmp, + doSetBeginDifference1, + doIntervalError, + doSetNegate, + doIntervalInit, + doSetIntersection2, + doPossessiveInterval, + doRuleError, + doBackslashW, + doContinueNamedBackRef, + doOpenNonCaptureParen, + doExit, + doSetNamedChar, + doSetBackslash_V, + doConditionalExpr, + doEscapeError, + doBadOpenParenType, + doPossessiveStar, + doSetAddDash, + doEscapedLiteralChar, + doSetBackslash_w, + doIntervalUpperDigit, + doBackslashv, + doSetBackslash_S, + doSetNoCloseError, + doSetProp, + doBackslashB, + doSetEnd, + doSetRange, + doMatchModeParen, + doPlus, + doBackslashV, + doSetMatchMode, + doBackslashz, + doSetNamedRange, + doOpenLookBehindNeg, + doInterval, + doBadNamedCapture, + doBeginMatchMode, + doBackslashd, + doPatFinish, + doNamedChar, + doNGPlus, + doSetDifference2, + doSetBackslash_H, + doCloseParen, + doDotAny, + doOpenCaptureParen, + doEnterQuoteMode, + doOpenAtomicParen, + doBadModeFlag, + doSetBackslash_d, + doSetFinish, + doProperty, + doBeginNamedBackRef, + doBackRef, + doOpt, + doDollar, + doBeginNamedCapture, + doNGInterval, + doSetOpError, + doSetPosixProp, + doSetBeginIntersection1, + doBackslashb, + doSetBeginUnion, + doIntevalLowerDigit, + doSetBackslash_h, + doStar, + doMatchMode, + doBackslashA, + doOpenLookBehind, + doPossessiveOpt, + doOrOperator, + doBackslashw, + doBackslashs, + doLiteralChar, + doSuppressComments, + doCaret, + doIntervalSame, + doNGOpt, + doOpenLookAhead, + doSetBackslash_W, + doMismatchedParenErr, + doSetBackslash_s, + rbbiLastAction}; + +//------------------------------------------------------------------------------- +// +// RegexTableEl represents the structure of a row in the transition table +// for the pattern parser state machine. +//------------------------------------------------------------------------------- +struct RegexTableEl { + Regex_PatternParseAction fAction; + uint8_t fCharClass; // 0-127: an individual ASCII character + // 128-255: character class index + uint8_t fNextState; // 0-250: normal next-state numbers + // 255: pop next-state from stack. + uint8_t fPushState; + UBool fNextChar; +}; + +static const struct RegexTableEl gRuleParseStateTable[] = { + {doNOP, 0, 0, 0, true} + , {doPatStart, 255, 2,0, false} // 1 start + , {doLiteralChar, 254, 14,0, true} // 2 term + , {doLiteralChar, 130, 14,0, true} // 3 + , {doSetBegin, 91 /* [ */, 123, 205, true} // 4 + , {doNOP, 40 /* ( */, 27,0, true} // 5 + , {doDotAny, 46 /* . */, 14,0, true} // 6 + , {doCaret, 94 /* ^ */, 14,0, true} // 7 + , {doDollar, 36 /* $ */, 14,0, true} // 8 + , {doNOP, 92 /* \ */, 89,0, true} // 9 + , {doOrOperator, 124 /* | */, 2,0, true} // 10 + , {doCloseParen, 41 /* ) */, 255,0, true} // 11 + , {doPatFinish, 253, 2,0, false} // 12 + , {doRuleError, 255, 206,0, false} // 13 + , {doNOP, 42 /* * */, 68,0, true} // 14 expr-quant + , {doNOP, 43 /* + */, 71,0, true} // 15 + , {doNOP, 63 /* ? */, 74,0, true} // 16 + , {doIntervalInit, 123 /* { */, 77,0, true} // 17 + , {doNOP, 40 /* ( */, 23,0, true} // 18 + , {doNOP, 255, 20,0, false} // 19 + , {doOrOperator, 124 /* | */, 2,0, true} // 20 expr-cont + , {doCloseParen, 41 /* ) */, 255,0, true} // 21 + , {doNOP, 255, 2,0, false} // 22 + , {doSuppressComments, 63 /* ? */, 25,0, true} // 23 open-paren-quant + , {doNOP, 255, 27,0, false} // 24 + , {doNOP, 35 /* # */, 50, 14, true} // 25 open-paren-quant2 + , {doNOP, 255, 29,0, false} // 26 + , {doSuppressComments, 63 /* ? */, 29,0, true} // 27 open-paren + , {doOpenCaptureParen, 255, 2, 14, false} // 28 + , {doOpenNonCaptureParen, 58 /* : */, 2, 14, true} // 29 open-paren-extended + , {doOpenAtomicParen, 62 /* > */, 2, 14, true} // 30 + , {doOpenLookAhead, 61 /* = */, 2, 20, true} // 31 + , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, true} // 32 + , {doNOP, 60 /* < */, 46,0, true} // 33 + , {doNOP, 35 /* # */, 50, 2, true} // 34 + , {doBeginMatchMode, 105 /* i */, 53,0, false} // 35 + , {doBeginMatchMode, 100 /* d */, 53,0, false} // 36 + , {doBeginMatchMode, 109 /* m */, 53,0, false} // 37 + , {doBeginMatchMode, 115 /* s */, 53,0, false} // 38 + , {doBeginMatchMode, 117 /* u */, 53,0, false} // 39 + , {doBeginMatchMode, 119 /* w */, 53,0, false} // 40 + , {doBeginMatchMode, 120 /* x */, 53,0, false} // 41 + , {doBeginMatchMode, 45 /* - */, 53,0, false} // 42 + , {doConditionalExpr, 40 /* ( */, 206,0, true} // 43 + , {doPerlInline, 123 /* { */, 206,0, true} // 44 + , {doBadOpenParenType, 255, 206,0, false} // 45 + , {doOpenLookBehind, 61 /* = */, 2, 20, true} // 46 open-paren-lookbehind + , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, true} // 47 + , {doBeginNamedCapture, 129, 64,0, false} // 48 + , {doBadOpenParenType, 255, 206,0, false} // 49 + , {doNOP, 41 /* ) */, 255,0, true} // 50 paren-comment + , {doMismatchedParenErr, 253, 206,0, false} // 51 + , {doNOP, 255, 50,0, true} // 52 + , {doMatchMode, 105 /* i */, 53,0, true} // 53 paren-flag + , {doMatchMode, 100 /* d */, 53,0, true} // 54 + , {doMatchMode, 109 /* m */, 53,0, true} // 55 + , {doMatchMode, 115 /* s */, 53,0, true} // 56 + , {doMatchMode, 117 /* u */, 53,0, true} // 57 + , {doMatchMode, 119 /* w */, 53,0, true} // 58 + , {doMatchMode, 120 /* x */, 53,0, true} // 59 + , {doMatchMode, 45 /* - */, 53,0, true} // 60 + , {doSetMatchMode, 41 /* ) */, 2,0, true} // 61 + , {doMatchModeParen, 58 /* : */, 2, 14, true} // 62 + , {doBadModeFlag, 255, 206,0, false} // 63 + , {doContinueNamedCapture, 129, 64,0, true} // 64 named-capture + , {doContinueNamedCapture, 128, 64,0, true} // 65 + , {doOpenCaptureParen, 62 /* > */, 2, 14, true} // 66 + , {doBadNamedCapture, 255, 206,0, false} // 67 + , {doNGStar, 63 /* ? */, 20,0, true} // 68 quant-star + , {doPossessiveStar, 43 /* + */, 20,0, true} // 69 + , {doStar, 255, 20,0, false} // 70 + , {doNGPlus, 63 /* ? */, 20,0, true} // 71 quant-plus + , {doPossessivePlus, 43 /* + */, 20,0, true} // 72 + , {doPlus, 255, 20,0, false} // 73 + , {doNGOpt, 63 /* ? */, 20,0, true} // 74 quant-opt + , {doPossessiveOpt, 43 /* + */, 20,0, true} // 75 + , {doOpt, 255, 20,0, false} // 76 + , {doNOP, 128, 79,0, false} // 77 interval-open + , {doIntervalError, 255, 206,0, false} // 78 + , {doIntevalLowerDigit, 128, 79,0, true} // 79 interval-lower + , {doNOP, 44 /* , */, 83,0, true} // 80 + , {doIntervalSame, 125 /* } */, 86,0, true} // 81 + , {doIntervalError, 255, 206,0, false} // 82 + , {doIntervalUpperDigit, 128, 83,0, true} // 83 interval-upper + , {doNOP, 125 /* } */, 86,0, true} // 84 + , {doIntervalError, 255, 206,0, false} // 85 + , {doNGInterval, 63 /* ? */, 20,0, true} // 86 interval-type + , {doPossessiveInterval, 43 /* + */, 20,0, true} // 87 + , {doInterval, 255, 20,0, false} // 88 + , {doBackslashA, 65 /* A */, 2,0, true} // 89 backslash + , {doBackslashB, 66 /* B */, 2,0, true} // 90 + , {doBackslashb, 98 /* b */, 2,0, true} // 91 + , {doBackslashd, 100 /* d */, 14,0, true} // 92 + , {doBackslashD, 68 /* D */, 14,0, true} // 93 + , {doBackslashG, 71 /* G */, 2,0, true} // 94 + , {doBackslashh, 104 /* h */, 14,0, true} // 95 + , {doBackslashH, 72 /* H */, 14,0, true} // 96 + , {doNOP, 107 /* k */, 115,0, true} // 97 + , {doNamedChar, 78 /* N */, 14,0, false} // 98 + , {doProperty, 112 /* p */, 14,0, false} // 99 + , {doProperty, 80 /* P */, 14,0, false} // 100 + , {doBackslashR, 82 /* R */, 14,0, true} // 101 + , {doEnterQuoteMode, 81 /* Q */, 2,0, true} // 102 + , {doBackslashS, 83 /* S */, 14,0, true} // 103 + , {doBackslashs, 115 /* s */, 14,0, true} // 104 + , {doBackslashv, 118 /* v */, 14,0, true} // 105 + , {doBackslashV, 86 /* V */, 14,0, true} // 106 + , {doBackslashW, 87 /* W */, 14,0, true} // 107 + , {doBackslashw, 119 /* w */, 14,0, true} // 108 + , {doBackslashX, 88 /* X */, 14,0, true} // 109 + , {doBackslashZ, 90 /* Z */, 2,0, true} // 110 + , {doBackslashz, 122 /* z */, 2,0, true} // 111 + , {doBackRef, 128, 14,0, true} // 112 + , {doEscapeError, 253, 206,0, false} // 113 + , {doEscapedLiteralChar, 255, 14,0, true} // 114 + , {doBeginNamedBackRef, 60 /* < */, 117,0, true} // 115 named-backref + , {doBadNamedCapture, 255, 206,0, false} // 116 + , {doContinueNamedBackRef, 129, 119,0, true} // 117 named-backref-2 + , {doBadNamedCapture, 255, 206,0, false} // 118 + , {doContinueNamedBackRef, 129, 119,0, true} // 119 named-backref-3 + , {doContinueNamedBackRef, 128, 119,0, true} // 120 + , {doCompleteNamedBackRef, 62 /* > */, 14,0, true} // 121 + , {doBadNamedCapture, 255, 206,0, false} // 122 + , {doSetNegate, 94 /* ^ */, 126,0, true} // 123 set-open + , {doSetPosixProp, 58 /* : */, 128,0, false} // 124 + , {doNOP, 255, 126,0, false} // 125 + , {doSetLiteral, 93 /* ] */, 141,0, true} // 126 set-open2 + , {doNOP, 255, 131,0, false} // 127 + , {doSetEnd, 93 /* ] */, 255,0, true} // 128 set-posix + , {doNOP, 58 /* : */, 131,0, false} // 129 + , {doRuleError, 255, 206,0, false} // 130 + , {doSetEnd, 93 /* ] */, 255,0, true} // 131 set-start + , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 132 + , {doNOP, 92 /* \ */, 191,0, true} // 133 + , {doNOP, 45 /* - */, 137,0, true} // 134 + , {doNOP, 38 /* & */, 139,0, true} // 135 + , {doSetLiteral, 255, 141,0, true} // 136 + , {doRuleError, 45 /* - */, 206,0, false} // 137 set-start-dash + , {doSetAddDash, 255, 141,0, false} // 138 + , {doRuleError, 38 /* & */, 206,0, false} // 139 set-start-amp + , {doSetAddAmp, 255, 141,0, false} // 140 + , {doSetEnd, 93 /* ] */, 255,0, true} // 141 set-after-lit + , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 142 + , {doNOP, 45 /* - */, 178,0, true} // 143 + , {doNOP, 38 /* & */, 169,0, true} // 144 + , {doNOP, 92 /* \ */, 191,0, true} // 145 + , {doSetNoCloseError, 253, 206,0, false} // 146 + , {doSetLiteral, 255, 141,0, true} // 147 + , {doSetEnd, 93 /* ] */, 255,0, true} // 148 set-after-set + , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 149 + , {doNOP, 45 /* - */, 171,0, true} // 150 + , {doNOP, 38 /* & */, 166,0, true} // 151 + , {doNOP, 92 /* \ */, 191,0, true} // 152 + , {doSetNoCloseError, 253, 206,0, false} // 153 + , {doSetLiteral, 255, 141,0, true} // 154 + , {doSetEnd, 93 /* ] */, 255,0, true} // 155 set-after-range + , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 156 + , {doNOP, 45 /* - */, 174,0, true} // 157 + , {doNOP, 38 /* & */, 176,0, true} // 158 + , {doNOP, 92 /* \ */, 191,0, true} // 159 + , {doSetNoCloseError, 253, 206,0, false} // 160 + , {doSetLiteral, 255, 141,0, true} // 161 + , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 162 set-after-op + , {doSetOpError, 93 /* ] */, 206,0, false} // 163 + , {doNOP, 92 /* \ */, 191,0, true} // 164 + , {doSetLiteral, 255, 141,0, true} // 165 + , {doSetBeginIntersection1, 91 /* [ */, 123, 148, true} // 166 set-set-amp + , {doSetIntersection2, 38 /* & */, 162,0, true} // 167 + , {doSetAddAmp, 255, 141,0, false} // 168 + , {doSetIntersection2, 38 /* & */, 162,0, true} // 169 set-lit-amp + , {doSetAddAmp, 255, 141,0, false} // 170 + , {doSetBeginDifference1, 91 /* [ */, 123, 148, true} // 171 set-set-dash + , {doSetDifference2, 45 /* - */, 162,0, true} // 172 + , {doSetAddDash, 255, 141,0, false} // 173 + , {doSetDifference2, 45 /* - */, 162,0, true} // 174 set-range-dash + , {doSetAddDash, 255, 141,0, false} // 175 + , {doSetIntersection2, 38 /* & */, 162,0, true} // 176 set-range-amp + , {doSetAddAmp, 255, 141,0, false} // 177 + , {doSetDifference2, 45 /* - */, 162,0, true} // 178 set-lit-dash + , {doSetAddDash, 91 /* [ */, 141,0, false} // 179 + , {doSetAddDash, 93 /* ] */, 141,0, false} // 180 + , {doNOP, 92 /* \ */, 183,0, true} // 181 + , {doSetRange, 255, 155,0, true} // 182 + , {doSetOpError, 115 /* s */, 206,0, false} // 183 set-lit-dash-escape + , {doSetOpError, 83 /* S */, 206,0, false} // 184 + , {doSetOpError, 119 /* w */, 206,0, false} // 185 + , {doSetOpError, 87 /* W */, 206,0, false} // 186 + , {doSetOpError, 100 /* d */, 206,0, false} // 187 + , {doSetOpError, 68 /* D */, 206,0, false} // 188 + , {doSetNamedRange, 78 /* N */, 155,0, false} // 189 + , {doSetRange, 255, 155,0, true} // 190 + , {doSetProp, 112 /* p */, 148,0, false} // 191 set-escape + , {doSetProp, 80 /* P */, 148,0, false} // 192 + , {doSetNamedChar, 78 /* N */, 141,0, false} // 193 + , {doSetBackslash_s, 115 /* s */, 155,0, true} // 194 + , {doSetBackslash_S, 83 /* S */, 155,0, true} // 195 + , {doSetBackslash_w, 119 /* w */, 155,0, true} // 196 + , {doSetBackslash_W, 87 /* W */, 155,0, true} // 197 + , {doSetBackslash_d, 100 /* d */, 155,0, true} // 198 + , {doSetBackslash_D, 68 /* D */, 155,0, true} // 199 + , {doSetBackslash_h, 104 /* h */, 155,0, true} // 200 + , {doSetBackslash_H, 72 /* H */, 155,0, true} // 201 + , {doSetBackslash_v, 118 /* v */, 155,0, true} // 202 + , {doSetBackslash_V, 86 /* V */, 155,0, true} // 203 + , {doSetLiteralEscaped, 255, 141,0, true} // 204 + , {doSetFinish, 255, 14,0, false} // 205 set-finish + , {doExit, 255, 206,0, true} // 206 errorDeath + }; +static const char * const RegexStateNames[] = { 0, + "start", + "term", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "expr-quant", + 0, + 0, + 0, + 0, + 0, + "expr-cont", + 0, + 0, + "open-paren-quant", + 0, + "open-paren-quant2", + 0, + "open-paren", + 0, + "open-paren-extended", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "open-paren-lookbehind", + 0, + 0, + 0, + "paren-comment", + 0, + 0, + "paren-flag", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "named-capture", + 0, + 0, + 0, + "quant-star", + 0, + 0, + "quant-plus", + 0, + 0, + "quant-opt", + 0, + 0, + "interval-open", + 0, + "interval-lower", + 0, + 0, + 0, + "interval-upper", + 0, + 0, + "interval-type", + 0, + 0, + "backslash", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "named-backref", + 0, + "named-backref-2", + 0, + "named-backref-3", + 0, + 0, + 0, + "set-open", + 0, + 0, + "set-open2", + 0, + "set-posix", + 0, + 0, + "set-start", + 0, + 0, + 0, + 0, + 0, + "set-start-dash", + 0, + "set-start-amp", + 0, + "set-after-lit", + 0, + 0, + 0, + 0, + 0, + 0, + "set-after-set", + 0, + 0, + 0, + 0, + 0, + 0, + "set-after-range", + 0, + 0, + 0, + 0, + 0, + 0, + "set-after-op", + 0, + 0, + 0, + "set-set-amp", + 0, + 0, + "set-lit-amp", + 0, + "set-set-dash", + 0, + 0, + "set-range-dash", + 0, + "set-range-amp", + 0, + "set-lit-dash", + 0, + 0, + 0, + 0, + "set-lit-dash-escape", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "set-escape", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "set-finish", + "errorDeath", + 0}; + +U_NAMESPACE_END +#endif -- cgit v1.2.3