summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/rbbidata.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/common/rbbidata.h
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/common/rbbidata.h')
-rw-r--r--intl/icu/source/common/rbbidata.h212
1 files changed, 212 insertions, 0 deletions
diff --git a/intl/icu/source/common/rbbidata.h b/intl/icu/source/common/rbbidata.h
new file mode 100644
index 0000000000..c48de3d935
--- /dev/null
+++ b/intl/icu/source/common/rbbidata.h
@@ -0,0 +1,212 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2014 International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: rbbidata.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* RBBI data formats Includes
+*
+* Structs that describes the format of the Binary RBBI data,
+* as it is stored in ICU's data file.
+*
+* RBBIDataWrapper - Instances of this class sit between the
+* raw data structs and the RulesBasedBreakIterator objects
+* that are created by applications. The wrapper class
+* provides reference counting for the underlying data,
+* and direct pointers to data that would not otherwise
+* be accessible without ugly pointer arithmetic. The
+* wrapper does not attempt to provide any higher level
+* abstractions for the data itself.
+*
+* There will be only one instance of RBBIDataWrapper for any
+* set of RBBI run time data being shared by instances
+* (clones) of RulesBasedBreakIterator.
+*/
+
+#ifndef __RBBIDATA_H__
+#define __RBBIDATA_H__
+
+#include "unicode/utypes.h"
+#include "unicode/udata.h"
+#include "udataswp.h"
+
+/**
+ * Swap RBBI data. See udataswp.h.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_swap(const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+
+#include "unicode/ucptrie.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/uversion.h"
+#include "umutex.h"
+
+
+U_NAMESPACE_BEGIN
+
+// The current RBBI data format version.
+static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {6, 0, 0, 0};
+
+/*
+ * The following structs map exactly onto the raw data from ICU common data file.
+ */
+struct RBBIDataHeader {
+ uint32_t fMagic; /* == 0xbla0 */
+ UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */
+ /* if there is one associated with this data. */
+ /* (version originates in rbbi, is copied to UDataInfo) */
+ uint32_t fLength; /* Total length in bytes of this RBBI Data, */
+ /* including all sections, not just the header. */
+ uint32_t fCatCount; /* Number of character categories. */
+
+ /* */
+ /* Offsets and sizes of each of the subsections within the RBBI data. */
+ /* All offsets are bytes from the start of the RBBIDataHeader. */
+ /* All sizes are in bytes. */
+ /* */
+ uint32_t fFTable; /* forward state transition table. */
+ uint32_t fFTableLen;
+ uint32_t fRTable; /* Offset to the reverse state transition table. */
+ uint32_t fRTableLen;
+ uint32_t fTrie; /* Offset to Trie data for character categories */
+ uint32_t fTrieLen;
+ uint32_t fRuleSource; /* Offset to the source for for the break */
+ uint32_t fRuleSourceLen; /* rules. Stored char16_t *. */
+ uint32_t fStatusTable; /* Offset to the table of rule status values */
+ uint32_t fStatusTableLen;
+
+ uint32_t fReserved[6]; /* Reserved for expansion */
+
+};
+
+
+
+template <typename T>
+struct RBBIStateTableRowT {
+ T fAccepting; // Non-zero if this row is for an accepting state.
+ // Value 0: not an accepting state.
+ // 1: (ACCEPTING_UNCONDITIONAL) Unconditional Accepting state.
+ // >1: Look-ahead match has completed.
+ // Actual boundary position happened earlier.
+ // Value here == fLookAhead in earlier
+ // state, at actual boundary pos.
+ T fLookAhead; // Non-zero if this row is for a state that
+ // corresponds to a '/' in the rule source.
+ // Value is the same as the fAccepting
+ // value for the rule (which will appear
+ // in a different state.
+ T fTagsIdx; // Non-zero if this row covers a {tagged} position
+ // from a rule. Value is the index in the
+ // StatusTable of the set of matching
+ // tags (rule status values)
+ T fNextState[1]; // Next State, indexed by char category.
+ // Variable-length array declared with length 1
+ // to disable bounds checkers.
+ // Array Size is actually fData->fHeader->fCatCount
+ // CAUTION: see RBBITableBuilder::getTableSize()
+ // before changing anything here.
+};
+
+typedef RBBIStateTableRowT<uint8_t> RBBIStateTableRow8;
+typedef RBBIStateTableRowT<uint16_t> RBBIStateTableRow16;
+
+constexpr uint16_t ACCEPTING_UNCONDITIONAL = 1; // Value constant for RBBIStateTableRow::fAccepting
+
+union RBBIStateTableRow {
+ RBBIStateTableRow16 r16;
+ RBBIStateTableRow8 r8;
+};
+
+struct RBBIStateTable {
+ uint32_t fNumStates; // Number of states.
+ uint32_t fRowLen; // Length of a state table row, in bytes.
+ uint32_t fDictCategoriesStart; // Char category number of the first dictionary
+ // char class, or the the largest category number + 1
+ // if there are no dictionary categories.
+ uint32_t fLookAheadResultsSize; // Size of run-time array required for holding
+ // look-ahead results. Indexed by row.fLookAhead.
+ uint32_t fFlags; // Option Flags for this state table.
+ char fTableData[1]; // First RBBIStateTableRow begins here.
+ // Variable-length array declared with length 1
+ // to disable bounds checkers.
+ // (making it char[] simplifies ugly address
+ // arithmetic for indexing variable length rows.)
+};
+
+constexpr uint32_t RBBI_LOOKAHEAD_HARD_BREAK = 1;
+constexpr uint32_t RBBI_BOF_REQUIRED = 2;
+constexpr uint32_t RBBI_8BITS_ROWS = 4;
+
+
+/* */
+/* The reference counting wrapper class */
+/* */
+class RBBIDataWrapper : public UMemory {
+public:
+ enum EDontAdopt {
+ kDontAdopt
+ };
+ RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
+ RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
+ RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
+ ~RBBIDataWrapper();
+
+ static UBool isDataVersionAcceptable(const UVersionInfo version);
+
+ void init0();
+ void init(const RBBIDataHeader *data, UErrorCode &status);
+ RBBIDataWrapper *addReference();
+ void removeReference();
+ bool operator ==(const RBBIDataWrapper &other) const;
+ int32_t hashCode();
+ const UnicodeString &getRuleSourceString() const;
+ void printData();
+ void printTable(const char *heading, const RBBIStateTable *table);
+
+ /* */
+ /* Pointers to items within the data */
+ /* */
+ const RBBIDataHeader *fHeader;
+ const RBBIStateTable *fForwardTable;
+ const RBBIStateTable *fReverseTable;
+ const char *fRuleSource;
+ const int32_t *fRuleStatusTable;
+
+ /* number of int32_t values in the rule status table. Used to sanity check indexing */
+ int32_t fStatusMaxIdx;
+
+ UCPTrie *fTrie;
+
+private:
+ u_atomic_int32_t fRefCount;
+ UDataMemory *fUDataMem;
+ UnicodeString fRuleString;
+ UBool fDontFreeData;
+
+ RBBIDataWrapper(const RBBIDataWrapper &other) = delete; /* forbid copying of this class */
+ RBBIDataWrapper &operator=(const RBBIDataWrapper &other) = delete; /* forbid copying of this class */
+};
+
+
+
+U_NAMESPACE_END
+
+U_CFUNC UBool rbbi_cleanup();
+
+#endif /* C++ */
+
+#endif