1 files changed, 408 insertions, 0 deletions
diff --git a/intl/icu/source/tools/genrb/rle.c b/intl/icu/source/tools/genrb/rle.c
new file mode 100644
index 0000000000..f737c45491
--- /dev/null
+++ b/intl/icu/source/tools/genrb/rle.c
@@ -0,0 +1,408 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2000-2003, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*
+* File writejava.c
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   01/11/02    Ram        Creation.
+*******************************************************************************
+*/
+#include <stdbool.h>
+#include "rle.h"
+/**
+ * The ESCAPE character is used during run-length encoding.  It signals
+ * a run of identical chars.
+ */
+static const uint16_t ESCAPE = 0xA5A5;
+
+/**
+ * The ESCAPE_BYTE character is used during run-length encoding.  It signals
+ * a run of identical bytes.
+ */
+static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
+
+/**
+ * Append a byte to the given StringBuffer, packing two bytes into each
+ * character.  The state parameter maintains intermediary data between
+ * calls.
+ * @param state A two-element array, with state[0] == 0 if this is the
+ * first byte of a pair, or state[0] != 0 if this is the second byte
+ * of a pair, in which case state[1] is the first byte.
+ */
+static uint16_t*
+appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
+    if(!status || U_FAILURE(*status)){
+        return NULL;
+    }
+    if (state[0] != 0) {
+        uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
+        if(buffer < buffLimit){
+            *buffer++ = c;
+        }else{
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
+        state[0] = 0;
+        return buffer;
+    }
+    else {
+        state[0] = 1;
+        state[1] = value;
+        return buffer;
+    }
+}
+/**
+ * Encode a run, possibly a degenerate run (of < 4 values).
+ * @param length The length of the run; must be > 0 && <= 0xFF.
+ */
+static uint16_t*
+encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
+    if(!status || U_FAILURE(*status)){
+        return NULL;
+    }
+    if (length < 4) {
+        int32_t j=0;
+        for (; j<length; ++j) {
+            if (value == ESCAPE_BYTE) {
+                buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
+            }
+            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
+        }
+    }
+    else {
+        if (length == ESCAPE_BYTE) {
+            if (value == ESCAPE_BYTE){
+               buffer =  appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
+            }
+            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
+            --length;
+        }
+        buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
+        buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
+        buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
+    }
+    return buffer;
+}
+
+#define APPEND( buffer, bufLimit, value, num, status) UPRV_BLOCK_MACRO_BEGIN { \
+    if(buffer<bufLimit){                    \
+        *buffer++=(value);                  \
+    }else{                                  \
+        *status = U_BUFFER_OVERFLOW_ERROR;  \
+    }                                       \
+    num++;                                  \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Encode a run, possibly a degenerate run (of < 4 values).
+ * @param length The length of the run; must be > 0 && <= 0xFFFF.
+ */
+static uint16_t*
+encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
+    int32_t num=0;
+    if (length < 4) {
+        int j=0;
+        for (; j<length; ++j) {
+            if (value == (int32_t) ESCAPE){
+                APPEND(buffer,bufLimit,ESCAPE, num, status);
+
+            }
+            APPEND(buffer,bufLimit,value,num, status);
+        }
+    }
+    else {
+        if (length == (int32_t) ESCAPE) {
+            if (value == (int32_t) ESCAPE){
+                APPEND(buffer,bufLimit,ESCAPE,num,status);
+
+            }
+            APPEND(buffer,bufLimit,value,num,status);
+            --length;
+        }
+        APPEND(buffer,bufLimit,ESCAPE,num,status);
+        APPEND(buffer,bufLimit,(uint16_t) length, num,status);
+        APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
+    }
+    return buffer;
+}
+
+/**
+ * Construct a string representing a char array.  Use run-length encoding.
+ * A character represents itself, unless it is the ESCAPE character.  Then
+ * the following notations are possible:
+ *   ESCAPE ESCAPE   ESCAPE literal
+ *   ESCAPE n c      n instances of character c
+ * Since an encoded run occupies 3 characters, we only encode runs of 4 or
+ * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
+ * If we encounter a run where n == ESCAPE, we represent this as:
+ *   c ESCAPE n-1 c
+ * The ESCAPE value is chosen so as not to collide with commonly
+ * seen values.
+ */
+int32_t
+usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
+    uint16_t* bufLimit =  buffer+bufLen;
+    uint16_t* saveBuffer = buffer;
+    if(buffer < bufLimit){
+        *buffer++ =  (uint16_t)(srcLen>>16);
+        if(buffer<bufLimit){
+            uint16_t runValue = src[0];
+            int32_t runLength = 1;
+            int i=1;
+            *buffer++ = (uint16_t) srcLen;
+
+            for (; i<srcLen; ++i) {
+                uint16_t s = src[i];
+                if (s == runValue && runLength < 0xFFFF){
+                    ++runLength;
+                }else {
+                    buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
+                    runValue = s;
+                    runLength = 1;
+                }
+            }
+            buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
+        }else{
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
+    }else{
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    return (int32_t)(buffer - saveBuffer);
+}
+
+/**
+ * Construct a string representing a byte array.  Use run-length encoding.
+ * Two bytes are packed into a single char, with a single extra zero byte at
+ * the end if needed.  A byte represents itself, unless it is the
+ * ESCAPE_BYTE.  Then the following notations are possible:
+ *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
+ *   ESCAPE_BYTE n b           n instances of byte b
+ * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
+ * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
+ * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
+ *   b ESCAPE_BYTE n-1 b
+ * The ESCAPE_BYTE value is chosen so as not to collide with commonly
+ * seen values.
+ */
+int32_t
+byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
+    const uint16_t* saveBuf = buffer;
+    uint16_t* bufLimit =  buffer+bufLen;
+    if(buffer < bufLimit){
+        *buffer++ = ((uint16_t) (srcLen >> 16));
+
+        if(buffer<bufLimit){
+            uint8_t runValue = src[0];
+            int runLength = 1;
+            uint8_t state[2]= {0};
+            int i=1;
+            *buffer++=((uint16_t) srcLen);
+            for (; i<srcLen; ++i) {
+                uint8_t b = src[i];
+                if (b == runValue && runLength < 0xFF){
+                    ++runLength;
+                }
+                else {
+                    buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
+                    runValue = b;
+                    runLength = 1;
+                }
+            }
+            buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
+
+            /* We must save the final byte, if there is one, by padding
+             * an extra zero.
+             */
+            if (state[0] != 0) {
+                buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
+            }
+        }else{
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
+    }else{
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    return (int32_t) (buffer - saveBuf);
+}
+
+
+/**
+ * Construct an array of shorts from a run-length encoded string.
+ */
+int32_t
+rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
+    int32_t length = 0;
+    int32_t ai = 0;
+    int i=2;
+
+    if(!status || U_FAILURE(*status)){
+        return 0;
+    }
+    /* the source is null terminated */
+    if(srcLen == -1){
+        srcLen = u_strlen(src);
+    }
+    if(srcLen <= 2){
+        return 2;
+    }
+    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
+
+    if(target == NULL){
+        return length;
+    }
+    if(tgtLen < length){
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return length;
+    }
+
+    for (; i<srcLen; ++i) {
+        uint16_t c = src[i];
+        if (c == ESCAPE) {
+            c = src[++i];
+            if (c == ESCAPE) {
+                target[ai++] = c;
+            } else {
+                int32_t runLength = (int32_t) c;
+                uint16_t runValue = src[++i];
+                int j=0;
+                for (; j<runLength; ++j) {
+                    target[ai++] = runValue;
+                }
+            }
+        }
+        else {
+            target[ai++] = c;
+        }
+    }
+
+    if (ai != length){
+        *status = U_INTERNAL_PROGRAM_ERROR;
+    }
+
+    return length;
+}
+
+/**
+ * Construct an array of bytes from a run-length encoded string.
+ */
+int32_t
+rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
+
+    int32_t length = 0;
+    UBool nextChar = true;
+    uint16_t c = 0;
+    int32_t node = 0;
+    int32_t runLength = 0;
+    int32_t i = 2;
+    int32_t ai=0;
+
+    if(!status || U_FAILURE(*status)){
+        return 0;
+    }
+    /* the source is null terminated */
+    if(srcLen == -1){
+        srcLen = u_strlen(src);
+    }
+    if(srcLen <= 2){
+        return 2;
+    }
+    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
+
+    if(target == NULL){
+        return length;
+    }
+    if(tgtLen < length){
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return length;
+    }
+
+    for (; ai<tgtLen; ) {
+       /* This part of the loop places the next byte into the local
+        * variable 'b' each time through the loop.  It keeps the
+        * current character in 'c' and uses the boolean 'nextChar'
+        * to see if we've taken both bytes out of 'c' yet.
+        */
+        uint8_t b;
+        if (nextChar) {
+            c = src[i++];
+            b = (uint8_t) (c >> 8);
+            nextChar = false;
+        }
+        else {
+            b = (uint8_t) (c & 0xFF);
+            nextChar = true;
+        }
+
+       /* This part of the loop is a tiny state machine which handles
+        * the parsing of the run-length encoding.  This would be simpler
+        * if we could look ahead, but we can't, so we use 'node' to
+        * move between three nodes in the state machine.
+        */
+        switch (node) {
+        case 0:
+            /* Normal idle node */
+            if (b == ESCAPE_BYTE) {
+                node = 1;
+            }
+            else {
+                target[ai++] = b;
+            }
+            break;
+        case 1:
+           /* We have seen one ESCAPE_BYTE; we expect either a second
+            * one, or a run length and value.
+            */
+            if (b == ESCAPE_BYTE) {
+                target[ai++] = ESCAPE_BYTE;
+                node = 0;
+            }
+            else {
+                runLength = b;
+                node = 2;
+            }
+            break;
+        case 2:
+            {
+                int j=0;
+               /* We have seen an ESCAPE_BYTE and length byte.  We interpret
+                * the next byte as the value to be repeated.
+                */
+                for (; j<runLength; ++j){
+                    if(ai<tgtLen){
+                        target[ai++] = b;
+                    }else{
+                        *status = U_BUFFER_OVERFLOW_ERROR;
+                        return ai;
+                    }
+                }
+                node = 0;
+                break;
+            }
+        }
+    }
+
+    if (node != 0){
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        /*("Bad run-length encoded byte array")*/
+        return 0;
+    }
+
+
+    if (i != srcLen){
+        /*("Excess data in RLE byte array string");*/
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        return ai;
+    }
+
+    return ai;
+}
+