summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/tools/genrb/rle.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--intl/icu/source/tools/genrb/rle.c407
1 files changed, 407 insertions, 0 deletions
diff --git a/intl/icu/source/tools/genrb/rle.c b/intl/icu/source/tools/genrb/rle.c
new file mode 100644
index 0000000000..279684aad0
--- /dev/null
+++ b/intl/icu/source/tools/genrb/rle.c
@@ -0,0 +1,407 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2000-2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File writejava.c
+*
+* Modification History:
+*
+* Date Name Description
+* 01/11/02 Ram Creation.
+*******************************************************************************
+*/
+#include "rle.h"
+/**
+ * The ESCAPE character is used during run-length encoding. It signals
+ * a run of identical chars.
+ */
+static const uint16_t ESCAPE = 0xA5A5;
+
+/**
+ * The ESCAPE_BYTE character is used during run-length encoding. It signals
+ * a run of identical bytes.
+ */
+static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
+
+/**
+ * Append a byte to the given StringBuffer, packing two bytes into each
+ * character. The state parameter maintains intermediary data between
+ * calls.
+ * @param state A two-element array, with state[0] == 0 if this is the
+ * first byte of a pair, or state[0] != 0 if this is the second byte
+ * of a pair, in which case state[1] is the first byte.
+ */
+static uint16_t*
+appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
+ if(!status || U_FAILURE(*status)){
+ return NULL;
+ }
+ if (state[0] != 0) {
+ uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
+ if(buffer < buffLimit){
+ *buffer++ = c;
+ }else{
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ }
+ state[0] = 0;
+ return buffer;
+ }
+ else {
+ state[0] = 1;
+ state[1] = value;
+ return buffer;
+ }
+}
+/**
+ * Encode a run, possibly a degenerate run (of < 4 values).
+ * @param length The length of the run; must be > 0 && <= 0xFF.
+ */
+static uint16_t*
+encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
+ if(!status || U_FAILURE(*status)){
+ return NULL;
+ }
+ if (length < 4) {
+ int32_t j=0;
+ for (; j<length; ++j) {
+ if (value == ESCAPE_BYTE) {
+ buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
+ }
+ buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
+ }
+ }
+ else {
+ if (length == ESCAPE_BYTE) {
+ if (value == ESCAPE_BYTE){
+ buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
+ }
+ buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
+ --length;
+ }
+ buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
+ buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
+ buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
+ }
+ return buffer;
+}
+
+#define APPEND( buffer, bufLimit, value, num, status) UPRV_BLOCK_MACRO_BEGIN { \
+ if(buffer<bufLimit){ \
+ *buffer++=(value); \
+ }else{ \
+ *status = U_BUFFER_OVERFLOW_ERROR; \
+ } \
+ num++; \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Encode a run, possibly a degenerate run (of < 4 values).
+ * @param length The length of the run; must be > 0 && <= 0xFFFF.
+ */
+static uint16_t*
+encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
+ int32_t num=0;
+ if (length < 4) {
+ int j=0;
+ for (; j<length; ++j) {
+ if (value == (int32_t) ESCAPE){
+ APPEND(buffer,bufLimit,ESCAPE, num, status);
+
+ }
+ APPEND(buffer,bufLimit,value,num, status);
+ }
+ }
+ else {
+ if (length == (int32_t) ESCAPE) {
+ if (value == (int32_t) ESCAPE){
+ APPEND(buffer,bufLimit,ESCAPE,num,status);
+
+ }
+ APPEND(buffer,bufLimit,value,num,status);
+ --length;
+ }
+ APPEND(buffer,bufLimit,ESCAPE,num,status);
+ APPEND(buffer,bufLimit,(uint16_t) length, num,status);
+ APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
+ }
+ return buffer;
+}
+
+/**
+ * Construct a string representing a char array. Use run-length encoding.
+ * A character represents itself, unless it is the ESCAPE character. Then
+ * the following notations are possible:
+ * ESCAPE ESCAPE ESCAPE literal
+ * ESCAPE n c n instances of character c
+ * Since an encoded run occupies 3 characters, we only encode runs of 4 or
+ * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
+ * If we encounter a run where n == ESCAPE, we represent this as:
+ * c ESCAPE n-1 c
+ * The ESCAPE value is chosen so as not to collide with commonly
+ * seen values.
+ */
+int32_t
+usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
+ uint16_t* bufLimit = buffer+bufLen;
+ uint16_t* saveBuffer = buffer;
+ if(buffer < bufLimit){
+ *buffer++ = (uint16_t)(srcLen>>16);
+ if(buffer<bufLimit){
+ uint16_t runValue = src[0];
+ int32_t runLength = 1;
+ int i=1;
+ *buffer++ = (uint16_t) srcLen;
+
+ for (; i<srcLen; ++i) {
+ uint16_t s = src[i];
+ if (s == runValue && runLength < 0xFFFF){
+ ++runLength;
+ }else {
+ buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
+ runValue = s;
+ runLength = 1;
+ }
+ }
+ buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
+ }else{
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }else{
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ }
+ return (int32_t)(buffer - saveBuffer);
+}
+
+/**
+ * Construct a string representing a byte array. Use run-length encoding.
+ * Two bytes are packed into a single char, with a single extra zero byte at
+ * the end if needed. A byte represents itself, unless it is the
+ * ESCAPE_BYTE. Then the following notations are possible:
+ * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
+ * ESCAPE_BYTE n b n instances of byte b
+ * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
+ * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
+ * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
+ * b ESCAPE_BYTE n-1 b
+ * The ESCAPE_BYTE value is chosen so as not to collide with commonly
+ * seen values.
+ */
+int32_t
+byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
+ const uint16_t* saveBuf = buffer;
+ uint16_t* bufLimit = buffer+bufLen;
+ if(buffer < bufLimit){
+ *buffer++ = ((uint16_t) (srcLen >> 16));
+
+ if(buffer<bufLimit){
+ uint8_t runValue = src[0];
+ int runLength = 1;
+ uint8_t state[2]= {0};
+ int i=1;
+ *buffer++=((uint16_t) srcLen);
+ for (; i<srcLen; ++i) {
+ uint8_t b = src[i];
+ if (b == runValue && runLength < 0xFF){
+ ++runLength;
+ }
+ else {
+ buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
+ runValue = b;
+ runLength = 1;
+ }
+ }
+ buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
+
+ /* We must save the final byte, if there is one, by padding
+ * an extra zero.
+ */
+ if (state[0] != 0) {
+ buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
+ }
+ }else{
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }else{
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ }
+ return (int32_t) (buffer - saveBuf);
+}
+
+
+/**
+ * Construct an array of shorts from a run-length encoded string.
+ */
+int32_t
+rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
+ int32_t length = 0;
+ int32_t ai = 0;
+ int i=2;
+
+ if(!status || U_FAILURE(*status)){
+ return 0;
+ }
+ /* the source is null terminated */
+ if(srcLen == -1){
+ srcLen = u_strlen(src);
+ }
+ if(srcLen <= 2){
+ return 2;
+ }
+ length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
+
+ if(target == NULL){
+ return length;
+ }
+ if(tgtLen < length){
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return length;
+ }
+
+ for (; i<srcLen; ++i) {
+ uint16_t c = src[i];
+ if (c == ESCAPE) {
+ c = src[++i];
+ if (c == ESCAPE) {
+ target[ai++] = c;
+ } else {
+ int32_t runLength = (int32_t) c;
+ uint16_t runValue = src[++i];
+ int j=0;
+ for (; j<runLength; ++j) {
+ target[ai++] = runValue;
+ }
+ }
+ }
+ else {
+ target[ai++] = c;
+ }
+ }
+
+ if (ai != length){
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ }
+
+ return length;
+}
+
+/**
+ * Construct an array of bytes from a run-length encoded string.
+ */
+int32_t
+rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
+
+ int32_t length = 0;
+ UBool nextChar = TRUE;
+ uint16_t c = 0;
+ int32_t node = 0;
+ int32_t runLength = 0;
+ int32_t i = 2;
+ int32_t ai=0;
+
+ if(!status || U_FAILURE(*status)){
+ return 0;
+ }
+ /* the source is null terminated */
+ if(srcLen == -1){
+ srcLen = u_strlen(src);
+ }
+ if(srcLen <= 2){
+ return 2;
+ }
+ length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
+
+ if(target == NULL){
+ return length;
+ }
+ if(tgtLen < length){
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return length;
+ }
+
+ for (; ai<tgtLen; ) {
+ /* This part of the loop places the next byte into the local
+ * variable 'b' each time through the loop. It keeps the
+ * current character in 'c' and uses the boolean 'nextChar'
+ * to see if we've taken both bytes out of 'c' yet.
+ */
+ uint8_t b;
+ if (nextChar) {
+ c = src[i++];
+ b = (uint8_t) (c >> 8);
+ nextChar = FALSE;
+ }
+ else {
+ b = (uint8_t) (c & 0xFF);
+ nextChar = TRUE;
+ }
+
+ /* This part of the loop is a tiny state machine which handles
+ * the parsing of the run-length encoding. This would be simpler
+ * if we could look ahead, but we can't, so we use 'node' to
+ * move between three nodes in the state machine.
+ */
+ switch (node) {
+ case 0:
+ /* Normal idle node */
+ if (b == ESCAPE_BYTE) {
+ node = 1;
+ }
+ else {
+ target[ai++] = b;
+ }
+ break;
+ case 1:
+ /* We have seen one ESCAPE_BYTE; we expect either a second
+ * one, or a run length and value.
+ */
+ if (b == ESCAPE_BYTE) {
+ target[ai++] = ESCAPE_BYTE;
+ node = 0;
+ }
+ else {
+ runLength = b;
+ node = 2;
+ }
+ break;
+ case 2:
+ {
+ int j=0;
+ /* We have seen an ESCAPE_BYTE and length byte. We interpret
+ * the next byte as the value to be repeated.
+ */
+ for (; j<runLength; ++j){
+ if(ai<tgtLen){
+ target[ai++] = b;
+ }else{
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return ai;
+ }
+ }
+ node = 0;
+ break;
+ }
+ }
+ }
+
+ if (node != 0){
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ /*("Bad run-length encoded byte array")*/
+ return 0;
+ }
+
+
+ if (i != srcLen){
+ /*("Excess data in RLE byte array string");*/
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return ai;
+ }
+
+ return ai;
+}
+