1 files changed, 732 insertions, 0 deletions
diff --git a/intl/icu/source/io/ustdio.cpp b/intl/icu/source/io/ustdio.cpp
new file mode 100644
index 0000000000..4130f34044
--- /dev/null
+++ b/intl/icu/source/io/ustdio.cpp
@@ -0,0 +1,732 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ ******************************************************************************
+ *
+ *   Copyright (C) 1998-2016, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ ******************************************************************************
+ *
+ * File ustdio.cpp
+ *
+ * Modification History:
+ *
+ *   Date        Name        Description
+ *   11/18/98    stephen     Creation.
+ *   03/12/99    stephen     Modified for new C API.
+ *   07/19/99    stephen     Fixed read() and gets()
+ ******************************************************************************
+ */
+
+#include "unicode/ustdio.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "ufile.h"
+#include "ufmt_cmn.h"
+#include "unicode/ucnv.h"
+#include "unicode/ustring.h"
+
+#include <string.h>
+
+#define DELIM_LF 0x000A  /* U+000A LINE FEED */
+#define DELIM_VT 0x000B  /* U+000B LINE TABULATION (vertical tab) */
+#define DELIM_FF 0x000C  /* U+000C FORM FEED */
+#define DELIM_CR 0x000D  /* U+000D CARRIAGE RETURN */
+#define DELIM_NEL 0x0085 /* U+0085 NEXT LINE */
+#define DELIM_LS 0x2028  /* U+2028 LINE SEPARATOR */
+#define DELIM_PS 0x2029  /* U+2029 PARAGRAPH SEPARATOR */
+/* DELIMITERS is the line terminator appended by u_fputs(); DELIMITERS_LEN excludes the trailing NUL. */
+/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
+#if U_PLATFORM_USES_ONLY_WIN32_API
+static const char16_t DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
+static const uint32_t DELIMITERS_LEN = 2;
+/* TODO: Default newline writing should be detected based upon the converter being used. */
+#else
+static const char16_t DELIMITERS [] = { DELIM_LF, 0x0000 };
+static const uint32_t DELIMITERS_LEN = 1;
+#endif
+/* Line-terminator tests used by u_fgets(). The macros evaluate their arguments more than once. */
+#define IS_FIRST_STRING_DELIMITER(c1) \
+ (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
+        || (c1) == DELIM_NEL \
+        || (c1) == DELIM_LS \
+        || (c1) == DELIM_PS)
+#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
+#define IS_COMBINED_STRING_DELIMITER(c1, c2) \
+ (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
+
+
+#if !UCONFIG_NO_TRANSLITERATION
+
+U_CAPI UTransliterator* U_EXPORT2
+u_fsettransliterator(UFILE *file, UFileDirection direction,
+                     UTransliterator *adopt, UErrorCode *status)
+{
+    /*
+     * Installs `adopt` as the transliterator for text written through `file`,
+     * or removes the current one when `adopt` is nullptr.
+     * Returns the transliterator that was installed before the call (nullptr
+     * if there was none). On any error `adopt` itself is returned and *status
+     * is set; a failing *status on entry also returns `adopt` untouched.
+     */
+    UFILETranslitBuffer *state;
+    UTransliterator *previous;
+
+    if (U_FAILURE(*status)) {
+        return adopt;
+    }
+    if (file == nullptr) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return adopt;
+    }
+    if ((direction & U_READ) != 0) {
+        /* Transliteration on the read side is not implemented yet. */
+        *status = U_UNSUPPORTED_ERROR;
+        return adopt;
+    }
+
+    state = file->fTranslit;
+    if (adopt == nullptr) {
+        /* Clearing: free the buffer and bookkeeping, hand back the old one. */
+        if (state == nullptr) {
+            return nullptr;
+        }
+        previous = state->translit;
+        uprv_free(state->buffer);
+        uprv_free(state);
+        file->fTranslit = nullptr;
+        return previous;
+    }
+
+    if (state != nullptr) {
+        /* Replacing: flush text pending under the old transliterator first. */
+        previous = state->translit;
+        ufile_flush_translit(file);
+        file->fTranslit->translit = adopt;
+        return previous;
+    }
+
+    /* First installation: create empty bookkeeping for the new transliterator. */
+    state = (UFILETranslitBuffer *)uprv_malloc(sizeof(UFILETranslitBuffer));
+    file->fTranslit = state;
+    if (state == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return adopt;
+    }
+    state->capacity = 0;
+    state->length = 0;
+    state->pos = 0;
+    state->buffer = nullptr;
+    state->translit = adopt;
+    return nullptr;
+}
+
+/*
+ * Feeds *count units from src through f's transliterator and returns the
+ * text that is ready to be written, updating *count to its length. Without a
+ * transliterator src is returned unchanged. Unless flush is set, text the
+ * transliterator has not committed yet stays buffered for the next call.
+ * If the work buffer cannot be grown, *count is set to 0 and src is returned.
+ */
+static const char16_t * u_file_translit(UFILE *f, const char16_t *src, int32_t *count, UBool flush)
+{
+    int32_t newlen;
+    int32_t junkCount = 0;
+    int32_t textLength;
+    int32_t textLimit;
+    UTransPosition pos;
+    UErrorCode status = U_ZERO_ERROR;
+
+    if(count == nullptr)
+    {
+        count = &junkCount;
+    }
+
+    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
+    {
+        /* fast path */
+        return src;
+    }
+
+    /* First: slide over everything */
+    if(f->fTranslit->length > f->fTranslit->pos)
+    {
+        memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
+            (f->fTranslit->length - f->fTranslit->pos)*sizeof(char16_t));
+    }
+    f->fTranslit->length -= f->fTranslit->pos; /* always */
+    f->fTranslit->pos = 0;
+
+    /* Calculate new buffer size needed */
+    newlen = (*count + f->fTranslit->length) * 4;
+
+    if(newlen > f->fTranslit->capacity)
+    {
+        /* Grow via a temporary: on failure the old block must stay owned by
+           the UFILE (no leak, and never a null buffer with length > 0). */
+        char16_t *grown = (f->fTranslit->buffer == nullptr)
+            ? (char16_t*)uprv_malloc(newlen * sizeof(char16_t))
+            : (char16_t*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(char16_t));
+        if (grown == nullptr) {
+            *count = 0; /* nothing is ready; the caller must not read a null pointer */
+            return src;
+        }
+        f->fTranslit->buffer = grown;
+        f->fTranslit->capacity = newlen;
+    }
+
+    /* Now, copy any data over. u_memcpy honours the count; u_strncpy would stop
+       at an embedded NUL and leave the rest of the counted range unset. */
+    if (*count > 0) {
+        u_memcpy(f->fTranslit->buffer + f->fTranslit->length, src, *count);
+    }
+    f->fTranslit->length += *count;
+
+    /* Now, translit in place as much as we can  */
+    /* NOTE(review): status from the utrans_* calls below is not inspected. */
+    if(flush == false)
+    {
+        textLength = f->fTranslit->length;
+        pos.contextStart = 0;
+        pos.contextLimit = textLength;
+        pos.start        = 0;
+        pos.limit        = textLength;
+        utrans_transIncrementalUChars(f->fTranslit->translit,
+            f->fTranslit->buffer, /* because we shifted */
+            &textLength,
+            f->fTranslit->capacity,
+            &pos,
+            &status);
+        /* now: start/limit point to the transliterated text */
+        /* Transliterated is [buffer..pos.start) */
+        *count            = pos.start;
+        f->fTranslit->pos = pos.start;
+        f->fTranslit->length = pos.limit;
+        return f->fTranslit->buffer;
+    }
+    else
+    {
+        textLength = f->fTranslit->length;
+        textLimit = f->fTranslit->length;
+        utrans_transUChars(f->fTranslit->translit,
+            f->fTranslit->buffer,
+            &textLength,
+            f->fTranslit->capacity,
+            0,
+            &textLimit,
+            &status);
+        /* out: converted len */
+        *count = textLimit;
+        /* Set pointers to 0 */
+        f->fTranslit->pos = 0;
+        f->fTranslit->length = 0;
+        return f->fTranslit->buffer;
+    }
+}
+
+#endif
+
+/* Flushes buffered transliteration by writing an empty chunk with flushTranslit set. */
+void
+ufile_flush_translit(UFILE *f)
+{
+#if !UCONFIG_NO_TRANSLITERATION
+    /* Without a UFILE or a transliteration buffer there is nothing pending. */
+    if (f == nullptr || f->fTranslit == nullptr) { return; }
+#endif
+    u_file_write_flush(nullptr, 0, f, false, true);
+}
+
+
+/* Flushes the write path of f by writing an empty chunk with flushIO set. */
+void
+ufile_flush_io(UFILE *f)
+{
+    /* A missing UFILE, or one without a FILE behind it, has nothing to flush. */
+    if (f != nullptr && f->fFile != nullptr) {
+        u_file_write_flush(nullptr, 0, f, true, false);
+    }
+}
+
+
+/* Flushes pending transliterated text, then frees the transliterator, its
+ * buffer and the bookkeeping struct, leaving f->fTranslit null. */
+void
+ufile_close_translit(UFILE *f)
+{
+#if !UCONFIG_NO_TRANSLITERATION
+    if (f == nullptr || f->fTranslit == nullptr) {
+        return;
+    }
+#endif
+    ufile_flush_translit(f);
+#if !UCONFIG_NO_TRANSLITERATION
+    UFILETranslitBuffer *state = f->fTranslit;
+    if (state->translit != nullptr) {
+        utrans_close(state->translit);
+    }
+    if (state->buffer != nullptr) {
+        uprv_free(state->buffer);
+    }
+    uprv_free(state);
+    f->fTranslit = nullptr;
+#endif
+}
+
+
+/* Input/output */
+
+/* Writes s and then DELIMITERS to f; returns the sum of both u_file_write() results. */
+U_CAPI int32_t U_EXPORT2
+u_fputs(const char16_t *s, UFILE *f)
+{
+    const int32_t textUnits = u_file_write(s, u_strlen(s), f);
+    const int32_t newlineUnits = u_file_write(DELIMITERS, DELIMITERS_LEN, f);
+    return textUnits + newlineUnits;
+}
+
+/* Encodes uc as UTF-16 and writes it to f; returns uc on success, U_EOF otherwise. */
+U_CAPI UChar32 U_EXPORT2
+u_fputc(UChar32 uc, UFILE *f)
+{
+    char16_t units[2];
+    int32_t unitCount = 0;
+    UBool invalid = false;
+
+    U16_APPEND(units, unitCount, UPRV_LENGTHOF(units), uc, invalid);
+    if (!invalid && u_file_write(units, unitCount, f) == unitCount) {
+        return uc;
+    }
+    return U_EOF;
+}
+
+
+U_CFUNC int32_t U_EXPORT2
+u_file_write_flush(const char16_t *chars,
+                   int32_t     count,
+                   UFILE       *f,
+                   UBool       flushIO,
+                   UBool       flushTranslit)
+{
+    /* Transliterates (when a transliterator is set) and writes count units of chars to f's string buffer or, converted, to its FILE. */
+    UErrorCode  status       = U_ZERO_ERROR;
+    const char16_t *mySource    = chars;
+    const char16_t *mySourceBegin;
+    const char16_t *mySourceEnd;
+    char        charBuffer[UFILE_CHARBUFFER_SIZE];
+    char        *myTarget   = charBuffer;
+    int32_t     written      = 0;
+    int32_t     numConverted = 0;
+    /* Returns the number of UTF-16 units written. A negative count means chars is NUL-terminated. */
+    if (count < 0) {
+        count = u_strlen(chars);
+    }
+
+#if !UCONFIG_NO_TRANSLITERATION
+    if((f->fTranslit) && (f->fTranslit->translit))
+    {
+        /* Do the transliteration; count becomes the length of the text that is ready to write */
+        mySource = u_file_translit(f, chars, &count, flushTranslit);
+    }
+#endif
+
+    /* Write to a string: copy as much as fits between fPos and fLimit. */
+    if (!f->fFile) {
+        int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
+        if (flushIO && charsLeft > count) {
+            count++; /* NOTE(review): presumably accounts for a terminating NUL - confirm */
+        }
+        written = ufmt_min(count, charsLeft);
+        u_strncpy(f->str.fPos, mySource, written);
+        f->str.fPos += written;
+        return written;
+    }
+
+    mySourceEnd = mySource + count;
+
+    /* Convert one charBuffer-sized chunk per pass; repeat while the pass ends with U_BUFFER_OVERFLOW_ERROR. */
+    do {
+        mySourceBegin = mySource; /* beginning location for this loop */
+        status     = U_ZERO_ERROR;
+        if(f->fConverter != nullptr) { /* We have a valid converter */
+            ucnv_fromUnicode(f->fConverter,
+                &myTarget,
+                charBuffer + UFILE_CHARBUFFER_SIZE,
+                &mySource,
+                mySourceEnd,
+                nullptr,
+                flushIO,
+                &status);
+        } else { /*weiv: do the invariant conversion */
+            int32_t convertChars = (int32_t) (mySourceEnd - mySource); 
+            if (convertChars > UFILE_CHARBUFFER_SIZE) { 
+                convertChars = UFILE_CHARBUFFER_SIZE; 
+                status = U_BUFFER_OVERFLOW_ERROR; 
+            } 
+            u_UCharsToChars(mySource, myTarget, convertChars); 
+            mySource += convertChars; 
+            myTarget += convertChars; 
+        }
+        numConverted = (int32_t)(myTarget - charBuffer);
+
+        if (numConverted > 0) {
+            /* write the converted bytes; NOTE(review): the fwrite() result is not checked */
+            fwrite(charBuffer,
+                sizeof(char),
+                numConverted,
+                f->fFile);
+
+            written     += (int32_t) (mySource - mySourceBegin);
+        }
+        myTarget     = charBuffer;
+    }
+    while(status == U_BUFFER_OVERFLOW_ERROR);
+
+    /* return # of source units consumed by passes that produced output */
+    return written;
+}
+
+/* Writes count UTF-16 units from chars to f; returns what u_file_write_flush() reports. */
+U_CAPI int32_t U_EXPORT2
+u_file_write(const char16_t *chars, int32_t count, UFILE *f)
+{
+    /* Plain write: flushIO and flushTranslit are both false. */
+    return u_file_write_flush(chars, count, f, false, false);
+}
+
+
+/*
+ * Private helper that refills the UFILE's char16_t read buffer from the
+ * underlying FILE, converting the bytes with the file's converter (or with
+ * the invariant conversion when there is none). Unread units are moved to
+ * the front of the buffer first. Does nothing for string-backed UFILEs, or
+ * for fFileno 0 (stdin) while unread data remains.
+ */
+void
+ufile_fill_uchar_buffer(UFILE *f)
+{
+    char                charBuffer[UFILE_CHARBUFFER_SIZE];
+    u_localized_string *buf = &f->str;
+    UErrorCode          errorCode = U_ZERO_ERROR;
+    const char         *bytes = charBuffer;
+    const char         *bytesEnd;
+    char16_t           *out;
+    int32_t             pending;
+    int32_t             room;
+    int32_t             unitDivisor;
+    int32_t             maxBytes;
+    int32_t             readCap;
+    int32_t             bytesRead;
+
+    if (f->fFile == nullptr) {
+        /* String-backed UFILE: there is no stream to read from. */
+        return;
+    }
+
+    pending = (int32_t)(buf->fLimit - buf->fPos);
+    if (f->fFileno == 0 && pending > 0) {
+        /* Avoid reading from stdin again while data is still available. */
+        return;
+    }
+
+    /* Slide whatever is unread down to the start of the buffer. */
+    if (pending != 0) {
+        u_memmove(f->fUCBuffer, buf->fPos, pending);
+    }
+
+    /* Cap the read: free units / (2 * min char size), or free units with no converter. */
+    room = UFILE_UCHARBUFFER_SIZE - pending;
+    unitDivisor = 1;
+    if (f->fConverter != nullptr) {
+        unitDivisor = 2 * ucnv_getMinCharSize(f->fConverter);
+    }
+    maxBytes = room / unitDivisor;
+    readCap = ufmt_min(maxBytes, UFILE_CHARBUFFER_SIZE);
+
+    if (f->fFileno == 0) {
+        /* stdin is consumed one line at a time. */
+        bytesRead = 0;
+        if (fgets(charBuffer, readCap, f->fFile) != nullptr) {
+            bytesRead = (int32_t)uprv_strlen(charBuffer);
+        }
+    }
+    else {
+        bytesRead = (int32_t)fread(charBuffer, sizeof(char), readCap, f->fFile);
+    }
+
+    bytesEnd = charBuffer + bytesRead;
+    out = f->fUCBuffer + pending;
+
+    if (f->fConverter == nullptr) {
+        /* No converter: invariant conversion, one unit per byte. */
+        u_charsToUChars(bytes, out, bytesRead);
+        out += bytesRead;
+    }
+    else {
+        ucnv_toUnicode(f->fConverter,
+            &out,
+            f->fUCBuffer + UFILE_UCHARBUFFER_SIZE,
+            &bytes,
+            bytesEnd,
+            nullptr,
+            (UBool)(feof(f->fFile) != 0),
+            &errorCode);
+    }
+
+    /* Publish the refreshed window: reading restarts at the buffer head. */
+    buf->fPos = buf->fBuffer;
+    buf->fLimit = out;
+}
+
+U_CAPI char16_t* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_fgets(char16_t     *s,
+        int32_t       n,
+        UFILE        *f)
+{
+    int32_t dataSize;
+    int32_t count;
+    char16_t *alias;
+    const char16_t *limit;
+    char16_t *sItr;
+    char16_t currDelim = 0;
+    u_localized_string *str;
+    /* Reads one line (delimiter included, at most n-1 units) into s and NUL-terminates it; returns s, or nullptr if n <= 0 or no data is available. */
+    if (n <= 0) {
+        /* Invalid size: there is no room even for the NUL terminator, so nothing is written. */
+        return nullptr;
+    }
+
+    /* fill the buffer if needed */
+    str = &f->str;
+    if (str->fPos >= str->fLimit) {
+        ufile_fill_uchar_buffer(f);
+    }
+
+    /* subtract 1 from n to compensate for the terminator */
+    --n;
+
+    /* determine the amount of data in the buffer */
+    dataSize = (int32_t)(str->fLimit - str->fPos);
+
+    /* if no characters are available, return nullptr (s is left untouched) */
+    if (dataSize == 0)
+        return nullptr;
+    /* currDelim: 0 = no delimiter seen yet, DELIM_CR = CR seen (an LF may still follow), 1 = line complete */
+    /* otherwise, iteratively fill the buffer and copy */
+    count = 0;
+    sItr = s;
+    currDelim = 0;
+    while (dataSize > 0 && count < n) {
+        alias = str->fPos;
+
+        /* Find how much to copy: the buffered data or the room left in s, whichever is smaller */
+        if (dataSize < (n - count)) {
+            limit = str->fLimit;
+        }
+        else {
+            limit = alias + (n - count);
+        }
+
+        if (!currDelim) {
+            /* Copy UChars until we find the first occurrence of a delimiter character */
+            while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
+                count++;
+                *(sItr++) = *(alias++);
+            }
+            /* Preserve the newline */
+            if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
+                if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
+                    currDelim = *alias;
+                }
+                else {
+                    currDelim = 1;  /* This isn't a newline, but it's used to say
+                                    that we should break later. We've checked all
+                                    possible newline combinations even across buffer
+                                    boundaries. */
+                }
+                count++;
+                *(sItr++) = *(alias++);
+            }
+        }
+        /* If we have a CRLF combination, preserve that too. */
+        if (alias < limit) {
+            if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
+                count++;
+                *(sItr++) = *(alias++);
+            }
+            currDelim = 1;  /* This isn't a newline, but it's used to say
+                            that we should break later. We've checked all
+                            possible newline combinations even across buffer
+                            boundaries. */
+        }
+
+        /* update the current buffer position */
+        str->fPos = alias;
+
+        /* if we found a delimiter */
+        if (currDelim == 1) {
+            /* break out */
+            break;
+        }
+
+        /* refill the buffer */
+        ufile_fill_uchar_buffer(f);
+
+        /* determine the amount of data in the buffer */
+        dataSize = (int32_t)(str->fLimit - str->fPos);
+    }
+
+    /* add the terminator and return s */
+    *sItr = 0x0000;
+    return s;
+}
+
+/*
+ * Reads one UTF-16 code unit into *ch, refilling the read buffer when it is
+ * empty. Returns true if a unit was read; otherwise stores U_EOF in *ch and
+ * returns false.
+ */
+U_CFUNC UBool U_EXPORT2
+ufile_getch(UFILE *f, char16_t *ch)
+{
+    u_localized_string *buf = &f->str;
+
+    if (buf->fPos >= buf->fLimit) {
+        /* Buffer exhausted: try to pull in more data before giving up. */
+        ufile_fill_uchar_buffer(f);
+        if (buf->fPos >= buf->fLimit) {
+            *ch = U_EOF;
+            return false;
+        }
+    }
+
+    /* At least one unit is buffered: hand it out and advance. */
+    *ch = *(buf->fPos)++;
+    return true;
+}
+
+/* Returns the next UTF-16 code unit from f; ufile_getch() stores U_EOF on failure. */
+U_CAPI char16_t U_EXPORT2 u_fgetc(UFILE *f)
+{
+    char16_t unit;
+    (void)ufile_getch(f, &unit);
+    return unit;
+}
+
+/*
+ * Reads one code point into *c32, combining a lead surrogate with the trail
+ * surrogate that follows it. Returns true on success. Returns false with
+ * *c32 == U_EOF when no data is left or the data ends right after a lead.
+ */
+U_CFUNC UBool U_EXPORT2
+ufile_getch32(UFILE *f, UChar32 *c32)
+{
+    u_localized_string *str = &f->str;
+
+    *c32 = U_EOF;
+    /* Refill when at most one unit is left (presumably so a pair is not split). */
+    if (str->fPos + 1 >= str->fLimit) {
+        ufile_fill_uchar_buffer(f);
+    }
+    if (str->fPos >= str->fLimit) {
+        return false;
+    }
+    *c32 = *(str->fPos)++;
+    if (!U_IS_LEAD(*c32)) {
+        return true;
+    }
+    if (str->fPos >= str->fLimit) {
+        *c32 = U_EOF; /* truncated pair at the end of the data */
+        return false;
+    }
+    /* U16_GET_SUPPLEMENTARY is undefined unless the next unit is a trail
+       surrogate; an unpaired lead is returned as-is, next unit left unread. */
+    if (U16_IS_TRAIL(*(str->fPos))) {
+        *c32 = U16_GET_SUPPLEMENTARY(*c32, *(str->fPos));
+        ++(str->fPos);
+    }
+    return true;
+}
+
+/* Returns the next code point from f; ufile_getch32() stores U_EOF on failure. */
+U_CAPI UChar32 U_EXPORT2 u_fgetcx(UFILE *f)
+{
+    UChar32 codePoint;
+    (void)ufile_getch32(f, &codePoint);
+    return codePoint;
+}
+
+/* Moves the read position of f back over ch if the units just before it encode ch;
+ * returns ch on success, U_EOF when too little is buffered or the units differ. */
+U_CAPI UChar32 U_EXPORT2
+u_fungetc(UChar32        ch,
+    UFILE        *f)
+{
+    u_localized_string *str = &f->str;
+    /* ch is a code point, so a character occupying two units is a supplementary
+       value. U_IS_LEAD() only recognises a lone surrogate code unit and never
+       matches a supplementary code point, hence U_IS_SUPPLEMENTARY() here. */
+    const UBool twoUnits = (UBool)U_IS_SUPPLEMENTARY(ch);
+
+    /* if we're at the beginning of the buffer, sorry! */
+    if (str->fPos == str->fBuffer
+        || (twoUnits && (str->fPos - 1) == str->fBuffer))
+    {
+        return U_EOF;
+    }
+
+    /* Compare in reverse reading order: trail surrogate first, then lead. */
+    if (twoUnits) {
+        if (*--(str->fPos) != U16_TRAIL(ch) || *--(str->fPos) != U16_LEAD(ch)) {
+            ch = U_EOF;
+        }
+    }
+    else if (*--(str->fPos) != ch) {
+        ch = U_EOF;
+    }
+    return ch;
+}
+
+/* Copies up to count UTF-16 units from f into chars, refilling the read buffer as
+ * needed; returns the number copied (0 for a non-positive count or when no data is left). */
+U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_file_read(    char16_t     *chars,
+    int32_t        count,
+    UFILE         *f)
+{
+    int32_t dataSize;
+    int32_t read = 0;
+    u_localized_string *str = &f->str;
+    /* A negative count would reach memcpy() as a negative length (a huge size_t),
+       and a zero count has nothing to copy: return before touching the buffer. */
+    if (count <= 0) {
+        return 0;
+    }
+
+    do {
+        /* determine the amount of data in the buffer */
+        dataSize = (int32_t)(str->fLimit - str->fPos);
+        if (dataSize <= 0) {
+            /* fill the buffer */
+            ufile_fill_uchar_buffer(f);
+            dataSize = (int32_t)(str->fLimit - str->fPos);
+        }
+        /* Make sure that we don't read too much */
+        if (dataSize > (count - read)) {
+            dataSize = count - read;
+        }
+        /* copy the data, then advance the running total and the buffer position */
+        memcpy(chars + read, str->fPos, dataSize * sizeof(char16_t));
+        read += dataSize;
+        str->fPos += dataSize;
+    }
+    while (dataSize != 0 && read < count);
+
+    return read;
+}
+#endif