summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/io/ustdio.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/io/ustdio.cpp')
-rw-r--r--intl/icu/source/io/ustdio.cpp732
1 files changed, 732 insertions, 0 deletions
diff --git a/intl/icu/source/io/ustdio.cpp b/intl/icu/source/io/ustdio.cpp
new file mode 100644
index 0000000000..4130f34044
--- /dev/null
+++ b/intl/icu/source/io/ustdio.cpp
@@ -0,0 +1,732 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ ******************************************************************************
+ *
+ * Copyright (C) 1998-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ ******************************************************************************
+ *
+ * File ustdio.cpp
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 11/18/98 stephen Creation.
+ * 03/12/99 stephen Modified for new C API.
+ * 07/19/99 stephen Fixed read() and gets()
+ ******************************************************************************
+ */
+
+#include "unicode/ustdio.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "ufile.h"
+#include "ufmt_cmn.h"
+#include "unicode/ucnv.h"
+#include "unicode/ustring.h"
+
+#include <string.h>
+
+/*
+ * UTF-16 code units treated as line delimiters by the reading code below:
+ * line feed, vertical tab, form feed, carriage return, next line (NEL),
+ * line separator and paragraph separator.
+ */
+#define DELIM_LF 0x000A
+#define DELIM_VT 0x000B
+#define DELIM_FF 0x000C
+#define DELIM_CR 0x000D
+#define DELIM_NEL 0x0085
+#define DELIM_LS 0x2028
+#define DELIM_PS 0x2029
+
+/*
+ * DELIMITERS is the newline sequence appended by u_fputs(); DELIMITERS_LEN is
+ * its length in char16_t units, not counting the trailing 0x0000.
+ * CR LF is used when U_PLATFORM_USES_ONLY_WIN32_API is set, LF otherwise.
+ */
+/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
+#if U_PLATFORM_USES_ONLY_WIN32_API
+static const char16_t DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
+static const uint32_t DELIMITERS_LEN = 2;
+/* TODO: Default newline writing should be detected based upon the converter being used. */
+#else
+static const char16_t DELIMITERS [] = { DELIM_LF, 0x0000 };
+static const uint32_t DELIMITERS_LEN = 1;
+#endif
+
+/*
+ * True when c1 can start a line delimiter: anything in the contiguous range
+ * DELIM_LF..DELIM_CR (which also covers VT and FF), or NEL, LS or PS.
+ * Note: the argument is evaluated more than once.
+ */
+#define IS_FIRST_STRING_DELIMITER(c1) \
+ (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
+ || (c1) == DELIM_NEL \
+ || (c1) == DELIM_LS \
+ || (c1) == DELIM_PS)
+/* True when c1 may be the first half of a two-unit delimiter (only CR). */
+#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
+/* True when c1 followed by c2 forms the two-unit delimiter CR LF. */
+#define IS_COMBINED_STRING_DELIMITER(c1, c2) \
+ (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
+
+
+#if !UCONFIG_NO_TRANSLITERATION
+
+/**
+ * Installs, replaces or clears the write-side transliterator of a UFILE.
+ *
+ * @param file      File to modify; nullptr sets U_ILLEGAL_ARGUMENT_ERROR.
+ * @param direction Any value with the U_READ bit set is rejected with
+ *                  U_UNSUPPORTED_ERROR (read-side transliteration is not
+ *                  implemented).
+ * @param adopt     Transliterator to install, or nullptr to clear the current
+ *                  one.
+ * @param status    In/out error code; nothing is done if it already holds a
+ *                  failure.
+ * @return The previously installed transliterator (nullptr if there was none)
+ *         on success. On any failure, adopt is returned unchanged and the
+ *         file is left as it was.
+ */
+U_CAPI UTransliterator* U_EXPORT2
+u_fsettransliterator(UFILE *file, UFileDirection direction,
+                     UTransliterator *adopt, UErrorCode *status)
+{
+    UTransliterator *old = nullptr;
+
+    if(U_FAILURE(*status))
+    {
+        return adopt;
+    }
+
+    if(!file)
+    {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return adopt;
+    }
+
+    if(direction & U_READ)
+    {
+        /** TODO: implement */
+        *status = U_UNSUPPORTED_ERROR;
+        return adopt;
+    }
+
+    if(adopt == nullptr) /* they are clearing it */
+    {
+        if(file->fTranslit != nullptr)
+        {
+            /*
+             * Push out any text still held in the transliteration buffer
+             * before the buffer is released, exactly as the replace path
+             * below does; otherwise that pending output is silently dropped.
+             */
+            ufile_flush_translit(file);
+
+            /* TODO: Check side */
+            old = file->fTranslit->translit;
+            uprv_free(file->fTranslit->buffer);
+            file->fTranslit->buffer=nullptr;
+            uprv_free(file->fTranslit);
+            file->fTranslit=nullptr;
+        }
+    }
+    else
+    {
+        if(file->fTranslit == nullptr)
+        {
+            /* First transliterator on this file: create the empty state. */
+            file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
+            if(!file->fTranslit)
+            {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                return adopt;
+            }
+            file->fTranslit->capacity = 0;
+            file->fTranslit->length = 0;
+            file->fTranslit->pos = 0;
+            file->fTranslit->buffer = nullptr;
+        }
+        else
+        {
+            /* Replacing: finish pending text with the old transliterator. */
+            old = file->fTranslit->translit;
+            ufile_flush_translit(file);
+        }
+
+        file->fTranslit->translit = adopt;
+    }
+
+    return old;
+}
+
+/**
+ * Runs buffered plus newly supplied text through the file's transliterator.
+ *
+ * Text that was already handed out (everything before fTranslit->pos) is
+ * discarded from the buffer, the new text is appended, and the result is
+ * transliterated in place.
+ *
+ * @param f     File whose transliterator state is used. If f, its translit
+ *              state or its transliterator is missing, src is returned
+ *              untouched and *count is left alone.
+ * @param src   Text to append; may be nullptr when *count is 0.
+ * @param count In: number of char16_t units in src. Out: number of units at
+ *              the start of the returned buffer that are ready to be written.
+ *              May be nullptr, in which case a length of 0 is assumed.
+ * @param flush false: incremental transliteration, unfinished text stays
+ *              buffered for the next call. true: everything is transliterated
+ *              and the buffer is emptied.
+ * @return The internal transliteration buffer (or src on the fast path), or
+ *         nullptr with *count set to 0 if the buffer could not be grown. On
+ *         such a failure the previously buffered text is kept intact.
+ */
+static const char16_t * u_file_translit(UFILE *f, const char16_t *src, int32_t *count, UBool flush)
+{
+    int32_t newlen;
+    int32_t junkCount = 0;
+    int32_t textLength;
+    int32_t textLimit;
+    UTransPosition pos;
+    UErrorCode status = U_ZERO_ERROR;
+
+    if(count == nullptr)
+    {
+        count = &junkCount;
+    }
+
+    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
+    {
+        /* fast path */
+        return src;
+    }
+
+    /* First: slide over everything */
+    if(f->fTranslit->length > f->fTranslit->pos)
+    {
+        memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
+            (f->fTranslit->length - f->fTranslit->pos)*sizeof(char16_t));
+    }
+    f->fTranslit->length -= f->fTranslit->pos; /* always */
+    f->fTranslit->pos = 0;
+
+    /* Refuse lengths whose fourfold size below would overflow int32_t. */
+    if(*count < 0 || *count > (INT32_MAX / 4) - f->fTranslit->length)
+    {
+        *count = 0;
+        return nullptr;
+    }
+
+    /* Calculate new buffer size needed */
+    newlen = (*count + f->fTranslit->length) * 4;
+
+    if(newlen > f->fTranslit->capacity)
+    {
+        /*
+         * Grow through a temporary so that a failed (re)allocation neither
+         * leaks the old buffer nor leaves length/capacity describing memory
+         * that is no longer reachable.
+         */
+        char16_t *grown;
+        if(f->fTranslit->buffer == nullptr)
+        {
+            grown = (char16_t*)uprv_malloc(newlen * sizeof(char16_t));
+        }
+        else
+        {
+            grown = (char16_t*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(char16_t));
+        }
+        /* Check for malloc/realloc failure. */
+        if (grown == nullptr) {
+            *count = 0; /* nothing is ready to be written */
+            return nullptr;
+        }
+        f->fTranslit->buffer = grown;
+        f->fTranslit->capacity = newlen;
+    }
+
+    /*
+     * Now, copy any data over. The text is counted, so copy exactly *count
+     * units; a string copy would stop at an embedded U+0000 while the length
+     * below still advanced by *count.
+     */
+    if(*count > 0)
+    {
+        u_memcpy(f->fTranslit->buffer + f->fTranslit->length, src, *count);
+        f->fTranslit->length += *count;
+    }
+
+    /* Now, translit in place as much as we can */
+    /* Note: the UErrorCode from the transliteration calls is not examined. */
+    if(flush == false)
+    {
+        textLength = f->fTranslit->length;
+        pos.contextStart = 0;
+        pos.contextLimit = textLength;
+        pos.start = 0;
+        pos.limit = textLength;
+
+        utrans_transIncrementalUChars(f->fTranslit->translit,
+            f->fTranslit->buffer, /* because we shifted */
+            &textLength,
+            f->fTranslit->capacity,
+            &pos,
+            &status);
+
+        /* now: start/limit point to the transliterated text */
+        /* Transliterated is [buffer..pos.start) */
+        *count = pos.start;
+        f->fTranslit->pos = pos.start;
+        f->fTranslit->length = pos.limit;
+
+        return f->fTranslit->buffer;
+    }
+    else
+    {
+        textLength = f->fTranslit->length;
+        textLimit = f->fTranslit->length;
+
+        utrans_transUChars(f->fTranslit->translit,
+            f->fTranslit->buffer,
+            &textLength,
+            f->fTranslit->capacity,
+            0,
+            &textLimit,
+            &status);
+
+        /* out: converted len */
+        *count = textLimit;
+
+        /* Set pointers to 0 */
+        f->fTranslit->pos = 0;
+        f->fTranslit->length = 0;
+
+        return f->fTranslit->buffer;
+    }
+}
+
+#endif
+
+/*
+ * Flushes text held by the file's transliterator by issuing an empty write
+ * with the transliterator-flush flag set (and the I/O-flush flag clear).
+ * When transliteration support is compiled in, a null file or a file without
+ * transliteration state is ignored.
+ */
+void
+ufile_flush_translit(UFILE *f)
+{
+#if !UCONFIG_NO_TRANSLITERATION
+    if (f == nullptr || f->fTranslit == nullptr) {
+        return;
+    }
+#endif
+
+    u_file_write_flush(nullptr, 0, f, false, true);
+}
+
+
+/*
+ * Flushes the output converter of a file-backed UFILE by issuing an empty
+ * write with the I/O-flush flag set (and the transliterator-flush flag
+ * clear). A null UFILE or one without an underlying FILE is ignored.
+ */
+void
+ufile_flush_io(UFILE *f)
+{
+    if (f != nullptr && f->fFile != nullptr) {
+        u_file_write_flush(nullptr, 0, f, true, false);
+    }
+}
+
+
+/*
+ * Flushes and then tears down the transliteration state of a UFILE: the
+ * transliterator is closed, its buffer and the state struct are freed, and
+ * f->fTranslit is reset to nullptr. With transliteration compiled out only
+ * the flush call remains.
+ */
+void
+ufile_close_translit(UFILE *f)
+{
+#if !UCONFIG_NO_TRANSLITERATION
+    if (f == nullptr || f->fTranslit == nullptr) {
+        return;
+    }
+#endif
+
+    /* Emit whatever the transliterator is still holding back. */
+    ufile_flush_translit(f);
+
+#if !UCONFIG_NO_TRANSLITERATION
+    if (f->fTranslit->translit != nullptr) {
+        utrans_close(f->fTranslit->translit);
+    }
+
+    if (f->fTranslit->buffer != nullptr) {
+        uprv_free(f->fTranslit->buffer);
+    }
+
+    uprv_free(f->fTranslit);
+    f->fTranslit = nullptr;
+#endif
+}
+
+
+/* Input/output */
+
+/**
+ * Writes the NUL-terminated string s followed by the platform newline
+ * sequence (DELIMITERS).
+ *
+ * @return The sum of the two u_file_write() results: units reported written
+ *         for the text plus units reported written for the newline.
+ */
+U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_fputs(const char16_t *s,
+        UFILE *f)
+{
+    const int32_t textUnits = u_file_write(s, u_strlen(s), f);
+    const int32_t newlineUnits = u_file_write(DELIMITERS, DELIMITERS_LEN, f);
+
+    return textUnits + newlineUnits;
+}
+
+/**
+ * Writes a single code point.
+ *
+ * The code point is encoded as one or two UTF-16 units and handed to
+ * u_file_write().
+ *
+ * @return uc when every encoded unit was reported written; U_EOF when the
+ *         code point cannot be encoded or the write came up short.
+ */
+U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_fputc(UChar32 uc,
+        UFILE *f)
+{
+    char16_t units[2];
+    int32_t unitCount = 0;
+    UBool appendFailed = false;
+
+    U16_APPEND(units, unitCount, UPRV_LENGTHOF(units), uc, appendFailed);
+    if (appendFailed) {
+        return U_EOF;
+    }
+
+    if (u_file_write(units, unitCount, f) != unitCount) {
+        return U_EOF;
+    }
+    return uc;
+}
+
+
+/**
+ * Core write routine: optionally transliterates the text, then either copies
+ * it into a string-backed UFILE or converts it to bytes and writes those to
+ * the underlying FILE.
+ *
+ * @param chars         Text to write; may be nullptr when count is 0 (used by
+ *                      the flush helpers).
+ * @param count         Number of char16_t units in chars; a negative value
+ *                      means chars is NUL-terminated and is measured with
+ *                      u_strlen().
+ * @param f             Destination.
+ * @param flushIO       Passed to the converter as its flush flag. For a
+ *                      string destination it additionally makes the copy
+ *                      extend by one unit when there is room.
+ *                      NOTE(review): presumably that extra unit is the
+ *                      source's terminating NUL - confirm against callers.
+ * @param flushTranslit Passed to the transliteration step as its flush flag.
+ * @return Number of char16_t units written (after transliteration, if any).
+ *         For a file destination, a chunk whose bytes could not all be
+ *         written is not counted and writing stops there.
+ */
+U_CFUNC int32_t U_EXPORT2
+u_file_write_flush(const char16_t *chars,
+                   int32_t count,
+                   UFILE *f,
+                   UBool flushIO,
+                   UBool flushTranslit)
+{
+    /* Set up conversion parameters */
+    UErrorCode status = U_ZERO_ERROR;
+    const char16_t *mySource = chars;
+    const char16_t *mySourceBegin;
+    const char16_t *mySourceEnd;
+    char charBuffer[UFILE_CHARBUFFER_SIZE];
+    char *myTarget = charBuffer;
+    int32_t written = 0;
+    int32_t numConverted = 0;
+
+    if (count < 0) {
+        count = u_strlen(chars);
+    }
+
+#if !UCONFIG_NO_TRANSLITERATION
+    if((f->fTranslit) && (f->fTranslit->translit))
+    {
+        /* Do the transliteration */
+        mySource = u_file_translit(f, chars, &count, flushTranslit);
+    }
+#endif
+
+    /*
+     * Without source text there is nothing to copy or convert. This covers
+     * both the empty flush calls and a transliteration step that returned no
+     * buffer; never pair a null source with a stale non-zero count.
+     */
+    if (mySource == nullptr) {
+        count = 0;
+    }
+
+    /* Write to a string. */
+    if (!f->fFile) {
+        if (mySource == nullptr) {
+            return 0; /* nothing to copy from */
+        }
+        int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
+        if (flushIO && charsLeft > count) {
+            count++;
+        }
+        written = ufmt_min(count, charsLeft);
+        u_strncpy(f->str.fPos, mySource, written);
+        f->str.fPos += written;
+        return written;
+    }
+
+    mySourceEnd = mySource + count;
+
+    /* Perform the conversion in a loop */
+    do {
+        mySourceBegin = mySource; /* beginning location for this loop */
+        status = U_ZERO_ERROR;
+        if(f->fConverter != nullptr) { /* We have a valid converter */
+            ucnv_fromUnicode(f->fConverter,
+                &myTarget,
+                charBuffer + UFILE_CHARBUFFER_SIZE,
+                &mySource,
+                mySourceEnd,
+                nullptr,
+                flushIO,
+                &status);
+        } else { /*weiv: do the invariant conversion */
+            int32_t convertChars = (int32_t) (mySourceEnd - mySource);
+            if (convertChars > UFILE_CHARBUFFER_SIZE) {
+                /* Too much for one pass: fill the buffer and loop again. */
+                convertChars = UFILE_CHARBUFFER_SIZE;
+                status = U_BUFFER_OVERFLOW_ERROR;
+            }
+            u_UCharsToChars(mySource, myTarget, convertChars);
+            mySource += convertChars;
+            myTarget += convertChars;
+        }
+        numConverted = (int32_t)(myTarget - charBuffer);
+
+        if (numConverted > 0) {
+            /* write the converted bytes */
+            size_t bytesPut = fwrite(charBuffer,
+                sizeof(char),
+                numConverted,
+                f->fFile);
+
+            if (bytesPut != (size_t)numConverted) {
+                /*
+                 * Short write: the stream is in error. Do not report this
+                 * chunk as written and stop feeding more data.
+                 */
+                break;
+            }
+
+            written += (int32_t) (mySource - mySourceBegin);
+        }
+        myTarget = charBuffer;
+    }
+    while(status == U_BUFFER_OVERFLOW_ERROR);
+
+    /* return # of chars written */
+    return written;
+}
+
+/**
+ * Writes count char16_t units from chars to f.
+ *
+ * Thin wrapper around u_file_write_flush() with both the I/O-flush and the
+ * transliterator-flush flags set to false; its return value is passed through
+ * unchanged.
+ */
+U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_file_write( const char16_t *chars,
+ int32_t count,
+ UFILE *f)
+{
+ return u_file_write_flush(chars,count,f,false,false);
+}
+
+
+/*
+ * Private helper used for buffering input.
+ *
+ * Moves any unread char16_t units to the front of f->fUCBuffer, reads more
+ * bytes from the underlying FILE, converts them and appends the result, then
+ * resets str.fPos/str.fLimit to describe the refreshed data.
+ *
+ * Does nothing for a string-backed UFILE (no FILE), and does nothing for
+ * file number 0 while unread data is still buffered.
+ */
+void
+ufile_fill_uchar_buffer(UFILE *f)
+{
+    char rawBytes[UFILE_CHARBUFFER_SIZE];
+
+    if (f->fFile == nullptr) {
+        /* There is nothing to do. It's a string. */
+        return;
+    }
+
+    u_localized_string *text = &f->str;
+    const int32_t pending = (int32_t)(text->fLimit - text->fPos);
+
+    if (f->fFileno == 0 && pending > 0) {
+        /* Don't read from stdin too many times. There is still some data. */
+        return;
+    }
+
+    /* Keep the unread tail by shifting it to the start of the buffer. */
+    if (pending != 0) {
+        u_memmove(f->fUCBuffer, text->fPos, pending); /* not accessing beyond memory */
+    }
+
+    /* Free room in the char16_t buffer, in units. */
+    const int32_t roomUnits = UFILE_UCHARBUFFER_SIZE - pending;
+
+    /*
+     * Number of codepage bytes to request. Without a converter the invariant
+     * conversion is used, where one byte becomes one unit.
+     */
+    int32_t divisor = 1;
+    if (f->fConverter != nullptr) {
+        divisor = 2 * ucnv_getMinCharSize(f->fConverter);
+    }
+    const int32_t maxCPBytes = roomUnits / divisor;
+    const int32_t request = ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE);
+
+    /* Read in the data to convert */
+    int32_t bytesRead;
+    if (f->fFileno == 0) {
+        /* Special case. Read from stdin one line at a time. */
+        if (fgets(rawBytes, request, f->fFile) != nullptr) {
+            bytesRead = (int32_t)uprv_strlen(rawBytes);
+        }
+        else {
+            bytesRead = 0;
+        }
+    }
+    else {
+        /* A normal file */
+        bytesRead = (int32_t)fread(rawBytes, sizeof(char), request, f->fFile);
+    }
+
+    /* Convert the bytes, appending after the preserved tail. */
+    char16_t *writeCursor = f->fUCBuffer + pending;
+
+    if (f->fConverter == nullptr) {
+        /*weiv: do the invariant conversion */
+        u_charsToUChars(rawBytes, writeCursor, bytesRead);
+        writeCursor += bytesRead;
+    }
+    else {
+        /* We have a valid converter; flush it once the FILE is at EOF. */
+        UErrorCode status = U_ZERO_ERROR;
+        const char *readCursor = rawBytes;
+
+        ucnv_toUnicode(f->fConverter,
+            &writeCursor,
+            f->fUCBuffer + UFILE_UCHARBUFFER_SIZE,
+            &readCursor,
+            rawBytes + bytesRead,
+            nullptr,
+            (UBool)(feof(f->fFile) != 0),
+            &status);
+    }
+
+    /* update the pointers into our array */
+    text->fPos = text->fBuffer;
+    text->fLimit = writeCursor;
+}
+
+/**
+ * Reads one line of text into s.
+ *
+ * Copies at most n-1 char16_t units from f into s and NUL-terminates the
+ * result. Copying stops after the first line delimiter, which is kept in the
+ * output; a CR immediately followed by LF is kept as a pair, even when the
+ * two units arrive in different buffer fills.
+ *
+ * @param s Destination with room for n units.
+ * @param n Capacity of s in units, including the terminator.
+ * @param f Source.
+ * @return s on success; nullptr when n <= 0 or when no data is available
+ *         (in both of these cases s is not written to).
+ */
+U_CAPI char16_t* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_fgets(char16_t *s,
+ int32_t n,
+ UFILE *f)
+{
+ int32_t dataSize;
+ int32_t count;
+ char16_t *alias;
+ const char16_t *limit;
+ char16_t *sItr;
+ /* 0: no delimiter seen yet; DELIM_CR: a CR was copied and an LF may still
+    follow; 1: the line is complete. */
+ char16_t currDelim = 0;
+ u_localized_string *str;
+
+ if (n <= 0) {
+ /* Caller error: there is no room even for the NUL terminator. */
+ return nullptr;
+ }
+
+ /* fill the buffer if needed */
+ str = &f->str;
+ if (str->fPos >= str->fLimit) {
+ ufile_fill_uchar_buffer(f);
+ }
+
+ /* subtract 1 from n to compensate for the terminator */
+ --n;
+
+ /* determine the amount of data in the buffer */
+ dataSize = (int32_t)(str->fLimit - str->fPos);
+
+ /* if 0 characters were left, return 0 */
+ if (dataSize == 0)
+ return nullptr;
+
+ /* otherwise, iteratively fill the buffer and copy */
+ count = 0;
+ sItr = s;
+ currDelim = 0;
+ while (dataSize > 0 && count < n) {
+ alias = str->fPos;
+
+ /* Find how much to copy */
+ /* limit is the end of the buffered data or the end of the remaining
+    output room, whichever comes first. */
+ if (dataSize < (n - count)) {
+ limit = str->fLimit;
+ }
+ else {
+ limit = alias + (n - count);
+ }
+
+ if (!currDelim) {
+ /* Copy UChars until we find the first occurrence of a delimiter character */
+ while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
+ count++;
+ *(sItr++) = *(alias++);
+ }
+ /* Preserve the newline */
+ if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
+ if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
+ /* Remember the CR so a following LF can be matched below. */
+ currDelim = *alias;
+ }
+ else {
+ currDelim = 1; /* This isn't a newline, but it's used to say
+ that we should break later. We've checked all
+ possible newline combinations even across buffer
+ boundaries. */
+ }
+ count++;
+ *(sItr++) = *(alias++);
+ }
+ }
+ /* If we have a CRLF combination, preserve that too. */
+ /* Reaching here with alias < limit implies currDelim is already set:
+    the scan above only stops short of limit at a delimiter. */
+ if (alias < limit) {
+ if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
+ count++;
+ *(sItr++) = *(alias++);
+ }
+ currDelim = 1; /* This isn't a newline, but it's used to say
+ that we should break later. We've checked all
+ possible newline combinations even across buffer
+ boundaries. */
+ }
+
+ /* update the current buffer position */
+ str->fPos = alias;
+
+ /* if we found a delimiter */
+ if (currDelim == 1) {
+ /* break out */
+ break;
+ }
+
+ /* refill the buffer */
+ ufile_fill_uchar_buffer(f);
+
+ /* determine the amount of data in the buffer */
+ dataSize = (int32_t)(str->fLimit - str->fPos);
+ }
+
+ /* add the terminator and return s */
+ *sItr = 0x0000;
+ return s;
+}
+
+/**
+ * Fetches the next char16_t unit from f.
+ *
+ * If the buffer is exhausted it is refilled once before giving up.
+ *
+ * @param f  Source.
+ * @param ch Receives the unit, or U_EOF when none is available.
+ * @return true when a unit was stored in *ch, false otherwise.
+ */
+U_CFUNC UBool U_EXPORT2
+ufile_getch(UFILE *f, char16_t *ch)
+{
+    *ch = U_EOF;
+
+    if (f->str.fPos >= f->str.fLimit) {
+        /* Nothing buffered: try to pull in more data. */
+        ufile_fill_uchar_buffer(f);
+
+        if (f->str.fPos >= f->str.fLimit) {
+            /* Still nothing; *ch stays U_EOF. */
+            return false;
+        }
+    }
+
+    *ch = *(f->str.fPos)++;
+    return true;
+}
+
+/**
+ * Reads one char16_t unit from f.
+ *
+ * Returns whatever ufile_getch() stored in its output argument; its boolean
+ * result is ignored here.
+ */
+U_CAPI char16_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_fgetc(UFILE *f)
+{
+ char16_t ch;
+ ufile_getch(f, &ch);
+ return ch;
+}
+
+/**
+ * Fetches the next code point from f, combining a surrogate pair into one
+ * supplementary code point.
+ *
+ * The buffer is refilled first whenever fewer than two units are buffered,
+ * so that both halves of a pair can be seen together.
+ *
+ * @param f   Source.
+ * @param c32 Receives the code point, or U_EOF when none could be read.
+ * @return true when a code point was stored in *c32. A lead surrogate that is
+ *         followed by something other than a trail surrogate is returned by
+ *         itself and the following unit is left unread. A lead surrogate that
+ *         is the very last buffered unit is consumed and reported as
+ *         U_EOF/false.
+ */
+U_CFUNC UBool U_EXPORT2
+ufile_getch32(UFILE *f, UChar32 *c32)
+{
+    UBool isValidChar = false;
+    u_localized_string *str;
+
+    *c32 = U_EOF;
+
+    /* Fill the buffer if it is empty */
+    str = &f->str;
+    if (str->fPos + 1 >= str->fLimit) {
+        ufile_fill_uchar_buffer(f);
+    }
+
+    /* Get the next character in the buffer */
+    if (str->fPos < str->fLimit) {
+        *c32 = *(str->fPos)++;
+        if (U_IS_LEAD(*c32)) {
+            if (str->fPos < str->fLimit) {
+                /*
+                 * Only combine with a genuine trail surrogate. Blindly
+                 * combining with whatever follows would produce a bogus code
+                 * point and swallow the next character of malformed text.
+                 */
+                if (U16_IS_TRAIL(*(str->fPos))) {
+                    char16_t c16 = *(str->fPos)++;
+                    *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
+                }
+                isValidChar = true;
+            }
+            else {
+                *c32 = U_EOF;
+            }
+        }
+        else {
+            isValidChar = true;
+        }
+    }
+
+    return isValidChar;
+}
+
+/**
+ * Reads one code point from f.
+ *
+ * Returns whatever ufile_getch32() stored in its output argument; its boolean
+ * result is ignored here.
+ */
+U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_fgetcx(UFILE *f)
+{
+ UChar32 ch;
+ ufile_getch32(f, &ch);
+ return ch;
+}
+
+/**
+ * Pushes the most recently read code point back onto f.
+ *
+ * Nothing is stored: the read position is simply stepped back over the units
+ * that encode ch, and those units must match what is already in the buffer.
+ *
+ * @param ch Code point to un-read. A supplementary code point occupies two
+ *           units (lead + trail); anything else occupies one.
+ * @param f  File to modify.
+ * @return ch on success; U_EOF when the read position is too close to the
+ *         start of the buffer or the buffered units do not match ch. On a
+ *         mismatch the read position has still been moved back over the
+ *         units that were compared.
+ */
+U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_fungetc(UChar32 ch,
+          UFILE *f)
+{
+    u_localized_string *str;
+    /*
+     * ch is a code point, not a UTF-16 unit: it needs two units exactly when
+     * it is supplementary. Testing U_IS_LEAD(ch) here would never be true for
+     * a supplementary character, making it impossible to push one back.
+     */
+    const UBool needsTwoUnits = (UBool)U_IS_SUPPLEMENTARY(ch);
+
+    str = &f->str;
+
+    /* if we're at the beginning of the buffer, sorry! */
+    if (str->fPos == str->fBuffer
+        || (needsTwoUnits && (str->fPos - 1) == str->fBuffer))
+    {
+        ch = U_EOF;
+    }
+    else {
+        /* otherwise, put the character back */
+        /* Remember, read them back on in the reverse order. */
+        if (needsTwoUnits) {
+            if (*--(str->fPos) != U16_TRAIL(ch)
+                || *--(str->fPos) != U16_LEAD(ch))
+            {
+                ch = U_EOF;
+            }
+        }
+        else if (*--(str->fPos) != ch) {
+            ch = U_EOF;
+        }
+    }
+    return ch;
+}
+
+/**
+ * Reads up to count char16_t units from f into chars.
+ *
+ * Buffered data is consumed first; the buffer is refilled whenever it runs
+ * empty, until count units have been copied or a refill yields nothing.
+ *
+ * @param chars Destination with room for count units.
+ * @param count Maximum number of units to read.
+ * @param f     Source.
+ * @return Number of units copied into chars; 0 when chars is nullptr or
+ *         count is not positive.
+ */
+U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
+u_file_read( char16_t *chars,
+             int32_t count,
+             UFILE *f)
+{
+    int32_t dataSize;
+    int32_t read = 0;
+    u_localized_string *str = &f->str;
+
+    /*
+     * A negative count would otherwise turn into a negative dataSize below
+     * and from there into an enormous memcpy length.
+     */
+    if (chars == nullptr || count <= 0) {
+        return 0;
+    }
+
+    do {
+
+        /* determine the amount of data in the buffer */
+        dataSize = (int32_t)(str->fLimit - str->fPos);
+        if (dataSize <= 0) {
+            /* fill the buffer */
+            ufile_fill_uchar_buffer(f);
+            dataSize = (int32_t)(str->fLimit - str->fPos);
+        }
+
+        /* Make sure that we don't read too much */
+        if (dataSize > (count - read)) {
+            dataSize = count - read;
+        }
+
+        /* Nothing (more) available: stop without touching memcpy. */
+        if (dataSize <= 0) {
+            break;
+        }
+
+        /* copy the current data in the buffer */
+        memcpy(chars + read, str->fPos, dataSize * sizeof(char16_t));
+
+        /* update number of items read */
+        read += dataSize;
+
+        /* update the current buffer position */
+        str->fPos += dataSize;
+    }
+    while (read < count);
+
+    return read;
+}
+#endif