diff options
Diffstat (limited to 'intl/icu/source/io/ustdio.cpp')
-rw-r--r-- | intl/icu/source/io/ustdio.cpp | 732 |
1 files changed, 732 insertions, 0 deletions
diff --git a/intl/icu/source/io/ustdio.cpp b/intl/icu/source/io/ustdio.cpp new file mode 100644 index 0000000000..4130f34044 --- /dev/null +++ b/intl/icu/source/io/ustdio.cpp @@ -0,0 +1,732 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ****************************************************************************** + * + * Copyright (C) 1998-2016, International Business Machines + * Corporation and others. All Rights Reserved. + * + ****************************************************************************** + * + * File ustdio.c + * + * Modification History: + * + * Date Name Description + * 11/18/98 stephen Creation. + * 03/12/99 stephen Modified for new C API. + * 07/19/99 stephen Fixed read() and gets() + ****************************************************************************** + */ + +#include "unicode/ustdio.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "cmemory.h" +#include "cstring.h" +#include "ufile.h" +#include "ufmt_cmn.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" + +#include <string.h> + +#define DELIM_LF 0x000A +#define DELIM_VT 0x000B +#define DELIM_FF 0x000C +#define DELIM_CR 0x000D +#define DELIM_NEL 0x0085 +#define DELIM_LS 0x2028 +#define DELIM_PS 0x2029 + +/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ +#if U_PLATFORM_USES_ONLY_WIN32_API +static const char16_t DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; +static const uint32_t DELIMITERS_LEN = 2; +/* TODO: Default newline writing should be detected based upon the converter being used. */ +#else +static const char16_t DELIMITERS [] = { DELIM_LF, 0x0000 }; +static const uint32_t DELIMITERS_LEN = 1; +#endif + +#define IS_FIRST_STRING_DELIMITER(c1) \ + (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ + || (c1) == DELIM_NEL \ + || (c1) == DELIM_LS \ + || (c1) == DELIM_PS) +#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) +#define IS_COMBINED_STRING_DELIMITER(c1, c2) \ + (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) + + +#if !UCONFIG_NO_TRANSLITERATION + +U_CAPI UTransliterator* U_EXPORT2 +u_fsettransliterator(UFILE *file, UFileDirection direction, + UTransliterator *adopt, UErrorCode *status) +{ + UTransliterator *old = nullptr; + + if(U_FAILURE(*status)) + { + return adopt; + } + + if(!file) + { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return adopt; + } + + if(direction & U_READ) + { + /** TODO: implement */ + *status = U_UNSUPPORTED_ERROR; + return adopt; + } + + if(adopt == nullptr) /* they are clearing it */ + { + if(file->fTranslit != nullptr) + { + /* TODO: Check side */ + old = file->fTranslit->translit; + uprv_free(file->fTranslit->buffer); + file->fTranslit->buffer=nullptr; + uprv_free(file->fTranslit); + file->fTranslit=nullptr; + } + } + else + { + if(file->fTranslit == nullptr) + { + file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); + if(!file->fTranslit) + { + *status = U_MEMORY_ALLOCATION_ERROR; + return adopt; + } + file->fTranslit->capacity = 0; + file->fTranslit->length = 0; + file->fTranslit->pos = 0; + file->fTranslit->buffer = nullptr; + } + else + { + old = file->fTranslit->translit; + ufile_flush_translit(file); + } + + file->fTranslit->translit = adopt; + } + + return old; +} + +static const char16_t * u_file_translit(UFILE *f, const char16_t *src, int32_t *count, UBool flush) +{ + int32_t newlen; + int32_t junkCount = 0; + int32_t textLength; + int32_t textLimit; + UTransPosition pos; + UErrorCode status = U_ZERO_ERROR; + + if(count == nullptr) + { + count = &junkCount; + } + + if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) + { + /* fast path */ + return src; + } + + /* First: slide over everything */ + if(f->fTranslit->length > f->fTranslit->pos) + { + memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, + (f->fTranslit->length - f->fTranslit->pos)*sizeof(char16_t)); + } + f->fTranslit->length -= f->fTranslit->pos; /* always */ + f->fTranslit->pos = 0; + + /* Calculate new buffer size needed */ + newlen = (*count + f->fTranslit->length) * 4; + + if(newlen > f->fTranslit->capacity) + { + if(f->fTranslit->buffer == nullptr) + { + f->fTranslit->buffer = (char16_t*)uprv_malloc(newlen * sizeof(char16_t)); + } + else + { + f->fTranslit->buffer = (char16_t*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(char16_t)); + } + /* Check for malloc/realloc failure. */ + if (f->fTranslit->buffer == nullptr) { + return nullptr; + } + f->fTranslit->capacity = newlen; + } + + /* Now, copy any data over */ + u_strncpy(f->fTranslit->buffer + f->fTranslit->length, + src, + *count); + f->fTranslit->length += *count; + + /* Now, translit in place as much as we can */ + if(flush == false) + { + textLength = f->fTranslit->length; + pos.contextStart = 0; + pos.contextLimit = textLength; + pos.start = 0; + pos.limit = textLength; + + utrans_transIncrementalUChars(f->fTranslit->translit, + f->fTranslit->buffer, /* because we shifted */ + &textLength, + f->fTranslit->capacity, + &pos, + &status); + + /* now: start/limit point to the transliterated text */ + /* Transliterated is [buffer..pos.start) */ + *count = pos.start; + f->fTranslit->pos = pos.start; + f->fTranslit->length = pos.limit; + + return f->fTranslit->buffer; + } + else + { + textLength = f->fTranslit->length; + textLimit = f->fTranslit->length; + + utrans_transUChars(f->fTranslit->translit, + f->fTranslit->buffer, + &textLength, + f->fTranslit->capacity, + 0, + &textLimit, + &status); + + /* out: converted len */ + *count = textLimit; + + /* Set pointers to 0 */ + f->fTranslit->pos = 0; + f->fTranslit->length = 0; + + return f->fTranslit->buffer; + } +} + +#endif + +void +ufile_flush_translit(UFILE *f) +{ +#if !UCONFIG_NO_TRANSLITERATION + if((!f)||(!f->fTranslit)) + return; +#endif + + u_file_write_flush(nullptr, 0, f, false, true); +} + + +void +ufile_flush_io(UFILE *f) +{ + if((!f) || (!f->fFile)) { + return; /* skip if no file */ + } + + u_file_write_flush(nullptr, 0, f, true, false); +} + + +void +ufile_close_translit(UFILE *f) +{ +#if !UCONFIG_NO_TRANSLITERATION + if((!f)||(!f->fTranslit)) + return; +#endif + + ufile_flush_translit(f); + +#if !UCONFIG_NO_TRANSLITERATION + if(f->fTranslit->translit) + utrans_close(f->fTranslit->translit); + + if(f->fTranslit->buffer) + { + uprv_free(f->fTranslit->buffer); + } + + uprv_free(f->fTranslit); + f->fTranslit = nullptr; +#endif +} + + +/* Input/output */ + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fputs(const char16_t *s, + UFILE *f) +{ + int32_t count = u_file_write(s, u_strlen(s), f); + count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); + return count; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fputc(UChar32 uc, + UFILE *f) +{ + char16_t buf[2]; + int32_t idx = 0; + UBool isError = false; + + U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError); + if (isError) { + return U_EOF; + } + return u_file_write(buf, idx, f) == idx ? uc : U_EOF; +} + + +U_CFUNC int32_t U_EXPORT2 +u_file_write_flush(const char16_t *chars, + int32_t count, + UFILE *f, + UBool flushIO, + UBool flushTranslit) +{ + /* Set up conversion parameters */ + UErrorCode status = U_ZERO_ERROR; + const char16_t *mySource = chars; + const char16_t *mySourceBegin; + const char16_t *mySourceEnd; + char charBuffer[UFILE_CHARBUFFER_SIZE]; + char *myTarget = charBuffer; + int32_t written = 0; + int32_t numConverted = 0; + + if (count < 0) { + count = u_strlen(chars); + } + +#if !UCONFIG_NO_TRANSLITERATION + if((f->fTranslit) && (f->fTranslit->translit)) + { + /* Do the transliteration */ + mySource = u_file_translit(f, chars, &count, flushTranslit); + } +#endif + + /* Write to a string. */ + if (!f->fFile) { + int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); + if (flushIO && charsLeft > count) { + count++; + } + written = ufmt_min(count, charsLeft); + u_strncpy(f->str.fPos, mySource, written); + f->str.fPos += written; + return written; + } + + mySourceEnd = mySource + count; + + /* Perform the conversion in a loop */ + do { + mySourceBegin = mySource; /* beginning location for this loop */ + status = U_ZERO_ERROR; + if(f->fConverter != nullptr) { /* We have a valid converter */ + ucnv_fromUnicode(f->fConverter, + &myTarget, + charBuffer + UFILE_CHARBUFFER_SIZE, + &mySource, + mySourceEnd, + nullptr, + flushIO, + &status); + } else { /*weiv: do the invariant conversion */ + int32_t convertChars = (int32_t) (mySourceEnd - mySource); + if (convertChars > UFILE_CHARBUFFER_SIZE) { + convertChars = UFILE_CHARBUFFER_SIZE; + status = U_BUFFER_OVERFLOW_ERROR; + } + u_UCharsToChars(mySource, myTarget, convertChars); + mySource += convertChars; + myTarget += convertChars; + } + numConverted = (int32_t)(myTarget - charBuffer); + + if (numConverted > 0) { + /* write the converted bytes */ + fwrite(charBuffer, + sizeof(char), + numConverted, + f->fFile); + + written += (int32_t) (mySource - mySourceBegin); + } + myTarget = charBuffer; + } + while(status == U_BUFFER_OVERFLOW_ERROR); + + /* return # of chars written */ + return written; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_file_write( const char16_t *chars, + int32_t count, + UFILE *f) +{ + return u_file_write_flush(chars,count,f,false,false); +} + + +/* private function used for buffering input */ +void +ufile_fill_uchar_buffer(UFILE *f) +{ + UErrorCode status; + const char *mySource; + const char *mySourceEnd; + char16_t *myTarget; + int32_t bufferSize; + int32_t maxCPBytes; + int32_t bytesRead; + int32_t availLength; + int32_t dataSize; + char charBuffer[UFILE_CHARBUFFER_SIZE]; + u_localized_string *str; + + if (f->fFile == nullptr) { + /* There is nothing to do. It's a string. */ + return; + } + + str = &f->str; + dataSize = (int32_t)(str->fLimit - str->fPos); + if (f->fFileno == 0 && dataSize > 0) { + /* Don't read from stdin too many times. There is still some data. */ + return; + } + + /* shift the buffer if it isn't empty */ + if(dataSize != 0) { + u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */ + } + + + /* record how much buffer space is available */ + availLength = UFILE_UCHARBUFFER_SIZE - dataSize; + + /* Determine the # of codepage bytes needed to fill our char16_t buffer */ + /* weiv: if converter is nullptr, we use invariant converter with charwidth = 1)*/ + maxCPBytes = availLength / (f->fConverter!=nullptr?(2*ucnv_getMinCharSize(f->fConverter)):1); + + /* Read in the data to convert */ + if (f->fFileno == 0) { + /* Special case. Read from stdin one line at a time. */ + char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); + bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0); + } + else { + /* A normal file */ + bytesRead = (int32_t)fread(charBuffer, + sizeof(char), + ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), + f->fFile); + } + + /* Set up conversion parameters */ + status = U_ZERO_ERROR; + mySource = charBuffer; + mySourceEnd = charBuffer + bytesRead; + myTarget = f->fUCBuffer + dataSize; + bufferSize = UFILE_UCHARBUFFER_SIZE; + + if(f->fConverter != nullptr) { /* We have a valid converter */ + /* Perform the conversion */ + ucnv_toUnicode(f->fConverter, + &myTarget, + f->fUCBuffer + bufferSize, + &mySource, + mySourceEnd, + nullptr, + (UBool)(feof(f->fFile) != 0), + &status); + + } else { /*weiv: do the invariant conversion */ + u_charsToUChars(mySource, myTarget, bytesRead); + myTarget += bytesRead; + } + + /* update the pointers into our array */ + str->fPos = str->fBuffer; + str->fLimit = myTarget; +} + +U_CAPI char16_t* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgets(char16_t *s, + int32_t n, + UFILE *f) +{ + int32_t dataSize; + int32_t count; + char16_t *alias; + const char16_t *limit; + char16_t *sItr; + char16_t currDelim = 0; + u_localized_string *str; + + if (n <= 0) { + /* Caller screwed up. We need to write the null terminatior. */ + return nullptr; + } + + /* fill the buffer if needed */ + str = &f->str; + if (str->fPos >= str->fLimit) { + ufile_fill_uchar_buffer(f); + } + + /* subtract 1 from n to compensate for the terminator */ + --n; + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + + /* if 0 characters were left, return 0 */ + if (dataSize == 0) + return nullptr; + + /* otherwise, iteratively fill the buffer and copy */ + count = 0; + sItr = s; + currDelim = 0; + while (dataSize > 0 && count < n) { + alias = str->fPos; + + /* Find how much to copy */ + if (dataSize < (n - count)) { + limit = str->fLimit; + } + else { + limit = alias + (n - count); + } + + if (!currDelim) { + /* Copy UChars until we find the first occurrence of a delimiter character */ + while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { + count++; + *(sItr++) = *(alias++); + } + /* Preserve the newline */ + if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { + if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { + currDelim = *alias; + } + else { + currDelim = 1; /* This isn't a newline, but it's used to say + that we should break later. We've checked all + possible newline combinations even across buffer + boundaries. */ + } + count++; + *(sItr++) = *(alias++); + } + } + /* If we have a CRLF combination, preserve that too. */ + if (alias < limit) { + if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { + count++; + *(sItr++) = *(alias++); + } + currDelim = 1; /* This isn't a newline, but it's used to say + that we should break later. We've checked all + possible newline combinations even across buffer + boundaries. */ + } + + /* update the current buffer position */ + str->fPos = alias; + + /* if we found a delimiter */ + if (currDelim == 1) { + /* break out */ + break; + } + + /* refill the buffer */ + ufile_fill_uchar_buffer(f); + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + } + + /* add the terminator and return s */ + *sItr = 0x0000; + return s; +} + +U_CFUNC UBool U_EXPORT2 +ufile_getch(UFILE *f, char16_t *ch) +{ + UBool isValidChar = false; + + *ch = U_EOF; + /* if we have an available character in the buffer, return it */ + if(f->str.fPos < f->str.fLimit){ + *ch = *(f->str.fPos)++; + isValidChar = true; + } + else { + /* otherwise, fill the buffer and return the next character */ + if(f->str.fPos >= f->str.fLimit) { + ufile_fill_uchar_buffer(f); + } + if(f->str.fPos < f->str.fLimit) { + *ch = *(f->str.fPos)++; + isValidChar = true; + } + } + return isValidChar; +} + +U_CAPI char16_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetc(UFILE *f) +{ + char16_t ch; + ufile_getch(f, &ch); + return ch; +} + +U_CFUNC UBool U_EXPORT2 +ufile_getch32(UFILE *f, UChar32 *c32) +{ + UBool isValidChar = false; + u_localized_string *str; + + *c32 = U_EOF; + + /* Fill the buffer if it is empty */ + str = &f->str; + if (str->fPos + 1 >= str->fLimit) { + ufile_fill_uchar_buffer(f); + } + + /* Get the next character in the buffer */ + if (str->fPos < str->fLimit) { + *c32 = *(str->fPos)++; + if (U_IS_LEAD(*c32)) { + if (str->fPos < str->fLimit) { + char16_t c16 = *(str->fPos)++; + *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); + isValidChar = true; + } + else { + *c32 = U_EOF; + } + } + else { + isValidChar = true; + } + } + + return isValidChar; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetcx(UFILE *f) +{ + UChar32 ch; + ufile_getch32(f, &ch); + return ch; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fungetc(UChar32 ch, + UFILE *f) +{ + u_localized_string *str; + + str = &f->str; + + /* if we're at the beginning of the buffer, sorry! */ + if (str->fPos == str->fBuffer + || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) + { + ch = U_EOF; + } + else { + /* otherwise, put the character back */ + /* Remember, read them back on in the reverse order. */ + if (U_IS_LEAD(ch)) { + if (*--(str->fPos) != U16_TRAIL(ch) + || *--(str->fPos) != U16_LEAD(ch)) + { + ch = U_EOF; + } + } + else if (*--(str->fPos) != ch) { + ch = U_EOF; + } + } + return ch; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_file_read( char16_t *chars, + int32_t count, + UFILE *f) +{ + int32_t dataSize; + int32_t read = 0; + u_localized_string *str = &f->str; + + do { + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + if (dataSize <= 0) { + /* fill the buffer */ + ufile_fill_uchar_buffer(f); + dataSize = (int32_t)(str->fLimit - str->fPos); + } + + /* Make sure that we don't read too much */ + if (dataSize > (count - read)) { + dataSize = count - read; + } + + /* copy the current data in the buffer */ + memcpy(chars + read, str->fPos, dataSize * sizeof(char16_t)); + + /* update number of items read */ + read += dataSize; + + /* update the current buffer position */ + str->fPos += dataSize; + } + while (dataSize != 0 && read < count); + + return read; +} +#endif |