// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * * Copyright (C) 1998-2016, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * * File ustdio.c * * Modification History: * * Date Name Description * 11/18/98 stephen Creation. * 03/12/99 stephen Modified for new C API. * 07/19/99 stephen Fixed read() and gets() ****************************************************************************** */ #include "unicode/ustdio.h" #if !UCONFIG_NO_CONVERSION #include "unicode/putil.h" #include "cmemory.h" #include "cstring.h" #include "ufile.h" #include "ufmt_cmn.h" #include "unicode/ucnv.h" #include "unicode/ustring.h" #include #define DELIM_LF 0x000A #define DELIM_VT 0x000B #define DELIM_FF 0x000C #define DELIM_CR 0x000D #define DELIM_NEL 0x0085 #define DELIM_LS 0x2028 #define DELIM_PS 0x2029 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ #if U_PLATFORM_USES_ONLY_WIN32_API static const char16_t DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; static const uint32_t DELIMITERS_LEN = 2; /* TODO: Default newline writing should be detected based upon the converter being used. */ #else static const char16_t DELIMITERS [] = { DELIM_LF, 0x0000 }; static const uint32_t DELIMITERS_LEN = 1; #endif #define IS_FIRST_STRING_DELIMITER(c1) \ (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ || (c1) == DELIM_NEL \ || (c1) == DELIM_LS \ || (c1) == DELIM_PS) #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) #define IS_COMBINED_STRING_DELIMITER(c1, c2) \ (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) #if !UCONFIG_NO_TRANSLITERATION U_CAPI UTransliterator* U_EXPORT2 u_fsettransliterator(UFILE *file, UFileDirection direction, UTransliterator *adopt, UErrorCode *status) { UTransliterator *old = nullptr; if(U_FAILURE(*status)) { return adopt; } if(!file) { *status = U_ILLEGAL_ARGUMENT_ERROR; return adopt; } if(direction & U_READ) { /** TODO: implement */ *status = U_UNSUPPORTED_ERROR; return adopt; } if(adopt == nullptr) /* they are clearing it */ { if(file->fTranslit != nullptr) { /* TODO: Check side */ old = file->fTranslit->translit; uprv_free(file->fTranslit->buffer); file->fTranslit->buffer=nullptr; uprv_free(file->fTranslit); file->fTranslit=nullptr; } } else { if(file->fTranslit == nullptr) { file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); if(!file->fTranslit) { *status = U_MEMORY_ALLOCATION_ERROR; return adopt; } file->fTranslit->capacity = 0; file->fTranslit->length = 0; file->fTranslit->pos = 0; file->fTranslit->buffer = nullptr; } else { old = file->fTranslit->translit; ufile_flush_translit(file); } file->fTranslit->translit = adopt; } return old; } static const char16_t * u_file_translit(UFILE *f, const char16_t *src, int32_t *count, UBool flush) { int32_t newlen; int32_t junkCount = 0; int32_t textLength; int32_t textLimit; UTransPosition pos; UErrorCode status = U_ZERO_ERROR; if(count == nullptr) { count = &junkCount; } if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) { /* fast path */ return src; } /* First: slide over everything */ if(f->fTranslit->length > f->fTranslit->pos) { memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, (f->fTranslit->length - f->fTranslit->pos)*sizeof(char16_t)); } f->fTranslit->length -= f->fTranslit->pos; /* always */ f->fTranslit->pos = 0; /* Calculate new buffer size needed */ newlen = (*count + f->fTranslit->length) * 4; if(newlen > f->fTranslit->capacity) { if(f->fTranslit->buffer == nullptr) { f->fTranslit->buffer = (char16_t*)uprv_malloc(newlen * sizeof(char16_t)); } else { f->fTranslit->buffer = (char16_t*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(char16_t)); } /* Check for malloc/realloc failure. */ if (f->fTranslit->buffer == nullptr) { return nullptr; } f->fTranslit->capacity = newlen; } /* Now, copy any data over */ u_strncpy(f->fTranslit->buffer + f->fTranslit->length, src, *count); f->fTranslit->length += *count; /* Now, translit in place as much as we can */ if(flush == false) { textLength = f->fTranslit->length; pos.contextStart = 0; pos.contextLimit = textLength; pos.start = 0; pos.limit = textLength; utrans_transIncrementalUChars(f->fTranslit->translit, f->fTranslit->buffer, /* because we shifted */ &textLength, f->fTranslit->capacity, &pos, &status); /* now: start/limit point to the transliterated text */ /* Transliterated is [buffer..pos.start) */ *count = pos.start; f->fTranslit->pos = pos.start; f->fTranslit->length = pos.limit; return f->fTranslit->buffer; } else { textLength = f->fTranslit->length; textLimit = f->fTranslit->length; utrans_transUChars(f->fTranslit->translit, f->fTranslit->buffer, &textLength, f->fTranslit->capacity, 0, &textLimit, &status); /* out: converted len */ *count = textLimit; /* Set pointers to 0 */ f->fTranslit->pos = 0; f->fTranslit->length = 0; return f->fTranslit->buffer; } } #endif void ufile_flush_translit(UFILE *f) { #if !UCONFIG_NO_TRANSLITERATION if((!f)||(!f->fTranslit)) return; #endif u_file_write_flush(nullptr, 0, f, false, true); } void ufile_flush_io(UFILE *f) { if((!f) || (!f->fFile)) { return; /* skip if no file */ } u_file_write_flush(nullptr, 0, f, true, false); } void ufile_close_translit(UFILE *f) { #if !UCONFIG_NO_TRANSLITERATION if((!f)||(!f->fTranslit)) return; #endif ufile_flush_translit(f); #if !UCONFIG_NO_TRANSLITERATION if(f->fTranslit->translit) utrans_close(f->fTranslit->translit); if(f->fTranslit->buffer) { uprv_free(f->fTranslit->buffer); } uprv_free(f->fTranslit); f->fTranslit = nullptr; #endif } /* Input/output */ U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ u_fputs(const char16_t *s, UFILE *f) { int32_t count = u_file_write(s, u_strlen(s), f); count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); return count; } U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ u_fputc(UChar32 uc, UFILE *f) { char16_t buf[2]; int32_t idx = 0; UBool isError = false; U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError); if (isError) { return U_EOF; } return u_file_write(buf, idx, f) == idx ? uc : U_EOF; } U_CFUNC int32_t U_EXPORT2 u_file_write_flush(const char16_t *chars, int32_t count, UFILE *f, UBool flushIO, UBool flushTranslit) { /* Set up conversion parameters */ UErrorCode status = U_ZERO_ERROR; const char16_t *mySource = chars; const char16_t *mySourceBegin; const char16_t *mySourceEnd; char charBuffer[UFILE_CHARBUFFER_SIZE]; char *myTarget = charBuffer; int32_t written = 0; int32_t numConverted = 0; if (count < 0) { count = u_strlen(chars); } #if !UCONFIG_NO_TRANSLITERATION if((f->fTranslit) && (f->fTranslit->translit)) { /* Do the transliteration */ mySource = u_file_translit(f, chars, &count, flushTranslit); } #endif /* Write to a string. */ if (!f->fFile) { int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); if (flushIO && charsLeft > count) { count++; } written = ufmt_min(count, charsLeft); u_strncpy(f->str.fPos, mySource, written); f->str.fPos += written; return written; } mySourceEnd = mySource + count; /* Perform the conversion in a loop */ do { mySourceBegin = mySource; /* beginning location for this loop */ status = U_ZERO_ERROR; if(f->fConverter != nullptr) { /* We have a valid converter */ ucnv_fromUnicode(f->fConverter, &myTarget, charBuffer + UFILE_CHARBUFFER_SIZE, &mySource, mySourceEnd, nullptr, flushIO, &status); } else { /*weiv: do the invariant conversion */ int32_t convertChars = (int32_t) (mySourceEnd - mySource); if (convertChars > UFILE_CHARBUFFER_SIZE) { convertChars = UFILE_CHARBUFFER_SIZE; status = U_BUFFER_OVERFLOW_ERROR; } u_UCharsToChars(mySource, myTarget, convertChars); mySource += convertChars; myTarget += convertChars; } numConverted = (int32_t)(myTarget - charBuffer); if (numConverted > 0) { /* write the converted bytes */ fwrite(charBuffer, sizeof(char), numConverted, f->fFile); written += (int32_t) (mySource - mySourceBegin); } myTarget = charBuffer; } while(status == U_BUFFER_OVERFLOW_ERROR); /* return # of chars written */ return written; } U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ u_file_write( const char16_t *chars, int32_t count, UFILE *f) { return u_file_write_flush(chars,count,f,false,false); } /* private function used for buffering input */ void ufile_fill_uchar_buffer(UFILE *f) { UErrorCode status; const char *mySource; const char *mySourceEnd; char16_t *myTarget; int32_t bufferSize; int32_t maxCPBytes; int32_t bytesRead; int32_t availLength; int32_t dataSize; char charBuffer[UFILE_CHARBUFFER_SIZE]; u_localized_string *str; if (f->fFile == nullptr) { /* There is nothing to do. It's a string. */ return; } str = &f->str; dataSize = (int32_t)(str->fLimit - str->fPos); if (f->fFileno == 0 && dataSize > 0) { /* Don't read from stdin too many times. There is still some data. */ return; } /* shift the buffer if it isn't empty */ if(dataSize != 0) { u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */ } /* record how much buffer space is available */ availLength = UFILE_UCHARBUFFER_SIZE - dataSize; /* Determine the # of codepage bytes needed to fill our char16_t buffer */ /* weiv: if converter is nullptr, we use invariant converter with charwidth = 1)*/ maxCPBytes = availLength / (f->fConverter!=nullptr?(2*ucnv_getMinCharSize(f->fConverter)):1); /* Read in the data to convert */ if (f->fFileno == 0) { /* Special case. Read from stdin one line at a time. */ char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0); } else { /* A normal file */ bytesRead = (int32_t)fread(charBuffer, sizeof(char), ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); } /* Set up conversion parameters */ status = U_ZERO_ERROR; mySource = charBuffer; mySourceEnd = charBuffer + bytesRead; myTarget = f->fUCBuffer + dataSize; bufferSize = UFILE_UCHARBUFFER_SIZE; if(f->fConverter != nullptr) { /* We have a valid converter */ /* Perform the conversion */ ucnv_toUnicode(f->fConverter, &myTarget, f->fUCBuffer + bufferSize, &mySource, mySourceEnd, nullptr, (UBool)(feof(f->fFile) != 0), &status); } else { /*weiv: do the invariant conversion */ u_charsToUChars(mySource, myTarget, bytesRead); myTarget += bytesRead; } /* update the pointers into our array */ str->fPos = str->fBuffer; str->fLimit = myTarget; } U_CAPI char16_t* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ u_fgets(char16_t *s, int32_t n, UFILE *f) { int32_t dataSize; int32_t count; char16_t *alias; const char16_t *limit; char16_t *sItr; char16_t currDelim = 0; u_localized_string *str; if (n <= 0) { /* Caller screwed up. We need to write the null terminatior. */ return nullptr; } /* fill the buffer if needed */ str = &f->str; if (str->fPos >= str->fLimit) { ufile_fill_uchar_buffer(f); } /* subtract 1 from n to compensate for the terminator */ --n; /* determine the amount of data in the buffer */ dataSize = (int32_t)(str->fLimit - str->fPos); /* if 0 characters were left, return 0 */ if (dataSize == 0) return nullptr; /* otherwise, iteratively fill the buffer and copy */ count = 0; sItr = s; currDelim = 0; while (dataSize > 0 && count < n) { alias = str->fPos; /* Find how much to copy */ if (dataSize < (n - count)) { limit = str->fLimit; } else { limit = alias + (n - count); } if (!currDelim) { /* Copy UChars until we find the first occurrence of a delimiter character */ while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { count++; *(sItr++) = *(alias++); } /* Preserve the newline */ if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { currDelim = *alias; } else { currDelim = 1; /* This isn't a newline, but it's used to say that we should break later. We've checked all possible newline combinations even across buffer boundaries. */ } count++; *(sItr++) = *(alias++); } } /* If we have a CRLF combination, preserve that too. */ if (alias < limit) { if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { count++; *(sItr++) = *(alias++); } currDelim = 1; /* This isn't a newline, but it's used to say that we should break later. We've checked all possible newline combinations even across buffer boundaries. */ } /* update the current buffer position */ str->fPos = alias; /* if we found a delimiter */ if (currDelim == 1) { /* break out */ break; } /* refill the buffer */ ufile_fill_uchar_buffer(f); /* determine the amount of data in the buffer */ dataSize = (int32_t)(str->fLimit - str->fPos); } /* add the terminator and return s */ *sItr = 0x0000; return s; } U_CFUNC UBool U_EXPORT2 ufile_getch(UFILE *f, char16_t *ch) { UBool isValidChar = false; *ch = U_EOF; /* if we have an available character in the buffer, return it */ if(f->str.fPos < f->str.fLimit){ *ch = *(f->str.fPos)++; isValidChar = true; } else { /* otherwise, fill the buffer and return the next character */ if(f->str.fPos >= f->str.fLimit) { ufile_fill_uchar_buffer(f); } if(f->str.fPos < f->str.fLimit) { *ch = *(f->str.fPos)++; isValidChar = true; } } return isValidChar; } U_CAPI char16_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ u_fgetc(UFILE *f) { char16_t ch; ufile_getch(f, &ch); return ch; } U_CFUNC UBool U_EXPORT2 ufile_getch32(UFILE *f, UChar32 *c32) { UBool isValidChar = false; u_localized_string *str; *c32 = U_EOF; /* Fill the buffer if it is empty */ str = &f->str; if (str->fPos + 1 >= str->fLimit) { ufile_fill_uchar_buffer(f); } /* Get the next character in the buffer */ if (str->fPos < str->fLimit) { *c32 = *(str->fPos)++; if (U_IS_LEAD(*c32)) { if (str->fPos < str->fLimit) { char16_t c16 = *(str->fPos)++; *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); isValidChar = true; } else { *c32 = U_EOF; } } else { isValidChar = true; } } return isValidChar; } U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ u_fgetcx(UFILE *f) { UChar32 ch; ufile_getch32(f, &ch); return ch; } U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ u_fungetc(UChar32 ch, UFILE *f) { u_localized_string *str; str = &f->str; /* if we're at the beginning of the buffer, sorry! */ if (str->fPos == str->fBuffer || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) { ch = U_EOF; } else { /* otherwise, put the character back */ /* Remember, read them back on in the reverse order. */ if (U_IS_LEAD(ch)) { if (*--(str->fPos) != U16_TRAIL(ch) || *--(str->fPos) != U16_LEAD(ch)) { ch = U_EOF; } } else if (*--(str->fPos) != ch) { ch = U_EOF; } } return ch; } U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ u_file_read( char16_t *chars, int32_t count, UFILE *f) { int32_t dataSize; int32_t read = 0; u_localized_string *str = &f->str; do { /* determine the amount of data in the buffer */ dataSize = (int32_t)(str->fLimit - str->fPos); if (dataSize <= 0) { /* fill the buffer */ ufile_fill_uchar_buffer(f); dataSize = (int32_t)(str->fLimit - str->fPos); } /* Make sure that we don't read too much */ if (dataSize > (count - read)) { dataSize = count - read; } /* copy the current data in the buffer */ memcpy(chars + read, str->fPos, dataSize * sizeof(char16_t)); /* update number of items read */ read += dataSize; /* update the current buffer position */ str->fPos += dataSize; } while (dataSize != 0 && read < count); return read; } #endif