From 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:47:29 +0200 Subject: Adding upstream version 115.8.0esr. Signed-off-by: Daniel Baumann --- intl/icu/source/io/Makefile.in | 177 ++++ intl/icu/source/io/io.rc | 110 +++ intl/icu/source/io/io.vcxproj | 116 +++ intl/icu/source/io/io.vcxproj.filters | 88 ++ intl/icu/source/io/locbund.cpp | 185 ++++ intl/icu/source/io/locbund.h | 82 ++ intl/icu/source/io/sources.txt | 12 + intl/icu/source/io/sprintf.cpp | 274 ++++++ intl/icu/source/io/sscanf.cpp | 130 +++ intl/icu/source/io/ucln_io.cpp | 71 ++ intl/icu/source/io/ucln_io.h | 40 + intl/icu/source/io/ufile.cpp | 383 ++++++++ intl/icu/source/io/ufile.h | 140 +++ intl/icu/source/io/ufmt_cmn.cpp | 259 ++++++ intl/icu/source/io/ufmt_cmn.h | 162 ++++ intl/icu/source/io/unicode/ustdio.h | 1021 +++++++++++++++++++++ intl/icu/source/io/unicode/ustream.h | 69 ++ intl/icu/source/io/uprintf.cpp | 219 +++++ intl/icu/source/io/uprintf.h | 103 +++ intl/icu/source/io/uprntf_p.cpp | 1606 +++++++++++++++++++++++++++++++++ intl/icu/source/io/uscanf.cpp | 108 +++ intl/icu/source/io/uscanf.h | 38 + intl/icu/source/io/uscanf_p.cpp | 1463 ++++++++++++++++++++++++++++++ intl/icu/source/io/ustdio.cpp | 732 +++++++++++++++ intl/icu/source/io/ustream.cpp | 170 ++++ 25 files changed, 7758 insertions(+) create mode 100644 intl/icu/source/io/Makefile.in create mode 100644 intl/icu/source/io/io.rc create mode 100644 intl/icu/source/io/io.vcxproj create mode 100644 intl/icu/source/io/io.vcxproj.filters create mode 100644 intl/icu/source/io/locbund.cpp create mode 100644 intl/icu/source/io/locbund.h create mode 100644 intl/icu/source/io/sources.txt create mode 100644 intl/icu/source/io/sprintf.cpp create mode 100644 intl/icu/source/io/sscanf.cpp create mode 100644 intl/icu/source/io/ucln_io.cpp create mode 100644 intl/icu/source/io/ucln_io.h create mode 100644 intl/icu/source/io/ufile.cpp create mode 100644 intl/icu/source/io/ufile.h 
create mode 100644 intl/icu/source/io/ufmt_cmn.cpp create mode 100644 intl/icu/source/io/ufmt_cmn.h create mode 100644 intl/icu/source/io/unicode/ustdio.h create mode 100644 intl/icu/source/io/unicode/ustream.h create mode 100644 intl/icu/source/io/uprintf.cpp create mode 100644 intl/icu/source/io/uprintf.h create mode 100644 intl/icu/source/io/uprntf_p.cpp create mode 100644 intl/icu/source/io/uscanf.cpp create mode 100644 intl/icu/source/io/uscanf.h create mode 100644 intl/icu/source/io/uscanf_p.cpp create mode 100644 intl/icu/source/io/ustdio.cpp create mode 100644 intl/icu/source/io/ustream.cpp (limited to 'intl/icu/source/io') diff --git a/intl/icu/source/io/Makefile.in b/intl/icu/source/io/Makefile.in new file mode 100644 index 0000000000..9bf9359ead --- /dev/null +++ b/intl/icu/source/io/Makefile.in @@ -0,0 +1,177 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +#****************************************************************************** +# +# Copyright (C) 1999-2011, International Business Machines +# Corporation and others. All Rights Reserved. +# +#****************************************************************************** +## Makefile.in for ICU - icuio.so +## Stephen F. Booth + +## Source directory information +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ + +top_builddir = .. + +## All the flags and other definitions are included here. 
+include $(top_builddir)/icudefs.mk + +## Build directory information +subdir = io + +## Extra files to remove for 'make clean' +CLEANFILES = *~ $(DEPS) $(IMPORT_LIB) $(MIDDLE_IMPORT_LIB) $(FINAL_IMPORT_LIB) + +## Target information + +TARGET_STUBNAME=$(IO_STUBNAME) + +ifneq ($(ENABLE_STATIC),) +TARGET = $(LIBDIR)/$(LIBSICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(A) +endif + +ifneq ($(ENABLE_SHARED),) +SO_TARGET = $(LIBDIR)/$(LIBICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(SO) +ALL_SO_TARGETS = $(SO_TARGET) $(MIDDLE_SO_TARGET) $(FINAL_SO_TARGET) $(SHARED_OBJECT) + +ifeq ($(ENABLE_SO_VERSION_DATA),1) +SO_VERSION_DATA = io.res +endif + +ifeq ($(OS390BATCH),1) +BATCH_TARGET = $(BATCH_IO_TARGET) +BATCH_LIBS = $(BATCH_LIBICUUC) $(BATCH_LIBICUI18N) -lm +endif # OS390BATCH + +endif # ENABLE_SHARED + +ALL_TARGETS = $(TARGET) $(ALL_SO_TARGETS) $(BATCH_TARGET) + +DYNAMICCPPFLAGS = $(SHAREDLIBCPPFLAGS) +DYNAMICCFLAGS = $(SHAREDLIBCFLAGS) +DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS) +CFLAGS += $(LIBCFLAGS) +CXXFLAGS += $(LIBCXXFLAGS) + +CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n $(LIBCPPFLAGS) $(CPPFLAGSICUIO) +DEFS += -DU_IO_IMPLEMENTATION +LDFLAGS += $(LDFLAGSICUIO) +LIBS = $(LIBICUUC) $(LIBICUI18N) $(DEFAULT_LIBS) + +SOURCES = $(shell cat $(srcdir)/sources.txt) +OBJECTS = $(SOURCES:.cpp=.o) + +## Header files to install +HEADERS = $(srcdir)/unicode/*.h + +STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O)) + +DEPS = $(OBJECTS:.o=.d) + +-include Makefile.local + +## List of phony targets +.PHONY : all all-local install install-local clean clean-local \ +distclean distclean-local install-library install-headers dist \ +dist-local check check-local check-exhaustive + +## Clear suffix list +.SUFFIXES : + +## List of standard targets +all: all-local +install: install-local +clean: clean-local +distclean : distclean-local +dist: dist-local +check: all check-local + +check-exhaustive: check + +all-local: $(ALL_TARGETS) + +install-local: install-headers install-library + 
+install-library: all-local + $(MKINSTALLDIRS) $(DESTDIR)$(libdir) +ifneq ($(ENABLE_STATIC),) + $(INSTALL-L) $(TARGET) $(DESTDIR)$(libdir) +endif +ifneq ($(ENABLE_SHARED),) +# For MinGW, do we want the DLL to go in the bin location? +ifeq ($(MINGW_MOVEDLLSTOBINDIR),YES) + $(MKINSTALLDIRS) $(DESTDIR)$(bindir) + $(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(bindir) +else + $(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(libdir) +ifneq ($(FINAL_SO_TARGET),$(SO_TARGET)) + cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(SO_TARGET)) +ifneq ($(FINAL_SO_TARGET),$(MIDDLE_SO_TARGET)) + cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(MIDDLE_SO_TARGET)) +endif +endif +endif +ifneq ($(IMPORT_LIB_EXT),) + $(INSTALL-L) $(FINAL_IMPORT_LIB) $(DESTDIR)$(libdir) +ifneq ($(IMPORT_LIB),$(FINAL_IMPORT_LIB)) + cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(IMPORT_LIB)) +endif +ifneq ($(MIDDLE_IMPORT_LIB),$(FINAL_IMPORT_LIB)) + cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(MIDDLE_IMPORT_LIB)) +endif +endif +endif + +install-headers: + $(MKINSTALLDIRS) $(DESTDIR)$(includedir)/unicode + @for file in $(HEADERS); do \ + echo "$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode"; \ + $(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode || exit; \ + done + +dist-local: + +clean-local: + test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES) + $(RMV) $(OBJECTS) $(STATIC_OBJECTS) $(ALL_TARGETS) $(SO_VERSION_DATA) + +distclean-local: clean-local + $(RMV) Makefile + +check-local: + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +ifneq ($(ENABLE_STATIC),) +$(TARGET): $(STATIC_OBJECTS) + $(AR) $(ARFLAGS) $(AR_OUTOPT)$@ $^ + $(RANLIB) $@ +endif + +ifneq 
($(ENABLE_SHARED),) +$(SHARED_OBJECT): $(OBJECTS) $(SO_VERSION_DATA) + $(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(LIBS) +ifeq ($(ENABLE_RPATH),YES) +ifneq ($(wildcard $(libdir)/$(MIDDLE_SO_TARGET)),) + $(warning RPATH warning: --enable-rpath means test programs may use existing $(libdir)/$(MIDDLE_SO_TARGET)) +endif +endif + +ifeq ($(OS390BATCH),1) +$(BATCH_TARGET):$(OBJECTS) + $(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(BATCH_LIBS) +endif # OS390BATCH +endif # ENABLE_SHARED + +ifeq (,$(MAKECMDGOALS)) +-include $(DEPS) +else +ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),) +-include $(DEPS) +endif +endif + diff --git a/intl/icu/source/io/io.rc b/intl/icu/source/io/io.rc new file mode 100644 index 0000000000..b3c750b5f0 --- /dev/null +++ b/intl/icu/source/io/io.rc @@ -0,0 +1,110 @@ +// Do not edit with Microsoft Developer Studio Resource Editor. +// It will permanently substitute version numbers that are intended to be +// picked up by the pre-processor during each build. +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// Copyright (c) 2001-2010 International Business Machines +// Corporation and others. All Rights Reserved. +// +#include "../common/msvcres.h" + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. 
+// +#include +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// + +LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL +#pragma code_page(1252) + +#ifdef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// TEXTINCLUDE +// + +1 TEXTINCLUDE +BEGIN + "../../common/msvcres.h\0" +END + +2 TEXTINCLUDE +BEGIN + "#include \0" +END + +3 TEXTINCLUDE +BEGIN + "\r\n" + "\0" +END + +#endif // APSTUDIO_INVOKED + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// +#define STR(s) #s +#define CommaVersionString(a, b, c, d) STR(a) ", " STR(b) ", " STR(c) ", " STR(d) "\0" + +VS_VERSION_INFO VERSIONINFO + FILEVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM + PRODUCTVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM + FILEFLAGSMASK 0x3fL +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS VOS__WINDOWS32 + FILETYPE VFT_DLL + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "00000000" + BEGIN + VALUE "Comments", ICU_WEBSITE "\0" + VALUE "CompanyName", ICU_COMPANY "\0" + VALUE "FileDescription", ICU_PRODUCT_PREFIX " I/O DLL\0" + VALUE "FileVersion", CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM) + VALUE "LegalCopyright", U_COPYRIGHT_STRING "\0" +#ifdef _DEBUG + VALUE "OriginalFilename", "icuio" U_ICU_VERSION_SHORT "d.dll\0" +#else + VALUE "OriginalFilename", "icuio" U_ICU_VERSION_SHORT ".dll\0" +#endif + VALUE "PrivateBuild", "\0" + VALUE "ProductName", ICU_PRODUCT "\0" + VALUE "ProductVersion", CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, 
U_ICU_VERSION_BUILDLEVEL_NUM) + VALUE "SpecialBuild", "\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x000, 0000 + END +END + +///////////////////////////////////////////////////////////////////////////// + + + +#ifndef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 3 resource. +// + + +///////////////////////////////////////////////////////////////////////////// +#endif // not APSTUDIO_INVOKED + diff --git a/intl/icu/source/io/io.vcxproj b/intl/icu/source/io/io.vcxproj new file mode 100644 index 0000000000..ff3485fd4d --- /dev/null +++ b/intl/icu/source/io/io.vcxproj @@ -0,0 +1,116 @@ + + + + {C2B04507-2521-4801-BF0D-5FD79D6D518C} + + + DynamicLibrary + false + MultiByte + + + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + .\$(Platform)\$(Configuration)\ + .\$(Platform)\$(Configuration)\ + + .\x86\$(Configuration)\ + .\x86\$(Configuration)\ + + true + false + + + + + $(OutDir)\icuio.tlb + + + ..\..\include;..\common;%(AdditionalIncludeDirectories) + U_IO_IMPLEMENTATION;%(PreprocessorDefinitions) + false + Level3 + Default + ProgramDatabase + $(OutDir)/icuio.pch + $(OutDir)/ + $(OutDir)/ + $(OutDir)/icuio.pdb + + + ..\common;%(AdditionalIncludeDirectories) + + + ..\..\$(IcuLibOutputDir);%(AdditionalLibraryDirectories) + + + + + + true + MultiThreadedDebugDLL + + + true + ..\..\$(IcuBinOutputDir)\icuio$(IcuMajorVersion)d.dll + ..\..\$(IcuLibOutputDir)\icuiod.pdb + ..\..\$(IcuLibOutputDir)\icuiod.lib + icuucd.lib;icuind.lib;%(AdditionalDependencies) + + + + + + MultiThreadedDLL + true + + + ..\..\$(IcuBinOutputDir)\icuio$(IcuMajorVersion).dll + ..\..\$(IcuLibOutputDir)\icuio.pdb + ..\..\$(IcuLibOutputDir)\icuio.lib + icuuc.lib;icuin.lib;%(AdditionalDependencies) + + + + + + + + false + + + + + + + + + + false + + + + + + + + + + + + + + + + + + + diff --git a/intl/icu/source/io/io.vcxproj.filters b/intl/icu/source/io/io.vcxproj.filters new file mode 
100644 index 0000000000..4a6841ba28 --- /dev/null +++ b/intl/icu/source/io/io.vcxproj.filters @@ -0,0 +1,88 @@ + + + + + {851db102-16a0-4e9d-b6bc-aa5cce5119d6} + cpp;c;cxx;rc;def;r;odl;idl;hpj;bat + + + {051f18b2-608f-486c-9b00-308b7a429167} + h;hpp;hxx;hm;inl + + + {9fd3bb57-16c4-405d-a045-2be622895424} + ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Resource Files + + + + + Header Files + + + Header Files + + + diff --git a/intl/icu/source/io/locbund.cpp b/intl/icu/source/io/locbund.cpp new file mode 100644 index 0000000000..3f6d6309ac --- /dev/null +++ b/intl/icu/source/io/locbund.cpp @@ -0,0 +1,185 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* +* File locbund.cpp +* +* Modification History: +* +* Date Name Description +* 11/18/98 stephen Creation. 
+* 12/10/1999 bobbyr(at)optiosoftware.com Fix for memory leak + string allocation bugs +******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "locbund.h" + +#include "cmemory.h" +#include "cstring.h" +#include "ucln_io.h" +#include "mutex.h" +#include "umutex.h" +#include "unicode/ustring.h" +#include "unicode/uloc.h" + +static UNumberFormat *gPosixNumberFormat[ULOCALEBUNDLE_NUMBERFORMAT_COUNT]; + +U_CDECL_BEGIN +static UBool U_CALLCONV locbund_cleanup() { + int32_t style; + for (style = 0; style < ULOCALEBUNDLE_NUMBERFORMAT_COUNT; style++) { + unum_close(gPosixNumberFormat[style]); + gPosixNumberFormat[style] = nullptr; + } + return true; +} +U_CDECL_END + +static inline UNumberFormat * copyInvariantFormatter(ULocaleBundle *result, UNumberFormatStyle style) { + U_NAMESPACE_USE + static UMutex gLock; + Mutex lock(&gLock); + if (result->fNumberFormat[style-1] == nullptr) { + if (gPosixNumberFormat[style-1] == nullptr) { + UErrorCode status = U_ZERO_ERROR; + UNumberFormat *formatAlias = unum_open(style, nullptr, 0, "en_US_POSIX", nullptr, &status); + if (U_SUCCESS(status)) { + gPosixNumberFormat[style-1] = formatAlias; + ucln_io_registerCleanup(UCLN_IO_LOCBUND, locbund_cleanup); + } + } + /* Copy the needed formatter. 
*/ + if (gPosixNumberFormat[style-1] != nullptr) { + UErrorCode status = U_ZERO_ERROR; + result->fNumberFormat[style-1] = unum_clone(gPosixNumberFormat[style-1], &status); + } + } + return result->fNumberFormat[style-1]; +} + +U_CAPI ULocaleBundle * +u_locbund_init(ULocaleBundle *result, const char *loc) +{ + int32_t len; + + if(result == 0) + return 0; + + if (loc == nullptr) { + loc = uloc_getDefault(); + } + + uprv_memset(result, 0, sizeof(ULocaleBundle)); + + len = (int32_t)strlen(loc); + result->fLocale = (char*) uprv_malloc(len + 1); + if(result->fLocale == 0) { + return 0; + } + + uprv_strcpy(result->fLocale, loc); + + result->isInvariantLocale = uprv_strcmp(result->fLocale, "en_US_POSIX") == 0; + + return result; +} + +/*U_CAPI ULocaleBundle * +u_locbund_new(const char *loc) +{ + ULocaleBundle *result = (ULocaleBundle*) uprv_malloc(sizeof(ULocaleBundle)); + return u_locbund_init(result, loc); +} + +U_CAPI ULocaleBundle * +u_locbund_clone(const ULocaleBundle *bundle) +{ + ULocaleBundle *result = (ULocaleBundle*)uprv_malloc(sizeof(ULocaleBundle)); + UErrorCode status = U_ZERO_ERROR; + int32_t styleIdx; + + if(result == 0) + return 0; + + result->fLocale = (char*) uprv_malloc(strlen(bundle->fLocale) + 1); + if(result->fLocale == 0) { + uprv_free(result); + return 0; + } + + strcpy(result->fLocale, bundle->fLocale ); + + for (styleIdx = 0; styleIdx < ULOCALEBUNDLE_NUMBERFORMAT_COUNT; styleIdx++) { + status = U_ZERO_ERROR; + if (result->fNumberFormat[styleIdx]) { + result->fNumberFormat[styleIdx] = unum_clone(bundle->fNumberFormat[styleIdx], &status); + if (U_FAILURE(status)) { + result->fNumberFormat[styleIdx] = nullptr; + } + } + else { + result->fNumberFormat[styleIdx] = nullptr; + } + } + result->fDateFormat = (bundle->fDateFormat == 0 ? 0 : + udat_clone(bundle->fDateFormat, &status)); + result->fTimeFormat = (bundle->fTimeFormat == 0 ? 
0 : + udat_clone(bundle->fTimeFormat, &status)); + + return result; +}*/ + +U_CAPI void +u_locbund_close(ULocaleBundle *bundle) +{ + int32_t styleIdx; + + uprv_free(bundle->fLocale); + + for (styleIdx = 0; styleIdx < ULOCALEBUNDLE_NUMBERFORMAT_COUNT; styleIdx++) { + if (bundle->fNumberFormat[styleIdx]) { + unum_close(bundle->fNumberFormat[styleIdx]); + } + } + + uprv_memset(bundle, 0, sizeof(ULocaleBundle)); +/* uprv_free(bundle);*/ +} + +U_CAPI UNumberFormat * +u_locbund_getNumberFormat(ULocaleBundle *bundle, UNumberFormatStyle style) +{ + UNumberFormat *formatAlias = nullptr; + if (style > UNUM_IGNORE) { + formatAlias = bundle->fNumberFormat[style-1]; + if (formatAlias == nullptr) { + if (bundle->isInvariantLocale) { + formatAlias = copyInvariantFormatter(bundle, style); + } + else { + UErrorCode status = U_ZERO_ERROR; + formatAlias = unum_open(style, nullptr, 0, bundle->fLocale, nullptr, &status); + if (U_FAILURE(status)) { + unum_close(formatAlias); + formatAlias = nullptr; + } + else { + bundle->fNumberFormat[style-1] = formatAlias; + } + } + } + } + return formatAlias; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/intl/icu/source/io/locbund.h b/intl/icu/source/io/locbund.h new file mode 100644 index 0000000000..5879e28f01 --- /dev/null +++ b/intl/icu/source/io/locbund.h @@ -0,0 +1,82 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1998-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* +* File locbund.h +* +* Modification History: +* +* Date Name Description +* 10/16/98 stephen Creation. +* 02/25/99 stephen Modified for new C API. 
+******************************************************************************* +*/ + +#ifndef LOCBUND_H +#define LOCBUND_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/unum.h" + +#define ULOCALEBUNDLE_NUMBERFORMAT_COUNT ((int32_t)UNUM_SPELLOUT) + +typedef struct ULocaleBundle { + char *fLocale; + + UNumberFormat *fNumberFormat[ULOCALEBUNDLE_NUMBERFORMAT_COUNT]; + UBool isInvariantLocale; +} ULocaleBundle; + + +/** + * Initialize a ULocaleBundle, initializing all formatters to 0. + * @param result A ULocaleBundle to initialize. + * @param loc The locale of the ULocaleBundle. + * @return A pointer to a ULocaleBundle, or 0 if loc was invalid. + */ +U_CAPI ULocaleBundle * +u_locbund_init(ULocaleBundle *result, const char *loc); + +/** + * Create a new ULocaleBundle, initializing all formatters to 0. + * @param loc The locale of the ULocaleBundle. + * @return A pointer to a ULocaleBundle, or 0 if loc was invalid. + */ +/*U_CAPI ULocaleBundle * +u_locbund_new(const char *loc);*/ + +/** + * Create a deep copy of this ULocaleBundle; + * @param bundle The ULocaleBundle to clone. + * @return A new ULocaleBundle. + */ +/*U_CAPI ULocaleBundle * +u_locbund_clone(const ULocaleBundle *bundle);*/ + +/** + * Delete the specified ULocaleBundle, freeing all associated memory. + * @param bundle The ULocaleBundle to delete + */ +U_CAPI void +u_locbund_close(ULocaleBundle *bundle); + +/** + * Get the NumberFormat used to format and parse numbers in a ULocaleBundle. + * @param bundle The ULocaleBundle to use + * @return A pointer to the NumberFormat used for number formatting and parsing. 
+ */ +U_CAPI UNumberFormat * +u_locbund_getNumberFormat(ULocaleBundle *bundle, UNumberFormatStyle style); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/io/sources.txt b/intl/icu/source/io/sources.txt new file mode 100644 index 0000000000..733104399f --- /dev/null +++ b/intl/icu/source/io/sources.txt @@ -0,0 +1,12 @@ +locbund.cpp +sprintf.cpp +sscanf.cpp +ucln_io.cpp +ufile.cpp +ufmt_cmn.cpp +uprintf.cpp +uprntf_p.cpp +uscanf.cpp +uscanf_p.cpp +ustdio.cpp +ustream.cpp diff --git a/intl/icu/source/io/sprintf.cpp b/intl/icu/source/io/sprintf.cpp new file mode 100644 index 0000000000..48d9bcc27c --- /dev/null +++ b/intl/icu/source/io/sprintf.cpp @@ -0,0 +1,274 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File sprintf.c +* +* Modification History: +* +* Date Name Description +* 02/08/2001 george Creation. Copied from uprintf.c +* 03/27/2002 Mark Schneckloth Many fixes regarding alignment, null termination +* (mschneckloth@atomz.com) and other various problems. 
+* 08/07/2003 george Reunify printf implementations +******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/ustdio.h" +#include "unicode/ustring.h" +#include "unicode/putil.h" + +#include "uprintf.h" +#include "locbund.h" + +#include "cmemory.h" +#include + +/* u_minstrncpy copies the minimum number of code units of (count or output->available) */ +static int32_t +u_sprintf_write(void *context, + const char16_t *str, + int32_t count) +{ + u_localized_print_string *output = (u_localized_print_string *)context; + + /* just calculating buffer size */ + if (output->str == 0) { + return count; + } + + int32_t size = ufmt_min(count, output->available); + + u_strncpy(output->str + (output->len - output->available), str, size); + output->available -= size; + return size; +} + +static int32_t +u_sprintf_pad_and_justify(void *context, + const u_printf_spec_info *info, + const char16_t *result, + int32_t resultLen) +{ + u_localized_print_string *output = (u_localized_print_string *)context; + int32_t written = 0; + int32_t lengthOfResult = resultLen; + + /* just calculating buffer size */ + if (output->str == 0 && + info->fWidth != -1 && resultLen < info->fWidth) { + return info->fWidth; + } + + resultLen = ufmt_min(resultLen, output->available); + + /* pad and justify, if needed */ + if(info->fWidth != -1 && resultLen < info->fWidth) { + int32_t paddingLeft = info->fWidth - resultLen; + int32_t outputPos = output->len - output->available; + + if (paddingLeft + resultLen > output->available) { + paddingLeft = output->available - resultLen; + if (paddingLeft < 0) { + paddingLeft = 0; + } + /* paddingLeft = output->available - resultLen;*/ + } + written += paddingLeft; + + /* left justify */ + if(info->fLeft) { + written += u_sprintf_write(output, result, resultLen); + u_memset(&output->str[outputPos + resultLen], info->fPadChar, paddingLeft); 
+ output->available -= paddingLeft; + } + /* right justify */ + else { + u_memset(&output->str[outputPos], info->fPadChar, paddingLeft); + output->available -= paddingLeft; + written += u_sprintf_write(output, result, resultLen); + } + } + /* just write the formatted output */ + else { + written = u_sprintf_write(output, result, resultLen); + } + + if (written >= 0 && lengthOfResult > written) { + return lengthOfResult; + } + + return written; +} + +U_CAPI int32_t U_EXPORT2 +u_sprintf(char16_t *buffer, + const char *patternSpecification, + ... ) +{ + va_list ap; + int32_t written; + + va_start(ap, patternSpecification); + written = u_vsnprintf(buffer, INT32_MAX, patternSpecification, ap); + va_end(ap); + + return written; +} + +U_CAPI int32_t U_EXPORT2 +u_sprintf_u(char16_t *buffer, + const char16_t *patternSpecification, + ... ) +{ + va_list ap; + int32_t written; + + va_start(ap, patternSpecification); + written = u_vsnprintf_u(buffer, INT32_MAX, patternSpecification, ap); + va_end(ap); + + return written; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsprintf(char16_t *buffer, + const char *patternSpecification, + va_list ap) +{ + return u_vsnprintf(buffer, INT32_MAX, patternSpecification, ap); +} + +U_CAPI int32_t U_EXPORT2 +u_snprintf(char16_t *buffer, + int32_t count, + const char *patternSpecification, + ... ) +{ + va_list ap; + int32_t written; + + va_start(ap, patternSpecification); + written = u_vsnprintf(buffer, count, patternSpecification, ap); + va_end(ap); + + return written; +} + +U_CAPI int32_t U_EXPORT2 +u_snprintf_u(char16_t *buffer, + int32_t count, + const char16_t *patternSpecification, + ... ) +{ + va_list ap; + int32_t written; + + va_start(ap, patternSpecification); + written = u_vsnprintf_u(buffer, count, patternSpecification, ap); + va_end(ap); + + return written; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsnprintf(char16_t *buffer, + int32_t count, + const char *patternSpecification, + va_list ap) +{ + int32_t written; + char16_t *pattern; + char16_t patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)strlen(patternSpecification) + 1; + + /* convert from the default codepage to Unicode */ + if (size >= (int32_t)MAX_UCHAR_BUFFER_SIZE(patBuffer)) { + pattern = (char16_t *)uprv_malloc(size * sizeof(char16_t)); + if(pattern == 0) { + return 0; + } + } + else { + pattern = patBuffer; + } + u_charsToUChars(patternSpecification, pattern, size); + + /* do the work */ + written = u_vsnprintf_u(buffer, count, pattern, ap); + + /* clean up */ + if (pattern != patBuffer) { + uprv_free(pattern); + } + + return written; +} + +U_CAPI int32_t U_EXPORT2 +u_vsprintf_u(char16_t *buffer, + const char16_t *patternSpecification, + va_list ap) +{ + return u_vsnprintf_u(buffer, INT32_MAX, patternSpecification, ap); +} + +static const u_printf_stream_handler g_sprintf_stream_handler = { + u_sprintf_write, + u_sprintf_pad_and_justify +}; + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsnprintf_u(char16_t *buffer, + int32_t count, + const char16_t *patternSpecification, + va_list ap) +{ + int32_t written = 0; /* haven't written anything yet */ + int32_t result = 0; /* test the return value of u_printf_parse */ + + u_localized_print_string outStr; + + if (count < 0) { + count = INT32_MAX; + } + + outStr.str = buffer; + outStr.len = count; + outStr.available = count; + + if(u_locbund_init(&outStr.fBundle, "en_US_POSIX") == 0) { + return 0; + } + + /* parse and print the whole format string */ + result = u_printf_parse(&g_sprintf_stream_handler, patternSpecification, &outStr, &outStr, &outStr.fBundle, &written, ap); + + /* Terminate the buffer, if there's room. 
*/ + if (outStr.available > 0) { + buffer[outStr.len - outStr.available] = 0x0000; + } + + /* Release the cloned bundle, if we cloned it. */ + u_locbund_close(&outStr.fBundle); + + /* parsing error */ + if (result < 0) { + return result; + } + /* return # of UChars written */ + return written; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ + diff --git a/intl/icu/source/io/sscanf.cpp b/intl/icu/source/io/sscanf.cpp new file mode 100644 index 0000000000..8940127eb1 --- /dev/null +++ b/intl/icu/source/io/sscanf.cpp @@ -0,0 +1,130 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File sscanf.c +* +* Modification History: +* +* Date Name Description +* 02/08/00 george Creation. Copied from uscanf.c +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "unicode/ustdio.h" +#include "unicode/ustring.h" +#include "uscanf.h" +#include "ufile.h" +#include "ufmt_cmn.h" + +#include "cmemory.h" +#include "cstring.h" + + +U_CAPI int32_t U_EXPORT2 +u_sscanf(const char16_t *buffer, + const char *patternSpecification, + ... ) +{ + va_list ap; + int32_t converted; + + va_start(ap, patternSpecification); + converted = u_vsscanf(buffer, patternSpecification, ap); + va_end(ap); + + return converted; +} + +U_CAPI int32_t U_EXPORT2 +u_sscanf_u(const char16_t *buffer, + const char16_t *patternSpecification, + ... 
) +{ + va_list ap; + int32_t converted; + + va_start(ap, patternSpecification); + converted = u_vsscanf_u(buffer, patternSpecification, ap); + va_end(ap); + + return converted; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsscanf(const char16_t *buffer, + const char *patternSpecification, + va_list ap) +{ + int32_t converted; + char16_t *pattern; + char16_t patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)uprv_strlen(patternSpecification) + 1; + + /* convert from the default codepage to Unicode */ + if (size >= (int32_t)MAX_UCHAR_BUFFER_SIZE(patBuffer)) { + pattern = (char16_t *)uprv_malloc(size * sizeof(char16_t)); + if(pattern == 0) { + return 0; + } + } + else { + pattern = patBuffer; + } + u_charsToUChars(patternSpecification, pattern, size); + + /* do the work */ + converted = u_vsscanf_u(buffer, pattern, ap); + + /* clean up */ + if (pattern != patBuffer) { + uprv_free(pattern); + } + + return converted; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsscanf_u(const char16_t *buffer, + const char16_t *patternSpecification, + va_list ap) +{ + int32_t converted; + UFILE inStr; + + inStr.fConverter = nullptr; + inStr.fFile = nullptr; + inStr.fOwnFile = false; +#if !UCONFIG_NO_TRANSLITERATION + inStr.fTranslit = nullptr; +#endif + inStr.fUCBuffer[0] = 0; + inStr.str.fBuffer = (char16_t *)buffer; + inStr.str.fPos = (char16_t *)buffer; + inStr.str.fLimit = buffer + u_strlen(buffer); + + if(u_locbund_init(&inStr.str.fBundle, "en_US_POSIX") == 0) { + return 0; + } + + converted = u_scanf_parse(&inStr, patternSpecification, ap); + + u_locbund_close(&inStr.str.fBundle); + + /* return # of items converted */ + return converted; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ + diff --git a/intl/icu/source/io/ucln_io.cpp b/intl/icu/source/io/ucln_io.cpp new file mode 100644 index 0000000000..e2ddb35870 --- /dev/null +++ b/intl/icu/source/io/ucln_io.cpp @@ -0,0 +1,71 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* * +* Copyright (C) 2001-2014, International Business Machines * +* Corporation and others. All Rights Reserved. * +* * +****************************************************************************** +* file name: ucln_io.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2006August11 +* created by: George Rhoten +*/ + +#include "unicode/utypes.h" +#include "mutex.h" +#include "ucln.h" +#include "ucln_io.h" +#include "uassert.h" + +#ifndef U_IO_IMPLEMENTATION +#error U_IO_IMPLEMENTATION not set - must be set for all ICU source files in io/ - see https://unicode-org.github.io/icu/userguide/howtouseicu +#endif + + +/** Auto-client */ +#define UCLN_TYPE UCLN_IO +#include "ucln_imp.h" + +/* Leave this copyright notice here! It needs to go somewhere in this library. 
*/ +static const char copyright[] = U_COPYRIGHT_STRING; + +static cleanupFunc *gCleanupFunctions[UCLN_IO_COUNT]; + +static UBool U_CALLCONV io_cleanup() +{ + int32_t libType = UCLN_IO_START; + + (void)copyright; // Suppress unused variable warning. + while (++libType_file) +#define fileno(__F) _fileno(__F) +#endif + +#include "locmap.h" +#include "unicode/ustdio.h" + +#if !UCONFIG_NO_CONVERSION + +#include + +#include "ufile.h" +#include "unicode/uloc.h" +#include "unicode/ures.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" +#include "unicode/unistr.h" +#include "cstring.h" +#include "cmemory.h" + +#if U_PLATFORM_USES_ONLY_WIN32_API && !defined(fileno) +/* We will just create an alias to Microsoft's implementation, + which is prefixed with _ as they deprecated non-ansi-standard POSIX function names. + https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/posix-fileno?view=msvc-170 +*/ +#define fileno _fileno +#endif + +static UFILE* +finit_owner(FILE *f, + const char *locale, + const char *codepage, + UBool takeOwnership + ) +{ + UErrorCode status = U_ZERO_ERROR; + UFILE *result; + if(f == nullptr) { + return 0; + } + result = (UFILE*) uprv_malloc(sizeof(UFILE)); + if(result == nullptr) { + return 0; + } + + uprv_memset(result, 0, sizeof(UFILE)); + result->fFileno = fileno(f); + result->fFile = f; + + result->str.fBuffer = result->fUCBuffer; + result->str.fPos = result->fUCBuffer; + result->str.fLimit = result->fUCBuffer; + +#if !UCONFIG_NO_FORMATTING + /* if locale is 0, use the default */ + if(u_locbund_init(&result->str.fBundle, locale) == 0) { + /* DO NOT FCLOSE HERE! */ + uprv_free(result); + return 0; + } +#endif + + /* If the codepage is not "" use the ucnv_open default behavior */ + if(codepage == nullptr || *codepage != '\0') { + result->fConverter = ucnv_open(codepage, &status); + } + /* else result->fConverter is already memset'd to nullptr. 
*/ + + if(U_SUCCESS(status)) { + result->fOwnFile = takeOwnership; + } + else { +#if !UCONFIG_NO_FORMATTING + u_locbund_close(&result->str.fBundle); +#endif + /* DO NOT fclose here!!!!!! */ + uprv_free(result); + result = nullptr; + } + + return result; +} + +U_CAPI UFILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_finit(FILE *f, + const char *locale, + const char *codepage) +{ + return finit_owner(f, locale, codepage, false); +} + +U_CAPI UFILE* U_EXPORT2 +u_fadopt(FILE *f, + const char *locale, + const char *codepage) +{ + return finit_owner(f, locale, codepage, true); +} + +U_CAPI UFILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fopen(const char *filename, + const char *perm, + const char *locale, + const char *codepage) +{ + UFILE *result; + FILE *systemFile = fopen(filename, perm); + if(systemFile == 0) { + return 0; + } + + result = finit_owner(systemFile, locale, codepage, true); + + if (!result) { + /* Something bad happened. + Maybe the converter couldn't be opened. */ + fclose(systemFile); + } + + return result; /* not a file leak */ +} + +// FILENAME_BUF_MAX represents the largest size that we are willing to use for a +// stack-allocated buffer to contain a file name or path. If PATH_MAX (POSIX) or MAX_PATH +// (Windows) are defined and are smaller than this we will use their defined value; +// otherwise, we will use FILENAME_BUF_MAX for the stack-allocated buffer, and dynamically +// allocate a buffer for any file name or path that is that length or longer. 
+#define FILENAME_BUF_MAX 296 +#if defined PATH_MAX && PATH_MAX < FILENAME_BUF_MAX +#define FILENAME_BUF_CAPACITY PATH_MAX +#elif defined MAX_PATH && MAX_PATH < FILENAME_BUF_MAX +#define FILENAME_BUF_CAPACITY MAX_PATH +#else +#define FILENAME_BUF_CAPACITY FILENAME_BUF_MAX +#endif + +U_CAPI UFILE* U_EXPORT2 +u_fopen_u(const char16_t *filename, + const char *perm, + const char *locale, + const char *codepage) +{ + UFILE *result; + char buffer[FILENAME_BUF_CAPACITY]; + char *filenameBuffer = buffer; + + icu::UnicodeString filenameString(true, filename, -1); // readonly aliasing, does not allocate memory + // extract with conversion to platform default codepage, return full length (not including 0 termination) + int32_t filenameLength = filenameString.extract(0, filenameString.length(), filenameBuffer, FILENAME_BUF_CAPACITY); + if (filenameLength >= FILENAME_BUF_CAPACITY) { // could not fit (with zero termination) in buffer + filenameBuffer = static_cast(uprv_malloc(++filenameLength)); // add one for zero termination + if (!filenameBuffer) { + return nullptr; + } + filenameString.extract(0, filenameString.length(), filenameBuffer, filenameLength); + } + + result = u_fopen(filenameBuffer, perm, locale, codepage); +#if U_PLATFORM_USES_ONLY_WIN32_API + /* Try Windows API _wfopen if the above fails. */ + if (!result) { + // TODO: test this code path, including wperm. + wchar_t wperm[40] = {}; + size_t retVal; + mbstowcs_s(&retVal, wperm, UPRV_LENGTHOF(wperm), perm, _TRUNCATE); + FILE *systemFile = _wfopen(reinterpret_cast(filename), wperm); // may return nullptr for long filename + if (systemFile) { + result = finit_owner(systemFile, locale, codepage, true); + } + if (!result && systemFile) { + /* Something bad happened. + Maybe the converter couldn't be opened. + Bu do not fclose(systemFile) if systemFile is nullptr. 
*/ + fclose(systemFile); + } + } +#endif + if (filenameBuffer != buffer) { + uprv_free(filenameBuffer); + } + return result; /* not a file leak */ +} + + +U_CAPI UFILE* U_EXPORT2 +u_fstropen(char16_t *stringBuf, + int32_t capacity, + const char *locale) +{ + UFILE *result; + + if (capacity < 0) { + return nullptr; + } + + result = (UFILE*) uprv_malloc(sizeof(UFILE)); + /* Null pointer test */ + if (result == nullptr) { + return nullptr; /* Just get out. */ + } + uprv_memset(result, 0, sizeof(UFILE)); + result->str.fBuffer = stringBuf; + result->str.fPos = stringBuf; + result->str.fLimit = stringBuf+capacity; + +#if !UCONFIG_NO_FORMATTING + /* if locale is 0, use the default */ + if(u_locbund_init(&result->str.fBundle, locale) == 0) { + /* DO NOT FCLOSE HERE! */ + uprv_free(result); + return 0; + } +#endif + + return result; +} + +U_CAPI UBool U_EXPORT2 +u_feof(UFILE *f) +{ + UBool endOfBuffer; + if (f == nullptr) { + return true; + } + endOfBuffer = (UBool)(f->str.fPos >= f->str.fLimit); + if (f->fFile != nullptr) { + return endOfBuffer && feof(f->fFile); + } + return endOfBuffer; +} + +U_CAPI void U_EXPORT2 +u_fflush(UFILE *file) +{ + ufile_flush_translit(file); + ufile_flush_io(file); + if (file->fFile) { + fflush(file->fFile); + } + else if (file->str.fPos < file->str.fLimit) { + *(file->str.fPos++) = 0; + } + /* TODO: flush input */ +} + +U_CAPI void +u_frewind(UFILE *file) +{ + u_fflush(file); + ucnv_reset(file->fConverter); + if (file->fFile) { + rewind(file->fFile); + file->str.fLimit = file->fUCBuffer; + file->str.fPos = file->fUCBuffer; + } + else { + file->str.fPos = file->str.fBuffer; + } +} + +U_CAPI void U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fclose(UFILE *file) +{ + if (file) { + u_fflush(file); + ufile_close_translit(file); + + if(file->fOwnFile) + fclose(file->fFile); + +#if !UCONFIG_NO_FORMATTING + u_locbund_close(&file->str.fBundle); +#endif + + ucnv_close(file->fConverter); + uprv_free(file); + } +} + +U_CAPI FILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetfile( UFILE *f) +{ + return f->fFile; +} + +#if !UCONFIG_NO_FORMATTING + +U_CAPI const char* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetlocale( UFILE *file) +{ + return file->str.fBundle.fLocale; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fsetlocale(UFILE *file, + const char *locale) +{ + u_locbund_close(&file->str.fBundle); + + return u_locbund_init(&file->str.fBundle, locale) == 0 ? -1 : 0; +} + +#endif + +U_CAPI const char* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetcodepage(UFILE *file) +{ + UErrorCode status = U_ZERO_ERROR; + const char *codepage = nullptr; + + if (file->fConverter) { + codepage = ucnv_getName(file->fConverter, &status); + if(U_FAILURE(status)) + return 0; + } + return codepage; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fsetcodepage( const char *codepage, + UFILE *file) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t retVal = -1; + + /* We use the normal default codepage for this system, and not the one for the locale. */ + if ((file->str.fPos == file->str.fBuffer) && (file->str.fLimit == file->str.fBuffer)) { + ucnv_close(file->fConverter); + file->fConverter = ucnv_open(codepage, &status); + if(U_SUCCESS(status)) { + retVal = 0; + } + } + return retVal; +} + + +U_CAPI UConverter * U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetConverter(UFILE *file) +{ + return file->fConverter; +} +#if !UCONFIG_NO_FORMATTING +U_CAPI const UNumberFormat* U_EXPORT2 u_fgetNumberFormat(UFILE *file) +{ + return u_locbund_getNumberFormat(&file->str.fBundle, UNUM_DECIMAL); +} +#endif + +#endif diff --git a/intl/icu/source/io/ufile.h b/intl/icu/source/io/ufile.h new file mode 100644 index 0000000000..88fa40911e --- /dev/null +++ b/intl/icu/source/io/ufile.h @@ -0,0 +1,140 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * + * Copyright (C) 1998-2014, International Business Machines + * Corporation and others. All Rights Reserved. + * + ******************************************************************************* + * + * File ufile.h + * + * Modification History: + * + * Date Name Description + * 12/01/98 stephen Creation. + * 03/12/99 stephen Modified for new C API. 
+ ******************************************************************************* + */ + +#ifndef UFILE_H +#define UFILE_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include + +#include "unicode/ucnv.h" +#include "unicode/utrans.h" +#include "locbund.h" + +/* The buffer size for fromUnicode calls */ +#define UFILE_CHARBUFFER_SIZE 1024 + +/* The buffer size for toUnicode calls */ +#define UFILE_UCHARBUFFER_SIZE 1024 + +/* A UFILE */ + +#if !UCONFIG_NO_TRANSLITERATION + +typedef struct { + UChar *buffer; /* Beginning of buffer */ + int32_t capacity; /* Capacity of buffer */ + int32_t pos; /* Beginning of untranslitted data */ + int32_t length; /* Length *from beginning of buffer* of untranslitted data */ + UTransliterator *translit; +} UFILETranslitBuffer; + +#endif + +typedef struct u_localized_string { + UChar *fPos; /* current pos in fUCBuffer */ + const UChar *fLimit; /* data limit in fUCBuffer */ + UChar *fBuffer; /* Place to write the string */ + +#if !UCONFIG_NO_FORMATTING + ULocaleBundle fBundle; /* formatters */ +#endif +} u_localized_string; + +struct UFILE { +#if !UCONFIG_NO_TRANSLITERATION + UFILETranslitBuffer *fTranslit; +#endif + + FILE *fFile; /* the actual filesystem interface */ + + UConverter *fConverter; /* for codeset conversion */ + + u_localized_string str; /* struct to handle strings for number formatting */ + + UChar fUCBuffer[UFILE_UCHARBUFFER_SIZE];/* buffer used for toUnicode */ + + UBool fOwnFile; /* true if fFile should be closed */ + + int32_t fFileno; /* File number. Useful to determine if it's stdin. */ +}; + +/** + * Like u_file_write but takes a flush parameter + */ +U_CFUNC int32_t U_EXPORT2 +u_file_write_flush( const UChar *chars, + int32_t count, + UFILE *f, + UBool flushIO, + UBool flushTranslit); + +/** + * Fill a UFILE's buffer with converted codepage data. + * @param f The UFILE containing the buffer to fill. 
+ */ +void +ufile_fill_uchar_buffer(UFILE *f); + +/** + * Get one code unit and detect whether the end of file has been reached. + * @param f The UFILE containing the characters. + * @param ch The read in character + * @return true if the character is valid, or false when EOF has been detected + */ +U_CFUNC UBool U_EXPORT2 +ufile_getch(UFILE *f, UChar *ch); + +/** + * Get one character and detect whether the end of file has been reached. + * @param f The UFILE containing the characters. + * @param ch The read in character + * @return true if the character is valid, or false when EOF has been detected + */ +U_CFUNC UBool U_EXPORT2 +ufile_getch32(UFILE *f, UChar32 *ch); + +/** + * Close out the transliterator and flush any data therein. + * @param f flu + */ +void +ufile_close_translit(UFILE *f); + +/** + * Flush the buffer in the transliterator + * @param f UFile to flush + */ +void +ufile_flush_translit(UFILE *f); + +/** + * Flush the IO buffer + * @param f UFile to flush + */ +void +ufile_flush_io(UFILE *f); + + +#endif +#endif diff --git a/intl/icu/source/io/ufmt_cmn.cpp b/intl/icu/source/io/ufmt_cmn.cpp new file mode 100644 index 0000000000..3d8f6413d8 --- /dev/null +++ b/intl/icu/source/io/ufmt_cmn.cpp @@ -0,0 +1,259 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File ufmt_cmn.c +* +* Modification History: +* +* Date Name Description +* 12/02/98 stephen Creation. +* 03/12/99 stephen Modified for new C API. 
+* 03/15/99 stephen Added defaultCPToUnicode, unicodeToDefaultCP +* 07/19/99 stephen Fixed bug in defaultCPToUnicode +****************************************************************************** +*/ + +#include "cstring.h" +#include "cmemory.h" +#include "ufmt_cmn.h" +#include "unicode/uchar.h" +#include "unicode/ucnv.h" +#include "ustr_cnv.h" + +#if !UCONFIG_NO_CONVERSION + + +#define DIGIT_0 0x0030 +#define DIGIT_9 0x0039 +#define LOWERCASE_A 0x0061 +#define UPPERCASE_A 0x0041 +#define LOWERCASE_Z 0x007A +#define UPPERCASE_Z 0x005A + +int +ufmt_digitvalue(char16_t c) +{ + if( ((c>=DIGIT_0)&&(c<=DIGIT_9)) || + ((c>=LOWERCASE_A)&&(c<=LOWERCASE_Z)) || + ((c>=UPPERCASE_A)&&(c<=UPPERCASE_Z)) ) + { + return c - DIGIT_0 - (c >= 0x0041 ? (c >= 0x0061 ? 39 : 7) : 0); + } + else + { + return -1; + } +} + +UBool +ufmt_isdigit(char16_t c, + int32_t radix) +{ + int digitVal = ufmt_digitvalue(c); + + return (UBool)(digitVal < radix && digitVal >= 0); +} + +#define TO_UC_DIGIT(a) a <= 9 ? (DIGIT_0 + a) : (0x0037 + a) +#define TO_LC_DIGIT(a) a <= 9 ? (DIGIT_0 + a) : (0x0057 + a) + +void +ufmt_64tou(char16_t *buffer, + int32_t *len, + uint64_t value, + uint8_t radix, + UBool uselower, + int32_t minDigits) +{ + int32_t length = 0; + uint32_t digit; + char16_t *left, *right, temp; + + do { + digit = (uint32_t)(value % radix); + value = value / radix; + buffer[length++] = (char16_t)(uselower ? 
TO_LC_DIGIT(digit) + : TO_UC_DIGIT(digit)); + } while(value); + + /* pad with zeroes to make it minDigits long */ + if(minDigits != -1 && length < minDigits) { + while(length < minDigits && length < *len) + buffer[length++] = DIGIT_0; /*zero padding */ + } + + /* reverse the buffer */ + left = buffer; + right = buffer + length; + while(left < --right) { + temp = *left; + *left++ = *right; + *right = temp; + } + + *len = length; +} + +void +ufmt_ptou(char16_t *buffer, + int32_t *len, + void *value, + UBool uselower) +{ + int32_t i; + int32_t length = 0; + uint8_t *ptrIdx = (uint8_t *)&value; + +#if U_IS_BIG_ENDIAN + for (i = 0; i < (int32_t)sizeof(void *); i++) +#else + for (i = (int32_t)sizeof(void *)-1; i >= 0 ; i--) +#endif + { + uint8_t byteVal = ptrIdx[i]; + uint16_t firstNibble = (uint16_t)(byteVal>>4); + uint16_t secondNibble = (uint16_t)(byteVal&0xF); + if (uselower) { + buffer[length++]=TO_LC_DIGIT(firstNibble); + buffer[length++]=TO_LC_DIGIT(secondNibble); + } + else { + buffer[length++]=TO_UC_DIGIT(firstNibble); + buffer[length++]=TO_UC_DIGIT(secondNibble); + } + } + + *len = length; +} + +int64_t +ufmt_uto64(const char16_t *buffer, + int32_t *len, + int8_t radix) +{ + const char16_t *limit; + int32_t count; + uint64_t result; + + + /* initialize parameters */ + limit = buffer + *len; + count = 0; + result = 0; + + /* iterate through buffer */ + while(ufmt_isdigit(*buffer, radix) && buffer < limit) { + + /* read the next digit */ + result *= radix; + result += ufmt_digitvalue(*buffer++); + + /* increment our count */ + ++count; + } + + *len = count; + return static_cast(result); +} + +#define NIBBLE_PER_BYTE 2 +void * +ufmt_utop(const char16_t *buffer, + int32_t *len) +{ + int32_t count, resultIdx, incVal, offset; + /* This union allows the pointer to be written as an array. 
*/ + union { + void *ptr; + uint8_t bytes[sizeof(void*)]; + } result; + + /* initialize variables */ + count = 0; + offset = 0; + result.ptr = nullptr; + + /* Skip the leading zeros */ + while(buffer[count] == DIGIT_0 || u_isspace(buffer[count])) { + count++; + offset++; + } + + /* iterate through buffer, stop when you hit the end */ + while(count < *len && ufmt_isdigit(buffer[count], 16)) { + /* increment the count consumed */ + ++count; + } + + /* detect overflow */ + if (count - offset > (int32_t)(sizeof(void*)*NIBBLE_PER_BYTE)) { + offset = count - (int32_t)(sizeof(void*)*NIBBLE_PER_BYTE); + } + + /* Initialize the direction of the input */ +#if U_IS_BIG_ENDIAN + incVal = -1; + resultIdx = (int32_t)(sizeof(void*) - 1); +#else + incVal = 1; + resultIdx = 0; +#endif + /* Write how much was consumed. */ + *len = count; + while(--count >= offset) { + /* Get the first nibble of the byte */ + uint8_t byte = (uint8_t)ufmt_digitvalue(buffer[count]); + + if (count > offset) { + /* Get the second nibble of the byte when available */ + byte = (uint8_t)(byte + (ufmt_digitvalue(buffer[--count]) << 4)); + } + /* Write the byte into the array */ + result.bytes[resultIdx] = byte; + resultIdx += incVal; + } + + return result.ptr; +} + +char16_t* +ufmt_defaultCPToUnicode(const char *s, int32_t sSize, + char16_t *target, int32_t tSize) +{ + char16_t *alias; + UErrorCode status = U_ZERO_ERROR; + UConverter *defConverter = u_getDefaultConverter(&status); + + if(U_FAILURE(status) || defConverter == 0) + return 0; + + if(sSize <= 0) { + sSize = static_cast(uprv_strlen(s)) + 1; + } + + /* perform the conversion in one swoop */ + if(target != 0) { + + alias = target; + ucnv_toUnicode(defConverter, &alias, alias + tSize, &s, s + sSize - 1, + nullptr, true, &status); + + + /* add the null terminator */ + *alias = 0x0000; + } + + u_releaseDefaultConverter(defConverter); + + return target; +} + + +#endif diff --git a/intl/icu/source/io/ufmt_cmn.h b/intl/icu/source/io/ufmt_cmn.h new file 
mode 100644 index 0000000000..d040fdce5a --- /dev/null +++ b/intl/icu/source/io/ufmt_cmn.h @@ -0,0 +1,162 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File ufmt_cmn.h +* +* Modification History: +* +* Date Name Description +* 12/02/98 stephen Creation. +* 03/12/99 stephen Modified for new C API. +* 03/15/99 stephen Added defaultCPToUnicode, unicodeToDefaultCP +****************************************************************************** +*/ + +#ifndef UFMT_CMN_H +#define UFMT_CMN_H + +#include "unicode/utypes.h" +#include "unicode/utf16.h" + +#define UFMT_DEFAULT_BUFFER_SIZE 128 +#define MAX_UCHAR_BUFFER_SIZE(buffer) ((int32_t)(sizeof(buffer)/(U16_MAX_LENGTH*sizeof(UChar)))) +#define MAX_UCHAR_BUFFER_NEEDED(strLen) ((strLen+1)*U16_MAX_LENGTH*sizeof(UChar)) + +/** + * Enum representing the possible argument types for uprintf/uscanf + */ +typedef enum ufmt_type_info { + ufmt_empty = 0, + ufmt_simple_percent, /* %% do nothing */ + ufmt_count, /* special flag for count */ + ufmt_int, /* int */ + ufmt_char, /* int, cast to char */ + ufmt_string, /* char* */ + ufmt_pointer, /* void* */ + ufmt_float, /* float */ + ufmt_double, /* double */ + ufmt_uchar, /* int, cast to UChar */ + ufmt_ustring /* UChar* */ + /*ufmt_wchar,*/ /* wchar_t */ + /*ufmt_wstring,*/ /* wchar_t* */ + /*ufmt_date,*/ /* Date */ + /*ufmt_last*/ +} ufmt_type_info; + +/** + * Union representing a uprintf/uscanf argument + */ +typedef union ufmt_args { + int64_t int64Value; /* int, UChar */ + float floatValue; /* float */ + double doubleValue; /* double */ + void *ptrValue; /* any pointer - void*, char*, wchar_t*, UChar* */ + /*wchar_t wcharValue;*/ /* 
wchar_t */ /* TODO: Should wchar_t be used? */ + /*UDate dateValue;*/ /* Date */ +} ufmt_args; + +/** + * Macro for determining the minimum of two numbers. + * @param a An integer + * @param b An integer + * @return a if a < b, b otherwise + */ +#define ufmt_min(a,b) ((a) < (b) ? (a) : (b)) + +/** + * Convert a UChar in hex radix to an integer value. + * @param c The UChar to convert. + * @return The integer value of c. + */ +int +ufmt_digitvalue(UChar c); + +/** + * Determine if a UChar is a digit for a specified radix. + * @param c The UChar to check. + * @param radix The desired radix. + * @return true if c is a digit in radix, false otherwise. + */ +UBool +ufmt_isdigit(UChar c, + int32_t radix); + +/** + * Convert an int64_t to a UChar* in a specified radix + * @param buffer The target buffer + * @param len On input, the size of buffer. On output, + * the number of UChars written to buffer. + * @param value The value to be converted + * @param radix The desired radix + * @param uselower true means lower case will be used, false means upper case + * @param minDigits The minimum number of digits for for the formatted number, + * which will be padded with zeroes. -1 means do not pad. + */ +void +ufmt_64tou(UChar *buffer, + int32_t *len, + uint64_t value, + uint8_t radix, + UBool uselower, + int32_t minDigits); + +/** + * It's like ufmt_64tou, but with a pointer. + * This functions avoids size constraints of 64-bit types. + * Pointers can be at 32-128 bits in size. + */ +void +ufmt_ptou(UChar *buffer, + int32_t *len, + void *value, + UBool uselower); + +/** + * Convert a UChar* in a specified radix to an int64_t. + * @param buffer The target buffer + * @param len On input, the size of buffer. On output, + * the number of UChars read from buffer. + * @param radix The desired radix + * @return The numeric value. 
+ */ +int64_t +ufmt_uto64(const UChar *buffer, + int32_t *len, + int8_t radix); + +/** + * Convert a UChar* in a specified radix to a pointer, + * @param buffer The target buffer + * @param len On input, the size of buffer. On output, + * the number of UChars read from buffer. + * @param radix The desired radix + * @return The pointer value. + */ +void * +ufmt_utop(const UChar *buffer, + int32_t *len); + +/** + * Convert a string from the default codepage to Unicode. + * @param s The string to convert, in the default codepage. + * @param sSize The size of s to convert. + * @param target The buffer to convert to. + * @param tSize The size of target + * @return A pointer to a newly allocated converted version of s, or 0 + * on error. + */ +UChar* +ufmt_defaultCPToUnicode(const char *s, int32_t sSize, + UChar *target, int32_t tSize); + + + +#endif + diff --git a/intl/icu/source/io/unicode/ustdio.h b/intl/icu/source/io/unicode/ustdio.h new file mode 100644 index 0000000000..5aad6b9bbe --- /dev/null +++ b/intl/icu/source/io/unicode/ustdio.h @@ -0,0 +1,1021 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File ustdio.h +* +* Modification History: +* +* Date Name Description +* 10/16/98 stephen Creation. +* 11/06/98 stephen Modified per code review. +* 03/12/99 stephen Modified for new C API. +* 07/19/99 stephen Minor doc update. 
+* 02/01/01 george Added sprintf & sscanf with all of its variants +****************************************************************************** +*/ + +#ifndef USTDIO_H +#define USTDIO_H + +#include +#include + +#include "unicode/utypes.h" +#include "unicode/ucnv.h" +#include "unicode/utrans.h" +#include "unicode/unum.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_CONVERSION + +/* + TODO + The following is a small list as to what is currently wrong/suggestions for + ustdio. + + * Make sure that * in the scanf format specification works for all formats. + * Each UFILE takes up at least 2KB. + Look into adding setvbuf() for configurable buffers. + * This library does buffering. The OS should do this for us already. Check on + this, and remove it from this library, if this is the case. Double buffering + wastes a lot of time and space. + * Test stdin and stdout with the u_f* functions + * Testing should be done for reading and writing multi-byte encodings, + and make sure that a character that is contained across buffer boundaries + works even for incomplete characters. + * Make sure that the last character is flushed when the file/string is closed. + * snprintf should follow the C99 standard for the return value, which is + return the number of characters (excluding the trailing '\0') + which would have been written to the destination string regardless + of available space. This is like pre-flighting. + * Everything that uses %s should do what operator>> does for UnicodeString. + It should convert one byte at a time, and once a character is + converted then check to see if it's whitespace or in the scanset. + If it's whitespace or in the scanset, put all the bytes back (do nothing + for sprintf/sscanf). + * If bad string data is encountered, make sure that the function fails + without memory leaks and the unconvertable characters are valid + substitution or are escaped characters. 
+ * u_fungetc() can't unget a character when it's at the beginning of the + internal conversion buffer. For example, read the buffer size # of + characters, and then ungetc to get the previous character that was + at the end of the last buffer. + * u_fflush() and u_fclose should return an int32_t like C99 functions. + 0 is returned if the operation was successful and EOF otherwise. + * u_fsettransliterator does not support U_READ side of transliteration. + * The format specifier should limit the size of a format or honor it in + order to prevent buffer overruns. (e.g. %256.256d). + * u_fread and u_fwrite don't exist. They're needed for reading and writing + data structures without any conversion. + * u_file_read and u_file_write are used for writing strings. u_fgets and + u_fputs or u_fread and u_fwrite should be used to do this. + * The width parameter for all scanf formats, including scanset, needs + better testing. This prevents buffer overflows. + * Figure out what is supposed to happen when a codepage is changed midstream. + Maybe a flush or a rewind is good enough. + * Make sure that a UFile opened with "rw" can be used after using + u_fflush with a u_frewind. + * scanf(%i) should detect what type of number to use. + * Add more testing of the alternate format, %# + * Look at newline handling of fputs/puts + * Think more about codeunit/codepoint error handling/support in %S,%s,%C,%c,%[] + * Complete the file documentation with proper doxygen formatting. + See http://oss.software.ibm.com/pipermail/icu/2003-July/005647.html +*/ + +/** + * \file + * \brief C API: Unicode stdio-like API + * + *

Unicode stdio-like C API

+ * + *

This API provides an stdio-like API wrapper around ICU's other + * formatting and parsing APIs. It is meant to ease the transition of adding + * Unicode support to preexisting applications using stdio. The following + * is a small list of noticeable differences between stdio and ICU I/O's + * ustdio implementation.

+ * + *
    + *
  • Locale specific formatting and parsing is only done with file IO.
  • + *
  • u_fstropen can be used to simulate file IO with strings. + * This is similar to the iostream API, and it allows locale specific + * formatting and parsing to be used.
  • + *
  • This API provides uniform formatting and parsing behavior between + * platforms (unlike the standard stdio implementations found on various + * platforms).
  • + *
  • This API is better suited for text data handling than binary data + * handling when compared to the typical stdio implementation.
  • + *
  • You can specify a Transliterator while using the file IO.
  • + *
  • You can specify a file's codepage separately from the default + * system codepage.
  • + *
+ * + *

Formatting and Parsing Specification

+ * + * General printf format:
+ * %[format modifier][width][.precision][type modifier][format] + * + * General scanf format:
+ * %[*][format modifier][width][type modifier][format] + * + + + + + + + + + + + + + + + + + + + + + +
format | default printf type | default scanf type | description
%E | double | float | Scientific with an uppercase exponent
%e | double | float | Scientific with a lowercase exponent
%G | double | float | Use %E or %f for best format
%g | double | float | Use %e or %f for best format
%f | double | float | Simple floating point without the exponent
%X | int32_t | int32_t | ustdio special uppercase hex radix formatting
%x | int32_t | int32_t | ustdio special lowercase hex radix formatting
%d | int32_t | int32_t | Decimal format
%i | int32_t | int32_t | Same as %d
%n | int32_t | int32_t | count (write the number of UTF-16 codeunits read/written)
%o | int32_t | int32_t | ustdio special octal radix formatting
%u | uint32_t | uint32_t | Decimal format
%p | void * | void * | Prints the pointer value
%s | char * | char * | Use default converter or specified converter from fopen
%c | char | char | Use default converter or specified converter from fopen
+When width is specified for scanf, this acts like a non-NULL-terminated char * string.
+By default, only one char is written.
%S | UChar * | UChar * | Null terminated UTF-16 string
%C | UChar | UChar | 16-bit Unicode code unit
+When width is specified for scanf, this acts like a non-NULL-terminated UChar * string
+By default, only one codepoint is written.
%[] | - | UChar * | Null terminated UTF-16 string which contains the filtered set of characters specified by the UnicodeSet
%% | - | - | Show a percent sign
+ +Format modifiers + + + + + + + + + + + + + + + + + + + + + + +
modifier | formats | type | comments
%h | %d, %i, %o, %x | int16_t | short format
%h | %u | uint16_t | short format
%h | c | char | (Unimplemented) Use invariant converter
%h | s | char * | (Unimplemented) Use invariant converter
%h | C | char | (Unimplemented) 8-bit Unicode code unit
%h | S | char * | (Unimplemented) Null terminated UTF-8 string
%l | %d, %i, %o, %x | int32_t | long format (no effect)
%l | %u | uint32_t | long format (no effect)
%l | c | N/A | (Unimplemented) Reserved for future implementation
%l | s | N/A | (Unimplemented) Reserved for future implementation
%l | C | UChar32 | (Unimplemented) 32-bit Unicode code unit
%l | S | UChar32 * | (Unimplemented) Null terminated UTF-32 string
%ll | %d, %i, %o, %x | int64_t | long long format
%ll | %u | uint64_t | (Unimplemented) long long format
%- | all | N/A | Left justify
%+ | %d, %i, %o, %x, %e, %f, %g, %E, %G | N/A | Always show the plus or minus sign. Needs data for plus sign.
%  | %d, %i, %o, %x, %e, %f, %g, %E, %G | N/A | Instead of a "+" output a blank character for positive numbers.
%# | %d, %i, %o, %x, %e, %f, %g, %E, %G | N/A | Precede octal value with 0, hex with 0x and show the + decimal point for floats.
%n | all | N/A | Width of input/output. num is an actual number from 0 to + some large number.
%.n | %e, %f, %g, %E, %F, %G | N/A | Significant digits precision. num is an actual number from + 0 to some large number.
If * is used in printf, then the precision is passed in as an argument before the number to be formatted.
+ +printf modifier +%* int32_t Next argument after this one specifies the width + +scanf modifier +%* N/A This field is scanned, but not stored + +

If you are using this C API instead of the ustream.h API for C++, +you can use one of the following u_fprintf examples to display a UnicodeString.

+ +

+    UFILE *out = u_finit(stdout, NULL, NULL);
+    UnicodeString string1("string 1");
+    UnicodeString string2("string 2");
+    u_fprintf(out, "%S\n", string1.getTerminatedBuffer());
+    u_fprintf(out, "%.*S\n", string2.length(), string2.getBuffer());
+    u_fclose(out);
+
+ + */ + + +/** + * When an end of file is encountered, this value can be returned. + * @see u_fgetc + * @stable 3.0 + */ +#define U_EOF 0xFFFF + +/** Forward declaration of a Unicode-aware file @stable 3.0 */ +typedef struct UFILE UFILE; + +/** + * Enum for which direction of stream a transliterator applies to. + * @see u_fsettransliterator + * @stable ICU 3.0 + */ +typedef enum { + U_READ = 1, + U_WRITE = 2, + U_READWRITE =3 /* == (U_READ | U_WRITE) */ +} UFileDirection; + +/** + * Open a UFILE. + * A UFILE is a wrapper around a FILE* that is locale and codepage aware. + * That is, data written to a UFILE will be formatted using the conventions + * specified by that UFILE's Locale; this data will be in the character set + * specified by that UFILE's codepage. + * @param filename The name of the file to open. Must be 0-terminated. + * @param perm The read/write permission for the UFILE; one of "r", "w", "rw" + * @param locale The locale whose conventions will be used to format + * and parse output. If this parameter is NULL, the default locale will + * be used. + * @param codepage The codepage in which data will be written to and + * read from the file. If this parameter is NULL the system default codepage + * will be used. + * @return A new UFILE, or NULL if an error occurred. + * @stable ICU 3.0 + */ +U_CAPI UFILE* U_EXPORT2 +u_fopen(const char *filename, + const char *perm, + const char *locale, + const char *codepage); + +/** + * Open a UFILE with a UChar* filename + * A UFILE is a wrapper around a FILE* that is locale and codepage aware. + * That is, data written to a UFILE will be formatted using the conventions + * specified by that UFILE's Locale; this data will be in the character set + * specified by that UFILE's codepage. + * @param filename The name of the file to open. Must be 0-terminated. 
+ * @param perm The read/write permission for the UFILE; one of "r", "w", "rw" + * @param locale The locale whose conventions will be used to format + * and parse output. If this parameter is NULL, the default locale will + * be used. + * @param codepage The codepage in which data will be written to and + * read from the file. If this parameter is NULL the system default codepage + * will be used. + * @return A new UFILE, or NULL if an error occurred. + * @stable ICU 54 + */ +U_CAPI UFILE* U_EXPORT2 +u_fopen_u(const UChar *filename, + const char *perm, + const char *locale, + const char *codepage); + +/** + * Open a UFILE on top of an existing FILE* stream. The FILE* stream + * ownership remains with the caller. To have the UFILE take over + * ownership and responsibility for the FILE* stream, use the + * function u_fadopt. + * @param f The FILE* to which this UFILE will attach and use. + * @param locale The locale whose conventions will be used to format + * and parse output. If this parameter is NULL, the default locale will + * be used. + * @param codepage The codepage in which data will be written to and + * read from the file. If this parameter is NULL, data will be written and + * read using the default codepage for locale, unless locale + * is NULL, in which case the system default codepage will be used. + * @return A new UFILE, or NULL if an error occurred. + * @stable ICU 3.0 + */ +U_CAPI UFILE* U_EXPORT2 +u_finit(FILE *f, + const char *locale, + const char *codepage); + +/** + * Open a UFILE on top of an existing FILE* stream. The FILE* stream + * ownership is transferred to the new UFILE. It will be closed when the + * UFILE is closed. + * @param f The FILE* which this UFILE will take ownership of. + * @param locale The locale whose conventions will be used to format + * and parse output. If this parameter is NULL, the default locale will + * be used. + * @param codepage The codepage in which data will be written to and + * read from the file. 
If this parameter is NULL, data will be written and + * read using the default codepage for locale, unless locale + * is NULL, in which case the system default codepage will be used. + * @return A new UFILE, or NULL if an error occurred. If an error occurs + * the ownership of the FILE* stream remains with the caller. + * @stable ICU 4.4 + */ +U_CAPI UFILE* U_EXPORT2 +u_fadopt(FILE *f, + const char *locale, + const char *codepage); + +/** + * Create a UFILE that can be used for localized formatting or parsing. + * The u_sprintf and u_sscanf functions do not read or write numbers for a + * specific locale. The ustdio.h file functions can be used on this UFILE. + * The string is usable once u_fclose or u_fflush has been called on the + * returned UFILE. + * @param stringBuf The string used for reading or writing. + * @param capacity The number of code units available for use in stringBuf + * @param locale The locale whose conventions will be used to format + * and parse output. If this parameter is NULL, the default locale will + * be used. + * @return A new UFILE, or NULL if an error occurred. + * @stable ICU 3.0 + */ +U_CAPI UFILE* U_EXPORT2 +u_fstropen(UChar *stringBuf, + int32_t capacity, + const char *locale); + +/** + * Close a UFILE. Implies u_fflush first. + * @param file The UFILE to close. + * @stable ICU 3.0 + * @see u_fflush + */ +U_CAPI void U_EXPORT2 +u_fclose(UFILE *file); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUFILEPointer + * "Smart pointer" class, closes a UFILE via u_fclose(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUFILEPointer, UFILE, u_fclose); + +U_NAMESPACE_END + +#endif + +/** + * Tests if the UFILE is at the end of the file stream. + * @param f The UFILE from which to read. + * @return Returns true after the first read operation that attempts to + * read past the end of the file. 
It returns false if the current position is + * not end of file. + * @stable ICU 3.0 +*/ +U_CAPI UBool U_EXPORT2 +u_feof(UFILE *f); + +/** + * Flush output of a UFILE. Implies a flush of + * converter/transliterator state. (That is, a logical break is + * made in the output stream - for example if a different type of + * output is desired.) The underlying OS level file is also flushed. + * Note that for a stateful encoding, the converter may write additional + * bytes to return the stream to default state. + * @param file The UFILE to flush. + * @stable ICU 3.0 + */ +U_CAPI void U_EXPORT2 +u_fflush(UFILE *file); + +/** + * Rewind the file pointer to the beginning of the file. + * @param file The UFILE to rewind. + * @stable ICU 3.0 + */ +U_CAPI void +u_frewind(UFILE *file); + +/** + * Get the FILE* associated with a UFILE. + * @param f The UFILE + * @return A FILE*, owned by the UFILE. (The FILE must not be modified or closed) + * @stable ICU 3.0 + */ +U_CAPI FILE* U_EXPORT2 +u_fgetfile(UFILE *f); + +#if !UCONFIG_NO_FORMATTING + +/** + * Get the locale whose conventions are used to format and parse output. + * This is the same locale passed in the preceding call to u_fsetlocale + * or u_fopen. + * @param file The UFILE to query. + * @return The locale whose conventions are used to format and parse output. + * @stable ICU 3.0 + */ +U_CAPI const char* U_EXPORT2 +u_fgetlocale(UFILE *file); + +/** + * Set the locale whose conventions will be used to format and parse output. + * @param locale The locale whose conventions will be used to format + * and parse output. + * @param file The UFILE to set. + * @return 0 if successful, otherwise a negative number. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_fsetlocale(UFILE *file, + const char *locale); + +#endif + +/** + * Get the codepage in which data is written to and read from the UFILE. + * This is the same codepage passed in the preceding call to + * u_fsetcodepage or u_fopen.
+ * @param file The UFILE to query. + * @return The codepage in which data is written to and read from the UFILE, + * or NULL if an error occurred. + * @stable ICU 3.0 + */ +U_CAPI const char* U_EXPORT2 +u_fgetcodepage(UFILE *file); + +/** + * Set the codepage in which data will be written to and read from the UFILE. + * All Unicode data written to the UFILE will be converted to this codepage + * before it is written to the underlying FILE*. It is generally a bad idea to + * mix codepages within a file. This should only be called right + * after opening the UFILE, or after calling u_frewind. + * @param codepage The codepage in which data will be written to + * and read from the file. For example "latin-1" or "ibm-943". + * A value of NULL means the default codepage for the UFILE's current + * locale will be used. + * @param file The UFILE to set. + * @return 0 if successful, otherwise a negative number. + * @see u_frewind + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_fsetcodepage(const char *codepage, + UFILE *file); + + +/** + * Returns an alias to the converter being used for this file. + * @param f The UFILE to get the value from + * @return alias to the converter (The converter must not be modified or closed) + * @stable ICU 3.0 + */ +U_CAPI UConverter* U_EXPORT2 u_fgetConverter(UFILE *f); + +#if !UCONFIG_NO_FORMATTING +/** + * Returns an alias to the number formatter being used for this file. + * @param f The UFILE to get the value from + * @return alias to the number formatter (The formatter must not be modified or closed) + * @stable ICU 51 +*/ + U_CAPI const UNumberFormat* U_EXPORT2 u_fgetNumberFormat(UFILE *f); + +/* Output functions */ + +/** + * Write formatted data to stdout. + * @param patternSpecification A pattern specifying how u_printf will + * interpret the variable arguments received and format the data.
+ * @return The number of Unicode characters written to stdout + * @stable ICU 49 + */ +U_CAPI int32_t U_EXPORT2 +u_printf(const char *patternSpecification, + ... ); + +/** + * Write formatted data to a UFILE. + * @param f The UFILE to which to write. + * @param patternSpecification A pattern specifying how u_fprintf will + * interpret the variable arguments received and format the data. + * @return The number of Unicode characters written to f. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_fprintf(UFILE *f, + const char *patternSpecification, + ... ); + +/** + * Write formatted data to a UFILE. + * This is identical to u_fprintf, except that it will + * not call va_start and va_end. + * @param f The UFILE to which to write. + * @param patternSpecification A pattern specifying how u_fprintf will + * interpret the variable arguments received and format the data. + * @param ap The argument list to use. + * @return The number of Unicode characters written to f. + * @see u_fprintf + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vfprintf(UFILE *f, + const char *patternSpecification, + va_list ap); + +/** + * Write formatted data to stdout. + * @param patternSpecification A pattern specifying how u_printf_u will + * interpret the variable arguments received and format the data. + * @return The number of Unicode characters written to stdout + * @stable ICU 49 + */ +U_CAPI int32_t U_EXPORT2 +u_printf_u(const UChar *patternSpecification, + ... ); + +/** + * Get a UFILE for stdout. + * @return UFILE that writes to stdout + * @stable ICU 49 + */ +U_CAPI UFILE * U_EXPORT2 +u_get_stdout(void); + +/** + * Write formatted data to a UFILE. + * @param f The UFILE to which to write. + * @param patternSpecification A pattern specifying how u_fprintf will + * interpret the variable arguments received and format the data. + * @return The number of Unicode characters written to f. 
+ * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_fprintf_u(UFILE *f, + const UChar *patternSpecification, + ... ); + +/** + * Write formatted data to a UFILE. + * This is identical to u_fprintf_u, except that it will + * not call va_start and va_end. + * @param f The UFILE to which to write. + * @param patternSpecification A pattern specifying how u_fprintf will + * interpret the variable arguments received and format the data. + * @param ap The argument list to use. + * @return The number of Unicode characters written to f. + * @see u_fprintf_u + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vfprintf_u(UFILE *f, + const UChar *patternSpecification, + va_list ap); +#endif +/** + * Write a Unicode string to a UFILE. The null (U+0000) terminated UChar* + * s will be written to f, excluding the NULL terminator. + * A newline will be added to f. + * @param s The UChar* to write. + * @param f The UFILE to which to write. + * @return A non-negative number if successful, EOF otherwise. + * @see u_file_write + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_fputs(const UChar *s, + UFILE *f); + +/** + * Write a UChar to a UFILE. + * @param uc The UChar to write. + * @param f The UFILE to which to write. + * @return The character written if successful, EOF otherwise. + * @stable ICU 3.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_fputc(UChar32 uc, + UFILE *f); + +/** + * Write Unicode to a UFILE. + * The ustring passed in will be converted to the UFILE's underlying + * codepage before it is written. + * @param ustring A pointer to the Unicode data to write. + * @param count The number of Unicode characters to write + * @param f The UFILE to which to write. + * @return The number of Unicode characters written. + * @see u_fputs + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_file_write(const UChar *ustring, + int32_t count, + UFILE *f); + + +/* Input functions */ +#if !UCONFIG_NO_FORMATTING + +/** + * Read formatted data from a UFILE.
+ * @param f The UFILE from which to read. + * @param patternSpecification A pattern specifying how u_fscanf will + * interpret the variable arguments received and parse the data. + * @return The number of items successfully converted and assigned, or EOF + * if an error occurred. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_fscanf(UFILE *f, + const char *patternSpecification, + ... ); + +/** + * Read formatted data from a UFILE. + * This is identical to u_fscanf, except that it will + * not call va_start and va_end. + * @param f The UFILE from which to read. + * @param patternSpecification A pattern specifying how u_fscanf will + * interpret the variable arguments received and parse the data. + * @param ap The argument list to use. + * @return The number of items successfully converted and assigned, or EOF + * if an error occurred. + * @see u_fscanf + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vfscanf(UFILE *f, + const char *patternSpecification, + va_list ap); + +/** + * Read formatted data from a UFILE. + * @param f The UFILE from which to read. + * @param patternSpecification A pattern specifying how u_fscanf will + * interpret the variable arguments received and parse the data. + * @return The number of items successfully converted and assigned, or EOF + * if an error occurred. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_fscanf_u(UFILE *f, + const UChar *patternSpecification, + ... ); + +/** + * Read formatted data from a UFILE. + * This is identical to u_fscanf_u, except that it will + * not call va_start and va_end. + * @param f The UFILE from which to read. + * @param patternSpecification A pattern specifying how u_fscanf will + * interpret the variable arguments received and parse the data. + * @param ap The argument list to use. + * @return The number of items successfully converted and assigned, or EOF + * if an error occurred. 
+ * @see u_fscanf_u + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vfscanf_u(UFILE *f, + const UChar *patternSpecification, + va_list ap); +#endif + +/** + * Read one line of text into a UChar* string from a UFILE. The newline + * at the end of the line is read into the string. The string is always + * null terminated. + * @param f The UFILE from which to read. + * @param n The maximum number of characters - 1 to read. + * @param s The UChar* to receive the read data. Characters will be + * stored successively in s until a newline or EOF is + * reached. A null character (U+0000) will be appended to s. + * @return A pointer to s, or NULL if no characters were available. + * @stable ICU 3.0 + */ +U_CAPI UChar* U_EXPORT2 +u_fgets(UChar *s, + int32_t n, + UFILE *f); + +/** + * Read a UChar from a UFILE. It is recommended that u_fgetcx + * be used instead for proper parsing functions, but sometimes reading + * code units is needed instead of codepoints. + * + * @param f The UFILE from which to read. + * @return The UChar value read, or U+FFFF if no character was available. + * @stable ICU 3.0 + */ +U_CAPI UChar U_EXPORT2 +u_fgetc(UFILE *f); + +/** + * Read a UChar32 from a UFILE. + * + * @param f The UFILE from which to read. + * @return The UChar32 value read, or U_EOF if no character was + * available, or U+FFFFFFFF if an ill-formed character was + * encountered. + * @see u_unescape() + * @stable ICU 3.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_fgetcx(UFILE *f); + +/** + * Unget a UChar from a UFILE. + * If this function is not the first to operate on f after a call + * to u_fgetc, the results are undefined. + * If this function is passed a character that was not received from the + * previous u_fgetc or u_fgetcx call, the results are undefined. + * @param c The UChar to put back on the stream. + * @param f The UFILE to receive c. + * @return The UChar32 value put back if successful, U_EOF otherwise.
+ * @stable ICU 3.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_fungetc(UChar32 c, + UFILE *f); + +/** + * Read Unicode from a UFILE. + * Bytes will be converted from the UFILE's underlying codepage, with + * subsequent conversion to Unicode. The data will not be NULL terminated. + * @param chars A pointer to receive the Unicode data. + * @param count The number of Unicode characters to read. + * @param f The UFILE from which to read. + * @return The number of Unicode characters read. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_file_read(UChar *chars, + int32_t count, + UFILE *f); + +#if !UCONFIG_NO_TRANSLITERATION + +/** + * Set a transliterator on the UFILE. The transliterator will be owned by the + * UFILE. + * @param file The UFILE to set transliteration on + * @param adopt The UTransliterator to set. Can be NULL, which will + * mean that no transliteration is used. + * @param direction either U_READ, U_WRITE, or U_READWRITE - sets + * which direction the transliterator is to be applied to. If + * U_READWRITE, the "Read" transliteration will be in the inverse + * direction. + * @param status ICU error code. + * @return The previously set transliterator, owned by the + * caller. If U_READWRITE is specified, only the WRITE transliterator + * is returned. In most cases, the caller should call utrans_close() + * on the result of this function. + * @stable ICU 3.0 + */ +U_CAPI UTransliterator* U_EXPORT2 +u_fsettransliterator(UFILE *file, UFileDirection direction, + UTransliterator *adopt, UErrorCode *status); + +#endif + + +/* Output string functions */ +#if !UCONFIG_NO_FORMATTING + + +/** + * Write formatted data to a Unicode string. + * + * @param buffer The Unicode String to which to write. + * @param patternSpecification A pattern specifying how u_sprintf will + * interpret the variable arguments received and format the data. + * @return The number of Unicode code units written to buffer. This + * does not include the terminating null character. 
+ * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_sprintf(UChar *buffer, + const char *patternSpecification, + ... ); + +/** + * Write formatted data to a Unicode string. When the number of code units + * required to store the data exceeds count, then count code + * units of data are stored in buffer and a negative value is + * returned. When the number of code units required to store the data equals + * count, the string is not null terminated and count is + * returned. + * + * @param buffer The Unicode String to which to write. + * @param count The number of code units to read. + * @param patternSpecification A pattern specifying how u_sprintf will + * interpret the variable arguments received and format the data. + * @return The number of Unicode characters that would have been written to + * buffer had count been sufficiently large. This does not include + * the terminating null character. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_snprintf(UChar *buffer, + int32_t count, + const char *patternSpecification, + ... ); + +/** + * Write formatted data to a Unicode string. + * This is identical to u_sprintf, except that it will + * not call va_start and va_end. + * + * @param buffer The Unicode string to which to write. + * @param patternSpecification A pattern specifying how u_sprintf will + * interpret the variable arguments received and format the data. + * @param ap The argument list to use. + * @return The number of Unicode characters written to buffer. + * @see u_sprintf + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vsprintf(UChar *buffer, + const char *patternSpecification, + va_list ap); + +/** + * Write formatted data to a Unicode string. + * This is identical to u_snprintf, except that it will + * not call va_start and va_end.

+ * When the number of code units required to store the data exceeds + * count, then count code units of data are stored in + * buffer and a negative value is returned. When the number of code + * units required to store the data equals count, the string is not + * null terminated and count is returned. + * + * @param buffer The Unicode string to which to write. + * @param count The number of code units to read. + * @param patternSpecification A pattern specifying how u_sprintf will + * interpret the variable arguments received and format the data. + * @param ap The argument list to use. + * @return The number of Unicode characters that would have been written to + * buffer had count been sufficiently large. + * @see u_sprintf + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vsnprintf(UChar *buffer, + int32_t count, + const char *patternSpecification, + va_list ap); + +/** + * Write formatted data to a Unicode string. + * + * @param buffer The Unicode string to which to write. + * @param patternSpecification A pattern specifying how u_sprintf will + * interpret the variable arguments received and format the data. + * @return The number of Unicode characters written to buffer. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_sprintf_u(UChar *buffer, + const UChar *patternSpecification, + ... ); + +/** + * Write formatted data to a Unicode string. When the number of code units + * required to store the data exceeds count, then count code + * units of data are stored in buffer and a negative value is + * returned. When the number of code units required to store the data equals + * count, the string is not null terminated and count is + * returned. + * + * @param buffer The Unicode string to which to write. + * @param count The number of code units to read. + * @param patternSpecification A pattern specifying how u_sprintf will + * interpret the variable arguments received and format the data. 
+ * @return The number of Unicode characters that would have been written to + * buffer had count been sufficiently large. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_snprintf_u(UChar *buffer, + int32_t count, + const UChar *patternSpecification, + ... ); + +/** + * Write formatted data to a Unicode string. + * This is identical to u_sprintf_u, except that it will + * not call va_start and va_end. + * + * @param buffer The Unicode string to which to write. + * @param patternSpecification A pattern specifying how u_sprintf will + * interpret the variable arguments received and format the data. + * @param ap The argument list to use. + * @return The number of Unicode characters written to f. + * @see u_sprintf_u + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vsprintf_u(UChar *buffer, + const UChar *patternSpecification, + va_list ap); + +/** + * Write formatted data to a Unicode string. + * This is identical to u_snprintf_u, except that it will + * not call va_start and va_end. + * When the number of code units required to store the data exceeds + * count, then count code units of data are stored in + * buffer and a negative value is returned. When the number of code + * units required to store the data equals count, the string is not + * null terminated and count is returned. + * + * @param buffer The Unicode string to which to write. + * @param count The number of code units to read. + * @param patternSpecification A pattern specifying how u_sprintf will + * interpret the variable arguments received and format the data. + * @param ap The argument list to use. + * @return The number of Unicode characters that would have been written to + * f had count been sufficiently large. + * @see u_sprintf_u + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vsnprintf_u(UChar *buffer, + int32_t count, + const UChar *patternSpecification, + va_list ap); + +/* Input string functions */ + +/** + * Read formatted data from a Unicode string. 
+ * + * @param buffer The Unicode string from which to read. + * @param patternSpecification A pattern specifying how u_sscanf will + * interpret the variable arguments received and parse the data. + * @return The number of items successfully converted and assigned, or EOF + * if an error occurred. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_sscanf(const UChar *buffer, + const char *patternSpecification, + ... ); + +/** + * Read formatted data from a Unicode string. + * This is identical to u_sscanf, except that it will + * not call va_start and va_end. + * + * @param buffer The Unicode string from which to read. + * @param patternSpecification A pattern specifying how u_sscanf will + * interpret the variable arguments received and parse the data. + * @param ap The argument list to use. + * @return The number of items successfully converted and assigned, or EOF + * if an error occurred. + * @see u_sscanf + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vsscanf(const UChar *buffer, + const char *patternSpecification, + va_list ap); + +/** + * Read formatted data from a Unicode string. + * + * @param buffer The Unicode string from which to read. + * @param patternSpecification A pattern specifying how u_sscanf will + * interpret the variable arguments received and parse the data. + * @return The number of items successfully converted and assigned, or EOF + * if an error occurred. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_sscanf_u(const UChar *buffer, + const UChar *patternSpecification, + ... ); + +/** + * Read formatted data from a Unicode string. + * This is identical to u_sscanf_u, except that it will + * not call va_start and va_end. + * + * @param buffer The Unicode string from which to read. + * @param patternSpecification A pattern specifying how u_sscanf will + * interpret the variable arguments received and parse the data. + * @param ap The argument list to use. 
+ * @return The number of items successfully converted and assigned, or EOF + * if an error occurred. + * @see u_sscanf_u + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +u_vsscanf_u(const UChar *buffer, + const UChar *patternSpecification, + va_list ap); + + +#endif +#endif +#endif + + diff --git a/intl/icu/source/io/unicode/ustream.h b/intl/icu/source/io/unicode/ustream.h new file mode 100644 index 0000000000..927342cb03 --- /dev/null +++ b/intl/icu/source/io/unicode/ustream.h @@ -0,0 +1,69 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2001-2014 International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* FILE NAME : ustream.h +* +* Modification History: +* +* Date Name Description +* 06/25/2001 grhoten Move iostream from unistr.h +****************************************************************************** +*/ + +#ifndef USTREAM_H +#define USTREAM_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/unistr.h" + +#if !UCONFIG_NO_CONVERSION // not available without conversion + +/** + * \file + * \brief C++ API: Unicode iostream like API + * + * At this time, this API is very limited. It contains + * operator<< and operator>> for UnicodeString manipulation with the + * C++ I/O stream API. + */ + +#if defined(__GLIBCXX__) +namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364 +#endif + +#include + +U_NAMESPACE_BEGIN + +/** + * Write the contents of a UnicodeString to a C++ ostream. This functions writes + * the characters in a UnicodeString to an ostream. The UChars in the + * UnicodeString are converted to the char based ostream with the default + * converter. 
+ * @stable 3.0 + */ +U_IO_API std::ostream & U_EXPORT2 operator<<(std::ostream& stream, const UnicodeString& s); + +/** + * Write the contents from a C++ istream to a UnicodeString. The UChars in the + * UnicodeString are converted from the char based istream with the default + * converter. + * @stable 3.0 + */ +U_IO_API std::istream & U_EXPORT2 operator>>(std::istream& stream, UnicodeString& s); +U_NAMESPACE_END + +#endif + +/* No operator for UChar because it can conflict with wchar_t */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/intl/icu/source/io/uprintf.cpp b/intl/icu/source/io/uprintf.cpp new file mode 100644 index 0000000000..7effa61993 --- /dev/null +++ b/intl/icu/source/io/uprintf.cpp @@ -0,0 +1,219 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File uprintf.cpp +* +* Modification History: +* +* Date Name Description +* 11/19/98 stephen Creation. +* 03/12/99 stephen Modified for new C API. +* Added conversion from default codepage. 
+* 08/07/2003 george Reunify printf implementations +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/ustdio.h" +#include "unicode/ustring.h" +#include "unicode/unum.h" +#include "unicode/udat.h" +#include "unicode/putil.h" + +#include "cmemory.h" +#include "locbund.h" +#include "mutex.h" +#include "uassert.h" +#include "uprintf.h" +#include "ufile.h" +#include "ucln_io.h" + +U_NAMESPACE_USE + +static UFILE *gStdOut = nullptr; +static UInitOnce gStdOutInitOnce {}; + +static UBool U_CALLCONV uprintf_cleanup() +{ + if (gStdOut != nullptr) { + u_fclose(gStdOut); + gStdOut = nullptr; + } + gStdOutInitOnce.reset(); + return true; +} + +static void U_CALLCONV u_stdout_init() { + U_ASSERT(gStdOut == nullptr); + gStdOut = u_finit(stdout, nullptr, nullptr); + ucln_io_registerCleanup(UCLN_IO_PRINTF, &uprintf_cleanup); +} + +U_CAPI UFILE * U_EXPORT2 +u_get_stdout() +{ + umtx_initOnce(gStdOutInitOnce, &u_stdout_init); + return gStdOut; +} + +static int32_t U_EXPORT2 +u_printf_write(void *context, + const char16_t *str, + int32_t count) +{ + return u_file_write(str, count, (UFILE *)context); +} + +static int32_t +u_printf_pad_and_justify(void *context, + const u_printf_spec_info *info, + const char16_t *result, + int32_t resultLen) +{ + UFILE *output = (UFILE *)context; + int32_t written, i; + + /* pad and justify, if needed */ + if(info->fWidth != -1 && resultLen < info->fWidth) { + /* left justify */ + if(info->fLeft) { + written = u_file_write(result, resultLen, output); + for(i = 0; i < info->fWidth - resultLen; ++i) { + written += u_file_write(&info->fPadChar, 1, output); + } + } + /* right justify */ + else { + written = 0; + for(i = 0; i < info->fWidth - resultLen; ++i) { + written += u_file_write(&info->fPadChar, 1, output); + } + written += u_file_write(result, resultLen, output); + } + } + /* just write the formatted output */ + 
else { + written = u_file_write(result, resultLen, output); + } + + return written; +} + +U_CAPI int32_t U_EXPORT2 +u_fprintf( UFILE *f, + const char *patternSpecification, + ... ) +{ + va_list ap; + int32_t count; + + va_start(ap, patternSpecification); + count = u_vfprintf(f, patternSpecification, ap); + va_end(ap); + + return count; +} + +U_CAPI int32_t U_EXPORT2 +u_printf(const char *patternSpecification, + ...) +{ + va_list ap; + int32_t count; + va_start(ap, patternSpecification); + count = u_vfprintf(u_get_stdout(), patternSpecification, ap); + va_end(ap); + return count; +} + +U_CAPI int32_t U_EXPORT2 +u_fprintf_u( UFILE *f, + const char16_t *patternSpecification, + ... ) +{ + va_list ap; + int32_t count; + + va_start(ap, patternSpecification); + count = u_vfprintf_u(f, patternSpecification, ap); + va_end(ap); + + return count; +} + +U_CAPI int32_t U_EXPORT2 +u_printf_u(const char16_t *patternSpecification, + ...) +{ + va_list ap; + int32_t count; + va_start(ap, patternSpecification); + count = u_vfprintf_u(u_get_stdout(), patternSpecification, ap); + va_end(ap); + return count; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vfprintf( UFILE *f, + const char *patternSpecification, + va_list ap) +{ + int32_t count; + char16_t *pattern; + char16_t buffer[UFMT_DEFAULT_BUFFER_SIZE]; + size_t size = strlen(patternSpecification) + 1; + + /* convert from the default codepage to Unicode */ + if (size >= MAX_UCHAR_BUFFER_SIZE(buffer)) { + pattern = (char16_t *)uprv_malloc(size * sizeof(char16_t)); + if(pattern == 0) { + return 0; + } + } + else { + pattern = buffer; + } + /* explicit narrowing of the size_t length for the conversion call */ + u_charsToUChars(patternSpecification, pattern, static_cast<int32_t>(size)); + + /* do the work */ + count = u_vfprintf_u(f, pattern, ap); + + /* clean up */ + if (pattern != buffer) { + uprv_free(pattern); + } + + return count; +} + +static const u_printf_stream_handler g_stream_handler = { + u_printf_write, + u_printf_pad_and_justify +}; + +/* Formats the va_list arguments according to the char16_t pattern and writes the */ +/* result to f through g_stream_handler. Returns the number of UChars written, */ +/* or 0 when f is null. */ +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vfprintf_u( UFILE *f, + const char16_t *patternSpecification, + va_list ap) +{ + int32_t written = 0; /* haven't written anything yet */ + + /* u_printf()/u_printf_u() forward u_get_stdout() here unchecked; without a */ + /* stream there is nothing to write to and &f->str.fBundle must not be formed */ + if (f == nullptr) { + return 0; + } + + /* parse and print the whole format string */ + u_printf_parse(&g_stream_handler, patternSpecification, f, nullptr, &f->str.fBundle, &written, ap); + + /* return # of UChars written */ + return written; +} + +#endif /* #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION */ + diff --git a/intl/icu/source/io/uprintf.h b/intl/icu/source/io/uprintf.h new file mode 100644 index 0000000000..0fd6066e56 --- /dev/null +++ b/intl/icu/source/io/uprintf.h @@ -0,0 +1,103 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2006, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File uprintf.h +* +* Modification History: +* +* Date Name Description +* 11/19/98 stephen Creation.
+* 03/12/99 stephen Modified for new C API. +****************************************************************************** +*/ + +#ifndef UPRINTF_H +#define UPRINTF_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/ustdio.h" +#include "ufmt_cmn.h" +#include "locbund.h" + +/** + * Struct encapsulating a single uprintf format specification. + */ +typedef struct u_printf_spec_info { + int32_t fPrecision; /* Precision */ + int32_t fWidth; /* Width */ + + UChar fOrigSpec; /* Conversion specification */ + UChar fSpec; /* Conversion specification */ + UChar fPadChar; /* Padding character */ + + UBool fAlt; /* # flag */ + UBool fSpace; /* Space flag */ + UBool fLeft; /* - flag */ + UBool fShowSign; /* + flag */ + UBool fZero; /* 0 flag */ + + UBool fIsLongDouble; /* L flag */ + UBool fIsShort; /* h flag */ + UBool fIsLong; /* l flag */ + UBool fIsLongLong; /* ll flag */ +} u_printf_spec_info; + +typedef int32_t U_EXPORT2 +u_printf_write_stream(void *context, + const UChar *str, + int32_t count); + +typedef int32_t U_EXPORT2 +u_printf_pad_and_justify_stream(void *context, + const u_printf_spec_info *info, + const UChar *result, + int32_t resultLen); + +typedef struct u_printf_stream_handler { + u_printf_write_stream *write; + u_printf_pad_and_justify_stream *pad_and_justify; +} u_printf_stream_handler; + +/* Used by sprintf */ +typedef struct u_localized_print_string { + UChar *str; /* Place to write the string */ + int32_t available;/* Number of codeunits available to write to */ + int32_t len; /* Maximum number of code units that can be written to output */ + + ULocaleBundle fBundle; /* formatters */ +} u_localized_print_string; + +#define UP_PERCENT 0x0025 + +/** + * Parse a single u_printf format string. + * @param fmt A pointer to a '%' character in a u_printf format specification. + * @param spec A pointer to a u_printf_spec to receive the parsed + * format specifier. 
+ * @param locStringContext If present, will make sure that it will only write + * to the buffer when space is available. It's done this way because + * va_list sometimes can't be passed by pointer. + * @return The number of characters contained in this specifier. + */ +U_CFUNC int32_t +u_printf_parse(const u_printf_stream_handler *streamHandler, + const UChar *fmt, + void *context, + u_localized_print_string *locStringContext, + ULocaleBundle *formatBundle, + int32_t *written, + va_list ap); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/io/uprntf_p.cpp b/intl/icu/source/io/uprntf_p.cpp new file mode 100644 index 0000000000..399d2dc751 --- /dev/null +++ b/intl/icu/source/io/uprntf_p.cpp @@ -0,0 +1,1606 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File uprntf_p.c +* +* Modification History: +* +* Date Name Description +* 11/23/98 stephen Creation. +* 03/12/99 stephen Modified for new C API. 
+* 08/07/2003 george Reunify printf implementations +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/ustring.h" +#include "unicode/utf16.h" +#include "uprintf.h" +#include "ufmt_cmn.h" +#include "cmemory.h" +#include "putilimp.h" + +/* ANSI style formatting */ +/* Use US-ASCII characters only for formatting */ + +/* % */ +#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_printf_simple_percent_handler} +/* s */ +#define UFMT_STRING {ufmt_string, u_printf_string_handler} +/* c */ +#define UFMT_CHAR {ufmt_char, u_printf_char_handler} +/* d, i */ +#define UFMT_INT {ufmt_int, u_printf_integer_handler} +/* u */ +#define UFMT_UINT {ufmt_int, u_printf_uinteger_handler} +/* o */ +#define UFMT_OCTAL {ufmt_int, u_printf_octal_handler} +/* x, X */ +#define UFMT_HEX {ufmt_int, u_printf_hex_handler} +/* f */ +#define UFMT_DOUBLE {ufmt_double, u_printf_double_handler} +/* e, E */ +#define UFMT_SCIENTIFIC {ufmt_double, u_printf_scientific_handler} +/* g, G */ +#define UFMT_SCIDBL {ufmt_double, u_printf_scidbl_handler} +/* n */ +#define UFMT_COUNT {ufmt_count, u_printf_count_handler} + +/* non-ANSI extensions */ +/* Use US-ASCII characters only for formatting */ + +/* p */ +#define UFMT_POINTER {ufmt_pointer, u_printf_pointer_handler} +/* V */ +#define UFMT_SPELLOUT {ufmt_double, u_printf_spellout_handler} +/* P */ +#define UFMT_PERCENT {ufmt_double, u_printf_percent_handler} +/* C K is old format */ +#define UFMT_UCHAR {ufmt_uchar, u_printf_uchar_handler} +/* S U is old format */ +#define UFMT_USTRING {ufmt_ustring, u_printf_ustring_handler} + + +#define UFMT_EMPTY {ufmt_empty, nullptr} + +/** + * A u_printf handler function. + * A u_printf handler is responsible for handling a single u_printf + * format specification, for example 'd' or 's'. + * @param stream The UFILE to which to write output. 
+ * @param info A pointer to a u_printf_spec_info struct containing + * information on the format specification. + * @param args A pointer to the argument data + * @return The number of Unicode characters written to stream. + */ +typedef int32_t U_EXPORT2 +u_printf_handler(const u_printf_stream_handler *handler, + + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args); + +typedef struct u_printf_info { + ufmt_type_info info; + u_printf_handler *handler; +} u_printf_info; + +/** + * Struct encapsulating a single uprintf format specification. + */ +typedef struct u_printf_spec { + u_printf_spec_info fInfo; /* Information on this spec */ + int32_t fWidthPos; /* Position of width in arg list */ + int32_t fPrecisionPos; /* Position of precision in arg list */ + int32_t fArgPos; /* Position of data in arg list */ +} u_printf_spec; + +#define UPRINTF_NUM_FMT_HANDLERS 108 + +/* We do not use handlers for 0-0x1f */ +#define UPRINTF_BASE_FMT_HANDLERS 0x20 + +/* buffer size for formatting */ +#define UPRINTF_BUFFER_SIZE 1024 +#define UPRINTF_SYMBOL_BUFFER_SIZE 8 + +static const char16_t gNullStr[] = {0x28, 0x6E, 0x75, 0x6C, 0x6C, 0x29, 0}; /* "(null)" */ +static const char16_t gSpaceStr[] = {0x20, 0}; /* " " */ + +/* Sets the sign of a format based on u_printf_spec_info */ +/* TODO: Is setting the prefix symbol to a positive sign a good idea in all locales? */ +static void +u_printf_set_sign(UNumberFormat *format, + const u_printf_spec_info *info, + char16_t *prefixBuffer, + int32_t *prefixBufLen, + UErrorCode *status) +{ + if(info->fShowSign) { + *prefixBufLen = unum_getTextAttribute(format, + UNUM_POSITIVE_PREFIX, + prefixBuffer, + *prefixBufLen, + status); + if (info->fSpace) { + /* Setting UNUM_PLUS_SIGN_SYMBOL affects the exponent too. 
*/ + /* unum_setSymbol(format, UNUM_PLUS_SIGN_SYMBOL, gSpaceStr, 1, &status); */ + unum_setTextAttribute(format, UNUM_POSITIVE_PREFIX, gSpaceStr, 1, status); + } + else { + char16_t plusSymbol[UPRINTF_SYMBOL_BUFFER_SIZE]; + int32_t symbolLen; + + symbolLen = unum_getSymbol(format, + UNUM_PLUS_SIGN_SYMBOL, + plusSymbol, + UPRV_LENGTHOF(plusSymbol), + status); + unum_setTextAttribute(format, + UNUM_POSITIVE_PREFIX, + plusSymbol, + symbolLen, + status); + } + } + else { + *prefixBufLen = 0; + } +} + +static void +u_printf_reset_sign(UNumberFormat *format, + const u_printf_spec_info *info, + char16_t *prefixBuffer, + int32_t *prefixBufLen, + UErrorCode *status) +{ + if(info->fShowSign) { + unum_setTextAttribute(format, + UNUM_POSITIVE_PREFIX, + prefixBuffer, + *prefixBufLen, + status); + } +} + + +/* handle a '%' */ +static int32_t +u_printf_simple_percent_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + (void)info; + (void)args; + static const char16_t PERCENT[] = { UP_PERCENT }; + + /* put a single '%' onto the output */ + return handler->write(context, PERCENT, 1); +} + +/* handle 's' */ +static int32_t +u_printf_string_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + char16_t *s; + char16_t buffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t len, written; + int32_t argSize; + const char *arg = (const char*)(args[0].ptrValue); + + /* convert from the default codepage to Unicode */ + if (arg) { + argSize = (int32_t)strlen(arg) + 1; + if (argSize >= MAX_UCHAR_BUFFER_SIZE(buffer)) { + s = ufmt_defaultCPToUnicode(arg, argSize, + (char16_t *)uprv_malloc(MAX_UCHAR_BUFFER_NEEDED(argSize)), + MAX_UCHAR_BUFFER_NEEDED(argSize)); + if(s == nullptr) { + return 0; + } + } + else { + s = ufmt_defaultCPToUnicode(arg, argSize, 
buffer, + UPRV_LENGTHOF(buffer)); + } + } + else { + s = (char16_t *)gNullStr; + } + len = u_strlen(s); + + /* width = minimum # of characters to write */ + /* precision = maximum # of characters to write */ + if (info->fPrecision != -1 && info->fPrecision < len) { + len = info->fPrecision; + } + + written = handler->pad_and_justify(context, info, s, len); + + /* clean up */ + if (gNullStr != s && buffer != s) { + uprv_free(s); + } + + return written; +} + +/* handle 'c': convert one default-codepage char to Unicode, then pad/justify it */ +static int32_t +u_printf_char_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + /* zero-filled so u_strlen(s) below is well defined even if the conversion stores nothing */ + char16_t s[U16_MAX_LENGTH+1] = {0}; + int32_t len = 1, written; + /* the conversion below is told to read 2 bytes, so hand it the char plus a NUL */ + /* terminator instead of the address of a lone 1-byte variable */ + const char arg[2] = { (char)(unsigned char)(args[0].int64Value), 0 }; + + /* convert from default codepage to Unicode */ + ufmt_defaultCPToUnicode(arg, 2, s, UPRV_LENGTHOF(s)); + + /* Remember that this may be an MBCS character */ + if (arg[0] != 0) { + len = u_strlen(s); + } + + /* width = minimum # of characters to write */ + /* precision = maximum # of characters to write */ + /* precision is ignored when handling a char */ + + written = handler->pad_and_justify(context, info, s, len); + + return written; +} + +/* handle 'f': format a double with the bundle's decimal number formatter */ +static int32_t +u_printf_double_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + double num = (double) (args[0].doubleValue); + UNumberFormat *format; + char16_t result[UPRINTF_BUFFER_SIZE]; + char16_t prefixBuffer[UPRINTF_BUFFER_SIZE]; + /* capacity handed to u_printf_set_sign() is counted in char16_t units, not bytes */ + int32_t prefixBufferLen = UPRV_LENGTHOF(prefixBuffer); + int32_t minDecimalDigits; + int32_t maxDecimalDigits; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + /* if(!
info->fIsLongDouble) + num &= DBL_MAX;*/ + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* save the formatter's state */ + minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); + maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); + + /* set the appropriate flags and number of decimal digits on the formatter */ + if(info->fPrecision != -1) { + /* set the # of decimal digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); + } + else if(info->fAlt) { + /* '#' means always show decimal point */ + /* copy of printf behavior on Solaris - '#' shows 6 digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + else { + /* # of decimal digits is 6 if precision not specified regardless of locale */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + + /* set whether to show the sign */ + if (info->fShowSign) { + u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + /* TODO: Is this needed? 
*/ + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); + + if (info->fShowSign) { + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +/* HSYS */ +static int32_t +u_printf_integer_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + int64_t num = args[0].int64Value; + UNumberFormat *format; + char16_t result[UPRINTF_BUFFER_SIZE]; + char16_t prefixBuffer[UPRINTF_BUFFER_SIZE]; + int32_t prefixBufferLen = sizeof(prefixBuffer); + int32_t minDigits = -1; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + if (info->fIsShort) + num = (int16_t)num; + else if (!info->fIsLongLong) + num = (int32_t)num; + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* set the appropriate flags on the formatter */ + + /* set the minimum integer digits */ + if(info->fPrecision != -1) { + /* set the minimum # of digits */ + minDigits = unum_getAttribute(format, UNUM_MIN_INTEGER_DIGITS); + unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, info->fPrecision); + } + + /* set whether to show the sign */ + if(info->fShowSign) { + u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatInt64(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + if (minDigits != -1) { + unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, minDigits); + } + + if 
(info->fShowSign) { + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +static int32_t +u_printf_hex_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + int64_t num = args[0].int64Value; + char16_t result[UPRINTF_BUFFER_SIZE]; + int32_t len = UPRINTF_BUFFER_SIZE; + + + /* mask off any necessary bits */ + if (info->fIsShort) + num &= UINT16_MAX; + else if (!info->fIsLongLong) + num &= UINT32_MAX; + + /* format the number, preserving the minimum # of digits */ + ufmt_64tou(result, &len, num, 16, + (UBool)(info->fSpec == 0x0078), + (info->fPrecision == -1 && info->fZero) ? info->fWidth : info->fPrecision); + + /* convert to alt form, if desired */ + if(num != 0 && info->fAlt && len < UPRINTF_BUFFER_SIZE - 2) { + /* shift the formatted string right by 2 chars */ + memmove(result + 2, result, len * sizeof(char16_t)); + result[0] = 0x0030; + result[1] = info->fSpec; + len += 2; + } + + return handler->pad_and_justify(context, info, result, len); +} + +static int32_t +u_printf_octal_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + int64_t num = args[0].int64Value; + char16_t result[UPRINTF_BUFFER_SIZE]; + int32_t len = UPRINTF_BUFFER_SIZE; + + + /* mask off any necessary bits */ + if (info->fIsShort) + num &= UINT16_MAX; + else if (!info->fIsLongLong) + num &= UINT32_MAX; + + /* format the number, preserving the minimum # of digits */ + ufmt_64tou(result, &len, num, 8, + false, /* doesn't matter for octal */ + info->fPrecision == -1 && info->fZero ? 
info->fWidth : info->fPrecision); + + /* convert to alt form, if desired */ + if(info->fAlt && result[0] != 0x0030 && len < UPRINTF_BUFFER_SIZE - 1) { + /* shift the formatted string right by 1 char */ + memmove(result + 1, result, len * sizeof(char16_t)); + result[0] = 0x0030; + len += 1; + } + + return handler->pad_and_justify(context, info, result, len); +} + +static int32_t +u_printf_uinteger_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + int64_t num = args[0].int64Value; + UNumberFormat *format; + char16_t result[UPRINTF_BUFFER_SIZE]; + int32_t minDigits = -1; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + /* TODO: Fix this once uint64_t can be formatted. */ + if (info->fIsShort) + num &= UINT16_MAX; + else if (!info->fIsLongLong) + num &= UINT32_MAX; + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* set the appropriate flags on the formatter */ + + /* set the minimum integer digits */ + if(info->fPrecision != -1) { + /* set the minimum # of digits */ + minDigits = unum_getAttribute(format, UNUM_MIN_INTEGER_DIGITS); + unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, info->fPrecision); + } + + /* To mirror other stdio implementations, we ignore the sign argument */ + + /* format the number */ + resultLen = unum_formatInt64(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + if (minDigits != -1) { + unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, minDigits); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +static int32_t +u_printf_pointer_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + 
(void)formatBundle; + char16_t result[UPRINTF_BUFFER_SIZE]; + int32_t len = UPRINTF_BUFFER_SIZE; + + /* format the pointer in hex */ + ufmt_ptou(result, &len, args[0].ptrValue, true/*, info->fPrecision*/); + + return handler->pad_and_justify(context, info, result, len); +} + +static int32_t +u_printf_scientific_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + double num = (double) (args[0].doubleValue); + UNumberFormat *format; + char16_t result[UPRINTF_BUFFER_SIZE]; + char16_t prefixBuffer[UPRINTF_BUFFER_SIZE]; + int32_t prefixBufferLen = sizeof(prefixBuffer); + int32_t minDecimalDigits; + int32_t maxDecimalDigits; + UErrorCode status = U_ZERO_ERROR; + char16_t srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + int32_t srcLen, expLen; + int32_t resultLen; + char16_t expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + /* if(! info->fIsLongDouble) + num &= DBL_MAX;*/ + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_SCIENTIFIC); + + /* handle error */ + if(format == 0) + return 0; + + /* set the appropriate flags on the formatter */ + + srcLen = unum_getSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + srcExpBuf, + sizeof(srcExpBuf), + &status); + + /* Upper/lower case the e */ + if (info->fSpec == (char16_t)0x65 /* e */) { + expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + formatBundle->fLocale, + &status); + } + else { + expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + formatBundle->fLocale, + &status); + } + + unum_setSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + expBuf, + expLen, + &status); + + /* save the formatter's state */ + minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); + maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); + + /* set the appropriate flags and number of decimal 
digits on the formatter */ + if(info->fPrecision != -1) { + /* set the # of decimal digits */ + if (info->fOrigSpec == (char16_t)0x65 /* e */ || info->fOrigSpec == (char16_t)0x45 /* E */) { + unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); + } + else { + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, 1); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, info->fPrecision); + } + } + else if(info->fAlt) { + /* '#' means always show decimal point */ + /* copy of printf behavior on Solaris - '#' shows 6 digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + else { + /* # of decimal digits is 6 if precision not specified */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + + /* set whether to show the sign */ + if (info->fShowSign) { + u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + /* TODO: Is this needed? */ + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); + + /* Since we're the only one using the scientific + format, we don't need to save the old exponent value. 
*/ + /*unum_setSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + srcExpBuf, + srcLen, + &status);*/ + + if (info->fShowSign) { + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +static int32_t +u_printf_percent_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + double num = (double) (args[0].doubleValue); + UNumberFormat *format; + char16_t result[UPRINTF_BUFFER_SIZE]; + char16_t prefixBuffer[UPRINTF_BUFFER_SIZE]; + int32_t prefixBufferLen = sizeof(prefixBuffer); + int32_t minDecimalDigits; + int32_t maxDecimalDigits; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + /* if(! info->fIsLongDouble) + num &= DBL_MAX;*/ + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_PERCENT); + + /* handle error */ + if(format == 0) + return 0; + + /* save the formatter's state */ + minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); + maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); + + /* set the appropriate flags and number of decimal digits on the formatter */ + if(info->fPrecision != -1) { + /* set the # of decimal digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); + } + else if(info->fAlt) { + /* '#' means always show decimal point */ + /* copy of printf behavior on Solaris - '#' shows 6 digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + else { + /* # of decimal digits is 6 if precision not specified */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + + /* set whether to show the sign */ + if (info->fShowSign) { + u_printf_set_sign(format, info, 
prefixBuffer, &prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + /* TODO: Is this needed? */ + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); + + if (info->fShowSign) { + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +static int32_t +u_printf_ustring_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + int32_t len, written; + const char16_t *arg = (const char16_t*)(args[0].ptrValue); + + /* allocate enough space for the buffer */ + if (arg == nullptr) { + arg = gNullStr; + } + len = u_strlen(arg); + + /* width = minimum # of characters to write */ + /* precision = maximum # of characters to write */ + if (info->fPrecision != -1 && info->fPrecision < len) { + len = info->fPrecision; + } + + /* determine if the string should be padded */ + written = handler->pad_and_justify(context, info, arg, len); + + return written; +} + +static int32_t +u_printf_uchar_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + int32_t written = 0; + char16_t arg = (char16_t)(args[0].int64Value); + + /* width = minimum # of characters to write */ + /* precision = maximum # of characters to write */ + /* precision is ignored when handling a uchar */ + + /* determine if the string should be padded */ + written = 
handler->pad_and_justify(context, info, &arg, 1); + + return written; +} + +static int32_t +u_printf_scidbl_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + u_printf_spec_info scidbl_info; + double num = args[0].doubleValue; + int32_t retVal; + UNumberFormat *format; + int32_t maxSigDecimalDigits, significantDigits; + + memcpy(&scidbl_info, info, sizeof(u_printf_spec_info)); + + /* determine whether to use 'd', 'e' or 'f' notation */ + if (scidbl_info.fPrecision == -1 && num == uprv_trunc(num)) + { + /* use 'f' notation */ + scidbl_info.fSpec = 0x0066; + scidbl_info.fPrecision = 0; + /* call the double handler */ + retVal = u_printf_double_handler(handler, context, formatBundle, &scidbl_info, args); + } + else if(num < 0.0001 || (scidbl_info.fPrecision < 1 && 1000000.0 <= num) + || (scidbl_info.fPrecision != -1 && num > uprv_pow10(scidbl_info.fPrecision))) + { + /* use 'e' or 'E' notation */ + scidbl_info.fSpec = scidbl_info.fSpec - 2; + if (scidbl_info.fPrecision == -1) { + scidbl_info.fPrecision = 5; + } + /* call the scientific handler */ + retVal = u_printf_scientific_handler(handler, context, formatBundle, &scidbl_info, args); + } + else { + format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); + /* Check for null pointer */ + if (format == nullptr) { + return 0; + } + maxSigDecimalDigits = unum_getAttribute(format, UNUM_MAX_SIGNIFICANT_DIGITS); + significantDigits = scidbl_info.fPrecision; + + /* use 'f' notation */ + scidbl_info.fSpec = 0x0066; + if (significantDigits == -1) { + significantDigits = 6; + } + unum_setAttribute(format, UNUM_SIGNIFICANT_DIGITS_USED, true); + unum_setAttribute(format, UNUM_MAX_SIGNIFICANT_DIGITS, significantDigits); + /* call the double handler */ + retVal = u_printf_double_handler(handler, context, formatBundle, &scidbl_info, args); + unum_setAttribute(format, UNUM_MAX_SIGNIFICANT_DIGITS, maxSigDecimalDigits); 
+ unum_setAttribute(format, UNUM_SIGNIFICANT_DIGITS_USED, false); + } + return retVal; +} + +static int32_t +u_printf_count_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)handler; + (void)context; + (void)formatBundle; + int32_t *count = (int32_t*)(args[0].ptrValue); + + /* in the special case of count, the u_printf_spec_info's width */ + /* will contain the # of chars written thus far */ + *count = info->fWidth; + + return 0; +} + +/* handle 'V': format a double with the bundle's spellout number formatter */ +static int32_t +u_printf_spellout_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + double num = (double) (args[0].doubleValue); + UNumberFormat *format; + char16_t result[UPRINTF_BUFFER_SIZE]; + char16_t prefixBuffer[UPRINTF_BUFFER_SIZE]; + /* capacity handed to u_printf_set_sign() is counted in char16_t units, not bytes */ + int32_t prefixBufferLen = UPRV_LENGTHOF(prefixBuffer); + int32_t minDecimalDigits; + int32_t maxDecimalDigits; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + /* if(!
info->fIsLongDouble) + num &= DBL_MAX;*/ + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_SPELLOUT); + + /* handle error */ + if(format == 0) + return 0; + + /* save the formatter's state */ + minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); + maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); + + /* set the appropriate flags and number of decimal digits on the formatter */ + if(info->fPrecision != -1) { + /* set the # of decimal digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); + } + else if(info->fAlt) { + /* '#' means always show decimal point */ + /* copy of printf behavior on Solaris - '#' shows 6 digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + else { + /* # of decimal digits is 6 if precision not specified */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + + /* set whether to show the sign */ + if (info->fShowSign) { + u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + /* TODO: Is this needed? */ + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); + + if (info->fShowSign) { + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +/* Use US-ASCII characters only for formatting. Most codepages have + characters 20-7F from Unicode. 
Using any other codepage specific + characters will make it very difficult to format the string on + non-Unicode machines */ +static const u_printf_info g_u_printf_infos[UPRINTF_NUM_FMT_HANDLERS] = { +/* 0x20 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x30 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x40 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, + UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +#endif + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x50 */ + UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, +#endif + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x60 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, + UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, + UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, + +/* 0x70 */ + UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, + UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +}; + +/* flag characters for uprintf */ +#define FLAG_MINUS 0x002D +#define FLAG_PLUS 0x002B +#define FLAG_SPACE 0x0020 +#define FLAG_POUND 0x0023 +#define FLAG_ZERO 0x0030 +#define FLAG_PAREN 0x0028 + +#define ISFLAG(s) (s) == FLAG_MINUS || \ + (s) == 
FLAG_PLUS || \ + (s) == FLAG_SPACE || \ + (s) == FLAG_POUND || \ + (s) == FLAG_ZERO || \ + (s) == FLAG_PAREN + +/* special characters for uprintf */ +#define SPEC_ASTERISK 0x002A +#define SPEC_DOLLARSIGN 0x0024 +#define SPEC_PERIOD 0x002E +#define SPEC_PERCENT 0x0025 + +/* unicode digits */ +#define DIGIT_ZERO 0x0030 +#define DIGIT_ONE 0x0031 +#define DIGIT_TWO 0x0032 +#define DIGIT_THREE 0x0033 +#define DIGIT_FOUR 0x0034 +#define DIGIT_FIVE 0x0035 +#define DIGIT_SIX 0x0036 +#define DIGIT_SEVEN 0x0037 +#define DIGIT_EIGHT 0x0038 +#define DIGIT_NINE 0x0039 + +#define ISDIGIT(s) (s) == DIGIT_ZERO || \ + (s) == DIGIT_ONE || \ + (s) == DIGIT_TWO || \ + (s) == DIGIT_THREE || \ + (s) == DIGIT_FOUR || \ + (s) == DIGIT_FIVE || \ + (s) == DIGIT_SIX || \ + (s) == DIGIT_SEVEN || \ + (s) == DIGIT_EIGHT || \ + (s) == DIGIT_NINE + +/* u_printf modifiers */ +#define MOD_H 0x0068 +#define MOD_LOWERL 0x006C +#define MOD_L 0x004C + +#define ISMOD(s) (s) == MOD_H || \ + (s) == MOD_LOWERL || \ + (s) == MOD_L +/* Returns an array of the parsed argument type given in the format string. 
*/ +static ufmt_args* parseArguments(const char16_t *alias, va_list ap, UErrorCode *status) { + ufmt_args *arglist = nullptr; + ufmt_type_info *typelist = nullptr; + UBool *islonglong = nullptr; + int32_t size = 0; + int32_t pos = 0; + char16_t type; + uint16_t handlerNum; + const char16_t *aliasStart = alias; + + /* get maximum number of arguments */ + for(;;) { + /* find % */ + while(*alias != UP_PERCENT && *alias != 0x0000) { + alias++; + } + + if(*alias == 0x0000) { + break; + } + + alias++; + + /* handle the pos number */ + if(ISDIGIT(*alias)) { + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + pos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + pos *= 10; + pos += (int) (*alias++ - DIGIT_ZERO); + } + } + + /* if there is no '$', don't read anything */ + if(*alias != SPEC_DOLLARSIGN) { + return nullptr; + } + } else { + return nullptr; + } + + if (pos > size) { + size = pos; + } + } + + /* create the parsed argument list */ + typelist = (ufmt_type_info*)uprv_malloc(sizeof(ufmt_type_info) * size); + islonglong = (UBool*)uprv_malloc(sizeof(UBool) * size); + arglist = (ufmt_args*)uprv_malloc(sizeof(ufmt_args) * size); + + /* If malloc failed, return nullptr */ + if (!typelist || !islonglong || !arglist) { + if (typelist) { + uprv_free(typelist); + } + + if (islonglong) { + uprv_free(islonglong); + } + + if (arglist) { + uprv_free(arglist); + } + + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + + /* reset alias back to the beginning */ + alias = aliasStart; + + for(;;) { + /* find % */ + while(*alias != UP_PERCENT && *alias != 0x0000) { + alias++; + } + + if(*alias == 0x0000) { + break; + } + + alias++; + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + pos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + pos *= 10; + pos += (int) (*alias++ - DIGIT_ZERO); + } + } + /* offset position by 1 */ + pos--; + + /* skip over everything except for the type */ + while (ISMOD(*alias) || ISFLAG(*alias) 
|| ISDIGIT(*alias) || + *alias == SPEC_ASTERISK || *alias == SPEC_PERIOD || *alias == SPEC_DOLLARSIGN) { + islonglong[pos] = false; + if (ISMOD(*alias)) { + alias++; + if (*alias == MOD_LOWERL) { + islonglong[pos] = true; + } + } + alias++; + } + type = *alias; + + /* store the argument type in the correct position of the parsed argument list */ + handlerNum = (uint16_t)(type - UPRINTF_BASE_FMT_HANDLERS); + if (handlerNum < UPRINTF_NUM_FMT_HANDLERS) { + typelist[pos] = g_u_printf_infos[ handlerNum ].info; + } else { + typelist[pos] = ufmt_empty; + } + } + + /* store argument in arglist */ + for (pos = 0; pos < size; pos++) { + switch (typelist[pos]) { + case ufmt_string: + case ufmt_ustring: + case ufmt_pointer: + arglist[pos].ptrValue = va_arg(ap, void*); + break; + case ufmt_char: + case ufmt_uchar: + case ufmt_int: + if (islonglong[pos]) { + arglist[pos].int64Value = va_arg(ap, int64_t); + } + else { + arglist[pos].int64Value = va_arg(ap, int32_t); + } + break; + case ufmt_float: + arglist[pos].floatValue = (float) va_arg(ap, double); + break; + case ufmt_double: + arglist[pos].doubleValue = va_arg(ap, double); + break; + default: + /* else args is ignored */ + arglist[pos].ptrValue = nullptr; + break; + } + } + + uprv_free(typelist); + uprv_free(islonglong); + + return arglist; +} + +/* We parse the argument list in Unicode */ +U_CFUNC int32_t +u_printf_parse(const u_printf_stream_handler *streamHandler, + const char16_t *fmt, + void *context, + u_localized_print_string *locStringContext, + ULocaleBundle *formatBundle, + int32_t *written, + va_list ap) +{ + uint16_t handlerNum; + ufmt_args args; + ufmt_type_info argType; + u_printf_handler *handler; + u_printf_spec spec; + u_printf_spec_info *info = &(spec.fInfo); + + const char16_t *alias = fmt; + const char16_t *backup; + const char16_t *lastAlias; + const char16_t *orgAlias = fmt; + /* parsed argument list */ + ufmt_args *arglist = nullptr; /* initialized it to avoid compiler warnings */ + UErrorCode status 
= U_ZERO_ERROR; + if (!locStringContext || locStringContext->available >= 0) { + /* get the parsed list of argument types */ + arglist = parseArguments(orgAlias, ap, &status); + + /* Return error if parsing failed. */ + if (U_FAILURE(status)) { + return -1; + } + } + + /* iterate through the pattern */ + while(!locStringContext || locStringContext->available >= 0) { + + /* find the next '%' */ + lastAlias = alias; + while(*alias != UP_PERCENT && *alias != 0x0000) { + alias++; + } + + /* write any characters before the '%' */ + if(alias > lastAlias) { + *written += (streamHandler->write)(context, lastAlias, (int32_t)(alias - lastAlias)); + } + + /* break if at end of string */ + if(*alias == 0x0000) { + break; + } + + /* initialize spec to default values */ + spec.fWidthPos = -1; + spec.fPrecisionPos = -1; + spec.fArgPos = -1; + + uprv_memset(info, 0, sizeof(*info)); + info->fPrecision = -1; + info->fWidth = -1; + info->fPadChar = 0x0020; + + /* skip over the initial '%' */ + alias++; + + /* Check for positional argument */ + if(ISDIGIT(*alias)) { + + /* Save the current position */ + backup = alias; + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + spec.fArgPos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + spec.fArgPos *= 10; + spec.fArgPos += (int) (*alias++ - DIGIT_ZERO); + } + } + + /* if there is no '$', don't read anything */ + if(*alias != SPEC_DOLLARSIGN) { + spec.fArgPos = -1; + alias = backup; + } + /* munge the '$' */ + else + alias++; + } + + /* Get any format flags */ + while(ISFLAG(*alias)) { + switch(*alias++) { + + /* left justify */ + case FLAG_MINUS: + info->fLeft = true; + break; + + /* always show sign */ + case FLAG_PLUS: + info->fShowSign = true; + break; + + /* use space if no sign present */ + case FLAG_SPACE: + info->fShowSign = true; + info->fSpace = true; + break; + + /* use alternate form */ + case FLAG_POUND: + info->fAlt = true; + break; + + /* pad with leading zeroes */ + case FLAG_ZERO: + info->fZero = 
true; + info->fPadChar = 0x0030; + break; + + /* pad character specified */ + case FLAG_PAREN: + + /* TODO test that all four are numbers */ + /* first four characters are hex values for pad char */ + info->fPadChar = (char16_t)ufmt_digitvalue(*alias++); + info->fPadChar = (char16_t)((info->fPadChar * 16) + ufmt_digitvalue(*alias++)); + info->fPadChar = (char16_t)((info->fPadChar * 16) + ufmt_digitvalue(*alias++)); + info->fPadChar = (char16_t)((info->fPadChar * 16) + ufmt_digitvalue(*alias++)); + + /* final character is ignored */ + alias++; + + break; + } + } + + /* Get the width */ + + /* width is specified out of line */ + if(*alias == SPEC_ASTERISK) { + + info->fWidth = -2; + + /* Skip the '*' */ + alias++; + + /* Save the current position */ + backup = alias; + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + spec.fWidthPos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + spec.fWidthPos *= 10; + spec.fWidthPos += (int) (*alias++ - DIGIT_ZERO); + } + } + + /* if there is no '$', don't read anything */ + if(*alias != SPEC_DOLLARSIGN) { + spec.fWidthPos = -1; + alias = backup; + } + /* munge the '$' */ + else + alias++; + } + /* read the width, if present */ + else if(ISDIGIT(*alias)){ + info->fWidth = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + info->fWidth *= 10; + info->fWidth += (int) (*alias++ - DIGIT_ZERO); + } + } + + /* Get the precision */ + + if(*alias == SPEC_PERIOD) { + + /* eat up the '.' 
*/ + alias++; + + /* precision is specified out of line */ + if(*alias == SPEC_ASTERISK) { + + info->fPrecision = -2; + + /* Skip the '*' */ + alias++; + + /* save the current position */ + backup = alias; + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + spec.fPrecisionPos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + spec.fPrecisionPos *= 10; + spec.fPrecisionPos += (int) (*alias++ - DIGIT_ZERO); + } + + /* if there is no '$', don't read anything */ + if(*alias != SPEC_DOLLARSIGN) { + spec.fPrecisionPos = -1; + alias = backup; + } + else { + /* munge the '$' */ + alias++; + } + } + } + /* read the precision */ + else if(ISDIGIT(*alias)){ + info->fPrecision = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + info->fPrecision *= 10; + info->fPrecision += (int) (*alias++ - DIGIT_ZERO); + } + } + } + + /* Get any modifiers */ + if(ISMOD(*alias)) { + switch(*alias++) { + + /* short */ + case MOD_H: + info->fIsShort = true; + break; + + /* long or long long */ + case MOD_LOWERL: + if(*alias == MOD_LOWERL) { + info->fIsLongLong = true; + /* skip over the next 'l' */ + alias++; + } + else + info->fIsLong = true; + break; + + /* long double */ + case MOD_L: + info->fIsLongDouble = true; + break; + } + } + + /* finally, get the specifier letter */ + info->fSpec = *alias++; + info->fOrigSpec = info->fSpec; + + /* fill in the precision and width, if specified out of line */ + + /* width specified out of line */ + if(spec.fInfo.fWidth == -2) { + if(spec.fWidthPos == -1) { + /* read the width from the argument list */ + info->fWidth = va_arg(ap, int32_t); + } + /* else handle positional parameter */ + + /* if it's negative, take the absolute value and set left alignment */ + if(info->fWidth < 0) { + info->fWidth *= -1; /* Make positive */ + info->fLeft = true; + } + } + + /* precision specified out of line */ + if(info->fPrecision == -2) { + if(spec.fPrecisionPos == -1) { + /* read the precision from the argument list */ + 
info->fPrecision = va_arg(ap, int32_t); + } + /* else handle positional parameter */ + + /* if it's negative, set it to zero */ + if(info->fPrecision < 0) + info->fPrecision = 0; + } + + handlerNum = (uint16_t)(info->fSpec - UPRINTF_BASE_FMT_HANDLERS); + if (handlerNum < UPRINTF_NUM_FMT_HANDLERS) { + /* query the info function for argument information */ + argType = g_u_printf_infos[ handlerNum ].info; + + /* goto the correct argument on arg_list if position is specified */ + if (spec.fArgPos > 0) { + /* offset position by 1 */ + spec.fArgPos--; + switch(argType) { + case ufmt_count: + /* set the spec's width to the # of chars written */ + info->fWidth = *written; + /* fall through to set the pointer */ + U_FALLTHROUGH; + case ufmt_string: + case ufmt_ustring: + case ufmt_pointer: + args.ptrValue = arglist[spec.fArgPos].ptrValue; + break; + case ufmt_char: + case ufmt_uchar: + case ufmt_int: + args.int64Value = arglist[spec.fArgPos].int64Value; + break; + case ufmt_float: + args.floatValue = arglist[spec.fArgPos].floatValue; + break; + case ufmt_double: + args.doubleValue = arglist[spec.fArgPos].doubleValue; + break; + default: + /* else args is ignored */ + args.ptrValue = nullptr; + break; + } + } else { /* no positional argument specified */ + switch(argType) { + case ufmt_count: + /* set the spec's width to the # of chars written */ + info->fWidth = *written; + /* fall through to set the pointer */ + U_FALLTHROUGH; + case ufmt_string: + case ufmt_ustring: + case ufmt_pointer: + args.ptrValue = va_arg(ap, void*); + break; + case ufmt_char: + case ufmt_uchar: + case ufmt_int: + if (info->fIsLongLong) { + args.int64Value = va_arg(ap, int64_t); + } + else { + args.int64Value = va_arg(ap, int32_t); + } + break; + case ufmt_float: + args.floatValue = (float) va_arg(ap, double); + break; + case ufmt_double: + args.doubleValue = va_arg(ap, double); + break; + default: + /* else args is ignored */ + args.ptrValue = nullptr; + break; + } + } + + /* call the handler 
function */ + handler = g_u_printf_infos[ handlerNum ].handler; + if(handler != 0) { + *written += (*handler)(streamHandler, context, formatBundle, info, &args); + } + else { + /* just echo unknown tags */ + *written += (streamHandler->write)(context, fmt, (int32_t)(alias - lastAlias)); + } + } + else { + /* just echo unknown tags */ + *written += (streamHandler->write)(context, fmt, (int32_t)(alias - lastAlias)); + } + } + /* delete parsed argument list */ + if (arglist != nullptr) { + uprv_free(arglist); + } + /* return # of characters in this format that have been parsed. */ + return (int32_t)(alias - fmt); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/intl/icu/source/io/uscanf.cpp b/intl/icu/source/io/uscanf.cpp new file mode 100644 index 0000000000..2d72ccf515 --- /dev/null +++ b/intl/icu/source/io/uscanf.cpp @@ -0,0 +1,108 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File uscanf.c +* +* Modification History: +* +* Date Name Description +* 12/02/98 stephen Creation. +* 03/13/99 stephen Modified for new C API. +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "unicode/ustdio.h" +#include "unicode/ustring.h" +#include "uscanf.h" +#include "ufile.h" +#include "ufmt_cmn.h" + +#include "cmemory.h" +#include "cstring.h" + + +U_CAPI int32_t U_EXPORT2 +u_fscanf(UFILE *f, + const char *patternSpecification, + ... 
) +{ + va_list ap; + int32_t converted; + + va_start(ap, patternSpecification); + converted = u_vfscanf(f, patternSpecification, ap); + va_end(ap); + + return converted; +} + +U_CAPI int32_t U_EXPORT2 +u_fscanf_u(UFILE *f, + const char16_t *patternSpecification, + ... ) +{ + va_list ap; + int32_t converted; + + va_start(ap, patternSpecification); + converted = u_vfscanf_u(f, patternSpecification, ap); + va_end(ap); + + return converted; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vfscanf(UFILE *f, + const char *patternSpecification, + va_list ap) +{ + int32_t converted; + char16_t *pattern; + char16_t patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)uprv_strlen(patternSpecification) + 1; + + /* convert from the default codepage to Unicode */ + if (size >= MAX_UCHAR_BUFFER_SIZE(patBuffer)) { + pattern = (char16_t *)uprv_malloc(size * sizeof(char16_t)); + if(pattern == 0) { + return 0; + } + } + else { + pattern = patBuffer; + } + u_charsToUChars(patternSpecification, pattern, size); + + /* do the work */ + converted = u_vfscanf_u(f, pattern, ap); + + /* clean up */ + if (pattern != patBuffer) { + uprv_free(pattern); + } + + return converted; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vfscanf_u(UFILE *f, + const char16_t *patternSpecification, + va_list ap) +{ + return u_scanf_parse(f, patternSpecification, ap); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ + diff --git a/intl/icu/source/io/uscanf.h b/intl/icu/source/io/uscanf.h new file mode 100644 index 0000000000..ebb8e79188 --- /dev/null +++ b/intl/icu/source/io/uscanf.h @@ -0,0 +1,38 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. 
+* +****************************************************************************** +* +* File uscanf.h +* +* Modification History: +* +* Date Name Description +* 12/02/98 stephen Creation. +* 03/13/99 stephen Modified for new C API. +****************************************************************************** +*/ + +#ifndef USCANF_H +#define USCANF_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/ustdio.h" + +U_CFUNC int32_t +u_scanf_parse(UFILE *f, + const UChar *patternSpecification, + va_list ap); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif + diff --git a/intl/icu/source/io/uscanf_p.cpp b/intl/icu/source/io/uscanf_p.cpp new file mode 100644 index 0000000000..0a41dfe07d --- /dev/null +++ b/intl/icu/source/io/uscanf_p.cpp @@ -0,0 +1,1463 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* +* File uscnnf_p.c +* +* Modification History: +* +* Date Name Description +* 12/02/98 stephen Creation. +* 03/13/99 stephen Modified for new C API. 
+******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/uchar.h" +#include "unicode/ustring.h" +#include "unicode/unum.h" +#include "unicode/udat.h" +#include "unicode/uset.h" +#include "uscanf.h" +#include "ufmt_cmn.h" +#include "ufile.h" +#include "locbund.h" + +#include "cmemory.h" +#include "ustr_cnv.h" + +/* flag characters for u_scanf */ +#define FLAG_ASTERISK 0x002A +#define FLAG_PAREN 0x0028 + +#define ISFLAG(s) (s) == FLAG_ASTERISK || \ + (s) == FLAG_PAREN + +/* special characters for u_scanf */ +#define SPEC_DOLLARSIGN 0x0024 + +/* unicode digits */ +#define DIGIT_ZERO 0x0030 +#define DIGIT_ONE 0x0031 +#define DIGIT_TWO 0x0032 +#define DIGIT_THREE 0x0033 +#define DIGIT_FOUR 0x0034 +#define DIGIT_FIVE 0x0035 +#define DIGIT_SIX 0x0036 +#define DIGIT_SEVEN 0x0037 +#define DIGIT_EIGHT 0x0038 +#define DIGIT_NINE 0x0039 + +#define ISDIGIT(s) (s) == DIGIT_ZERO || \ + (s) == DIGIT_ONE || \ + (s) == DIGIT_TWO || \ + (s) == DIGIT_THREE || \ + (s) == DIGIT_FOUR || \ + (s) == DIGIT_FIVE || \ + (s) == DIGIT_SIX || \ + (s) == DIGIT_SEVEN || \ + (s) == DIGIT_EIGHT || \ + (s) == DIGIT_NINE + +/* u_scanf modifiers */ +#define MOD_H 0x0068 +#define MOD_LOWERL 0x006C +#define MOD_L 0x004C + +#define ISMOD(s) (s) == MOD_H || \ + (s) == MOD_LOWERL || \ + (s) == MOD_L + +/** + * Struct encapsulating a single uscanf format specification. + */ +typedef struct u_scanf_spec_info { + int32_t fWidth; /* Width */ + + char16_t fSpec; /* Format specification */ + + char16_t fPadChar; /* Padding character */ + + UBool fSkipArg; /* true if arg should be skipped */ + UBool fIsLongDouble; /* L flag */ + UBool fIsShort; /* h flag */ + UBool fIsLong; /* l flag */ + UBool fIsLongLong; /* ll flag */ + UBool fIsString; /* true if this is a NUL-terminated string. */ +} u_scanf_spec_info; + + +/** + * Struct encapsulating a single u_scanf format specification. 
+ */ +typedef struct u_scanf_spec { + u_scanf_spec_info fInfo; /* Information on this spec */ + int32_t fArgPos; /* Position of data in arg list */ +} u_scanf_spec; + +/** + * Parse a single u_scanf format specifier in Unicode. + * @param fmt A pointer to a '%' character in a u_scanf format specification. + * @param spec A pointer to a u_scanf_spec to receive the parsed + * format specifier. + * @return The number of characters contained in this specifier. + */ +static int32_t +u_scanf_parse_spec (const char16_t *fmt, + u_scanf_spec *spec) +{ + const char16_t *s = fmt; + const char16_t *backup; + u_scanf_spec_info *info = &(spec->fInfo); + + /* initialize spec to default values */ + spec->fArgPos = -1; + + info->fWidth = -1; + info->fSpec = 0x0000; + info->fPadChar = 0x0020; + info->fSkipArg = false; + info->fIsLongDouble = false; + info->fIsShort = false; + info->fIsLong = false; + info->fIsLongLong = false; + info->fIsString = true; + + + /* skip over the initial '%' */ + s++; + + /* Check for positional argument */ + if(ISDIGIT(*s)) { + + /* Save the current position */ + backup = s; + + /* handle positional parameters */ + if(ISDIGIT(*s)) { + spec->fArgPos = (int) (*s++ - DIGIT_ZERO); + + while(ISDIGIT(*s)) { + spec->fArgPos *= 10; + spec->fArgPos += (int) (*s++ - DIGIT_ZERO); + } + } + + /* if there is no '$', don't read anything */ + if(*s != SPEC_DOLLARSIGN) { + spec->fArgPos = -1; + s = backup; + } + /* munge the '$' */ + else + s++; + } + + /* Get any format flags */ + while(ISFLAG(*s)) { + switch(*s++) { + + /* skip argument */ + case FLAG_ASTERISK: + info->fSkipArg = true; + break; + + /* pad character specified */ + case FLAG_PAREN: + + /* first four characters are hex values for pad char */ + info->fPadChar = (char16_t)ufmt_digitvalue(*s++); + info->fPadChar = (char16_t)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); + info->fPadChar = (char16_t)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); + info->fPadChar = (char16_t)((info->fPadChar * 16) + 
ufmt_digitvalue(*s++)); + + /* final character is ignored */ + s++; + + break; + } + } + + /* Get the width */ + if(ISDIGIT(*s)){ + info->fWidth = (int) (*s++ - DIGIT_ZERO); + + while(ISDIGIT(*s)) { + info->fWidth *= 10; + info->fWidth += (int) (*s++ - DIGIT_ZERO); + } + } + + /* Get any modifiers */ + if(ISMOD(*s)) { + switch(*s++) { + + /* short */ + case MOD_H: + info->fIsShort = true; + break; + + /* long or long long */ + case MOD_LOWERL: + if(*s == MOD_LOWERL) { + info->fIsLongLong = true; + /* skip over the next 'l' */ + s++; + } + else + info->fIsLong = true; + break; + + /* long double */ + case MOD_L: + info->fIsLongDouble = true; + break; + } + } + + /* finally, get the specifier letter */ + info->fSpec = *s++; + + /* return # of characters in this specifier */ + return (int32_t)(s - fmt); +} + +#define UP_PERCENT 0x0025 + + +/* ANSI style formatting */ +/* Use US-ASCII characters only for formatting */ + +/* % */ +#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} +/* s */ +#define UFMT_STRING {ufmt_string, u_scanf_string_handler} +/* c */ +#define UFMT_CHAR {ufmt_string, u_scanf_char_handler} +/* d, i */ +#define UFMT_INT {ufmt_int, u_scanf_integer_handler} +/* u */ +#define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler} +/* o */ +#define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler} +/* x, X */ +#define UFMT_HEX {ufmt_int, u_scanf_hex_handler} +/* f */ +#define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler} +/* e, E */ +#define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler} +/* g, G */ +#define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler} +/* n */ +#define UFMT_COUNT {ufmt_count, u_scanf_count_handler} +/* [ */ +#define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler} + +/* non-ANSI extensions */ +/* Use US-ASCII characters only for formatting */ + +/* p */ +#define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler} +/* V */ +#define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler} +/* P */ 
+#define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler} +/* C K is old format */ +#define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler} +/* S U is old format */ +#define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler} + + +#define UFMT_EMPTY {ufmt_empty, nullptr} + +/** + * A u_scanf handler function. + * A u_scanf handler is responsible for handling a single u_scanf + * format specification, for example 'd' or 's'. + * @param stream The UFILE to which to write output. + * @param info A pointer to a u_scanf_spec_info struct containing + * information on the format specification. + * @param args A pointer to the argument data + * @param fmt A pointer to the first character in the format string + * following the spec. + * @param fmtConsumed On output, set to the number of characters consumed + * in fmt. Do nothing, if the argument isn't variable width. + * @param argConverted The number of arguments converted and assigned, or -1 if an + * error occurred. + * @return The number of code points consumed during reading. + */ +typedef int32_t (*u_scanf_handler) (UFILE *stream, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted); + +typedef struct u_scanf_info { + ufmt_type_info info; + u_scanf_handler handler; +} u_scanf_info; + +#define USCANF_NUM_FMT_HANDLERS 108 +#define USCANF_SYMBOL_BUFFER_SIZE 8 + +/* We do not use handlers for 0-0x1f */ +#define USCANF_BASE_FMT_HANDLERS 0x20 + + +static int32_t +u_scanf_skip_leading_ws(UFILE *input, + char16_t pad) +{ + char16_t c; + int32_t count = 0; + UBool isNotEOF; + + /* skip all leading ws in the input */ + while( ((isNotEOF = ufile_getch(input, &c))==(UBool)true) && (c == pad || u_isWhitespace(c)) ) + { + count++; + } + + /* put the final character back on the input */ + if(isNotEOF) + u_fungetc(c, input); + + return count; +} + +/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? 
*/ +static int32_t +u_scanf_skip_leading_positive_sign(UFILE *input, + UNumberFormat *format, + UErrorCode *status) +{ + char16_t c; + int32_t count = 0; + UBool isNotEOF; + char16_t plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; + int32_t symbolLen; + UErrorCode localStatus = U_ZERO_ERROR; + + if (U_SUCCESS(*status)) { + symbolLen = unum_getSymbol(format, + UNUM_PLUS_SIGN_SYMBOL, + plusSymbol, + UPRV_LENGTHOF(plusSymbol), + &localStatus); + + if (U_SUCCESS(localStatus)) { + /* skip all leading ws in the input */ + while( ((isNotEOF = ufile_getch(input, &c))==(UBool)true) && (count < symbolLen && c == plusSymbol[count]) ) + { + count++; + } + + /* put the final character back on the input */ + if(isNotEOF) { + u_fungetc(c, input); + } + } + } + + return count; +} + +static int32_t +u_scanf_simple_percent_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)info; + (void)args; + (void)fmt; + (void)fmtConsumed; + + /* make sure the next character in the input is a percent */ + *argConverted = 0; + if(u_fgetc(input) != 0x0025) { + *argConverted = -1; + } + return 1; +} + +static int32_t +u_scanf_count_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)input; + (void)fmt; + (void)fmtConsumed; + + /* in the special case of count, the u_scanf_spec_info's width */ + /* will contain the # of items converted thus far */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); + else if (info->fIsLongLong) + *(int64_t*)(args[0].ptrValue) = info->fWidth; + else + *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); + } + *argConverted = 0; + + /* we converted 0 args */ + return 0; +} + +static int32_t +u_scanf_double_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + 
int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + skipped += u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + if (info->fIsLong) + *(double*)(args[0].ptrValue) = num; + else if (info->fIsLongDouble) + *(long double*)(args[0].ptrValue) = num; + else + *(float*)(args[0].ptrValue) = (float)num; + } + + /* mask off any necessary bits */ + /* if(! 
info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +#define UPRINTF_SYMBOL_BUFFER_SIZE 8 + +static int32_t +u_scanf_scientific_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + char16_t srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + int32_t srcLen, expLen; + char16_t expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); + + /* handle error */ + if(format == 0) + return 0; + + /* set the appropriate flags on the formatter */ + + srcLen = unum_getSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + srcExpBuf, + sizeof(srcExpBuf), + &status); + + /* Upper/lower case the e */ + if (info->fSpec == (char16_t)0x65 /* e */) { + expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + input->str.fBundle.fLocale, + &status); + } + else { + expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + input->str.fBundle.fLocale, + &status); + } + + unum_setSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + expBuf, + expLen, + &status); + + + + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. 
*/ + skipped += u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + if (info->fIsLong) + *(double*)(args[0].ptrValue) = num; + else if (info->fIsLongDouble) + *(long double*)(args[0].ptrValue) = num; + else + *(float*)(args[0].ptrValue) = (float)num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_scidbl_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *scientificFormat, *genericFormat; + /*int32_t scientificResult, genericResult;*/ + double scientificResult, genericResult; + int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; + int32_t skipped; + UErrorCode scientificStatus = U_ZERO_ERROR; + UErrorCode genericStatus = U_ZERO_ERROR; + + + /* since we can't determine by scanning the characters whether */ + /* a number was formatted in the 'f' or 'g' styles, parse the */ + /* string with both formatters, and assume whichever one */ + /* parsed the most is the correct formatter to use */ + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatters */ + scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); + 
genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); + + /* handle error */ + if(scientificFormat == 0 || genericFormat == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); + + /* parse the number using each format*/ + + scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, + &scientificParsePos, &scientificStatus); + + genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, + &genericParsePos, &genericStatus); + + /* determine which parse made it farther */ + if(scientificParsePos > genericParsePos) { + /* stash the result in num */ + num = scientificResult; + /* update the input's position to reflect consumed data */ + parsePos += scientificParsePos; + } + else { + /* stash the result in num */ + num = genericResult; + /* update the input's position to reflect consumed data */ + parsePos += genericParsePos; + } + input->str.fPos += parsePos; + + if (!info->fSkipArg) { + if (info->fIsLong) + *(double*)(args[0].ptrValue) = num; + else if (info->fIsLongDouble) + *(long double*)(args[0].ptrValue) = num; + else + *(float*)(args[0].ptrValue) = (float)num; + } + + /* mask off any necessary bits */ + /* if(! 
info->fIsLong_double) + num &= DBL_MAX;*/ + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_integer_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + void *num = (void*) (args[0].ptrValue); + UNumberFormat *format, *localFormat; + int32_t parsePos = 0; + int32_t skipped; + int32_t parseIntOnly = 0; + UErrorCode status = U_ZERO_ERROR; + int64_t result; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* for integer types, do not attempt to parse fractions */ + localFormat = unum_clone(format, &status); + if(U_FAILURE(status)) + return 0; + + if(info->fSpec == 'd' || info->fSpec == 'i' || info->fSpec == 'u') + parseIntOnly = 1; + unum_setAttribute(localFormat, UNUM_PARSE_INT_ONLY, parseIntOnly); + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. 
*/ + skipped += u_scanf_skip_leading_positive_sign(input, localFormat, &status); + + /* parse the number */ + result = unum_parseInt64(localFormat, input->str.fPos, len, &parsePos, &status); + + /* mask off any necessary bits */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)num = (int16_t)(UINT16_MAX & result); + else if (info->fIsLongLong) + *(int64_t*)num = result; + else + *(int32_t*)num = (int32_t)(UINT32_MAX & result); + } + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* cleanup cloned formatter */ + unum_close(localFormat); + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_uinteger_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + /* TODO Fix this when Numberformat handles uint64_t */ + return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); +} + +static int32_t +u_scanf_percent_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + UErrorCode status = U_ZERO_ERROR; + + + /* skip all ws in the input */ + u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. 
*/ + u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + *(double*)(args[0].ptrValue) = num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos; +} + +static int32_t +u_scanf_string_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + const char16_t *source; + UConverter *conv; + char *arg = (char*)(args[0].ptrValue); + char *alias = arg; + char *limit; + UErrorCode status = U_ZERO_ERROR; + int32_t count; + int32_t skipped = 0; + char16_t c; + UBool isNotEOF = false; + + /* skip all ws in the input */ + if (info->fIsString) { + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + } + + /* get the string one character at a time, truncating to the width */ + count = 0; + + /* open the default converter */ + conv = u_getDefaultConverter(&status); + + if(U_FAILURE(status)) + return -1; + + while( (info->fWidth == -1 || count < info->fWidth) + && ((isNotEOF = ufile_getch(input, &c))==(UBool)true) + && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) + { + + if (!info->fSkipArg) { + /* put the character from the input onto the target */ + source = &c; + /* Since we do this one character at a time, do it this way. 
*/ + if (info->fWidth > 0) { + limit = alias + info->fWidth - count; + } + else { + limit = alias + ucnv_getMaxCharSize(conv); + } + + /* convert the character to the default codepage */ + ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, + nullptr, true, &status); + + if(U_FAILURE(status)) { + /* clean up */ + u_releaseDefaultConverter(conv); + return -1; + } + } + + /* increment the count */ + ++count; + } + + /* put the final character we read back on the input */ + if (!info->fSkipArg) { + if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) + u_fungetc(c, input); + + /* add the terminator */ + if (info->fIsString) { + *alias = 0x00; + } + } + + /* clean up */ + u_releaseDefaultConverter(conv); + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return count + skipped; +} + +static int32_t +u_scanf_char_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + if (info->fWidth < 0) { + info->fWidth = 1; + } + info->fIsString = false; + return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); +} + +static int32_t +u_scanf_ustring_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + char16_t *arg = (char16_t*)(args[0].ptrValue); + char16_t *alias = arg; + int32_t count; + int32_t skipped = 0; + char16_t c; + UBool isNotEOF = false; + + /* skip all ws in the input */ + if (info->fIsString) { + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + } + + /* get the string one character at a time, truncating to the width */ + count = 0; + + while( (info->fWidth == -1 || count < info->fWidth) + && ((isNotEOF = ufile_getch(input, &c))==(UBool)true) + && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) + { + + /* put the character from the input onto the target */ + if 
(!info->fSkipArg) { + *alias++ = c; + } + + /* increment the count */ + ++count; + } + + /* put the final character we read back on the input */ + if (!info->fSkipArg) { + if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { + u_fungetc(c, input); + } + + /* add the terminator */ + if (info->fIsString) { + *alias = 0x0000; + } + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return count + skipped; +} + +static int32_t +u_scanf_uchar_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + if (info->fWidth < 0) { + info->fWidth = 1; + } + info->fIsString = false; + return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); +} + +static int32_t +u_scanf_spellout_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + /* This is not applicable to RBNF. 
*/ + /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + *(double*)(args[0].ptrValue) = num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_hex_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + int32_t skipped; + void *num = (void*) (args[0].ptrValue); + int64_t result; + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* check for alternate form; only when two code units are available within the buffer and the width */ + if( len >= 2 && *(input->str.fPos) == 0x0030 && + (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { + + /* skip the '0' and 'x' or 'X' if present, and count them as consumed input */ + input->str.fPos += 2; + len -= 2; skipped += 2; + } + + /* parse the number */ + result = ufmt_uto64(input->str.fPos, &len, 16); + + /* update the input's position to reflect consumed data */ + input->str.fPos += len; + + /* mask off any necessary bits */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)num = (int16_t)(UINT16_MAX & result); + else if (info->fIsLongLong) + *(int64_t*)num = result; + else + *(int32_t*)num = (int32_t)(UINT32_MAX & result); + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return len + skipped; +} + +static int32_t 
+u_scanf_octal_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + int32_t skipped; + void *num = (void*) (args[0].ptrValue); + int64_t result; + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* parse the number */ + result = ufmt_uto64(input->str.fPos, &len, 8); + + /* update the input's position to reflect consumed data */ + input->str.fPos += len; + + /* mask off any necessary bits */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)num = (int16_t)(UINT16_MAX & result); + else if (info->fIsLongLong) + *(int64_t*)num = result; + else + *(int32_t*)num = (int32_t)(UINT32_MAX & result); + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return len + skipped; +} + +static int32_t +u_scanf_pointer_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + int32_t skipped; + void *result; + void **p = (void**)(args[0].ptrValue); + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) { + len = ufmt_min(len, info->fWidth); + } + + /* Make sure that we don't consume too much */ + if (len > (int32_t)(sizeof(void*)*2)) { + len = 
(int32_t)(sizeof(void*)*2); + } + + /* parse the pointer - assign to temporary value */ + result = ufmt_utop(input->str.fPos, &len); + + if (!info->fSkipArg) { + *p = result; + } + + /* update the input's position to reflect consumed data */ + input->str.fPos += len; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return len + skipped; +} + +static int32_t +u_scanf_scanset_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const char16_t *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + USet *scanset; + UErrorCode status = U_ZERO_ERROR; + int32_t chLeft = INT32_MAX; + UChar32 c; + char16_t *alias = (char16_t*) (args[0].ptrValue); + UBool isNotEOF = false; + UBool readCharacter = false; + + /* Create an empty set */ + scanset = uset_open(0, -1); + + /* Back up one to get the [ */ + fmt--; + + /* truncate to the width, if specified and alias the target */ + if(info->fWidth >= 0) { + chLeft = info->fWidth; + } + + /* parse the scanset from the fmt string */ + *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); + + /* verify that the parse was successful */ + if (U_SUCCESS(status)) { + c=0; + + /* grab characters one at a time and make sure they are in the scanset */ + while(chLeft > 0) { + if ( ((isNotEOF = ufile_getch32(input, &c))==(UBool)true) && uset_contains(scanset, c) ) { + readCharacter = true; + if (!info->fSkipArg) { + int32_t idx = 0; + UBool isError = false; + + U16_APPEND(alias, idx, chLeft, c, isError); + if (isError) { + break; + } + alias += idx; + } + chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); + } + else { + /* if the character's not in the scanset, break out */ + break; + } + } + + /* put the final character we read back on the input */ + if(isNotEOF && chLeft > 0) { + u_fungetc(c, input); + } + } + + uset_close(scanset); + + /* if we didn't match at least 1 character, fail */ + if(!readCharacter) + return -1; + /* otherwise, add the terminator */ + else if (!info->fSkipArg) { + *alias = 0x00; + 
} + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; +} + +/* Use US-ASCII characters only for formatting. Most codepages have + characters 20-7F from Unicode. Using any other codepage specific + characters will make it very difficult to format the string on + non-Unicode machines */ +static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { +/* 0x20 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x30 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x40 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, + UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +#endif + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x50 */ + UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, +#endif + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x60 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, + UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, + UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, + +/* 0x70 */ + UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, + UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +}; + +U_CFUNC int32_t 
+u_scanf_parse(UFILE *f, + const char16_t *patternSpecification, + va_list ap) +{ + const char16_t *alias; + int32_t count, converted, argConsumed, cpConsumed; + uint16_t handlerNum; + + ufmt_args args; + u_scanf_spec spec; + ufmt_type_info info; + u_scanf_handler handler; + + /* alias the pattern */ + alias = patternSpecification; + + /* haven't converted anything yet */ + argConsumed = 0; + converted = 0; + cpConsumed = 0; + + /* iterate through the pattern */ + for(;;) { + + /* match any characters up to the next '%' */ + while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { + alias++; + } + + /* if we aren't at a '%', or if we're at end of string, break*/ + if(*alias != UP_PERCENT || *alias == 0x0000) + break; + + /* parse the specifier */ + count = u_scanf_parse_spec(alias, &spec); + + /* update the pointer in pattern */ + alias += count; + + handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); + if (handlerNum < USCANF_NUM_FMT_HANDLERS) { + /* skip the argument, if necessary */ + /* query the info function for argument information */ + info = g_u_scanf_infos[ handlerNum ].info; + if (info != ufmt_count && u_feof(f)) { + break; + } + else if(spec.fInfo.fSkipArg) { + args.ptrValue = nullptr; + } + else { + switch(info) { + case ufmt_count: + /* set the spec's width to the # of items converted */ + spec.fInfo.fWidth = cpConsumed; + U_FALLTHROUGH; + case ufmt_char: + case ufmt_uchar: + case ufmt_int: + case ufmt_string: + case ufmt_ustring: + case ufmt_pointer: + case ufmt_float: + case ufmt_double: + args.ptrValue = va_arg(ap, void*); + break; + + default: + /* else args is ignored */ + args.ptrValue = nullptr; + break; + } + } + + /* call the handler function */ + handler = g_u_scanf_infos[ handlerNum ].handler; + if(handler != 0) { + + /* reset count to 1 so that += for alias works. 
*/ + count = 1; + + cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); + + /* if the handler encountered an error condition, break */ + if(argConsumed < 0) { + converted = -1; + break; + } + + /* add to the # of items converted */ + converted += argConsumed; + + /* update the pointer in pattern */ + alias += count-1; + } + /* else do nothing */ + } + /* else do nothing */ + + /* just ignore unknown tags */ + } + + /* return # of items converted */ + return converted; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/intl/icu/source/io/ustdio.cpp b/intl/icu/source/io/ustdio.cpp new file mode 100644 index 0000000000..4130f34044 --- /dev/null +++ b/intl/icu/source/io/ustdio.cpp @@ -0,0 +1,732 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ****************************************************************************** + * + * Copyright (C) 1998-2016, International Business Machines + * Corporation and others. All Rights Reserved. + * + ****************************************************************************** + * + * File ustdio.c + * + * Modification History: + * + * Date Name Description + * 11/18/98 stephen Creation. + * 03/12/99 stephen Modified for new C API. + * 07/19/99 stephen Fixed read() and gets() + ****************************************************************************** + */ + +#include "unicode/ustdio.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "cmemory.h" +#include "cstring.h" +#include "ufile.h" +#include "ufmt_cmn.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" + +#include + +#define DELIM_LF 0x000A +#define DELIM_VT 0x000B +#define DELIM_FF 0x000C +#define DELIM_CR 0x000D +#define DELIM_NEL 0x0085 +#define DELIM_LS 0x2028 +#define DELIM_PS 0x2029 + +/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? 
*/ +#if U_PLATFORM_USES_ONLY_WIN32_API +static const char16_t DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; +static const uint32_t DELIMITERS_LEN = 2; +/* TODO: Default newline writing should be detected based upon the converter being used. */ +#else +static const char16_t DELIMITERS [] = { DELIM_LF, 0x0000 }; +static const uint32_t DELIMITERS_LEN = 1; +#endif + +#define IS_FIRST_STRING_DELIMITER(c1) \ + (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ + || (c1) == DELIM_NEL \ + || (c1) == DELIM_LS \ + || (c1) == DELIM_PS) +#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) +#define IS_COMBINED_STRING_DELIMITER(c1, c2) \ + (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) + + +#if !UCONFIG_NO_TRANSLITERATION + +U_CAPI UTransliterator* U_EXPORT2 +u_fsettransliterator(UFILE *file, UFileDirection direction, + UTransliterator *adopt, UErrorCode *status) +{ + UTransliterator *old = nullptr; + + if(U_FAILURE(*status)) + { + return adopt; + } + + if(!file) + { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return adopt; + } + + if(direction & U_READ) + { + /** TODO: implement */ + *status = U_UNSUPPORTED_ERROR; + return adopt; + } + + if(adopt == nullptr) /* they are clearing it */ + { + if(file->fTranslit != nullptr) + { + /* TODO: Check side */ + old = file->fTranslit->translit; + uprv_free(file->fTranslit->buffer); + file->fTranslit->buffer=nullptr; + uprv_free(file->fTranslit); + file->fTranslit=nullptr; + } + } + else + { + if(file->fTranslit == nullptr) + { + file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); + if(!file->fTranslit) + { + *status = U_MEMORY_ALLOCATION_ERROR; + return adopt; + } + file->fTranslit->capacity = 0; + file->fTranslit->length = 0; + file->fTranslit->pos = 0; + file->fTranslit->buffer = nullptr; + } + else + { + old = file->fTranslit->translit; + ufile_flush_translit(file); + } + + file->fTranslit->translit = adopt; + } + + return old; +} + +static const char16_t * u_file_translit(UFILE *f, const 
char16_t *src, int32_t *count, UBool flush) +{ + int32_t newlen; + int32_t junkCount = 0; + int32_t textLength; + int32_t textLimit; + UTransPosition pos; + UErrorCode status = U_ZERO_ERROR; + + if(count == nullptr) + { + count = &junkCount; + } + + if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) + { + /* fast path */ + return src; + } + + /* First: slide over everything */ + if(f->fTranslit->length > f->fTranslit->pos) + { + memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, + (f->fTranslit->length - f->fTranslit->pos)*sizeof(char16_t)); + } + f->fTranslit->length -= f->fTranslit->pos; /* always */ + f->fTranslit->pos = 0; + + /* Calculate new buffer size needed */ + newlen = (*count + f->fTranslit->length) * 4; + + if(newlen > f->fTranslit->capacity) + { + if(f->fTranslit->buffer == nullptr) + { + f->fTranslit->buffer = (char16_t*)uprv_malloc(newlen * sizeof(char16_t)); + } + else + { + f->fTranslit->buffer = (char16_t*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(char16_t)); + } + /* Check for malloc/realloc failure. 
*/ + if (f->fTranslit->buffer == nullptr) { + return nullptr; + } + f->fTranslit->capacity = newlen; + } + + /* Now, copy any data over */ + u_strncpy(f->fTranslit->buffer + f->fTranslit->length, + src, + *count); + f->fTranslit->length += *count; + + /* Now, translit in place as much as we can */ + if(flush == false) + { + textLength = f->fTranslit->length; + pos.contextStart = 0; + pos.contextLimit = textLength; + pos.start = 0; + pos.limit = textLength; + + utrans_transIncrementalUChars(f->fTranslit->translit, + f->fTranslit->buffer, /* because we shifted */ + &textLength, + f->fTranslit->capacity, + &pos, + &status); + + /* now: start/limit point to the transliterated text */ + /* Transliterated is [buffer..pos.start) */ + *count = pos.start; + f->fTranslit->pos = pos.start; + f->fTranslit->length = pos.limit; + + return f->fTranslit->buffer; + } + else + { + textLength = f->fTranslit->length; + textLimit = f->fTranslit->length; + + utrans_transUChars(f->fTranslit->translit, + f->fTranslit->buffer, + &textLength, + f->fTranslit->capacity, + 0, + &textLimit, + &status); + + /* out: converted len */ + *count = textLimit; + + /* Set pointers to 0 */ + f->fTranslit->pos = 0; + f->fTranslit->length = 0; + + return f->fTranslit->buffer; + } +} + +#endif + +void +ufile_flush_translit(UFILE *f) +{ +#if !UCONFIG_NO_TRANSLITERATION + if((!f)||(!f->fTranslit)) + return; +#endif + + u_file_write_flush(nullptr, 0, f, false, true); +} + + +void +ufile_flush_io(UFILE *f) +{ + if((!f) || (!f->fFile)) { + return; /* skip if no file */ + } + + u_file_write_flush(nullptr, 0, f, true, false); +} + + +void +ufile_close_translit(UFILE *f) +{ +#if !UCONFIG_NO_TRANSLITERATION + if((!f)||(!f->fTranslit)) + return; +#endif + + ufile_flush_translit(f); + +#if !UCONFIG_NO_TRANSLITERATION + if(f->fTranslit->translit) + utrans_close(f->fTranslit->translit); + + if(f->fTranslit->buffer) + { + uprv_free(f->fTranslit->buffer); + } + + uprv_free(f->fTranslit); + f->fTranslit = nullptr; +#endif 
+} + + +/* Input/output */ + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fputs(const char16_t *s, + UFILE *f) +{ + int32_t count = u_file_write(s, u_strlen(s), f); + count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); + return count; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fputc(UChar32 uc, + UFILE *f) +{ + char16_t buf[2]; + int32_t idx = 0; + UBool isError = false; + + U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError); + if (isError) { + return U_EOF; + } + return u_file_write(buf, idx, f) == idx ? uc : U_EOF; +} + + +U_CFUNC int32_t U_EXPORT2 +u_file_write_flush(const char16_t *chars, + int32_t count, + UFILE *f, + UBool flushIO, + UBool flushTranslit) +{ + /* Set up conversion parameters */ + UErrorCode status = U_ZERO_ERROR; + const char16_t *mySource = chars; + const char16_t *mySourceBegin; + const char16_t *mySourceEnd; + char charBuffer[UFILE_CHARBUFFER_SIZE]; + char *myTarget = charBuffer; + int32_t written = 0; + int32_t numConverted = 0; + + if (count < 0) { + count = u_strlen(chars); + } + +#if !UCONFIG_NO_TRANSLITERATION + if((f->fTranslit) && (f->fTranslit->translit)) + { + /* Do the transliteration */ + mySource = u_file_translit(f, chars, &count, flushTranslit); + } +#endif + + /* Write to a string. 
*/ + if (!f->fFile) { + int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); + if (flushIO && charsLeft > count) { + count++; + } + written = ufmt_min(count, charsLeft); + u_strncpy(f->str.fPos, mySource, written); + f->str.fPos += written; + return written; + } + + mySourceEnd = mySource + count; + + /* Perform the conversion in a loop */ + do { + mySourceBegin = mySource; /* beginning location for this loop */ + status = U_ZERO_ERROR; + if(f->fConverter != nullptr) { /* We have a valid converter */ + ucnv_fromUnicode(f->fConverter, + &myTarget, + charBuffer + UFILE_CHARBUFFER_SIZE, + &mySource, + mySourceEnd, + nullptr, + flushIO, + &status); + } else { /*weiv: do the invariant conversion */ + int32_t convertChars = (int32_t) (mySourceEnd - mySource); + if (convertChars > UFILE_CHARBUFFER_SIZE) { + convertChars = UFILE_CHARBUFFER_SIZE; + status = U_BUFFER_OVERFLOW_ERROR; + } + u_UCharsToChars(mySource, myTarget, convertChars); + mySource += convertChars; + myTarget += convertChars; + } + numConverted = (int32_t)(myTarget - charBuffer); + + if (numConverted > 0) { + /* write the converted bytes */ + fwrite(charBuffer, + sizeof(char), + numConverted, + f->fFile); + + written += (int32_t) (mySource - mySourceBegin); + } + myTarget = charBuffer; + } + while(status == U_BUFFER_OVERFLOW_ERROR); + + /* return # of chars written */ + return written; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_file_write( const char16_t *chars, + int32_t count, + UFILE *f) +{ + return u_file_write_flush(chars,count,f,false,false); +} + + +/* private function used for buffering input */ +void +ufile_fill_uchar_buffer(UFILE *f) +{ + UErrorCode status; + const char *mySource; + const char *mySourceEnd; + char16_t *myTarget; + int32_t bufferSize; + int32_t maxCPBytes; + int32_t bytesRead; + int32_t availLength; + int32_t dataSize; + char charBuffer[UFILE_CHARBUFFER_SIZE]; + u_localized_string *str; + + if (f->fFile == nullptr) { + /* There is nothing to do. It's a string. */ + return; + } + + str = &f->str; + dataSize = (int32_t)(str->fLimit - str->fPos); + if (f->fFileno == 0 && dataSize > 0) { + /* Don't read from stdin too many times. There is still some data. */ + return; + } + + /* shift the buffer if it isn't empty */ + if(dataSize != 0) { + u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */ + } + + + /* record how much buffer space is available */ + availLength = UFILE_UCHARBUFFER_SIZE - dataSize; + + /* Determine the # of codepage bytes needed to fill our char16_t buffer */ + /* weiv: if converter is nullptr, we use invariant converter with charwidth = 1)*/ + maxCPBytes = availLength / (f->fConverter!=nullptr?(2*ucnv_getMinCharSize(f->fConverter)):1); + + /* Read in the data to convert */ + if (f->fFileno == 0) { + /* Special case. Read from stdin one line at a time. */ + char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); + bytesRead = (int32_t)(retStr ? 
uprv_strlen(charBuffer) : 0); + } + else { + /* A normal file */ + bytesRead = (int32_t)fread(charBuffer, + sizeof(char), + ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), + f->fFile); + } + + /* Set up conversion parameters */ + status = U_ZERO_ERROR; + mySource = charBuffer; + mySourceEnd = charBuffer + bytesRead; + myTarget = f->fUCBuffer + dataSize; + bufferSize = UFILE_UCHARBUFFER_SIZE; + + if(f->fConverter != nullptr) { /* We have a valid converter */ + /* Perform the conversion */ + ucnv_toUnicode(f->fConverter, + &myTarget, + f->fUCBuffer + bufferSize, + &mySource, + mySourceEnd, + nullptr, + (UBool)(feof(f->fFile) != 0), + &status); + + } else { /*weiv: do the invariant conversion */ + u_charsToUChars(mySource, myTarget, bytesRead); + myTarget += bytesRead; + } + + /* update the pointers into our array */ + str->fPos = str->fBuffer; + str->fLimit = myTarget; +} + +U_CAPI char16_t* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgets(char16_t *s, + int32_t n, + UFILE *f) +{ + int32_t dataSize; + int32_t count; + char16_t *alias; + const char16_t *limit; + char16_t *sItr; + char16_t currDelim = 0; + u_localized_string *str; + + if (n <= 0) { + /* Caller screwed up. We need to write the null terminatior. 
*/ + return nullptr; + } + + /* fill the buffer if needed */ + str = &f->str; + if (str->fPos >= str->fLimit) { + ufile_fill_uchar_buffer(f); + } + + /* subtract 1 from n to compensate for the terminator */ + --n; + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + + /* if 0 characters were left, return 0 */ + if (dataSize == 0) + return nullptr; + + /* otherwise, iteratively fill the buffer and copy */ + count = 0; + sItr = s; + currDelim = 0; + while (dataSize > 0 && count < n) { + alias = str->fPos; + + /* Find how much to copy */ + if (dataSize < (n - count)) { + limit = str->fLimit; + } + else { + limit = alias + (n - count); + } + + if (!currDelim) { + /* Copy UChars until we find the first occurrence of a delimiter character */ + while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { + count++; + *(sItr++) = *(alias++); + } + /* Preserve the newline */ + if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { + if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { + currDelim = *alias; + } + else { + currDelim = 1; /* This isn't a newline, but it's used to say + that we should break later. We've checked all + possible newline combinations even across buffer + boundaries. */ + } + count++; + *(sItr++) = *(alias++); + } + } + /* If we have a CRLF combination, preserve that too. */ + if (alias < limit) { + if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { + count++; + *(sItr++) = *(alias++); + } + currDelim = 1; /* This isn't a newline, but it's used to say + that we should break later. We've checked all + possible newline combinations even across buffer + boundaries. 
*/ + } + + /* update the current buffer position */ + str->fPos = alias; + + /* if we found a delimiter */ + if (currDelim == 1) { + /* break out */ + break; + } + + /* refill the buffer */ + ufile_fill_uchar_buffer(f); + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + } + + /* add the terminator and return s */ + *sItr = 0x0000; + return s; +} + +U_CFUNC UBool U_EXPORT2 +ufile_getch(UFILE *f, char16_t *ch) +{ + UBool isValidChar = false; + + *ch = U_EOF; + /* if we have an available character in the buffer, return it */ + if(f->str.fPos < f->str.fLimit){ + *ch = *(f->str.fPos)++; + isValidChar = true; + } + else { + /* otherwise, fill the buffer and return the next character */ + if(f->str.fPos >= f->str.fLimit) { + ufile_fill_uchar_buffer(f); + } + if(f->str.fPos < f->str.fLimit) { + *ch = *(f->str.fPos)++; + isValidChar = true; + } + } + return isValidChar; +} + +U_CAPI char16_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetc(UFILE *f) +{ + char16_t ch; + ufile_getch(f, &ch); + return ch; +} + +U_CFUNC UBool U_EXPORT2 +ufile_getch32(UFILE *f, UChar32 *c32) +{ + UBool isValidChar = false; + u_localized_string *str; + + *c32 = U_EOF; + + /* Fill the buffer if it is empty */ + str = &f->str; + if (str->fPos + 1 >= str->fLimit) { + ufile_fill_uchar_buffer(f); + } + + /* Get the next character in the buffer */ + if (str->fPos < str->fLimit) { + *c32 = *(str->fPos)++; + if (U_IS_LEAD(*c32)) { + if (str->fPos < str->fLimit) { + char16_t c16 = *(str->fPos)++; + *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); + isValidChar = true; + } + else { + *c32 = U_EOF; + } + } + else { + isValidChar = true; + } + } + + return isValidChar; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetcx(UFILE *f) +{ + UChar32 ch; + ufile_getch32(f, &ch); + return ch; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fungetc(UChar32 ch, + UFILE *f) +{ + u_localized_string *str; + + str = &f->str; + + /* if we're at the beginning of the buffer, sorry! */ + if (str->fPos == str->fBuffer + || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) + { + ch = U_EOF; + } + else { + /* otherwise, put the character back */ + /* Remember, read them back on in the reverse order. */ + if (U_IS_LEAD(ch)) { + if (*--(str->fPos) != U16_TRAIL(ch) + || *--(str->fPos) != U16_LEAD(ch)) + { + ch = U_EOF; + } + } + else if (*--(str->fPos) != ch) { + ch = U_EOF; + } + } + return ch; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_file_read( char16_t *chars, + int32_t count, + UFILE *f) +{ + int32_t dataSize; + int32_t read = 0; + u_localized_string *str = &f->str; + + do { + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + if (dataSize <= 0) { + /* fill the buffer */ + ufile_fill_uchar_buffer(f); + dataSize = (int32_t)(str->fLimit - str->fPos); + } + + /* Make sure that we don't read too much */ + if (dataSize > (count - read)) { + dataSize = count - read; + } + + /* copy the current data in the buffer */ + memcpy(chars + read, str->fPos, dataSize * sizeof(char16_t)); + + /* update number of items read */ + read += dataSize; + + /* update the current buffer position */ + str->fPos += dataSize; + } + while (dataSize != 0 && read < count); + + return read; +} +#endif diff --git a/intl/icu/source/io/ustream.cpp b/intl/icu/source/io/ustream.cpp new file mode 100644 index 0000000000..fd783b3b23 --- /dev/null +++ b/intl/icu/source/io/ustream.cpp @@ -0,0 +1,170 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2001-2016, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +* FILE NAME : ustream.cpp +* +* Modification History: +* +* Date Name Description +* 06/25/2001 grhoten Move iostream from unistr.h to here +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/uobject.h" +#include "unicode/ustream.h" +#include "unicode/ucnv.h" +#include "unicode/uchar.h" +#include "unicode/utf16.h" +#include "ustr_cnv.h" +#include "cmemory.h" +#include + +// console IO + +#define STD_NAMESPACE std:: + +#define STD_OSTREAM STD_NAMESPACE ostream +#define STD_ISTREAM STD_NAMESPACE istream + +U_NAMESPACE_BEGIN + +U_IO_API STD_OSTREAM & U_EXPORT2 +operator<<(STD_OSTREAM& stream, const UnicodeString& str) +{ + if(str.length() > 0) { + char buffer[200]; + UConverter *converter; + UErrorCode errorCode = U_ZERO_ERROR; + + // use the default converter to convert chunks of text + converter = u_getDefaultConverter(&errorCode); + if(U_SUCCESS(errorCode)) { + const char16_t *us = str.getBuffer(); + const char16_t *uLimit = us + str.length(); + char *s, *sLimit = buffer + (sizeof(buffer) - 1); + do { + errorCode = U_ZERO_ERROR; + s = buffer; + ucnv_fromUnicode(converter, &s, sLimit, &us, uLimit, 0, false, &errorCode); + *s = 0; + + // write this chunk + if(s > buffer) { + stream << buffer; + } + } while(errorCode == U_BUFFER_OVERFLOW_ERROR); + u_releaseDefaultConverter(converter); + } + } + +/* stream.flush();*/ + return stream; +} + +U_IO_API STD_ISTREAM & U_EXPORT2 +operator>>(STD_ISTREAM& stream, UnicodeString& str) +{ + // This is like ICU status checking. 
+ if (stream.fail()) { + return stream; + } + + /* ipfx should eat whitespace when ios::skipws is set */ + char16_t uBuffer[16]; + char buffer[16]; + int32_t idx = 0; + UConverter *converter; + UErrorCode errorCode = U_ZERO_ERROR; + + // use the default converter to convert chunks of text + converter = u_getDefaultConverter(&errorCode); + if(U_SUCCESS(errorCode)) { + char16_t *us = uBuffer; + const char16_t *uLimit = uBuffer + UPRV_LENGTHOF(uBuffer); + const char *s, *sLimit; + char ch; + char16_t ch32; + UBool initialWhitespace = true; + UBool continueReading = true; + + /* We need to consume one byte at a time to see what is considered whitespace. */ + while (continueReading) { + ch = stream.get(); + if (stream.eof()) { + // The EOF is only set after the get() of an unavailable byte. + if (!initialWhitespace) { + stream.clear(stream.eofbit); + } + continueReading = false; + } + sLimit = &ch + (int)continueReading; + us = uBuffer; + s = &ch; + errorCode = U_ZERO_ERROR; + /* + Since we aren't guaranteed to see the state before this call, + this code won't work on stateful encodings like ISO-2022 or an EBCDIC stateful encoding. + We flush on the last byte to ensure that we output truncated multibyte characters. + */ + ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, !continueReading, &errorCode); + if(U_FAILURE(errorCode)) { + /* Something really bad happened. setstate() isn't always an available API */ + stream.clear(stream.failbit); + goto STOP_READING; + } + /* Was the character consumed? 
*/ + if (us != uBuffer) { + /* Reminder: ibm-1390 & JISX0213 can output 2 Unicode code points */ + int32_t uBuffSize = static_cast(us-uBuffer); + int32_t uBuffIdx = 0; + while (uBuffIdx < uBuffSize) { + U16_NEXT(uBuffer, uBuffIdx, uBuffSize, ch32); + if (u_isWhitespace(ch32)) { + if (!initialWhitespace) { + buffer[idx++] = ch; + while (idx > 0) { + stream.putback(buffer[--idx]); + } + goto STOP_READING; + } + /* else skip intialWhitespace */ + } + else { + if (initialWhitespace) { + /* + When initialWhitespace is true, we haven't appended any + character yet. This is where we truncate the string, + to avoid modifying the string before we know if we can + actually read from the stream. + */ + str.truncate(0); + initialWhitespace = false; + } + str.append(ch32); + } + } + idx = 0; + } + else { + buffer[idx++] = ch; + } + } +STOP_READING: + u_releaseDefaultConverter(converter); + } + +/* stream.flush();*/ + return stream; +} + +U_NAMESPACE_END + +#endif -- cgit v1.2.3