summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/formatted_string_builder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/formatted_string_builder.cpp')
-rw-r--r--intl/icu/source/i18n/formatted_string_builder.cpp470
1 files changed, 470 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/formatted_string_builder.cpp b/intl/icu/source/i18n/formatted_string_builder.cpp
new file mode 100644
index 0000000000..8dbf954af9
--- /dev/null
+++ b/intl/icu/source/i18n/formatted_string_builder.cpp
@@ -0,0 +1,470 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "formatted_string_builder.h"
+#include "putilimp.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "unicode/unum.h" // for UNumberFormatFields literals
+
+namespace {
+
+// A version of uprv_memcpy that checks for length 0.
+// By default, uprv_memcpy requires a length of at least 1.
+inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
+ if (len > 0) {
+ uprv_memcpy(dest, src, len);
+ }
+}
+
+// A version of uprv_memmove that checks for length 0.
+// By default, uprv_memmove requires a length of at least 1.
+inline void uprv_memmove2(void* dest, const void* src, size_t len) {
+ if (len > 0) {
+ uprv_memmove(dest, src, len);
+ }
+}
+
+} // namespace
+
+
+U_NAMESPACE_BEGIN
+
+FormattedStringBuilder::FormattedStringBuilder() {
+#if U_DEBUG
+ // Initializing the memory to non-zero helps catch some bugs that involve
+ // reading from an improperly terminated string.
+ for (int32_t i=0; i<getCapacity(); i++) {
+ getCharPtr()[i] = 1;
+ }
+#endif
+}
+
+FormattedStringBuilder::~FormattedStringBuilder() {
+ if (fUsingHeap) {
+ uprv_free(fChars.heap.ptr);
+ uprv_free(fFields.heap.ptr);
+ }
+}
+
+FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
+ *this = other;
+}
+
+FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
+ // Check for self-assignment
+ if (this == &other) {
+ return *this;
+ }
+
+ // Continue with deallocation and copying
+ if (fUsingHeap) {
+ uprv_free(fChars.heap.ptr);
+ uprv_free(fFields.heap.ptr);
+ fUsingHeap = false;
+ }
+
+ int32_t capacity = other.getCapacity();
+ if (capacity > DEFAULT_CAPACITY) {
+ // FIXME: uprv_malloc
+ // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
+ auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity));
+ auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity));
+ if (newChars == nullptr || newFields == nullptr) {
+ // UErrorCode is not available; fail silently.
+ uprv_free(newChars);
+ uprv_free(newFields);
+ *this = FormattedStringBuilder(); // can't fail
+ return *this;
+ }
+
+ fUsingHeap = true;
+ fChars.heap.capacity = capacity;
+ fChars.heap.ptr = newChars;
+ fFields.heap.capacity = capacity;
+ fFields.heap.ptr = newFields;
+ }
+
+ uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
+ uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
+
+ fZero = other.fZero;
+ fLength = other.fLength;
+ return *this;
+}
+
+int32_t FormattedStringBuilder::length() const {
+ return fLength;
+}
+
+int32_t FormattedStringBuilder::codePointCount() const {
+ return u_countChar32(getCharPtr() + fZero, fLength);
+}
+
+UChar32 FormattedStringBuilder::getFirstCodePoint() const {
+ if (fLength == 0) {
+ return -1;
+ }
+ UChar32 cp;
+ U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
+ return cp;
+}
+
+UChar32 FormattedStringBuilder::getLastCodePoint() const {
+ if (fLength == 0) {
+ return -1;
+ }
+ int32_t offset = fLength;
+ U16_BACK_1(getCharPtr() + fZero, 0, offset);
+ UChar32 cp;
+ U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
+ return cp;
+}
+
+UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
+ UChar32 cp;
+ U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
+ return cp;
+}
+
+UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
+ int32_t offset = index;
+ U16_BACK_1(getCharPtr() + fZero, 0, offset);
+ UChar32 cp;
+ U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
+ return cp;
+}
+
+FormattedStringBuilder &FormattedStringBuilder::clear() {
+ // TODO: Reset the heap here?
+ fZero = getCapacity() / 2;
+ fLength = 0;
+ return *this;
+}
+
+int32_t
+FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
+ int32_t count = U16_LENGTH(codePoint);
+ int32_t position = prepareForInsert(index, count, status);
+ if (U_FAILURE(status)) {
+ return count;
+ }
+ if (count == 1) {
+ getCharPtr()[position] = (char16_t) codePoint;
+ getFieldPtr()[position] = field;
+ } else {
+ getCharPtr()[position] = U16_LEAD(codePoint);
+ getCharPtr()[position + 1] = U16_TRAIL(codePoint);
+ getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
+ }
+ return count;
+}
+
+int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
+ UErrorCode &status) {
+ if (unistr.length() == 0) {
+ // Nothing to insert.
+ return 0;
+ } else if (unistr.length() == 1) {
+ // Fast path: insert using insertCodePoint.
+ return insertCodePoint(index, unistr.charAt(0), field, status);
+ } else {
+ return insert(index, unistr, 0, unistr.length(), field, status);
+ }
+}
+
+int32_t
+FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
+ Field field, UErrorCode &status) {
+ int32_t count = end - start;
+ int32_t position = prepareForInsert(index, count, status);
+ if (U_FAILURE(status)) {
+ return count;
+ }
+ for (int32_t i = 0; i < count; i++) {
+ getCharPtr()[position + i] = unistr.charAt(start + i);
+ getFieldPtr()[position + i] = field;
+ }
+ return count;
+}
+
+int32_t
+FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
+ int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
+ int32_t thisLength = endThis - startThis;
+ int32_t otherLength = endOther - startOther;
+ int32_t count = otherLength - thisLength;
+ if (U_FAILURE(status)) {
+ return count;
+ }
+ int32_t position;
+ if (count > 0) {
+ // Overall, chars need to be added.
+ position = prepareForInsert(startThis, count, status);
+ } else {
+ // Overall, chars need to be removed or kept the same.
+ position = remove(startThis, -count);
+ }
+ if (U_FAILURE(status)) {
+ return count;
+ }
+ for (int32_t i = 0; i < otherLength; i++) {
+ getCharPtr()[position + i] = unistr.charAt(startOther + i);
+ getFieldPtr()[position + i] = field;
+ }
+ return count;
+}
+
+int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
+ return insert(fLength, other, status);
+}
+
+int32_t
+FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return 0;
+ }
+ if (this == &other) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ int32_t count = other.fLength;
+ if (count == 0) {
+ // Nothing to insert.
+ return 0;
+ }
+ int32_t position = prepareForInsert(index, count, status);
+ if (U_FAILURE(status)) {
+ return count;
+ }
+ for (int32_t i = 0; i < count; i++) {
+ getCharPtr()[position + i] = other.charAt(i);
+ getFieldPtr()[position + i] = other.fieldAt(i);
+ }
+ return count;
+}
+
+void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
+ int32_t position = prepareForInsert(fLength, 1, status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ getCharPtr()[position] = 0;
+ getFieldPtr()[position] = kUndefinedField;
+ fLength--;
+}
+
+int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
+ U_ASSERT(index >= 0);
+ U_ASSERT(index <= fLength);
+ U_ASSERT(count >= 0);
+ U_ASSERT(fZero >= 0);
+ U_ASSERT(fLength >= 0);
+ U_ASSERT(getCapacity() - fZero >= fLength);
+ if (U_FAILURE(status)) {
+ return count;
+ }
+ if (index == 0 && fZero - count >= 0) {
+ // Append to start
+ fZero -= count;
+ fLength += count;
+ return fZero;
+ } else if (index == fLength && count <= getCapacity() - fZero - fLength) {
+ // Append to end
+ fLength += count;
+ return fZero + fLength - count;
+ } else {
+ // Move chars around and/or allocate more space
+ return prepareForInsertHelper(index, count, status);
+ }
+}
+
+int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
+ int32_t oldCapacity = getCapacity();
+ int32_t oldZero = fZero;
+ char16_t *oldChars = getCharPtr();
+ Field *oldFields = getFieldPtr();
+ int32_t newLength;
+ if (uprv_add32_overflow(fLength, count, &newLength)) {
+ status = U_INPUT_TOO_LONG_ERROR;
+ return -1;
+ }
+ int32_t newZero;
+ if (newLength > oldCapacity) {
+ if (newLength > INT32_MAX / 2) {
+ // We do not support more than 1G char16_t in this code because
+ // dealing with >2G *bytes* can cause subtle bugs.
+ status = U_INPUT_TOO_LONG_ERROR;
+ return -1;
+ }
+ // Keep newCapacity also to at most 1G char16_t.
+ int32_t newCapacity = newLength * 2;
+ newZero = (newCapacity - newLength) / 2;
+
+ // C++ note: malloc appears in two places: here and in the assignment operator.
+ auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * static_cast<size_t>(newCapacity)));
+ auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * static_cast<size_t>(newCapacity)));
+ if (newChars == nullptr || newFields == nullptr) {
+ uprv_free(newChars);
+ uprv_free(newFields);
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return -1;
+ }
+
+ // First copy the prefix and then the suffix, leaving room for the new chars that the
+ // caller wants to insert.
+ // C++ note: memcpy is OK because the src and dest do not overlap.
+ uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
+ uprv_memcpy2(newChars + newZero + index + count,
+ oldChars + oldZero + index,
+ sizeof(char16_t) * (fLength - index));
+ uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
+ uprv_memcpy2(newFields + newZero + index + count,
+ oldFields + oldZero + index,
+ sizeof(Field) * (fLength - index));
+
+ if (fUsingHeap) {
+ uprv_free(oldChars);
+ uprv_free(oldFields);
+ }
+ fUsingHeap = true;
+ fChars.heap.ptr = newChars;
+ fChars.heap.capacity = newCapacity;
+ fFields.heap.ptr = newFields;
+ fFields.heap.capacity = newCapacity;
+ } else {
+ newZero = (oldCapacity - newLength) / 2;
+
+ // C++ note: memmove is required because src and dest may overlap.
+ // First copy the entire string to the location of the prefix, and then move the suffix
+ // to make room for the new chars that the caller wants to insert.
+ uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
+ uprv_memmove2(oldChars + newZero + index + count,
+ oldChars + newZero + index,
+ sizeof(char16_t) * (fLength - index));
+ uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
+ uprv_memmove2(oldFields + newZero + index + count,
+ oldFields + newZero + index,
+ sizeof(Field) * (fLength - index));
+ }
+ fZero = newZero;
+ fLength = newLength;
+ return fZero + index;
+}
+
+int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
+ U_ASSERT(0 <= index);
+ U_ASSERT(index <= fLength);
+ U_ASSERT(count <= (fLength - index));
+ U_ASSERT(index <= getCapacity() - fZero);
+
+ int32_t position = index + fZero;
+ // TODO: Reset the heap here? (If the string after removal can fit on stack?)
+ uprv_memmove2(getCharPtr() + position,
+ getCharPtr() + position + count,
+ sizeof(char16_t) * (fLength - index - count));
+ uprv_memmove2(getFieldPtr() + position,
+ getFieldPtr() + position + count,
+ sizeof(Field) * (fLength - index - count));
+ fLength -= count;
+ return position;
+}
+
+UnicodeString FormattedStringBuilder::toUnicodeString() const {
+ return UnicodeString(getCharPtr() + fZero, fLength);
+}
+
+const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
+ // Readonly-alias constructor:
+ return UnicodeString(false, getCharPtr() + fZero, fLength);
+}
+
+UnicodeString FormattedStringBuilder::toDebugString() const {
+ UnicodeString sb;
+ sb.append(u"<FormattedStringBuilder [", -1);
+ sb.append(toUnicodeString());
+ sb.append(u"] [", -1);
+ for (int i = 0; i < fLength; i++) {
+ if (fieldAt(i) == kUndefinedField) {
+ sb.append(u'n');
+ } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
+ char16_t c;
+ switch (fieldAt(i).getField()) {
+ case UNUM_SIGN_FIELD:
+ c = u'-';
+ break;
+ case UNUM_INTEGER_FIELD:
+ c = u'i';
+ break;
+ case UNUM_FRACTION_FIELD:
+ c = u'f';
+ break;
+ case UNUM_EXPONENT_FIELD:
+ c = u'e';
+ break;
+ case UNUM_EXPONENT_SIGN_FIELD:
+ c = u'+';
+ break;
+ case UNUM_EXPONENT_SYMBOL_FIELD:
+ c = u'E';
+ break;
+ case UNUM_DECIMAL_SEPARATOR_FIELD:
+ c = u'.';
+ break;
+ case UNUM_GROUPING_SEPARATOR_FIELD:
+ c = u',';
+ break;
+ case UNUM_PERCENT_FIELD:
+ c = u'%';
+ break;
+ case UNUM_PERMILL_FIELD:
+ c = u'‰';
+ break;
+ case UNUM_CURRENCY_FIELD:
+ c = u'$';
+ break;
+ default:
+ c = u'0' + fieldAt(i).getField();
+ break;
+ }
+ sb.append(c);
+ } else {
+ sb.append(u'0' + fieldAt(i).getCategory());
+ }
+ }
+ sb.append(u"]>", -1);
+ return sb;
+}
+
+const char16_t *FormattedStringBuilder::chars() const {
+ return getCharPtr() + fZero;
+}
+
+bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
+ if (fLength != other.fLength) {
+ return false;
+ }
+ for (int32_t i = 0; i < fLength; i++) {
+ if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool FormattedStringBuilder::containsField(Field field) const {
+ for (int32_t i = 0; i < fLength; i++) {
+ if (field == fieldAt(i)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */