summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/simpleformatter.cpp
blob: 16256270727e8d59b7ff8bf996a33b7ae41d6381 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 2014-2016, International Business Machines
* Corporation and others.  All Rights Reserved.
******************************************************************************
* simpleformatter.cpp
*/

#include "unicode/utypes.h"
#include "unicode/simpleformatter.h"
#include "unicode/unistr.h"
#include "uassert.h"

U_NAMESPACE_BEGIN

namespace {

/**
 * Argument numbers must be smaller than this limit.
 * Text segment lengths are offset by this much.
 * This is currently the only unused char value in compiled patterns,
 * except it is the maximum value of the first unit (max arg +1).
 */
const int32_t ARG_NUM_LIMIT = 0x100;
/**
 * Initial and maximum char/char16_t value set for a text segment.
 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
 */
const char16_t SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
/**
 * Maximum length of a text segment. Longer segments are split into shorter ones.
 */
const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;

enum {
    APOS = 0x27,
    DIGIT_ZERO = 0x30,
    DIGIT_ONE = 0x31,
    DIGIT_NINE = 0x39,
    OPEN_BRACE = 0x7b,
    CLOSE_BRACE = 0x7d
};

inline UBool isInvalidArray(const void *array, int32_t length) {
   return (length < 0 || (array == nullptr && length != 0));
}

}  // namespace

SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) {
    if (this == &other) {
        return *this;
    }
    compiledPattern = other.compiledPattern;
    return *this;
}

SimpleFormatter::~SimpleFormatter() {}

UBool SimpleFormatter::applyPatternMinMaxArguments(
        const UnicodeString &pattern,
        int32_t min, int32_t max,
        UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return false;
    }
    // Parse consistent with MessagePattern, but
    // - support only simple numbered arguments
    // - build a simple binary structure into the result string
    const char16_t *patternBuffer = pattern.getBuffer();
    int32_t patternLength = pattern.length();
    // Reserve the first char for the number of arguments.
    compiledPattern.setTo((char16_t)0);
    int32_t textLength = 0;
    int32_t maxArg = -1;
    UBool inQuote = false;
    for (int32_t i = 0; i < patternLength;) {
        char16_t c = patternBuffer[i++];
        if (c == APOS) {
            if (i < patternLength && (c = patternBuffer[i]) == APOS) {
                // double apostrophe, skip the second one
                ++i;
            } else if (inQuote) {
                // skip the quote-ending apostrophe
                inQuote = false;
                continue;
            } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
                // Skip the quote-starting apostrophe, find the end of the quoted literal text.
                ++i;
                inQuote = true;
            } else {
                // The apostrophe is part of literal text.
                c = APOS;
            }
        } else if (!inQuote && c == OPEN_BRACE) {
            if (textLength > 0) {
                compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
                                          (char16_t)(ARG_NUM_LIMIT + textLength));
                textLength = 0;
            }
            int32_t argNumber;
            if ((i + 1) < patternLength &&
                    0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
                    patternBuffer[i + 1] == CLOSE_BRACE) {
                i += 2;
            } else {
                // Multi-digit argument number (no leading zero) or syntax error.
                // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
                // around the number, but this class does not.
                argNumber = -1;
                if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
                    argNumber = c - DIGIT_ZERO;
                    while (i < patternLength &&
                            DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
                        argNumber = argNumber * 10 + (c - DIGIT_ZERO);
                        if (argNumber >= ARG_NUM_LIMIT) {
                            break;
                        }
                    }
                }
                if (argNumber < 0 || c != CLOSE_BRACE) {
                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                    return false;
                }
            }
            if (argNumber > maxArg) {
                maxArg = argNumber;
            }
            compiledPattern.append((char16_t)argNumber);
            continue;
        }  // else: c is part of literal text
        // Append c and track the literal-text segment length.
        if (textLength == 0) {
            // Reserve a char for the length of a new text segment, preset the maximum length.
            compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
        }
        compiledPattern.append(c);
        if (++textLength == MAX_SEGMENT_LENGTH) {
            textLength = 0;
        }
    }
    if (textLength > 0) {
        compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
                                  (char16_t)(ARG_NUM_LIMIT + textLength));
    }
    int32_t argCount = maxArg + 1;
    if (argCount < min || max < argCount) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return false;
    }
    compiledPattern.setCharAt(0, (char16_t)argCount);
    return true;
}

UnicodeString& SimpleFormatter::format(
        const UnicodeString &value0,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *values[] = { &value0 };
    return formatAndAppend(values, 1, appendTo, nullptr, 0, errorCode);
}

UnicodeString& SimpleFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *values[] = { &value0, &value1 };
    return formatAndAppend(values, 2, appendTo, nullptr, 0, errorCode);
}

UnicodeString& SimpleFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        const UnicodeString &value2,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *values[] = { &value0, &value1, &value2 };
    return formatAndAppend(values, 3, appendTo, nullptr, 0, errorCode);
}

UnicodeString& SimpleFormatter::formatAndAppend(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &appendTo,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return appendTo;
    }
    if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) ||
            valuesLength < getArgumentLimit()) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }
    return format(compiledPattern.getBuffer(), compiledPattern.length(), values,
                  appendTo, nullptr, true,
                  offsets, offsetsLength, errorCode);
}

UnicodeString &SimpleFormatter::formatAndReplace(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &result,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }
    const char16_t *cp = compiledPattern.getBuffer();
    int32_t cpLength = compiledPattern.length();
    if (valuesLength < getArgumentLimit(cp, cpLength)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }

    // If the pattern starts with an argument whose value is the same object
    // as the result, then we keep the result contents and append to it.
    // Otherwise we replace its contents.
    int32_t firstArg = -1;
    // If any non-initial argument value is the same object as the result,
    // then we first copy its contents and use that instead while formatting.
    UnicodeString resultCopy;
    if (getArgumentLimit(cp, cpLength) > 0) {
        for (int32_t i = 1; i < cpLength;) {
            int32_t n = cp[i++];
            if (n < ARG_NUM_LIMIT) {
                if (values[n] == &result) {
                    if (i == 2) {
                        firstArg = n;
                    } else if (resultCopy.isEmpty() && !result.isEmpty()) {
                        resultCopy = result;
                    }
                }
            } else {
                i += n - ARG_NUM_LIMIT;
            }
        }
    }
    if (firstArg < 0) {
        result.remove();
    }
    return format(cp, cpLength, values,
                  result, &resultCopy, false,
                  offsets, offsetsLength, errorCode);
}

UnicodeString SimpleFormatter::getTextWithNoArguments(
        const char16_t *compiledPattern,
        int32_t compiledPatternLength,
        int32_t* offsets,
        int32_t offsetsLength) {
    for (int32_t i = 0; i < offsetsLength; i++) {
        offsets[i] = -1;
    }
    int32_t capacity = compiledPatternLength - 1 -
            getArgumentLimit(compiledPattern, compiledPatternLength);
    UnicodeString sb(capacity, 0, 0);  // Java: StringBuilder
    for (int32_t i = 1; i < compiledPatternLength;) {
        int32_t n = compiledPattern[i++];
        if (n > ARG_NUM_LIMIT) {
            n -= ARG_NUM_LIMIT;
            sb.append(compiledPattern + i, n);
            i += n;
        } else if (n < offsetsLength) {
            // TODO(ICU-20406): This does not distinguish between "{0}{1}" and "{1}{0}".
            // Consider removing this function and replacing it with an iterator interface.
            offsets[n] = sb.length();
        }
    }
    return sb;
}

UnicodeString &SimpleFormatter::format(
        const char16_t *compiledPattern, int32_t compiledPatternLength,
        const UnicodeString *const *values,
        UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
        int32_t *offsets, int32_t offsetsLength,
        UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    for (int32_t i = 0; i < offsetsLength; i++) {
        offsets[i] = -1;
    }
    for (int32_t i = 1; i < compiledPatternLength;) {
        int32_t n = compiledPattern[i++];
        if (n < ARG_NUM_LIMIT) {
            const UnicodeString *value = values[n];
            if (value == nullptr) {
                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                return result;
            }
            if (value == &result) {
                if (forbidResultAsValue) {
                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                    return result;
                }
                if (i == 2) {
                    // We are appending to result which is also the first value object.
                    if (n < offsetsLength) {
                        offsets[n] = 0;
                    }
                } else {
                    if (n < offsetsLength) {
                        offsets[n] = result.length();
                    }
                    result.append(*resultCopy);
                }
            } else {
                if (n < offsetsLength) {
                    offsets[n] = result.length();
                }
                result.append(*value);
            }
        } else {
            int32_t length = n - ARG_NUM_LIMIT;
            result.append(compiledPattern + i, length);
            i += length;
        }
    }
    return result;
}

U_NAMESPACE_END