summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/unicode/bytestream.h
blob: 7fe24062228ce346fdd10c7584e3a586444eaaff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Copyright (C) 2009-2012, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sanjay@google.com (Sanjay Ghemawat)
//
// Abstract interface that consumes a sequence of bytes (ByteSink).
//
// Used so that we can write a single piece of code that can operate
// on a variety of output string types.
//
// Various implementations of this interface are provided:
//   ByteSink:
//      CheckedArrayByteSink    Write to a flat array, with bounds checking
//      StringByteSink          Write to an STL string

// This code is a contribution of Google code, and the style used here is
// a compromise between the original Google code and the ICU coding guidelines.
// For example, data types are ICU-ified (size_t,int->int32_t),
// and API comments doxygen-ified, but function names and behavior are
// as in the original, if possible.
// Assertion-style error handling, not available in ICU, was changed to
// parameter "pinning" similar to UnicodeString.
//
// In addition, this is only a partial port of the original Google code,
// limited to what was needed so far. The (nearly) complete original code
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
// (see ICU ticket 6765, r25517).

#ifndef __BYTESTREAM_H__
#define __BYTESTREAM_H__

/**
 * \file
 * \brief C++ API: Interface for writing bytes, and implementation classes.
 */

#include "unicode/utypes.h"

#if U_SHOW_CPLUSPLUS_API

#include "unicode/uobject.h"
#include "unicode/std_string.h"

U_NAMESPACE_BEGIN

/**
 * A ByteSink can be filled with bytes.
 *
 * This is an abstract interface: Append() is pure virtual and must be
 * implemented by subclasses, while GetAppendBuffer() and Flush() are virtual
 * and may be overridden.
 * ByteSink objects cannot be copied (copy constructor and copy assignment
 * are deleted).
 * @stable ICU 4.2
 */
class U_COMMON_API ByteSink : public UMemory {
public:
  /**
   * Default constructor. Does nothing.
   * @stable ICU 4.2
   */
  ByteSink() { }
  /**
   * Virtual destructor, so that a sink can be destroyed through a
   * ByteSink pointer or reference.
   * @stable ICU 4.2
   */
  virtual ~ByteSink();

  /**
   * Appends the n bytes "bytes[0,n-1]" to this sink.
   * Pure virtual: every concrete sink must implement this function.
   * @param bytes the pointer to the bytes
   * @param n the number of bytes; must be non-negative
   * @stable ICU 4.2
   */
  virtual void Append(const char* bytes, int32_t n) = 0;

#ifndef U_HIDE_DRAFT_API
  /**
   * Appends n bytes to this. Same as Append(): this overload simply
   * forwards its arguments to Append().
   * Call AppendU8() with u8"string literals" which are const char * in C++11
   * but const char8_t * in C++20.
   * If the compiler does support char8_t as a distinct type,
   * then an AppendU8() overload for that is defined and will be chosen.
   *
   * @param bytes the pointer to the bytes
   * @param n the number of bytes; must be non-negative
   * @draft ICU 67
   */
  inline void AppendU8(const char* bytes, int32_t n) {
    Append(bytes, n);
  }

  // The char8_t overload is only declared when the compiler defines the
  // __cpp_char8_t feature-test macro, or when U_IN_DOXYGEN is defined.
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
  /**
   * Appends n bytes to this. Same as Append() but for a const char8_t * pointer:
   * the pointer is reinterpreted as const char * and forwarded to Append().
   * Call AppendU8() with u8"string literals" which are const char * in C++11
   * but const char8_t * in C++20.
   * If the compiler does support char8_t as a distinct type,
   * then this AppendU8() overload for that is defined and will be chosen.
   *
   * @param bytes the pointer to the bytes
   * @param n the number of bytes; must be non-negative
   * @draft ICU 67
   */
  inline void AppendU8(const char8_t* bytes, int32_t n) {
    Append(reinterpret_cast<const char*>(bytes), n);
  }
#endif
#endif  // U_HIDE_DRAFT_API

  /**
   * Returns a writable buffer for appending and writes the buffer's capacity to
   * *result_capacity. Guarantees *result_capacity>=min_capacity.
   * May return a pointer to the caller-owned scratch buffer which must have
   * scratch_capacity>=min_capacity.
   * The returned buffer is only valid until the next operation
   * on this ByteSink.
   *
   * After writing at most *result_capacity bytes, call Append() with the
   * pointer returned from this function and the number of bytes written.
   * Many Append() implementations will avoid copying bytes if this function
   * returned an internal buffer.
   *
   * Partial usage example:
   *  int32_t capacity;
   *  char* buffer = sink->GetAppendBuffer(..., &capacity);
   *  ... Write n bytes into buffer, with n <= capacity.
   *  sink->Append(buffer, n);
   *
   * If the ByteSink allocates or reallocates an internal buffer, it should use
   * the desired_capacity_hint if appropriate.
   * If a caller cannot provide a reasonable guess at the desired capacity,
   * it should pass desired_capacity_hint=0.
   *
   * If a non-scratch buffer is returned, the caller may only pass
   * a prefix of it to Append(), that is, the returned pointer itself
   * together with a length of at most *result_capacity.
   * It is not correct to pass an interior pointer to Append().
   *
   * The default implementation always returns the scratch buffer.
   *
   * @param min_capacity required minimum capacity of the returned buffer;
   *                     must be non-negative
   * @param desired_capacity_hint desired capacity of the returned buffer;
   *                              must be non-negative
   * @param scratch default caller-owned buffer
   * @param scratch_capacity capacity of the scratch buffer
   * @param result_capacity pointer to an integer which will be set to the
   *                        capacity of the returned buffer
   * @return a buffer with *result_capacity>=min_capacity
   * @stable ICU 4.2
   */
  virtual char* GetAppendBuffer(int32_t min_capacity,
                                int32_t desired_capacity_hint,
                                char* scratch, int32_t scratch_capacity,
                                int32_t* result_capacity);

  /**
   * Flush internal buffers.
   * Some byte sinks use internal buffers or provide buffering
   * and require calling Flush() at the end of the stream.
   * The ByteSink should be ready for further Append() calls after Flush().
   * The default implementation of Flush() does nothing.
   * @stable ICU 4.2
   */
  virtual void Flush();

private:
  // Not copyable: copy construction and copy assignment are deleted.
  ByteSink(const ByteSink &) = delete;
  ByteSink &operator=(const ByteSink &) = delete;
};

// -------------------------------------------------------------
// Some standard implementations

/**
 * Implementation of ByteSink that writes to a flat byte array,
 * with bounds-checking:
 * This sink will not write more than capacity bytes to outbuf.
 * If more than capacity bytes are Append()ed, then excess bytes are ignored,
 * and Overflowed() will return true.
 * Overflow does not cause a runtime error.
 *
 * The sink has no default constructor and cannot be copied.
 * @stable ICU 4.2
 */
class U_COMMON_API CheckedArrayByteSink : public ByteSink {
public:
  /**
   * Constructs a ByteSink that will write to outbuf[0..capacity-1].
   * @param outbuf buffer to write to
   * @param capacity size of the buffer
   * @stable ICU 4.2
   */
  CheckedArrayByteSink(char* outbuf, int32_t capacity);
  /**
   * Destructor.
   * @stable ICU 4.2
   */
  virtual ~CheckedArrayByteSink();
  /**
   * Returns the sink to its original state, without modifying the buffer.
   * Useful for reusing both the buffer and the sink for multiple streams.
   * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
   * and Overflowed()=FALSE.
   * @return *this
   * @stable ICU 4.6
   */
  virtual CheckedArrayByteSink& Reset();
  /**
   * Append "bytes[0,n-1]" to this.
   * Bytes that do not fit into the remaining capacity are ignored;
   * see the class description and Overflowed().
   * @param bytes the pointer to the bytes
   * @param n the number of bytes; must be non-negative
   * @stable ICU 4.2
   */
  virtual void Append(const char* bytes, int32_t n);
  /**
   * Returns a writable buffer for appending and writes the buffer's capacity to
   * *result_capacity. For details see the base class documentation.
   * @param min_capacity required minimum capacity of the returned buffer;
   *                     must be non-negative
   * @param desired_capacity_hint desired capacity of the returned buffer;
   *                              must be non-negative
   * @param scratch default caller-owned buffer
   * @param scratch_capacity capacity of the scratch buffer
   * @param result_capacity pointer to an integer which will be set to the
   *                        capacity of the returned buffer
   * @return a buffer with *result_capacity>=min_capacity
   * @stable ICU 4.2
   */
  virtual char* GetAppendBuffer(int32_t min_capacity,
                                int32_t desired_capacity_hint,
                                char* scratch, int32_t scratch_capacity,
                                int32_t* result_capacity);
  /**
   * Returns the number of bytes actually written to the sink.
   * @return number of bytes written to the buffer
   * @stable ICU 4.2
   */
  int32_t NumberOfBytesWritten() const { return size_; }
  /**
   * Returns true if any bytes were discarded, i.e., if there was an
   * attempt to write more than 'capacity' bytes.
   * @return TRUE if more than 'capacity' bytes were Append()ed
   * @stable ICU 4.2
   */
  UBool Overflowed() const { return overflowed_; }
  /**
   * Returns the number of bytes appended to the sink.
   * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
   * else they return the same number.
   * @return number of bytes passed to Append(), including any bytes that
   *         were discarded because of overflow
   * @stable ICU 4.6
   */
  int32_t NumberOfBytesAppended() const { return appended_; }
private:
  // NOTE(review): outbuf_ and capacity_ are presumably the constructor
  // arguments; the constructor definition is not in this header.
  char* outbuf_;
  const int32_t capacity_;
  int32_t size_;       // value returned by NumberOfBytesWritten()
  int32_t appended_;   // value returned by NumberOfBytesAppended()
  UBool overflowed_;   // value returned by Overflowed()

  // No default construction and no copying.
  CheckedArrayByteSink() = delete;
  CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
  CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
};

/**
 * Implementation of ByteSink that writes to a "string".
 * The StringClass is usually instantiated with a std::string.
 *
 * StringClass must provide append(const char*, n); the constructor that
 * reserves capacity additionally requires capacity(), length() and reserve().
 * The sink stores only a pointer to the destination string, so the string
 * must remain valid for as long as the sink is used.
 * The sink has no default constructor and cannot be copied.
 * @stable ICU 4.2
 */
template<typename StringClass>
class StringByteSink : public ByteSink {
 public:
  /**
   * Constructs a ByteSink that will append bytes to the dest string.
   * @param dest pointer to string object to append to
   * @stable ICU 4.2
   */
  StringByteSink(StringClass* dest) : dest_(dest) { }
  /**
   * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
   * Nothing is reserved if initialAppendCapacity is not positive or if dest
   * already has at least that much unused capacity.
   *
   * @param dest pointer to string object to append to
   * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
   * @stable ICU 60
   */
  StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
    // The value is known to be positive before it is converted to unsigned,
    // so the comparison against the unused capacity cannot wrap around.
    if (initialAppendCapacity > 0 &&
        static_cast<uint32_t>(initialAppendCapacity) > (dest->capacity() - dest->length())) {
      dest->reserve(dest->length() + initialAppendCapacity);
    }
  }
  /**
   * Append "bytes[0,n-1]" to this.
   * A non-positive n appends nothing.
   * @param data the pointer to the bytes
   * @param n the number of bytes; must be non-negative
   * @stable ICU 4.2
   */
  virtual void Append(const char* data, int32_t n) {
    // Pin invalid lengths instead of forwarding them: a negative n would be
    // converted to a huge unsigned count by a std::string-like append().
    if (n > 0) {
      dest_->append(data, n);
    }
  }
 private:
  StringClass* dest_;

  // No default construction and no copying.
  StringByteSink() = delete;
  StringByteSink(const StringByteSink &) = delete;
  StringByteSink &operator=(const StringByteSink &) = delete;
};

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */

#endif  // __BYTESTREAM_H__