summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/tools/toolutil/ucbuf.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--intl/icu/source/tools/toolutil/ucbuf.h218
1 files changed, 218 insertions, 0 deletions
diff --git a/intl/icu/source/tools/toolutil/ucbuf.h b/intl/icu/source/tools/toolutil/ucbuf.h
new file mode 100644
index 0000000000..117920b794
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/ucbuf.h
@@ -0,0 +1,218 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File ucbuf.h
+*
+* Modification History:
+*
+* Date Name Description
+* 05/10/01 Ram Creation.
+*
+* This API reads in files and returns UChars
+*******************************************************************************
+*/
+
+#include "unicode/localpointer.h"
+#include "unicode/ucnv.h"
+#include "filestrm.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#ifndef UCBUF_H
+#define UCBUF_H 1
+
+typedef struct UCHARBUF UCHARBUF;
+/**
+ * End of file value
+ */
+#define U_EOF ((int32_t)0xFFFFFFFF)
+/**
+ * Error value if a sequence cannot be unescaped
+ */
+#define U_ERR ((int32_t)0xFFFFFFFE)
+
+typedef struct ULine ULine;
+
+struct ULine {
+ UChar *name;
+ int32_t len;
+};
+
+/**
+ * Opens the UCHARBUF with the given file stream and code page for conversion
+ * @param fileName Name of the file to open.
+ * @param codepage The encoding of the file stream to convert to Unicode.
+ * If *codepage is NULL on input the API will try to autodetect
+ * popular Unicode encodings
+ * @param showWarning Flag to print out warnings to STDOUT
+ * @param buffered If true performs a buffered read of the input file. If false reads
+ * the whole file into memory and converts it.
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ * @return pointer to the newly opened UCHARBUF
+ */
+U_CAPI UCHARBUF* U_EXPORT2
+ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
+
+/**
+ * Gets a UTF-16 code unit at the current position from the converted buffer
+ * and increments the current position
+ * @param buf Pointer to UCHARBUF structure
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ */
+U_CAPI int32_t U_EXPORT2
+ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
+
+/**
+ * Gets a UTF-32 code point at the current position from the converted buffer
+ * and increments the current position
+ * @param buf Pointer to UCHARBUF structure
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ */
+U_CAPI int32_t U_EXPORT2
+ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
+
+/**
+ * Gets a UTF-16 code unit at the current position from the converted buffer after
+ * unescaping and increments the current position. If the escape sequence is for UTF-32
+ * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned
+ * @param buf Pointer to UCHARBUF structure
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ */
+U_CAPI int32_t U_EXPORT2
+ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
+
+/**
+ * Gets a pointer to the current position in the internal buffer and length of the line.
+ * It imperative to make a copy of the returned buffer before performing operations on it.
+ * @param buf Pointer to UCHARBUF structure
+ * @param len Output param to receive the len of the buffer returned till end of the line
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ * Error: U_TRUNCATED_CHAR_FOUND
+ * @return Pointer to the internal buffer, NULL if EOF
+ */
+U_CAPI const UChar* U_EXPORT2
+ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
+
+
+/**
+ * Resets the buffers and the underlying file stream.
+ * @param buf Pointer to UCHARBUF structure
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ */
+U_CAPI void U_EXPORT2
+ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
+
+/**
+ * Returns a pointer to the internal converted buffer
+ * @param buf Pointer to UCHARBUF structure
+ * @param len Pointer to int32_t to receive the length of buffer
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ * @return Pointer to internal UChar buffer
+ */
+U_CAPI const UChar* U_EXPORT2
+ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
+
+/**
+ * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
+ * @param buf Pointer to UCHARBUF structure
+ */
+U_CAPI void U_EXPORT2
+ucbuf_close(UCHARBUF* buf);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCHARBUFPointer
+ * "Smart pointer" class, closes a UCHARBUF via ucbuf_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
+ */
+U_CAPI void U_EXPORT2
+ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
+
+
+/**
+ * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected.
+ * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
+ * the converter to correct state for converting the rest of the stream. So the UConverter parameter
+ * is necessary.
+ * If the charset was autodetected, the caller must close both the input FileStream
+ * and the converter.
+ *
+ * @param fileName The file name to be opened and encoding autodected
+ * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
+ * @param cp Output param to receive the detected encoding
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ * @return The input FileStream if its charset was autodetected; NULL otherwise.
+ */
+U_CAPI FileStream * U_EXPORT2
+ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
+int32_t* signatureLength, UErrorCode* status);
+
+/**
+ * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected.
+ * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
+ * the converter to correct state for converting the rest of the stream. So the UConverter parameter
+ * is necessary.
+ * If the charset was autodetected, the caller must close the converter.
+ *
+ * @param fileStream The file stream whose encoding is to be detected
+ * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
+ * @param cp Output param to receive the detected encoding
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ * indicates a failure on entry, the function will immediately return.
+ * On exit the value will indicate the success of the operation.
+ * @return Boolean whether the Unicode charset was autodetected.
+ */
+
+U_CAPI UBool U_EXPORT2
+ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
+
+/**
+ * Returns the approximate size in UChars required for converting the file to UChars
+ */
+U_CAPI int32_t U_EXPORT2
+ucbuf_size(UCHARBUF* buf);
+
+U_CAPI const char* U_EXPORT2
+ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
+
+#endif
+#endif
+