diff options
Diffstat (limited to 'intl/icu/source/tools/toolutil/writesrc.cpp')
-rw-r--r-- | intl/icu/source/tools/toolutil/writesrc.cpp | 515 |
1 files changed, 515 insertions, 0 deletions
diff --git a/intl/icu/source/tools/toolutil/writesrc.cpp b/intl/icu/source/tools/toolutil/writesrc.cpp new file mode 100644 index 0000000000..55c2f277b3 --- /dev/null +++ b/intl/icu/source/tools/toolutil/writesrc.cpp @@ -0,0 +1,515 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2005-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: writesrc.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2005apr23 +* created by: Markus W. Scherer +* +* Helper functions for writing source code for data. +*/ + +#include <stdio.h> +#include <time.h> + +// The C99 standard suggested that C++ implementations not define PRId64 etc. constants +// unless this macro is defined. +// See the Notes at https://en.cppreference.com/w/cpp/types/integer . +// Similar to defining __STDC_LIMIT_MACROS in unicode/ptypes.h . +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS +#endif +#include <cinttypes> + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/ucptrie.h" +#include "unicode/errorcode.h" +#include "unicode/uniset.h" +#include "unicode/usetiter.h" +#include "unicode/utf16.h" +#include "utrie2.h" +#include "cstring.h" +#include "writesrc.h" +#include "util.h" + +U_NAMESPACE_BEGIN + +ValueNameGetter::~ValueNameGetter() {} + +U_NAMESPACE_END + +U_NAMESPACE_USE + +static FILE * +usrc_createWithoutHeader(const char *path, const char *filename) { + char buffer[1024]; + const char *p; + char *q; + FILE *f; + char c; + + if(path==nullptr) { + p=filename; + } else { + /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */ + uprv_strcpy(buffer, path); + q=buffer+uprv_strlen(buffer); + if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { + *q++=U_FILE_SEP_CHAR; + } + uprv_strcpy(q, filename); + p=buffer; + } + + f=fopen(p, "w"); + if (f==nullptr) { + fprintf( + stderr, + "usrc_create(%s, %s): unable to create file\n", + path!=nullptr ? path : "", filename); + } + return f; +} + +U_CAPI FILE * U_EXPORT2 +usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) { + FILE *f = usrc_createWithoutHeader(path, filename); + if (f == nullptr) { + return f; + } + usrc_writeCopyrightHeader(f, "//", copyrightYear); + usrc_writeFileNameGeneratedBy(f, "//", filename, generator); + return f; +} + +U_CAPI FILE * U_EXPORT2 +usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) { + FILE *f = usrc_createWithoutHeader(path, filename); + if (f == nullptr) { + return f; + } + usrc_writeCopyrightHeader(f, "#", copyrightYear); + usrc_writeFileNameGeneratedBy(f, "#", filename, generator); + return f; +} + +U_CAPI void U_EXPORT2 +usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) { + fprintf(f, + "%s Copyright (C) %d and later: Unicode, Inc. and others.\n" + "%s License & terms of use: http://www.unicode.org/copyright.html\n", + prefix, copyrightYear, prefix); + if (copyrightYear <= 2016) { + fprintf(f, + "%s Copyright (C) 1999-2016, International Business Machines\n" + "%s Corporation and others. All Rights Reserved.\n", + prefix, prefix); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeFileNameGeneratedBy( + FILE *f, + const char *prefix, + const char *filename, + const char *generator) { + char buffer[1024]; + const struct tm *lt; + time_t t; + + const char *pattern = + "%s\n" + "%s file name: %s\n" + "%s\n" + "%s machine-generated by: %s\n" + "\n"; + + time(&t); + lt=localtime(&t); + if(generator==nullptr) { + strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt); + fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer); + } else { + fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeArray(FILE *f, + const char *prefix, + const void *p, int32_t width, int32_t length, + const char *indent, + const char *postfix) { + const uint8_t *p8; + const uint16_t *p16; + const uint32_t *p32; + const int64_t *p64; // Signed due to TOML! + int64_t value; // Signed due to TOML! + int32_t i, col; + + p8=nullptr; + p16=nullptr; + p32=nullptr; + p64=nullptr; + switch(width) { + case 8: + p8=(const uint8_t *)p; + break; + case 16: + p16=(const uint16_t *)p; + break; + case 32: + p32=(const uint32_t *)p; + break; + case 64: + p64=(const int64_t *)p; + break; + default: + fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width); + return; + } + if(prefix!=nullptr) { + fprintf(f, prefix, (long)length); + } + for(i=col=0; i<length; ++i, ++col) { + if(i>0) { + if(col<16) { + fputc(',', f); + } else { + fputs(",\n", f); + fputs(indent, f); + col=0; + } + } + switch(width) { + case 8: + value=p8[i]; + break; + case 16: + value=p16[i]; + break; + case 32: + value=p32[i]; + break; + case 64: + value=p64[i]; + break; + default: + value=0; /* unreachable */ + break; + } + fprintf(f, value<=9 ? "%" PRId64 : "0x%" PRIx64, value); + } + if(postfix!=nullptr) { + fputs(postfix, f); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeUTrie2Arrays(FILE *f, + const char *indexPrefix, const char *data32Prefix, + const UTrie2 *pTrie, + const char *postfix) { + if(pTrie->data32==nullptr) { + /* 16-bit trie */ + usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix); + } else { + /* 32-bit trie */ + usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix); + usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeUTrie2Struct(FILE *f, + const char *prefix, + const UTrie2 *pTrie, + const char *indexName, const char *data32Name, + const char *postfix) { + if(prefix!=nullptr) { + fputs(prefix, f); + } + if(pTrie->data32==nullptr) { + /* 16-bit trie */ + fprintf( + f, + " %s,\n" /* index */ + " %s+%ld,\n" /* data16 */ + " nullptr,\n", /* data32 */ + indexName, + indexName, + (long)pTrie->indexLength); + } else { + /* 32-bit trie */ + fprintf( + f, + " %s,\n" /* index */ + " nullptr,\n" /* data16 */ + " %s,\n", /* data32 */ + indexName, + data32Name); + } + fprintf( + f, + " %ld,\n" /* indexLength */ + " %ld,\n" /* dataLength */ + " 0x%hx,\n" /* index2NullOffset */ + " 0x%hx,\n" /* dataNullOffset */ + " 0x%lx,\n" /* initialValue */ + " 0x%lx,\n" /* errorValue */ + " 0x%lx,\n" /* highStart */ + " 0x%lx,\n" /* highValueIndex */ + " nullptr, 0, false, false, 0, nullptr\n", + (long)pTrie->indexLength, (long)pTrie->dataLength, + (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset, + (long)pTrie->initialValue, (long)pTrie->errorValue, + (long)pTrie->highStart, (long)pTrie->highValueIndex); + if(postfix!=nullptr) { + fputs(postfix, f); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieArrays(FILE *f, + const char *indexPrefix, const char *dataPrefix, + const UCPTrie *pTrie, + const char *postfix, + UTargetSyntax syntax) { + const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? " " : ""; + usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix); + int32_t width= + pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; + usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix); +} + +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieStruct(FILE *f, + const char *prefix, + const UCPTrie *pTrie, + const char *indexName, const char *dataName, + const char *postfix, + UTargetSyntax syntax) { + if(prefix!=nullptr) { + fputs(prefix, f); + } + if (syntax == UPRV_TARGET_SYNTAX_CCODE) { + fprintf( + f, + " %s,\n" // index + " { %s },\n", // data (union) + indexName, + dataName); + } + const char* pattern = + (syntax == UPRV_TARGET_SYNTAX_CCODE) ? + " %ld, %ld,\n" // indexLength, dataLength + " 0x%lx, 0x%x,\n" // highStart, shifted12HighStart + " %d, %d,\n" // type, valueWidth + " 0, 0,\n" // reserved32, reserved16 + " 0x%x, 0x%lx,\n" // index3NullOffset, dataNullOffset + " 0x%lx,\n" // nullValue + : + "indexLength = %ld\n" + "dataLength = %ld\n" + "highStart = 0x%lx\n" + "shifted12HighStart = 0x%x\n" + "type = %d\n" + "valueWidth = %d\n" + "index3NullOffset = 0x%x\n" + "dataNullOffset = 0x%lx\n" + "nullValue = 0x%lx\n" + ; + fprintf( + f, + pattern, + (long)pTrie->indexLength, (long)pTrie->dataLength, + (long)pTrie->highStart, pTrie->shifted12HighStart, + pTrie->type, pTrie->valueWidth, + pTrie->index3NullOffset, (long)pTrie->dataNullOffset, + (long)pTrie->nullValue); + if(postfix!=nullptr) { + fputs(postfix, f); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) { + int32_t width= + pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; + char line[100], line2[100], line3[100], line4[100]; + + switch (syntax) { + case UPRV_TARGET_SYNTAX_CCODE: + snprintf(line, sizeof(line), "static const uint16_t %s_trieIndex[%%ld]={\n", name); + snprintf(line2, sizeof(line2), "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name); + snprintf(line3, sizeof(line3), "\n};\n\n"); + break; + case UPRV_TARGET_SYNTAX_TOML: + snprintf(line, sizeof(line), "index = [\n "); + snprintf(line2, sizeof(line2), "data_%d = [\n ", (int)width); + snprintf(line3, sizeof(line3), "\n]\n"); + break; + default: + UPRV_UNREACHABLE_EXIT; + } + usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax); + + switch (syntax) { + case UPRV_TARGET_SYNTAX_CCODE: + snprintf(line, sizeof(line), "static const UCPTrie %s_trie={\n", name); + snprintf(line2, sizeof(line2), "%s_trieIndex", name); + snprintf(line3, sizeof(line3), "%s_trieData", name); + snprintf(line4, sizeof(line4), "};\n\n"); + break; + case UPRV_TARGET_SYNTAX_TOML: + line[0] = 0; + line2[0] = 0; + line3[0] = 0; + line4[0] = 0; + break; + default: + UPRV_UNREACHABLE_EXIT; + } + usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax); +} + +U_CAPI void U_EXPORT2 +usrc_writeUnicodeSet( + FILE *f, + const USet *pSet, + UTargetSyntax syntax) { + // ccode is not yet supported + U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML); + + // Write out a list of ranges + const UnicodeSet* set = UnicodeSet::fromUSet(pSet); + UnicodeSetIterator it(*set); + fprintf(f, "# Inclusive ranges of the code points in the set.\n"); + fprintf(f, "ranges = [\n"); + bool seenFirstString = false; + while (it.nextRange()) { + if (it.isString()) { + if (!seenFirstString) { + seenFirstString = true; + fprintf(f, "]\nstrings = [\n"); + } + const UnicodeString& str = it.getString(); + fprintf(f, " "); + usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax); + fprintf(f, ",\n"); + } else { + U_ASSERT(!seenFirstString); + UChar32 start = it.getCodepoint(); + UChar32 end = it.getCodepointEnd(); + fprintf(f, " [0x%x, 0x%x],\n", start, end); + } + } + fprintf(f, "]\n"); +} + +U_CAPI void U_EXPORT2 +usrc_writeUCPMap( + FILE *f, + const UCPMap *pMap, + icu::ValueNameGetter *valueNameGetter, + UTargetSyntax syntax) { + // ccode is not yet supported + U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML); + (void) syntax; // silence unused variable errors + + // Print out list of ranges + UChar32 start = 0, end; + uint32_t value; + fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n"); + fprintf(f, "ranges = [\n"); + while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) { + if (valueNameGetter != nullptr) { + const char *name = valueNameGetter->getName(value); + fprintf(f, " {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name); + } else { + fprintf(f, " {a=0x%x, b=0x%x, v=%u},\n", start, end, value); + } + start = end + 1; + } + fprintf(f, "]\n"); +} + +U_CAPI void U_EXPORT2 +usrc_writeArrayOfMostlyInvChars(FILE *f, + const char *prefix, + const char *p, int32_t length, + const char *postfix) { + int32_t i, col; + int prev2, prev, c; + + if(prefix!=nullptr) { + fprintf(f, prefix, (long)length); + } + prev2=prev=-1; + for(i=col=0; i<length; ++i, ++col) { + c=(uint8_t)p[i]; + if(i>0) { + /* Break long lines. Try to break at interesting places, to minimize revision diffs. */ + if( + /* Very long line. */ + col>=32 || + /* Long line, break after terminating NUL. */ + (col>=24 && prev2>=0x20 && prev==0) || + /* Medium-long line, break before non-NUL, non-character byte. */ + (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20) + ) { + fputs(",\n", f); + col=0; + } else { + fputc(',', f); + } + } + fprintf(f, c<0x20 ? "%u" : "'%c'", c); + prev2=prev; + prev=c; + } + if(postfix!=nullptr) { + fputs(postfix, f); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeStringAsASCII(FILE *f, + const char16_t* ptr, int32_t length, + UTargetSyntax) { + // For now, assume all UTargetSyntax values are valid here. + fprintf(f, "\""); + int32_t i = 0; + UChar32 cp; + while (i < length) { + U16_NEXT(ptr, i, length, cp); + if (cp == u'"') { + fprintf(f, "\\\""); + } else if (ICU_Utility::isUnprintable(cp)) { + UnicodeString u16result; + ICU_Utility::escapeUnprintable(u16result, cp); + std::string u8result; + u16result.toUTF8String(u8result); + fprintf(f, "%s", u8result.data()); + } else { + U_ASSERT(cp < 0x80); + char s[2] = {static_cast<char>(cp), 0}; + fprintf(f, "%s", s); + } + } + fprintf(f, "\""); +} |