summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/tools/toolutil/pkg_gencmn.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/tools/toolutil/pkg_gencmn.cpp')
-rw-r--r--intl/icu/source/tools/toolutil/pkg_gencmn.cpp578
1 files changed, 578 insertions, 0 deletions
diff --git a/intl/icu/source/tools/toolutil/pkg_gencmn.cpp b/intl/icu/source/tools/toolutil/pkg_gencmn.cpp
new file mode 100644
index 0000000000..a301c322eb
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/pkg_gencmn.cpp
@@ -0,0 +1,578 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/******************************************************************************
+ * Copyright (C) 2008-2012, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "filestrm.h"
+#include "toolutil.h"
+#include "unicode/uclean.h"
+#include "unewdata.h"
+#include "putilimp.h"
+#include "pkg_gencmn.h"
+
+#define STRING_STORE_SIZE 200000 /* capacity, in bytes, of stringStore (the arena allocString() hands out) */
+
+#define COMMON_DATA_NAME U_ICUDATA_NAME /* package name used when createCommonDataFile() gets name==nullptr */
+#define DATA_TYPE "dat" /* output type used when createCommonDataFile() gets type==nullptr */
+
+/* ICU package data file format (.dat files) ------------------------------- ***
+
+Description of the data format after the usual ICU data file header
+(UDataInfo etc.).
+
+Format version 1
+
+A .dat package file contains a simple Table of Contents of item names,
+followed by the items themselves:
+
+1. ToC table
+
+uint32_t count; - number of items
+UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
+ uint32_t nameOffset; - offset of the item name
+ uint32_t dataOffset; - offset of the item data
+both are byte offsets from the beginning of the data
+
+2. item name strings
+
+All item names are stored as char * strings in one block between the ToC table
+and the data items.
+
+3. data items
+
+The data items are stored following the item names block.
+Each data item is 16-aligned.
+The data items are stored in the sorted order of their names.
+
+Therefore, the end of the name strings block (rounded up to a 16-byte boundary)
+is the offset of the first item, the length of the last item is the difference
+between the .dat file length and its offset, and the length of every earlier
+item is the difference between the next item's offset and its own.
+
+----------------------------------------------------------------------------- */
+
+/* UDataInfo cf. udata.h
+ * Header descriptor passed to udata_create() when the .dat file is written.
+ * The field names are not visible in this file; the unlabeled ones below are
+ * presumed from their values - confirm against udata.h. */
+static const UDataInfo dataInfo={
+ sizeof(UDataInfo), /* presumably the structure-size field */
+ 0, /* presumably a reserved field */
+
+ U_IS_BIG_ENDIAN, /* byte-order flag taken from the build configuration */
+ U_CHARSET_FAMILY, /* charset-family value taken from the build configuration */
+ sizeof(char16_t), /* size in bytes of one char16_t */
+ 0, /* presumably a reserved field */
+
+ {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
+ {1, 0, 0, 0}, /* formatVersion */
+ {3, 0, 0, 0} /* dataVersion */
+};
+
+static uint32_t maxSize; /* copied from max_size by createCommonDataFile(); addFile() treats 0 as "no limit" */
+
+static char stringStore[STRING_STORE_SIZE]; /* fixed arena that allocString() carves strings from; nothing is ever returned to it */
+static uint32_t stringTop=0, basenameTotal=0; /* stringTop: bytes of stringStore in use;
+                                               * basenameTotal: running sum of basenameLength (terminating NULs included) */
+
+typedef struct { /* one listed input file, filled in by addFile() */
+ char *pathname, *basename; /* pathname: path opened for reading in .dat mode, or the C symbol name in TOC mode;
+                             * basename: "<package name><tree separator><listed name>" stored in stringStore */
+ uint32_t basenameLength, basenameOffset, fileSize, fileOffset; /* basenameLength counts the terminating NUL;
+                                                                  * both offsets are byte offsets assigned by createCommonDataFile() */
+} File;
+
+#define CHUNK_FILE_COUNT 256 /* addFile() grows files[] by this many entries at a time */
+static File *files = nullptr; /* growable table of listed files; qsort()ed by basename before output */
+static uint32_t fileCount=0; /* number of files[] entries in use */
+static uint32_t fileMax = 0; /* number of files[] entries allocated */
+
+
+static char *symPrefix = nullptr; /* optional prefix put before each data symbol in the TOC .c output;
+                                   * never assigned anywhere in the code visible here */
+
+#define LINE_BUFFER_SIZE 512 /* bytes allocated for the list-file line buffer in createCommonDataFile() */
+/* prototypes --------------------------------------------------------------- */
+
+static void
+addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
+
+static char *
+allocString(uint32_t length);
+
+U_CDECL_BEGIN
+static int
+compareFiles(const void *file1, const void *file2);
+U_CDECL_END
+
+static char *
+pathToFullPath(const char *path, const char *source);
+
+/* Map non-tree separators (such as '\') to the tree separator ('/') in place. */
+static void
+fixDirToTreePath(char *s);
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Builds the common data package from a list of item files.
+ *
+ * The list is read from dataFile, or from stdin when dataFile is nullptr.
+ * Item names are separated by single spaces or line ends. A token that starts
+ * with '#' begins a comment that runs to the end of the line. Empty tokens
+ * (blank lines, repeated or trailing spaces) are ignored.
+ *
+ * Depending on sourceTOC the function writes either
+ *   - <destDir>/<name>.<type>: ToC table, item names, then the 16-aligned
+ *     contents of every listed file (sourceTOC false), or
+ *   - <destDir>/<name>_<type>.c: a C source file that only declares the data
+ *     symbols and a table of contents referring to them (sourceTOC true).
+ *
+ * destDir         output directory; nullptr selects u_getDataDirectory()
+ * name            package name; nullptr selects COMMON_DATA_NAME
+ * entrypointName  stem of the "<entrypointName>_dat" symbol written in TOC mode
+ * type            output type; nullptr selects DATA_TYPE
+ * source          directory the listed files are relative to; nullptr selects "."
+ * copyRight       copyright text for the .dat header; nullptr selects U_COPYRIGHT_STRING
+ * dataFile        path of the list file, or nullptr for stdin
+ * max_size        if non-zero, addFile() leaves out larger files (.dat mode only)
+ * sourceTOC       selects the .c table-of-contents output instead of the .dat file
+ * verbose         print progress messages to stdout
+ * gencmnFileName  if not nullptr, receives the name of the generated .c file
+ *                 (TOC mode only); the caller must supply a large enough buffer
+ *
+ * Fatal problems print a message to stderr and exit() with a UErrorCode value.
+ * An empty list prints a message and returns without writing any output.
+ *
+ * Nothing visible in this file resets files/fileCount/basenameTotal/stringTop,
+ * so a second call in the same process would append to the first call's list.
+ */
+U_CAPI void U_EXPORT2
+createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
+                     const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
+    static char buffer[4096];
+    char *line;
+    char *linePtr;
+    char *s = nullptr;
+    UErrorCode errorCode=U_ZERO_ERROR;
+    uint32_t i, fileOffset, basenameOffset, length, nread;
+    FileStream *in, *file;
+
+    line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
+    if (line == nullptr) {
+        fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    /* remember the start of the buffer: `line` is moved along it while parsing */
+    linePtr = line;
+
+    maxSize = max_size;
+
+    if (destDir == nullptr) {
+        destDir = u_getDataDirectory();
+    }
+    if (name == nullptr) {
+        name = COMMON_DATA_NAME;
+    }
+    if (type == nullptr) {
+        type = DATA_TYPE;
+    }
+    if (source == nullptr) {
+        source = ".";
+    }
+
+    if (dataFile == nullptr) {
+        in = T_FileStream_stdin();
+    } else {
+        in = T_FileStream_open(dataFile, "r");
+        if(in == nullptr) {
+            fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+    }
+
+    if (verbose) {
+        if(sourceTOC) {
+            printf("generating %s_%s.c (table of contents source file)\n", name, type);
+        } else {
+            printf("generating %s.%s (common data file with table of contents)\n", name, type);
+        }
+    }
+
+    /*
+     * read the list of files and get their lengths
+     *
+     * `s` is the scan position: while it points at unconsumed text the next
+     * token is taken from the current line, otherwise a new line is read
+     * into the start of the buffer.
+     */
+    while((s != nullptr && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
+                                                                 LINE_BUFFER_SIZE))!=nullptr) {
+        /* remove trailing newline characters and parse space separated items */
+        if (s != nullptr && *s != 0) {
+            line=s;
+        } else {
+            s=line;
+        }
+        while(*s!=0) {
+            if(*s==' ') {
+                /* terminate this token; the scan resumes after the space */
+                *s=0;
+                ++s;
+                break;
+            } else if(*s=='\r' || *s=='\n') {
+                /* end of line: s stays on the NUL so the next pass reads a new line */
+                *s=0;
+                break;
+            }
+            ++s;
+        }
+
+        /* blank lines and repeated or trailing spaces yield empty tokens, not file names */
+        if (*line == 0) {
+            continue;
+        }
+
+        /* check for comment */
+
+        if (*line == '#') {
+            /* drop the rest of the line too, so the words of a comment are not taken for file names */
+            s = nullptr;
+            continue;
+        }
+
+        /* add the file */
+#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
+        {
+            char *t;
+            while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
+                *t = U_FILE_SEP_CHAR;
+            }
+        }
+#endif
+        addFile(getLongPathname(line), name, source, sourceTOC, verbose);
+    }
+
+    uprv_free(linePtr);
+
+    if(in!=T_FileStream_stdin()) {
+        T_FileStream_close(in);
+    }
+
+    if(fileCount==0) {
+        fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == nullptr ? "<stdin>" : dataFile);
+        return;
+    }
+
+    /* sort the files by basename */
+    qsort(files, fileCount, sizeof(File), compareFiles);
+
+    if(!sourceTOC) {
+        UNewDataMemory *out;
+
+        /* determine the offsets of all basenames and files in this common one */
+        /* the names start after the count word (4 bytes) and one 8-byte ToC entry per file */
+        basenameOffset=4+8*fileCount;
+        /* the first item starts at the end of the names, rounded up to a multiple of 16 */
+        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
+        for(i=0; i<fileCount; ++i) {
+            files[i].fileOffset=fileOffset;
+            fileOffset+=(files[i].fileSize+15)&~0xf;
+            files[i].basenameOffset=basenameOffset;
+            basenameOffset+=files[i].basenameLength;
+        }
+
+        /* create the output file */
+        out=udata_create(destDir, type, name,
+                         &dataInfo,
+                         copyRight == nullptr ? U_COPYRIGHT_STRING : copyRight,
+                         &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
+                    destDir, name, type,
+                    u_errorName(errorCode));
+            exit(errorCode);
+        }
+
+        /* write the table of contents */
+        udata_write32(out, fileCount);
+        for(i=0; i<fileCount; ++i) {
+            udata_write32(out, files[i].basenameOffset);
+            udata_write32(out, files[i].fileOffset);
+        }
+
+        /* write the basenames */
+        for(i=0; i<fileCount; ++i) {
+            udata_writeString(out, files[i].basename, files[i].basenameLength);
+        }
+        /* from here on `length` is the size of whatever was written last; only its low 4 bits matter for padding */
+        length=4+8*fileCount+basenameTotal;
+
+        /* copy the files */
+        for(i=0; i<fileCount; ++i) {
+            /* pad to 16-align the next file */
+            length&=0xf;
+            if(length!=0) {
+                udata_writePadding(out, 16-length);
+            }
+
+            if (verbose) {
+                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
+            }
+
+            /* copy the next file */
+            file=T_FileStream_open(files[i].pathname, "rb");
+            if(file==nullptr) {
+                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
+                exit(U_FILE_ACCESS_ERROR);
+            }
+            for(nread = 0;;) {
+                length=T_FileStream_read(file, buffer, sizeof(buffer));
+                if(length <= 0) {
+                    break;
+                }
+                nread += length;
+                udata_writeBlock(out, buffer, length);
+            }
+            T_FileStream_close(file);
+            length=files[i].fileSize;
+
+            /* the size recorded by addFile() fixed the offsets, so the copy must match it exactly */
+            if (nread != files[i].fileSize) {
+                fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
+                exit(U_FILE_ACCESS_ERROR);
+            }
+        }
+
+        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
+        length&=0xf;
+        if(length!=0) {
+            udata_writePadding(out, 16-length);
+        }
+
+        /* finish */
+        udata_finish(out, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
+            exit(errorCode);
+        }
+    } else {
+        /* write a .c source file with the table of contents */
+        char *filename;
+        FileStream *out;
+
+        /* the name is assembled in the fixed-size buffer: refuse anything that cannot fit */
+        /* worst case: destDir + separator + name + '_' + type + ".c" + NUL */
+        if(uprv_strlen(destDir)+1+uprv_strlen(name)+1+uprv_strlen(type)+2+1 > sizeof(buffer)) {
+            fprintf(stderr, "gencmn: .c output file name is too long (directory %s)\n", destDir);
+            exit(U_BUFFER_OVERFLOW_ERROR);
+        }
+
+        /* create the output filename */
+        filename=s=buffer;
+        uprv_strcpy(filename, destDir);
+        s=filename+uprv_strlen(filename);
+        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
+            *s++=U_FILE_SEP_CHAR;
+        }
+        uprv_strcpy(s, name);
+        if(*(type)!=0) {
+            s+=uprv_strlen(s);
+            *s++='_';
+            uprv_strcpy(s, type);
+        }
+        s+=uprv_strlen(s);
+        uprv_strcpy(s, ".c");
+
+        /* open the output file */
+        out=T_FileStream_open(filename, "w");
+        /* report the name before buffer is reused for the file contents below */
+        if (gencmnFileName != nullptr) {
+            uprv_strcpy(gencmnFileName, filename);
+        }
+        if(out==nullptr) {
+            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+
+        /* write the source file */
+        snprintf(buffer, sizeof(buffer),
+                 "/*\n"
+                 " * ICU common data table of contents for %s.%s\n"
+                 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
+                 " */\n\n"
+                 "#include \"unicode/utypes.h\"\n"
+                 "#include \"unicode/udata.h\"\n"
+                 "\n"
+                 "/* external symbol declarations for data (%u files) */\n",
+                 name, type, (unsigned int)fileCount);
+        T_FileStream_writeLine(out, buffer);
+
+        /* one extern declaration per item; in TOC mode pathname holds the item's C symbol name */
+        snprintf(buffer, sizeof(buffer), "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
+        T_FileStream_writeLine(out, buffer);
+        for(i=1; i<fileCount; ++i) {
+            snprintf(buffer, sizeof(buffer), ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
+            T_FileStream_writeLine(out, buffer);
+        }
+        T_FileStream_writeLine(out, ";\n\n");
+
+        snprintf(
+            buffer, sizeof(buffer),
+            "U_EXPORT struct {\n"
+            "    uint16_t headerSize;\n"
+            "    uint8_t magic1, magic2;\n"
+            "    UDataInfo info;\n"
+            "    char padding[%lu];\n"
+            "    uint32_t count, reserved;\n"
+            "    struct {\n"
+            "        const char *name;\n"
+            "        const void *data;\n"
+            "    } toc[%lu];\n"
+            "} U_EXPORT2 %s_dat = {\n"
+            "    32, 0xda, 0x27, {\n"
+            "        %lu, 0,\n"
+            "        %u, %u, %u, 0,\n"
+            "        {0x54, 0x6f, 0x43, 0x50},\n"
+            "        {1, 0, 0, 0},\n"
+            "        {0, 0, 0, 0}\n"
+            "    },\n"
+            "    \"\", %lu, 0, {\n",
+            static_cast<unsigned long>(32-4-sizeof(UDataInfo)),
+            static_cast<unsigned long>(fileCount),
+            entrypointName,
+            static_cast<unsigned long>(sizeof(UDataInfo)),
+            U_IS_BIG_ENDIAN,
+            U_CHARSET_FAMILY,
+            U_SIZEOF_UCHAR,
+            static_cast<unsigned long>(fileCount)
+        );
+        T_FileStream_writeLine(out, buffer);
+
+        /* one { "item name", symbol } pair per item */
+        snprintf(buffer, sizeof(buffer), "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
+        T_FileStream_writeLine(out, buffer);
+        for(i=1; i<fileCount; ++i) {
+            snprintf(buffer, sizeof(buffer), ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
+            T_FileStream_writeLine(out, buffer);
+        }
+
+        T_FileStream_writeLine(out, "\n    }\n};\n");
+        T_FileStream_close(out);
+
+        uprv_free(symPrefix);
+        /* symPrefix is file-level state: do not leave it pointing at freed memory */
+        symPrefix = nullptr;
+    }
+}
+
+/*
+ * Appends one listed file to the file-level files[] table, growing the table
+ * in CHUNK_FILE_COUNT steps when it is full.
+ *
+ * .dat mode (sourceTOC false):
+ *   The file is looked up relative to `source`, opened once to obtain its
+ *   size, and recorded with its full path, its size and its item name
+ *   "<name><tree separator><filename>". Absolute paths, files that cannot be
+ *   opened and files of 20 bytes or fewer are fatal. A file larger than a
+ *   non-zero maxSize is skipped: nothing is recorded for it and it does not
+ *   contribute to basenameTotal.
+ *
+ * TOC mode (sourceTOC true):
+ *   The file is not opened. The item name is stored as basename, and a copy
+ *   with '.', '-' and '/' replaced by '_' is stored in the pathname field for
+ *   use as a C symbol name.
+ *
+ * filename   listed (relative) file name
+ * name       package name that prefixes every item name
+ * source     directory the listed files are relative to (.dat mode only)
+ * sourceTOC  selects TOC mode
+ * verbose    report skipped files on stdout
+ *
+ * Fatal problems print a message to stderr and exit() with a UErrorCode value.
+ */
+static void
+addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
+    char *s;
+    uint32_t length;
+    char *fullPath = nullptr;
+
+    if(fileCount==fileMax) {
+        fileMax += CHUNK_FILE_COUNT;
+        files = (File *)uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
+        if(files==nullptr) {
+            fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %u files\n", (unsigned int)(fileMax*sizeof(files[0])), (unsigned int)fileCount);
+            exit(U_MEMORY_ALLOCATION_ERROR);
+        }
+    }
+
+    if(!sourceTOC) {
+        FileStream *file;
+
+        if(uprv_pathIsAbsolute(filename)) {
+            fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+        fullPath = pathToFullPath(filename, source);
+
+        /* try to open the file */
+        file=T_FileStream_open(fullPath, "rb");
+        if(file==nullptr) {
+            fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+
+        /* get the file length */
+        length=T_FileStream_size(file);
+        if(T_FileStream_error(file) || length<=20) {
+            fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+
+        T_FileStream_close(file);
+
+        /* do not add files that are longer than maxSize */
+        /* decided before anything is recorded, so a skipped file leaves no trace in basenameTotal or stringStore */
+        if(maxSize && length>maxSize) {
+            if (verbose) {
+                printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
+            }
+            uprv_free(fullPath);
+            return;
+        }
+        files[fileCount].fileSize=length;
+        files[fileCount].pathname=fullPath;
+
+        /* store the basename: package name + tree separator + listed name + NUL */
+        length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
+        s=allocString(length);
+        uprv_strcpy(s, name);
+        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
+        uprv_strcat(s, filename);
+
+        /* item names always use the tree separator */
+        fixDirToTreePath(s);
+        files[fileCount].basename=s;
+        files[fileCount].basenameLength=length;
+
+        /* createCommonDataFile() sizes the names block from this total */
+        basenameTotal+=length;
+    } else {
+        char *t;
+        /* get and store the basename */
+        /* need to include the package name */
+        length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
+        s=allocString(length);
+        uprv_strcpy(s, name);
+        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
+        uprv_strcat(s, filename);
+        fixDirToTreePath(s);
+        files[fileCount].basename=s;
+        /* turn the basename into an entry point name and store in the pathname field */
+        t=files[fileCount].pathname=allocString(length);
+        /* length counts the NUL, so this copies length-1 characters */
+        while(--length>0) {
+            if(*s=='.' || *s=='-' || *s=='/') {
+                *t='_';
+            } else {
+                *t=*s;
+            }
+            ++s;
+            ++t;
+        }
+        *t=0;
+    }
+    ++fileCount;
+}
+
+/*
+ * Hands out the next `length` bytes of the static stringStore arena and
+ * advances stringTop past them. The bytes are not initialized here.
+ * When the request would run past STRING_STORE_SIZE, prints a message to
+ * stderr and exits with U_MEMORY_ALLOCATION_ERROR.
+ */
+static char *
+allocString(uint32_t length) {
+    const uint32_t start = stringTop;
+    const uint32_t end = start + length;
+
+    if(end > STRING_STORE_SIZE) {
+        fprintf(stderr, "gencmn: out of memory\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    stringTop = end;
+    return stringStore + start;
+}
+
+/*
+ * Returns a newly allocated "<source><file separator><path>" string, or a copy
+ * of `path` alone when source is nullptr. Separators inside the `path` part are
+ * converted to U_FILE_SEP_CHAR on platforms where they differ; the `source`
+ * part is copied unchanged.
+ *
+ * path    relative path of a listed file; must not be nullptr
+ * source  directory to prepend, or nullptr for none
+ *
+ * The result comes from uprv_malloc() and belongs to the caller.
+ * Prints a message to stderr and exits with U_MEMORY_ALLOCATION_ERROR when the
+ * allocation fails.
+ */
+static char *
+pathToFullPath(const char *path, const char *source) {
+    size_t sourceLength;
+    size_t capacity;
+    char *fullPath;
+    int32_t n;
+    int32_t k;
+
+    /* only measure source when there is one: it is optional */
+    sourceLength = (source != nullptr) ? uprv_strlen(source) : 0;
+    /* source + separator + path + terminating NUL */
+    capacity = sourceLength + uprv_strlen(U_FILE_SEP_STRING) + uprv_strlen(path) + 1;
+
+    fullPath = (char *)uprv_malloc(capacity);
+    if(fullPath == nullptr) {
+        fprintf(stderr, "gencmn: out of memory\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    if(source != nullptr) {
+        uprv_strcpy(fullPath, source);
+        uprv_strcat(fullPath, U_FILE_SEP_STRING);
+    } else {
+        fullPath[0] = 0;
+    }
+    /* n is where the `path` part starts; only that part gets its separators converted */
+    n = (int32_t)uprv_strlen(fullPath);
+    uprv_strcat(fullPath, path);
+
+    /* n and k are unused when none of the conditional code below is compiled */
+    k = n;
+    (void)k;
+
+#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
+#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
+    /* replace the alternate file separator with the file sep char */
+    for(k=n;fullPath[k];k++) {
+        if(fullPath[k] == U_FILE_ALT_SEP_CHAR) {
+            fullPath[k] = U_FILE_SEP_CHAR;
+        }
+    }
+#endif
+#endif
+#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
+    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
+    /* restart at n: a shared index would already be at the end if the loop above ran */
+    for(k=n;fullPath[k];k++) {
+        if(fullPath[k] == U_TREE_ENTRY_SEP_CHAR) {
+            fullPath[k] = U_FILE_SEP_CHAR;
+        }
+    }
+#endif
+    return fullPath;
+}
+
+U_CDECL_BEGIN
+/*
+ * qsort() comparator for File records: orders two entries by comparing their
+ * basename strings with uprv_strcmp().
+ */
+static int
+compareFiles(const void *file1, const void *file2) {
+    const File *lhs = (const File *)file1;
+    const File *rhs = (const File *)file2;
+
+    return uprv_strcmp(lhs->basename, rhs->basename);
+}
+U_CDECL_END
+
+/*
+ * Rewrites `s` in place so that every file separator, and every alternate
+ * file separator, becomes U_TREE_ENTRY_SEP_CHAR. Each conversion is compiled
+ * only when the separator in question differs from the tree separator;
+ * when none is compiled the function does nothing.
+ */
+static void
+fixDirToTreePath(char *s)
+{
+    (void)s;
+#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
+    char *cursor;
+
+    /* one pass over the string; a character is rewritten at most once */
+    for(cursor=s; *cursor!=0; ++cursor) {
+#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
+        if(*cursor == U_FILE_SEP_CHAR) {
+            *cursor = U_TREE_ENTRY_SEP_CHAR;
+            continue;
+        }
+#endif
+#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
+        if(*cursor == U_FILE_ALT_SEP_CHAR) {
+            *cursor = U_TREE_ENTRY_SEP_CHAR;
+        }
+#endif
+    }
+#endif
+}