summary | refs | log | tree | commit | diff | stats
path: root/intl/icu/source/tools/icuswap/icuswap.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/tools/icuswap/icuswap.cpp')
-rw-r--r-- intl/icu/source/tools/icuswap/icuswap.cpp | 649
1 files changed, 649 insertions, 0 deletions
diff --git a/intl/icu/source/tools/icuswap/icuswap.cpp b/intl/icu/source/tools/icuswap/icuswap.cpp
new file mode 100644
index 0000000000..228554c816
--- /dev/null
+++ b/intl/icu/source/tools/icuswap/icuswap.cpp
@@ -0,0 +1,649 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: icuswap.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug08
+* created by: Markus W. Scherer
+*
+* This tool takes an ICU data file and "swaps" it, that is, changes its
+* platform properties between big-/little-endianness and ASCII/EBCDIC charset
+* families.
+* The modified data file is written to a new file.
+* Useful as an install-time tool for shipping only one flavor of ICU data
+* and preparing data files for the target platform.
+* Will not work with data DLLs (shared libraries).
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uinvchar.h"
+#include "uarrsort.h"
+#include "ucmndata.h"
+#include "udataswp.h"
+#include "swapimpl.h"
+#include "toolutil.h"
+#include "uoptions.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* definitions
+ *
+ * DEFAULT_PADDING_LENGTH: number of spare bytes that main() allocates after
+ * the input data, so that udata_swapPackage() can pad the last item of a
+ * package up to a multiple of 16 bytes.
+ *
+ * options[] and the anonymous enum below are parallel lists: each OPT_
+ * constant is used as the index of the corresponding entry in options[],
+ * so the two lists must be kept in the same order.
+ */
+
+#define DEFAULT_PADDING_LENGTH 15
+
+static UOption options[]={
+ UOPTION_HELP_H,
+ UOPTION_HELP_QUESTION_MARK,
+ UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG) /* -t / --type: output platform type, one of l, b, e */
+};
+
+enum {
+ OPT_HELP_H,
+ OPT_HELP_QUESTION_MARK,
+ OPT_OUT_TYPE /* index of the "type" entry in options[] */
+};
+
+/**
+ * Return the size of an open file in bytes and rewind it to its beginning.
+ *
+ * @param f an open, seekable stream
+ * @return the file size, or -1 if seeking or telling fails or if the size
+ *         does not fit into an int32_t (the caller treats any value below
+ *         its minimum length as an error)
+ */
+static int32_t
+fileSize(FILE *f) {
+    long size;
+
+    if(fseek(f, 0, SEEK_END)!=0) {
+        return -1;
+    }
+    size=ftell(f);
+    /* always try to rewind, even if ftell() failed */
+    if(fseek(f, 0, SEEK_SET)!=0 || size<0 || size>INT32_MAX) {
+        /* do not silently truncate sizes that need more than 31 bits */
+        return -1;
+    }
+    return (int32_t)size;
+}
+
+/**
+ * Swap an ICU .dat package, including swapping of enclosed items.
+ *
+ * Forward declaration; the function is defined further down in this file
+ * and is called from main().
+ *
+ * @param inFilename  input filename; its basename must end with ".dat" and
+ *                    supplies the old package name
+ * @param outFilename output filename; same requirement, supplies the new
+ *                    package name, which must have the same length as the
+ *                    old one
+ * @param ds          swapper describing the input and output platform
+ *                    properties
+ * @param inData      the package data, starting with its data header
+ * @param length      number of bytes in inData, or a negative value for
+ *                    preflighting
+ * @param outData     output buffer; may be the same pointer as inData
+ * @param pErrorCode  in/out ICU error code
+ * @return the length of the swapped package in bytes, or 0 on failure
+ */
+U_CFUNC int32_t U_CALLCONV
+udata_swapPackage(const char *inFilename, const char *outFilename,
+ const UDataSwapper *ds,
+ const void *inData, int32_t length, void *outData,
+ UErrorCode *pErrorCode);
+
+U_CDECL_BEGIN
+/*
+ * Error-printing callback installed on the UDataSwapper in main():
+ * the context pointer is the FILE stream that receives the formatted message.
+ */
+static void U_CALLCONV
+printError(void *context, const char *fmt, va_list args) {
+    FILE *stream=(FILE *)context;
+
+    vfprintf(stream, fmt, args);
+}
+U_CDECL_END
+
+/*
+ * Print the usage line to stderr and, if help was requested explicitly,
+ * also the description of the options.
+ *
+ * Returns the process exit code: 0 when help was requested,
+ * 1 when the usage line is printed because of a command-line error.
+ */
+static int
+printUsage(const char *pname, UBool ishelp) {
+    /* capitalize the message only when the user asked for help */
+    const char firstLetter= ishelp ? 'U' : 'u';
+
+    fprintf(stderr,
+            "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
+            firstLetter, pname);
+
+    if(!ishelp) {
+        return 1;
+    }
+
+    fputs("\nOptions: -h, -?, --help print this message and exit\n"
+          " Read the input file, swap its platform properties according\n"
+          " to the -t or --type option, and write the result to the output file.\n"
+          " -tl change to little-endian/ASCII charset family\n"
+          " -tb change to big-endian/ASCII charset family\n"
+          " -te change to big-endian/EBCDIC charset family\n",
+          stderr);
+    return 0;
+}
+
+/**
+ * Tool entry point: read the input file, swap it to the platform type
+ * selected with -t/--type, and write the result to the output file.
+ *
+ * Exit codes:
+ *   0  success (or help was requested)
+ *   1  command-line usage error
+ *   2  the input file cannot be opened, is too short, or memory is exhausted
+ *   3  error reading the input file
+ *   4  the data could not be swapped
+ *   5  the output file cannot be opened
+ *   6  error writing the output file
+ */
+extern int
+main(int argc, char *argv[]) {
+    FILE *in, *out;
+    const char *pname;
+    const char *type;
+    char *data;
+    int32_t length;
+    UBool ishelp;
+    int rc;
+
+    UDataSwapper *ds;
+    const UDataInfo *pInfo;
+    UErrorCode errorCode;
+    uint8_t outCharset;
+    UBool outIsBigEndian;
+
+    U_MAIN_INIT_ARGS(argc, argv);
+
+    fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");
+
+    /* get the program basename */
+    pname=strrchr(argv[0], U_FILE_SEP_CHAR);
+    if(pname==NULL) {
+        pname=strrchr(argv[0], '/');
+    }
+    if(pname!=NULL) {
+        ++pname;
+    } else {
+        pname=argv[0];
+    }
+
+    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
+    ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
+    if(ishelp || argc!=3) {
+        return printUsage(pname, ishelp);
+    }
+
+    /*
+     * parse the output type option
+     * The option is mandatory: without it the option value is not set
+     * and must not be dereferenced.
+     */
+    type=options[OPT_OUT_TYPE].value;
+    if(!options[OPT_OUT_TYPE].doesOccur || type==NULL || type[0]==0 || type[1]!=0) {
+        /* the type must be present and exactly one letter */
+        return printUsage(pname, FALSE);
+    }
+    switch(type[0]) {
+    case 'l':
+        outIsBigEndian=FALSE;
+        outCharset=U_ASCII_FAMILY;
+        break;
+    case 'b':
+        outIsBigEndian=TRUE;
+        outCharset=U_ASCII_FAMILY;
+        break;
+    case 'e':
+        outIsBigEndian=TRUE;
+        outCharset=U_EBCDIC_FAMILY;
+        break;
+    default:
+        return printUsage(pname, FALSE);
+    }
+
+    in=out=NULL;
+    data=NULL;
+
+    /* open the input file, get its length, allocate memory for it, read the file */
+    in=fopen(argv[1], "rb");
+    if(in==NULL) {
+        fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]);
+        rc=2;
+        goto done;
+    }
+
+    length=fileSize(in);
+    if(length<DEFAULT_PADDING_LENGTH) {
+        fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]);
+        rc=2;
+        goto done;
+    }
+
+    /*
+     * +15: udata_swapPackage() may need to add a few padding bytes to the
+     * last item if charset swapping is done,
+     * because the last item may be resorted into the middle and then needs
+     * additional padding bytes
+     */
+    data=(char *)malloc((size_t)length+DEFAULT_PADDING_LENGTH);
+    if(data==NULL) {
+        fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]);
+        rc=2;
+        goto done;
+    }
+
+    /*
+     * Set the 15 spare bytes after the file contents to the usual padding
+     * byte, see udata_swapPackage().
+     * They must start at data+length: anything before that is overwritten
+     * by the fread() below.
+     */
+    uprv_memset(data+length, 0xaa, DEFAULT_PADDING_LENGTH);
+
+    if(length!=(int32_t)fread(data, 1, length, in)) {
+        fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]);
+        rc=3;
+        goto done;
+    }
+
+    fclose(in);
+    in=NULL;
+
+    /* swap the data in-place */
+    errorCode=U_ZERO_ERROR;
+    ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
+                pname, argv[1], u_errorName(errorCode));
+        rc=4;
+        goto done;
+    }
+
+    ds->printError=printError;
+    ds->printErrorContext=stderr;
+
+    /* speculative cast, protected by the following length check */
+    pInfo=(const UDataInfo *)((const char *)data+4);
+
+    if( length>=20 &&
+        pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
+        pInfo->dataFormat[1]==0x6d &&
+        pInfo->dataFormat[2]==0x6e &&
+        pInfo->dataFormat[3]==0x44
+    ) {
+        /*
+         * swap the .dat package
+         * udata_swapPackage() needs to rename ToC name entries from the old package
+         * name to the new one.
+         * We pass it the filenames, and udata_swapPackage() will extract the
+         * package names.
+         */
+        length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode);
+        udata_closeSwapper(ds);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n",
+                    pname, argv[1], u_errorName(errorCode));
+            rc=4;
+            goto done;
+        }
+    } else {
+        /* swap the data, which is not a .dat package */
+        length=udata_swap(ds, data, length, data, &errorCode);
+        udata_closeSwapper(ds);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n",
+                    pname, argv[1], u_errorName(errorCode));
+            rc=4;
+            goto done;
+        }
+    }
+
+    out=fopen(argv[2], "wb");
+    if(out==NULL) {
+        fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]);
+        rc=5;
+        goto done;
+    }
+
+    if(length!=(int32_t)fwrite(data, 1, length, out)) {
+        fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
+        rc=6;
+        goto done;
+    }
+
+    /* fclose() flushes buffered output: a failure here means that the file is incomplete */
+    rc=fclose(out);
+    out=NULL;
+    if(rc!=0) {
+        fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
+        rc=6;
+        goto done;
+    }
+
+    /* all done */
+    rc=0;
+
+done:
+    if(in!=NULL) {
+        fclose(in);
+    }
+    if(out!=NULL) {
+        fclose(out);
+    }
+    free(data); /* free(NULL) is a no-op */
+    return rc;
+}
+
+/* swap .dat package files -------------------------------------------------- */
+
+/**
+ * Extract the package name from a package filename:
+ * the basename of the file without its ".dat" suffix.
+ *
+ * @param ds         swapper, used only for printing error messages
+ * @param filename   path of the package file; its basename must end with ".dat"
+ *                   and must have at least one character before the suffix
+ * @param pkg        output buffer for the NUL-terminated package name
+ * @param capacity   size of pkg in bytes, including space for the NUL
+ * @param pErrorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
+ *                   if the filename is not a package filename or if the
+ *                   name does not fit into pkg
+ * @return the length of the package name, or 0 on failure
+ */
+static int32_t
+extractPackageName(const UDataSwapper *ds, const char *filename,
+                   char pkg[], int32_t capacity,
+                   UErrorCode *pErrorCode) {
+    const char *basename;
+    int32_t len;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    basename=findBasename(filename);
+    len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */
+
+    /* len<=0 is tested first so that basename+len never points before the string */
+    if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) {
+        udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
+                         basename);
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(len>=capacity) {
+        /* the format has two conversions: pass the name as well as the capacity */
+        udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
+                         basename, (long)capacity);
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    uprv_memcpy(pkg, basename, len);
+    pkg[len]=0;
+    return len;
+}
+
+/*
+ * One table-of-contents entry as used while swapping a package.
+ * All offsets are relative to the start of the package data after its header.
+ */
+struct ToCEntry {
+    uint32_t nameOffset;    /* offset of the item's name string */
+    uint32_t inOffset;      /* offset of the item in the input data */
+    uint32_t outOffset;     /* offset of the item in the output data */
+    uint32_t length;        /* length of the item in bytes */
+};
+
+U_CDECL_BEGIN
+/*
+ * Comparison callback for sorting ToCEntry structs by item name.
+ * The context is the base address that the entries' nameOffset values
+ * are relative to.
+ */
+static int32_t U_CALLCONV
+compareToCEntries(const void *context, const void *left, const void *right) {
+    const char *names=(const char *)context;
+    const ToCEntry *lhs=(const ToCEntry *)left;
+    const ToCEntry *rhs=(const ToCEntry *)right;
+
+    return (int32_t)uprv_strcmp(names+lhs->nameOffset, names+rhs->nameOffset);
+}
+U_CDECL_END
+
+/**
+ * Swap an ICU .dat package (data format "CmnD", format version 1):
+ * its header, item count, table of contents (ToC), item names,
+ * and every enclosed data item.
+ *
+ * The package name prefix of each item name is changed from the input
+ * package name to the output package name; both are taken from the
+ * basenames of the filenames and must have the same length.
+ * If the charset family changes, then the items are resorted by their
+ * swapped names and the old last item is padded to a multiple of 16 bytes;
+ * the input buffer must then have up to 15 readable bytes after the data.
+ * An item that cannot be swapped is reported via udata_printError() and
+ * copied unchanged; this does not fail the whole operation.
+ *
+ * @param inFilename  input filename, must end with ".dat"
+ * @param outFilename output filename, must end with ".dat"
+ * @param ds          swapper for the input/output platform properties
+ * @param inData      package data starting with its data header
+ * @param length      number of bytes in inData; if negative, then the
+ *                    function only preflights and returns the package length
+ * @param outData     output buffer; may be the same pointer as inData,
+ *                    in which case a temporary buffer is used internally
+ * @param pErrorCode  in/out ICU error code
+ * @return the length of the swapped package including its header,
+ *         or 0 on failure
+ */
+U_CFUNC int32_t U_CALLCONV
+udata_swapPackage(const char *inFilename, const char *outFilename,
+                  const UDataSwapper *ds,
+                  const void *inData, int32_t length, void *outData,
+                  UErrorCode *pErrorCode) {
+    const UDataInfo *pInfo;
+    int32_t headerSize;
+
+    const uint8_t *inBytes;
+    uint8_t *outBytes;
+
+    uint32_t itemCount, offset, i;
+    int32_t itemLength;
+
+    const UDataOffsetTOCEntry *inEntries;
+    UDataOffsetTOCEntry *outEntries;
+
+    ToCEntry *table;
+    size_t tableSize;
+
+    char inPkgName[32], outPkgName[32];
+    int32_t inPkgNameLength, outPkgNameLength;
+
+    /* udata_swapDataHeader checks the arguments */
+    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
+        pInfo->dataFormat[1]==0x6d &&
+        pInfo->dataFormat[2]==0x6e &&
+        pInfo->dataFormat[3]==0x44 &&
+        pInfo->formatVersion[0]==1
+    )) {
+        udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    /*
+     * We need to change the ToC name entries so that they have the correct
+     * package name prefix.
+     * Extract the package names from the in/out filenames.
+     */
+    inPkgNameLength=extractPackageName(
+                        ds, inFilename,
+                        inPkgName, (int32_t)sizeof(inPkgName),
+                        pErrorCode);
+    outPkgNameLength=extractPackageName(
+                        ds, outFilename,
+                        outPkgName, (int32_t)sizeof(outPkgName),
+                        pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /*
+     * It is possible to work with inPkgNameLength!=outPkgNameLength,
+     * but then the length of the data file would change more significantly,
+     * which we are not currently prepared for.
+     */
+    if(inPkgNameLength!=outPkgNameLength) {
+        udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
+                         inPkgName, outPkgName);
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    inBytes=(const uint8_t *)inData+headerSize;
+    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
+
+    if(length<0) {
+        /* preflighting */
+        itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
+        if(itemCount==0) {
+            /* no items: count only the item count and return */
+            return headerSize+4;
+        }
+
+        /* read the last item's offset and preflight it */
+        offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
+        itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode);
+
+        if(U_SUCCESS(*pErrorCode)) {
+            return headerSize+offset+(uint32_t)itemLength;
+        } else {
+            return 0;
+        }
+    } else {
+        /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
+        length-=headerSize;
+        if(length<4) {
+            /* itemCount does not fit */
+            offset=0xffffffff;
+            itemCount=0; /* make compilers happy */
+        } else {
+            itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
+            if(itemCount==0) {
+                offset=4;
+            } else if(itemCount>((uint32_t)length-4)/8) {
+                /*
+                 * ToC table does not fit
+                 * Same test as length<4+8*itemCount, but written so that
+                 * a huge itemCount cannot overflow the multiplication.
+                 */
+                offset=0xffffffff;
+            } else {
+                /* offset of the last item plus at least 20 bytes for its header */
+                offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
+                /* saturate rather than wrap around for a bogus, huge offset */
+                offset= offset<=0xffffffff-20 ? offset+20 : 0xffffffff;
+            }
+        }
+        if((uint32_t)length<offset) {
+            udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
+                             length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        outBytes=(uint8_t *)outData+headerSize;
+
+        /* swap the item count */
+        ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode);
+
+        if(itemCount==0) {
+            /* no items: just return now */
+            return headerSize+4;
+        }
+
+        /* swap the item name strings */
+        offset=4+8*itemCount;
+        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset);
+        udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n");
+            return 0;
+        }
+        /* keep offset and itemLength in case we allocate and copy the strings below */
+
+        /* swap the package names into the output charset */
+        if(ds->outCharset!=U_CHARSET_FAMILY) {
+            UDataSwapper *ds2;
+            ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode);
+            /* do not use ds2 if it could not be opened */
+            if(ds2!=NULL && U_SUCCESS(*pErrorCode)) {
+                ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode);
+                ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode);
+            }
+            if(ds2!=NULL) {
+                udata_closeSwapper(ds2);
+            }
+            if(U_FAILURE(*pErrorCode)) {
+                udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n");
+                /* without usable package names the ToC names cannot be rewritten */
+                return 0;
+            }
+        }
+
+        /*
+         * change the prefix of each ToC entry name from the old to the new package name
+         *
+         * The names are modified in the output buffer, which holds the
+         * strings that were just swapped into the output charset.
+         * For in-place swapping this is the same memory as the input.
+         */
+        for(i=0; i<itemCount; ++i) {
+            uint32_t nameOffset=ds->readUInt32(inEntries[i].nameOffset);
+
+            if( nameOffset<=(uint32_t)length &&
+                (uint32_t)inPkgNameLength<=(uint32_t)length-nameOffset && /* the prefix lies inside the data */
+                0==uprv_memcmp(outBytes+nameOffset, inPkgName, inPkgNameLength)
+            ) {
+                uprv_memcpy(outBytes+nameOffset, outPkgName, inPkgNameLength);
+            } else {
+                udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
+                                 (long)i);
+                *pErrorCode=U_INVALID_FORMAT_ERROR;
+                return 0;
+            }
+        }
+
+        /*
+         * Allocate the ToC table and, if necessary, a temporary buffer for
+         * pseudo-in-place swapping.
+         *
+         * We cannot swap in-place because:
+         *
+         * 1. If the swapping of an item fails mid-way, then in-place swapping
+         * has destroyed its data.
+         * Out-of-place swapping allows us to then copy its original data.
+         *
+         * 2. If swapping changes the charset family, then we must resort
+         * not only the ToC table but also the data items themselves.
+         * This requires a permutation and is best done with separate in/out
+         * buffers.
+         *
+         * We swapped the strings above to avoid the malloc below if string swapping fails.
+         */
+        tableSize=itemCount*sizeof(ToCEntry);
+        if(inData==outData) {
+            /* +15: prepare for extra padding of a newly-last item */
+            tableSize+=(size_t)length+DEFAULT_PADDING_LENGTH;
+        }
+        table=(ToCEntry *)uprv_malloc(tableSize);
+        if(table==NULL) {
+            udata_printError(ds, "udata_swapPackage(): out of memory allocating %lu bytes\n",
+                             (unsigned long)tableSize);
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
+        if(inData==outData) {
+            /* the temporary output buffer follows the ToC table in the same allocation */
+            outBytes=(uint8_t *)(table+itemCount);
+
+            /* copy the item count and the swapped strings */
+            uprv_memcpy(outBytes, inBytes, 4);
+            uprv_memcpy(outBytes+offset, inBytes+offset, itemLength);
+        }
+        outEntries=(UDataOffsetTOCEntry *)(outBytes+4);
+
+        /* read the ToC table */
+        for(i=0; i<itemCount; ++i) {
+            table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset);
+            table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset);
+            if(i>0) {
+                if(table[i].inOffset<table[i-1].inOffset) {
+                    /* the item length below would wrap around and lead to out-of-bounds copying */
+                    udata_printError(ds, "udata_swapPackage() failed: ToC item %ld has a smaller data offset than the preceding item\n",
+                                     (long)i);
+                    uprv_free(table);
+                    *pErrorCode=U_INVALID_FORMAT_ERROR;
+                    return 0;
+                }
+                table[i-1].length=table[i].inOffset-table[i-1].inOffset;
+            }
+        }
+        table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset;
+
+        if(ds->inCharset==ds->outCharset) {
+            /* no charset swapping, no resorting: keep item offsets the same */
+            for(i=0; i<itemCount; ++i) {
+                table[i].outOffset=table[i].inOffset;
+            }
+        } else {
+            /* charset swapping: resort items by their swapped names */
+
+            /*
+             * Before the actual sorting, we need to make sure that each item
+             * has a length that is a multiple of 16 bytes so that all items
+             * are 16-aligned.
+             * Only the old last item may be missing up to 15 padding bytes.
+             * Add padding bytes for it.
+             * Since the icuswap main() function has already allocated enough
+             * input buffer space and set the last 15 bytes there to 0xaa,
+             * we only need to increase the total data length and the length
+             * of the last item here.
+             */
+            if((length&0xf)!=0) {
+                int32_t delta=16-(length&0xf);
+                length+=delta;
+                table[itemCount-1].length+=(uint32_t)delta;
+            }
+
+            /* Save the offset before we sort the TOC. */
+            offset=table[0].inOffset;
+            /* sort the TOC entries */
+            uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry),
+                           compareToCEntries, outBytes, FALSE, pErrorCode);
+            if(U_FAILURE(*pErrorCode)) {
+                /* do not let the per-item error handling below hide this failure */
+                udata_printError(ds, "udata_swapPackage() failed to sort the ToC entries - %s\n",
+                                 u_errorName(*pErrorCode));
+                uprv_free(table);
+                return 0;
+            }
+
+            /*
+             * Note: Before sorting, the inOffset values were in order.
+             * Now the outOffset values are in order.
+             */
+
+            /* assign outOffset values */
+            for(i=0; i<itemCount; ++i) {
+                table[i].outOffset=offset;
+                offset+=table[i].length;
+            }
+        }
+
+        /* write the output ToC table */
+        for(i=0; i<itemCount; ++i) {
+            ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset);
+            ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset);
+        }
+
+        /* swap each data item */
+        for(i=0; i<itemCount; ++i) {
+            /* first copy the item bytes to make sure that unreachable bytes are copied */
+            uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
+
+            /* swap the item */
+            udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length,
+                       outBytes+table[i].outOffset, pErrorCode);
+
+            if(U_FAILURE(*pErrorCode)) {
+                if(ds->outCharset==U_CHARSET_FAMILY) {
+                    /* the item name is printable only in this platform's charset family */
+                    udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
+                                         " at inOffset 0x%x length 0x%x - %s\n"
+                                         " the data item will be copied, not swapped\n\n",
+                                     (char *)outBytes+table[i].nameOffset,
+                                     table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
+                } else {
+                    udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n"
+                                         " at inOffset 0x%x length 0x%x - %s\n"
+                                         " the data item will be copied, not swapped\n\n",
+                                     table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
+                }
+                /* reset the error code, copy the data item, and continue */
+                *pErrorCode=U_ZERO_ERROR;
+                uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
+            }
+        }
+
+        if(inData==outData) {
+            /* copy the data from the temporary buffer to the in-place buffer */
+            uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length);
+        }
+        uprv_free(table);
+
+        return headerSize+length;
+    }
+}
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */