summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/tools/escapesrc
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/tools/escapesrc')
-rw-r--r--intl/icu/source/tools/escapesrc/Makefile.in112
-rw-r--r--intl/icu/source/tools/escapesrc/cptbl.h521
-rw-r--r--intl/icu/source/tools/escapesrc/escapesrc.cpp427
-rw-r--r--intl/icu/source/tools/escapesrc/expect-simple.cpp17
-rw-r--r--intl/icu/source/tools/escapesrc/tblgen.cpp80
-rw-r--r--intl/icu/source/tools/escapesrc/test-nochange.cpp5
-rw-r--r--intl/icu/source/tools/escapesrc/test-simple.cpp17
7 files changed, 1179 insertions, 0 deletions
diff --git a/intl/icu/source/tools/escapesrc/Makefile.in b/intl/icu/source/tools/escapesrc/Makefile.in
new file mode 100644
index 0000000000..7580ccdc31
--- /dev/null
+++ b/intl/icu/source/tools/escapesrc/Makefile.in
@@ -0,0 +1,112 @@
+## Makefile.in for ICU - tools/escapesrc
+## Copyright (C) 2016 and later: Unicode, Inc. and others.
+## License & terms of use: http://www.unicode.org/copyright.html
+## Copyright (c) 1999-2011, International Business Machines Corporation and
+## others. All Rights Reserved.
+## Steven R. Loomis
+
+# To avoid recursion: escapesrc.cpp itself must not be run through escapesrc
+SKIP_ESCAPING=YES
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = tools/escapesrc
+
+TARGET_STUB_NAME = escapesrc
+
+SECTION = 8
+
+#MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
+
+## Extra files to remove for 'make clean' (./output-*.cpp are written by check-local)
+CLEANFILES = *~ $(DEPS) $(MAN_FILES) ./output-*.cpp
+
+## Target information
+TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
+
+CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
+#LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC)
+LIBS += $(DEFAULT_LIBS) $(LIB_M)
+
+OBJECTS = escapesrc.o
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local \
+distclean distclean-local dist dist-local check check-local install-man
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets (each one forwards to its -local counterpart below)
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET) $(MAN_FILES)
+
+install-local: all-local install-man
+ $(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
+ $(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
+
+install-man: $(MAN_FILES)
+# $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
+# $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
+
+
+dist-local:
+
+clean-local:
+ test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+ $(RMV) $(TARGET) $(OBJECTS)
+
+distclean-local: clean-local
+ $(RMV) Makefile
+
+check-local: all-local # runs escapesrc on the two test inputs; a diff mismatch only prints a warning
+ @echo Testing test-nochange.cpp
+ @$(INVOKE) $(TARGET) $(srcdir)/test-nochange.cpp ./output-nochange.cpp
+ @-diff -I '#line.*' $(srcdir)/test-nochange.cpp ./output-nochange.cpp || (echo >&2 'warning: diff failed or not found' ; true)
+ @echo Testing test-simple.cpp
+ @$(INVOKE) $(TARGET) $(srcdir)/test-simple.cpp ./output-simple.cpp
+ @-diff -I '#line.*' $(srcdir)/expect-simple.cpp ./output-simple.cpp || (echo >&2 'warning: diff failed or not found' ; true)
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+ $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
+ $(POST_BUILD_STEP)
+
+
+%.$(SECTION): $(srcdir)/%.$(SECTION).in
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+# gen-table rewrites $(srcdir)/cptbl.h from tblgen's output; depends on ICU being built (tblgen links $(LIBICUUC))
+gen-table: tblgen$(EXEEXT)
+ $(INVOKE) ./tblgen$(EXEEXT) > $(srcdir)/cptbl.h
+
+tblgen$(EXEEXT): tblgen.o
+ $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS) $(LIBICUUC)
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+
diff --git a/intl/icu/source/tools/escapesrc/cptbl.h b/intl/icu/source/tools/escapesrc/cptbl.h
new file mode 100644
index 0000000000..898e16c925
--- /dev/null
+++ b/intl/icu/source/tools/escapesrc/cptbl.h
@@ -0,0 +1,521 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html
+// generated by tblgen. You weren't going to edit it by hand, were you?
+
+static const char cp1047_8859_1[256] = {
+ (char)0x00, /* 00 */
+ (char)0x01, /* 01 */
+ (char)0x02, /* 02 */
+ (char)0x03, /* 03 */
+ (char)0x9C, /* 04 */
+ (char)0x09, /* 05 */
+ (char)0x86, /* 06 */
+ (char)0x7F, /* 07 */
+ (char)0x97, /* 08 */
+ (char)0x8D, /* 09 */
+ (char)0x8E, /* 0A */
+ (char)0x0B, /* 0B */
+ (char)0x0C, /* 0C */
+ (char)0x0D, /* 0D */
+ (char)0x0E, /* 0E */
+ (char)0x0F, /* 0F */
+ (char)0x10, /* 10 */
+ (char)0x11, /* 11 */
+ (char)0x12, /* 12 */
+ (char)0x13, /* 13 */
+ (char)0x9D, /* 14 */
+ (char)0x85, /* 15 */
+ (char)0x08, /* 16 */
+ (char)0x87, /* 17 */
+ (char)0x18, /* 18 */
+ (char)0x19, /* 19 */
+ (char)0x92, /* 1A */
+ (char)0x8F, /* 1B */
+ (char)0x1C, /* 1C */
+ (char)0x1D, /* 1D */
+ (char)0x1E, /* 1E */
+ (char)0x1F, /* 1F */
+ (char)0x80, /* 20 */
+ (char)0x81, /* 21 */
+ (char)0x82, /* 22 */
+ (char)0x83, /* 23 */
+ (char)0x84, /* 24 */
+ (char)0x0A, /* 25 */
+ (char)0x17, /* 26 */
+ (char)0x1B, /* 27 */
+ (char)0x88, /* 28 */
+ (char)0x89, /* 29 */
+ (char)0x8A, /* 2A */
+ (char)0x8B, /* 2B */
+ (char)0x8C, /* 2C */
+ (char)0x05, /* 2D */
+ (char)0x06, /* 2E */
+ (char)0x07, /* 2F */
+ (char)0x90, /* 30 */
+ (char)0x91, /* 31 */
+ (char)0x16, /* 32 */
+ (char)0x93, /* 33 */
+ (char)0x94, /* 34 */
+ (char)0x95, /* 35 */
+ (char)0x96, /* 36 */
+ (char)0x04, /* 37 */
+ (char)0x98, /* 38 */
+ (char)0x99, /* 39 */
+ (char)0x9A, /* 3A */
+ (char)0x9B, /* 3B */
+ (char)0x14, /* 3C */
+ (char)0x15, /* 3D */
+ (char)0x9E, /* 3E */
+ (char)0x1A, /* 3F */
+ (char)0x20, /* 40 */
+ (char)0xA0, /* 41 */
+ (char)0xE2, /* 42 */
+ (char)0xE4, /* 43 */
+ (char)0xE0, /* 44 */
+ (char)0xE1, /* 45 */
+ (char)0xE3, /* 46 */
+ (char)0xE5, /* 47 */
+ (char)0xE7, /* 48 */
+ (char)0xF1, /* 49 */
+ (char)0xA2, /* 4A */
+ (char)0x2E, /* 4B */
+ (char)0x3C, /* 4C */
+ (char)0x28, /* 4D */
+ (char)0x2B, /* 4E */
+ (char)0x7C, /* 4F */
+ (char)0x26, /* 50 */
+ (char)0xE9, /* 51 */
+ (char)0xEA, /* 52 */
+ (char)0xEB, /* 53 */
+ (char)0xE8, /* 54 */
+ (char)0xED, /* 55 */
+ (char)0xEE, /* 56 */
+ (char)0xEF, /* 57 */
+ (char)0xEC, /* 58 */
+ (char)0xDF, /* 59 */
+ (char)0x21, /* 5A */
+ (char)0x24, /* 5B */
+ (char)0x2A, /* 5C */
+ (char)0x29, /* 5D */
+ (char)0x3B, /* 5E */
+ (char)0x5E, /* 5F */
+ (char)0x2D, /* 60 */
+ (char)0x2F, /* 61 */
+ (char)0xC2, /* 62 */
+ (char)0xC4, /* 63 */
+ (char)0xC0, /* 64 */
+ (char)0xC1, /* 65 */
+ (char)0xC3, /* 66 */
+ (char)0xC5, /* 67 */
+ (char)0xC7, /* 68 */
+ (char)0xD1, /* 69 */
+ (char)0xA6, /* 6A */
+ (char)0x2C, /* 6B */
+ (char)0x25, /* 6C */
+ (char)0x5F, /* 6D */
+ (char)0x3E, /* 6E */
+ (char)0x3F, /* 6F */
+ (char)0xF8, /* 70 */
+ (char)0xC9, /* 71 */
+ (char)0xCA, /* 72 */
+ (char)0xCB, /* 73 */
+ (char)0xC8, /* 74 */
+ (char)0xCD, /* 75 */
+ (char)0xCE, /* 76 */
+ (char)0xCF, /* 77 */
+ (char)0xCC, /* 78 */
+ (char)0x60, /* 79 */
+ (char)0x3A, /* 7A */
+ (char)0x23, /* 7B */
+ (char)0x40, /* 7C */
+ (char)0x27, /* 7D */
+ (char)0x3D, /* 7E */
+ (char)0x22, /* 7F */
+ (char)0xD8, /* 80 */
+ (char)0x61, /* 81 */
+ (char)0x62, /* 82 */
+ (char)0x63, /* 83 */
+ (char)0x64, /* 84 */
+ (char)0x65, /* 85 */
+ (char)0x66, /* 86 */
+ (char)0x67, /* 87 */
+ (char)0x68, /* 88 */
+ (char)0x69, /* 89 */
+ (char)0xAB, /* 8A */
+ (char)0xBB, /* 8B */
+ (char)0xF0, /* 8C */
+ (char)0xFD, /* 8D */
+ (char)0xFE, /* 8E */
+ (char)0xB1, /* 8F */
+ (char)0xB0, /* 90 */
+ (char)0x6A, /* 91 */
+ (char)0x6B, /* 92 */
+ (char)0x6C, /* 93 */
+ (char)0x6D, /* 94 */
+ (char)0x6E, /* 95 */
+ (char)0x6F, /* 96 */
+ (char)0x70, /* 97 */
+ (char)0x71, /* 98 */
+ (char)0x72, /* 99 */
+ (char)0xAA, /* 9A */
+ (char)0xBA, /* 9B */
+ (char)0xE6, /* 9C */
+ (char)0xB8, /* 9D */
+ (char)0xC6, /* 9E */
+ (char)0xA4, /* 9F */
+ (char)0xB5, /* A0 */
+ (char)0x7E, /* A1 */
+ (char)0x73, /* A2 */
+ (char)0x74, /* A3 */
+ (char)0x75, /* A4 */
+ (char)0x76, /* A5 */
+ (char)0x77, /* A6 */
+ (char)0x78, /* A7 */
+ (char)0x79, /* A8 */
+ (char)0x7A, /* A9 */
+ (char)0xA1, /* AA */
+ (char)0xBF, /* AB */
+ (char)0xD0, /* AC */
+ (char)0x5B, /* AD */
+ (char)0xDE, /* AE */
+ (char)0xAE, /* AF */
+ (char)0xAC, /* B0 */
+ (char)0xA3, /* B1 */
+ (char)0xA5, /* B2 */
+ (char)0xB7, /* B3 */
+ (char)0xA9, /* B4 */
+ (char)0xA7, /* B5 */
+ (char)0xB6, /* B6 */
+ (char)0xBC, /* B7 */
+ (char)0xBD, /* B8 */
+ (char)0xBE, /* B9 */
+ (char)0xDD, /* BA */
+ (char)0xA8, /* BB */
+ (char)0xAF, /* BC */
+ (char)0x5D, /* BD */
+ (char)0xB4, /* BE */
+ (char)0xD7, /* BF */
+ (char)0x7B, /* C0 */
+ (char)0x41, /* C1 */
+ (char)0x42, /* C2 */
+ (char)0x43, /* C3 */
+ (char)0x44, /* C4 */
+ (char)0x45, /* C5 */
+ (char)0x46, /* C6 */
+ (char)0x47, /* C7 */
+ (char)0x48, /* C8 */
+ (char)0x49, /* C9 */
+ (char)0xAD, /* CA */
+ (char)0xF4, /* CB */
+ (char)0xF6, /* CC */
+ (char)0xF2, /* CD */
+ (char)0xF3, /* CE */
+ (char)0xF5, /* CF */
+ (char)0x7D, /* D0 */
+ (char)0x4A, /* D1 */
+ (char)0x4B, /* D2 */
+ (char)0x4C, /* D3 */
+ (char)0x4D, /* D4 */
+ (char)0x4E, /* D5 */
+ (char)0x4F, /* D6 */
+ (char)0x50, /* D7 */
+ (char)0x51, /* D8 */
+ (char)0x52, /* D9 */
+ (char)0xB9, /* DA */
+ (char)0xFB, /* DB */
+ (char)0xFC, /* DC */
+ (char)0xF9, /* DD */
+ (char)0xFA, /* DE */
+ (char)0xFF, /* DF */
+ (char)0x5C, /* E0 */
+ (char)0xF7, /* E1 */
+ (char)0x53, /* E2 */
+ (char)0x54, /* E3 */
+ (char)0x55, /* E4 */
+ (char)0x56, /* E5 */
+ (char)0x57, /* E6 */
+ (char)0x58, /* E7 */
+ (char)0x59, /* E8 */
+ (char)0x5A, /* E9 */
+ (char)0xB2, /* EA */
+ (char)0xD4, /* EB */
+ (char)0xD6, /* EC */
+ (char)0xD2, /* ED */
+ (char)0xD3, /* EE */
+ (char)0xD5, /* EF */
+ (char)0x30, /* F0 */
+ (char)0x31, /* F1 */
+ (char)0x32, /* F2 */
+ (char)0x33, /* F3 */
+ (char)0x34, /* F4 */
+ (char)0x35, /* F5 */
+ (char)0x36, /* F6 */
+ (char)0x37, /* F7 */
+ (char)0x38, /* F8 */
+ (char)0x39, /* F9 */
+ (char)0xB3, /* FA */
+ (char)0xDB, /* FB */
+ (char)0xDC, /* FC */
+ (char)0xD9, /* FD */
+ (char)0xDA, /* FE */
+ (char)0x9F, /* FF */
+};
+
+static const bool oldIllegal[256] = {
+ false, /* U+0000 */
+ false, /* U+0001 */
+ false, /* U+0002 */
+ false, /* U+0003 */
+ false, /* U+0004 */
+ false, /* U+0005 */
+ false, /* U+0006 */
+ false, /* U+0007 */
+ false, /* U+0008 */
+ false, /* U+0009 */
+ false, /* U+000A */
+ false, /* U+000B */
+ false, /* U+000C */
+ false, /* U+000D */
+ false, /* U+000E */
+ false, /* U+000F */
+ false, /* U+0010 */
+ false, /* U+0011 */
+ false, /* U+0012 */
+ false, /* U+0013 */
+ false, /* U+0014 */
+ false, /* U+0015 */
+ false, /* U+0016 */
+ false, /* U+0017 */
+ false, /* U+0018 */
+ false, /* U+0019 */
+ false, /* U+001A */
+ false, /* U+001B */
+ false, /* U+001C */
+ false, /* U+001D */
+ false, /* U+001E */
+ false, /* U+001F */
+ true, /* U+0020 */
+ true, /* U+0021 */
+ true, /* U+0022 */
+ true, /* U+0023 */
+ false, /* U+0024 */
+ true, /* U+0025 */
+ true, /* U+0026 */
+ true, /* U+0027 */
+ true, /* U+0028 */
+ true, /* U+0029 */
+ true, /* U+002A */
+ true, /* U+002B */
+ true, /* U+002C */
+ true, /* U+002D */
+ true, /* U+002E */
+ true, /* U+002F */
+ true, /* U+0030 */
+ true, /* U+0031 */
+ true, /* U+0032 */
+ true, /* U+0033 */
+ true, /* U+0034 */
+ true, /* U+0035 */
+ true, /* U+0036 */
+ true, /* U+0037 */
+ true, /* U+0038 */
+ true, /* U+0039 */
+ true, /* U+003A */
+ true, /* U+003B */
+ true, /* U+003C */
+ true, /* U+003D */
+ true, /* U+003E */
+ true, /* U+003F */
+ false, /* U+0040 */
+ true, /* U+0041 */
+ true, /* U+0042 */
+ true, /* U+0043 */
+ true, /* U+0044 */
+ true, /* U+0045 */
+ true, /* U+0046 */
+ true, /* U+0047 */
+ true, /* U+0048 */
+ true, /* U+0049 */
+ true, /* U+004A */
+ true, /* U+004B */
+ true, /* U+004C */
+ true, /* U+004D */
+ true, /* U+004E */
+ true, /* U+004F */
+ true, /* U+0050 */
+ true, /* U+0051 */
+ true, /* U+0052 */
+ true, /* U+0053 */
+ true, /* U+0054 */
+ true, /* U+0055 */
+ true, /* U+0056 */
+ true, /* U+0057 */
+ true, /* U+0058 */
+ true, /* U+0059 */
+ true, /* U+005A */
+ true, /* U+005B */
+ false, /* U+005C */
+ true, /* U+005D */
+ true, /* U+005E */
+ true, /* U+005F */
+ false, /* U+0060 */
+ true, /* U+0061 */
+ true, /* U+0062 */
+ true, /* U+0063 */
+ true, /* U+0064 */
+ true, /* U+0065 */
+ true, /* U+0066 */
+ true, /* U+0067 */
+ true, /* U+0068 */
+ true, /* U+0069 */
+ true, /* U+006A */
+ true, /* U+006B */
+ true, /* U+006C */
+ true, /* U+006D */
+ true, /* U+006E */
+ true, /* U+006F */
+ true, /* U+0070 */
+ true, /* U+0071 */
+ true, /* U+0072 */
+ true, /* U+0073 */
+ true, /* U+0074 */
+ true, /* U+0075 */
+ true, /* U+0076 */
+ true, /* U+0077 */
+ true, /* U+0078 */
+ true, /* U+0079 */
+ true, /* U+007A */
+ true, /* U+007B */
+ true, /* U+007C */
+ true, /* U+007D */
+ true, /* U+007E */
+ false, /* U+007F */
+ false, /* U+0080 */
+ false, /* U+0081 */
+ false, /* U+0082 */
+ false, /* U+0083 */
+ false, /* U+0084 */
+ false, /* U+0085 */
+ false, /* U+0086 */
+ false, /* U+0087 */
+ false, /* U+0088 */
+ false, /* U+0089 */
+ false, /* U+008A */
+ false, /* U+008B */
+ false, /* U+008C */
+ false, /* U+008D */
+ false, /* U+008E */
+ false, /* U+008F */
+ false, /* U+0090 */
+ false, /* U+0091 */
+ false, /* U+0092 */
+ false, /* U+0093 */
+ false, /* U+0094 */
+ false, /* U+0095 */
+ false, /* U+0096 */
+ false, /* U+0097 */
+ false, /* U+0098 */
+ false, /* U+0099 */
+ false, /* U+009A */
+ false, /* U+009B */
+ false, /* U+009C */
+ false, /* U+009D */
+ false, /* U+009E */
+ false, /* U+009F */
+ false, /* U+00A0 */
+ false, /* U+00A1 */
+ false, /* U+00A2 */
+ false, /* U+00A3 */
+ false, /* U+00A4 */
+ false, /* U+00A5 */
+ false, /* U+00A6 */
+ false, /* U+00A7 */
+ false, /* U+00A8 */
+ false, /* U+00A9 */
+ false, /* U+00AA */
+ false, /* U+00AB */
+ false, /* U+00AC */
+ false, /* U+00AD */
+ false, /* U+00AE */
+ false, /* U+00AF */
+ false, /* U+00B0 */
+ false, /* U+00B1 */
+ false, /* U+00B2 */
+ false, /* U+00B3 */
+ false, /* U+00B4 */
+ false, /* U+00B5 */
+ false, /* U+00B6 */
+ false, /* U+00B7 */
+ false, /* U+00B8 */
+ false, /* U+00B9 */
+ false, /* U+00BA */
+ false, /* U+00BB */
+ false, /* U+00BC */
+ false, /* U+00BD */
+ false, /* U+00BE */
+ false, /* U+00BF */
+ false, /* U+00C0 */
+ false, /* U+00C1 */
+ false, /* U+00C2 */
+ false, /* U+00C3 */
+ false, /* U+00C4 */
+ false, /* U+00C5 */
+ false, /* U+00C6 */
+ false, /* U+00C7 */
+ false, /* U+00C8 */
+ false, /* U+00C9 */
+ false, /* U+00CA */
+ false, /* U+00CB */
+ false, /* U+00CC */
+ false, /* U+00CD */
+ false, /* U+00CE */
+ false, /* U+00CF */
+ false, /* U+00D0 */
+ false, /* U+00D1 */
+ false, /* U+00D2 */
+ false, /* U+00D3 */
+ false, /* U+00D4 */
+ false, /* U+00D5 */
+ false, /* U+00D6 */
+ false, /* U+00D7 */
+ false, /* U+00D8 */
+ false, /* U+00D9 */
+ false, /* U+00DA */
+ false, /* U+00DB */
+ false, /* U+00DC */
+ false, /* U+00DD */
+ false, /* U+00DE */
+ false, /* U+00DF */
+ false, /* U+00E0 */
+ false, /* U+00E1 */
+ false, /* U+00E2 */
+ false, /* U+00E3 */
+ false, /* U+00E4 */
+ false, /* U+00E5 */
+ false, /* U+00E6 */
+ false, /* U+00E7 */
+ false, /* U+00E8 */
+ false, /* U+00E9 */
+ false, /* U+00EA */
+ false, /* U+00EB */
+ false, /* U+00EC */
+ false, /* U+00ED */
+ false, /* U+00EE */
+ false, /* U+00EF */
+ false, /* U+00F0 */
+ false, /* U+00F1 */
+ false, /* U+00F2 */
+ false, /* U+00F3 */
+ false, /* U+00F4 */
+ false, /* U+00F5 */
+ false, /* U+00F6 */
+ false, /* U+00F7 */
+ false, /* U+00F8 */
+ false, /* U+00F9 */
+ false, /* U+00FA */
+ false, /* U+00FB */
+ false, /* U+00FC */
+ false, /* U+00FD */
+ false, /* U+00FE */
+ false, /* U+00FF */
+};
+
diff --git a/intl/icu/source/tools/escapesrc/escapesrc.cpp b/intl/icu/source/tools/escapesrc/escapesrc.cpp
new file mode 100644
index 0000000000..10ac3d1aef
--- /dev/null
+++ b/intl/icu/source/tools/escapesrc/escapesrc.cpp
@@ -0,0 +1,427 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <stdio.h>
+#include <string>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <iostream>
+#include <fstream>
+
+// We only use U8_* macros, which are entirely inline.
+#include "unicode/utf8.h"
+
+// This contains a codepage and ISO 14882:1998 illegality table.
+// Use "make gen-table" to rebuild it.
+#include "cptbl.h"
+
+/**
+ * What is this?
+ *
+ * "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code
+ * in utf-8 into something consumable by certain compilers (Solaris, xlC)
+ * which aren't quite standards compliant.
+ *
+ * - u"<unicode>" or u'<unicode>' gets converted to u"\uNNNN" or u'\uNNNN'
+ * - u8"<unicode>" gets converted to "\xAA\xBB\xCC\xDD" etc.
+ * (some compilers do not support the u8 prefix correctly.)
+ * - if the system is EBCDIC-based, that is used to correct the input characters.
+ *
+ * Usage:
+ * escapesrc infile.cpp outfile.cpp
+ * Normally this is invoked by the build stage, with a rule such as:
+ *
+ * _%.cpp: $(srcdir)/%.cpp
+ * @$(BINDIR)/escapesrc$(EXEEXT) $< $@
+ * %.o: _%.cpp
+ * $(COMPILE.cc) ... $@ $<
+ *
+ * In the Makefiles, SKIP_ESCAPING=YES is used to prevent escapesrc.cpp
+ * from being itself escaped.
+ */
+
+
+// Fixed byte values (the ASCII codes) of the characters skipws() treats as whitespace.
+static const char kSPACE = 0x20;  // space
+static const char kTAB = 0x09;    // horizontal tab
+static const char kLF = 0x0A;     // line feed
+static const char kCR = 0x0D;     // carriage return
+
+// For convenience: look up the ISO-8859-1 value of one cp1047 byte in cp1047_8859_1[].
+// The argument is converted to uint8_t first, so that a plain char holding a
+// value >= 0x80 cannot turn into a negative table index where char is signed.
+# define cp1047_to_8859(c) cp1047_8859_1[(uint8_t)(c)]
+
+// Our app's name: set from argv[0] in main(), used as a prefix in diagnostics
+std::string prog;
+
+/**
+ * Print the usual 1-line usage summary to stderr.
+ * This only prints; it is up to the caller to return or exit afterwards.
+ */
+void usage() {
+  const char *name = prog.c_str();
+  fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", name, name);
+}
+
+/**
+ * Delete the output file (if a name was given).
+ * This is done even when this run did not generate the file, because an
+ * existing copy might be stale.
+ * @param outfile path of the file to delete; an empty name is ignored
+ * @return 0 if the file was deleted or did not exist, 1 if deletion failed
+ */
+int cleanup(const std::string &outfile) {
+  const char *path = outfile.c_str();
+  if(*path == '\0') {
+    return 0; // no name - nothing to delete
+  }
+
+  if(std::remove(path) == 0) {
+    fprintf(stderr, "%s: deleted %s\n", prog.c_str(), path);
+    return 0;
+  }
+
+  if(errno == ENOENT) {
+    return 0; // File did not exist - no error.
+  }
+
+  perror("std::remove");
+  return 1;
+}
+
+/**
+ * Advance over a run of known whitespace (kSPACE, kTAB, kLF, kCR).
+ * @param p startpoint
+ * @param e limit (one past the last char that may be examined)
+ * @return pointer to the first non-whitespace char, or e if there is none
+ */
+inline const char *skipws(const char *p, const char *e) {
+  while(p < e && (*p == kSPACE || *p == kTAB || *p == kLF || *p == kCR)) {
+    ++p;
+  }
+  return p;
+}
+
+/**
+ * Append one byte to a string as a 4-character "\xNN" escape
+ * (two uppercase hex digits).
+ * @param outstr string to append to
+ * @param byte the byte to append
+ */
+void appendByte(std::string &outstr,
+                uint8_t byte) {
+  char hex[5]; // backslash, 'x', two digits, terminating NUL
+  snprintf(hex, sizeof(hex), "\\x%02X", (unsigned int)byte);
+  outstr.append(hex);
+}
+
+/**
+ * Read the hex digits of one \uXXXX / \UXXXXXXXX escape from 'linestr' and
+ * append the UTF-8 encoding of that code point to outstr, each byte written
+ * as a "\xNN" escape.
+ *
+ * The digits are validated before use: they must all lie inside linestr and
+ * all be hex digits, and the value must be a code point that UTF-8 can encode.
+ *
+ * @param outstr the output buffer
+ * @param linestr the input buffer
+ * @param pos in/out: on entry the index of the 'u' / 'U'; on success the index
+ *            of the last hex digit consumed. Left unchanged on failure.
+ * @param chars the number of hex digits to consider (at most 8)
+ * @return true on failure
+ */
+bool appendUtf8(std::string &outstr,
+                const std::string &linestr,
+                size_t &pos,
+                size_t chars) {
+  char tmp[9];
+
+  // The digits occupy linestr[pos+1 .. pos+chars]; they must fit into tmp
+  // (plus its NUL) and must not run off the end of the line.
+  if(chars == 0 || chars >= sizeof(tmp) ||
+     pos >= linestr.size() || chars > linestr.size() - 1 - pos) {
+    fprintf(stderr, "Truncated \\u/\\U escape at Column: %d\n", (int)pos);
+    return true;
+  }
+
+  for(size_t i=0;i<chars;i++) {
+    const char digit = linestr[pos + 1 + i];
+    // Plain range checks: 0-9, a-f and A-F are each contiguous in ASCII and in EBCDIC.
+    const bool isHex = (digit >= '0' && digit <= '9') ||
+                       (digit >= 'a' && digit <= 'f') ||
+                       (digit >= 'A' && digit <= 'F');
+    if(!isHex) {
+      fprintf(stderr, "Malformed \\u/\\U escape at Column: %d\n", (int)pos);
+      return true;
+    }
+    tmp[i] = digit;
+  }
+  tmp[chars] = 0;
+
+  unsigned int c = 0;
+  if(sscanf(tmp, "%X", &c) != 1) {
+    fprintf(stderr, "Malformed \\u/\\U escape at Column: %d\n", (int)pos);
+    return true;
+  }
+
+  // Reject out-of-range values outright instead of masking them into range.
+  if(c > 0x10FFFF) {
+    fprintf(stderr, "Illegal code point U+%X\n", c);
+    return true;
+  }
+  UChar32 ch = (UChar32)c;
+
+  // now to append \\x%% etc
+  uint8_t bytesNeeded = U8_LENGTH(ch);
+  if(bytesNeeded == 0) {
+    // U8_LENGTH yields 0 for code points it cannot encode
+    fprintf(stderr, "Illegal code point U+%X\n", c);
+    return true;
+  }
+
+  uint8_t bytes[4];
+  uint8_t *s = bytes;
+  size_t len = 0;
+  U8_APPEND_UNSAFE(s, len, ch);
+  for(size_t t = 0; t<len; t++) {
+    appendByte(outstr, s[t]);
+  }
+
+  pos += chars; // consume the digits only once everything succeeded
+  return false;
+}
+
+/**
+ * @param d a character in the local encoding
+ * @return true if d is one of 0-9, a-f, A-F
+ */
+static bool fixu8IsHexDigit(char d) {
+  return (d >= '0' && d <= '9') ||
+         (d >= 'a' && d <= 'f') ||
+         (d >= 'A' && d <= 'F');
+}
+
+/**
+ * Fixup u8"x": replace the whole u8"..." literal by a plain "..." literal in
+ * which the content is spelled as "\xNN" escapes of its UTF-8 bytes.
+ *
+ * Escapes inside the literal are handled as follows:
+ *  - \uXXXX and \UXXXXXXXX become the bytes of their UTF-8 encoding
+ *  - \' \" \\ \? and \a \b \f \n \r \t \v become their single byte value
+ *  - numeric escapes (\xHH.., \OOO) are copied through unchanged
+ *  - anything else is copied through unchanged rather than being dropped
+ *
+ * linestr is only modified if the whole literal could be converted.
+ *
+ * @param linestr string to mutate. Already escaped into \u format.
+ * @param origpos beginning, points to 'u8"'
+ * @param endpos end, points to the closing "
+ * @return false for no-problem, true for failure!
+ */
+bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) {
+  size_t pos = origpos + 3;
+  std::string outstr;
+  outstr += '\"'; // local encoding
+  for(;pos<endpos;pos++) {
+    char c = linestr[pos];
+    if(c != '\\') {
+#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
+      c = cp1047_to_8859((uint8_t)c);
+#endif
+      appendByte(outstr, c);
+      continue;
+    }
+
+    char c2 = linestr[++pos];
+    switch(c2) {
+    case '\'':
+    case '"':
+    case '\\':
+    case '?':
+      // escaped punctuation stands for the character itself
+#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
+      c2 = cp1047_to_8859((uint8_t)c2);
+#endif
+      appendByte(outstr, c2);
+      break;
+    // Simple escapes: emit the byte the escape denotes in a u8 literal,
+    // independent of the local charset.
+    case 'a': appendByte(outstr, 0x07); break;
+    case 'b': appendByte(outstr, 0x08); break;
+    case 'f': appendByte(outstr, 0x0C); break;
+    case 'n': appendByte(outstr, 0x0A); break;
+    case 'r': appendByte(outstr, 0x0D); break;
+    case 't': appendByte(outstr, 0x09); break;
+    case 'v': appendByte(outstr, 0x0B); break;
+    case 'u':
+      if(appendUtf8(outstr, linestr, pos, 4)) {
+        return true;
+      }
+      break;
+    case 'U':
+      if(appendUtf8(outstr, linestr, pos, 8)) {
+        return true;
+      }
+      break;
+    default:
+      // Numeric escapes are already raw values; keep them, digits included.
+      // Everything after them is emitted as "\x..", so they cannot accidentally
+      // absorb a following character.
+      outstr += '\\';
+      outstr += c2;
+      if(c2 == 'x') {
+        while(pos+1 < endpos && fixu8IsHexDigit(linestr[pos+1])) {
+          outstr += linestr[++pos];
+        }
+      } else if(c2 >= '0' && c2 <= '7') {
+        // an octal escape has at most three digits in total
+        for(int extra = 0;
+            extra < 2 && pos+1 < endpos &&
+              linestr[pos+1] >= '0' && linestr[pos+1] <= '7';
+            extra++) {
+          outstr += linestr[++pos];
+        }
+      }
+      break;
+    }
+  }
+  outstr += ('\"');
+
+  // swap the converted literal in for everything from the 'u' through the closing quote
+  linestr.replace(origpos, (endpos-origpos+1), outstr);
+
+  return false; // OK
+}
+
+/**
+ * Fix the u"x"/u'x'/u8"x" literal that starts at the given position.
+ * Scans from the opening quote to the matching closing quote. Each character
+ * that is not a single byte flagged in oldIllegal[] is replaced, in place,
+ * by a \uNNNN or \UNNNNNNNN escape. Existing backslash escapes are stepped over.
+ * For u8"..." the finished literal is then handed to fixu8().
+ * If no closing quote is found on the line, the scan stops at the end of the
+ * line and no error is reported.
+ * u8'x' is not supported, sorry.
+ * @param linestr the input string; modified in place
+ * @param pos the position of the leading 'u'
+ * @return false = no err, true = had err
+ */
+bool fixAt(std::string &linestr, size_t pos) {
+ size_t origpos = pos; // start of the literal, needed by fixu8()
+
+ if(linestr[pos] != 'u') {
+ fprintf(stderr, "Not a 'u'?");
+ return true;
+ }
+
+ pos++; // past 'u'
+
+ bool utf8 = false;
+
+ if(linestr[pos] == '8') { // u8"
+ utf8 = true;
+ pos++;
+ }
+
+ char quote = linestr[pos]; // the opening quote; the same char closes the literal
+
+ if(quote != '\'' && quote != '\"') {
+ fprintf(stderr, "Quote is '%c' - not sure what to do.\n", quote);
+ return true;
+ }
+
+ if(quote == '\'' && utf8) {
+ fprintf(stderr, "Cannot do u8'...'\n");
+ return true;
+ }
+
+ pos ++; // past the opening quote
+
+ //printf("u%c…%c\n", quote, quote);
+
+ for(; pos < linestr.size(); pos++) {
+ if(linestr[pos] == quote) {
+ if(utf8) {
+ return fixu8(linestr, origpos, pos); // fix u8"..."
+ } else {
+ return false; // end of quote
+ }
+ }
+ if(linestr[pos] == '\\') {
+ pos++; // step onto the escaped char; the loop's pos++ then moves past it
+ if(linestr[pos] == quote) continue; // quoted quote
+ if(linestr[pos] == 'u') continue; // for now ... unicode escape
+ if(linestr[pos] == '\\') continue;
+ // some other escape… ignore
+ } else {
+ size_t old_pos = pos; // column reported if the sequence is illegal
+ int32_t i = pos; // U8_NEXT advances i past the decoded sequence
+#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
+ // mogrify 1-4 bytes from 1047 'back' to utf-8
+ char old_byte = linestr[pos];
+ linestr[pos] = cp1047_to_8859(linestr[pos]);
+ // how many more?
+ int32_t trail = U8_COUNT_TRAIL_BYTES(linestr[pos]);
+ for(size_t pos2 = pos+1; trail>0; pos2++,trail--) {
+ linestr[pos2] = cp1047_to_8859(linestr[pos2]);
+ if(linestr[pos2] == 0x0A) {
+ linestr[pos2] = 0x85; // NL is ambiguous here
+ }
+ }
+#endif
+
+ // Proceed to decode utf-8
+ const uint8_t *s = (const uint8_t*) (linestr.c_str());
+ int32_t length = linestr.size();
+ UChar32 c;
+ if(U8_IS_SINGLE((uint8_t)s[i]) && oldIllegal[s[i]]) {
+#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
+ linestr[pos] = old_byte; // put it back
+#endif
+ continue; // single code point not previously legal for \u escaping
+ }
+
+ // otherwise, convert it to \u / \U
+ {
+ U8_NEXT(s, i, length, c);
+ }
+ if(c<0) { // a negative c is treated as a decoding failure
+ fprintf(stderr, "Illegal utf-8 sequence at Column: %d\n", (int)old_pos);
+ fprintf(stderr, "Line: >>%s<<\n", linestr.c_str());
+ return true;
+ }
+
+ size_t seqLen = (i-pos); // number of bytes U8_NEXT consumed
+
+ //printf("U+%04X pos %d [len %d]\n", c, pos, seqLen);fflush(stdout);
+
+ char newSeq[20];
+ if( c <= 0xFFFF) {
+ snprintf(newSeq, sizeof(newSeq), "\\u%04X", c);
+ } else {
+ snprintf(newSeq, sizeof(newSeq), "\\U%08X", c);
+ }
+ linestr.replace(pos, seqLen, newSeq);
+ pos += strlen(newSeq) - 1; // land on the last char of the escape; the loop's pos++ moves past it
+ }
+ }
+
+ return false; // reached the end of the line without a closing quote
+}
+
+/**
+ * Fixup an entire line: every u"...", u'...' and u8"..." literal found on it
+ * is passed to fixAt().
+ * Each kind of literal is searched for from the end of the line towards the
+ * start, first all u" then all u' then all u8" occurrences.
+ * @param no the line number (not used)
+ * @param linestr the string to fix
+ * @return true if any err, else false
+ */
+bool fixLine(int /*no*/, std::string &linestr) {
+  const char *raw = linestr.c_str();
+
+  // Quick exit: no u' / u" / u8" anywhere in the line.
+  if(strstr(raw, "u'") == NULL &&
+     strstr(raw, "u\"") == NULL &&
+     strstr(raw, "u8\"") == NULL) {
+    return false; // Nothing to do.
+  }
+
+  // refuse absurdly long lines
+  if(linestr.size() > INT32_MAX/2) {
+    return true;
+  }
+
+  static const char * const kPrefixes[] = { "u\"", "u'", "u8\"" };
+  const size_t kPrefixCount = sizeof(kPrefixes) / sizeof(kPrefixes[0]);
+
+  for(size_t which = 0; which < kPrefixCount; which++) {
+    // restart from the (possibly grown) end of the line for each prefix
+    size_t pos = linestr.size();
+    while(pos > 0) {
+      pos = linestr.rfind(kPrefixes[which], pos);
+      if(pos == std::string::npos) {
+        break; // no further occurrence
+      }
+      if(fixAt(linestr, pos)) {
+        return true;
+      }
+      if(pos == 0) {
+        break;
+      }
+      pos--;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Convert a whole file.
+ * Writes a "#line 1" directive naming the input file, then every input line
+ * after it has been passed through fixLine().
+ * If a line cannot be fixed, or reading or writing fails, the output file is
+ * deleted so that no partial result is left behind.
+ * @param infile path of the file to read
+ * @param outfile path of the file to write
+ * @return 1 on err, 0 otherwise
+ */
+int convert(const std::string &infile, const std::string &outfile) {
+  fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str());
+
+  std::ifstream inf;
+
+  inf.open(infile.c_str(), std::ios::in);
+
+  if(!inf.is_open()) {
+    fprintf(stderr, "%s: could not open input file %s\n", prog.c_str(), infile.c_str());
+    cleanup(outfile);
+    return 1;
+  }
+
+  std::ofstream outf;
+
+  outf.open(outfile.c_str(), std::ios::out);
+
+  if(!outf.is_open()) {
+    fprintf(stderr, "%s: could not open output file %s\n", prog.c_str(), outfile.c_str());
+    return 1;
+  }
+
+  // TODO: any platform variations of #line?
+  outf << "#line 1 \"" << infile << "\"" << '\n';
+
+  int no = 0;
+  bool fixed = true;
+  std::string linestr;
+  while(fixed && getline(inf, linestr)) {
+    no++;
+    if(fixLine(no, linestr)) {
+      fixed = false;
+    } else {
+      outf << linestr << '\n';
+    }
+  }
+
+  // Leaving the loop for any reason other than end-of-file is a read error.
+  if(fixed && !inf.eof()) {
+    fixed = false;
+  }
+
+  // Closing flushes; a failed write or flush shows up in the stream state.
+  outf.close();
+
+  if(fixed && outf.fail()) {
+    fprintf(stderr, "%s: error writing output file %s\n", prog.c_str(), outfile.c_str());
+    cleanup(outfile);
+    return 1;
+  }
+
+  if(fixed) {
+    return 0;
+  }
+
+  fprintf(stderr, "%s:%d: Fixup failed by %s\n", infile.c_str(), no, prog.c_str());
+  cleanup(outfile);
+  return 1;
+}
+
+/**
+ * Main function.
+ * Expects exactly two arguments, the input and the output file name.
+ * @return the result of convert(), or 1 after printing the usage line
+ */
+int main(int argc, const char *argv[]) {
+  prog = argv[0];
+
+  if(argc == 3) {
+    return convert(argv[1], argv[2]);
+  }
+
+  usage();
+  return 1;
+}
diff --git a/intl/icu/source/tools/escapesrc/expect-simple.cpp b/intl/icu/source/tools/escapesrc/expect-simple.cpp
new file mode 100644
index 0000000000..a6019a8d40
--- /dev/null
+++ b/intl/icu/source/tools/escapesrc/expect-simple.cpp
@@ -0,0 +1,17 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+u"sa\u0127\u0127a";
+u'\u6587';
+u"\U000219F2";
+u"\u039C\u03C5\u03C3\u03C4\u03AE\u03C1\u03B9\u03BF";
+
+ u"sa\u0127\u0127a";
+ u'\u6587'; u"\U000219F2";
+
+"\x20\xCC\x81";
+"\xCC\x88\x20";
+"\x73\x61\xC4\xA7\xC4\xA7\x61";
+"\xE6\x96\x87";
+"\xF0\xA1\xA7\xB2";
+"\x73\x61\xC4\xA7\xC4\xA7\x61";
diff --git a/intl/icu/source/tools/escapesrc/tblgen.cpp b/intl/icu/source/tools/escapesrc/tblgen.cpp
new file mode 100644
index 0000000000..dce4af6867
--- /dev/null
+++ b/intl/icu/source/tools/escapesrc/tblgen.cpp
@@ -0,0 +1,80 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+#include "unicode/ucnv.h"
+#include "unicode/uniset.h"
+#include <stdio.h>
+
+static const char *kConverter = "ibm-1047"; // converter name passed to ucnv_open()
+
+int main(int argc, const char *argv[]) { // prints the two tables of cptbl.h to stdout (see "make gen-table")
+ printf("// %s\n", U_COPYRIGHT_STRING);
+ printf("// generated by tblgen. You weren't going to edit it by hand, were you?\n");
+ printf("\n");
+
+ UErrorCode status = U_ZERO_ERROR;
+ LocalUConverterPointer cnv(ucnv_open(kConverter, &status));
+
+ if(U_FAILURE(status)) {
+ fprintf(stderr, "Failed to open %s: %s\n", kConverter, u_errorName(status));
+ return 1;
+ }
+
+ printf("static const char cp1047_8859_1[256] = { \n");
+ for(int i=0x00; i<0x100; i++) { // one table entry per possible source byte
+ char cp1047[1];
+ cp1047[0] = i;
+ char16_t u[1];
+ char16_t *target = u;
+ const char *source = cp1047;
+ ucnv_toUnicode(cnv.getAlias(), &target, u+1, &source, cp1047+1, nullptr, true, &status);
+ if(U_FAILURE(status)) {
+ fprintf(stderr, "Conversion failure at #%X: %s\n", i, u_errorName(status));
+ return 2;
+ }
+ printf(" (char)0x%02X, /* %02X */\n", u[0], i); // converted value, then the source byte as a comment
+ }
+ printf("};\n\n");
+
+ //
+ // UnicodeSet oldIllegal("[:print:]", status); // [a-zA-Z0-9_}{#)(><%:;.?*+-/^&|~!=,\\u005b\\u005d\\u005c]", status);
+ UnicodeSet oldIllegal("[0-9 a-z A-Z "
+ "_ \\{ \\} \\[ \\] # \\( \\) < > % \\: ; . "
+ "? * + \\- / \\^ \\& | ~ ! = , \\ \" ' ]", status);
+
+ /*
+
+http://www.lirmm.fr/~ducour/Doc-objets/ISO+IEC+14882-1998.pdf ( note: 1998 ) page 10, section 2.2 says:
+
+1 The basic source character set consists of 96 characters: the space character, the control characters repre- 15)
+senting horizontal tab, vertical tab, form feed, and new-line, plus the following 91 graphical characters:
+a b c d e f g h i j k l m n opqrstuvwxyz
+A B C D E F G H I J K L M N OPQRSTUVWXYZ
+0 12 3 4 5 6 7 8 9
+ _ { } [ ] # ( ) < > % : ; . ?*+-/^&|~!=,\"
+2 The universal-character-name construct provides a way to name other characters. hex-quad:
+hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
+universal-character-name: \u hex-quad
+\U hex-quad hex-quad
+The character designated by the universal-character-name \UNNNNNNNN is that character whose character short name in ISO/IEC 10646 is NNNNNNNN; the character designated by the universal-character-name \uNNNN is that character whose character short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value for a universal character name is less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the uni- versal character name designates a character in the basic source character set, then the program is ill- formed.
+
+
+So basically: printable ASCII plus 0x00-0x1F, 0x7F-0x9F, was all illegal.
+
+Some discussion at http://unicode.org/mail-arch/unicode-ml/y2003-m10/0471.html
+
+The table printed below holds true for every code point below 0x100 that is
+contained in the oldIllegal set above, and false for all others.
+
+NOTE(review): in the checked-in cptbl.h, U+0020 is true and U+005C is false,
+although the pattern above has no plain space entry - presumably "\\ " is read
+as an escaped space rather than as a backslash. Confirm that this is intended.
+
+ */
+
+
+
+ printf("static const bool oldIllegal[256] = { \n");
+ for(char16_t i=0x00; i<0x100;i++) {
+ printf(" %s, /* U+%04X */\n",
+ (oldIllegal.contains(i))?" true":"false",
+ i);
+ }
+ printf("};\n\n");
+
+ return 0;
+}
diff --git a/intl/icu/source/tools/escapesrc/test-nochange.cpp b/intl/icu/source/tools/escapesrc/test-nochange.cpp
new file mode 100644
index 0000000000..8c0d04b809
--- /dev/null
+++ b/intl/icu/source/tools/escapesrc/test-nochange.cpp
@@ -0,0 +1,5 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// This is a source file with no changes needed in it.
+// In fact, the only non-ASCII character is the comment line at top.
diff --git a/intl/icu/source/tools/escapesrc/test-simple.cpp b/intl/icu/source/tools/escapesrc/test-simple.cpp
new file mode 100644
index 0000000000..b03f28f706
--- /dev/null
+++ b/intl/icu/source/tools/escapesrc/test-simple.cpp
@@ -0,0 +1,17 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+u"saħħa";
+u'文';
+u"𡧲";
+u"Μυστήριο";
+
+ u"saħħa";
+ u'文'; u"𡧲";
+
+u8" \u0301";
+u8"\u0308 ";
+u8"saħħa";
+u8"文";
+u8"𡧲";
+u8"saħ\u0127a";