Adding upstream version 115.8.0esr.upstream/115.8.0esr

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 01:47:29 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 01:47:29 +0000
commit: 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
tree: a31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /intl/icu/source/tools/gensprep
parent: Initial commit. (diff)
download: firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.tar.xz
firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.zip
9 files changed, 2191 insertions, 0 deletions
diff --git a/intl/icu/source/tools/gensprep/Makefile.in b/intl/icu/source/tools/gensprep/Makefile.in
new file mode 100644
index 0000000000..7f475aeb56
--- /dev/null
+++ b/intl/icu/source/tools/gensprep/Makefile.in
@@ -0,0 +1,97 @@
+## Makefile.in for ICU - tools/gensprep
+## Copyright (C) 2016 and later: Unicode, Inc. and others.
+## License & terms of use: http://www.unicode.org/copyright.html
+## Copyright (c) 2001-2011, International Business Machines Corporation and
+## others. All Rights Reserved.
+## Steven R. Loomis/Markus W. Scherer
+
+## Source directory information
+## (@srcdir@ / @top_srcdir@ are substituted when config.status generates
+## the Makefile from this template)
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+## Top of the build tree, relative to this directory
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = tools/gensprep
+
+TARGET_STUB_NAME = gensprep
+
+## Manual section used for the man page name and its install directory
+SECTION = 8
+
+MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
+
+
+## Extra files to remove for 'make clean'
+## (kept as a recursive assignment: DEPS is only defined further down)
+CLEANFILES = *~ $(DEPS) $(MAN_FILES)
+
+## Target information
+TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
+
+CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
+LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
+
+## The source list lives in sources.txt.  Use a simple (:=) assignment so
+## the 'cat' runs once at parse time instead of on every expansion of
+## SOURCES, OBJECTS and DEPS.
+SOURCES := $(shell cat $(srcdir)/sources.txt)
+OBJECTS = $(SOURCES:.c=.o)
+
+DEPS = $(OBJECTS:.o=.d)
+
+## Targets that name actions rather than files, grouped by purpose
+.PHONY : all all-local
+.PHONY : install install-local install-man
+.PHONY : clean clean-local distclean distclean-local
+.PHONY : dist dist-local
+.PHONY : check check-local
+
+## Start from an empty suffix list
+.SUFFIXES :
+
+## Standard entry points; each one forwards to its -local counterpart
+all       : all-local
+install   : install-local
+clean     : clean-local
+distclean : distclean-local
+dist      : dist-local
+check     : all check-local
+
+## Default build: the tool binary plus its man page
+all-local: $(TARGET) $(MAN_FILES)
+
+## Install the binary into $(DESTDIR)$(sbindir); the man page is handled
+## by the install-man prerequisite
+install-local: all-local install-man
+	$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
+	$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
+
+## Install the man page(s) into $(DESTDIR)$(mandir)/man$(SECTION)
+install-man: $(MAN_FILES)
+	$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
+	$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
+
+## Nothing to do for 'dist' in this directory
+dist-local:
+
+## Remove CLEANFILES (skipped when that list is empty), then the binary
+## and the object files
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(TARGET) $(OBJECTS)
+
+## Everything 'clean' removes, plus the generated Makefile
+distclean-local: clean-local
+	$(RMV) Makefile
+
+## No tests of its own; only makes sure the tool is built
+check-local: all-local
+
+## Regenerate this Makefile when the template or config.status changes,
+## by running config.status from the top build directory for this one file
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+## Link the tool from its object files, then run the post-build step
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS) 
+	$(POST_BUILD_STEP)
+
+
+## Generate the man page from its .in template via config.status
+%.$(SECTION): $(srcdir)/%.$(SECTION).in
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+
+## Pull in the generated dependency files unless every goal named on the
+## command line is a *clean goal.  With no goals at all the placeholder
+## word "all" keeps the test non-empty, so the include still happens.
+ifneq (,$(if $(MAKECMDGOALS),$(filter-out %clean,$(MAKECMDGOALS)),all))
+-include $(DEPS)
+endif
+
diff --git a/intl/icu/source/tools/gensprep/filterRFC3454.pl b/intl/icu/source/tools/gensprep/filterRFC3454.pl
new file mode 100755
index 0000000000..321b03512c
--- /dev/null
+++ b/intl/icu/source/tools/gensprep/filterRFC3454.pl
@@ -0,0 +1,678 @@
+#!/usr/bin/perl
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Copyright (c) 2001-2015 International Business Machines
+# Corporation and others. All Rights Reserved.
+
+####################################################################################
+# filterRFC3454.pl:
+# This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
+# to be used in NamePrepProfile
+#
+# Author: Ram Viswanadha
+#        
+####################################################################################
+
+use File::Find;
+use File::Basename;
+use IO::File;
+use Cwd;
+use File::Copy;
+use Getopt::Long;
+use File::Path;
+use File::Copy;
+use Time::localtime;
+
+# Header blocks written at the top of every generated file.
+# $icu_copyright is a printf format; main() supplies the current year for %d.
+$icu_copyright = "#####################################################################\n# Copyright (c) %d, International Business Machines Corporation and\n# others. All Rights Reserved.\n#####################################################################\n\n";
+# Attribution for the RFC 3454 source text.
+$copyright = "###################\n# This file was generated from RFC 3454 (http://www.ietf.org/rfc/rfc3454.txt)\n# Copyright (C) The Internet Society (2002).  All Rights Reserved. \n###################\n\n";
+# Records the command-line options; @ARGV is interpolated here, before
+# GetOptions() in main() processes it.
+$warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool with\n# options: @ARGV \n###################\n\n";
+# run the program
+main();
+
+#---------------------------------------------------------------------
+# The main program
+
+sub main(){
+  # Entry point: parse the options, write the file header, then copy each
+  # requested RFC 3454 table into the destination file, followed by any
+  # requested profile-specific extra tables.
+  #
+  # Required options: --sourcedir, --destdir, --src-filename and
+  # --dest-filename (usage() is called when one is missing).
+  # Dies if --B2 and --B3 are both given or if a file cannot be opened.
+  GetOptions(
+           "--sourcedir=s" => \$sourceDir,
+           "--destdir=s" => \$destDir,
+           "--src-filename=s" => \$srcFileName,
+           "--dest-filename=s" => \$destFileName,
+           "--A1"  => \$a1,
+           "--B1"  => \$b1,
+           "--B2"  => \$b2,
+           "--B3"  => \$b3,
+           "--C11" => \$c11,
+           "--C12" => \$c12,
+           "--C21" => \$c21,
+           "--C22" => \$c22,
+           "--C3"  => \$c3,
+           "--C4"  => \$c4,
+           "--C5"  => \$c5,
+           "--C6"  => \$c6,
+           "--C7"  => \$c7,
+           "--C8"  => \$c8,
+           "--C9"  => \$c9,
+           "--iscsi" => \$writeISCSIProhibitedExtra,
+           "--xmpp-node" => \$writeXMPPNodeProhibitedExtra,
+           "--sasl" => \$writeSASLMap,
+           "--ldap" => \$writeLDAPMap,
+           "--normalize" => \$norm,
+           "--check-bidi" => \$checkBidi,
+           );
+  usage() unless defined $sourceDir;
+  usage() unless defined $destDir;
+  usage() unless defined $srcFileName;
+  usage() unless defined $destFileName;
+
+  # Reject contradictory options before the destination file is touched,
+  # so a bad invocation does not destroy an existing output file.
+  if(defined $b2 && defined $b3){
+      die "ERROR: --B2 and --B3 are both specified\!\n";
+  }
+
+  $infile = $sourceDir."/".$srcFileName;
+  $inFH = IO::File->new($infile,"r")
+            or die  "could not open the file $infile for reading: $! \n";
+  $outfile = $destDir."/".$destFileName;
+
+  # Start from a fresh file; the table writers re-open it in append mode.
+  unlink($outfile);
+  $outFH = IO::File->new($outfile,"a")
+            or die  "could not open the file $outfile for writing: $! \n";
+
+  printf $outFH  $icu_copyright, localtime->year()+1900;
+  print $outFH  $copyright;
+  print $outFH  $warning;
+
+  if(defined $norm) {
+      print $outFH "\@normalize;;\n";
+  }
+  if(defined $checkBidi) {
+      print $outFH "\@check-bidi;;\n";
+  }
+  print $outFH "\n";
+  close($outFH);
+
+  # One entry per RFC 3454 table: [ pattern, option flag, writer ].
+  # The dots are escaped so they match only a literal '.', and table A.1
+  # is now gated on --A1 like every other table (the flag used to be
+  # parsed but ignored, so A.1 was always emitted).
+  my @tables = (
+      [ qr/A\.1/,    $a1,  \&createUnassignedTable ],
+      [ qr/B\.1/,    $b1,  \&createMapToNothing    ],
+      [ qr/B\.2/,    $b2,  \&createCaseMapNorm     ],
+      [ qr/B\.3/,    $b3,  \&createCaseMapNoNorm   ],
+      [ qr/C\.1\.1/, $c11, \&createProhibitedTable ],
+      [ qr/C\.1\.2/, $c12, \&createProhibitedTable ],
+      [ qr/C\.2\.1/, $c21, \&createProhibitedTable ],
+      [ qr/C\.2\.2/, $c22, \&createProhibitedTable ],
+      [ qr/C\.3/,    $c3,  \&createProhibitedTable ],
+      [ qr/C\.4/,    $c4,  \&createProhibitedTable ],
+      [ qr/C\.5/,    $c5,  \&createProhibitedTable ],
+      [ qr/C\.6/,    $c6,  \&createProhibitedTable ],
+      [ qr/C\.7/,    $c7,  \&createProhibitedTable ],
+      [ qr/C\.8/,    $c8,  \&createProhibitedTable ],
+      [ qr/C\.9/,    $c9,  \&createProhibitedTable ],
+  );
+
+  while(defined ($line=<$inFH>)){
+      next unless $line=~ /Start\sTable/;
+      foreach my $entry (@tables){
+          my ($pattern, $enabled, $writer) = @$entry;
+          next unless defined $enabled && $line =~ $pattern;
+          # The writer consumes the input up to the matching "End Table"
+          # line.  Writers that only take two arguments ignore the third.
+          $writer->($inFH, $outfile, $line);
+          last;
+      }
+  }
+  if( defined $writeISCSIProhibitedExtra){
+      create_iSCSIExtraProhibitedTable($inFH, $outfile);
+  }
+  if( defined $writeXMPPNodeProhibitedExtra){
+      create_XMPPNodeExtraProhibitedTable($inFH, $outfile);
+  }
+  if( defined $writeSASLMap){
+      create_SASLMapTable($inFH, $outfile);
+  }
+  if( defined $writeLDAPMap){
+      create_LDAPMapTable($inFH, $outfile);
+  }
+  close($inFH);
+}
+
+#-----------------------------------------------------------------------
+sub readPrint{
+    # Copy one RFC 3454 table from the input handle to the output handle.
+    #   $inFH    handle on the RFC text, positioned inside a table
+    #   $outFH   handle the converted lines are written to
+    #   $comment heading printed before the table
+    #   $table   table family: "A", "B.1", "B.2", "B.3" or "C"
+    # Reading stops at the "End Table" line, where the number of code
+    # points seen is written as a trailing comment.
+    local ($inFH, $outFH,$comment, $table) = @_;
+    $count = 0;
+    print $outFH $comment."\n";
+    while(defined ($line = <$inFH>)){
+        # skip page headings, form feeds and blank lines
+        next if $line =~ /Hoffman\s\&\sBlanchet/
+             || $line =~ /RFC\s3454/
+             || $line =~ /\f/
+             || $line eq "\n";
+        # the table is finished once "End Table" shows up
+        if( $line =~ /End\sTable/){
+            print $outFH "\n# Total code points $count\n\n";
+            return;
+        }
+        print $line if $print==1;
+        # "XXXX-YYYY" becomes "XXXX..YYYY"; drop the leading indentation
+        $line =~ s/-/../;
+        $line =~ s/^\s+//;
+        # make sure the code point field is terminated by ';'
+        $line =~ s/$/;/ unless $line =~ /\;/;
+        if($table =~ /A/ ){
+            ($code, $noise) = split /;/ , $line;
+            $line = $code."; ; UNASSIGNED\n";
+        }elsif ( $table =~ /B\.1/ ){
+            $line =~ s/Map to nothing/MAP/;
+        }elsif ( $table =~ /B\.[23]/ ){
+            $line =~ s/Case map/MAP/;
+            $line =~ s/Additional folding/MAP/;
+        }elsif ( $table =~ /C/ ) {
+            ($code, $noise) = split /;/ , $line;
+            $line = $code."; ; PROHIBITED\n";
+        }
+        if($line =~ /\.\./){
+            # a range contributes every code point from start to end
+            ($code, $noise) = split /;/ , $line;
+            ($startStr, $endStr ) = split /\.\./, $code;
+            $start = atoi($startStr);
+            $end   = atoi($endStr);
+            $count += $end - $start + 1 if $start <= $end;
+        }else{
+            $count++;
+        }
+        print $outFH $line;
+    }
+}
+#-----------------------------------------------------------------------
+sub atoi {
+    # Convert a string of hexadecimal digits (e.g. "00AD") to its number.
+    # The previous digit-by-digit loop added each character numerically,
+    # so the letters A-F counted as 0 and range sizes came out wrong.
+    # Returns undef when the argument holds no hex digits, as before.
+    my $text = shift;
+    return undef unless defined $text && $text =~ /([0-9A-Fa-f]+)/;
+    return hex($1);
+}
+#-----------------------------------------------------------------------
+sub createUnassignedTable{
+    # Append RFC 3454 Table A.1 to the output file.
+    #   $_[0]  handle on the RFC text
+    #   $_[1]  path of the output file, opened here in append mode
+    ($inFH, $outfile) = @_[0, 1];
+    $outFH = IO::File->new($outfile, "a");
+    die  "could not open the file $outfile for writing: $! \n" unless $outFH;
+    $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
+    readPrint($inFH, $outFH, $comment, "A");
+    $outFH->close();
+}
+#-----------------------------------------------------------------------
+sub createMapToNothing{
+    # Append RFC 3454 Table B.1 to the output file.
+    #   $_[0]  handle on the RFC text
+    #   $_[1]  path of the output file, opened here in append mode
+    ($inFH, $outfile) = @_[0, 1];
+    $outFH = IO::File->new($outfile, "a");
+    die  "could not open the file $outfile for writing: $! \n" unless $outFH;
+    $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
+    readPrint($inFH, $outFH, $comment, "B.1");
+    $outFH->close();
+}
+#-----------------------------------------------------------------------
+sub createCaseMapNorm{
+    # Append RFC 3454 Table B.2 to the output file; the heading repeats
+    # the global $warning banner in front of the table comment.
+    #   $_[0]  handle on the RFC text
+    #   $_[1]  path of the output file, opened here in append mode
+    ($inFH, $outfile) = @_[0, 1];
+    $outFH = IO::File->new($outfile, "a");
+    die  "could not open the file $outfile for writing: $! \n" unless $outFH;
+    $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
+    readPrint($inFH, $outFH, $comment, "B.2");
+    $outFH->close();
+}
+#-----------------------------------------------------------------------
+sub createCaseMapNoNorm{
+    # Append RFC 3454 Table B.3 to the output file; the heading repeats
+    # the global $warning banner in front of the table comment.
+    #   $_[0]  handle on the RFC text
+    #   $_[1]  path of the output file, opened here in append mode
+    ($inFH, $outfile) = @_[0, 1];
+    $outFH = IO::File->new($outfile, "a");
+    die  "could not open the file $outfile for writing: $! \n" unless $outFH;
+    $comment = $warning."# This table contains code points from Table B.3 from RFC 3454\n";
+    readPrint($inFH, $outFH, $comment, "B.3");
+    $outFH->close();
+}
+#-----------------------------------------------------------------------
+sub createProhibitedTable{
+    # Append one of the RFC 3454 C.x tables to the output file.
+    #   $_[0]  handle on the RFC text
+    #   $_[1]  path of the output file, opened here in append mode
+    #   $_[2]  the "Start Table" line; with "Start" and the dashes
+    #          stripped it becomes the heading comment
+    ($inFH, $outfile, $line) = @_[0 .. 2];
+    for ($line) {
+        s/Start//;
+        s/-//g;
+    }
+    $comment = "# code points from $line";
+
+    $outFH = IO::File->new($outfile, "a");
+    die  "could not open the file $outfile for writing: $! \n" unless $outFH;
+    readPrint($inFH, $outFH, $comment, "C");
+    $outFH->close();
+}
+
+#-----------------------------------------------------------------------
+sub create_iSCSIExtraProhibitedTable{
+    # Append the extra prohibited code points of the iSCSI profile.
+    #   $_[0]  handle on the RFC text (not read here)
+    #   $_[1]  path of the output file, opened here in append mode
+    ($inFH, $outfile, $line) = @_[0 .. 2];
+    $comment ="# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";
+
+    $outFH = IO::File->new($outfile, "a");
+    die  "could not open the file $outfile for writing: $! \n" unless $outFH;
+    # single code points and ranges, in output order
+    my @prohibited = qw(0021..002C 002F 003B..0040 005B..0060 007B..007E 3002);
+    print {$outFH} $comment,
+                   (map { $_."; ; PROHIBITED\n" } @prohibited),
+                   "\n# Total code points 30\n";
+    $outFH->close();
+}
+#-----------------------------------------------------------------------
+sub create_XMPPNodeExtraProhibitedTable{
+    # Append the extra prohibited code points of the XMPP Nodeprep profile.
+    #   $_[0]  handle on the RFC text (not read here)
+    #   $_[1]  path of the output file, opened here in append mode
+    ($inFH, $outfile, $line) = @_[0 .. 2];
+    $comment ="# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";
+
+    $outFH = IO::File->new($outfile, "a");
+    die  "could not open the file $outfile for writing: $! \n" unless $outFH;
+    # code points in output order
+    my @prohibited = qw(0022 0026 0027 002F 003A 003C 003E 0040);
+    print {$outFH} $comment,
+                   (map { $_."; ; PROHIBITED\n" } @prohibited),
+                   "\n# Total code points 8\n";
+    $outFH->close();
+}
+#-----------------------------------------------------------------------
+sub create_SASLMapTable{
+    # Append the map table of the SASL profile.
+    #   $_[0]  handle on the RFC text (not read here)
+    #   $_[1]  path of the output file, opened here in append mode
+    ($inFH, $outfile, $line) = @_[0 .. 2];
+    $comment ="# Map table for SASL profile (rfc4013.txt)\n\n";
+
+    $outFH = IO::File->new($outfile, "a");
+    die  "could not open the file $outfile for writing: $! \n" unless $outFH;
+    print {$outFH} $comment;
+
+    # Writes one "CODE; TARGET; MAP" line per code point.  Each span is a
+    # single hex code point or an inclusive "FIRST..LAST" hex range.
+    my $writeMap = sub {
+        my ($target, @spans) = @_;
+        foreach my $span (@spans) {
+            my ($first, $last) = map { hex($_) } split /\.\./, $span;
+            $last = $first unless defined $last;
+            printf {$outFH} "%04X; %s; MAP\n", $_, $target foreach $first .. $last;
+        }
+    };
+
+    # non-ASCII space characters [C.1.2] to SPACE
+    $writeMap->("0020", qw(00A0 1680 2000..200B 202F 205F 3000));
+
+    # commonly mapped to nothing characters except U+200B to nothing
+    $writeMap->("", qw(00AD 034F 1806 180B..180D 200C 200D 2060 FE00..FE0F FEFF));
+
+    print {$outFH} "\n# Total code points 43\n";
+    $outFH->close();
+}
+#-----------------------------------------------------------------------
+sub create_LDAPMapTable{
+    # Append the map table of the LDAP profile.
+    #   $_[0]  handle on the RFC text (not read here)
+    #   $_[1]  path of the output file, opened here in append mode
+    ($inFH, $outfile, $line) = @_[0 .. 2];
+    $comment ="# Map table for LDAP profile (rfc4518.txt)\n\n";
+
+    $outFH = IO::File->new($outfile, "a");
+    die  "could not open the file $outfile for writing: $! \n" unless $outFH;
+    print {$outFH} $comment;
+
+    # Writes one "CODE; TARGET; MAP" line per code point.  Each span is a
+    # single hex code point or an inclusive "FIRST..LAST" hex range; an
+    # empty target means "map to nothing".
+    my $writeMap = sub {
+        my ($target, @spans) = @_;
+        foreach my $span (@spans) {
+            my ($first, $last) = map { hex($_) } split /\.\./, $span;
+            $last = $first unless defined $last;
+            printf {$outFH} "%04X; %s; MAP\n", $_, $target foreach $first .. $last;
+        }
+    };
+
+    #   SOFT HYPHEN (U+00AD), MONGOLIAN TODO SOFT HYPHEN (U+1806),
+    #   COMBINING GRAPHEME JOINER (U+034F), the VARIATION SELECTORs
+    #   (U+180B-180D, FE00-FE0F) and the OBJECT REPLACEMENT CHARACTER
+    #   (U+FFFC) are mapped to nothing.
+    $writeMap->("", qw(00AD 034F 1806 180B..180D FE00..FE0F FFFC));
+
+    #   CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION
+    #   (U+000B), FORM FEED (U+000C), CARRIAGE RETURN (U+000D) and NEXT
+    #   LINE (U+0085) are mapped to SPACE (U+0020).
+    $writeMap->("0020", qw(0009..000D 0085));
+
+    #   All other control code points (e.g., Cc) or code points with a
+    #   control function (e.g., Cf) are mapped to nothing: U+0000-0008,
+    #   000E-001F, 007F-0084, 0086-009F, 06DD, 070F, 180E, 200C-200F,
+    #   202A-202E, 2060-2063, 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A,
+    #   E0001, E0020-E007F.
+    $writeMap->("", qw(0000..0008 000E..001F 007F..0084 0086..009F
+                       06DD 070F 180E 200C..200F 202A..202E 2060..2063
+                       206A..206F FEFF FFF9..FFFB 1D173..1D17A
+                       E0001 E0020..E007F));
+
+    #   ZERO WIDTH SPACE (U+200B) is mapped to nothing.  The remaining
+    #   separators (Zs, Zl, Zp) are mapped to SPACE (U+0020): U+00A0,
+    #   1680, 2000-200A, 2028-2029, 202F, 205F, 3000.
+    $writeMap->("", qw(200B));
+    $writeMap->("0020", qw(00A0 1680 2000..200A 2028 2029 202F 205F 3000));
+
+    print {$outFH} "\n# Total code points 238\n";
+    $outFH->close();
+}
+#-----------------------------------------------------------------------
+sub usage {
+    # Print the command-line help and terminate the script.
+    # main() only calls this when a required option is missing, so the
+    # exit status is non-zero to let calling scripts notice the failure.
+    print << "END";
+Usage:
+filterRFC3454.pl
+Options:
+        --sourcedir=<directory>
+        --destdir=<directory>
+        --src-filename=<name of RFC file>
+        --dest-filename=<name of destination file>
+        --A1             Generate data for table A.1
+        --B1             Generate data for table B.1
+        --B2             Generate data for table B.2
+        --B3             Generate data for table B.3
+        --C11            Generate data for table C.1.1
+        --C12            Generate data for table C.1.2
+        --C21            Generate data for table C.2.1
+        --C22            Generate data for table C.2.2
+        --C3             Generate data for table C.3
+        --C4             Generate data for table C.4
+        --C5             Generate data for table C.5
+        --C6             Generate data for table C.6
+        --C7             Generate data for table C.7
+        --C8             Generate data for table C.8
+        --C9             Generate data for table C.9
+        --iscsi          Generate data for iSCSI extra prohibited table
+        --xmpp-node      Generate data for XMPP extra prohibited table
+        --sasl           Generate data for SASL map table
+        --ldap           Generate data for LDAP map table
+        --normalize      Embed the normalization directive in the output file
+        --check-bidi     Embed the check bidi directive in the output file
+
+Note, --B2 and --B3 are mutually exclusive.
+
+e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt  --dest-filename=NamePrepProfile.txt --A1 --B1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --normalize --check-bidi
+
+filterRFC3454.pl filters the RFC file and creates String prep table files.
+The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt
+
+END
+  exit(1);
+}
+
+
diff --git a/intl/icu/source/tools/gensprep/gensprep.8.in b/intl/icu/source/tools/gensprep/gensprep.8.in
new file mode 100644
index 0000000000..e1e9fb32e2
--- /dev/null
+++ b/intl/icu/source/tools/gensprep/gensprep.8.in
@@ -0,0 +1,104 @@
+.\" Hey, Emacs! This is -*-nroff-*- you know...
+.\"
+.\" gensprep.8: manual page for the gensprep utility
+.\"
+.\" Copyright (C) 2016 and later: Unicode, Inc. and others.
+.\" License & terms of use: http://www.unicode.org/copyright.html
+.\" Copyright (C) 2003 IBM, Inc. and others.
+.\"
+.TH gensprep 8 "18 March 2003" "ICU MANPAGE" "ICU @VERSION@ Manual"
+.SH NAME
+.B gensprep
+\- compile StringPrep data from files filtered by filterRFC3454.pl 
+.SH SYNOPSIS
+.B gensprep
+[
+.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
+]
+[
+.BR "\-v\fP, \fB\-\-verbose"
+]
+[
+.BI "\-c\fP, \fB\-\-copyright"
+]
+[
+.BI "\-s\fP, \fB\-\-sourcedir" " source"
+]
+[
+.BI "\-d\fP, \fB\-\-destdir" " destination"
+]
+.SH DESCRIPTION
+.B gensprep
+reads filtered RFC 3454 files and compiles their
+information into a binary form.
+The resulting file,
+.BR <name>.icu ,
+can then be read directly by ICU, or used by
+.BR pkgdata (8)
+for incorporation into a larger archive or library.
+.LP
+The files read by
+.B gensprep
+are described in the
+.B FILES
+section. 
+.SH OPTIONS
+.TP
+.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
+Print help about usage and exit.
+.TP
+.BR "\-v\fP, \fB\-\-verbose"
+Display extra informative messages during execution.
+.TP
+.BI "\-c\fP, \fB\-\-copyright"
+Include a copyright notice into the binary data.
+.TP
+.BI "\-s\fP, \fB\-\-sourcedir" " source"
+Set the source directory to
+.IR source .
+The default source directory is specified by the environment variable
+.BR ICU_DATA .
+.TP
+.BI "\-d\fP, \fB\-\-destdir" " destination"
+Set the destination directory to
+.IR destination .
+The default destination directory is specified by the environment variable
+.BR ICU_DATA .
+.SH ENVIRONMENT
+.TP 10
+.B ICU_DATA
+Specifies the directory containing ICU data. Defaults to
+.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
+Some tools in ICU depend on the presence of the trailing slash. It is thus
+important to make sure that it is present if
+.B ICU_DATA
+is set.
+.SH FILES
+The following files are read by
+.B gensprep
+and are looked for in the
+.I source
+/misc for rfc3454_*.txt files and in 
+.I source
+/unidata for NormalizationCorrections.txt.
+.TP 20
+.B rfc3454_A_1.txt
+Contains the list of unassigned codepoints in Unicode version 3.2.0.\|.\|.. 
+.TP
+.B rfc3454_B_1.txt
+Contains the list of code points that are commonly mapped to nothing.\|.\|..
+.TP
+.B rfc3454_B_2.txt
+Contains the list of mappings for casefolding of code points when Normalization form NFKC is specified.\|.\|..
+.TP
+.B rfc3454_C_X.txt
+Contains the list of code points that are prohibited for IDNA.
+.TP
+.B NormalizationCorrections.txt
+Contains the list of code points whose normalization has changed since Unicode Version 3.2.0. 
+.SH VERSION
+@VERSION@
+.SH COPYRIGHT
+Copyright (C) 2000-2002 IBM, Inc. and others.
+.SH SEE ALSO
+.BR pkgdata (8)
diff --git a/intl/icu/source/tools/gensprep/gensprep.c b/intl/icu/source/tools/gensprep/gensprep.c
new file mode 100644
index 0000000000..10b0e45390
--- /dev/null
+++ b/intl/icu/source/tools/gensprep/gensprep.c
@@ -0,0 +1,460 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2016, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  gensprep.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003-02-06
+*   created by: Ram Viswanadha
+*
+*   This program reads the Profile.txt files,
+*   parses them, and extracts the data for StringPrep profile.
+*   It then preprocesses it and writes a binary file for efficient use
+*   in various StringPrep conversion processes.
+*/
+
+#define USPREP_TYPE_NAMES_ARRAY 1
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cmemory.h"
+#include "cstring.h"
+#include "toolutil.h"
+#include "unewdata.h"
+#include "uoptions.h"
+#include "uparse.h"
+#include "sprpimpl.h"
+
+#include "unicode/uclean.h"
+#include "unicode/udata.h"
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+
+
+U_CDECL_BEGIN
+#include "gensprep.h"
+U_CDECL_END
+
+UBool beVerbose=false, haveCopyright=true;
+
+#define NORM_CORRECTIONS_FILE_NAME "NormalizationCorrections.txt"
+
+#define NORMALIZE_DIRECTIVE "normalize"
+#define NORMALIZE_DIRECTIVE_LEN 9
+#define CHECK_BIDI_DIRECTIVE "check-bidi"
+#define CHECK_BIDI_DIRECTIVE_LEN 10
+
+/* prototypes --------------------------------------------------------------- */
+
+static void
+parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode);
+
+static void
+parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode);
+
+
+/* -------------------------------------------------------------------------- */
+/* Command-line option table handed to u_parseArgs() in main().
+ * main() and strprepProfileLineFn() address entries through the enum
+ * that follows (options[HELP], options[NORMALIZE], ...), so the order of
+ * the entries here and the order of the enum constants must stay in sync. */
+static UOption options[]={
+    UOPTION_HELP_H,
+    UOPTION_HELP_QUESTION_MARK,
+    UOPTION_VERBOSE,
+    UOPTION_COPYRIGHT,
+    UOPTION_DESTDIR,
+    UOPTION_SOURCEDIR,
+    UOPTION_ICUDATADIR,
+    UOPTION_BUNDLE_NAME,
+    { "normalization", NULL, NULL, NULL, 'n', UOPT_REQUIRES_ARG, 0 },
+    { "norm-correction", NULL, NULL, NULL, 'm', UOPT_REQUIRES_ARG, 0 },
+    { "check-bidi", NULL, NULL, NULL,  'k', UOPT_NO_ARG, 0},
+    { "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
+};
+/* Indexes into options[]; one constant per table entry, in table order. */
+enum{
+    HELP,
+    HELP_QUESTION_MARK,
+    VERBOSE,
+    COPYRIGHT,
+    DESTDIR,
+    SOURCEDIR,
+    ICUDATADIR,
+    BUNDLE_NAME,
+    NORMALIZE,              /* -n / --normalization <dir> */
+    NORM_CORRECTION_DIR,    /* -m / --norm-correction <dir> */
+    CHECK_BIDI,             /* -k / --check-bidi */
+    UNICODE_VERSION         /* -u / --unicode <version> */
+};
+
+/*
+ * Print the usage text to stderr.
+ *
+ * argc  the (possibly negative) value returned by u_parseArgs(); a negative
+ *       value means that the command line could not be parsed
+ * argv  the argument vector; argv[0] is used as the program name
+ *
+ * Returns U_ILLEGAL_ARGUMENT_ERROR when argc is negative, U_ZERO_ERROR
+ * otherwise. main() uses the return value as the process exit code.
+ */
+static int printHelp(int argc, char* argv[]){
+    /*
+     * Broken into chunks because the C89 standard says the minimum
+     * required supported string length is 509 bytes.
+     */
+    fprintf(stderr,
+        "Usage: %s [-options] [file_name]\n"
+        "\n"
+        "Read the files specified and\n"
+        "create a binary file [package-name]_[bundle-name]." DATA_TYPE " with the StringPrep profile data\n"
+        "\n",
+        argv[0]);
+    fprintf(stderr,
+        "Options:\n"
+        "\t-h or -? or --help       print this usage text\n"
+        "\t-v or --verbose          verbose output\n"
+        "\t-c or --copyright        include a copyright notice\n");
+    fprintf(stderr,
+        "\t-d or --destdir          destination directory, followed by the path\n"
+        "\t-s or --sourcedir        source directory of ICU data, followed by the path\n"
+        "\t-b or --bundle-name      generate the output data file with the name specified\n"
+        "\t-i or --icudatadir       directory for locating any needed intermediate data files,\n"
+        "\t                         followed by path, defaults to %s\n",
+        u_getDataDirectory());
+    /* the long name must match the "normalization" entry of the options table */
+    fprintf(stderr,
+        "\t-n or --normalization    turn on the option for normalization and include mappings\n"
+        "\t                         from NormalizationCorrections.txt from the given path,\n"
+        "\t                         e.g: /test/icu/source/data/unidata\n");
+    fprintf(stderr,
+        "\t-m or --norm-correction  use NormalizationCorrections.txt from the given path\n"
+        "\t                         when the input file contains a normalization directive.\n"
+        "\t                         unlike -n/--normalization, this option does not force the\n"
+        "\t                         normalization.\n");
+    fprintf(stderr,
+        "\t-k or --check-bidi       turn on the option for checking for BiDi in the profile\n"
+        "\t-u or --unicode          version of Unicode to be used with this profile followed by the version\n"
+        );
+    return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
+}
+
+
+/*
+ * Program entry point.
+ *
+ * Parses the command line, reads the StringPrep profile named by the first
+ * non-option argument (relative to the source directory), optionally reads
+ * NormalizationCorrections.txt, and writes the binary profile through
+ * generateData().
+ *
+ * Returns a UErrorCode value as the process exit code: U_ZERO_ERROR on
+ * success (and also when only the usage text was printed for a missing
+ * file name or a missing -u option).
+ */
+extern int
+main(int argc, char* argv[]) {
+#if !UCONFIG_NO_IDNA
+    char* filename = NULL;
+#endif
+    const char *srcDir=NULL, *destDir=NULL, *icuUniDataDir=NULL;
+    const char *bundleName=NULL, *inputFileName = NULL;
+    char *basename=NULL;
+    int32_t sprepOptions = 0;
+
+    UErrorCode errorCode=U_ZERO_ERROR;
+
+    U_MAIN_INIT_ARGS(argc, argv);
+
+    /* preset then read command line options */
+    options[DESTDIR].value=u_getDataDirectory();
+    options[SOURCEDIR].value="";
+    options[UNICODE_VERSION].value="0"; /* don't assume the unicode version */
+    options[BUNDLE_NAME].value = DATA_NAME;
+    options[NORMALIZE].value = "";
+
+    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
+
+    /* error handling, printing usage message */
+    if(argc<0) {
+        fprintf(stderr,
+            "error in command line argument \"%s\"\n",
+            argv[-argc]);
+    }
+    if(argc<0 || options[HELP].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
+        return printHelp(argc, argv);
+    }
+
+    /* get the options values */
+    beVerbose=options[VERBOSE].doesOccur;
+    haveCopyright=options[COPYRIGHT].doesOccur;
+    srcDir=options[SOURCEDIR].value;
+    destDir=options[DESTDIR].value;
+    bundleName = options[BUNDLE_NAME].value;
+    /*
+     * -n takes precedence over -m. If neither option was given,
+     * icuUniDataDir stays NULL because NORM_CORRECTION_DIR has no preset value.
+     */
+    if(options[NORMALIZE].doesOccur) {
+        icuUniDataDir = options[NORMALIZE].value;
+    } else {
+        icuUniDataDir = options[NORM_CORRECTION_DIR].value;
+    }
+
+    if(argc<2) {
+        /* print the help message */
+        return printHelp(argc, argv);
+    } else {
+        inputFileName = argv[1];
+    }
+    if(!options[UNICODE_VERSION].doesOccur){
+        return printHelp(argc, argv);
+    }
+    if(options[ICUDATADIR].doesOccur) {
+        u_setDataDirectory(options[ICUDATADIR].value);
+    }
+#if UCONFIG_NO_IDNA
+
+    fprintf(stderr,
+        "gensprep writes dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
+        " because UCONFIG_NO_IDNA is set, \n"
+        "see icu/source/common/unicode/uconfig.h\n");
+    generateData(destDir, bundleName);
+
+#else
+
+    setUnicodeVersion(options[UNICODE_VERSION].value);
+    /*
+     * One buffer is reused for both path names built below; the extra 40 bytes
+     * cover the "./" prefix, the separators, NORM_CORRECTIONS_FILE_NAME and the
+     * terminating NUL.
+     */
+    filename = (char* ) uprv_malloc(uprv_strlen(srcDir) + uprv_strlen(inputFileName) + (icuUniDataDir == NULL ? 0 : uprv_strlen(icuUniDataDir)) + 40); /* hopefully this should be enough */
+    if(filename == NULL) {
+        fprintf(stderr, "gensprep: out of memory\n");
+        return U_MEMORY_ALLOCATION_ERROR;
+    }
+
+    /* prepare the filename beginning with the source dir */
+    if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL && uprv_strchr(srcDir,U_FILE_ALT_SEP_CHAR) == NULL){
+        filename[0] = '.';
+        filename[1] = U_FILE_SEP_CHAR;
+        uprv_strcpy(filename+2,srcDir);
+    }else{
+        uprv_strcpy(filename, srcDir);
+    }
+
+    basename=filename+uprv_strlen(filename);
+    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
+        *basename++=U_FILE_SEP_CHAR;
+    }
+
+    /* initialize */
+    init();
+
+    /* process the file */
+    uprv_strcpy(basename,inputFileName);
+    parseMappings(filename,false, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "Could not open file %s for reading. Error: %s \n", filename, u_errorName(errorCode));
+        uprv_free(filename);
+        return errorCode;
+    }
+
+    if(options[NORMALIZE].doesOccur){ /* this option might be set by @normalize;; in the source file */
+        /*
+         * The @normalize directive can turn normalization on although neither
+         * -n nor -m supplied a directory; without this check the NULL pointer
+         * would be passed to uprv_strcpy() below.
+         */
+        if(icuUniDataDir == NULL) {
+            fprintf(stderr,
+                "gensprep: normalization is requested but no directory for "
+                NORM_CORRECTIONS_FILE_NAME " was given; use -n or -m\n");
+            uprv_free(filename);
+            return U_ILLEGAL_ARGUMENT_ERROR;
+        }
+
+        /* set up directory for NormalizationCorrections.txt */
+        uprv_strcpy(filename,icuUniDataDir);
+        basename=filename+uprv_strlen(filename);
+        if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
+            *basename++=U_FILE_SEP_CHAR;
+        }
+
+        /*
+         * NOTE(review): this second, unconditional separator produces a doubled
+         * separator for a non-empty directory; it is kept so that the paths
+         * that are opened do not change.
+         */
+        *basename++=U_FILE_SEP_CHAR;
+        uprv_strcpy(basename,NORM_CORRECTIONS_FILE_NAME);
+
+        parseNormalizationCorrections(filename,&errorCode);
+        if(U_FAILURE(errorCode)){
+            fprintf(stderr,"Could not open file %s for reading \n", filename);
+            uprv_free(filename);
+            return errorCode;
+        }
+        sprepOptions |= _SPREP_NORMALIZATION_ON;
+    }
+
+    if(options[CHECK_BIDI].doesOccur){ /* this option might be set by @check-bidi;; in the source file */
+        sprepOptions |= _SPREP_CHECK_BIDI_ON;
+    }
+
+    setOptions(sprepOptions);
+
+    /* process parsed data */
+    if(U_SUCCESS(errorCode)) {
+        /* write the data file */
+        generateData(destDir, bundleName);
+
+        cleanUpData();
+    }
+
+    uprv_free(filename);
+
+    u_cleanup();
+
+#endif
+
+    return errorCode;
+}
+
+#if !UCONFIG_NO_IDNA
+
+/*
+ * Per-line callback handed to u_parseDelimitedFile() by
+ * parseNormalizationCorrections().
+ *
+ * Field 0 holds the code point, field 1 the original (erroneous)
+ * decomposition and field 3 the Unicode version of the correction; the
+ * corrected decomposition in field 2 is ignored.
+ * The original decomposition is stored as a USPREP_MAP mapping only when the
+ * major.minor version of the correction is later than 3.2. The version of
+ * every line is passed to setUnicodeVersionNC().
+ * Any parse failure terminates the program through exit().
+ */
+static void U_CALLCONV
+normalizationCorrectionsLineFn(void *context,
+                    char *fields[][2], int32_t fieldCount,
+                    UErrorCode *pErrorCode) {
+    (void)context; // suppress compiler warnings about unused variable
+    (void)fieldCount; // suppress compiler warnings about unused variable
+    uint32_t mapping[40];
+    char *end, *s;
+    uint32_t code;
+    int32_t length;
+    UVersionInfo version;
+    UVersionInfo thisVersion;
+
+    /* get the character code, field 0 */
+    code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
+    if(U_SUCCESS(*pErrorCode) && end<=fields[0][0]) {
+        /* no hexadecimal digit was consumed: field 0 is not a code point */
+        *pErrorCode=U_PARSE_ERROR;
+    }
+    if(U_FAILURE(*pErrorCode)) {
+        fprintf(stderr, "gensprep: error parsing NormalizationCorrections.txt mapping at %s\n", fields[0][0]);
+        exit(*pErrorCode);
+    }
+    /* Original (erroneous) decomposition */
+    s = fields[1][0];
+
+    /* parse the mapping string */
+    length=u_parseCodePoints(s, mapping, UPRV_LENGTHOF(mapping), pErrorCode);
+
+    /* ignore corrected decomposition */
+
+    u_versionFromString(version,fields[3][0] );
+    u_versionFromString(thisVersion, "3.2.0");
+
+    if(U_FAILURE(*pErrorCode)) {
+        fprintf(stderr, "gensprep error parsing NormalizationCorrections.txt of U+%04lx - %s\n",
+                (long)code, u_errorName(*pErrorCode));
+        exit(*pErrorCode);
+    }
+
+    /* store the mapping */
+    if( version[0] > thisVersion[0] ||
+        ((version[0]==thisVersion[0]) && (version[1] > thisVersion[1]))
+        ){
+        storeMapping(code,mapping, length, USPREP_MAP, pErrorCode);
+    }
+    setUnicodeVersionNC(version);
+}
+
+/*
+ * Read the NormalizationCorrections.txt file named by filename and feed each
+ * of its lines (4 fields, separated by ';') to normalizationCorrectionsLineFn().
+ *
+ * Does nothing when pErrorCode is NULL or already holds a failure.
+ * A U_FILE_ACCESS_ERROR is left in *pErrorCode for the caller to report;
+ * every other failure is reported here and terminates the program.
+ */
+static void
+parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode) {
+    char *lineFields[4][2];
+
+    if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {
+        u_parseDelimitedFile(filename, ';', lineFields, 4, normalizationCorrectionsLineFn, NULL, pErrorCode);
+
+        /* success and "file could not be accessed" are both handed back to the caller */
+        if(U_SUCCESS(*pErrorCode) || *pErrorCode==U_FILE_ACCESS_ERROR) {
+            return;
+        }
+
+        fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
+        exit(*pErrorCode);
+    }
+}
+
+/*
+ * Per-line callback handed to u_parseDelimitedFile() by parseMappings().
+ *
+ * context is the name of the profile file (used in error messages only).
+ * Field 0 holds a code point or code point range, or a directive that starts
+ * with '@'; field 1 holds the mapping and field 2 the type name.
+ *
+ * The "@normalize" and "@check-bidi" directives switch on the corresponding
+ * entry of the options table. UNASSIGNED and PROHIBITED lines are stored as
+ * ranges, MAP lines as mappings; any other type name is an
+ * U_INVALID_FORMAT_ERROR. Failures other than an unparsable range terminate
+ * the program through exit().
+ */
+static void U_CALLCONV
+strprepProfileLineFn(void *context,
+              char *fields[][2], int32_t fieldCount,
+              UErrorCode *pErrorCode) {
+    (void)fieldCount; // suppress compiler warnings about unused variable
+    const char *profileName = (const char*) context;
+    const char *cursor = u_skipWhitespace(fields[0][0]);
+    const char *typeField;
+    char *mapField;
+    char *parseEnd;
+    uint32_t target[40];
+    uint32_t first=0, last=0;
+    uint32_t codePoint;
+    int32_t count;
+    UStringPrepType rangeType = USPREP_UNASSIGNED;
+    UBool isRange = false;
+
+    if (*cursor == '@') {
+        /* special directive: compare the text after '@' with the known names */
+        int32_t remaining;
+
+        cursor++;
+        remaining = (int32_t)(fields[0][1] - cursor);
+        if (remaining >= NORMALIZE_DIRECTIVE_LEN
+            && uprv_strncmp(cursor, NORMALIZE_DIRECTIVE, NORMALIZE_DIRECTIVE_LEN) == 0) {
+            options[NORMALIZE].doesOccur = true;
+            return;
+        }
+        if (remaining >= CHECK_BIDI_DIRECTIVE_LEN
+            && uprv_strncmp(cursor, CHECK_BIDI_DIRECTIVE, CHECK_BIDI_DIRECTIVE_LEN) == 0) {
+            options[CHECK_BIDI].doesOccur = true;
+            return;
+        }
+        /* unknown directive: report it, then continue with the regular line handling */
+        fprintf(stderr, "gensprep error parsing a directive %s.", fields[0][0]);
+    }
+
+    typeField = fields[2][0];
+    mapField = fields[1][0];
+
+    /* UNASSIGNED is tested before PROHIBITED; both kinds of line carry a range */
+    if(uprv_strstr(typeField, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
+        isRange = true;
+        rangeType = USPREP_UNASSIGNED;
+    }else if(uprv_strstr(typeField, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
+        isRange = true;
+        rangeType = USPREP_PROHIBITED;
+    }
+
+    if(isRange){
+
+        u_parseCodePointRange(cursor, &first, &last, pErrorCode);
+        if(U_FAILURE(*pErrorCode)){
+            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
+            return;
+        }
+
+        /* store the range */
+        storeRange(first, last, rangeType, pErrorCode);
+
+    }else if(uprv_strstr(typeField, usprepTypeNames[USPREP_MAP])!=NULL){
+
+        /* get the character code, field 0; it must fill the field up to its end */
+        codePoint=(uint32_t)uprv_strtoul(cursor, &parseEnd, 16);
+        if(parseEnd<=cursor || parseEnd!=fields[0][1]) {
+            fprintf(stderr, "gensprep: syntax error in field 0 at %s\n", fields[0][0]);
+            *pErrorCode=U_PARSE_ERROR;
+            exit(U_PARSE_ERROR);
+        }
+
+        /* parse the mapping string */
+        count=u_parseCodePoints(mapField, target, sizeof(target)/4, pErrorCode);
+
+        /* store the mapping */
+        storeMapping(codePoint, target, count, USPREP_MAP, pErrorCode);
+
+    }else{
+        *pErrorCode = U_INVALID_FORMAT_ERROR;
+    }
+
+    if(U_FAILURE(*pErrorCode)) {
+        fprintf(stderr, "gensprep error parsing  %s line %s at %s. Error: %s\n",profileName,
+               fields[0][0],fields[2][0],u_errorName(*pErrorCode));
+        exit(*pErrorCode);
+    }
+
+}
+
+/*
+ * Read the profile file named by filename and feed each of its lines
+ * (3 fields, separated by ';') to strprepProfileLineFn(), which receives the
+ * file name as its context.
+ *
+ * Does nothing when pErrorCode is NULL or already holds a failure.
+ * With reportError false a U_FILE_ACCESS_ERROR is left in *pErrorCode for the
+ * caller; every other failure is reported here and terminates the program.
+ */
+static void
+parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode) {
+    char *lineFields[3][2];
+
+    if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {
+        u_parseDelimitedFile(filename, ';', lineFields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
+
+        if(U_SUCCESS(*pErrorCode)) {
+            return;
+        }
+        if(!reportError && *pErrorCode==U_FILE_ACCESS_ERROR) {
+            /* the caller asked to handle an inaccessible file itself */
+            return;
+        }
+
+        fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
+        exit(*pErrorCode);
+    }
+}
+
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/intl/icu/source/tools/gensprep/gensprep.h b/intl/icu/source/tools/gensprep/gensprep.h
new file mode 100644
index 0000000000..a2e5e61f9a
--- /dev/null
+++ b/intl/icu/source/tools/gensprep/gensprep.h
@@ -0,0 +1,83 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  gensprep.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003-02-06
+*   created by: Ram Viswanadha
+*/
+
+#ifndef __GENIDN_H__
+#define __GENIDN_H__
+
+#include "unicode/utypes.h"
+#include "sprpimpl.h"
+
+/* file definitions */
+#define DATA_NAME "sprep"
+#define DATA_TYPE "spp"
+
+/*
+ * data structure that holds the IDN properties for one or more
+ * code point(s) at build time
+ */
+
+ 
+/* global flags */
+extern UBool beVerbose, haveCopyright;
+
+/* prototypes */
+/* Parse the version string v and record it as the data version of the output file. */
+extern void
+setUnicodeVersion(const char *v);
+/* Record the Unicode version of a NormalizationCorrections.txt entry in the
+ * indexes written to the output file. */
+extern void
+setUnicodeVersionNC(UVersionInfo version);
+/* Allocate and open the build-time trie; terminates the program on failure. */
+extern void
+init(void);
+/* storeMapping() records a mapping of `length` code points for one code point;
+ * storeRange() records a type for the code points start..end.
+ * NOTE(review): semantics are taken from the call sites in gensprep.c. */
+#if !UCONFIG_NO_IDNA
+extern void
+storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, UStringPrepType type, UErrorCode* status);
+extern void
+storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status);
+#endif
+/* Write the data file; main() passes the destination directory and the bundle name. */
+extern void
+generateData(const char *dataDir, const char* bundleName);
+/* Set the profile option bits; main() passes a combination of
+ * _SPREP_NORMALIZATION_ON and _SPREP_CHECK_BIDI_ON. */
+extern void
+setOptions(int32_t options);
+/* Called by main() after generateData(); presumably releases the build-time data. */
+extern void
+cleanUpData(void);
+
+/*
+extern void
+storeIDN(uint32_t code, IDN *idn);
+
+extern void
+processData(void);
+
+
+*/
+#endif
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/intl/icu/source/tools/gensprep/gensprep.vcxproj b/intl/icu/source/tools/gensprep/gensprep.vcxproj
new file mode 100644
index 0000000000..c6f7bbd861
--- /dev/null
+++ b/intl/icu/source/tools/gensprep/gensprep.vcxproj
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{631C23CE-6C1D-4875-88F0-85E0A42B36EA}</ProjectGuid>
+  </PropertyGroup>
+  <PropertyGroup Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <!-- The following import will include the 'default' configuration options for VS projects. -->
+  <Import Project="..\..\allinone\Build.Windows.ProjectConfiguration.props" />
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir>.\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>.\$(Platform)\$(Configuration)\</IntDir>
+    <!-- The ICU projects use "Win32" to mean "x86", so we need to special case it. -->
+    <OutDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</IntDir>
+    <!-- Disable Incremental Linking for Release builds as it prevents Link-time Code Generation -->
+    <LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)'=='Release'">false</LinkIncremental>
+  </PropertyGroup>
+  <!-- Options that are common to *all* configurations -->
+  <ItemDefinitionGroup>
+    <Midl>
+      <TypeLibraryName>$(OutDir)\gensprep.tlb</TypeLibraryName>
+    </Midl>
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <CompileAs>Default</CompileAs>
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+      <AdditionalIncludeDirectories>..\..\common;..\toolutil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeaderOutputFile>$(OutDir)\gensprep.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(OutDir)/</AssemblerListingLocation>
+      <ObjectFileName>$(OutDir)/</ObjectFileName>
+      <ProgramDataBaseFileName>$(OutDir)\gensprep.pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <OutputFile>$(OutDir)\gensprep.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\..\$(IcuLibOutputDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+    <CustomBuildStep>
+      <Command>copy "$(TargetPath)" ..\..\..\$(IcuBinOutputDir)</Command>
+      <Outputs>..\..\..\$(IcuBinOutputDir)\$(TargetFileName);%(Outputs)</Outputs>
+    </CustomBuildStep>
+  </ItemDefinitionGroup>
+  <!-- Options that are common to all 'Debug' project configurations -->
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+    <ClCompile>
+      <BrowseInformation>true</BrowseInformation>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>icuucd.lib;icutud.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <!-- Options that are common to all 'Release' project configurations -->
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
+    <ClCompile>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>icuuc.lib;icutu.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="gensprep.c" />
+    <ClCompile Include="store.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="gensprep.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/intl/icu/source/tools/gensprep/gensprep.vcxproj.filters b/intl/icu/source/tools/gensprep/gensprep.vcxproj.filters
new file mode 100644
index 0000000000..2791b3aa6a
--- /dev/null
+++ b/intl/icu/source/tools/gensprep/gensprep.vcxproj.filters
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{bb521e6b-d70a-4efd-9399-408729059da6}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{837c7f4e-341d-4455-aa1e-f6ff7a03b065}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{a80f327a-7fb8-4737-8bd9-0f4b26c2c344}</UniqueIdentifier>
+      <Extensions>ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="gensprep.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="store.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="gensprep.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/intl/icu/source/tools/gensprep/sources.txt b/intl/icu/source/tools/gensprep/sources.txt
new file mode 100644
index 0000000000..c369456cb3
--- /dev/null
+++ b/intl/icu/source/tools/gensprep/sources.txt
@@ -0,0 +1,2 @@
+gensprep.c
+store.c
diff --git a/intl/icu/source/tools/gensprep/store.c b/intl/icu/source/tools/gensprep/store.c
new file mode 100644
index 0000000000..c3712febb4
--- /dev/null
+++ b/intl/icu/source/tools/gensprep/store.c
@@ -0,0 +1,653 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  store.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003-02-06
+*   created by: Ram Viswanadha
+*
+*/
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "filestrm.h"
+#include "toolutil.h"
+#include "unicode/udata.h"
+#include "unicode/utf16.h"
+#include "utrie.h"
+#include "unewdata.h"
+#include "gensprep.h"
+#include "uhash.h"
+
+
+#define DO_DEBUG_OUT 0
+
+
+/*
+ * StringPrep profile file format ------------------------------------
+ *
+ * The file format prepared and written here contains a 16-bit trie and a mapping table.
+ *
+ * Before the data contents described below, there are the headers required by
+ * the udata API for loading ICU data. Especially, a UDataInfo structure
+ * precedes the actual data. It contains platform properties values and the
+ * file format version.
+ *
+ * The following is a description of format version 2.
+ *
+ * Data contents:
+ *
+ * The contents is a parsed, binary form of RFC3454 and possibly
+ * NormalizationCorrections.txt depending on the options specified on the profile.
+ *
+ * Any Unicode code point from 0 to 0x10ffff can be looked up to get
+ * the trie-word, if any, for that code point. This means that the input
+ * to the lookup are 21-bit unsigned integers, with not all of the
+ * 21-bit range used.
+ *
+ * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
+ * After that there are the following structures:
+ *
+ * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
+ *
+ * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
+ *
+ * uint16_t mappingTable[];                     -- Contains the sequence of code units that the code point maps to
+ *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
+ *
+ * The indexes array contains the following values:
+ *  indexes[_SPREP_INDEX_TRIE_SIZE]                  -- The size of the StringPrep trie in bytes
+ *  indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]          -- The size of the mappingTable in bytes
+ *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The index of Unicode version of last entry in NormalizationCorrections.txt
+ *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of 1 UChar  mapping index in the mapping table
+ *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of 2 UChars mapping index in the mapping table
+ *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
+ *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of 4 UChars mapping index in the mapping table
+ *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g: _SPREP_NORMALIZATION_ON, _SPREP_CHECK_BIDI_ON
+ *
+ *
+ * StringPrep Trie :
+ *
+ * The StringPrep trie is a 16-bit trie that contains data for the profile.
+ * Each code point is associated with a value (trie-word) in the trie.
+ *
+ * - structure of data words from the trie
+ *
+ *  i)  A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
+ *      represents the type associated with the code point
+ *      if(trieWord >= _SPREP_TYPE_THRESHOLD){
+ *          type = trieWord - 0xFFF0;
+ *      }
+ *      The type can be :
+ *             USPREP_UNASSIGNED
+ *             USPREP_PROHIBITED
+ *             USPREP_DELETE
+ *
+ *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
+ *      contains distribution described below
+ *
+ *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for code points that are both prohibited and mapped.
+ *      1       -  ON : The value in the next 14 bits is an index into the mapping table
+ *                 OFF: The value in the next 14 bits is a delta value from the code point
+ *      2..15   -  Contains data as described by bit 1. If all bits are set
+ *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
+ *
+ *
+ * Mapping Table:
+ * The data in mapping table is sorted according to the length of the mapping sequence.
+ * If the type of the code point is USPREP_MAP and value in trie word is an index, the index
+ * is compared with start indexes of sequence length start to figure out the length according to
+ * the following algorithm:
+ *
+ *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
+ *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
+ *                   length = 1;
+ *               }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
+ *                        index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
+ *                   length = 2;
+ *               }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
+ *                        index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
+ *                   length = 3;
+ *               }else{
+ *                   // The first position in the mapping table contains the length
+ *                   // of the sequence
+ *                   length = mappingTable[index++];
+ *
+ *               }
+ *
+ */
+
+/* file data ---------------------------------------------------------------- */
+/* indexes[] value names */
+
+#if UCONFIG_NO_IDNA
+
+/* dummy UDataInfo cf. udata.h */
+static UDataInfo dataInfo = {
+    sizeof(UDataInfo),
+    0,
+
+    U_IS_BIG_ENDIAN,
+    U_CHARSET_FAMILY,
+    U_SIZEOF_UCHAR,
+    0,
+
+    { 0, 0, 0, 0 },                 /* dummy dataFormat */
+    { 0, 0, 0, 0 },                 /* dummy formatVersion */
+    { 0, 0, 0, 0 }                  /* dummy dataVersion */
+};
+
+#else
+/* Header array of the output file, see the file format description above. */
+static int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
+/* Mapping table; allocated in storeMappingData() with mappingDataCapacity units. */
+static uint16_t* mappingData= NULL;
+static int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */
+static int16_t currentIndex = 0; /* the current index into the data trie */
+static int32_t maxLength = 0;  /* maximum length of mapping string */
+
+
+/* UDataInfo cf. udata.h */
+static UDataInfo dataInfo={
+    sizeof(UDataInfo),
+    0,
+
+    U_IS_BIG_ENDIAN,
+    U_CHARSET_FAMILY,
+    U_SIZEOF_UCHAR,
+    0,
+
+    { 0x53, 0x50, 0x52, 0x50 },                 /* dataFormat="SPRP" */
+    { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
+    { 3, 2, 0, 0 }                              /* dataVersion (Unicode version); overwritten by setUnicodeVersion() */
+};
+/*
+ * Parse the version string v and copy its four fields into
+ * dataInfo.dataVersion.
+ */
+void
+setUnicodeVersion(const char *v) {
+    UVersionInfo parsed;
+    int32_t field;
+
+    u_versionFromString(parsed, v);
+    /* dataVersion has four one-byte fields, like UVersionInfo */
+    for(field=0; field<4; ++field) {
+        dataInfo.dataVersion[field] = parsed[field];
+    }
+}
+
+/*
+ * Pack the four fields of version into one 32-bit value (field 0 in the most
+ * significant byte) and store it in
+ * indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION].
+ */
+void
+setUnicodeVersionNC(UVersionInfo version){
+    /*
+     * Widen each field to uint32_t before shifting: a uint8_t operand is
+     * promoted to a signed int, and shifting a value >= 128 left by 24 bits
+     * would overflow it.
+     */
+    uint32_t univer = (uint32_t)version[0] << 24;
+    univer |= (uint32_t)version[1] << 16;
+    univer |= (uint32_t)version[2] << 8;
+    univer |= (uint32_t)version[3];
+    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = (int32_t)univer;
+}
+static UNewTrie *sprepTrie;
+
+#define MAX_DATA_LENGTH 11500
+
+
+#define SPREP_DELTA_RANGE_POSITIVE_LIMIT              8191
+#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT              -8192
+
+
+/*
+ * Allocate and open the build-time trie (sprepTrie).
+ * Terminates the program with U_MEMORY_ALLOCATION_ERROR when the trie
+ * cannot be allocated or opened.
+ */
+extern void
+init(void) {
+
+    sprepTrie = (UNewTrie *)uprv_calloc(1, sizeof(UNewTrie));
+    if(sprepTrie == NULL) {
+        /* do not hand a NULL trie to utrie_open(); sprepTrie would stay NULL */
+        fprintf(stderr, "error: failed to allocate memory for the trie\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    /* initialize the trie */
+    if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, false)) {
+        fprintf(stderr, "error: failed to initialize tries\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+}
+
+static UHashtable* hashTable = NULL;
+
+/* Value stored in hashTable; storeMappingData() reads the code point from the entry's integer key. */
+typedef struct ValueStruct {
+    UChar* mapping;         /* heap buffer, released by valueDeleter() */
+    int16_t length;         /* compared with the mapping length in storeMappingData() */
+    UStringPrepType type;
+} ValueStruct;
+
+/*
+ * Value deleter callback for the hashtable: releases the mapping buffer of
+ * a ValueStruct and then the ValueStruct itself.
+ */
+static void U_CALLCONV valueDeleter(void* obj){
+    uprv_free(((ValueStruct*) obj)->mapping);
+    uprv_free(obj);
+}
+
+/* Hash callback for the hashtable: the integer key is its own hash code. */
+static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
+    const int32_t keyAsHash = parm.integer;
+    return keyAsHash;
+}
+
+/*
+ * Callback for comparing two entries (integer keys).
+ * A uhash key comparator has to return true when the two keys are EQUAL.
+ * Returning true for different keys meant that an existing key was never
+ * matched, so storing the same key twice added a second entry instead of
+ * replacing the first one.
+ */
+static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
+    return (UBool)(p1.integer == p2.integer);
+}
+
+
+/*
+ * Move every mapping queued in hashTable into the mappingData array and write
+ * the corresponding index word for its code point into sprepTrie.
+ *
+ * The hashtable is walked once per mapping length, starting at length 1, so
+ * that entries end up grouped by length. Each time a new length is started,
+ * the current write position is recorded in indexes[]; mappings longer than
+ * _SPREP_MAX_INDEX_TOP_LENGTH are written with an explicit length prefix.
+ *
+ * Does nothing when no mapping was ever queued (hashTable == NULL).
+ * Every error is fatal: a message is printed and the process exits.
+ */
+static void
+storeMappingData(void){
+
+    int32_t pos = UHASH_FIRST;
+    const UHashElement* element = NULL;
+    ValueStruct* value  = NULL;
+    int32_t codepoint = 0;
+    int32_t elementCount = 0;
+    int32_t writtenElementCount = 0;
+    int32_t mappingLength = 1; /* minimum mapping length */
+    int32_t oldMappingLength = 0;
+    uint16_t trieWord =0;
+    int32_t limitIndex = 0;
+
+    if (hashTable == NULL) {
+        return;
+    }
+    elementCount = uhash_count(hashTable);
+
+    /* initialize the mapping data */
+    mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR);
+    if (mappingData == NULL) {
+        fprintf(stderr, "error: failed to allocate the mapping data array\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    /* one full pass over the hashtable per mapping length, until every entry is written */
+    while(writtenElementCount < elementCount){
+
+        while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
+
+            codepoint = element->key.integer;
+            value = (ValueStruct*)element->value.pointer;
+
+            /* store the start of indexes */
+            if(oldMappingLength != mappingLength){
+                /* Assume that index[] is used according to the enums defined */
+                if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
+                    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
+                }
+                if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
+                   mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
+
+                    /* remember where the length-prefixed mappings begin */
+                    limitIndex = currentIndex;
+
+                }
+                oldMappingLength = mappingLength;
+            }
+
+            /* only entries of the length handled by this pass are written */
+            if(value->length == mappingLength){
+                uint32_t savedTrieWord = 0;
+                trieWord = currentIndex << 2;
+                /* turn on the 2nd bit to signal that the following bits contain an index */
+                trieWord += 0x02;
+
+                if(trieWord > _SPREP_TYPE_THRESHOLD){
+                    fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
+                    exit(U_ILLEGAL_CHAR_FOUND);
+                }
+                /* figure out if the code point has type already stored */
+                savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
+                if(savedTrieWord!=0){
+                    if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
+                        /* turn on the first bit in trie word */
+                        trieWord += 0x01;
+                    }else{
+                        /*
+                         * the codepoint has value something other than prohibited
+                         * and a mapping .. error!
+                         */
+                        fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
+                        exit(U_ILLEGAL_ARGUMENT_ERROR);
+                    }
+                }
+
+                /* now set the value in the trie */
+                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
+                    fprintf(stderr,"Could not set the value for code point.\n");
+                    exit(U_ILLEGAL_ARGUMENT_ERROR);
+                }
+
+                /* written the trie word for the codepoint... increment the count*/
+                writtenElementCount++;
+
+                /* sanity check are we exceeding the max number allowed */
+                if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
+                    fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n",
+                        currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
+                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
+                }
+
+                /*
+                 * Make sure the entry (plus its length prefix, if any) fits
+                 * BEFORE writing it, so an undersized array is never overrun.
+                 */
+                if (currentIndex + value->length +
+                        (mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ? 1 : 0) > mappingDataCapacity) {
+                    /* If this happens there is a bug in the computation of the mapping data size in storeMapping() */
+                    fprintf(stderr, "gensprep, fatal error at %s, %d.  Aborting.\n", __FILE__, __LINE__);
+                    exit(U_INTERNAL_PROGRAM_ERROR);
+                }
+
+                /* copy the mapping data */
+                /* write the length */
+                if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
+                     /* the cast here is safe since we do not expect the length to be > 65535 */
+                     mappingData[currentIndex++] = (uint16_t) mappingLength;
+                }
+                /* copy the contents to mappingData array */
+                u_memmove(mappingData+currentIndex, value->mapping, value->length);
+                currentIndex += value->length;
+            }
+        }
+        mappingLength++;
+        /* restart the hashtable iteration for the next length */
+        pos = UHASH_FIRST;
+    }
+    /* set the last length for range check */
+    if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
+        indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
+    }else{
+        indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
+    }
+
+}
+
+extern void setOptions(int32_t options){ /* store the option bits in the _SPREP_OPTIONS slot of indexes[] */
+    indexes[_SPREP_OPTIONS] = options;
+}
+/*
+ * Record the mapping of one code point.
+ *
+ *   codepoint  the code point being mapped
+ *   mapping    the code points it maps to (length entries)
+ *   length     number of entries in mapping; 0 stores the reserved
+ *              _SPREP_MAX_INDEX_VALUE word instead of a mapping
+ *   type       kept with the queued entry
+ *   status     in/out ICU error code, used for the hashtable operations
+ *
+ * Depending on the UTF-16 length of the mapping:
+ *   - 0 code units: the reserved word is written to the trie directly;
+ *   - 1 code unit whose distance from codepoint lies within
+ *     [SPREP_DELTA_RANGE_NEGATIVE_LIMIT, SPREP_DELTA_RANGE_POSITIVE_LIMIT]
+ *     and whose shifted word stays below _SPREP_TYPE_THRESHOLD: the delta is
+ *     written to the trie directly;
+ *   - anything else: the UTF-16 form is queued in hashTable and placed into
+ *     the mapping table later by storeMappingData().
+ *
+ * Every error is fatal: a message is printed and the process exits.
+ */
+extern void
+storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
+             UStringPrepType type, UErrorCode* status){
+
+
+    UChar* map = NULL;
+    int16_t adjustedLen=0, i, j;
+    uint16_t trieWord = 0;
+    ValueStruct *value = NULL;
+    uint32_t savedTrieWord = 0;
+
+    /* initialize the hashtable */
+    if(hashTable==NULL){
+        hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
+        if(hashTable==NULL){
+            /* without this check the deleter would be installed on a NULL table */
+            fprintf(stderr, "Failed to open the hash table. Error: %s\n", u_errorName(*status));
+            exit(U_FAILURE(*status) ? *status : U_MEMORY_ALLOCATION_ERROR);
+        }
+        uhash_setValueDeleter(hashTable, valueDeleter);
+    }
+
+    /* figure out if the code point has type already stored */
+    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
+    if(savedTrieWord!=0){
+        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
+            /*
+             * turn on the first bit in trie word
+             * NOTE(review): both direct-store paths below assign trieWord
+             * afresh, so this bit does not reach the trie on those paths -
+             * confirm whether that is intended.
+             */
+            trieWord += 0x01;
+        }else{
+            /*
+             * the codepoint has value something other than prohibited
+             * and a mapping .. error!
+             */
+            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+    }
+
+    /* figure out the real length */
+    for(i=0; i<length; i++){
+        adjustedLen += U16_LENGTH(mapping[i]);
+    }
+
+    if(adjustedLen == 0){
+        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
+        /* make sure that the value of trieWord is less than the threshold */
+        if(trieWord < _SPREP_TYPE_THRESHOLD){
+            /* now set the value in the trie */
+            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
+                fprintf(stderr,"Could not set the value for code point.\n");
+                exit(U_ILLEGAL_ARGUMENT_ERROR);
+            }
+            /* value is set so just return */
+            return;
+        }else{
+            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
+            exit(U_ILLEGAL_CHAR_FOUND);
+        }
+    }
+
+    if(adjustedLen == 1){
+        /*
+         * calculate the delta in 32 bits: truncating the operands or the
+         * difference to 16 bits before the range check could wrap a large
+         * difference into the accepted range and store a wrong delta
+         */
+        int32_t delta = (int32_t)codepoint - (int32_t)mapping[0];
+        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
+
+            trieWord = (uint16_t)delta;
+            trieWord <<= 2;
+
+
+            /* make sure that the second bit is OFF */
+            if((trieWord & 0x02) != 0 ){
+                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
+                exit(U_INTERNAL_PROGRAM_ERROR);
+            }
+            /* make sure that the value of trieWord is less than the threshold */
+            if(trieWord < _SPREP_TYPE_THRESHOLD){
+                /* now set the value in the trie */
+                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
+                    fprintf(stderr,"Could not set the value for code point.\n");
+                    exit(U_ILLEGAL_ARGUMENT_ERROR);
+                }
+                /* value is set so just return */
+                return;
+            }
+        }
+        /*
+         * if the delta is not in the given range or if the trieWord is larger than the threshold
+         * just fall through for storing the mapping in the mapping table
+         */
+    }
+
+    /* convert the mapping to UTF-16; one spare unit is allocated */
+    map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR);
+    if(map == NULL){
+        fprintf(stderr, "Failed to allocate memory for the mapping of \\U%08X.\n", (int)codepoint);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    for (i=0, j=0; i<length; i++) {
+        U16_APPEND_UNSAFE(map, j, mapping[i]);
+    }
+
+    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
+    if(value == NULL){
+        fprintf(stderr, "Failed to allocate memory for the mapping of \\U%08X.\n", (int)codepoint);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    value->mapping = map;
+    value->type    = type;
+    value->length  = adjustedLen;
+    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
+        /* long mappings are stored with a length prefix: reserve one more unit */
+        mappingDataCapacity++;
+    }
+    if(maxLength < value->length){
+        maxLength = value->length;
+    }
+    uhash_iput(hashTable,codepoint,value,status);
+    mappingDataCapacity += adjustedLen;
+
+    if(U_FAILURE(*status)){
+        fprintf(stderr, "Failed to put entries into the hash table. Error: %s\n", u_errorName(*status));
+        exit(*status);
+    }
+}
+
+
+/*
+ * Store the type word (_SPREP_TYPE_THRESHOLD + type) for the code points
+ * start..end (inclusive).
+ *
+ * A real range (start != end) is written with utrie_setRange32() without
+ * overwriting. For a single code point an existing trie word is honoured:
+ *   - a word below the threshold combined with USPREP_PROHIBITED gets its
+ *     lowest bit switched on and is written back;
+ *   - any other existing word must equal the new type word, otherwise the
+ *     tool exits with an error.
+ *
+ * status is not used. Every error is fatal.
+ */
+extern void
+storeRange(uint32_t start, uint32_t end, UStringPrepType type, UErrorCode* status){
+    uint16_t typeWord = 0;
+    uint32_t existing = 0;
+
+    (void)status; /* suppress compiler warnings about unused variable */
+
+    if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
+        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
+        exit(U_ILLEGAL_CHAR_FOUND);
+    }
+    /* the top 4 bits contain the value */
+    typeWord = (uint16_t)(_SPREP_TYPE_THRESHOLD + type);
+
+    /* a real range: set it in one go, refusing to overwrite */
+    if(start != end){
+        if(!utrie_setRange32(sprepTrie, start, end+1, typeWord, false)){
+            fprintf(stderr,"Value for certain codepoint already set.\n");
+            exit(U_ILLEGAL_CHAR_FOUND);
+        }
+        return;
+    }
+
+    /* a single code point: look at what is stored already */
+    existing = utrie_get32(sprepTrie, start, NULL);
+
+    if(existing != 0 && existing < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
+        /*
+         * The trie word already holds a mapping, and the only other type a
+         * mapped code point may have is USPREP_PROHIBITED: switch on bit 0.
+         * The downcast is safe since only 16 bit values are saved.
+         */
+        typeWord = (uint16_t)(existing + 0x01);
+
+        /* the combined word must stay below the threshold */
+        if(typeWord >= _SPREP_TYPE_THRESHOLD){
+            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
+            exit(U_ILLEGAL_CHAR_FOUND);
+        }
+        if(!utrie_set32(sprepTrie,start,typeWord)){
+            fprintf(stderr,"Could not set the value for code point.\n");
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+        return;
+    }
+
+    if(existing != 0 && existing != typeWord){
+        fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+
+    /* nothing stored yet, or the very same word: (re)write it */
+    if(!utrie_set32(sprepTrie,start,typeWord)){
+        fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+
+}
+
+/*
+ * Folding callback given to utrie_serialize(): scans the 0x400 code points
+ * beginning at start and returns offset as soon as a non-zero trie value is
+ * found, or 0 when all of them are zero.
+ */
+static uint32_t U_CALLCONV
+getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
+    const UChar32 blockEnd = start + 0x400;
+    UChar32 c;
+    UBool inBlockZero;
+
+    for(c = start; c < blockEnd; ) {
+        uint32_t word = utrie_get32(trie, c, &inBlockZero);
+
+        if(inBlockZero) {
+            /* the lookup reported block zero: skip a whole data block */
+            c += UTRIE_DATA_BLOCK_LENGTH;
+            continue;
+        }
+        if(word != 0) {
+            return (uint32_t)offset;
+        }
+        ++c;
+    }
+    return 0;
+
+}
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+/*
+ * Write the output data file named bundleName (type DATA_TYPE) into dataDir.
+ *
+ * Unless UCONFIG_NO_IDNA is set, the queued mappings are first placed into
+ * the mapping table (storeMappingData()), the trie is serialized, and the
+ * file body is written as: indexes[], serialized trie, mapping data.
+ * With UCONFIG_NO_IDNA only the header is written.
+ *
+ * The number of bytes reported by udata_finish() is compared with the size
+ * computed here. Every error is fatal: a message is printed and the process
+ * exits.
+ */
+extern void
+generateData(const char *dataDir, const char* bundleName) {
+    static uint8_t sprepTrieBlock[100000];
+
+    UNewDataMemory *pData;
+    UErrorCode errorCode=U_ZERO_ERROR;
+    int32_t size, dataLength;
+    char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);
+
+#if UCONFIG_NO_IDNA
+
+    size=0;
+
+#else
+
+    int32_t sprepTrieSize;
+
+    /* sort and add mapping data */
+    storeMappingData();
+
+    sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, true, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
+        exit(errorCode);
+    }
+
+    /* expected size of the file body: trie + mapping data + index array */
+    size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
+    if(beVerbose) {
+        printf("size of sprep trie              %5u bytes\n", (int)sprepTrieSize);
+        printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
+        printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR);
+        printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
+        printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
+    }
+
+#endif
+
+    /* the name buffer is written to right below: make sure it exists */
+    if(fileName == NULL) {
+        fprintf(stderr, "gensprep: unable to allocate memory for the output file name\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    fileName[0]=0;
+    uprv_strcat(fileName,bundleName);
+    /* write the data */
+    pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
+                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
+        exit(errorCode);
+    }
+
+#if !UCONFIG_NO_IDNA
+
+    indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
+    indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
+
+    /* body layout: index array, serialized trie, mapping data */
+    udata_writeBlock(pData, indexes, sizeof(indexes));
+    udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
+    udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
+
+
+#endif
+
+    /* finish up */
+    dataLength=udata_finish(pData, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
+        exit(errorCode);
+    }
+
+    /* cross-check the written length against the size computed above */
+    if(dataLength!=size) {
+        fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
+            (long)dataLength, (long)size);
+        exit(U_INTERNAL_PROGRAM_ERROR);
+    }
+
+#if !UCONFIG_NO_IDNA
+    /* done with writing the data .. close the hashtable */
+    if (hashTable != NULL) {
+        uhash_close(hashTable);
+    }
+#endif
+
+    uprv_free(fileName);
+}
+
+#if !UCONFIG_NO_IDNA
+
+/*
+ * Release the build-time data: close the trie, free the UNewTrie structure
+ * allocated by init(), and free the mapping data array.
+ */
+extern void
+cleanUpData(void) {
+    /* the trie: first its contents, then the structure itself */
+    utrie_close(sprepTrie);
+    uprv_free(sprepTrie);
+
+    /* the mapping table built by storeMappingData() */
+    uprv_free(mappingData);
+}
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 01:47:29 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 01:47:29 +0000
commit	0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
tree	a31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /intl/icu/source/tools/gensprep
parent	Initial commit. (diff)
download	firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.tar.xz firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.zip