18 files changed, 5144 insertions, 0 deletions
diff --git a/util/Makefile.in b/util/Makefile.in
new file mode 100644
index 0000000..7ad18c0
--- /dev/null
+++ b/util/Makefile.in
@@ -0,0 +1,74 @@
+#
+# Standard e2fsprogs prologue....
+#
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+top_builddir = ..
+my_dir = util
+INSTALL = @INSTALL@
+MKDIR_P = @MKDIR_P@
+
+SRCS = $(srcdir)/subst.c $(srcdir)/mkutf8data.c
+
+@MCONFIG@
+
+.c.o:
+	$(E) "	CC $<"
+	$(Q) $(BUILD_CC) -c $(BUILD_CFLAGS) $< -o $@
+	$(Q) $(CHECK_CMD) $(ALL_CFLAGS) $<
+	$(Q) $(CPPCHECK_CMD) $(CPPFLAGS) $<
+
+PROGS=		subst symlinks mkutf8data
+
+all:: $(PROGS) gen-tarball
+
+dirpaths.h:
+	$(E) "	CREATE dirpaths.h"
+	$(Q) echo "/* fake dirpaths.h for config.h */" > dirpaths.h
+
+subst.o: dirpaths.h
+
+subst: subst.o
+	$(E) "	LD $@"
+	$(Q) $(BUILD_CC) $(BUILD_LDFLAGS) -o subst subst.o
+
+mkutf8data: mkutf8data.o
+	$(E) "	LD $@"
+	$(Q) $(BUILD_CC) $(BUILD_LDFLAGS) -o mkutf8data mkutf8data.o
+
+copy_sparse: copy_sparse.o
+	$(E) "	LD $@"
+	$(Q) $(BUILD_CC) $(BUILD_LDFLAGS) -o copy_sparse copy_sparse.o
+
+symlinks: symlinks.o
+	$(E) "	LD $@"
+	$(Q) $(BUILD_CC) $(BUILD_LDFLAGS) -o symlinks symlinks.o
+
+gen-tarball: $(srcdir)/gen-tarball.in $(top_builddir)/config.status
+	$(E) "	CONFIG.STATUS $@"
+	$(Q) cd $(top_builddir); CONFIG_FILES=util/gen-tarball ./config.status
+	$(Q) chmod +x gen-tarball
+
+tarballs: gen-tarball
+	sh gen-tarball debian
+	sh gen-tarball all
+	sh gen-tarball subset
+
+clean::
+	$(RM) -f $(PROGS) \#* *.s *.o *.a *~ core *.tar.gz gen-tarball \
+		copy-sparse dirpaths.h install-symlink mkutf8data
+
+mostlyclean: clean
+
+distclean: clean
+	$(RM) -f .depend Makefile $(srcdir)/TAGS $(srcdir)/Makefile.in.old
+
+# +++ Dependency line eater +++
+# 
+# Makefile dependencies follow.  This must be the last section in
+# the Makefile.in file
+#
+subst.o: $(srcdir)/subst.c $(top_builddir)/lib/config.h dirpaths.h
+mkutf8data.o: $(srcdir)/mkutf8data.c
diff --git a/util/all.exclude b/util/all.exclude
new file mode 100644
index 0000000..d7d03b2
--- /dev/null
+++ b/util/all.exclude
@@ -0,0 +1,15 @@
+.git
+.hg
+.hgignore
+.pc
+patches
+README.subset
+build
+build[^/]*
+rpm.log
+TODO
+powerquest
+.exclude-file
+po/stamp-cat-id
+po/cat-id-tbl.c
+Meta
diff --git a/util/android-README.version.in b/util/android-README.version.in
new file mode 100644
index 0000000..c8a3217
--- /dev/null
+++ b/util/android-README.version.in
@@ -0,0 +1,3 @@
+URL: https://www.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/v@VER@/e2fsprogs-@FN@.tar.gz
+Version: @FN@
+BugComponent: 95221
diff --git a/util/android_config.h b/util/android_config.h
new file mode 100644
index 0000000..90b8f8a
--- /dev/null
+++ b/util/android_config.h
@@ -0,0 +1,69 @@
+#ifndef __APPLE__
+#define HAVE_MALLOC_H 1
+#endif
+
+#define ROOT_SYSCONFDIR "/etc"
+
+#define ENABLE_LIBSPARSE 1
+
+#define DISABLE_BACKTRACE 1
+#define HAVE_DIRENT_H 1
+#define HAVE_ERRNO_H 1
+#define HAVE_GETOPT_H 1
+#define HAVE_GETPWUID_R 1
+#define HAVE_INTPTR_T 1
+#define HAVE_INTTYPES_H 1
+#define HAVE_MMAP 1
+#define HAVE_SETJMP_H 1
+#define HAVE_SNPRINTF 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRCASECMP 1
+#define HAVE_STRDUP 1
+#define HAVE_STRINGS_H 1
+#define HAVE_STRNLEN 1
+#define HAVE_STRPTIME 1
+#define HAVE_SYSCONF 1
+#define HAVE_TYPE_SSIZE_T 1
+#define HAVE_UNISTD_H 1
+#define HAVE_UTIME_H 1
+
+#define HAVE_SYS_STAT_H 1
+#if !defined(__APPLE__)
+# define HAVE_SYS_SYSMACROS_H 1
+#endif
+#define HAVE_SYS_TIME_H 1
+#define HAVE_SYS_TYPES_H 1
+
+#if defined(_WIN32)
+# define HAVE_LINUX_TYPES_H 1
+#endif
+#if defined(__APPLE__) || defined(__linux__)
+# define HAVE_FCNTL 1
+# define HAVE_FSYNC 1
+# define HAVE_GETPAGESIZE 1
+# define HAVE_NET_IF_H 1
+# define HAVE_NETINET_IN_H 1
+# define HAVE_PREAD 1
+# define HAVE_PWRITE 1
+# define HAVE_POSIX_MEMALIGN 1
+# define HAVE_SYS_IOCTL_H 1
+# define HAVE_SYS_MMAN_H 1
+# define HAVE_SYS_MOUNT_H 1
+# define HAVE_SYS_PARAM_H 1
+# define HAVE_SYS_RESOURCE_H 1
+# define HAVE_SYS_SELECT_H 1
+# define HAVE_SYS_WAIT_H 1
+#endif
+#if defined(__linux__)
+# define HAVE_EXT2_IOCTLS 1
+# define HAVE_FALLOCATE 1
+# define HAVE_LINUX_FD_H 1
+# define HAVE_LINUX_TYPES_H 1
+# define HAVE_LSEEK64 1
+# define HAVE_LSEEK64_PROTOTYPE 1
+# define HAVE_MNTENT_H 1
+# define HAVE_PREAD64 1
+# define HAVE_PWRITE64 1
+# define HAVE_SETMNTENT 1
+# define HAVE_SYS_PRCTL_H 1
+#endif
diff --git a/util/android_types.h b/util/android_types.h
new file mode 100644
index 0000000..5f05903
--- /dev/null
+++ b/util/android_types.h
@@ -0,0 +1,45 @@
+/* 
+ * If linux/types.h is already been included, assume it has defined
+ * everything we need.  (cross fingers)  Other header files may have
+ * also defined the types that we need.
+ */
+#if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \
+	!defined(_EXT2_TYPES_H) && !defined(_UUID_TYPES_H))
+#define _LINUX_TYPES_H
+
+typedef unsigned char __u8;
+typedef __signed__ char __s8;
+typedef unsigned short __u16;
+typedef __signed__ short __s16;
+typedef unsigned int __u32;
+typedef __signed__ int __s32;
+typedef unsigned long long __u64;
+typedef __signed__ long long __s64;
+#endif
+
+#include <stdint.h> //uintptr_t
+
+/* endian checking stuff */
+#ifndef EXT2_ENDIAN_H_
+#define EXT2_ENDIAN_H_
+
+#ifdef __CHECKER__
+#ifndef __bitwise
+#define __bitwise		__attribute__((bitwise))
+#endif
+#define __force			__attribute__((force))
+#else
+#ifndef __bitwise
+#define __bitwise
+#endif
+#define __force
+#endif
+
+typedef __u16	__bitwise	__le16;
+typedef __u32	__bitwise	__le32;
+typedef __u64	__bitwise	__le64;
+typedef __u16	__bitwise	__be16;
+typedef __u32	__bitwise	__be32;
+typedef __u64	__bitwise	__be64;
+
+#endif /* EXT2_ENDIAN_H_ */
diff --git a/util/copy_sparse.c b/util/copy_sparse.c
new file mode 100644
index 0000000..cbab273
--- /dev/null
+++ b/util/copy_sparse.c
@@ -0,0 +1,228 @@
+/*
+ * copy_sparse.c -- copy a very large sparse files efficiently
+ * 	(requires root privileges)
+ *
+ * Copyright 2003, 2004 by Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Public
+ * License.
+ * %End-Header%
+ */
+
+#ifndef __linux__
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(void) {
+    fputs("This program is only supported on Linux!\n", stderr);
+    exit(EXIT_FAILURE);
+}
+#else
+#define _LARGEFILE64_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <errno.h>
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#else
+extern char *optarg;
+extern int optind;
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <sys/ioctl.h>
+#include <linux/fd.h>
+
+int verbose = 0;
+
+#define FIBMAP	   _IO(0x00,1)	/* bmap access */
+#define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
+
+static unsigned long get_bmap(int fd, unsigned long block)
+{
+	int	ret;
+	unsigned long b;
+
+	b = block;
+	ret = ioctl(fd, FIBMAP, &b);
+	if (ret < 0) {
+		if (errno == EPERM) {
+			fprintf(stderr, "No permission to use FIBMAP ioctl; must have root privileges\n");
+			exit(1);
+		}
+		perror("FIBMAP");
+	}
+	return b;
+}
+
+static int full_read(int fd, char *buf, size_t count)
+{
+	int got, total = 0;
+	int pass = 0;
+
+	while (count > 0) {
+		got = read(fd, buf, count);
+		if (got == -1) {
+			if ((errno == EINTR) || (errno == EAGAIN))
+				continue;
+			return total ? total : -1;
+		}
+		if (got == 0) {
+			if (pass++ >= 3)
+				return total;
+			continue;
+		}
+		pass = 0;
+		buf += got;
+		total += got;
+		count -= got;
+	}
+	return total;
+}
+
+static void copy_sparse_file(const char *src, const char *dest)
+{
+	struct stat64	fileinfo;
+	long		lb, i, fd, ofd, bs, block, numblocks;
+	ssize_t		got, got2;
+	off64_t		offset = 0, should_be;
+	char		*buf;
+
+	if (verbose)
+		printf("Copying sparse file from %s to %s\n", src, dest);
+
+	if (strcmp(src, "-")) {
+		if (stat64(src, &fileinfo) < 0) {
+			perror("stat");
+			exit(1);
+		}
+		if (!S_ISREG(fileinfo.st_mode)) {
+			printf("%s: Not a regular file\n", src);
+			exit(1);
+		}
+		fd = open(src, O_RDONLY | O_LARGEFILE);
+		if (fd < 0) {
+			perror("open");
+			exit(1);
+		}
+		if (ioctl(fd, FIGETBSZ, &bs) < 0) {
+			perror("FIGETBSZ");
+			close(fd);
+			exit(1);
+		}
+		if (bs < 0) {
+			printf("%s: Invalid block size: %ld\n", src, bs);
+			exit(1);
+		}
+		if (verbose)
+			printf("Blocksize of file %s is %ld\n", src, bs);
+		numblocks = (fileinfo.st_size + (bs-1)) / bs;
+		if (verbose)
+			printf("File size of %s is %lld (%ld blocks)\n", src,
+			       (long long) fileinfo.st_size, numblocks);
+	} else {
+		fd = 0;
+		bs = 1024;
+	}
+
+	ofd = open(dest, O_WRONLY|O_CREAT|O_TRUNC|O_LARGEFILE, 0777);
+	if (ofd < 0) {
+		perror(dest);
+		exit(1);
+	}
+
+	buf = malloc(bs);
+	if (!buf) {
+		fprintf(stderr, "Couldn't allocate buffer");
+		exit(1);
+	}
+
+	for (lb = 0; !fd || lb < numblocks; lb++) {
+		if (fd) {
+			block = get_bmap(fd, lb);
+			if (!block)
+				continue;
+			should_be = ((off64_t) lb) * bs;
+			if (offset != should_be) {
+				if (verbose)
+					printf("Seeking to %lld\n", should_be);
+				if (lseek64(fd, should_be, SEEK_SET) == (off_t) -1) {
+					perror("lseek src");
+					exit(1);
+				}
+				if (lseek64(ofd, should_be, SEEK_SET) == (off_t) -1) {
+					perror("lseek dest");
+					exit(1);
+				}
+				offset = should_be;
+			}
+		}
+		got = full_read(fd, buf, bs);
+
+		if (fd == 0 && got == 0)
+			break;
+
+		if (got == bs) {
+			for (i=0; i < bs; i++)
+				if (buf[i])
+					break;
+			if (i == bs) {
+				lseek(ofd, bs, SEEK_CUR);
+				offset += bs;
+				continue;
+			}
+		}
+		got2 = write(ofd, buf, got);
+		if (got != got2) {
+			printf("short write\n");
+			exit(1);
+		}
+		offset += got;
+	}
+	offset = fileinfo.st_size;
+	if (fstat64(ofd, &fileinfo) < 0) {
+		perror("fstat");
+		exit(1);
+	}
+	if (fileinfo.st_size != offset) {
+		lseek64(ofd, offset-1, SEEK_CUR);
+		buf[0] = 0;
+		write(ofd, buf, 1);
+	}
+	close(fd);
+	close(ofd);
+}
+
+static void usage(const char *progname)
+{
+	fprintf(stderr, "Usage: %s [-v] source_file destination_file\n", progname);
+	exit(1);
+}
+
+int main(int argc, char**argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "v")) != EOF)
+		switch (c) {
+		case 'v':
+			verbose++;
+			break;
+		default:
+			usage(argv[0]);
+			break;
+		}
+	if (optind+2 != argc)
+		usage(argv[0]);
+	copy_sparse_file(argv[optind], argv[optind+1]);
+
+	return 0;
+}
+#endif
diff --git a/util/gen-android-files b/util/gen-android-files
new file mode 100755
index 0000000..cab4e8d
--- /dev/null
+++ b/util/gen-android-files
@@ -0,0 +1,118 @@
+#!/bin/sh
+
+ANDROID_GENERATED_FILES="lib/ext2fs/ext2_err.c lib/ext2fs/ext2_err.h \
+	lib/ss/ss_err.c lib/ss/ss_err.h lib/support/prof_err.c \
+	lib/support/prof_err.h \
+	lib/blkid/blkid_types.h lib/uuid/uuid_types.h \
+	lib/ext2fs/ext2_types.h lib/config.h lib/blkid/blkid.h \
+	lib/uuid/uuid.h lib/ext2fs/crc32c_table.h misc/default_profile.c \
+	lib/ss/std_rqs.c debugfs/debug_cmds.c debugfs/ro_debug_cmds.c \
+	debugfs/extent_cmds.c debugfs/e2freefrag.c \
+	debugfs/recovery.c debugfs/revoke.c \
+	MODULE_LICENSE_GPL README.version"
+
+SS_DIR=$(pwd)/lib/ss
+MK_CMDS=/tmp/mk_cmds$$.sh
+
+sed -e "s/@AWK@/awk/" < $SS_DIR/mk_cmds.sh.in \
+    | sed -e "s/@SED@/sed/" > $MK_CMDS
+
+sed -e "s/@E2FSPROGS_VERSION@/$(git describe)/" < lib/ext2fs/ext2_err.et.in > lib/ext2fs/ext2_err.et
+
+for i in lib/ss/ss_err lib/support/prof_err lib/ext2fs/ext2_err
+do
+   rm -f $i.c $i.h
+   awk -f lib/et/et_c.awk outfile=$i.c outfn=$(basename $i.c) $i.et
+   awk -f lib/et/et_h.awk outfile=$i.h outfn=$(basename $i.h) $i.et
+done
+
+for i in lib/ss/std_rqs debugfs/debug_cmds debugfs/ro_debug_cmds \
+			debugfs/extent_cmds
+do
+    _SS_DIR_OVERRIDE=lib/ss /bin/sh $MK_CMDS $i.ct
+    mv -f $(basename $i).c $i.c
+done
+
+rm -f $MK_CMDS
+
+cp lib/blkid/blkid.h.in lib/blkid/blkid.h
+cp lib/uuid/uuid.h.in lib/uuid/uuid.h
+
+cp util/android_types.h lib/ext2fs/ext2_types.h
+cp util/android_types.h lib/blkid/blkid_types.h
+cp util/android_types.h lib/uuid/uuid_types.h
+# Copied header files having exactly same content results in debug output
+# differences on RBE. Hence modify the #define's appropriately.
+sed -i 's/#define _LINUX_TYPES_H/#define _BLKID_TYPES_H/g' lib/blkid/blkid_types.h
+sed -i 's/#define _LINUX_TYPES_H/#define _EXT2_TYPES_H/g' lib/ext2fs/ext2_types.h
+sed -i 's/#define _LINUX_TYPES_H/#define _UUID_TYPES_H/g' lib/uuid/uuid_types.h
+
+cp util/android_config.h lib/config.h
+cp misc/e2freefrag.c debugfs/
+cp e2fsck/recovery.c e2fsck/revoke.c debugfs/
+
+gcc -o gen_crc32ctable lib/ext2fs/gen_crc32ctable.c
+./gen_crc32ctable > lib/ext2fs/crc32c_table.h
+
+awk -f misc/profile-to-c.awk < misc/mke2fs.conf.in > misc/default_profile.c
+
+rm -f ./gen_crc32table ./gen_crc32ctable lib/ext2fs/ext2_err.et
+
+touch MODULE_LICENSE_GPL
+
+E2FSPROGS_VERSION=`grep E2FSPROGS_VERSION version.h  \
+	| awk '{print $3}' | tr \" " " | awk '{print $1}'`
+DATE=`grep E2FSPROGS_DATE version.h | awk '{print $3}' \
+	| tr \" " "`
+E2FSPROGS_DAY=$(echo $DATE | awk -F- '{print $1}' | sed -e '/^[1-9]$/s/^/0/')
+MONTH=`echo $DATE | awk -F- '{print $2}'`
+YEAR=`echo $DATE | awk -F- '{print $3}'`
+
+if expr $YEAR ">" 1900 > /dev/null ; then
+	E2FSPROGS_YEAR=$YEAR
+elif expr $YEAR ">" 90 >/dev/null ; then
+	E2FSPROGS_YEAR=19$YEAR
+else
+	E2FSPROGS_YEAR=20$YEAR
+fi
+
+case $MONTH in
+Jan)	MONTH_NUM=01; E2FSPROGS_MONTH="January" ;;
+Feb)	MONTH_NUM=02; E2FSPROGS_MONTH="February" ;;
+Mar)	MONTH_NUM=03; E2FSPROGS_MONTH="March" ;;
+Apr)	MONTH_NUM=04; E2FSPROGS_MONTH="April" ;;
+May)	MONTH_NUM=05; E2FSPROGS_MONTH="May" ;;
+Jun)	MONTH_NUM=06; E2FSPROGS_MONTH="June" ;;
+Jul)	MONTH_NUM=07; E2FSPROGS_MONTH="July" ;;
+Aug)	MONTH_NUM=08; E2FSPROGS_MONTH="August" ;;
+Sep)	MONTH_NUM=09; E2FSPROGS_MONTH="September" ;;
+Oct)	MONTH_NUM=10; E2FSPROGS_MONTH="October" ;;
+Nov)	MONTH_NUM=11; E2FSPROGS_MONTH="November" ;;
+Dec)	MONTH_NUM=12; E2FSPROGS_MONTH="December" ;;
+*)	MONTH_NUM=13; E2FSPROGS_MONTH="UNKNOWN" ;;
+esac
+
+base_ver=`echo $E2FSPROGS_VERSION | \
+	       sed -e 's/-WIP//' -e 's/pre-//' -e 's/-PLUS//'`
+
+date_spec=${E2FSPROGS_YEAR}${MONTH_NUM}${E2FSPROGS_DAY}
+
+case $E2FSPROGS_VERSION in
+*-WIP|pre-*)
+	VER="$base_ver-WIP-$date_spec"
+	FN="$base_ver~WIP.$E2FSPROGS_YEAR.$MONTH_NUM.$E2FSPROGS_DAY"
+	;;
+*)
+	VER="$base_ver"
+	FN="$base_ver"
+	;;
+esac
+
+sed -e "s/@VER@/$VER/g" -e "s/@FN@/$FN/" < util/android-README.version.in > README.version
+
+git add -f $ANDROID_GENERATED_FILES
+if test -f COPYING
+then
+   git mv COPYING NOTICE
+fi
+git commit -m "Update generated files for Android"
diff --git a/util/gen-git-tarball b/util/gen-git-tarball
new file mode 100755
index 0000000..a959c4a
--- /dev/null
+++ b/util/gen-git-tarball
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Generate the e2fsprogs release tar ball
+#
+
+commit=HEAD
+
+if test -n "$1" ; then
+    commit="$1"
+fi
+
+ver=`git show ${commit}:version.h | grep E2FSPROGS_VERSION  \
+	| awk '{print $3}' | tr \" " " | awk '{print $1}'`
+fn=e2fsprogs-${ver}.tar.gz
+
+git archive --prefix=e2fsprogs-${ver}/ ${commit} | gzip -9n > $fn
+echo "Generated $fn"
diff --git a/util/gen-sample-fs b/util/gen-sample-fs
new file mode 100755
index 0000000..8e13916
--- /dev/null
+++ b/util/gen-sample-fs
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+MNT=/mnt
+FS=/tmp/foo.img
+
+cp /dev/null $FS
+mke2fs -q -t ext4 -O inline_data,^has_journal -I 256 -b 4096 -N 64 $FS 256
+mount -t ext4 $FS $MNT
+ln -s symlink_data $MNT/symlink
+for i in 30 70 500 1023 1024; do
+	ln -s /$(perl -e "print 'x' x $i;") $MNT/l_$i
+done
+touch $MNT/acl
+setfacl -m u:daemon:r $MNT/acl
+setfacl -m u:bin:rx $MNT/acl
+setfacl -m g:mail:rw $MNT/acl
+setfacl -m g:daemon:r $MNT/acl
+touch $MNT/simple_acl
+setfacl -m u:daemon:r $MNT/simple_acl
+touch $MNT/xattr
+attr -q -s foo -V bar $MNT/xattr
+echo -e "one\n\ttwo" | attr -q -s quux $MNT/xattr
+echo -e "abc\001\002\003" | attr -q -s def $MNT/xattr
+echo file_data > $MNT/small_inline
+a="I am a very model of a modern major general;"
+a="$a I've information vegetable, animal and mineral"
+echo $a > $MNT/big_inline
+mkdir $MNT/sdir
+touch $MNT/sdir/1
+touch $MNT/sdir/2
+touch $MNT/sdir/3
+touch $MNT/sdir/4
+mkdir $MNT/mdir
+touch $MNT/mdir/1
+touch $MNT/mdir/2
+touch $MNT/mdir/3
+touch $MNT/mdir/4
+touch $MNT/mdir/5
+umount $MNT
+e2fsck -fp $FS
diff --git a/util/gen-tarball.in b/util/gen-tarball.in
new file mode 100644
index 0000000..997bd93
--- /dev/null
+++ b/util/gen-tarball.in
@@ -0,0 +1,50 @@
+#!/bin/sh
+#
+# This script is used to generate the distribution tarball
+#
+srcdir=@srcdir@
+top_srcdir=@top_srcdir@
+top_dir=`cd $top_srcdir; pwd`
+base_ver=`echo @E2FSPROGS_VERSION@ | sed -e 's/-WIP//' -e 's/pre-//' -e 's/-PLUS//'`
+base_e2fsprogs=`basename $top_dir`
+exclude=/tmp/exclude$$
+GZIP=gzip
+
+#
+# This hack is needed because texi2dvi blows up horribly if there are 
+# any '~' characters in the directory pathname.  So we kludge around it by
+# using a non-standard directory name for WIP releases.  dpkg-source
+# complains, but life goes on.
+#
+deb_pkgver=`echo @E2FSPROGS_PKGVER@ | sed -e 's/~/-/g'`
+    
+case $1 in
+    debian|ubuntu)
+	SRCROOT="e2fsprogs-$deb_pkgver"
+	tarout="e2fsprogs_@E2FSPROGS_PKGVER@.orig.tar.gz"
+	;;
+   all|*)
+	SRCROOT="e2fsprogs-$base_ver"
+	tarout="$SRCROOT.tar.gz"
+	;;
+esac
+
+if test -z "$SOURCE_DATE_EPOCH" ; then
+    export SOURCE_DATE_EPOCH=$(cd $top_srcdir; git log -1 --pretty=%ct)
+fi
+
+(cd $top_srcdir/.. ; find $base_e2fsprogs \( -name \*~ -o -name \*.orig \
+		-o -name CVS -o -name \*.rej -o -name Makefile.pq \
+		-o -name TAGS -o -name \*.old -o -name SCCS \
+		-o -name changed-files -o -name .#\* -o -name \*.tar.gz \
+		-o -name autom4te.cache \) \
+		-print) > $exclude
+sed -e "s;^;$base_e2fsprogs/;" < $srcdir/all.exclude >> $exclude
+
+(cd $top_srcdir/.. ; \
+ tar -c -f - -X $exclude --sort=name --owner=0 --group=0 \
+     --transform "flags=r;s|^$base_e2fsprogs|$SRCROOT|" \
+     --numeric-owner --mtime="@${SOURCE_DATE_EPOCH}" $base_e2fsprogs) \
+    | $GZIP -9n -c > $tarout
+$GZIP -ln $tarout
+rm -f "$exclude"
diff --git a/util/get-ver b/util/get-ver
new file mode 100755
index 0000000..ade7d22
--- /dev/null
+++ b/util/get-ver
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+ver=$(git describe --always --dirty); echo "e2fsprogs	$ver ($(git log -1 --pretty=%cD))"
+
diff --git a/util/install-symlink.in b/util/install-symlink.in
new file mode 100644
index 0000000..24341b8
--- /dev/null
+++ b/util/install-symlink.in
@@ -0,0 +1,89 @@
+#!/bin/sh
+#
+# install-symlink source destination destdir
+#
+
+SYMLINKS=symlinks
+LN_S="@LN_S@"
+RM="@RM@"
+FORCE_RELATIVE=NO
+FORCE_ABSOLUTE=NO
+
+while echo $1 | grep -q -- ^- ;
+do
+	case $1 in
+		--relative)
+			FORCE_RELATIVE=YES
+			;;
+		--absolute)
+			FORCE_ABSOLUTE=YES
+			;;
+		--debian)
+			FORCE_ABSOLUTE=NO
+			FORCE_RELATIVE=NO
+			;;
+		--symlinks=*)
+			SYMLINKS=$(echo $1 | sed -e 's/--symlinks=//')
+			;;
+		*)
+			echo "Unknown option $1"
+			exit 1
+			;;
+	esac					     
+	shift;
+done
+
+
+FIX_SYMLINK="$SYMLINKS -c"
+
+SRC="$1"
+DEST="$2"
+DESTDIR="$3"
+
+if ! echo $SRC | grep -q ^/ ; then
+	echo $SRC: Source pathname must be absolute
+	exit 1
+fi
+
+if ! echo $DEST | grep -q ^/ ; then
+	echo $DEST: Destination pathname must be absolute
+	exit 1
+fi
+
+if ! test -e "$DESTDIR$SRC" ; then
+	echo $DESTDIR$SRC: file or directory does not exist
+	exit 1
+fi
+
+$RM -f "$DESTDIR$DEST"
+
+if test "$LN_S" != "ln -s" ; then
+	$LN_S "$DESTDIR$SRC" "$DESTDIR$DEST"
+	exit 0
+fi
+
+if test $(dirname "$SRC") = $(dirname "$DEST") ; then
+	$LN_S "$(basename "$SRC")" "$DESTDIR$DEST"
+	exit 0
+fi
+
+TOP_SRC=$(echo $SRC | awk -F/ '{print $2}')
+TOP_DEST=$(echo $DEST | awk -F/ '{print $2}')
+
+if test $FORCE_RELATIVE = YES ; then
+	TOP_SRC=FORCE
+	TOP_DEST=FORCE
+fi
+
+if test $FORCE_ABSOLUTE = YES ; then
+	TOP_SRC=FORCE
+	TOP_DEST=FORCE_ABSOLUTE
+fi
+
+if test $TOP_SRC != $TOP_DEST ; then
+	$LN_S "$SRC" "$DESTDIR$DEST"
+else
+	$LN_S "$DESTDIR$SRC" "$DESTDIR$DEST"
+	$FIX_SYMLINK "$DESTDIR$DEST"
+fi
+
diff --git a/util/libecho.c b/util/libecho.c
new file mode 100644
index 0000000..352ce1e
--- /dev/null
+++ b/util/libecho.c
@@ -0,0 +1,78 @@
+/*
+ * libecho.c
+ *
+ * For each argument on the command line, echo it.  Should expand
+ * DOS wildcards correctly.
+ *
+ * Syntax: libecho [-p prefix] list...
+ */
+#include <stdio.h>
+#include <io.h>
+#include <string.h>
+
+void echo_files(char *, char *);
+
+int
+main(int argc, char *argv[])
+{
+  int i;
+  char *prefix;
+
+  prefix = "";
+
+  if (argc < 2) {
+    fprintf(stderr, "Usage:  libecho [-p prefix] list...\n");
+    return 1;
+  }
+
+  for (i = 1 ; i < argc ; i++)
+    if (!stricmp(argv[i], "-p"))
+      prefix = argv[++i];
+    else
+      echo_files(prefix, argv[i]);
+
+  return 0;
+}
+
+void
+echo_files(char *prefix, char *f)
+{
+  long ff;
+  struct _finddata_t fdt;
+  char *slash;
+  char filepath[256];
+
+  /*
+   * We're unix based quite a bit here.  Look for normal slashes and
+   * make them reverse slashes.
+   */
+  while((slash = strrchr(f, '/')) != NULL)
+    *slash = '\\';
+
+  strcpy(filepath, f);
+
+  slash = strrchr(filepath, '\\');
+
+  if (slash) {
+    slash++;
+    *slash = 0;
+  } else {
+    filepath[0] = '\0';
+  }
+
+  ff = _findfirst(f, &fdt);
+
+  if (ff < 0) {
+    printf("%s%s\n", prefix, f);
+    return;
+  }
+
+  printf("%s%s%s\n", prefix, filepath, fdt.name);
+
+  for (;;) {
+    if (_findnext(ff, &fdt) < 0)
+      break;
+    printf("%s%s%s\n", prefix, filepath, fdt.name);
+  }
+  _findclose(ff);
+}
diff --git a/util/mkutf8data.c b/util/mkutf8data.c
new file mode 100644
index 0000000..2af25ac
--- /dev/null
+++ b/util/mkutf8data.c
@@ -0,0 +1,3392 @@
+/*
+ * Copyright (c) 2014 SGI.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* Generator for a compact trie for unicode normalization */
+
+#include <sys/types.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+/* Default names of the in- and output files. */
+
+#define AGE_NAME	"DerivedAge.txt"
+#define CCC_NAME	"DerivedCombiningClass.txt"
+#define PROP_NAME	"DerivedCoreProperties.txt"
+#define DATA_NAME	"UnicodeData.txt"
+#define FOLD_NAME	"CaseFolding.txt"
+#define NORM_NAME	"NormalizationCorrections.txt"
+#define TEST_NAME	"NormalizationTest.txt"
+#define UTF8_NAME	"utf8data.h"
+
+const char	*age_name  = AGE_NAME;
+const char	*ccc_name  = CCC_NAME;
+const char	*prop_name = PROP_NAME;
+const char	*data_name = DATA_NAME;
+const char	*fold_name = FOLD_NAME;
+const char	*norm_name = NORM_NAME;
+const char	*test_name = TEST_NAME;
+const char	*utf8_name = UTF8_NAME;
+
+int verbose = 0;
+
+/* An arbitrary line size limit on input lines. */
+
+#define LINESIZE	1024
+char line[LINESIZE];
+char buf0[LINESIZE];
+char buf1[LINESIZE];
+char buf2[LINESIZE];
+char buf3[LINESIZE];
+
+const char *argv0;
+
+/* ------------------------------------------------------------------ */
+
+/*
+ * Unicode version numbers consist of three parts: major, minor, and a
+ * revision.  These numbers are packed into an unsigned int to obtain
+ * a single version number.
+ *
+ * To save space in the generated trie, the unicode version is not
+ * stored directly, instead we calculate a generation number from the
+ * unicode versions seen in the DerivedAge file, and use that as an
+ * index into a table of unicode versions.
+ */
+#define UNICODE_MAJ_SHIFT		(16)
+#define UNICODE_MIN_SHIFT		(8)
+
+#define UNICODE_MAJ_MAX			((unsigned short)-1)
+#define UNICODE_MIN_MAX			((unsigned char)-1)
+#define UNICODE_REV_MAX			((unsigned char)-1)
+
+#define UNICODE_AGE(MAJ,MIN,REV)			\
+	(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) |	\
+	 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) |	\
+	 ((unsigned int)(REV)))
+
+unsigned int *ages;
+int ages_count;
+
+unsigned int unicode_maxage;
+
+static int age_valid(unsigned int major, unsigned int minor,
+		     unsigned int revision)
+{
+	if (major > UNICODE_MAJ_MAX)
+		return 0;
+	if (minor > UNICODE_MIN_MAX)
+		return 0;
+	if (revision > UNICODE_REV_MAX)
+		return 0;
+	return 1;
+}
+
+/* ------------------------------------------------------------------ */
+
+/*
+ * utf8trie_t
+ *
+ * A compact binary tree, used to decode UTF-8 characters.
+ *
+ * Internal nodes are one byte for the node itself, and up to three
+ * bytes for an offset into the tree.  The first byte contains the
+ * following information:
+ *  NEXTBYTE  - flag        - advance to next byte if set
+ *  BITNUM    - 3 bit field - the bit number to tested
+ *  OFFLEN    - 2 bit field - number of bytes in the offset
+ * if offlen == 0 (non-branching node)
+ *  RIGHTPATH - 1 bit field - set if the following node is for the
+ *                            right-hand path (tested bit is set)
+ *  TRIENODE  - 1 bit field - set if the following node is an internal
+ *                            node, otherwise it is a leaf node
+ * if offlen != 0 (branching node)
+ *  LEFTNODE  - 1 bit field - set if the left-hand node is internal
+ *  RIGHTNODE - 1 bit field - set if the right-hand node is internal
+ *
+ * Due to the way utf8 works, there cannot be branching nodes with
+ * NEXTBYTE set, and moreover those nodes always have a righthand
+ * descendant.
+ */
+typedef unsigned char utf8trie_t;
+#define BITNUM		0x07
+#define NEXTBYTE	0x08
+#define OFFLEN		0x30
+#define OFFLEN_SHIFT	4
+#define RIGHTPATH	0x40
+#define TRIENODE	0x80
+#define RIGHTNODE	0x40
+#define LEFTNODE	0x80
+
+/*
+ * utf8leaf_t
+ *
+ * The leaves of the trie are embedded in the trie, and so the same
+ * underlying datatype, unsigned char.
+ *
+ * leaf[0]: The unicode version, stored as a generation number that is
+ *          an index into utf8agetab[].  With this we can filter code
+ *          points based on the unicode version in which they were
+ *          defined.  The CCC of a non-defined code point is 0.
+ * leaf[1]: Canonical Combining Class. During normalization, we need
+ *          to do a stable sort into ascending order of all characters
+ *          with a non-zero CCC that occur between two characters with
+ *          a CCC of 0, or at the begin or end of a string.
+ *          The unicode standard guarantees that all CCC values are
+ *          between 0 and 254 inclusive, which leaves 255 available as
+ *          a special value.
+ *          Code points with CCC 0 are known as stoppers.
+ * leaf[2]: Decomposition. If leaf[1] == 255, then leaf[2] is the
+ *          start of a NUL-terminated string that is the decomposition
+ *          of the character.
+ *          The CCC of a decomposable character is the same as the CCC
+ *          of the first character of its decomposition.
+ *          Some characters decompose as the empty string: these are
+ *          characters with the Default_Ignorable_Code_Point property.
+ *          These do affect normalization, as they all have CCC 0.
+ *
+ * The decompositions in the trie have been fully expanded.
+ *
+ * Casefolding, if applicable, is also done using decompositions.
+ */
+typedef unsigned char utf8leaf_t;
+
+#define LEAF_GEN(LEAF)	((LEAF)[0])
+#define LEAF_CCC(LEAF)	((LEAF)[1])
+#define LEAF_STR(LEAF)	((const char*)((LEAF) + 2))
+
+#define MAXGEN		(255)
+
+#define MINCCC		(0)
+#define MAXCCC		(254)
+#define STOPPER		(0)
+#define DECOMPOSE	(255)
+#define HANGUL		((char)(255))
+
+#define UTF8HANGULLEAF	(12)
+
+struct tree;
+static utf8leaf_t *utf8nlookup(struct tree *, unsigned char *,
+			       const char *, size_t);
+static utf8leaf_t *utf8lookup(struct tree *, unsigned char *, const char *);
+
+unsigned char *utf8data;
+size_t utf8data_size;
+
+utf8trie_t *nfkdi;
+utf8trie_t *nfkdicf;
+
+/* ------------------------------------------------------------------ */
+
+/*
+ * UTF8 valid ranges.
+ *
+ * The UTF-8 encoding spreads the bits of a 32bit word over several
+ * bytes. This table gives the ranges that can be held and how they'd
+ * be represented.
+ *
+ * 0x00000000 0x0000007F: 0xxxxxxx
+ * 0x00000000 0x000007FF: 110xxxxx 10xxxxxx
+ * 0x00000000 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
+ * 0x00000000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * 0x00000000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * 0x00000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * There is an additional requirement on UTF-8, in that only the
+ * shortest representation of a 32bit value is to be used.  A decoder
+ * must not decode sequences that do not satisfy this requirement.
+ * Thus the allowed ranges have a lower bound.
+ *
+ * 0x00000000 0x0000007F: 0xxxxxxx
+ * 0x00000080 0x000007FF: 110xxxxx 10xxxxxx
+ * 0x00000800 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
+ * 0x00010000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * 0x00200000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * 0x04000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * Actual unicode characters are limited to the range 0x0 - 0x10FFFF,
+ * 17 planes of 65536 values.  This limits the sequences actually seen
+ * even more, to just the following.
+ *
+ *          0 -     0x7f: 0                     0x7f
+ *       0x80 -    0x7ff: 0xc2 0x80             0xdf 0xbf
+ *      0x800 -   0xffff: 0xe0 0xa0 0x80        0xef 0xbf 0xbf
+ *    0x10000 - 0x10ffff: 0xf0 0x90 0x80 0x80   0xf4 0x8f 0xbf 0xbf
+ *
+ * Even within those ranges not all values are allowed: the surrogates
+ * 0xd800 - 0xdfff should never be seen.
+ *
+ * Note that the longest sequence seen with valid usage is 4 bytes,
+ * the same a single UTF-32 character.  This makes the UTF-8
+ * representation of Unicode strictly smaller than UTF-32.
+ *
+ * The shortest sequence requirement was introduced by:
+ *    Corrigendum #1: UTF-8 Shortest Form
+ * It can be found here:
+ *    http://www.unicode.org/versions/corrigendum1.html
+ *
+ */
+
+#define UTF8_2_BITS     0xC0
+#define UTF8_3_BITS     0xE0
+#define UTF8_4_BITS     0xF0
+#define UTF8_N_BITS     0x80
+#define UTF8_2_MASK     0xE0
+#define UTF8_3_MASK     0xF0
+#define UTF8_4_MASK     0xF8
+#define UTF8_N_MASK     0xC0
+#define UTF8_V_MASK     0x3F
+#define UTF8_V_SHIFT    6
+
+static int utf8encode(char *str, unsigned int val)
+{
+	int len;
+
+	if (val < 0x80) {
+		str[0] = val;
+		len = 1;
+	} else if (val < 0x800) {
+		str[1] = val & UTF8_V_MASK;
+		str[1] |= UTF8_N_BITS;
+		val >>= UTF8_V_SHIFT;
+		str[0] = val;
+		str[0] |= UTF8_2_BITS;
+		len = 2;
+	} else if (val < 0x10000) {
+		str[2] = val & UTF8_V_MASK;
+		str[2] |= UTF8_N_BITS;
+		val >>= UTF8_V_SHIFT;
+		str[1] = val & UTF8_V_MASK;
+		str[1] |= UTF8_N_BITS;
+		val >>= UTF8_V_SHIFT;
+		str[0] = val;
+		str[0] |= UTF8_3_BITS;
+		len = 3;
+	} else if (val < 0x110000) {
+		str[3] = val & UTF8_V_MASK;
+		str[3] |= UTF8_N_BITS;
+		val >>= UTF8_V_SHIFT;
+		str[2] = val & UTF8_V_MASK;
+		str[2] |= UTF8_N_BITS;
+		val >>= UTF8_V_SHIFT;
+		str[1] = val & UTF8_V_MASK;
+		str[1] |= UTF8_N_BITS;
+		val >>= UTF8_V_SHIFT;
+		str[0] = val;
+		str[0] |= UTF8_4_BITS;
+		len = 4;
+	} else {
+		printf("%#x: illegal val\n", val);
+		len = 0;
+	}
+	return len;
+}
+
+static unsigned int utf8decode(const char *str)
+{
+	const unsigned char *s = (const unsigned char*)str;
+	unsigned int unichar = 0;
+
+	if (*s < 0x80) {
+		unichar = *s;
+	} else if (*s < UTF8_3_BITS) {
+		unichar = *s++ & 0x1F;
+		unichar <<= UTF8_V_SHIFT;
+		unichar |= *s & 0x3F;
+	} else if (*s < UTF8_4_BITS) {
+		unichar = *s++ & 0x0F;
+		unichar <<= UTF8_V_SHIFT;
+		unichar |= *s++ & 0x3F;
+		unichar <<= UTF8_V_SHIFT;
+		unichar |= *s & 0x3F;
+	} else {
+		unichar = *s++ & 0x0F;
+		unichar <<= UTF8_V_SHIFT;
+		unichar |= *s++ & 0x3F;
+		unichar <<= UTF8_V_SHIFT;
+		unichar |= *s++ & 0x3F;
+		unichar <<= UTF8_V_SHIFT;
+		unichar |= *s & 0x3F;
+	}
+	return unichar;
+}
+
+static int utf32valid(unsigned int unichar)
+{
+	return unichar < 0x110000;
+}
+
+#define HANGUL_SYLLABLE(U)	((U) >= 0xAC00 && (U) <= 0xD7A3)
+
+#define NODE 1
+#define LEAF 0
+
+struct tree {
+	void *root;
+	int childnode;
+	const char *type;
+	unsigned int maxage;
+	struct tree *next;
+	int (*leaf_equal)(void *, void *);
+	void (*leaf_print)(void *, int);
+	int (*leaf_mark)(void *);
+	int (*leaf_size)(void *);
+	int *(*leaf_index)(struct tree *, void *);
+	unsigned char *(*leaf_emit)(void *, unsigned char *);
+	int leafindex[0x110000];
+	int index;
+};
+
+struct node {
+	int index;
+	int offset;
+	int mark;
+	int size;
+	struct node *parent;
+	void *left;
+	void *right;
+	unsigned char bitnum;
+	unsigned char nextbyte;
+	unsigned char leftnode;
+	unsigned char rightnode;
+	unsigned int keybits;
+	unsigned int keymask;
+};
+
+/*
+ * Example lookup function for a tree.
+ */
+static void *lookup(struct tree *tree, const char *key)
+{
+	struct node *node;
+	void *leaf = NULL;
+
+	node = tree->root;
+	while (!leaf && node) {
+		if (node->nextbyte)
+			key++;
+		if (*key & (1 << (node->bitnum & 7))) {
+			/* Right leg */
+			if (node->rightnode == NODE) {
+				node = node->right;
+			} else if (node->rightnode == LEAF) {
+				leaf = node->right;
+			} else {
+				node = NULL;
+			}
+		} else {
+			/* Left leg */
+			if (node->leftnode == NODE) {
+				node = node->left;
+			} else if (node->leftnode == LEAF) {
+				leaf = node->left;
+			} else {
+				node = NULL;
+			}
+		}
+	}
+
+	return leaf;
+}
+
+/*
+ * A simple non-recursive tree walker: keep track of visits to the
+ * left and right branches in the leftmask and rightmask.
+ */
+static void tree_walk(struct tree *tree)
+{
+	struct node *node;
+	unsigned int leftmask;
+	unsigned int rightmask;
+	unsigned int bitmask;
+	int indent = 1;
+	int nodes, singletons, leaves;
+
+	nodes = singletons = leaves = 0;
+
+	printf("%s_%x root %p\n", tree->type, tree->maxage, tree->root);
+	if (tree->childnode == LEAF) {
+		assert(tree->root);
+		tree->leaf_print(tree->root, indent);
+		leaves = 1;
+	} else {
+		assert(tree->childnode == NODE);
+		node = tree->root;
+		leftmask = rightmask = 0;
+		while (node) {
+			printf("%*snode @ %p bitnum %d nextbyte %d"
+			       " left %p right %p mask %x bits %x\n",
+				indent, "", node,
+				node->bitnum, node->nextbyte,
+				node->left, node->right,
+				node->keymask, node->keybits);
+			nodes += 1;
+			if (!(node->left && node->right))
+				singletons += 1;
+
+			while (node) {
+				bitmask = 1 << node->bitnum;
+				if ((leftmask & bitmask) == 0) {
+					leftmask |= bitmask;
+					if (node->leftnode == LEAF) {
+						assert(node->left);
+						tree->leaf_print(node->left,
+								 indent+1);
+						leaves += 1;
+					} else if (node->left) {
+						assert(node->leftnode == NODE);
+						indent += 1;
+						node = node->left;
+						break;
+					}
+				}
+				if ((rightmask & bitmask) == 0) {
+					rightmask |= bitmask;
+					if (node->rightnode == LEAF) {
+						assert(node->right);
+						tree->leaf_print(node->right,
+								 indent+1);
+						leaves += 1;
+					} else if (node->right) {
+						assert(node->rightnode == NODE);
+						indent += 1;
+						node = node->right;
+						break;
+					}
+				}
+				leftmask &= ~bitmask;
+				rightmask &= ~bitmask;
+				node = node->parent;
+				indent -= 1;
+			}
+		}
+	}
+	printf("nodes %d leaves %d singletons %d\n",
+	       nodes, leaves, singletons);
+}
+
+/*
+ * Allocate an initialize a new internal node.
+ */
+static struct node *alloc_node(struct node *parent)
+{
+	struct node *node;
+	int bitnum;
+
+	node = malloc(sizeof(*node));
+	node->left = node->right = NULL;
+	node->parent = parent;
+	node->leftnode = NODE;
+	node->rightnode = NODE;
+	node->keybits = 0;
+	node->keymask = 0;
+	node->mark = 0;
+	node->index = 0;
+	node->offset = -1;
+	node->size = 4;
+
+	if (node->parent) {
+		bitnum = parent->bitnum;
+		if ((bitnum & 7) == 0) {
+			node->bitnum = bitnum + 7 + 8;
+			node->nextbyte = 1;
+		} else {
+			node->bitnum = bitnum - 1;
+			node->nextbyte = 0;
+		}
+	} else {
+		node->bitnum = 7;
+		node->nextbyte = 0;
+	}
+
+	return node;
+}
+
+/*
+ * Insert a new leaf into the tree, and collapse any subtrees that are
+ * fully populated and end in identical leaves. A nextbyte tagged
+ * internal node will not be removed to preserve the tree's integrity.
+ * Note that due to the structure of utf8, no nextbyte tagged node
+ * will be a candidate for removal.
+ */
+static int insert(struct tree *tree, char *key, int keylen, void *leaf)
+{
+	struct node *node;
+	struct node *parent;
+	void **cursor;
+	int keybits;
+
+	assert(keylen >= 1 && keylen <= 4);
+
+	node = NULL;
+	cursor = &tree->root;
+	keybits = 8 * keylen;
+
+	/* Insert, creating path along the way. */
+	while (keybits) {
+		if (!*cursor)
+			*cursor = alloc_node(node);
+		node = *cursor;
+		if (node->nextbyte)
+			key++;
+		if (*key & (1 << (node->bitnum & 7)))
+			cursor = &node->right;
+		else
+			cursor = &node->left;
+		keybits--;
+	}
+	*cursor = leaf;
+
+	/* Merge subtrees if possible. */
+	while (node) {
+		if (*key & (1 << (node->bitnum & 7)))
+			node->rightnode = LEAF;
+		else
+			node->leftnode = LEAF;
+		if (node->nextbyte)
+			break;
+		if (node->leftnode == NODE || node->rightnode == NODE)
+			break;
+		assert(node->left);
+		assert(node->right);
+		/* Compare */
+		if (! tree->leaf_equal(node->left, node->right))
+			break;
+		/* Keep left, drop right leaf. */
+		leaf = node->left;
+		/* Check in parent */
+		parent = node->parent;
+		if (!parent) {
+			/* root of tree! */
+			tree->root = leaf;
+			tree->childnode = LEAF;
+		} else if (parent->left == node) {
+			parent->left = leaf;
+			parent->leftnode = LEAF;
+			if (parent->right) {
+				parent->keymask = 0;
+				parent->keybits = 0;
+			} else {
+				parent->keymask |= (1 << node->bitnum);
+			}
+		} else if (parent->right == node) {
+			parent->right = leaf;
+			parent->rightnode = LEAF;
+			if (parent->left) {
+				parent->keymask = 0;
+				parent->keybits = 0;
+			} else {
+				parent->keymask |= (1 << node->bitnum);
+				parent->keybits |= (1 << node->bitnum);
+			}
+		} else {
+			/* internal tree error */
+			assert(0);
+		}
+		free(node);
+		node = parent;
+	}
+
+	/* Propagate keymasks up along singleton chains. */
+	while (node) {
+		parent = node->parent;
+		if (!parent)
+			break;
+		/* Nix the mask for parents with two children. */
+		if (node->keymask == 0) {
+			parent->keymask = 0;
+			parent->keybits = 0;
+		} else if (parent->left && parent->right) {
+			parent->keymask = 0;
+			parent->keybits = 0;
+		} else {
+			assert((parent->keymask & node->keymask) == 0);
+			parent->keymask |= node->keymask;
+			parent->keymask |= (1 << parent->bitnum);
+			parent->keybits |= node->keybits;
+			if (parent->right)
+				parent->keybits |= (1 << parent->bitnum);
+		}
+		node = parent;
+	}
+
+	return 0;
+}
+
+/*
+ * Prune internal nodes.
+ *
+ * Fully populated subtrees that end at the same leaf have already
+ * been collapsed.  There are still internal nodes that have for both
+ * their left and right branches a sequence of singletons that make
+ * identical choices and end in identical leaves.  The keymask and
+ * keybits collected in the nodes describe the choices made in these
+ * singleton chains.  When they are identical for the left and right
+ * branch of a node, and the two leaves comare identical, the node in
+ * question can be removed.
+ *
+ * Note that nodes with the nextbyte tag set will not be removed by
+ * this to ensure tree integrity.  Note as well that the structure of
+ * utf8 ensures that these nodes would not have been candidates for
+ * removal in any case.
+ */
+static void prune(struct tree *tree)
+{
+	struct node *node;
+	struct node *left;
+	struct node *right;
+	struct node *parent;
+	void *leftleaf;
+	void *rightleaf;
+	unsigned int leftmask;
+	unsigned int rightmask;
+	unsigned int bitmask;
+	int count;
+
+	if (verbose > 0)
+		printf("Pruning %s_%x\n", tree->type, tree->maxage);
+
+	count = 0;
+	if (tree->childnode == LEAF)
+		return;
+	if (!tree->root)
+		return;
+
+	leftmask = rightmask = 0;
+	node = tree->root;
+	while (node) {
+		if (node->nextbyte)
+			goto advance;
+		if (node->leftnode == LEAF)
+			goto advance;
+		if (node->rightnode == LEAF)
+			goto advance;
+		if (!node->left)
+			goto advance;
+		if (!node->right)
+			goto advance;
+		left = node->left;
+		right = node->right;
+		if (left->keymask == 0)
+			goto advance;
+		if (right->keymask == 0)
+			goto advance;
+		if (left->keymask != right->keymask)
+			goto advance;
+		if (left->keybits != right->keybits)
+			goto advance;
+		leftleaf = NULL;
+		while (!leftleaf) {
+			assert(left->left || left->right);
+			if (left->leftnode == LEAF)
+				leftleaf = left->left;
+			else if (left->rightnode == LEAF)
+				leftleaf = left->right;
+			else if (left->left)
+				left = left->left;
+			else if (left->right)
+				left = left->right;
+			else
+				assert(0);
+		}
+		rightleaf = NULL;
+		while (!rightleaf) {
+			assert(right->left || right->right);
+			if (right->leftnode == LEAF)
+				rightleaf = right->left;
+			else if (right->rightnode == LEAF)
+				rightleaf = right->right;
+			else if (right->left)
+				right = right->left;
+			else if (right->right)
+				right = right->right;
+			else
+				assert(0);
+		}
+		if (! tree->leaf_equal(leftleaf, rightleaf))
+			goto advance;
+		/*
+		 * This node has identical singleton-only subtrees.
+		 * Remove it.
+		 */
+		parent = node->parent;
+		left = node->left;
+		right = node->right;
+		if (parent->left == node)
+			parent->left = left;
+		else if (parent->right == node)
+			parent->right = left;
+		else
+			assert(0);
+		left->parent = parent;
+		left->keymask |= (1 << node->bitnum);
+		node->left = NULL;
+		while (node) {
+			bitmask = 1 << node->bitnum;
+			leftmask &= ~bitmask;
+			rightmask &= ~bitmask;
+			if (node->leftnode == NODE && node->left) {
+				left = node->left;
+				free(node);
+				count++;
+				node = left;
+			} else if (node->rightnode == NODE && node->right) {
+				right = node->right;
+				free(node);
+				count++;
+				node = right;
+			} else {
+				node = NULL;
+			}
+		}
+		/* Propagate keymasks up along singleton chains. */
+		node = parent;
+		/* Force re-check */
+		bitmask = 1 << node->bitnum;
+		leftmask &= ~bitmask;
+		rightmask &= ~bitmask;
+		for (;;) {
+			if (node->left && node->right)
+				break;
+			if (node->left) {
+				left = node->left;
+				node->keymask |= left->keymask;
+				node->keybits |= left->keybits;
+			}
+			if (node->right) {
+				right = node->right;
+				node->keymask |= right->keymask;
+				node->keybits |= right->keybits;
+			}
+			node->keymask |= (1 << node->bitnum);
+			node = node->parent;
+			/* Force re-check */
+			bitmask = 1 << node->bitnum;
+			leftmask &= ~bitmask;
+			rightmask &= ~bitmask;
+		}
+	advance:
+		bitmask = 1 << node->bitnum;
+		if ((leftmask & bitmask) == 0 &&
+		    node->leftnode == NODE &&
+		    node->left) {
+			leftmask |= bitmask;
+			node = node->left;
+		} else if ((rightmask & bitmask) == 0 &&
+			   node->rightnode == NODE &&
+			   node->right) {
+			rightmask |= bitmask;
+			node = node->right;
+		} else {
+			leftmask &= ~bitmask;
+			rightmask &= ~bitmask;
+			node = node->parent;
+		}
+	}
+	if (verbose > 0)
+		printf("Pruned %d nodes\n", count);
+}
+
+/*
+ * Mark the nodes in the tree that lead to leaves that must be
+ * emitted.
+ */
+static void mark_nodes(struct tree *tree)
+{
+	struct node *node;
+	struct node *n;
+	unsigned int leftmask;
+	unsigned int rightmask;
+	unsigned int bitmask;
+	int marked;
+
+	marked = 0;
+	if (verbose > 0)
+		printf("Marking %s_%x\n", tree->type, tree->maxage);
+	if (tree->childnode == LEAF)
+		goto done;
+
+	assert(tree->childnode == NODE);
+	node = tree->root;
+	leftmask = rightmask = 0;
+	while (node) {
+		bitmask = 1 << node->bitnum;
+		if ((leftmask & bitmask) == 0) {
+			leftmask |= bitmask;
+			if (node->leftnode == LEAF) {
+				assert(node->left);
+				if (tree->leaf_mark(node->left)) {
+					n = node;
+					while (n && !n->mark) {
+						marked++;
+						n->mark = 1;
+						n = n->parent;
+					}
+				}
+			} else if (node->left) {
+				assert(node->leftnode == NODE);
+				node = node->left;
+				continue;
+			}
+		}
+		if ((rightmask & bitmask) == 0) {
+			rightmask |= bitmask;
+			if (node->rightnode == LEAF) {
+				assert(node->right);
+				if (tree->leaf_mark(node->right)) {
+					n = node;
+					while (n && !n->mark) {
+						marked++;
+						n->mark = 1;
+						n = n->parent;
+					}
+				}
+			} else if (node->right) {
+				assert(node->rightnode == NODE);
+				node = node->right;
+				continue;
+			}
+		}
+		leftmask &= ~bitmask;
+		rightmask &= ~bitmask;
+		node = node->parent;
+	}
+
+	/* second pass: left siblings and singletons */
+
+	assert(tree->childnode == NODE);
+	node = tree->root;
+	leftmask = rightmask = 0;
+	while (node) {
+		bitmask = 1 << node->bitnum;
+		if ((leftmask & bitmask) == 0) {
+			leftmask |= bitmask;
+			if (node->leftnode == LEAF) {
+				assert(node->left);
+				if (tree->leaf_mark(node->left)) {
+					n = node;
+					while (n && !n->mark) {
+						marked++;
+						n->mark = 1;
+						n = n->parent;
+					}
+				}
+			} else if (node->left) {
+				assert(node->leftnode == NODE);
+				node = node->left;
+				if (!node->mark && node->parent->mark) {
+					marked++;
+					node->mark = 1;
+				}
+				continue;
+			}
+		}
+		if ((rightmask & bitmask) == 0) {
+			rightmask |= bitmask;
+			if (node->rightnode == LEAF) {
+				assert(node->right);
+				if (tree->leaf_mark(node->right)) {
+					n = node;
+					while (n && !n->mark) {
+						marked++;
+						n->mark = 1;
+						n = n->parent;
+					}
+				}
+			} else if (node->right) {
+				assert(node->rightnode == NODE);
+				node = node->right;
+				if (!node->mark && node->parent->mark &&
+				    !node->parent->left) {
+					marked++;
+					node->mark = 1;
+				}
+				continue;
+			}
+		}
+		leftmask &= ~bitmask;
+		rightmask &= ~bitmask;
+		node = node->parent;
+	}
+done:
+	if (verbose > 0)
+		printf("Marked %d nodes\n", marked);
+}
+
+/*
+ * Compute the index of each node and leaf, which is the offset in the
+ * emitted trie.  These values must be pre-computed because relative
+ * offsets between nodes are used to navigate the tree.
+ */
+static int index_nodes(struct tree *tree, int index)
+{
+	struct node *node;
+	unsigned int leftmask;
+	unsigned int rightmask;
+	unsigned int bitmask;
+	int count;
+	int indent;
+
+	/* Align to a cache line (or half a cache line?). */
+	while (index % 64)
+		index++;
+	tree->index = index;
+	indent = 1;
+	count = 0;
+
+	if (verbose > 0)
+		printf("Indexing %s_%x: %d\n", tree->type, tree->maxage, index);
+	if (tree->childnode == LEAF) {
+		index += tree->leaf_size(tree->root);
+		goto done;
+	}
+
+	assert(tree->childnode == NODE);
+	node = tree->root;
+	leftmask = rightmask = 0;
+	while (node) {
+		if (!node->mark)
+			goto skip;
+		count++;
+		if (node->index != index)
+			node->index = index;
+		index += node->size;
+skip:
+		while (node) {
+			bitmask = 1 << node->bitnum;
+			if (node->mark && (leftmask & bitmask) == 0) {
+				leftmask |= bitmask;
+				if (node->leftnode == LEAF) {
+					assert(node->left);
+					*tree->leaf_index(tree, node->left) =
+									index;
+					index += tree->leaf_size(node->left);
+					count++;
+				} else if (node->left) {
+					assert(node->leftnode == NODE);
+					indent += 1;
+					node = node->left;
+					break;
+				}
+			}
+			if (node->mark && (rightmask & bitmask) == 0) {
+				rightmask |= bitmask;
+				if (node->rightnode == LEAF) {
+					assert(node->right);
+					*tree->leaf_index(tree, node->right) = index;
+					index += tree->leaf_size(node->right);
+					count++;
+				} else if (node->right) {
+					assert(node->rightnode == NODE);
+					indent += 1;
+					node = node->right;
+					break;
+				}
+			}
+			leftmask &= ~bitmask;
+			rightmask &= ~bitmask;
+			node = node->parent;
+			indent -= 1;
+		}
+	}
+done:
+	/* Round up to a multiple of 16 */
+	while (index % 16)
+		index++;
+	if (verbose > 0)
+		printf("Final index %d\n", index);
+	return index;
+}
+
+/*
+ * Mark the nodes in a subtree, helper for size_nodes().
+ */
+static int mark_subtree(struct node *node)
+{
+	int changed;
+
+	if (!node || node->mark)
+		return 0;
+	node->mark = 1;
+	node->index = node->parent->index;
+	changed = 1;
+	if (node->leftnode == NODE)
+		changed += mark_subtree(node->left);
+	if (node->rightnode == NODE)
+		changed += mark_subtree(node->right);
+	return changed;
+}
+
+/*
+ * Compute the size of nodes and leaves. We start by assuming that
+ * each node needs to store a three-byte offset. The indexes of the
+ * nodes are calculated based on that, and then this function is
+ * called to see if the sizes of some nodes can be reduced.  This is
+ * repeated until no more changes are seen.
+ */
+static int size_nodes(struct tree *tree)
+{
+	struct tree *next;
+	struct node *node;
+	struct node *right;
+	struct node *n;
+	unsigned int leftmask;
+	unsigned int rightmask;
+	unsigned int bitmask;
+	unsigned int pathbits;
+	unsigned int pathmask;
+	unsigned int nbit;
+	int changed;
+	int offset;
+	int size;
+	int indent;
+
+	indent = 1;
+	changed = 0;
+	size = 0;
+
+	if (verbose > 0)
+		printf("Sizing %s_%x\n", tree->type, tree->maxage);
+	if (tree->childnode == LEAF)
+		goto done;
+
+	assert(tree->childnode == NODE);
+	pathbits = 0;
+	pathmask = 0;
+	node = tree->root;
+	leftmask = rightmask = 0;
+	while (node) {
+		if (!node->mark)
+			goto skip;
+		offset = 0;
+		if (!node->left || !node->right) {
+			size = 1;
+		} else {
+			if (node->rightnode == NODE) {
+				/*
+				 * If the right node is not marked,
+				 * look for a corresponding node in
+				 * the next tree.  Such a node need
+				 * not exist.
+				 */
+				right = node->right;
+				next = tree->next;
+				while (!right->mark) {
+					assert(next);
+					n = next->root;
+					while (n->bitnum != node->bitnum) {
+						nbit = 1 << n->bitnum;
+						if (!(pathmask & nbit))
+							break;
+						if (pathbits & nbit) {
+							if (n->rightnode == LEAF)
+								break;
+							n = n->right;
+						} else {
+							if (n->leftnode == LEAF)
+								break;
+							n = n->left;
+						}
+					}
+					if (n->bitnum != node->bitnum)
+						break;
+					n = n->right;
+					right = n;
+					next = next->next;
+				}
+				/* Make sure the right node is marked. */
+				if (!right->mark)
+					changed += mark_subtree(right);
+				offset = right->index - node->index;
+			} else {
+				offset = *tree->leaf_index(tree, node->right);
+				offset -= node->index;
+			}
+			assert(offset >= 0);
+			assert(offset <= 0xffffff);
+			if (offset <= 0xff) {
+				size = 2;
+			} else if (offset <= 0xffff) {
+				size = 3;
+			} else { /* offset <= 0xffffff */
+				size = 4;
+			}
+		}
+		if (node->size != size || node->offset != offset) {
+			node->size = size;
+			node->offset = offset;
+			changed++;
+		}
+skip:
+		while (node) {
+			bitmask = 1 << node->bitnum;
+			pathmask |= bitmask;
+			if (node->mark && (leftmask & bitmask) == 0) {
+				leftmask |= bitmask;
+				if (node->leftnode == LEAF) {
+					assert(node->left);
+				} else if (node->left) {
+					assert(node->leftnode == NODE);
+					indent += 1;
+					node = node->left;
+					break;
+				}
+			}
+			if (node->mark && (rightmask & bitmask) == 0) {
+				rightmask |= bitmask;
+				pathbits |= bitmask;
+				if (node->rightnode == LEAF) {
+					assert(node->right);
+				} else if (node->right) {
+					assert(node->rightnode == NODE);
+					indent += 1;
+					node = node->right;
+					break;
+				}
+			}
+			leftmask &= ~bitmask;
+			rightmask &= ~bitmask;
+			pathmask &= ~bitmask;
+			pathbits &= ~bitmask;
+			node = node->parent;
+			indent -= 1;
+		}
+	}
+done:
+	if (verbose > 0)
+		printf("Found %d changes\n", changed);
+	return changed;
+}
+
+/*
+ * Emit a trie for the given tree into the data array.
+ */
+static void emit(struct tree *tree, unsigned char *data)
+{
+	struct node *node;
+	unsigned int leftmask;
+	unsigned int rightmask;
+	unsigned int bitmask;
+	int offlen;
+	int offset;
+	int index;
+	int indent;
+	int size;
+	int bytes;
+	int leaves;
+	int nodes[4];
+	unsigned char byte;
+
+	nodes[0] = nodes[1] = nodes[2] = nodes[3] = 0;
+	leaves = 0;
+	bytes = 0;
+	index = tree->index;
+	data += index;
+	indent = 1;
+	if (verbose > 0)
+		printf("Emitting %s_%x\n", tree->type, tree->maxage);
+	if (tree->childnode == LEAF) {
+		assert(tree->root);
+		tree->leaf_emit(tree->root, data);
+		size = tree->leaf_size(tree->root);
+		index += size;
+		leaves++;
+		goto done;
+	}
+
+	assert(tree->childnode == NODE);
+	node = tree->root;
+	leftmask = rightmask = 0;
+	while (node) {
+		if (!node->mark)
+			goto skip;
+		assert(node->offset != -1);
+		assert(node->index == index);
+
+		byte = 0;
+		if (node->nextbyte)
+			byte |= NEXTBYTE;
+		byte |= (node->bitnum & BITNUM);
+		if (node->left && node->right) {
+			if (node->leftnode == NODE)
+				byte |= LEFTNODE;
+			if (node->rightnode == NODE)
+				byte |= RIGHTNODE;
+			if (node->offset <= 0xff)
+				offlen = 1;
+			else if (node->offset <= 0xffff)
+				offlen = 2;
+			else
+				offlen = 3;
+			nodes[offlen]++;
+			offset = node->offset;
+			byte |= offlen << OFFLEN_SHIFT;
+			*data++ = byte;
+			index++;
+			while (offlen--) {
+				*data++ = offset & 0xff;
+				index++;
+				offset >>= 8;
+			}
+		} else if (node->left) {
+			if (node->leftnode == NODE)
+				byte |= TRIENODE;
+			nodes[0]++;
+			*data++ = byte;
+			index++;
+		} else if (node->right) {
+			byte |= RIGHTNODE;
+			if (node->rightnode == NODE)
+				byte |= TRIENODE;
+			nodes[0]++;
+			*data++ = byte;
+			index++;
+		} else {
+			assert(0);
+		}
+skip:
+		while (node) {
+			bitmask = 1 << node->bitnum;
+			if (node->mark && (leftmask & bitmask) == 0) {
+				leftmask |= bitmask;
+				if (node->leftnode == LEAF) {
+					assert(node->left);
+					data = tree->leaf_emit(node->left,
+							       data);
+					size = tree->leaf_size(node->left);
+					index += size;
+					bytes += size;
+					leaves++;
+				} else if (node->left) {
+					assert(node->leftnode == NODE);
+					indent += 1;
+					node = node->left;
+					break;
+				}
+			}
+			if (node->mark && (rightmask & bitmask) == 0) {
+				rightmask |= bitmask;
+				if (node->rightnode == LEAF) {
+					assert(node->right);
+					data = tree->leaf_emit(node->right,
+							       data);
+					size = tree->leaf_size(node->right);
+					index += size;
+					bytes += size;
+					leaves++;
+				} else if (node->right) {
+					assert(node->rightnode == NODE);
+					indent += 1;
+					node = node->right;
+					break;
+				}
+			}
+			leftmask &= ~bitmask;
+			rightmask &= ~bitmask;
+			node = node->parent;
+			indent -= 1;
+		}
+	}
+done:
+	if (verbose > 0) {
+		printf("Emitted %d (%d) leaves",
+			leaves, bytes);
+		printf(" %d (%d+%d+%d+%d) nodes",
+			nodes[0] + nodes[1] + nodes[2] + nodes[3],
+			nodes[0], nodes[1], nodes[2], nodes[3]);
+		printf(" %d total\n", index - tree->index);
+	}
+}
+
+/* ------------------------------------------------------------------ */
+
+/*
+ * Unicode data.
+ *
+ * We need to keep track of the Canonical Combining Class, the Age,
+ * and decompositions for a code point.
+ *
+ * For the Age, we store the index into the ages table.  Effectively
+ * this is a generation number that the table maps to a unicode
+ * version.
+ *
+ * The correction field is used to indicate that this entry is in the
+ * corrections array, which contains decompositions that were
+ * corrected in later revisions.  The value of the correction field is
+ * the Unicode version in which the mapping was corrected.
+ */
+struct unicode_data {
+	unsigned int code;
+	int ccc;
+	int gen;
+	int correction;
+	unsigned int *utf32nfkdi;
+	unsigned int *utf32nfkdicf;
+	char *utf8nfkdi;
+	char *utf8nfkdicf;
+};
+
+struct unicode_data unicode_data[0x110000];
+struct unicode_data *corrections;
+int    corrections_count;
+
+struct tree *nfkdi_tree;
+struct tree *nfkdicf_tree;
+
+struct tree *trees;
+int          trees_count;
+
+/*
+ * Check the corrections array to see if this entry was corrected at
+ * some point.
+ */
+static struct unicode_data *corrections_lookup(struct unicode_data *u)
+{
+	int i;
+
+	for (i = 0; i != corrections_count; i++)
+		if (u->code == corrections[i].code)
+			return &corrections[i];
+	return u;
+}
+
+static int nfkdi_equal(void *l, void *r)
+{
+	struct unicode_data *left = l;
+	struct unicode_data *right = r;
+
+	if (left->gen != right->gen)
+		return 0;
+	if (left->ccc != right->ccc)
+		return 0;
+	if (left->utf8nfkdi && right->utf8nfkdi &&
+	    strcmp(left->utf8nfkdi, right->utf8nfkdi) == 0)
+		return 1;
+	if (left->utf8nfkdi || right->utf8nfkdi)
+		return 0;
+	return 1;
+}
+
+static int nfkdicf_equal(void *l, void *r)
+{
+	struct unicode_data *left = l;
+	struct unicode_data *right = r;
+
+	if (left->gen != right->gen)
+		return 0;
+	if (left->ccc != right->ccc)
+		return 0;
+	if (left->utf8nfkdicf && right->utf8nfkdicf &&
+	    strcmp(left->utf8nfkdicf, right->utf8nfkdicf) == 0)
+		return 1;
+	if (left->utf8nfkdicf && right->utf8nfkdicf)
+		return 0;
+	if (left->utf8nfkdicf || right->utf8nfkdicf)
+		return 0;
+	if (left->utf8nfkdi && right->utf8nfkdi &&
+	    strcmp(left->utf8nfkdi, right->utf8nfkdi) == 0)
+		return 1;
+	if (left->utf8nfkdi || right->utf8nfkdi)
+		return 0;
+	return 1;
+}
+
+static void nfkdi_print(void *l, int indent)
+{
+	struct unicode_data *leaf = l;
+
+	printf("%*sleaf @ %p code %X ccc %d gen %d", indent, "", leaf,
+		leaf->code, leaf->ccc, leaf->gen);
+	if (leaf->utf8nfkdi && leaf->utf8nfkdi[0] == HANGUL)
+		printf(" nfkdi \"%s\"", "HANGUL SYLLABLE");
+	else if (leaf->utf8nfkdi)
+		printf(" nfkdi \"%s\"", (const char*)leaf->utf8nfkdi);
+	printf("\n");
+}
+
+static void nfkdicf_print(void *l, int indent)
+{
+	struct unicode_data *leaf = l;
+
+	printf("%*sleaf @ %p code %X ccc %d gen %d", indent, "", leaf,
+		leaf->code, leaf->ccc, leaf->gen);
+	if (leaf->utf8nfkdicf)
+		printf(" nfkdicf \"%s\"", (const char*)leaf->utf8nfkdicf);
+	else if (leaf->utf8nfkdi && leaf->utf8nfkdi[0] == HANGUL)
+		printf(" nfkdi \"%s\"", "HANGUL SYLLABLE");
+	else if (leaf->utf8nfkdi)
+		printf(" nfkdi \"%s\"", (const char*)leaf->utf8nfkdi);
+	printf("\n");
+}
+
+static int nfkdi_mark(void *l)
+{
+	return 1;
+}
+
+static int nfkdicf_mark(void *l)
+{
+	struct unicode_data *leaf = l;
+
+	if (leaf->utf8nfkdicf)
+		return 1;
+	return 0;
+}
+
+static int correction_mark(void *l)
+{
+	struct unicode_data *leaf = l;
+
+	return leaf->correction;
+}
+
+static int nfkdi_size(void *l)
+{
+	struct unicode_data *leaf = l;
+
+	int size = 2;
+	if (HANGUL_SYLLABLE(leaf->code))
+		size += 1;
+	else if (leaf->utf8nfkdi)
+		size += strlen(leaf->utf8nfkdi) + 1;
+	return size;
+}
+
+static int nfkdicf_size(void *l)
+{
+	struct unicode_data *leaf = l;
+
+	int size = 2;
+	if (HANGUL_SYLLABLE(leaf->code))
+		size += 1;
+	else if (leaf->utf8nfkdicf)
+		size += strlen(leaf->utf8nfkdicf) + 1;
+	else if (leaf->utf8nfkdi)
+		size += strlen(leaf->utf8nfkdi) + 1;
+	return size;
+}
+
+static int *nfkdi_index(struct tree *tree, void *l)
+{
+	struct unicode_data *leaf = l;
+
+	return &tree->leafindex[leaf->code];
+}
+
+static int *nfkdicf_index(struct tree *tree, void *l)
+{
+	struct unicode_data *leaf = l;
+
+	return &tree->leafindex[leaf->code];
+}
+
+static unsigned char *nfkdi_emit(void *l, unsigned char *data)
+{
+	struct unicode_data *leaf = l;
+	unsigned char *s;
+
+	*data++ = leaf->gen;
+	if (HANGUL_SYLLABLE(leaf->code)) {
+		*data++ = DECOMPOSE;
+		*data++ = HANGUL;
+	} else if (leaf->utf8nfkdi) {
+		*data++ = DECOMPOSE;
+		s = (unsigned char*)leaf->utf8nfkdi;
+		while ((*data++ = *s++) != 0)
+			;
+	} else {
+		*data++ = leaf->ccc;
+	}
+	return data;
+}
+
+static unsigned char *nfkdicf_emit(void *l, unsigned char *data)
+{
+	struct unicode_data *leaf = l;
+	unsigned char *s;
+
+	*data++ = leaf->gen;
+	if (HANGUL_SYLLABLE(leaf->code)) {
+		*data++ = DECOMPOSE;
+		*data++ = HANGUL;
+	} else if (leaf->utf8nfkdicf) {
+		*data++ = DECOMPOSE;
+		s = (unsigned char*)leaf->utf8nfkdicf;
+		while ((*data++ = *s++) != 0)
+			;
+	} else if (leaf->utf8nfkdi) {
+		*data++ = DECOMPOSE;
+		s = (unsigned char*)leaf->utf8nfkdi;
+		while ((*data++ = *s++) != 0)
+			;
+	} else {
+		*data++ = leaf->ccc;
+	}
+	return data;
+}
+
+static void utf8_create(struct unicode_data *data)
+{
+	char utf[18*4+1];
+	char *u;
+	unsigned int *um;
+	int i;
+
+	if (data->utf8nfkdi) {
+		assert(data->utf8nfkdi[0] == HANGUL);
+		return;
+	}
+
+	u = utf;
+	um = data->utf32nfkdi;
+	if (um) {
+		for (i = 0; um[i]; i++)
+			u += utf8encode(u, um[i]);
+		*u = '\0';
+		data->utf8nfkdi = strdup(utf);
+	}
+	u = utf;
+	um = data->utf32nfkdicf;
+	if (um) {
+		for (i = 0; um[i]; i++)
+			u += utf8encode(u, um[i]);
+		*u = '\0';
+		if (!data->utf8nfkdi || strcmp(data->utf8nfkdi, utf))
+			data->utf8nfkdicf = strdup(utf);
+	}
+}
+
+static void utf8_init(void)
+{
+	unsigned int unichar;
+	int i;
+
+	for (unichar = 0; unichar != 0x110000; unichar++)
+		utf8_create(&unicode_data[unichar]);
+
+	for (i = 0; i != corrections_count; i++)
+		utf8_create(&corrections[i]);
+}
+
+static void trees_init(void)
+{
+	struct unicode_data *data;
+	unsigned int maxage;
+	unsigned int nextage;
+	int count;
+	int i;
+	int j;
+
+	/* Count the number of different ages. */
+	count = 0;
+	nextage = (unsigned int)-1;
+	do {
+		maxage = nextage;
+		nextage = 0;
+		for (i = 0; i <= corrections_count; i++) {
+			data = &corrections[i];
+			if (nextage < data->correction &&
+			    data->correction < maxage)
+				nextage = data->correction;
+		}
+		count++;
+	} while (nextage);
+
+	/* Two trees per age: nfkdi and nfkdicf */
+	trees_count = count * 2;
+	trees = calloc(trees_count, sizeof(struct tree));
+
+	/* Assign ages to the trees. */
+	count = trees_count;
+	nextage = (unsigned int)-1;
+	do {
+		maxage = nextage;
+		trees[--count].maxage = maxage;
+		trees[--count].maxage = maxage;
+		nextage = 0;
+		for (i = 0; i <= corrections_count; i++) {
+			data = &corrections[i];
+			if (nextage < data->correction &&
+			    data->correction < maxage)
+				nextage = data->correction;
+		}
+	} while (nextage);
+
+	/* The ages assigned above are off by one. */
+	for (i = 0; i != trees_count; i++) {
+		j = 0;
+		while (ages[j] < trees[i].maxage)
+			j++;
+		trees[i].maxage = ages[j-1];
+	}
+
+	/* Set up the forwarding between trees. */
+	trees[trees_count-2].next = &trees[trees_count-1];
+	trees[trees_count-1].leaf_mark = nfkdi_mark;
+	trees[trees_count-2].leaf_mark = nfkdicf_mark;
+	for (i = 0; i != trees_count-2; i += 2) {
+		trees[i].next = &trees[trees_count-2];
+		trees[i].leaf_mark = correction_mark;
+		trees[i+1].next = &trees[trees_count-1];
+		trees[i+1].leaf_mark = correction_mark;
+	}
+
+	/* Assign the callouts. */
+	for (i = 0; i != trees_count; i += 2) {
+		trees[i].type = "nfkdicf";
+		trees[i].leaf_equal = nfkdicf_equal;
+		trees[i].leaf_print = nfkdicf_print;
+		trees[i].leaf_size = nfkdicf_size;
+		trees[i].leaf_index = nfkdicf_index;
+		trees[i].leaf_emit = nfkdicf_emit;
+
+		trees[i+1].type = "nfkdi";
+		trees[i+1].leaf_equal = nfkdi_equal;
+		trees[i+1].leaf_print = nfkdi_print;
+		trees[i+1].leaf_size = nfkdi_size;
+		trees[i+1].leaf_index = nfkdi_index;
+		trees[i+1].leaf_emit = nfkdi_emit;
+	}
+
+	/* Finish init. */
+	for (i = 0; i != trees_count; i++)
+		trees[i].childnode = NODE;
+}
+
+static void trees_populate(void)
+{
+	struct unicode_data *data;
+	unsigned int unichar;
+	char keyval[4];
+	int keylen;
+	int i;
+
+	for (i = 0; i != trees_count; i++) {
+		if (verbose > 0) {
+			printf("Populating %s_%x\n",
+				trees[i].type, trees[i].maxage);
+		}
+		for (unichar = 0; unichar != 0x110000; unichar++) {
+			if (unicode_data[unichar].gen < 0)
+				continue;
+			keylen = utf8encode(keyval, unichar);
+			data = corrections_lookup(&unicode_data[unichar]);
+			if (data->correction <= trees[i].maxage)
+				data = &unicode_data[unichar];
+			insert(&trees[i], keyval, keylen, data);
+		}
+	}
+}
+
+static void trees_reduce(void)
+{
+	int i;
+	int size;
+	int changed;
+
+	for (i = 0; i != trees_count; i++)
+		prune(&trees[i]);
+	for (i = 0; i != trees_count; i++)
+		mark_nodes(&trees[i]);
+	do {
+		size = 0;
+		for (i = 0; i != trees_count; i++)
+			size = index_nodes(&trees[i], size);
+		changed = 0;
+		for (i = 0; i != trees_count; i++)
+			changed += size_nodes(&trees[i]);
+	} while (changed);
+
+	utf8data = calloc(size, 1);
+	utf8data_size = size;
+	for (i = 0; i != trees_count; i++)
+		emit(&trees[i], utf8data);
+
+	if (verbose > 0) {
+		for (i = 0; i != trees_count; i++) {
+			printf("%s_%x idx %d\n",
+				trees[i].type, trees[i].maxage, trees[i].index);
+		}
+	}
+
+	nfkdi = utf8data + trees[trees_count-1].index;
+	nfkdicf = utf8data + trees[trees_count-2].index;
+
+	nfkdi_tree = &trees[trees_count-1];
+	nfkdicf_tree = &trees[trees_count-2];
+}
+
+static void verify(struct tree *tree)
+{
+	struct unicode_data *data;
+	utf8leaf_t	*leaf;
+	unsigned int	unichar;
+	char		key[4];
+	unsigned char	hangul[UTF8HANGULLEAF];
+	int		report;
+	int		nocf;
+
+	if (verbose > 0)
+		printf("Verifying %s_%x\n", tree->type, tree->maxage);
+	nocf = strcmp(tree->type, "nfkdicf");
+
+	for (unichar = 0; unichar != 0x110000; unichar++) {
+		report = 0;
+		data = corrections_lookup(&unicode_data[unichar]);
+		if (data->correction <= tree->maxage)
+			data = &unicode_data[unichar];
+		utf8encode(key,unichar);
+		leaf = utf8lookup(tree, hangul, key);
+
+		if (!leaf) {
+			if (data->gen != -1)
+				report++;
+			if (unichar < 0xd800 || unichar > 0xdfff)
+				report++;
+		} else {
+			if (unichar >= 0xd800 && unichar <= 0xdfff)
+				report++;
+			if (data->gen == -1)
+				report++;
+			if (data->gen != LEAF_GEN(leaf))
+				report++;
+			if (LEAF_CCC(leaf) == DECOMPOSE) {
+				if (HANGUL_SYLLABLE(data->code)) {
+					if (data->utf8nfkdi[0] != HANGUL)
+						report++;
+				} else if (nocf) {
+					if (!data->utf8nfkdi) {
+						report++;
+					} else if (strcmp(data->utf8nfkdi,
+							  LEAF_STR(leaf))) {
+						report++;
+					}
+				} else {
+					if (!data->utf8nfkdicf &&
+					    !data->utf8nfkdi) {
+						report++;
+					} else if (data->utf8nfkdicf) {
+						if (strcmp(data->utf8nfkdicf,
+							   LEAF_STR(leaf)))
+							report++;
+					} else if (strcmp(data->utf8nfkdi,
+							  LEAF_STR(leaf))) {
+						report++;
+					}
+				}
+			} else if (data->ccc != LEAF_CCC(leaf)) {
+				report++;
+			}
+		}
+		if (report) {
+			printf("%X code %X gen %d ccc %d"
+				" nfkdi -> \"%s\"",
+				unichar, data->code, data->gen,
+				data->ccc,
+				data->utf8nfkdi);
+			if (leaf) {
+				printf(" gen %d ccc %d"
+					" nfkdi -> \"%s\"",
+					LEAF_GEN(leaf),
+					LEAF_CCC(leaf),
+					LEAF_CCC(leaf) == DECOMPOSE ?
+						LEAF_STR(leaf) : "");
+			}
+			printf("\n");
+		}
+	}
+}
+
+static void trees_verify(void)
+{
+	int i;
+
+	for (i = 0; i != trees_count; i++)
+		verify(&trees[i]);
+}
+
+/* ------------------------------------------------------------------ */
+
+static void help(void)
+{
+	printf("Usage: %s [options]\n", argv0);
+	printf("\n");
+	printf("This program creates an a data trie used for parsing and\n");
+	printf("normalization of UTF-8 strings. The trie is derived from\n");
+	printf("a set of input files from the Unicode character database\n");
+	printf("found at: http://www.unicode.org/Public/UCD/latest/ucd/\n");
+	printf("\n");
+	printf("The generated tree supports two normalization forms:\n");
+	printf("\n");
+	printf("\tnfkdi:\n");
+	printf("\t- Apply unicode normalization form NFKD.\n");
+	printf("\t- Remove any Default_Ignorable_Code_Point.\n");
+	printf("\n");
+	printf("\tnfkdicf:\n");
+	printf("\t- Apply unicode normalization form NFKD.\n");
+	printf("\t- Remove any Default_Ignorable_Code_Point.\n");
+	printf("\t- Apply a full casefold (C + F).\n");
+	printf("\n");
+	printf("These forms were chosen as being most useful when dealing\n");
+	printf("with file names: NFKD catches most cases where characters\n");
+	printf("should be considered equivalent. The ignorables are mostly\n");
+	printf("invisible, making names hard to type.\n");
+	printf("\n");
+	printf("The options to specify the files to be used are listed\n");
+	printf("below with their default values, which are the names used\n");
+	printf("by version 11.0.0 of the Unicode Character Database.\n");
+	printf("\n");
+	printf("The input files:\n");
+	printf("\t-a %s\n", AGE_NAME);
+	printf("\t-c %s\n", CCC_NAME);
+	printf("\t-p %s\n", PROP_NAME);
+	printf("\t-d %s\n", DATA_NAME);
+	printf("\t-f %s\n", FOLD_NAME);
+	printf("\t-n %s\n", NORM_NAME);
+	printf("\n");
+	printf("Additionally, the generated tables are tested using:\n");
+	printf("\t-t %s\n", TEST_NAME);
+	printf("\n");
+	printf("Finally, the output file:\n");
+	printf("\t-o %s\n", UTF8_NAME);
+	printf("\n");
+}
+
+static void usage(void)
+{
+	help();
+	exit(1);
+}
+
+static void open_fail(const char *name, int error)
+{
+	printf("Error %d opening %s: %s\n", error, name, strerror(error));
+	exit(1);
+}
+
+static void file_fail(const char *filename)
+{
+	printf("Error parsing %s\n", filename);
+	exit(1);
+}
+
+static void line_fail(const char *filename, const char *line)
+{
+	printf("Error parsing %s:%s\n", filename, line);
+	exit(1);
+}
+
+/* ------------------------------------------------------------------ */
+
+static void print_utf32(unsigned int *utf32str)
+{
+	int	i;
+
+	for (i = 0; utf32str[i]; i++)
+		printf(" %X", utf32str[i]);
+}
+
+static void print_utf32nfkdi(unsigned int unichar)
+{
+	printf(" %X ->", unichar);
+	print_utf32(unicode_data[unichar].utf32nfkdi);
+	printf("\n");
+}
+
+static void print_utf32nfkdicf(unsigned int unichar)
+{
+	printf(" %X ->", unichar);
+	print_utf32(unicode_data[unichar].utf32nfkdicf);
+	printf("\n");
+}
+
+/* ------------------------------------------------------------------ */
+
+static void age_init(void)
+{
+	FILE *file;
+	unsigned int first;
+	unsigned int last;
+	unsigned int unichar;
+	unsigned int major;
+	unsigned int minor;
+	unsigned int revision;
+	int gen;
+	int count;
+	int ret;
+
+	if (verbose > 0)
+		printf("Parsing %s\n", age_name);
+
+	file = fopen(age_name, "r");
+	if (!file)
+		open_fail(age_name, errno);
+	count = 0;
+
+	gen = 0;
+	while (fgets(line, LINESIZE, file)) {
+		ret = sscanf(line, "# Age=V%d_%d_%d",
+				&major, &minor, &revision);
+		if (ret == 3) {
+			ages_count++;
+			if (verbose > 1)
+				printf(" Age V%d_%d_%d\n",
+					major, minor, revision);
+			if (!age_valid(major, minor, revision))
+				line_fail(age_name, line);
+			continue;
+		}
+		ret = sscanf(line, "# Age=V%d_%d", &major, &minor);
+		if (ret == 2) {
+			ages_count++;
+			if (verbose > 1)
+				printf(" Age V%d_%d\n", major, minor);
+			if (!age_valid(major, minor, 0))
+				line_fail(age_name, line);
+			continue;
+		}
+	}
+
+	/* We must have found something above. */
+	if (verbose > 1)
+		printf("%d age entries\n", ages_count);
+	if (ages_count == 0 || ages_count > MAXGEN)
+		file_fail(age_name);
+
+	/* There is a 0 entry. */
+	ages_count++;
+	ages = calloc(ages_count + 1, sizeof(*ages));
+	/* And a guard entry. */
+	ages[ages_count] = (unsigned int)-1;
+
+	rewind(file);
+	count = 0;
+	gen = 0;
+	while (fgets(line, LINESIZE, file)) {
+		ret = sscanf(line, "# Age=V%d_%d_%d",
+				&major, &minor, &revision);
+		if (ret == 3) {
+			ages[++gen] =
+				UNICODE_AGE(major, minor, revision);
+			if (verbose > 1)
+				printf(" Age V%d_%d_%d = gen %d\n",
+					major, minor, revision, gen);
+			if (!age_valid(major, minor, revision))
+				line_fail(age_name, line);
+			continue;
+		}
+		ret = sscanf(line, "# Age=V%d_%d", &major, &minor);
+		if (ret == 2) {
+			ages[++gen] = UNICODE_AGE(major, minor, 0);
+			if (verbose > 1)
+				printf(" Age V%d_%d = %d\n",
+					major, minor, gen);
+			if (!age_valid(major, minor, 0))
+				line_fail(age_name, line);
+			continue;
+		}
+		ret = sscanf(line, "%X..%X ; %d.%d #",
+			     &first, &last, &major, &minor);
+		if (ret == 4) {
+			for (unichar = first; unichar <= last; unichar++)
+				unicode_data[unichar].gen = gen;
+			count += 1 + last - first;
+			if (verbose > 1)
+				printf("  %X..%X gen %d\n", first, last, gen);
+			if (!utf32valid(first) || !utf32valid(last))
+				line_fail(age_name, line);
+			continue;
+		}
+		ret = sscanf(line, "%X ; %d.%d #", &unichar, &major, &minor);
+		if (ret == 3) {
+			unicode_data[unichar].gen = gen;
+			count++;
+			if (verbose > 1)
+				printf("  %X gen %d\n", unichar, gen);
+			if (!utf32valid(unichar))
+				line_fail(age_name, line);
+			continue;
+		}
+	}
+	unicode_maxage = ages[gen];
+	fclose(file);
+
+	/* Nix surrogate block */
+	if (verbose > 1)
+		printf(" Removing surrogate block D800..DFFF\n");
+	for (unichar = 0xd800; unichar <= 0xdfff; unichar++)
+		unicode_data[unichar].gen = -1;
+
+	if (verbose > 0)
+	        printf("Found %d entries\n", count);
+	if (count == 0)
+		file_fail(age_name);
+}
+
+static void ccc_init(void)
+{
+	FILE *file;
+	unsigned int first;
+	unsigned int last;
+	unsigned int unichar;
+	unsigned int value;
+	int count;
+	int ret;
+
+	if (verbose > 0)
+		printf("Parsing %s\n", ccc_name);
+
+	file = fopen(ccc_name, "r");
+	if (!file)
+		open_fail(ccc_name, errno);
+
+	count = 0;
+	while (fgets(line, LINESIZE, file)) {
+		ret = sscanf(line, "%X..%X ; %d #", &first, &last, &value);
+		if (ret == 3) {
+			for (unichar = first; unichar <= last; unichar++) {
+				unicode_data[unichar].ccc = value;
+                                count++;
+			}
+			if (verbose > 1)
+				printf(" %X..%X ccc %d\n", first, last, value);
+			if (!utf32valid(first) || !utf32valid(last))
+				line_fail(ccc_name, line);
+			continue;
+		}
+		ret = sscanf(line, "%X ; %d #", &unichar, &value);
+		if (ret == 2) {
+			unicode_data[unichar].ccc = value;
+                        count++;
+			if (verbose > 1)
+				printf(" %X ccc %d\n", unichar, value);
+			if (!utf32valid(unichar))
+				line_fail(ccc_name, line);
+			continue;
+		}
+	}
+	fclose(file);
+
+	if (verbose > 0)
+		printf("Found %d entries\n", count);
+	if (count == 0)
+		file_fail(ccc_name);
+}
+
+static void nfkdi_init(void)
+{
+	FILE *file;
+	unsigned int unichar;
+	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
+	char *s;
+	unsigned int *um;
+	int count;
+	int i;
+	int ret;
+
+	if (verbose > 0)
+		printf("Parsing %s\n", data_name);
+	file = fopen(data_name, "r");
+	if (!file)
+		open_fail(data_name, errno);
+
+	count = 0;
+	while (fgets(line, LINESIZE, file)) {
+		ret = sscanf(line, "%X;%*[^;];%*[^;];%*[^;];%*[^;];%[^;];",
+			     &unichar, buf0);
+		if (ret != 2)
+			continue;
+		if (!utf32valid(unichar))
+			line_fail(data_name, line);
+
+		s = buf0;
+		/* skip over <tag> */
+		if (*s == '<')
+			while (*s++ != ' ')
+				;
+		/* decode the decomposition into UTF-32 */
+		i = 0;
+		while (*s) {
+			mapping[i] = strtoul(s, &s, 16);
+			if (!utf32valid(mapping[i]))
+				line_fail(data_name, line);
+			i++;
+		}
+		mapping[i++] = 0;
+
+		um = malloc(i * sizeof(unsigned int));
+		memcpy(um, mapping, i * sizeof(unsigned int));
+		unicode_data[unichar].utf32nfkdi = um;
+
+		if (verbose > 1)
+			print_utf32nfkdi(unichar);
+		count++;
+	}
+	fclose(file);
+	if (verbose > 0)
+		printf("Found %d entries\n", count);
+	if (count == 0)
+		file_fail(data_name);
+}
+
+static void nfkdicf_init(void)
+{
+	FILE *file;
+	unsigned int unichar;
+	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
+	char status;
+	char *s;
+	unsigned int *um;
+	int i;
+	int count;
+	int ret;
+
+	if (verbose > 0)
+		printf("Parsing %s\n", fold_name);
+	file = fopen(fold_name, "r");
+	if (!file)
+		open_fail(fold_name, errno);
+
+	count = 0;
+	while (fgets(line, LINESIZE, file)) {
+		ret = sscanf(line, "%X; %c; %[^;];", &unichar, &status, buf0);
+		if (ret != 3)
+			continue;
+		if (!utf32valid(unichar))
+			line_fail(fold_name, line);
+		/* Use the C+F casefold. */
+		if (status != 'C' && status != 'F')
+			continue;
+		s = buf0;
+		if (*s == '<')
+			while (*s++ != ' ')
+				;
+		i = 0;
+		while (*s) {
+			mapping[i] = strtoul(s, &s, 16);
+			if (!utf32valid(mapping[i]))
+				line_fail(fold_name, line);
+			i++;
+		}
+		mapping[i++] = 0;
+
+		um = malloc(i * sizeof(unsigned int));
+		memcpy(um, mapping, i * sizeof(unsigned int));
+		unicode_data[unichar].utf32nfkdicf = um;
+
+		if (verbose > 1)
+			print_utf32nfkdicf(unichar);
+		count++;
+	}
+	fclose(file);
+	if (verbose > 0)
+		printf("Found %d entries\n", count);
+	if (count == 0)
+		file_fail(fold_name);
+}
+
+static void ignore_init(void)
+{
+	FILE *file;
+	unsigned int unichar;
+	unsigned int first;
+	unsigned int last;
+	unsigned int *um;
+	int count;
+	int ret;
+
+	if (verbose > 0)
+		printf("Parsing %s\n", prop_name);
+	file = fopen(prop_name, "r");
+	if (!file)
+		open_fail(prop_name, errno);
+	assert(file);
+	count = 0;
+	while (fgets(line, LINESIZE, file)) {
+		ret = sscanf(line, "%X..%X ; %s # ", &first, &last, buf0);
+		if (ret == 3) {
+			if (strcmp(buf0, "Default_Ignorable_Code_Point"))
+				continue;
+			if (!utf32valid(first) || !utf32valid(last))
+				line_fail(prop_name, line);
+			for (unichar = first; unichar <= last; unichar++) {
+				free(unicode_data[unichar].utf32nfkdi);
+				um = malloc(sizeof(unsigned int));
+				*um = 0;
+				unicode_data[unichar].utf32nfkdi = um;
+				free(unicode_data[unichar].utf32nfkdicf);
+				um = malloc(sizeof(unsigned int));
+				*um = 0;
+				unicode_data[unichar].utf32nfkdicf = um;
+				count++;
+			}
+			if (verbose > 1)
+				printf(" %X..%X Default_Ignorable_Code_Point\n",
+					first, last);
+			continue;
+		}
+		ret = sscanf(line, "%X ; %s # ", &unichar, buf0);
+		if (ret == 2) {
+			if (strcmp(buf0, "Default_Ignorable_Code_Point"))
+				continue;
+			if (!utf32valid(unichar))
+				line_fail(prop_name, line);
+			free(unicode_data[unichar].utf32nfkdi);
+			um = malloc(sizeof(unsigned int));
+			*um = 0;
+			unicode_data[unichar].utf32nfkdi = um;
+			free(unicode_data[unichar].utf32nfkdicf);
+			um = malloc(sizeof(unsigned int));
+			*um = 0;
+			unicode_data[unichar].utf32nfkdicf = um;
+			if (verbose > 1)
+				printf(" %X Default_Ignorable_Code_Point\n",
+					unichar);
+			count++;
+			continue;
+		}
+	}
+	fclose(file);
+
+	if (verbose > 0)
+		printf("Found %d entries\n", count);
+	if (count == 0)
+		file_fail(prop_name);
+}
+
+static void corrections_init(void)
+{
+	FILE *file;
+	unsigned int unichar;
+	unsigned int major;
+	unsigned int minor;
+	unsigned int revision;
+	unsigned int age;
+	unsigned int *um;
+	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
+	char *s;
+	int i;
+	int count;
+	int ret;
+
+	if (verbose > 0)
+		printf("Parsing %s\n", norm_name);
+	file = fopen(norm_name, "r");
+	if (!file)
+		open_fail(norm_name, errno);
+
+	count = 0;
+	while (fgets(line, LINESIZE, file)) {
+		ret = sscanf(line, "%X;%[^;];%[^;];%d.%d.%d #",
+				&unichar, buf0, buf1,
+				&major, &minor, &revision);
+		if (ret != 6)
+			continue;
+		if (!utf32valid(unichar) || !age_valid(major, minor, revision))
+			line_fail(norm_name, line);
+		count++;
+	}
+	corrections = calloc(count, sizeof(struct unicode_data));
+	corrections_count = count;
+	rewind(file);
+
+	count = 0;
+	while (fgets(line, LINESIZE, file)) {
+		ret = sscanf(line, "%X;%[^;];%[^;];%d.%d.%d #",
+				&unichar, buf0, buf1,
+				&major, &minor, &revision);
+		if (ret != 6)
+			continue;
+		if (!utf32valid(unichar) || !age_valid(major, minor, revision))
+			line_fail(norm_name, line);
+		corrections[count] = unicode_data[unichar];
+		assert(corrections[count].code == unichar);
+		age = UNICODE_AGE(major, minor, revision);
+		corrections[count].correction = age;
+
+		i = 0;
+		s = buf0;
+		while (*s) {
+			mapping[i] = strtoul(s, &s, 16);
+			if (!utf32valid(mapping[i]))
+				line_fail(norm_name, line);
+			i++;
+		}
+		mapping[i++] = 0;
+
+		um = malloc(i * sizeof(unsigned int));
+		memcpy(um, mapping, i * sizeof(unsigned int));
+		corrections[count].utf32nfkdi = um;
+
+		if (verbose > 1)
+			printf(" %X -> %s -> %s V%d_%d_%d\n",
+				unichar, buf0, buf1, major, minor, revision);
+		count++;
+	}
+	fclose(file);
+
+	if (verbose > 0)
+	        printf("Found %d entries\n", count);
+	if (count == 0)
+		file_fail(norm_name);
+}
+
+/* ------------------------------------------------------------------ */
+
+/*
+ * Hangul decomposition (algorithm from Section 3.12 of Unicode 6.3.0)
+ *
+ * AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
+ * D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
+ *
+ * SBase = 0xAC00
+ * LBase = 0x1100
+ * VBase = 0x1161
+ * TBase = 0x11A7
+ * LCount = 19
+ * VCount = 21
+ * TCount = 28
+ * NCount = 588 (VCount * TCount)
+ * SCount = 11172 (LCount * NCount)
+ *
+ * Decomposition:
+ *   SIndex = s - SBase
+ *
+ * LV (Canonical/Full)
+ *   LIndex = SIndex / NCount
+ *   VIndex = (Sindex % NCount) / TCount
+ *   LPart = LBase + LIndex
+ *   VPart = VBase + VIndex
+ *
+ * LVT (Canonical)
+ *   LVIndex = (SIndex / TCount) * TCount
+ *   TIndex = (Sindex % TCount)
+ *   LVPart = SBase + LVIndex
+ *   TPart = TBase + TIndex
+ *
+ * LVT (Full)
+ *   LIndex = SIndex / NCount
+ *   VIndex = (Sindex % NCount) / TCount
+ *   TIndex = (Sindex % TCount)
+ *   LPart = LBase + LIndex
+ *   VPart = VBase + VIndex
+ *   if (TIndex == 0) {
+ *          d = <LPart, VPart>
+ *   } else {
+ *          TPart = TBase + TIndex
+ *          d = <LPart, VPart, TPart>
+ *   }
+ *
+ */
+
+static void hangul_decompose(void)
+{
+	unsigned int sb = 0xAC00;
+	unsigned int lb = 0x1100;
+	unsigned int vb = 0x1161;
+	unsigned int tb = 0x11a7;
+	/* unsigned int lc = 19; */
+	unsigned int vc = 21;
+	unsigned int tc = 28;
+	unsigned int nc = (vc * tc);
+	/* unsigned int sc = (lc * nc); */
+	unsigned int unichar;
+	unsigned int mapping[4];
+	unsigned int *um;
+        int count;
+	int i;
+
+	if (verbose > 0)
+		printf("Decomposing hangul\n");
+	/* Hangul */
+	count = 0;
+	for (unichar = 0xAC00; unichar <= 0xD7A3; unichar++) {
+		unsigned int si = unichar - sb;
+		unsigned int li = si / nc;
+		unsigned int vi = (si % nc) / tc;
+		unsigned int ti = si % tc;
+
+		i = 0;
+		mapping[i++] = lb + li;
+		mapping[i++] = vb + vi;
+		if (ti)
+			mapping[i++] = tb + ti;
+		mapping[i++] = 0;
+
+		assert(!unicode_data[unichar].utf32nfkdi);
+		um = malloc(i * sizeof(unsigned int));
+		memcpy(um, mapping, i * sizeof(unsigned int));
+		unicode_data[unichar].utf32nfkdi = um;
+
+		assert(!unicode_data[unichar].utf32nfkdicf);
+		um = malloc(i * sizeof(unsigned int));
+		memcpy(um, mapping, i * sizeof(unsigned int));
+		unicode_data[unichar].utf32nfkdicf = um;
+
+		/*
+		 * Add a cookie as a reminder that the hangul syllable
+		 * decompositions must not be stored in the generated
+		 * trie.
+		 */
+		unicode_data[unichar].utf8nfkdi = malloc(2);
+		unicode_data[unichar].utf8nfkdi[0] = HANGUL;
+		unicode_data[unichar].utf8nfkdi[1] = '\0';
+
+		if (verbose > 1)
+			print_utf32nfkdi(unichar);
+
+		count++;
+	}
+	if (verbose > 0)
+		printf("Created %d entries\n", count);
+}
+
+static void nfkdi_decompose(void)
+{
+	unsigned int unichar;
+	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
+	unsigned int *um;
+	unsigned int *dc;
+	int count;
+	int i;
+	int j;
+	int ret;
+
+	if (verbose > 0)
+		printf("Decomposing nfkdi\n");
+
+	count = 0;
+	for (unichar = 0; unichar != 0x110000; unichar++) {
+		if (!unicode_data[unichar].utf32nfkdi)
+			continue;
+		for (;;) {
+			ret = 1;
+			i = 0;
+			um = unicode_data[unichar].utf32nfkdi;
+			while (*um) {
+				dc = unicode_data[*um].utf32nfkdi;
+				if (dc) {
+					for (j = 0; dc[j]; j++)
+						mapping[i++] = dc[j];
+					ret = 0;
+				} else {
+					mapping[i++] = *um;
+				}
+				um++;
+			}
+			mapping[i++] = 0;
+			if (ret)
+				break;
+			free(unicode_data[unichar].utf32nfkdi);
+			um = malloc(i * sizeof(unsigned int));
+			memcpy(um, mapping, i * sizeof(unsigned int));
+			unicode_data[unichar].utf32nfkdi = um;
+		}
+		/* Add this decomposition to nfkdicf if there is no entry. */
+		if (!unicode_data[unichar].utf32nfkdicf) {
+			um = malloc(i * sizeof(unsigned int));
+			memcpy(um, mapping, i * sizeof(unsigned int));
+			unicode_data[unichar].utf32nfkdicf = um;
+		}
+		if (verbose > 1)
+			print_utf32nfkdi(unichar);
+		count++;
+	}
+	if (verbose > 0)
+		printf("Processed %d entries\n", count);
+}
+
+static void nfkdicf_decompose(void)
+{
+	unsigned int unichar;
+	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
+	unsigned int *um;
+	unsigned int *dc;
+	int count;
+	int i;
+	int j;
+	int ret;
+
+	if (verbose > 0)
+		printf("Decomposing nfkdicf\n");
+	count = 0;
+	for (unichar = 0; unichar != 0x110000; unichar++) {
+		if (!unicode_data[unichar].utf32nfkdicf)
+			continue;
+		for (;;) {
+			ret = 1;
+			i = 0;
+			um = unicode_data[unichar].utf32nfkdicf;
+			while (*um) {
+				dc = unicode_data[*um].utf32nfkdicf;
+				if (dc) {
+					for (j = 0; dc[j]; j++)
+						mapping[i++] = dc[j];
+					ret = 0;
+				} else {
+					mapping[i++] = *um;
+				}
+				um++;
+			}
+			mapping[i++] = 0;
+			if (ret)
+				break;
+			free(unicode_data[unichar].utf32nfkdicf);
+			um = malloc(i * sizeof(unsigned int));
+			memcpy(um, mapping, i * sizeof(unsigned int));
+			unicode_data[unichar].utf32nfkdicf = um;
+		}
+		if (verbose > 1)
+			print_utf32nfkdicf(unichar);
+		count++;
+	}
+	if (verbose > 0)
+		printf("Processed %d entries\n", count);
+}
+
+/* ------------------------------------------------------------------ */
+
+int utf8agemax(struct tree *, const char *);
+int utf8nagemax(struct tree *, const char *, size_t);
+int utf8agemin(struct tree *, const char *);
+int utf8nagemin(struct tree *, const char *, size_t);
+ssize_t utf8len(struct tree *, const char *);
+ssize_t utf8nlen(struct tree *, const char *, size_t);
+struct utf8cursor;
+int utf8cursor(struct utf8cursor *, struct tree *, const char *);
+int utf8ncursor(struct utf8cursor *, struct tree *, const char *, size_t);
+int utf8byte(struct utf8cursor *);
+
+/*
+ * Hangul decomposition (algorithm from Section 3.12 of Unicode 6.3.0)
+ *
+ * AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
+ * D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
+ *
+ * SBase = 0xAC00
+ * LBase = 0x1100
+ * VBase = 0x1161
+ * TBase = 0x11A7
+ * LCount = 19
+ * VCount = 21
+ * TCount = 28
+ * NCount = 588 (VCount * TCount)
+ * SCount = 11172 (LCount * NCount)
+ *
+ * Decomposition:
+ *   SIndex = s - SBase
+ *
+ * LV (Canonical/Full)
+ *   LIndex = SIndex / NCount
+ *   VIndex = (Sindex % NCount) / TCount
+ *   LPart = LBase + LIndex
+ *   VPart = VBase + VIndex
+ *
+ * LVT (Canonical)
+ *   LVIndex = (SIndex / TCount) * TCount
+ *   TIndex = (Sindex % TCount)
+ *   LVPart = SBase + LVIndex
+ *   TPart = TBase + TIndex
+ *
+ * LVT (Full)
+ *   LIndex = SIndex / NCount
+ *   VIndex = (Sindex % NCount) / TCount
+ *   TIndex = (Sindex % TCount)
+ *   LPart = LBase + LIndex
+ *   VPart = VBase + VIndex
+ *   if (TIndex == 0) {
+ *          d = <LPart, VPart>
+ *   } else {
+ *          TPart = TBase + TIndex
+ *          d = <LPart, VPart, TPart>
+ *   }
+ */
+
+/* Constants */
+#define SB	(0xAC00)
+#define LB	(0x1100)
+#define VB	(0x1161)
+#define TB	(0x11A7)
+#define LC	(19)
+#define VC	(21)
+#define TC	(28)
+#define NC	(VC * TC)
+#define SC	(LC * NC)
+
+/* Algorithmic decomposition of hangul syllable. */
+static utf8leaf_t *utf8hangul(const char *str, unsigned char *hangul)
+{
+	unsigned int	si;
+	unsigned int	li;
+	unsigned int	vi;
+	unsigned int	ti;
+	unsigned char	*h;
+
+	/* Calculate the SI, LI, VI, and TI values. */
+	si = utf8decode(str) - SB;
+	li = si / NC;
+	vi = (si % NC) / TC;
+	ti = si % TC;
+
+	/* Fill in base of leaf. */
+	h = hangul;
+	LEAF_GEN(h) = 2;
+	LEAF_CCC(h) = DECOMPOSE;
+	h += 2;
+
+	/* Add LPart, a 3-byte UTF-8 sequence. */
+	h += utf8encode((char *)h, li + LB);
+
+	/* Add VPart, a 3-byte UTF-8 sequence. */
+	h += utf8encode((char *)h, vi + VB);
+
+	/* Add TPart if required, also a 3-byte UTF-8 sequence. */
+	if (ti)
+		h += utf8encode((char *)h, ti + TB);
+
+	/* Terminate string. */
+	h[0] = '\0';
+
+	return hangul;
+}
+
+/*
+ * Use trie to scan s, touching at most len bytes.
+ * Returns the leaf if one exists, NULL otherwise.
+ *
+ * A non-NULL return guarantees that the UTF-8 sequence starting at s
+ * is well-formed and corresponds to a known unicode code point.  The
+ * shorthand for this will be "is valid UTF-8 unicode".
+ */
+static utf8leaf_t *utf8nlookup(struct tree *tree, unsigned char *hangul,
+			       const char *s, size_t len)
+{
+	utf8trie_t	*trie = utf8data + tree->index;
+	int		offlen;
+	int		offset;
+	int		mask;
+	int		node;
+
+	if (!tree)
+		return NULL;
+	if (len == 0)
+		return NULL;
+	node = 1;
+	while (node) {
+		offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
+		if (*trie & NEXTBYTE) {
+			if (--len == 0)
+				return NULL;
+			s++;
+		}
+		mask = 1 << (*trie & BITNUM);
+		if (*s & mask) {
+			/* Right leg */
+			if (offlen) {
+				/* Right node at offset of trie */
+				node = (*trie & RIGHTNODE);
+				offset = trie[offlen];
+				while (--offlen) {
+					offset <<= 8;
+					offset |= trie[offlen];
+				}
+				trie += offset;
+			} else if (*trie & RIGHTPATH) {
+				/* Right node after this node */
+				node = (*trie & TRIENODE);
+				trie++;
+			} else {
+				/* No right node. */
+				return NULL;
+			}
+		} else {
+			/* Left leg */
+			if (offlen) {
+				/* Left node after this node. */
+				node = (*trie & LEFTNODE);
+				trie += offlen + 1;
+			} else if (*trie & RIGHTPATH) {
+				/* No left node. */
+				return NULL;
+			} else {
+				/* Left node after this node */
+				node = (*trie & TRIENODE);
+				trie++;
+			}
+		}
+	}
+	/*
+	 * Hangul decomposition is done algorithmically. These are the
+	 * codepoints >= 0xAC00 and <= 0xD7A3. Their UTF-8 encoding is
+	 * always 3 bytes long, so s has been advanced twice, and the
+	 * start of the sequence is at s-2.
+	 */
+	if (LEAF_CCC(trie) == DECOMPOSE && LEAF_STR(trie)[0] == HANGUL)
+		trie = utf8hangul(s - 2, hangul);
+	return trie;
+}
+
+/*
+ * Use trie to scan s.
+ * Returns the leaf if one exists, NULL otherwise.
+ *
+ * Forwards to trie_nlookup().
+ */
+static utf8leaf_t *utf8lookup(struct tree *tree, unsigned char *hangul,
+			      const char *s)
+{
+	return utf8nlookup(tree, hangul, s, (size_t)-1);
+}
+
+/*
+ * Return the number of bytes used by the current UTF-8 sequence.
+ * Assumes the input points to the first byte of a valid UTF-8
+ * sequence.
+ */
+static inline int utf8clen(const char *s)
+{
+	unsigned char c = *s;
+	return 1 + (c >= 0xC0) + (c >= 0xE0) + (c >= 0xF0);
+}
+
+/*
+ * Maximum age of any character in s.
+ * Return -1 if s is not valid UTF-8 unicode.
+ * Return 0 if only non-assigned code points are used.
+ */
+int utf8agemax(struct tree *tree, const char *s)
+{
+	utf8leaf_t	*leaf;
+	int		age = 0;
+	int		leaf_age;
+	unsigned char	hangul[UTF8HANGULLEAF];
+
+	if (!tree)
+		return -1;
+
+	while (*s) {
+		leaf = utf8lookup(tree, hangul, s);
+		if (!leaf)
+			return -1;
+		leaf_age = ages[LEAF_GEN(leaf)];
+		if (leaf_age <= tree->maxage && leaf_age > age)
+			age = leaf_age;
+		s += utf8clen(s);
+	}
+	return age;
+}
+
+/*
+ * Minimum age of any character in s.
+ * Return -1 if s is not valid UTF-8 unicode.
+ * Return 0 if non-assigned code points are used.
+ */
+int utf8agemin(struct tree *tree, const char *s)
+{
+	utf8leaf_t	*leaf;
+	int		age;
+	int		leaf_age;
+	unsigned char	hangul[UTF8HANGULLEAF];
+
+	if (!tree)
+		return -1;
+	age = tree->maxage;
+	while (*s) {
+		leaf = utf8lookup(tree, hangul, s);
+		if (!leaf)
+			return -1;
+		leaf_age = ages[LEAF_GEN(leaf)];
+		if (leaf_age <= tree->maxage && leaf_age < age)
+			age = leaf_age;
+		s += utf8clen(s);
+	}
+	return age;
+}
+
+/*
+ * Maximum age of any character in s, touch at most len bytes.
+ * Return -1 if s is not valid UTF-8 unicode.
+ */
+int utf8nagemax(struct tree *tree, const char *s, size_t len)
+{
+	utf8leaf_t	*leaf;
+	int		age = 0;
+	int		leaf_age;
+	unsigned char	hangul[UTF8HANGULLEAF];
+
+	if (!tree)
+		return -1;
+
+        while (len && *s) {
+		leaf = utf8nlookup(tree, hangul, s, len);
+		if (!leaf)
+			return -1;
+		leaf_age = ages[LEAF_GEN(leaf)];
+		if (leaf_age <= tree->maxage && leaf_age > age)
+			age = leaf_age;
+		len -= utf8clen(s);
+		s += utf8clen(s);
+	}
+	return age;
+}
+
+/*
+ * Maximum age of any character in s, touch at most len bytes.
+ * Return -1 if s is not valid UTF-8 unicode.
+ */
+int utf8nagemin(struct tree *tree, const char *s, size_t len)
+{
+	utf8leaf_t	*leaf;
+	int		leaf_age;
+	int		age;
+	unsigned char	hangul[UTF8HANGULLEAF];
+
+	if (!tree)
+		return -1;
+	age = tree->maxage;
+        while (len && *s) {
+		leaf = utf8nlookup(tree, hangul, s, len);
+		if (!leaf)
+			return -1;
+		leaf_age = ages[LEAF_GEN(leaf)];
+		if (leaf_age <= tree->maxage && leaf_age < age)
+			age = leaf_age;
+		len -= utf8clen(s);
+		s += utf8clen(s);
+	}
+	return age;
+}
+
+/*
+ * Length of the normalization of s.
+ * Return -1 if s is not valid UTF-8 unicode.
+ *
+ * A string of Default_Ignorable_Code_Point has length 0.
+ */
+ssize_t utf8len(struct tree *tree, const char *s)
+{
+	utf8leaf_t	*leaf;
+	size_t		ret = 0;
+	unsigned char	hangul[UTF8HANGULLEAF];
+
+	if (!tree)
+		return -1;
+	while (*s) {
+		leaf = utf8lookup(tree, hangul, s);
+		if (!leaf)
+			return -1;
+		if (ages[LEAF_GEN(leaf)] > tree->maxage)
+			ret += utf8clen(s);
+		else if (LEAF_CCC(leaf) == DECOMPOSE)
+			ret += strlen(LEAF_STR(leaf));
+		else
+			ret += utf8clen(s);
+		s += utf8clen(s);
+	}
+	return ret;
+}
+
+/*
+ * Length of the normalization of s, touch at most len bytes.
+ * Return -1 if s is not valid UTF-8 unicode.
+ */
+ssize_t utf8nlen(struct tree *tree, const char *s, size_t len)
+{
+	utf8leaf_t	*leaf;
+	size_t		ret = 0;
+	unsigned char	hangul[UTF8HANGULLEAF];
+
+	if (!tree)
+		return -1;
+	while (len && *s) {
+		leaf = utf8nlookup(tree, hangul, s, len);
+		if (!leaf)
+			return -1;
+		if (ages[LEAF_GEN(leaf)] > tree->maxage)
+			ret += utf8clen(s);
+		else if (LEAF_CCC(leaf) == DECOMPOSE)
+			ret += strlen(LEAF_STR(leaf));
+		else
+			ret += utf8clen(s);
+		len -= utf8clen(s);
+		s += utf8clen(s);
+	}
+	return ret;
+}
+
+/*
+ * Cursor structure used by the normalizer.
+ */
+struct utf8cursor {
+	struct tree	*tree;
+	const char	*s;
+	const char	*p;
+	const char	*ss;
+	const char	*sp;
+	unsigned int	len;
+	unsigned int	slen;
+	short int	ccc;
+	short int	nccc;
+	unsigned int	unichar;
+	unsigned char	hangul[UTF8HANGULLEAF];
+};
+
+/*
+ * Set up an utf8cursor for use by utf8byte().
+ *
+ *   s      : string.
+ *   len    : length of s.
+ *   u8c    : pointer to cursor.
+ *   trie   : utf8trie_t to use for normalization.
+ *
+ * Returns -1 on error, 0 on success.
+ */
+int utf8ncursor(struct utf8cursor *u8c, struct tree *tree, const char *s,
+		size_t len)
+{
+	if (!tree)
+		return -1;
+	if (!s)
+		return -1;
+	u8c->tree = tree;
+	u8c->s = s;
+	u8c->p = NULL;
+	u8c->ss = NULL;
+	u8c->sp = NULL;
+	u8c->len = len;
+	u8c->slen = 0;
+	u8c->ccc = STOPPER;
+	u8c->nccc = STOPPER;
+	u8c->unichar = 0;
+	/* Check we didn't clobber the maximum length. */
+	if (u8c->len != len)
+		return -1;
+	/* The first byte of s may not be an utf8 continuation. */
+	if (len > 0 && (*s & 0xC0) == 0x80)
+		return -1;
+	return 0;
+}
+
+/*
+ * Set up an utf8cursor for use by utf8byte().
+ *
+ *   s      : NUL-terminated string.
+ *   u8c    : pointer to cursor.
+ *   trie   : utf8trie_t to use for normalization.
+ *
+ * Returns -1 on error, 0 on success.
+ */
+int utf8cursor(struct utf8cursor *u8c, struct tree *tree, const char *s)
+{
+	return utf8ncursor(u8c, tree, s, (unsigned int)-1);
+}
+
+/*
+ * Get one byte from the normalized form of the string described by u8c.
+ *
+ * Returns the byte cast to an unsigned char on success, and -1 on failure.
+ *
+ * The cursor keeps track of the location in the string in u8c->s.
+ * When a character is decomposed, the current location is stored in
+ * u8c->p, and u8c->s is set to the start of the decomposition. Note
+ * that bytes from a decomposition do not count against u8c->len.
+ *
+ * Characters are emitted if they match the current CCC in u8c->ccc.
+ * Hitting end-of-string while u8c->ccc == STOPPER means we're done,
+ * and the function returns 0 in that case.
+ *
+ * Sorting by CCC is done by repeatedly scanning the string.  The
+ * values of u8c->s and u8c->p are stored in u8c->ss and u8c->sp at
+ * the start of the scan.  The first pass finds the lowest CCC to be
+ * emitted and stores it in u8c->nccc, the second pass emits the
+ * characters with this CCC and finds the next lowest CCC. This limits
+ * the number of passes to 1 + the number of different CCCs in the
+ * sequence being scanned.
+ *
+ * Therefore:
+ *  u8c->p  != NULL -> a decomposition is being scanned.
+ *  u8c->ss != NULL -> this is a repeating scan.
+ *  u8c->ccc == -1  -> this is the first scan of a repeating scan.
+ */
+int utf8byte(struct utf8cursor *u8c)
+{
+	utf8leaf_t *leaf;
+	int ccc;
+
+	for (;;) {
+		/* Check for the end of a decomposed character. */
+		if (u8c->p && *u8c->s == '\0') {
+			u8c->s = u8c->p;
+			u8c->p = NULL;
+		}
+
+		/* Check for end-of-string. */
+		if (!u8c->p && (u8c->len == 0 || *u8c->s == '\0')) {
+			/* There is no next byte. */
+			if (u8c->ccc == STOPPER)
+				return 0;
+			/* End-of-string during a scan counts as a stopper. */
+			ccc = STOPPER;
+			goto ccc_mismatch;
+		} else if ((*u8c->s & 0xC0) == 0x80) {
+			/* This is a continuation of the current character. */
+			if (!u8c->p)
+				u8c->len--;
+			return (unsigned char)*u8c->s++;
+		}
+
+		/* Look up the data for the current character. */
+		if (u8c->p) {
+			leaf = utf8lookup(u8c->tree, u8c->hangul, u8c->s);
+		} else {
+			leaf = utf8nlookup(u8c->tree, u8c->hangul,
+					   u8c->s, u8c->len);
+		}
+
+		/* No leaf found implies that the input is a binary blob. */
+		if (!leaf)
+			return -1;
+
+		/* Characters that are too new have CCC 0. */
+		if (ages[LEAF_GEN(leaf)] > u8c->tree->maxage) {
+			ccc = STOPPER;
+		} else if ((ccc = LEAF_CCC(leaf)) == DECOMPOSE) {
+			u8c->len -= utf8clen(u8c->s);
+			u8c->p = u8c->s + utf8clen(u8c->s);
+			u8c->s = LEAF_STR(leaf);
+			/* Empty decomposition implies CCC 0. */
+			if (*u8c->s == '\0') {
+				if (u8c->ccc == STOPPER)
+					continue;
+				ccc = STOPPER;
+				goto ccc_mismatch;
+			}
+			leaf = utf8lookup(u8c->tree, u8c->hangul, u8c->s);
+			ccc = LEAF_CCC(leaf);
+		}
+		u8c->unichar = utf8decode(u8c->s);
+
+		/*
+		 * If this is not a stopper, then see if it updates
+		 * the next canonical class to be emitted.
+		 */
+		if (ccc != STOPPER && u8c->ccc < ccc && ccc < u8c->nccc)
+			u8c->nccc = ccc;
+
+		/*
+		 * Return the current byte if this is the current
+		 * combining class.
+		 */
+		if (ccc == u8c->ccc) {
+			if (!u8c->p)
+				u8c->len--;
+			return (unsigned char)*u8c->s++;
+		}
+
+		/* Current combining class mismatch. */
+	ccc_mismatch:
+		if (u8c->nccc == STOPPER) {
+			/*
+			 * Scan forward for the first canonical class
+			 * to be emitted.  Save the position from
+			 * which to restart.
+			 */
+			assert(u8c->ccc == STOPPER);
+			u8c->ccc = MINCCC - 1;
+			u8c->nccc = ccc;
+			u8c->sp = u8c->p;
+			u8c->ss = u8c->s;
+			u8c->slen = u8c->len;
+			if (!u8c->p)
+				u8c->len -= utf8clen(u8c->s);
+			u8c->s += utf8clen(u8c->s);
+		} else if (ccc != STOPPER) {
+			/* Not a stopper, and not the ccc we're emitting. */
+			if (!u8c->p)
+				u8c->len -= utf8clen(u8c->s);
+			u8c->s += utf8clen(u8c->s);
+		} else if (u8c->nccc != MAXCCC + 1) {
+			/* At a stopper, restart for next ccc. */
+			u8c->ccc = u8c->nccc;
+			u8c->nccc = MAXCCC + 1;
+			u8c->s = u8c->ss;
+			u8c->p = u8c->sp;
+			u8c->len = u8c->slen;
+		} else {
+			/* All done, proceed from here. */
+			u8c->ccc = STOPPER;
+			u8c->nccc = STOPPER;
+			u8c->sp = NULL;
+			u8c->ss = NULL;
+			u8c->slen = 0;
+		}
+	}
+}
+
+/* ------------------------------------------------------------------ */
+
+static int normalize_line(struct tree *tree)
+{
+	char *s;
+	char *t;
+	int c;
+	struct utf8cursor u8c;
+
+	/* First test: null-terminated string. */
+	s = buf2;
+	t = buf3;
+	if (utf8cursor(&u8c, tree, s))
+		return -1;
+	while ((c = utf8byte(&u8c)) > 0)
+		if (c != (unsigned char)*t++)
+			return -1;
+	if (c < 0)
+		return -1;
+	if (*t != 0)
+		return -1;
+
+	/* Second test: length-limited string. */
+	s = buf2;
+	/* Replace NUL with a value that will cause an error if seen. */
+	s[strlen(s) + 1] = -1;
+	t = buf3;
+	if (utf8cursor(&u8c, tree, s))
+		return -1;
+	while ((c = utf8byte(&u8c)) > 0)
+		if (c != (unsigned char)*t++)
+			return -1;
+	if (c < 0)
+		return -1;
+	if (*t != 0)
+		return -1;
+
+	return 0;
+}
+
+static void normalization_test(void)
+{
+	FILE *file;
+	unsigned int unichar;
+	struct unicode_data *data;
+	char *s;
+	char *t;
+	int ret;
+	int ignorables;
+	int tests = 0;
+	int failures = 0;
+
+	if (verbose > 0)
+		printf("Parsing %s\n", test_name);
+	/* Step one, read data from file. */
+	file = fopen(test_name, "r");
+	if (!file)
+		open_fail(test_name, errno);
+
+	while (fgets(line, LINESIZE, file)) {
+		ret = sscanf(line, "%[^;];%*[^;];%*[^;];%*[^;];%[^;];",
+			     buf0, buf1);
+		if (ret != 2 || *line == '#')
+			continue;
+		s = buf0;
+		t = buf2;
+		while (*s) {
+			unichar = strtoul(s, &s, 16);
+			t += utf8encode(t, unichar);
+		}
+		*t = '\0';
+
+		ignorables = 0;
+		s = buf1;
+		t = buf3;
+		while (*s) {
+			unichar = strtoul(s, &s, 16);
+			data = &unicode_data[unichar];
+			if (data->utf8nfkdi && !*data->utf8nfkdi)
+				ignorables = 1;
+			else
+				t += utf8encode(t, unichar);
+		}
+		*t = '\0';
+
+		tests++;
+		if (normalize_line(nfkdi_tree) < 0) {
+			printf("Line %s -> %s", buf0, buf1);
+			if (ignorables)
+				printf(" (ignorables removed)");
+			printf(" failure\n");
+			failures++;
+		}
+	}
+	fclose(file);
+	if (verbose > 0)
+		printf("Ran %d tests with %d failures\n", tests, failures);
+	if (failures)
+		file_fail(test_name);
+}
+
+/* ------------------------------------------------------------------ */
+
+static void write_file(void)
+{
+	FILE *file;
+	int i;
+	int j;
+	int t;
+	int gen;
+
+	if (verbose > 0)
+		printf("Writing %s\n", utf8_name);
+	file = fopen(utf8_name, "w");
+	if (!file)
+		open_fail(utf8_name, errno);
+
+	fprintf(file, "/* This file is generated code, do not edit. */\n");
+	fprintf(file, "#ifndef __INCLUDED_FROM_UTF8NORM_C__\n");
+	fprintf(file, "#error Only nls_utf8-norm.c should include this file.\n");
+	fprintf(file, "#endif\n");
+	fprintf(file, "\n");
+	fprintf(file, "static const unsigned int utf8vers = %#x;\n",
+		unicode_maxage);
+	fprintf(file, "\n");
+	fprintf(file, "static const unsigned int utf8agetab[] = {\n");
+	for (i = 0; i != ages_count; i++)
+		fprintf(file, "\t%#x%s\n", ages[i],
+			ages[i] == unicode_maxage ? "" : ",");
+	fprintf(file, "};\n");
+	fprintf(file, "\n");
+	fprintf(file, "static const struct utf8data utf8nfkdicfdata[] = {\n");
+	t = 0;
+	for (gen = 0; gen < ages_count; gen++) {
+		fprintf(file, "\t{ %#x, %d }%s\n",
+			ages[gen], trees[t].index,
+			ages[gen] == unicode_maxage ? "" : ",");
+		if (trees[t].maxage == ages[gen])
+			t += 2;
+	}
+	fprintf(file, "};\n");
+	fprintf(file, "\n");
+	fprintf(file, "static const struct utf8data utf8nfkdidata[] = {\n");
+	t = 1;
+	for (gen = 0; gen < ages_count; gen++) {
+		fprintf(file, "\t{ %#x, %d }%s\n",
+			ages[gen], trees[t].index,
+			ages[gen] == unicode_maxage ? "" : ",");
+		if (trees[t].maxage == ages[gen])
+			t += 2;
+	}
+	fprintf(file, "};\n");
+	fprintf(file, "\n");
+	fprintf(file, "static const unsigned char utf8data[%zd] = {\n",
+		utf8data_size);
+	t = 0;
+	for (i = 0; i != utf8data_size; i += 16) {
+		if (i == trees[t].index) {
+			fprintf(file, "\t/* %s_%x */\n",
+				trees[t].type, trees[t].maxage);
+			if (t < trees_count-1)
+				t++;
+		}
+		fprintf(file, "\t");
+		for (j = i; j != i + 16; j++)
+			fprintf(file, "0x%.2x%s", utf8data[j],
+				(j < utf8data_size -1 ? "," : ""));
+		fprintf(file, "\n");
+	}
+	fprintf(file, "};\n");
+	fclose(file);
+}
+
+/* ------------------------------------------------------------------ */
+
+int main(int argc, char *argv[])
+{
+	unsigned int unichar;
+	int opt;
+
+	argv0 = argv[0];
+
+	while ((opt = getopt(argc, argv, "a:c:d:f:hn:o:p:t:v")) != -1) {
+		switch (opt) {
+		case 'a':
+			age_name = optarg;
+			break;
+		case 'c':
+			ccc_name = optarg;
+			break;
+		case 'd':
+			data_name = optarg;
+			break;
+		case 'f':
+			fold_name = optarg;
+			break;
+		case 'n':
+			norm_name = optarg;
+			break;
+		case 'o':
+			utf8_name = optarg;
+			break;
+		case 'p':
+			prop_name = optarg;
+			break;
+		case 't':
+			test_name = optarg;
+			break;
+		case 'v':
+			verbose++;
+			break;
+		case 'h':
+			help();
+			exit(0);
+		default:
+			usage();
+		}
+	}
+
+	if (verbose > 1)
+		help();
+	for (unichar = 0; unichar != 0x110000; unichar++)
+		unicode_data[unichar].code = unichar;
+	age_init();
+	ccc_init();
+	nfkdi_init();
+	nfkdicf_init();
+	ignore_init();
+	corrections_init();
+	hangul_decompose();
+	nfkdi_decompose();
+	nfkdicf_decompose();
+	utf8_init();
+	trees_init();
+	trees_populate();
+	trees_reduce();
+	trees_verify();
+	/* Prevent "unused function" warning. */
+	(void)lookup(nfkdi_tree, " ");
+	if (verbose > 2)
+		tree_walk(nfkdi_tree);
+	if (verbose > 2)
+		tree_walk(nfkdicf_tree);
+	normalization_test();
+	write_file();
+
+	return 0;
+}
diff --git a/util/subst.c b/util/subst.c
new file mode 100644
index 0000000..be2a0dd
--- /dev/null
+++ b/util/subst.c
@@ -0,0 +1,468 @@
+/*
+ * subst.c --- substitution program
+ *
+ * Subst is used as a quickie program to do @ substitutions
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#else
+#define HAVE_SYS_STAT_H
+#define HAVE_SYS_TIME_H
+#endif
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <time.h>
+#include <utime.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#else
+extern char *optarg;
+extern int optind;
+#endif
+
+
+struct subst_entry {
+	char *name;
+	char *value;
+	struct subst_entry *next;
+};
+
+static struct subst_entry *subst_table = 0;
+
+static int add_subst(char *name, char *value)
+{
+	struct subst_entry	*ent = 0;
+
+	ent = (struct subst_entry *) malloc(sizeof(struct subst_entry));
+	if (!ent)
+		goto fail;
+	ent->name = (char *) malloc(strlen(name)+1);
+	if (!ent->name)
+		goto fail;
+	ent->value = (char *) malloc(strlen(value)+1);
+	if (!ent->value)
+		goto fail;
+	strcpy(ent->name, name);
+	strcpy(ent->value, value);
+	ent->next = subst_table;
+	subst_table = ent;
+	return 0;
+fail:
+	if (ent) {
+		free(ent->name);
+		free(ent);
+	}
+	return ENOMEM;
+}
+
+static struct subst_entry *fetch_subst_entry(char *name)
+{
+	struct subst_entry *ent;
+
+	for (ent = subst_table; ent; ent = ent->next) {
+		if (strcmp(name, ent->name) == 0)
+			break;
+	}
+	return ent;
+}
+
+/*
+ * Given the starting and ending position of the replacement name,
+ * check to see if it is valid, and pull it out if it is.
+ */
+static char *get_subst_symbol(const char *begin, size_t len, char prefix)
+{
+	static char replace_name[128];
+	char *cp, *start;
+
+	start = replace_name;
+	if (prefix)
+		*start++ = prefix;
+
+	if (len > sizeof(replace_name)-2)
+		return NULL;
+	memcpy(start, begin, len);
+	start[len] = 0;
+
+	/*
+	 * The substitution variable must all be in the of [0-9A-Za-z_].
+	 * If it isn't, this must be an invalid symbol name.
+	 */
+	for (cp = start; *cp; cp++) {
+		if (!(*cp >= 'a' && *cp <= 'z') &&
+		    !(*cp >= 'A' && *cp <= 'Z') &&
+		    !(*cp >= '0' && *cp <= '9') &&
+		    !(*cp == '_'))
+			return NULL;
+	}
+	return (replace_name);
+}
+
+static void replace_string(char *begin, char *end, char *newstr)
+{
+	int	replace_len, len;
+
+	replace_len = strlen(newstr);
+	len = end - begin;
+	if (replace_len == 0)
+		memmove(begin, end+1, strlen(end)+1);
+	else if (replace_len != len+1)
+		memmove(end+(replace_len-len-1), end,
+			strlen(end)+1);
+	memcpy(begin, newstr, replace_len);
+}
+
+static void substitute_line(char *line)
+{
+	char	*ptr, *name_ptr, *end_ptr;
+	struct subst_entry *ent;
+	char	*replace_name;
+	size_t	len;
+
+	/*
+	 * Expand all @FOO@ substitutions
+	 */
+	ptr = line;
+	while (ptr) {
+		name_ptr = strchr(ptr, '@');
+		if (!name_ptr)
+			break;	/* No more */
+		if (*(++name_ptr) == '@') {
+			/*
+			 * Handle tytso@@mit.edu --> tytso@mit.edu
+			 */
+			memmove(name_ptr-1, name_ptr, strlen(name_ptr)+1);
+			ptr = name_ptr+1;
+			continue;
+		}
+		end_ptr = strchr(name_ptr, '@');
+		if (!end_ptr)
+			break;
+		len = end_ptr - name_ptr;
+		replace_name = get_subst_symbol(name_ptr, len, 0);
+		if (!replace_name) {
+			ptr = name_ptr;
+			continue;
+		}
+		ent = fetch_subst_entry(replace_name);
+		if (!ent) {
+			fprintf(stderr, "Unfound expansion: '%s'\n",
+				replace_name);
+			ptr = end_ptr + 1;
+			continue;
+		}
+#if 0
+		fprintf(stderr, "Replace name = '%s' with '%s'\n",
+		       replace_name, ent->value);
+#endif
+		ptr = name_ptr-1;
+		replace_string(ptr, end_ptr, ent->value);
+		if ((ent->value[0] == '@') &&
+		    (strlen(replace_name) == strlen(ent->value)-2) &&
+		    !strncmp(replace_name, ent->value+1,
+			     strlen(ent->value)-2))
+			/* avoid an infinite loop */
+			ptr += strlen(ent->value);
+	}
+	/*
+	 * Now do a second pass to expand ${FOO}
+	 */
+	ptr = line;
+	while (ptr) {
+		name_ptr = strchr(ptr, '$');
+		if (!name_ptr)
+			break;	/* No more */
+		if (*(++name_ptr) != '{') {
+			ptr = name_ptr;
+			continue;
+		}
+		name_ptr++;
+		end_ptr = strchr(name_ptr, '}');
+		if (!end_ptr)
+			break;
+		len = end_ptr - name_ptr;
+		replace_name = get_subst_symbol(name_ptr, len, '$');
+		if (!replace_name) {
+			ptr = name_ptr;
+			continue;
+		}
+		ent = fetch_subst_entry(replace_name);
+		if (!ent) {
+			ptr = end_ptr + 1;
+			continue;
+		}
+#if 0
+		fprintf(stderr, "Replace name = '%s' with '%s'\n",
+		       replace_name, ent->value);
+#endif
+		ptr = name_ptr-2;
+		replace_string(ptr, end_ptr, ent->value);
+	}
+}
+
+static void parse_config_file(FILE *f)
+{
+	char	line[2048];
+	char	*cp, *ptr;
+
+	while (!feof(f)) {
+		memset(line, 0, sizeof(line));
+		if (fgets(line, sizeof(line), f) == NULL)
+			break;
+		/*
+		 * Strip newlines and comments.
+		 */
+		cp = strchr(line, '\n');
+		if (cp)
+			*cp = 0;
+		cp = strchr(line, '#');
+		if (cp)
+			*cp = 0;
+		/*
+		 * Skip trailing and leading whitespace
+		 */
+		for (cp = line + strlen(line) - 1; cp >= line; cp--) {
+			if (*cp == ' ' || *cp == '\t')
+				*cp = 0;
+			else
+				break;
+		}
+		cp = line;
+		while (*cp && isspace(*cp))
+			cp++;
+		ptr = cp;
+		/*
+		 * Skip empty lines
+		 */
+		if (*ptr == 0)
+			continue;
+		/*
+		 * Ignore future extensions
+		 */
+		if (*ptr == '@')
+			continue;
+		/*
+		 * Parse substitutions
+		 */
+		for (cp = ptr; *cp; cp++)
+			if (isspace(*cp))
+				break;
+		*cp = 0;
+		for (cp++; *cp; cp++)
+			if (!isspace(*cp))
+				break;
+#if 0
+		printf("Substitute: '%s' for '%s'\n", ptr, cp ? cp : "<NULL>");
+#endif
+		add_subst(ptr, cp);
+	}
+}
+
+/*
+ * Return 0 if the files are different, 1 if the files are the same.
+ */
+static int compare_file(FILE *old_f, FILE *new_f)
+{
+	char	oldbuf[2048], newbuf[2048], *oldcp, *newcp;
+	int	retval;
+
+	while (1) {
+		oldcp = fgets(oldbuf, sizeof(oldbuf), old_f);
+		newcp = fgets(newbuf, sizeof(newbuf), new_f);
+		if (!oldcp && !newcp) {
+			retval = 1;
+			break;
+		}
+		if (!oldcp || !newcp || strcmp(oldbuf, newbuf)) {
+			retval = 0;
+			break;
+		}
+	}
+	return retval;
+}
+
+void set_utimes(const char *filename, int fd, const struct timeval times[2])
+{
+#ifdef HAVE_FUTIMES
+	if (futimes(fd, times) < 0)
+		perror("futimes");
+#elif HAVE_UTIMES
+	if (utimes(filename, times) < 0)
+		perror("utimes");
+#else
+	struct utimbuf ut;
+
+	ut.actime = times[0].tv_sec;
+	ut.modtime = times[1].tv_sec;
+	if (utime(filename, &ut) < 0)
+		perror("utime");
+#endif
+}
+
+
+int main(int argc, char **argv)
+{
+	char	line[2048];
+	int	c;
+	int	fd, ofd = -1;
+	FILE	*in, *out, *old = NULL;
+	char	*outfn = NULL, *newfn = NULL;
+	int	verbose = 0;
+	int	adjust_timestamp = 0;
+	int	got_atime = 0;
+	struct stat stbuf;
+	struct timeval tv[2];
+
+	while ((c = getopt (argc, argv, "f:tv")) != EOF) {
+		switch (c) {
+		case 'f':
+			in = fopen(optarg, "r");
+			if (!in) {
+				perror(optarg);
+				exit(1);
+			}
+			parse_config_file(in);
+			fclose(in);
+			break;
+		case 't':
+			adjust_timestamp++;
+			break;
+		case 'v':
+			verbose++;
+			break;
+		default:
+			fprintf(stderr, "%s: [-f config-file] [file]\n",
+				argv[0]);
+			break;
+		}
+	}
+	if (optind < argc) {
+		in = fopen(argv[optind], "r");
+		if (!in) {
+			perror(argv[optind]);
+			exit(1);
+		}
+		optind++;
+	} else
+		in = stdin;
+
+	if (optind < argc) {
+		outfn = argv[optind];
+		newfn = (char *) malloc(strlen(outfn)+20);
+		if (!newfn) {
+			fprintf(stderr, "Memory error!  Exiting.\n");
+			exit(1);
+		}
+		strcpy(newfn, outfn);
+		strcat(newfn, ".new");
+		ofd = open(newfn, O_CREAT|O_TRUNC|O_RDWR, 0644);
+		if (ofd < 0) {
+			perror(newfn);
+			exit(1);
+		}
+		out = fdopen(ofd, "w+");
+		if (!out) {
+			perror("fdopen");
+			exit(1);
+		}
+
+		fd = open(outfn, O_RDONLY);
+		if (fd > 0) {
+			/* save the original atime, if possible */
+			if (fstat(fd, &stbuf) == 0) {
+#if HAVE_STRUCT_STAT_ST_ATIM
+				tv[0].tv_sec = stbuf.st_atim.tv_sec;
+				tv[0].tv_usec = stbuf.st_atim.tv_nsec / 1000;
+#else
+				tv[0].tv_sec = stbuf.st_atime;
+				tv[0].tv_usec = 0;
+#endif
+				got_atime = 1;
+			}
+			old = fdopen(fd, "r");
+			if (!old)
+				close(fd);
+		}
+	} else {
+		out = stdout;
+		outfn = 0;
+	}
+
+	while (!feof(in)) {
+		if (fgets(line, sizeof(line), in) == NULL)
+			break;
+		substitute_line(line);
+		fputs(line, out);
+	}
+	fclose(in);
+	if (outfn) {
+		fflush(out);
+		rewind(out);
+		if (old && compare_file(old, out)) {
+			if (verbose)
+				printf("No change, keeping %s.\n", outfn);
+			if (adjust_timestamp) {
+				if (verbose)
+					printf("Updating modtime for %s\n", outfn);
+				if (gettimeofday(&tv[1], NULL) < 0) {
+					perror("gettimeofday");
+					exit(1);
+				}
+				if (got_atime == 0)
+					tv[0] = tv[1];
+				else if (verbose)
+					printf("Using original atime\n");
+				set_utimes(outfn, fileno(old), tv);
+			}
+#ifndef _WIN32
+			if (ofd >= 0)
+				(void) fchmod(ofd, 0444);
+#endif
+			fclose(out);
+			if (unlink(newfn) < 0)
+				perror("unlink");
+		} else {
+			if (verbose)
+				printf("Creating or replacing %s.\n", outfn);
+#ifndef _WIN32
+			if (ofd >= 0)
+				(void) fchmod(ofd, 0444);
+#endif
+			fclose(out);
+			if (old)
+				fclose(old);
+			old = NULL;
+			if (rename(newfn, outfn) < 0) {
+				perror("rename");
+				exit(1);
+			}
+		}
+	}
+	if (old)
+		fclose(old);
+	if (newfn)
+		free(newfn);
+	return (0);
+}
+
+
diff --git a/util/subst.conf.in b/util/subst.conf.in
new file mode 100644
index 0000000..0da4554
--- /dev/null
+++ b/util/subst.conf.in
@@ -0,0 +1,26 @@
+AWK			@AWK@
+SED			@SED@
+E2FSPROGS_MONTH		@E2FSPROGS_MONTH@
+E2FSPROGS_YEAR		@E2FSPROGS_YEAR@
+E2FSPROGS_DATE		@E2FSPROGS_DATE@
+E2FSPROGS_VERSION	@E2FSPROGS_VERSION@
+SIZEOF_LONG_LONG	@SIZEOF_LONG_LONG@
+SIZEOF_LONG		@SIZEOF_LONG@
+SIZEOF_INT		@SIZEOF_INT@
+SIZEOF_SHORT		@SIZEOF_SHORT@
+datarootdir		@datarootdir@
+datadir			@datadir@
+root_sysconfdir		@root_sysconfdir@
+$datarootdir		@datarootdir@
+$root_prefix		@root_prefix@
+$prefix			@prefix@
+# Enable the documentation for the journal device mke2fs, tune2fs, and
+# e2fsck's man page
+JDEV			
+# Enable the documentation for the tdb profile in e2fsck.conf's man page
+TDB_MAN_COMMENT		@TDB_MAN_COMMENT@
+root_sbindir		@root_sbindir@
+root_bindir		@root_bindir@
+libdir			@libdir@
+$exec_prefix		@exec_prefix@
+pkglibdir		@libdir@/e2fsprogs
diff --git a/util/symlinks.c b/util/symlinks.c
new file mode 100644
index 0000000..e9d2b01
--- /dev/null
+++ b/util/symlinks.c
@@ -0,0 +1,391 @@
+#define _FILE_OFFSET_BITS 64
+#ifndef _LARGEFILE_SOURCE
+#define _LARGEFILE_SOURCE
+#endif
+#ifndef _LARGEFILE64_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+
+#include <unistd.h>
+#ifndef _POSIX_SOURCE
+#define _POSIX_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#include <string.h>
+#include <fcntl.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <time.h>
+#include <stddef.h>
+#include <errno.h>
+
+#ifndef S_ISLNK
+#define S_ISLNK(mode) (((mode) & (_S_IFMT)) == (_S_IFLNK))
+#endif
+
+#ifndef PATH_MAX
+#define PATH_MAX 1024
+#endif
+
+#define progver "%s: scan/change symbolic links - v1.3 - by Mark Lord\n\n"
+static char *progname;
+static int verbose = 0, fix_links = 0, recurse = 0, delete = 0, shorten = 0,
+		testing = 0, single_fs = 1;
+
+/*
+ * tidypath removes excess slashes and "." references from a path string
+ */
+
+static int substr (char *s, char *old, char *new)
+{
+	char *tmp = NULL;
+	int oldlen = strlen(old), newlen = 0;
+
+	if (NULL == strstr(s, old))
+		return 0;
+
+	if (new)
+		newlen = strlen(new);
+
+	if (newlen > oldlen) {
+		if ((tmp = malloc(strlen(s))) == NULL) {
+			fprintf(stderr, "no memory\n");
+			exit (1);
+		}
+	}
+
+	while (NULL != (s = strstr(s, old))) {
+		char *p, *old_s = s;
+
+		if (new) {
+			if (newlen > oldlen)
+				old_s = strcpy(tmp, s);
+			p = new;
+			while (*p)
+				*s++ = *p++;
+		}
+		p = old_s + oldlen;
+		while ((*s++ = *p++));
+	}
+	if (tmp)
+		free(tmp);
+	return 1;
+}
+
+
+static int tidy_path (char *path)
+{
+	int tidied = 0;
+	char *s, *p;
+
+	s = path + strlen(path) - 1;
+	if (s[0] != '/') {	/* tmp trailing slash simplifies things */
+		s[1] = '/';
+		s[2] = '\0';
+	}
+	while (substr(path, "/./", "/"))
+		tidied = 1;
+	while (substr(path, "//", "/"))
+		tidied = 1;
+
+	while ((p = strstr(path,"/../")) != NULL) {
+		s = p+3;
+		for (p--; p != path; p--) if (*p == '/') break;
+		if (*p != '/')
+			break;
+		while ((*p++ = *s++));
+		tidied = 1;
+	}
+	if (*path == '\0')
+		strcpy(path,"/");
+	p = path + strlen(path) - 1;
+	if (p != path && *p == '/')
+		*p-- = '\0';	/* remove tmp trailing slash */
+	while (p != path && *p == '/') {	/* remove any others */
+		*p-- = '\0';
+		tidied = 1;
+	}
+	while (!strncmp(path,"./",2)) {
+		for (p = path, s = path+2; (*p++ = *s++););
+		tidied = 1;
+	}
+	return tidied;
+}
+
+static int shorten_path (char *path, char *abspath)
+{
+	static char dir[PATH_MAX];
+	int shortened = 0;
+	char *p;
+
+	/* get rid of unnecessary "../dir" sequences */
+	while (abspath && strlen(abspath) > 1 && (p = strstr(path,"../"))) {
+		/* find innermost occurrence of "../dir", and save "dir" */
+		int slashes = 2;
+		char *a, *s, *d = dir;
+		while ((s = strstr(p+3, "../"))) {
+			++slashes;
+			p = s;
+		}
+		s = p+3;
+		*d++ = '/';
+		while (*s && *s != '/')
+			*d++ = *s++;
+		*d++ = '/';
+		*d = '\0';
+		if (!strcmp(dir,"//"))
+			break;
+		/* note: p still points at ../dir */
+		if (*s != '/' || !*++s)
+			break;
+		a = abspath + strlen(abspath) - 1;
+		while (slashes-- > 0) {
+			if (a <= abspath)
+				goto ughh;
+			while (*--a != '/') {
+				if (a <= abspath)
+					goto ughh;
+			}
+		}
+		if (strncmp(dir, a, strlen(dir)))
+			break;
+		while ((*p++ = *s++)); /* delete the ../dir */
+		shortened = 1;
+	}
+ughh:
+	return shortened;
+}
+
+
+static void fix_symlink (char *path, dev_t my_dev)
+{
+	static char lpath[PATH_MAX], new[PATH_MAX], abspath[PATH_MAX];
+	char *p, *np, *lp, *tail, *msg;
+	struct stat stbuf, lstbuf;
+	int c, fix_abs = 0, fix_messy = 0, fix_long = 0;
+
+	if ((c = readlink(path, lpath, sizeof(lpath) - 1)) == -1) {
+		perror(path);
+		return;
+	}
+	lpath[c] = '\0';	/* readlink does not null terminate it */
+
+	/* construct the absolute address of the link */
+	abspath[0] = '\0';
+	if (lpath[0] != '/') {
+		strcat(abspath,path);
+		c = strlen(abspath);
+		if ((c > 0) && (abspath[c-1] == '/'))
+			abspath[c-1] = '\0'; /* cut trailing / */
+		if ((p = strrchr(abspath,'/')) != NULL)
+			*p = '\0'; /* cut last component */
+		strcat(abspath,"/");
+	}
+	strcat(abspath,lpath);
+	(void) tidy_path(abspath);
+
+	/* check for various things */
+	if (stat(abspath, &stbuf) == -1) {
+		printf("dangling: %s -> %s\n", path, lpath);
+		if (delete) {
+			if (unlink (path)) {
+				perror(path); 
+			} else
+				printf("deleted:  %s -> %s\n", path, lpath);
+		}
+		return;
+	}
+
+	if (single_fs)
+		lstat(abspath, &lstbuf); /* if the above didn't fail, then this shouldn't */
+	
+	if (single_fs && lstbuf.st_dev != my_dev) {
+		msg = "other_fs:";
+	} else if (lpath[0] == '/') {
+		msg = "absolute:";
+		fix_abs = 1;
+	} else if (verbose) {
+		msg = "relative:";
+	} else
+		msg = NULL;
+	fix_messy = tidy_path(strcpy(new,lpath));
+	if (shorten)
+		fix_long = shorten_path(new, path);
+	if (!fix_abs) {
+		if (fix_messy)
+			msg = "messy:   ";
+		else if (fix_long)
+			msg = "lengthy: ";
+	}
+	if (msg != NULL)
+		printf("%s %s -> %s\n", msg, path, lpath);
+	if (!(fix_links || testing) || !(fix_messy || fix_abs || fix_long))
+		return;
+
+	if (fix_abs) {
+		/* convert an absolute link to relative: */
+		/* point tail at first part of lpath that differs from path */
+		/* point p    at first part of path  that differs from lpath */
+		(void) tidy_path(lpath);
+		tail = lp = lpath;
+		p = path;
+		while (*p && (*p == *lp)) {
+			if (*lp++ == '/') {
+				tail = lp;
+				while (*++p == '/');
+			}
+		}
+
+		/* now create new, with "../"s followed by tail */
+		np = new;
+		while (*p) {
+			if (*p++ == '/') {
+				*np++ = '.';
+				*np++ = '.';
+				*np++ = '/';
+				while (*p == '/') ++p;
+			}
+		}
+		strcpy (np, tail);
+		(void) tidy_path(new);
+		if (shorten) (void) shorten_path(new, path);
+	}
+	shorten_path(new,path);
+	if (!testing) {
+		if (unlink (path)) {
+			perror(path);
+			return;
+		}
+		if (symlink(new, path)) {
+			perror(path);
+			return;
+		}
+	}
+	printf("changed:  %s -> %s\n", path, new);
+}
+
+static void dirwalk (char *path, int pathlen, dev_t dev)
+{
+ 	char *name;
+	DIR *dfd;
+	static struct stat st;
+	static struct dirent *dp;
+
+	if ((dfd = opendir(path)) == NULL) {
+		perror(path);
+		return;
+	}
+
+	name = path + pathlen;
+	if (*(name-1) != '/')
+		*name++ = '/'; 
+
+	while ((dp = readdir(dfd)) != NULL ) {
+		strcpy(name, dp->d_name);
+                if (strcmp(name, ".") && strcmp(name,"..")) {
+			if (lstat(path, &st) == -1) {
+				perror(path);
+			} else if (st.st_dev == dev) {
+				if (S_ISLNK(st.st_mode)) {
+					fix_symlink (path, dev);
+				} else if (recurse && S_ISDIR(st.st_mode)) {
+					dirwalk(path, strlen(path), dev);
+				}
+			}
+		}
+	} 
+	closedir(dfd);
+	path[pathlen] = '\0';
+}
+
+static void usage_error (void)
+{
+	fprintf(stderr, progver, progname);
+	fprintf(stderr, "Usage:\t%s [-cdorstv] LINK|DIR ...\n\n", progname);
+	fprintf(stderr, "Flags:"
+		"\t-c == change absolute/messy links to relative\n"
+		"\t-d == delete dangling links\n"
+		"\t-o == warn about links across file systems\n"
+		"\t-r == recurse into subdirs\n"
+		"\t-s == shorten lengthy links (displayed in output only when -c not specified)\n"
+		"\t-t == show what would be done by -c\n"
+		"\t-v == verbose (show all symlinks)\n\n");
+	exit(1);
+}
+
+int main(int argc, char **argv)
+{
+#if defined (_GNU_SOURCE) && defined (__GLIBC__)
+	static char path[PATH_MAX+2];
+	char* cwd = get_current_dir_name();
+#else
+	static char path[PATH_MAX+2], cwd[PATH_MAX+2];
+#endif
+	int dircount = 0;
+	char c, *p;
+
+	if  ((progname = (char *) strrchr(*argv, '/')) == NULL)
+                progname = *argv;
+        else
+                progname++;
+
+#if defined (_GNU_SOURCE) && defined (__GLIBC__)
+	if (NULL == cwd) {
+		fprintf(stderr,"get_current_dir_name() failed\n");
+#else
+	if (NULL == getcwd(cwd,PATH_MAX)) {
+		fprintf(stderr,"getcwd() failed\n");
+#endif
+		exit (1);
+	}
+#if defined (_GNU_SOURCE) && defined (__GLIBC__)
+	cwd = realloc(cwd, strlen(cwd)+2);
+	if (cwd == NULL) {
+		fprintf(stderr, "realloc() failed\n");
+		exit (1);
+	}
+#endif
+	if (!*cwd || cwd[strlen(cwd)-1] != '/')
+		strcat(cwd,"/");
+
+	while (--argc) {
+		p = *++argv;
+		if (*p == '-') {
+			if (*++p == '\0')
+				usage_error();
+			while ((c = *p++)) {
+				     if (c == 'c')	fix_links = 1;
+				else if (c == 'd')	delete    = 1;
+				else if (c == 'o')	single_fs = 0;
+				else if (c == 'r')	recurse   = 1;
+				else if (c == 's')	shorten   = 1;
+				else if (c == 't')	testing   = 1;
+				else if (c == 'v')	verbose   = 1;
+				else			usage_error();
+			}
+		} else {
+			struct stat st;
+			if (*p == '/')
+				*path = '\0';
+			else
+				strcpy(path,cwd);
+			tidy_path(strcat(path, p));
+			if (lstat(path, &st) == -1)
+				perror(path);
+			else if (S_ISLNK(st.st_mode))
+				fix_symlink(path, st.st_dev);
+			else
+				dirwalk(path, strlen(path), st.st_dev);
+			++dircount;
+		}
+	}
+	if (dircount == 0)
+		usage_error();
+	exit (0);
+}
diff --git a/util/ucd/README b/util/ucd/README
new file mode 100644
index 0000000..9fed514
--- /dev/null
+++ b/util/ucd/README
@@ -0,0 +1,37 @@
+The files in this directory are part of the Unicode Character Database
+for version 11.0.0 of the Unicode standard.
+
+The full set of UCD files are not distributed with e2fsprogs, since they
+are very large and only needed to regenerate the lib/ext2fs/utf8data.h
+during an Unicode version update.  They can be found in the link below
+and also in the Linux kernel source tree:
+
+  http://www.unicode.org/Public/11.0.0/ucd/
+
+The latest released version of the UCD can be found here:
+
+  http://www.unicode.org/Public/UCD/latest/
+
+The files in this directory are identical, except that they have been
+renamed with a suffix indicating the unicode version.
+
+Individual source links:
+
+  http://www.unicode.org/Public/11.0.0/ucd/CaseFolding.txt
+  http://www.unicode.org/Public/11.0.0/ucd/DerivedAge.txt
+  http://www.unicode.org/Public/11.0.0/ucd/extracted/DerivedCombiningClass.txt
+  http://www.unicode.org/Public/11.0.0/ucd/DerivedCoreProperties.txt
+  http://www.unicode.org/Public/11.0.0/ucd/NormalizationCorrections.txt
+  http://www.unicode.org/Public/11.0.0/ucd/NormalizationTest.txt
+  http://www.unicode.org/Public/11.0.0/ucd/UnicodeData.txt
+
+md5sums
+
+  414436796cf097df55f798e1585448ee  CaseFolding-11.0.0.txt
+  6032a595fbb782694456491d86eecfac  DerivedAge-11.0.0.txt
+  3240997d671297ac754ab0d27577acf7  DerivedCombiningClass-11.0.0.txt
+  d41d8cd98f00b204e9800998ecf8427e  DerivedCombiningClass.txt
+  2a4fe257d9d8184518e036194d2248ec  DerivedCoreProperties-11.0.0.txt
+  4e7d383fa0dd3cd9d49d64e5b7b7c9e0  NormalizationCorrections-11.0.0.txt
+  c9500c5b8b88e584469f056023ecc3f2  NormalizationTest-11.0.0.txt
+  acc291106c3758d2025f8d7bd5518bee  UnicodeData-11.0.0.txt