diff options
Diffstat (limited to 'src/libs/libgroff/make-uniuni')
-rwxr-xr-x | src/libs/libgroff/make-uniuni | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/src/libs/libgroff/make-uniuni b/src/libs/libgroff/make-uniuni new file mode 100755 index 0000000..386eacd --- /dev/null +++ b/src/libs/libgroff/make-uniuni @@ -0,0 +1,162 @@ +#! /bin/sh +# +# make-uniuni -- script for creating the file uniuni.cpp +# +# Copyright (C) 2005-2020 Free Software Foundation, Inc. +# Written by Werner Lemberg <wl@gnu.org> +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# +# usage: +# +# make-uniuni <version-string> < UnicodeData.txt > uniuni.cpp +# +# 'UnicodeData.txt' is the central database file from the Unicode standard. +# Unfortunately, it doesn't contain a version number which must be thus +# provided manually as a parameter to the filter. +# +# This program needs a C preprocessor. +# + +CPP=cpp + +prog="$0" + +if test $# -ne 1; then + echo "usage: $0 <version-string> < UnicodeData.txt > uniuni.cpp" + exit 1 +fi + +version_string="$1" + +# Remove ranges and control characters, +# then extract the decomposition field, +# then remove lines without decomposition, +# then remove all compatibility decompositions. +sed -e '/^[^;]*;</d' \ +| sed -e 's/;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);.*$/;\1/' \ +| sed -e '/^[^;]*;$/d' \ +| sed -e '/^[^;]*;</d' > $$1 + +# Prepare input for running cpp. +cat $$1 \ +| sed -e 's/^\([^;]*\);/#define \1 /' \ + -e 's/ / u/g' > $$2 +cat $$1 \ +| sed -e 's/^\([^;]*\);.*$/\1 u\1/' >> $$2 + +# Run C preprocessor to recursively decompose. +$CPP $$2 $$3 + +# Convert it back to original format. +cat $$3 \ +| sed -e '/#/d' \ + -e '/^$/d' \ + -e 's/ \+/ /g' \ + -e 's/ *$//' \ + -e 's/u//g' \ + -e 's/^\([^ ]*\) /\1;/' > $$4 + +# Write preamble. +cat <<END +// -*- C++ -*- +/* Copyright (C) 2002-2014 Free Software Foundation, Inc. + Written by Werner Lemberg <wl@gnu.org> + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +// This code has been algorithmically derived from the file +// UnicodeData.txt, version $version_string, available from unicode.org, +// on `date '+%Y-%m-%d'`. + +#include "lib.h" +#include "stringclass.h" +#include "ptable.h" + +#include "unicode.h" + +struct unicode_decompose { + char *value; +}; + +declare_ptable(unicode_decompose) +implement_ptable(unicode_decompose) + +PTABLE(unicode_decompose) unicode_decompose_table; + +// the first digit in the composite string gives the number of composites + +struct S { + const char *key; + const char *value; +} unicode_decompose_list[] = { +END + +# Emit Unicode data. +cat $$4 \ +| sed -e 's/ /_/g' \ + -e 's/\(.*\);\(.*_.*_.*_.*\)$/ { "\1", "4\2" },/' \ + -e 's/\(.*\);\(.*_.*_.*\)$/ { "\1", "3\2" },/' \ + -e 's/\(.*\);\(.*_.*\)$/ { "\1", "2\2" },/' \ + -e 's/\(.*\);\(.*\)$/ { "\1", "1\2" },/' + +# Write postamble. +cat <<END +}; + +// global constructor + +static struct unicode_decompose_init { + unicode_decompose_init(); +} _unicode_decompose_init; + +unicode_decompose_init::unicode_decompose_init() +{ + for (unsigned int i = 0; + i < sizeof(unicode_decompose_list)/sizeof(unicode_decompose_list[0]); + i++) { + unicode_decompose *dec = new unicode_decompose[1]; + dec->value = (char *)unicode_decompose_list[i].value; + unicode_decompose_table.define(unicode_decompose_list[i].key, dec); + } +} + +const char *decompose_unicode(const char *s) +{ + unicode_decompose *result = unicode_decompose_table.lookup(s); + return result ? result->value : 0; +} +END + + +# Remove temporary files. +rm $$1 $$2 $$3 $$4 + +# EOF |