diff options
Diffstat (limited to 'pg_updatedicts')
-rwxr-xr-x | pg_updatedicts | 139 |
1 files changed, 139 insertions, 0 deletions
#!/usr/bin/perl -w

# Create dictionaries and affix rules palatable for PostgreSQL, using installed
# myspell and hunspell dictionaries.
#
# (C) 2008-2009 Martin Pitt <mpitt@debian.org>
# (C) 2012-2017 Christoph Berg <myon@debian.org>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.

use strict;
use warnings;

# Directories searched for *.aff / *.dic pairs.
my @srcdirs = ('/usr/share/hunspell', '/usr/share/myspell/dicts');
# Where the UTF-8 converted copies are written.
my $cachedir = '/var/cache/postgresql/dicts';
# No trailing slash: every path below is built as "$pgsharedir/...", so a
# trailing slash here would put "//" into all derived and printed paths.
my $pgsharedir = '/usr/share/postgresql';

use PgCommon;

# Determine the encoding of an .aff file.
# Arguments: path of the .aff file
# Returns: the encoding name from the first "SET <encoding>" line, or
#          undef (in scalar context) if the file has no such line.
# Dies if the file cannot be opened.
sub get_encoding {
    my ($aff_file) = @_;

    open my $fh, '<', $aff_file or die "cannot open $aff_file: $!";
    # Use a lexical line variable so the caller's $_ is not clobbered.
    while (my $line = <$fh>) {
        return $1 if $line =~ /^SET ([\w-]+)\s*$/;
    }
    return;
}

umask 022;
if ((system 'mkdir', '-p', $cachedir) != 0) {
    exit 1;
}

# keep track of all up to date files, so that we can clean up cruft
my %current;

print "Building PostgreSQL dictionaries from installed myspell/hunspell packages...\n";
for my $d (@srcdirs) {
    for my $aff (glob "$d/*.aff") {
        next if -l $aff; # ignore symlinks

        # foo.aff -> foo.dic
        my $dic = substr($aff, 0, -3) . 'dic';
        if (! -f $dic) {
            print STDERR "ERROR: $aff does not have corresponding $dic, ignoring\n";
            next;
        }

        my $enc = get_encoding($aff);
        if (!$enc) {
            print STDERR "ERROR: no encoding defined in $aff, ignoring\n";
            next;
        }

        # locale name: lower-cased basename of the .aff file without ".aff"
        my $locale = substr((split '/', $aff)[-1], 0, -4);
        $locale =~ tr/A-Z/a-z/;

        $current{"$cachedir/$locale.affix"} = undef;
        $current{"$cachedir/$locale.dict"} = undef;

        # convert to UTF-8 and write to cache dir
        print "  $locale\n";
        if ((system 'iconv', '-f', $enc, '-t', 'UTF-8', '-o',
                "$cachedir/$locale.affix", $aff) != 0) {
            # do not leave a partial output file behind
            unlink "$cachedir/$locale.affix";
            print STDERR "ERROR: Conversion of $aff failed\n";
            next;
        }
        if ((system 'iconv', '-f', $enc, '-t', 'UTF-8', '-o',
                "$cachedir/$locale.dict", $dic) != 0) {
            # a dictionary is only usable as a pair, so remove both files
            unlink "$cachedir/$locale.affix";
            unlink "$cachedir/$locale.dict";
            print STDERR "ERROR: Conversion of $dic failed\n";
            next;
        }

        # install symlinks to all versions >= 8.3
        foreach my $v (get_versions()) {
            next if $v < 8.3;
            my $dest = "$pgsharedir/$v/tsearch_data";
            next if ! -d $dest;
            $current{"$dest/$locale.affix"} = undef;
            $current{"$dest/$locale.dict"} = undef;
            # never replace regular files; only symlinks are touched here
            next if -e "$dest/$locale.affix" && ! -l "$dest/$locale.affix";
            next if -e "$dest/$locale.dict" && ! -l "$dest/$locale.dict";
            # drop stale links first (failure is fine if they do not exist)
            unlink "$dest/$locale.affix";
            unlink "$dest/$locale.dict";
            symlink "$cachedir/$locale.affix", "$dest/$locale.affix"
                or print STDERR "ERROR: cannot symlink $dest/$locale.affix: $!\n";
            symlink "$cachedir/$locale.dict", "$dest/$locale.dict"
                or print STDERR "ERROR: cannot symlink $dest/$locale.dict: $!\n";
        }
    }
}

# clean up files for locales which do not exist any more
print "Removing obsolete dictionary files:\n";
foreach my $f (glob "$cachedir/*") {
    next if exists $current{$f};
    print "  $f\n";
    unlink $f;
}
foreach my $f ((glob "$pgsharedir/*/tsearch_data/*.affix"),
               (glob "$pgsharedir/*/tsearch_data/*.dict")) {
    # only symlinks are candidates for removal; regular files are kept
    next unless -l $f;
    next if exists $current{$f};
    print "  $f\n";
    unlink $f;
}

__END__

=head1 NAME

pg_updatedicts - build PostgreSQL dictionaries from myspell/hunspell ones

=head1 SYNOPSIS

B<pg_updatedicts>

=head1 DESCRIPTION

B<pg_updatedicts> makes dictionaries and affix files from installed myspell
and hunspell dictionary packages available to PostgreSQL for usage with tsearch
and word stem support. In particular, it takes all I<*.dic> and I<*.aff> files
from /usr/share/hunspell/ and /usr/share/myspell/dicts/, converts them to
UTF-8, puts them into /var/cache/postgresql/dicts/ with I<*.dict> and
I<*.affix> suffixes, and symlinks them into
/usr/share/postgresql/I<version>/tsearch_data/, where PostgreSQL looks for them.

Through postgresql-common's dpkg trigger, this program is automatically run
whenever a myspell or hunspell dictionary package is installed or upgraded.

=head1 AUTHOR

Martin Pitt L<E<lt>mpitt@debian.orgE<gt>>