summaryrefslogtreecommitdiffstats
path: root/pg_updatedicts
diff options
context:
space:
mode:
Diffstat (limited to 'pg_updatedicts')
-rwxr-xr-xpg_updatedicts139
1 files changed, 139 insertions, 0 deletions
diff --git a/pg_updatedicts b/pg_updatedicts
new file mode 100755
index 0000000..897346e
--- /dev/null
+++ b/pg_updatedicts
@@ -0,0 +1,139 @@
+#!/usr/bin/perl -w
+
+# Create dictionaries and affix rules palatable for PostgreSQL, using installed
+# myspell and hunspell dictionaries.
+#
+# (C) 2008-2009 Martin Pitt <mpitt@debian.org>
+# (C) 2012-2017 Christoph Berg <myon@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+use strict;
+use warnings;
+# Directories that are scanned for installed *.aff files (and their matching
+# *.dic files).
+my @srcdirs = ('/usr/share/hunspell', '/usr/share/myspell/dicts');
+# Directory that receives the UTF-8 converted *.affix / *.dict files.
+my $cachedir = '/var/cache/postgresql/dicts';
+# Parent of the per-version directories; symlinks are created in
+# "$pgsharedir/<version>/tsearch_data".  The trailing slash here results in a
+# doubled "/" in those derived paths.
+my $pgsharedir = '/usr/share/postgresql/';
+
+use PgCommon;
+
+# Determine the encoding of an .aff file.
+#
+# Argument: path of the .aff file.
+# Returns:  the encoding name taken from the first line of the form
+#           "SET <encoding>" (any whitespace may separate the two words);
+#           nothing (undef in scalar context, the empty list in list context)
+#           if the file has no such line.
+# Dies if the file cannot be opened.
+sub get_encoding {
+    open my $fh, '<', $_[0] or die "cannot open $_[0]: $!";
+    # Read into a lexical: a bare while (<$fh>) would overwrite the caller's
+    # global $_ without restoring it.
+    while (my $line = <$fh>) {
+        return $1 if $line =~ /^SET\s+([\w-]+)\s*$/;
+    }
+    return;
+}
+
+# Create the cache directory (and any missing parents) with group/other write
+# permission masked out; give up with exit status 1 if mkdir fails.
+umask 022;
+system('mkdir', '-p', $cachedir) == 0
+    or exit 1;
+
+# Keep track of all up to date files, so that we can clean up cruft.  Only the
+# keys matter (the values stay undef): every cache file and every symlink that
+# belongs to a dictionary found below is recorded here, and the cleanup code
+# at the end of the script removes whatever is not recorded.
+my %current;
+
+print "Building PostgreSQL dictionaries from installed myspell/hunspell packages...\n";
+for my $d (@srcdirs) {
+    for my $aff (glob "$d/*.aff") {
+        next if -l $aff; # ignore symlinks
+        my $dic = substr($aff, 0, -3) . 'dic';
+        if (! -f $dic) {
+            print STDERR "ERROR: $aff does not have corresponding $dic, ignoring\n";
+            next;
+        }
+
+        my $enc = get_encoding($aff);
+        if (!$enc) {
+            print STDERR "ERROR: no encoding defined in $aff, ignoring\n";
+            next;
+        }
+
+        # the locale is the lower-cased file name without its ".aff" suffix
+        my $locale = substr((split '/', $aff)[-1], 0, -4);
+        $locale =~ tr/A-Z/a-z/;
+
+        my $affix = "$cachedir/$locale.affix";
+        my $dict = "$cachedir/$locale.dict";
+        $current{$affix} = undef;
+        $current{$dict} = undef;
+
+        # convert to UTF-8 and write to cache dir; the .dic file is only
+        # attempted if the .aff file converted successfully
+        print " $locale\n";
+        my $failed;
+        for my $job ([$aff, $affix], [$dic, $dict]) {
+            my ($src, $dst) = @$job;
+            next if system('iconv', '-f', $enc, '-t', 'UTF-8', '-o', $dst, $src) == 0;
+            $failed = $src;
+            last;
+        }
+        if (defined $failed) {
+            # Remove both halves: a lone or stale .affix/.dict (possibly left
+            # over from an earlier run) is useless without its counterpart.
+            unlink $affix, $dict;
+            print STDERR "ERROR: Conversion of $failed failed\n";
+            next;
+        }
+
+        # install symlinks to all versions >= 8.3
+        foreach my $v (get_versions) {
+            next if $v < '8.3';
+            my $dest = "$pgsharedir/$v/tsearch_data";
+            next if ! -d $dest;
+            my @links = ("$dest/$locale.affix", "$dest/$locale.dict");
+            $current{$_} = undef for @links;
+            # do not touch this version if either name exists and is not a
+            # symlink
+            next if grep { (-e $_) && !(-l $_) } @links;
+            unlink @links;
+            for my $pair ([$affix, $links[0]], [$dict, $links[1]]) {
+                my ($target, $link) = @$pair;
+                symlink($target, $link)
+                    or print STDERR "ERROR: cannot symlink $link to $target: $!\n";
+            }
+        }
+    }
+}
+
+# Remove leftovers of locales which do not exist any more.
+print "Removing obsolete dictionary files:\n";
+
+# In the cache directory, every entry that was not recorded in %current is
+# reported and deleted.
+for my $cached (glob "$cachedir/*") {
+    unless (exists $current{$cached}) {
+        print " $cached\n";
+        unlink $cached;
+    }
+}
+
+# In the tsearch_data directories only symlinks are candidates (*.affix first,
+# then *.dict); anything that is not a symlink is left alone.
+for my $ext (qw(affix dict)) {
+    for my $link (glob "$pgsharedir/*/tsearch_data/*.$ext") {
+        if (-l $link and not exists $current{$link}) {
+            print " $link\n";
+            unlink $link;
+        }
+    }
+}
+
+__END__
+
+=head1 NAME
+
+pg_updatedicts - build PostgreSQL dictionaries from myspell/hunspell ones
+
+=head1 SYNOPSIS
+
+B<pg_updatedicts>
+
+=head1 DESCRIPTION
+
+B<pg_updatedicts> makes dictionaries and affix files from installed myspell
+and hunspell dictionary packages available to PostgreSQL for usage with tsearch
+and word stem support. In particular, it takes all I<*.dic> and I<*.aff> files
+from /usr/share/hunspell/ and /usr/share/myspell/dicts/, converts them to
+UTF-8, puts them into /var/cache/postgresql/dicts/ with I<*.dict> and
+I<*.affix> suffixes, and symlinks them into
+/usr/share/postgresql/I<version>/tsearch_data/, where PostgreSQL looks for them.
+
+Through postgresql-common's dpkg trigger, this program is automatically run
+whenever a myspell or hunspell dictionary package is installed or upgraded.
+
+=head1 AUTHOR
+
+Martin Pitt L<E<lt>mpitt@debian.orgE<gt>>