summaryrefslogtreecommitdiffstats
path: root/pg_updatedicts
blob: 7d8636542a586d5675c548ecf225b5cf4ce60739 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/perl -w

# Create dictionaries and affix rules palatable for PostgreSQL, using installed
# myspell and hunspell dictionaries.
#
# (C) 2008-2009 Martin Pitt <mpitt@debian.org>
# (C) 2012-2017 Christoph Berg <myon@debian.org>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.

# directories that are searched for *.aff files and their matching *.dic files
my @srcdirs = ('/usr/share/hunspell', '/usr/share/myspell/dicts');
# directory that receives the UTF-8 converted *.affix and *.dict files
my $cachedir = '/var/cache/postgresql/dicts';
# base directory; symlinks are created in <version>/tsearch_data below it
my $pgsharedir = '/usr/share/postgresql/';

use PgCommon;

# Determine the character encoding declared in an .aff file.
#
# Arguments: path of the .aff file
# Returns: the encoding name taken from the first "SET <encoding>" line, or
#          undef if the file does not contain such a line
# Dies if the file cannot be opened.
sub get_encoding {
    my ($path) = @_;

    # Three-argument open with a lexical handle: the path is taken literally
    # (no mode characters or whitespace stripping), and the handle is closed
    # automatically on every return path.
    open my $fh, '<', $path or die "cannot open $path: $!";

    # Read into a lexical so that the caller's $_ is left untouched.
    while (my $line = <$fh>) {
        return $1 if $line =~ /^SET ([\w-]+)\s*$/;
    }

    # explicit undef, so that list context callers still get one element
    return undef;
}

# clear the group/other write bits on everything that gets created below
umask 022;
# make sure the cache directory exists; exit with status 1 if mkdir fails
# (no message of our own is printed in that case)
system('mkdir', '-p', $cachedir) == 0 or exit 1;

# keep track of all up to date files, so that we can clean up cruft
my %current;

# For every non-symlink *.aff file in the source directories that has a
# matching *.dic file and declares an encoding: convert both files to UTF-8
# in the cache directory and symlink them into the tsearch_data directory of
# each PostgreSQL version >= 8.3.
print "Building PostgreSQL dictionaries from installed myspell/hunspell packages...\n";
for my $d (@srcdirs) {
    AFF:
    for my $aff (glob "$d/*.aff") {
        next AFF if -l $aff; # ignore symlinks

        my $dic = substr($aff, 0, -3) . 'dic';
        if (! -f $dic) {
            print STDERR "ERROR: $aff does not have corresponding $dic, ignoring\n";
            next AFF;
        }

        my $enc = get_encoding($aff);
        if (!$enc) {
            print STDERR "ERROR: no encoding defined in $aff, ignoring\n";
            next AFF;
        }

        # locale name: file name without directory and ".aff", lowercased
        my $locale = substr((split '/', $aff)[-1], 0, -4);
        $locale =~ tr/A-Z/a-z/;

        my $affix_out = "$cachedir/$locale.affix";
        my $dict_out = "$cachedir/$locale.dict";
        $current{$affix_out} = undef;
        $current{$dict_out} = undef;

        # convert to UTF-8 and write to cache dir
        print "  $locale\n";
        my $failed_src;
        for my $job ([$aff, $affix_out], [$dic, $dict_out]) {
            my ($src, $out) = @$job;
            next if system('iconv', '-f', $enc, '-t', 'UTF-8', '-o', $out, $src) == 0;
            $failed_src = $src;
            last;
        }
        if (defined $failed_src) {
            # the two files are only useful as a pair, so never leave a
            # partial or stale half behind
            unlink $affix_out, $dict_out;
            print STDERR "ERROR: Conversion of $failed_src failed\n";
            next AFF;
        }

        # install symlinks to all versions >= 8.3
        foreach my $v (get_versions()) {
            next if $v < 8.3;
            my $dest = "$pgsharedir/$v/tsearch_data";
            next if ! -d $dest;

            my $affix_link = "$dest/$locale.affix";
            my $dict_link = "$dest/$locale.dict";
            $current{$affix_link} = undef;
            $current{$dict_link} = undef;

            # do not touch anything that exists and is not a symlink
            next if -e $affix_link && ! -l $affix_link;
            next if -e $dict_link && ! -l $dict_link;

            # replace whatever symlinks are there with fresh ones
            unlink $affix_link, $dict_link;
            for my $pair ([$affix_out, $affix_link], [$dict_out, $dict_link]) {
                my ($target, $link) = @$pair;
                symlink $target, $link
                    or print STDERR "ERROR: cannot create symlink $link: $!\n";
            }
        }
    }
}

# clean up files for locales which do not exist any more
print "Removing obsolete dictionary files:\n";
my @obsolete = grep { !exists $current{$_} } (
    # every file in the cache directory is a candidate
    (glob "$cachedir/*"),
    # in the tsearch_data directories only symlinks are candidates; anything
    # else is left alone
    grep { -l $_ } (glob "$pgsharedir/*/tsearch_data/*.affix"),
                   (glob "$pgsharedir/*/tsearch_data/*.dict"),
);
for my $f (@obsolete) {
    print "  $f\n";
    # report a failed removal instead of silently claiming success
    unlink $f or print STDERR "ERROR: cannot remove $f: $!\n";
}

__END__

=head1 NAME

pg_updatedicts - build PostgreSQL dictionaries from myspell/hunspell ones

=head1 SYNOPSIS

B<pg_updatedicts>

=head1 DESCRIPTION

B<pg_updatedicts> makes dictionaries and affix files from installed myspell
and hunspell dictionary packages available to PostgreSQL for usage with tsearch
and word stem support. In particular, it takes all I<*.dic> and I<*.aff> files
from /usr/share/hunspell/ and /usr/share/myspell/dicts/, converts them to
UTF-8, puts them into /var/cache/postgresql/dicts/ with I<*.dict> and
I<*.affix> suffixes, and symlinks them into
/usr/share/postgresql/I<version>/tsearch_data/, where PostgreSQL looks for
them.

Through postgresql-common's dpkg trigger, this program is automatically run
whenever a myspell or hunspell dictionary package is installed or upgraded.

=head1 AUTHOR

Martin Pitt L<E<lt>mpitt@debian.orgE<gt>>