diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
commit | 133a45c109da5310add55824db21af5239951f93 (patch) | |
tree | ba6ac4c0a950a0dda56451944315d66409923918 /contrib/snowball/algorithms/swedish.sbl | |
parent | Initial commit. (diff) | |
download | rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz rspamd-133a45c109da5310add55824db21af5239951f93.zip |
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'contrib/snowball/algorithms/swedish.sbl')
-rw-r--r-- | contrib/snowball/algorithms/swedish.sbl | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/contrib/snowball/algorithms/swedish.sbl b/contrib/snowball/algorithms/swedish.sbl new file mode 100644 index 0000000..2cbb885 --- /dev/null +++ b/contrib/snowball/algorithms/swedish.sbl @@ -0,0 +1,72 @@ +routines ( + mark_regions + main_suffix + consonant_pair + other_suffix +) + +externals ( stem ) + +integers ( p1 x ) + +groupings ( v s_ending ) + +stringescapes {} + +/* special characters */ + +stringdef a" '{U+00E4}' +stringdef ao '{U+00E5}' +stringdef o" '{U+00F6}' + +define v 'aeiouy{a"}{ao}{o"}' + +define s_ending 'bcdfghjklmnoprtvy' + +define mark_regions as ( + + $p1 = limit + test ( hop 3 setmark x ) + goto v gopast non-v setmark p1 + try ( $p1 < x $p1 = x ) +) + +backwardmode ( + + define main_suffix as ( + setlimit tomark p1 for ([substring]) + among( + + 'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne' + 'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter' + 'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens' + 'hetens' 'erns' 'at' 'andet' 'het' 'ast' + (delete) + 's' + (s_ending delete) + ) + ) + + define consonant_pair as setlimit tomark p1 for ( + among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt') + and ([next] delete) + ) + + define other_suffix as setlimit tomark p1 for ( + [substring] among( + 'lig' 'ig' 'els' (delete) + 'l{o"}st' (<-'l{o"}s') + 'fullt' (<-'full') + ) + ) +) + +define stem as ( + + do mark_regions + backwards ( + do main_suffix + do consonant_pair + do other_suffix + ) +) |