diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
commit | 133a45c109da5310add55824db21af5239951f93 (patch) | |
tree | ba6ac4c0a950a0dda56451944315d66409923918 /contrib/snowball/algorithms/german2.sbl | |
parent | Initial commit. (diff) | |
download | rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz rspamd-133a45c109da5310add55824db21af5239951f93.zip |
Adding upstream version 3.8.1. (refs: upstream/3.8.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'contrib/snowball/algorithms/german2.sbl')
-rw-r--r-- | contrib/snowball/algorithms/german2.sbl | 145 |
1 file changed, 145 insertions, 0 deletions
Overview of the file added by this patch (text placed before the
"diff --git" header is ignored by patch tools):

  german2.sbl is a Snowball stemmer definition exporting one routine, stem.

  prelude          Forward pass 1 (wrapped in `test`, so the cursor is
                   restored): a 'u' or 'y' standing between two characters
                   of grouping v is rewritten to 'U' / 'Y'.
                   Forward pass 2: rewrites sharp-s to 'ss' and 'ae', 'oe',
                   'ue' to the umlaut characters, hops over 'qu', and
                   otherwise advances one character.
  mark_regions     Initialises p1 and p2 to limit, marks x three characters
                   in, sets p1 after the first vowel/non-vowel pair (raised
                   to x when it falls short of it), then sets p2 after the
                   next vowel/non-vowel pair.
  postlude         Maps 'Y' -> 'y', 'U' -> 'u' and the three umlaut
                   characters to plain 'a', 'o', 'u'.
  R1 / R2          Backward-mode tests: p1 <= cursor and p2 <= cursor.
  standard_suffix  Three independent `do` steps in backward mode:
                   1. in R1: delete em/ern/er; delete e/en/es and then try
                      to drop an 's' preceded by 'nis' (the -nisse rule
                      named in the file's header comment); delete 's' when
                      preceded by a character of s_ending.
                   2. in R1: delete en/er/est; delete 'st' when preceded by
                      a character of st_ending and `hop 3` succeeds.
                   3. in R2: delete end/ung and then try to delete 'ig' not
                      preceded by 'e' (in R2); delete ig/ik/isch when not
                      preceded by 'e'; delete lich/heit and then try to
                      delete 'er' or 'en' in R1; delete keit and then try
                      to delete 'lich' or 'ig' in R2.
  stem             Runs prelude and mark_regions forwards, standard_suffix
                   backwards, then postlude; each under `do`.

diff --git a/contrib/snowball/algorithms/german2.sbl b/contrib/snowball/algorithms/german2.sbl
new file mode 100644
index 0000000..47ff61e
--- /dev/null
+++ b/contrib/snowball/algorithms/german2.sbl
@@ -0,0 +1,145 @@
+
+/*
+    Extra rule for -nisse ending added 11 Dec 2009
+*/
+
+routines (
+           prelude postlude
+           mark_regions
+           R1 R2
+           standard_suffix
+)
+
+externals ( stem )
+
+integers ( p1 p2 x )
+
+groupings ( v s_ending st_ending )
+
+stringescapes {}
+
+/* special characters */
+
+stringdef a"   '{U+00E4}'
+stringdef o"   '{U+00F6}'
+stringdef u"   '{U+00FC}'
+stringdef ss   '{U+00DF}'
+
+define v 'aeiouy{a"}{o"}{u"}'
+
+define s_ending  'bdfghklmnrt'
+define st_ending s_ending - 'r'
+
+define prelude as (
+
+    test repeat goto (
+        v [('u'] v <- 'U') or
+           ('y'] v <- 'Y')
+    )
+
+    repeat (
+        [substring] among(
+            '{ss}'  (<- 'ss')
+            'ae'    (<- '{a"}')
+            'oe'    (<- '{o"}')
+            'ue'    (<- '{u"}')
+            'qu'    (hop 2)
+            ''      (next)
+        )
+    )
+
+)
+
+define mark_regions as (
+
+    $p1 = limit
+    $p2 = limit
+
+    test(hop 3 setmark x)
+
+    gopast v  gopast non-v  setmark p1
+    try($p1 < x  $p1 = x)  // at least 3
+    gopast v  gopast non-v  setmark p2
+
+)
+
+define postlude as repeat (
+
+    [substring] among(
+        'Y'     (<- 'y')
+        'U'     (<- 'u')
+        '{a"}'  (<- 'a')
+        '{o"}'  (<- 'o')
+        '{u"}'  (<- 'u')
+        ''      (next)
+    )
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define standard_suffix as (
+        do (
+            [substring] R1 among(
+                'em' 'ern' 'er'
+                (   delete
+                )
+                'e' 'en' 'es'
+                (   delete
+                    try (['s'] 'nis' delete)
+                )
+                's'
+                (   s_ending delete
+                )
+            )
+        )
+        do (
+            [substring] R1 among(
+                'en' 'er' 'est'
+                (   delete
+                )
+                'st'
+                (   st_ending hop 3 delete
+                )
+            )
+        )
+        do (
+            [substring] R2 among(
+                'end' 'ung'
+                (   delete
+                    try (['ig'] not 'e' R2 delete)
+                )
+                'ig' 'ik' 'isch'
+                (   not 'e' delete
+                )
+                'lich' 'heit'
+                (   delete
+                    try (
+                        ['er' or 'en'] R1 delete
+                    )
+                )
+                'keit'
+                (   delete
+                    try (
+                        [substring] R2 among(
+                            'lich' 'ig'
+                            (   delete
+                            )
+                        )
+                    )
+                )
+            )
+        )
+    )
+)
+
+define stem as (
+    do prelude
+    do mark_regions
+    backwards
+    do standard_suffix
+    do postlude
+)