From 133a45c109da5310add55824db21af5239951f93 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 10 Apr 2024 23:30:40 +0200 Subject: Adding upstream version 3.8.1. Signed-off-by: Daniel Baumann --- contrib/snowball/algorithms/porter.sbl | 139 +++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 contrib/snowball/algorithms/porter.sbl (limited to 'contrib/snowball/algorithms/porter.sbl') diff --git a/contrib/snowball/algorithms/porter.sbl b/contrib/snowball/algorithms/porter.sbl new file mode 100644 index 0000000..9533b79 --- /dev/null +++ b/contrib/snowball/algorithms/porter.sbl @@ -0,0 +1,139 @@ +integers ( p1 p2 ) +booleans ( Y_found ) + +routines ( + shortv + R1 R2 + Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5a Step_5b +) + +externals ( stem ) + +groupings ( v v_WXY ) + +define v 'aeiouy' +define v_WXY v + 'wxY' + +backwardmode ( + + define shortv as ( non-v_WXY v non-v ) + + define R1 as $p1 <= cursor + define R2 as $p2 <= cursor + + define Step_1a as ( + [substring] among ( + 'sses' (<-'ss') + 'ies' (<-'i') + 'ss' () + 's' (delete) + ) + ) + + define Step_1b as ( + [substring] among ( + 'eed' (R1 <-'ee') + 'ed' + 'ing' ( + test gopast v delete + test substring among( + 'at' 'bl' 'iz' + (<+ 'e') + 'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt' + // ignoring double c, h, j, k, q, v, w, and x + ([next] delete) + '' (atmark p1 test shortv <+ 'e') + ) + ) + ) + ) + + define Step_1c as ( + ['y' or 'Y'] + gopast v + <-'i' + ) + + define Step_2 as ( + [substring] R1 among ( + 'tional' (<-'tion') + 'enci' (<-'ence') + 'anci' (<-'ance') + 'abli' (<-'able') + 'entli' (<-'ent') + 'eli' (<-'e') + 'izer' 'ization' + (<-'ize') + 'ational' 'ation' 'ator' + (<-'ate') + 'alli' (<-'al') + 'alism' 'aliti' + (<-'al') + 'fulness' (<-'ful') + 'ousli' 'ousness' + (<-'ous') + 'iveness' 'iviti' + (<-'ive') + 'biliti' (<-'ble') + ) + ) + + define Step_3 as ( + [substring] R1 among ( + 'alize' (<-'al') + 'icate' 'iciti' 'ical' + (<-'ic') + 'ative' 'ful' 'ness' + (delete) + ) + ) + + define Step_4 as ( + [substring] R2 among ( + 'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement' + 'ment' 'ent' 'ou' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize' + (delete) + 'ion' ('s' or 't' delete) + ) + ) + + define Step_5a as ( + ['e'] + R2 or (R1 not shortv) + delete + ) + + define Step_5b as ( + ['l'] + R2 'l' + delete + ) +) + +define stem as ( + + unset Y_found + do ( ['y'] <-'Y' set Y_found) + do repeat(goto (v ['y']) <-'Y' set Y_found) + + $p1 = limit + $p2 = limit + do( + gopast v gopast non-v setmark p1 + gopast v gopast non-v setmark p2 + ) + + backwards ( + do Step_1a + do Step_1b + do Step_1c + do Step_2 + do Step_3 + do Step_4 + do Step_5a + do Step_5b + ) + + do(Y_found repeat(goto (['Y']) <-'y')) + +) -- cgit v1.2.3