diff options
Diffstat (limited to '')
-rw-r--r-- | contrib/snowball/algorithms/english.sbl | 229 |
1 files changed, 229 insertions, 0 deletions
diff --git a/contrib/snowball/algorithms/english.sbl b/contrib/snowball/algorithms/english.sbl new file mode 100644 index 0000000..fe18d7a --- /dev/null +++ b/contrib/snowball/algorithms/english.sbl @@ -0,0 +1,229 @@ +integers ( p1 p2 ) +booleans ( Y_found ) + +routines ( + prelude postlude + mark_regions + shortv + R1 R2 + Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5 + exception1 + exception2 +) + +externals ( stem ) + +groupings ( v v_WXY valid_LI ) + +stringescapes {} + +define v 'aeiouy' +define v_WXY v + 'wxY' + +define valid_LI 'cdeghkmnrt' + +define prelude as ( + unset Y_found + do ( ['{'}'] delete) + do ( ['y'] <-'Y' set Y_found) + do repeat(goto (v ['y']) <-'Y' set Y_found) +) + +define mark_regions as ( + $p1 = limit + $p2 = limit + do( + among ( + 'gener' + 'commun' // added May 2005 + 'arsen' // added Nov 2006 (arsenic/arsenal) + // ... extensions possible here ... + ) or (gopast v gopast non-v) + setmark p1 + gopast v gopast non-v setmark p2 + ) +) + +backwardmode ( + + define shortv as ( + ( non-v_WXY v non-v ) + or + ( non-v v atlimit ) + ) + + define R1 as $p1 <= cursor + define R2 as $p2 <= cursor + + define Step_1a as ( + try ( + [substring] among ( + '{'}' '{'}s' '{'}s{'}' + (delete) + ) + ) + [substring] among ( + 'sses' (<-'ss') + 'ied' 'ies' + ((hop 2 <-'i') or <-'ie') + 's' (next gopast v delete) + 'us' 'ss' + ) + ) + + define Step_1b as ( + [substring] among ( + 'eed' 'eedly' + (R1 <-'ee') + 'ed' 'edly' 'ing' 'ingly' + ( + test gopast v delete + test substring among( + 'at' 'bl' 'iz' + (<+ 'e') + 'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt' + // ignoring double c, h, j, k, q, v, w, and x + ([next] delete) + '' (atmark p1 test shortv <+ 'e') + ) + ) + ) + ) + + define Step_1c as ( + ['y' or 'Y'] + non-v not atlimit + <-'i' + ) + + define Step_2 as ( + [substring] R1 among ( + 'tional' (<-'tion') + 'enci' (<-'ence') + 'anci' (<-'ance') + 'abli' (<-'able') + 'entli' (<-'ent') + 'izer' 'ization' + (<-'ize') + 'ational' 'ation' 'ator' + (<-'ate') + 'alism' 'aliti' 'alli' + (<-'al') + 'fulness' (<-'ful') + 'ousli' 'ousness' + (<-'ous') + 'iveness' 'iviti' + (<-'ive') + 'biliti' 'bli' + (<-'ble') + 'ogi' ('l' <-'og') + 'fulli' (<-'ful') + 'lessli' (<-'less') + 'li' (valid_LI delete) + ) + ) + + define Step_3 as ( + [substring] R1 among ( + 'tional' (<- 'tion') + 'ational' (<- 'ate') + 'alize' (<-'al') + 'icate' 'iciti' 'ical' + (<-'ic') + 'ful' 'ness' + (delete) + 'ative' + (R2 delete) // 'R2' added Dec 2001 + ) + ) + + define Step_4 as ( + [substring] R2 among ( + 'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement' + 'ment' 'ent' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize' + (delete) + 'ion' ('s' or 't' delete) + ) + ) + + define Step_5 as ( + [substring] among ( + 'e' (R2 or (R1 not shortv) delete) + 'l' (R2 'l' delete) + ) + ) + + define exception2 as ( + + [substring] atlimit among( + 'inning' 'outing' 'canning' 'herring' 'earring' + 'proceed' 'exceed' 'succeed' + + // ... extensions possible here ... + + ) + ) +) + +define exception1 as ( + + [substring] atlimit among( + + /* special changes: */ + + 'skis' (<-'ski') + 'skies' (<-'sky') + 'dying' (<-'die') + 'lying' (<-'lie') + 'tying' (<-'tie') + + /* special -LY cases */ + + 'idly' (<-'idl') + 'gently' (<-'gentl') + 'ugly' (<-'ugli') + 'early' (<-'earli') + 'only' (<-'onli') + 'singly' (<-'singl') + + // ... extensions possible here ... + + /* invariant forms: */ + + 'sky' + 'news' + 'howe' + + 'atlas' 'cosmos' 'bias' 'andes' // not plural forms + + // ... extensions possible here ... + ) +) + +define postlude as (Y_found repeat(goto (['Y']) <-'y')) + +define stem as ( + + exception1 or + not hop 3 or ( + do prelude + do mark_regions + backwards ( + + do Step_1a + + exception2 or ( + + do Step_1b + do Step_1c + + do Step_2 + do Step_3 + do Step_4 + + do Step_5 + ) + ) + do postlude + ) +) |