summaryrefslogtreecommitdiffstats
path: root/contrib/snowball/algorithms/lovins.sbl
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/snowball/algorithms/lovins.sbl')
-rw-r--r--contrib/snowball/algorithms/lovins.sbl208
1 files changed, 208 insertions, 0 deletions
diff --git a/contrib/snowball/algorithms/lovins.sbl b/contrib/snowball/algorithms/lovins.sbl
new file mode 100644
index 0000000..3f69f15
--- /dev/null
+++ b/contrib/snowball/algorithms/lovins.sbl
@@ -0,0 +1,208 @@
+
+stringescapes {}
+
+routines (
+ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA BB CC
+
+ endings
+
+ undouble respell
+)
+
+externals ( stem )
+
+backwardmode (
+
+ /* Lovins' conditions A, B ... CC, as given in her Appendix B, where
+ a test for a two letter prefix ('test hop 2') is implicitly
+ assumed. Note that 'e' next 'u' corresponds to her u*e because
+ Snowball is scanning backwards. */
+
+ define A as ( hop 2 )
+ define B as ( hop 3 )
+ define C as ( hop 4 )
+ define D as ( hop 5 )
+ define E as ( test hop 2 not 'e' )
+ define F as ( test hop 3 not 'e' )
+ define G as ( test hop 3 'f' )
+ define H as ( test hop 2 't' or 'll' )
+ define I as ( test hop 2 not 'o' not 'e' )
+ define J as ( test hop 2 not 'a' not 'e' )
+ define K as ( test hop 3 'l' or 'i' or ('e' next 'u') )
+ define L as ( test hop 2 not 'u' not 'x' not ('s' not 'o') )
+ define M as ( test hop 2 not 'a' not 'c' not 'e' not 'm' )
+ define N as ( test hop 3 ( hop 2 not 's' or hop 2 ) )
+ define O as ( test hop 2 'l' or 'i' )
+ define P as ( test hop 2 not 'c' )
+ define Q as ( test hop 2 test hop 3 not 'l' not 'n' )
+ define R as ( test hop 2 'n' or 'r' )
+ define S as ( test hop 2 'dr' or ('t' not 't') )
+ define T as ( test hop 2 's' or ('t' not 'o') )
+ define U as ( test hop 2 'l' or 'm' or 'n' or 'r' )
+ define V as ( test hop 2 'c' )
+ define W as ( test hop 2 not 's' not 'u' )
+ define X as ( test hop 2 'l' or 'i' or ('e' next 'u') )
+ define Y as ( test hop 2 'in' )
+ define Z as ( test hop 2 not 'f' )
+ define AA as ( test hop 2 among ( 'd' 'f' 'ph' 'th' 'l' 'er' 'or'
+ 'es' 't' ) )
+ define BB as ( test hop 3 not 'met' not 'ryst' )
+ define CC as ( test hop 2 'l' )
+
+
+ /* The system of endings, as given in Appendix A. */
+
+ define endings as (
+ [substring] among(
+ 'alistically' B 'arizability' A 'izationally' B
+
+ 'antialness' A 'arisations' A 'arizations' A 'entialness' A
+
+ 'allically' C 'antaneous' A 'antiality' A 'arisation' A
+ 'arization' A 'ationally' B 'ativeness' A 'eableness' E
+ 'entations' A 'entiality' A 'entialize' A 'entiation' A
+ 'ionalness' A 'istically' A 'itousness' A 'izability' A
+ 'izational' A
+
+ 'ableness' A 'arizable' A 'entation' A 'entially' A
+ 'eousness' A 'ibleness' A 'icalness' A 'ionalism' A
+ 'ionality' A 'ionalize' A 'iousness' A 'izations' A
+ 'lessness' A
+
+ 'ability' A 'aically' A 'alistic' B 'alities' A
+ 'ariness' E 'aristic' A 'arizing' A 'ateness' A
+ 'atingly' A 'ational' B 'atively' A 'ativism' A
+ 'elihood' E 'encible' A 'entally' A 'entials' A
+ 'entiate' A 'entness' A 'fulness' A 'ibility' A
+ 'icalism' A 'icalist' A 'icality' A 'icalize' A
+ 'ication' G 'icianry' A 'ination' A 'ingness' A
+ 'ionally' A 'isation' A 'ishness' A 'istical' A
+ 'iteness' A 'iveness' A 'ivistic' A 'ivities' A
+ 'ization' F 'izement' A 'oidally' A 'ousness' A
+
+ 'aceous' A 'acious' B 'action' G 'alness' A
+ 'ancial' A 'ancies' A 'ancing' B 'ariser' A
+ 'arized' A 'arizer' A 'atable' A 'ations' B
+ 'atives' A 'eature' Z 'efully' A 'encies' A
+ 'encing' A 'ential' A 'enting' C 'entist' A
+ 'eously' A 'ialist' A 'iality' A 'ialize' A
+ 'ically' A 'icance' A 'icians' A 'icists' A
+ 'ifully' A 'ionals' A 'ionate' D 'ioning' A
+ 'ionist' A 'iously' A 'istics' A 'izable' E
+ 'lessly' A 'nesses' A 'oidism' A
+
+ 'acies' A 'acity' A 'aging' B 'aical' A
+ 'alist' A 'alism' B 'ality' A 'alize' A
+ 'allic'BB 'anced' B 'ances' B 'antic' C
+ 'arial' A 'aries' A 'arily' A 'arity' B
+ 'arize' A 'aroid' A 'ately' A 'ating' I
+ 'ation' B 'ative' A 'ators' A 'atory' A
+ 'ature' E 'early' Y 'ehood' A 'eless' A
+ 'elity' A 'ement' A 'enced' A 'ences' A
+ 'eness' E 'ening' E 'ental' A 'ented' C
+ 'ently' A 'fully' A 'ially' A 'icant' A
+ 'ician' A 'icide' A 'icism' A 'icist' A
+ 'icity' A 'idine' I 'iedly' A 'ihood' A
+ 'inate' A 'iness' A 'ingly' B 'inism' J
+ 'inity'CC 'ional' A 'ioned' A 'ished' A
+ 'istic' A 'ities' A 'itous' A 'ively' A
+ 'ivity' A 'izers' F 'izing' F 'oidal' A
+ 'oides' A 'otide' A 'ously' A
+
+ 'able' A 'ably' A 'ages' B 'ally' B
+ 'ance' B 'ancy' B 'ants' B 'aric' A
+ 'arly' K 'ated' I 'ates' A 'atic' B
+ 'ator' A 'ealy' Y 'edly' E 'eful' A
+ 'eity' A 'ence' A 'ency' A 'ened' E
+ 'enly' E 'eous' A 'hood' A 'ials' A
+ 'ians' A 'ible' A 'ibly' A 'ical' A
+ 'ides' L 'iers' A 'iful' A 'ines' M
+ 'ings' N 'ions' B 'ious' A 'isms' B
+ 'ists' A 'itic' H 'ized' F 'izer' F
+ 'less' A 'lily' A 'ness' A 'ogen' A
+ 'ward' A 'wise' A 'ying' B 'yish' A
+
+ 'acy' A 'age' B 'aic' A 'als'BB
+ 'ant' B 'ars' O 'ary' F 'ata' A
+ 'ate' A 'eal' Y 'ear' Y 'ely' E
+ 'ene' E 'ent' C 'ery' E 'ese' A
+ 'ful' A 'ial' A 'ian' A 'ics' A
+ 'ide' L 'ied' A 'ier' A 'ies' P
+ 'ily' A 'ine' M 'ing' N 'ion' Q
+ 'ish' C 'ism' B 'ist' A 'ite'AA
+ 'ity' A 'ium' A 'ive' A 'ize' F
+ 'oid' A 'one' R 'ous' A
+
+ 'ae' A 'al'BB 'ar' X 'as' B
+ 'ed' E 'en' F 'es' E 'ia' A
+ 'ic' A 'is' A 'ly' B 'on' S
+ 'or' T 'um' U 'us' V 'yl' R
+ '{'}s' A 's{'}' A
+
+ 'a' A 'e' A 'i' A 'o' A
+ 's' W 'y' B
+
+ (delete)
+ )
+ )
+
+ /* Undoubling is rule 1 of appendix C. */
+
+ define undouble as (
+ test substring among ('bb' 'dd' 'gg' 'll' 'mm' 'nn' 'pp' 'rr' 'ss'
+ 'tt')
+ [next] delete
+ )
+
+ /* The other appendix C rules can be done together. */
+
+ define respell as (
+ [substring] among (
+ 'iev' (<-'ief')
+ 'uct' (<-'uc')
+ 'umpt' (<-'um')
+ 'rpt' (<-'rb')
+ 'urs' (<-'ur')
+ 'istr' (<-'ister')
+ 'metr' (<-'meter')
+ 'olv' (<-'olut')
+ 'ul' (not 'a' not 'i' not 'o' <-'l')
+ 'bex' (<-'bic')
+ 'dex' (<-'dic')
+ 'pex' (<-'pic')
+ 'tex' (<-'tic')
+ 'ax' (<-'ac')
+ 'ex' (<-'ec')
+ 'ix' (<-'ic')
+ 'lux' (<-'luc')
+ 'uad' (<-'uas')
+ 'vad' (<-'vas')
+ 'cid' (<-'cis')
+ 'lid' (<-'lis')
+ 'erid' (<-'eris')
+ 'pand' (<-'pans')
+ 'end' (not 's' <-'ens')
+ 'ond' (<-'ons')
+ 'lud' (<-'lus')
+ 'rud' (<-'rus')
+ 'her' (not 'p' not 't' <-'hes')
+ 'mit' (<-'mis')
+ 'ent' (not 'm' <-'ens')
+ /* 'ent' was 'end' in the 1968 paper - a typo. */
+ 'ert' (<-'ers')
+ 'et' (not 'n' <-'es')
+ 'yt' (<-'ys')
+ 'yz' (<-'ys')
+ )
+ )
+)
+
+define stem as (
+
+ backwards (
+ do endings
+ do undouble
+ do respell
+ )
+)
+