summary refs log tree commit diff stats
path: root/contrib/snowball/algorithms/irish.sbl
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
commit 133a45c109da5310add55824db21af5239951f93 (patch)
tree ba6ac4c0a950a0dda56451944315d66409923918 /contrib/snowball/algorithms/irish.sbl
parent Initial commit. (diff)
download rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1. upstream/3.8.1 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'contrib/snowball/algorithms/irish.sbl')
-rw-r--r-- contrib/snowball/algorithms/irish.sbl 151
1 files changed, 151 insertions, 0 deletions
diff --git a/contrib/snowball/algorithms/irish.sbl b/contrib/snowball/algorithms/irish.sbl
new file mode 100644
index 0000000..0b1288a
--- /dev/null
+++ b/contrib/snowball/algorithms/irish.sbl
@@ -0,0 +1,151 @@
+routines ( // internal routines; each one is defined later in this script
+ R1 R2 RV // region tests, defined in the backwardmode section
+ initial_morph // rewrites a word-initial prefix
+ mark_regions // sets pV, p1 and p2
+ noun_sfx // suffix table
+ deriv // suffix table
+ verb_sfx // suffix table
+)
+
+externals ( stem ) // stem is the routine exported to callers
+
+integers ( pV p1 p2 ) // marks set by mark_regions and compared with cursor by RV/R1/R2
+
+groupings ( v ) // v: the vowel grouping defined below
+
+stringescapes {} // inside string literals, {name} expands to the matching stringdef
+
+/* Accented characters: named escapes for the acute-accented vowels */
+
+stringdef a' '{U+00E1}' // a-acute
+stringdef e' '{U+00E9}' // e-acute
+stringdef i' '{U+00ED}' // i-acute
+stringdef o' '{U+00F3}' // o-acute
+stringdef u' '{U+00FA}' // u-acute
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}' // the five plain vowels plus their acute forms
+
+define mark_regions as (
+    // Defaults: all three marks start at limit (the end of the input).
+    $pV = limit
+    $p1 = limit
+    $p2 = limit
+
+    // One left-to-right scan sets the marks in order. If any step runs out
+    // of input, the remaining steps are skipped and those marks stay at limit.
+    do (
+        gopast v setmark pV                  // pV: just past the first vowel
+        gopast non-v setmark p1              // p1: just past the next non-vowel
+        gopast v gopast non-v setmark p2     // p2: past one more vowel, then non-vowel
+    )
+)
+
+define initial_morph as (
+    [substring] among (
+        // Hyphenated h-/n-/t- (nAthair -> n-athair, but alone are problematic).
+        'h-' 'n-' 't-'
+            (delete)
+        // Verbs: d' is dropped ({'} is the apostrophe); d'fh keeps only the f.
+        'd{'}'
+            (delete)
+        'd{'}fh'
+            (<- 'f')
+        // Other contractions: m' and b' are dropped.
+        'm{'}' 'b{'}'
+            (delete)
+        // sh is reduced to s.
+        'sh'
+            (<- 's')
+        // Initial clusters reduced to their last letter (mb -> b, bhf -> f, ...).
+        'mb'
+            (<- 'b')
+        'gc'
+            (<- 'c')
+        'nd'
+            (<- 'd')
+        'bhf'
+            (<- 'f')
+        'ng'
+            (<- 'g')
+        'bp'
+            (<- 'p')
+        'ts'
+            (<- 's')
+        'dt'
+            (<- 't')
+        // The longest entry that matches at the cursor wins, so 'bhf' beats 'bh'.
+        // Lenition: consonant + h is reduced to the bare consonant.
+        'bh'
+            (<- 'b')
+        'ch'
+            (<- 'c')
+        'dh'
+            (<- 'd')
+        'fh'
+            (<- 'f')
+        'gh'
+            (<- 'g')
+        'mh'
+            (<- 'm')
+        'ph'
+            (<- 'p')
+        'th'
+            (<- 't')
+    )
+)
+
+backwardmode (
+    // Region tests: each holds when the cursor is at or to the right of its mark.
+    define RV as $pV <= cursor
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+    // noun_sfx: first group is deleted only if R1 holds, second group only if R2 holds.
+    define noun_sfx as (
+        [substring] among (
+            'amh' 'eamh' 'abh' 'eabh' 'aibh' 'ibh' 'aimh' 'imh'
+            'a{i'}ocht' '{i'}ocht' 'a{i'}ochta' '{i'}ochta'
+                (R1 delete)
+            'ire' 'ir{i'}' 'aire' 'air{i'}'
+                (R2 delete)
+        )
+    )
+    // deriv: first group is deleted if R2 holds; the rest are rewritten with no region test.
+    define deriv as (
+        [substring] among (
+            'acht' 'eacht' 'ach' 'each'
+            'eacht{u'}il' 'eachta' 'acht{u'}il' 'achta'
+                (R2 delete)     // siopadóireacht -> siopadóir, but not poblacht -> pobl
+            'arcacht' 'arcachta{i'}' 'arcachta'
+                (<- 'arc')      // monarcacht -> monarc
+            'gineach' 'gineas' 'ginis'
+                (<- 'gin')
+            'grafa{i'}och' 'grafa{i'}ocht' 'grafa{i'}ochta' 'grafa{i'}ochta{i'}'
+                (<- 'graf')
+            'paite' 'patach' 'pataigh' 'patacha'
+                (<- 'paite')    // all four forms end up as 'paite'
+            '{o'}ideach' '{o'}ideacha' '{o'}idigh'
+                (<- '{o'}id')
+        )
+    )
+    // verb_sfx: first group is deleted only if RV holds, second group only if R1 holds.
+    define verb_sfx as (
+        [substring] among (
+            'imid' 'aimid' '{i'}mid' 'a{i'}mid'
+            'faidh' 'fidh'
+                (RV delete)
+            'ain' 'eadh' 'adh'
+            '{a'}il' 'tear' 'tar'
+                (R1 delete)
+        )
+    )
+)
+
+define stem as (
+    // Forward: rewrite the word-initial prefix, then mark regions on the result.
+    do initial_morph
+    do mark_regions
+    // Backward: the three suffix passes; `do` continues whether or not each matches.
+    backwards (
+        do noun_sfx  do deriv  do verb_sfx
+    )
+)