diff options
Diffstat (limited to 'ext/fts5/mkportersteps.tcl')
-rw-r--r-- | ext/fts5/mkportersteps.tcl | 222 |
1 files changed, 222 insertions, 0 deletions
#
# 2014 Jun 09
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#     static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#     static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#     static int fts5PorterStep2(char *aBuf, int *pnBuf);
#     static int fts5PorterStep3(char *aBuf, int *pnBuf);
#     static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
# The generated C text is written to standard output.
#
# Each element of the O() array is a list of rules for one generated
# function.  A rule is a list of three or four fields:
#
#     suffix condition replacement ?ret?
#
#   suffix       Text that must appear at the end of the buffer.
#   condition    Name of a C function called as condition(aBuf, nBuf-N),
#                where N is the suffix length, or an empty string if the
#                rule is unconditional.
#   replacement  Text that overwrites the suffix (may be empty, in which
#                case the suffix is simply removed).
#   ret          Optional boolean.  If present and true, the generated
#                function sets its return value to 1 when the rule fires.
#

set O(Step1B2) {
  { at {} ate 1 }
  { bl {} ble 1 }
  { iz {} ize 1 }
}

set O(Step1B) {
  { "eed" fts5Porter_MGt0  "ee" 0 }
  { "ed"  fts5Porter_Vowel ""   1 }
  { "ing" fts5Porter_Vowel ""   1 }
}

set O(Step2) {
  { "ational" fts5Porter_MGt0 "ate"  }
  { "tional"  fts5Porter_MGt0 "tion" }
  { "enci"    fts5Porter_MGt0 "ence" }
  { "anci"    fts5Porter_MGt0 "ance" }
  { "izer"    fts5Porter_MGt0 "ize"  }
  { "logi"    fts5Porter_MGt0 "log"  }
  { "bli"     fts5Porter_MGt0 "ble"  }
  { "alli"    fts5Porter_MGt0 "al"   }
  { "entli"   fts5Porter_MGt0 "ent"  }
  { "eli"     fts5Porter_MGt0 "e"    }
  { "ousli"   fts5Porter_MGt0 "ous"  }
  { "ization" fts5Porter_MGt0 "ize"  }
  { "ation"   fts5Porter_MGt0 "ate"  }
  { "ator"    fts5Porter_MGt0 "ate"  }
  { "alism"   fts5Porter_MGt0 "al"   }
  { "iveness" fts5Porter_MGt0 "ive"  }
  { "fulness" fts5Porter_MGt0 "ful"  }
  { "ousness" fts5Porter_MGt0 "ous"  }
  { "aliti"   fts5Porter_MGt0 "al"   }
  { "iviti"   fts5Porter_MGt0 "ive"  }
  { "biliti"  fts5Porter_MGt0 "ble"  }
}

set O(Step3) {
  { "icate" fts5Porter_MGt0 "ic" }
  { "ative" fts5Porter_MGt0 ""   }
  { "alize" fts5Porter_MGt0 "al" }
  { "iciti" fts5Porter_MGt0 "ic" }
  { "ical"  fts5Porter_MGt0 "ic" }
  { "ful"   fts5Porter_MGt0 ""   }
  { "ness"  fts5Porter_MGt0 ""   }
}

set O(Step4) {
  { "al"    fts5Porter_MGt1 "" }
  { "ance"  fts5Porter_MGt1 "" }
  { "ence"  fts5Porter_MGt1 "" }
  { "er"    fts5Porter_MGt1 "" }
  { "ic"    fts5Porter_MGt1 "" }
  { "able"  fts5Porter_MGt1 "" }
  { "ible"  fts5Porter_MGt1 "" }
  { "ant"   fts5Porter_MGt1 "" }
  { "ement" fts5Porter_MGt1 "" }
  { "ment"  fts5Porter_MGt1 "" }
  { "ent"   fts5Porter_MGt1 "" }
  { "ion"   fts5Porter_MGt1_and_S_or_T "" }
  { "ou"    fts5Porter_MGt1 "" }
  { "ism"   fts5Porter_MGt1 "" }
  { "ate"   fts5Porter_MGt1 "" }
  { "iti"   fts5Porter_MGt1 "" }
  { "ous"   fts5Porter_MGt1 "" }
  { "ive"   fts5Porter_MGt1 "" }
  { "ize"   fts5Porter_MGt1 "" }
}

# Comparison callback that orders two rules by the second-to-last
# character of their suffix (field 0).  Returns the result of
# "string compare" on those two characters.
#
# NOTE: nothing in this script calls this proc.  It is retained in case
# external tooling sources this file and uses it.
#
proc sort_cb {lhs rhs} {
  set L [string range [lindex $lhs 0] end-1 end-1]
  set R [string range [lindex $rhs 0] end-1 end-1]
  string compare $L $R
}

# Write to standard output the C implementation of one step function:
#
#     static int fts5Porter<name>(char *aBuf, int *pnBuf);
#
# Parameter $name is appended to "fts5Porter" to form the C function name.
# Parameter $data is a list of rules in the format described at the top of
# this file.
#
# The generated function switches on the second-to-last byte of the
# buffer.  Rules are grouped by the second-to-last character of their
# suffix, one "case" per distinct character, and within a case the rules
# are emitted as an if/else-if chain in the order they appear in $data.
#
proc create_step_function {name data} {

  # Skeleton of the generated C function.
  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  # One case label of the switch statement.
  set T(case) {
    case '${k}':
      ${ifstmts}
      break;
  }

  # Templates for a single rule.  The array key is
  # if_<bCond>_<bMemcpy>_<bRet>, where the three flags indicate whether
  # the rule has a condition callback, has a non-empty replacement, and
  # sets the return value.  All eight combinations are defined so that
  # any rule tuple can be rendered.
  set T(if_0_0_0) {
    if( ${match} ){
      *pnBuf = nBuf - $n;
    }
  }
  set T(if_1_0_0) {
    if( ${match} ){
      if( ${cond} ){
        *pnBuf = nBuf - $n;
      }
    }
  }
  set T(if_0_1_0) {
    if( ${match} ){
      ${memcpy}
      *pnBuf = nBuf - $n + $nRep;
    }
  }
  set T(if_1_1_0) {
    if( ${match} ){
      if( ${cond} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
    }
  }
  set T(if_0_0_1) {
    if( ${match} ){
      *pnBuf = nBuf - $n;
      ret = 1;
    }
  }
  set T(if_1_0_1) {
    if( ${match} ){
      if( ${cond} ){
        *pnBuf = nBuf - $n;
        ret = 1;
      }
    }
  }
  set T(if_0_1_1) {
    if( ${match} ){
      ${memcpy}
      *pnBuf = nBuf - $n + $nRep;
      ret = 1;
    }
  }
  set T(if_1_1_1) {
    if( ${match} ){
      if( ${cond} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
    }
  }

  set switchbody ""

  # Bucket the rules by the second-to-last character of the suffix.  This
  # is the byte the generated switch statement dispatches on.
  foreach I $data {
    set k [string range [lindex $I 0] end-1 end-1]
    lappend aCase($k) $I
  }

  # Emit the case labels in sorted order so the output is stable.
  foreach k [lsort [array names aCase]] {
    set ifstmts ""
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]         ;# Suffix text for this rule
      set zRep [lindex $I 2]            ;# Replacement text for rule
      set xCond [lindex $I 1]           ;# Condition callback (or "")

      set n [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments substituted into the rule template below.
      set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond "${xCond}(aBuf, nBuf-$n)"

      # Flags that select the rule template.
      set bMemcpy [expr {$nRep>0}]
      set bCond [expr {$xCond!=""}]
      set bRet [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    # Chain the rules of this case together as if / else if / ...
    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }

  puts [subst -nocommands $T(function)]
}


puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]

# Iterate in sorted order.  [array names] returns names in hash-table
# order, which would leave the order of the generated functions dependent
# on the Tcl implementation rather than on this script.
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}

puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]