summary refs log tree commit diff stats
path: root/ext/fts5/mkportersteps.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts5/mkportersteps.tcl')
-rw-r--r-- ext/fts5/mkportersteps.tcl 222
1 files changed, 222 insertions, 0 deletions
diff --git a/ext/fts5/mkportersteps.tcl b/ext/fts5/mkportersteps.tcl
new file mode 100644
index 0000000..b6214c6
--- /dev/null
+++ b/ext/fts5/mkportersteps.tcl
@@ -0,0 +1,222 @@
+#
+# 2014 Jun 09
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#-------------------------------------------------------------------------
+#
+# This script generates the implementations of the following C functions,
+# which are part of the porter tokenizer implementation:
+#
+# static int fts5PorterStep1B(char *aBuf, int *pnBuf);
+# static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
+# static int fts5PorterStep2(char *aBuf, int *pnBuf);
+# static int fts5PorterStep3(char *aBuf, int *pnBuf);
+# static int fts5PorterStep4(char *aBuf, int *pnBuf);
+#
+
+# Rule tables, one array element per generated C function. Each table is a
+# list of rules of the form
+#
+#   { suffix condition replacement ?flag? }
+#
+# suffix      - word ending the generated code compares against the buffer
+# condition   - name of a C predicate invoked as condition(aBuf, nBuf-len),
+#               or an empty string for a rule with no extra condition
+# replacement - text copied over the suffix, or an empty string to drop it
+# flag        - optional; when true the generated function returns 1 after
+#               applying the rule
+#
+set O(Step1B2) {
+  { "at"  ""  "ate"  1 }
+  { "bl"  ""  "ble"  1 }
+  { "iz"  ""  "ize"  1 }
+}
+
+set O(Step1B) {
+  { "eed"  fts5Porter_MGt0   "ee"  0 }
+  { "ed"   fts5Porter_Vowel  ""    1 }
+  { "ing"  fts5Porter_Vowel  ""    1 }
+}
+
+set O(Step2) {
+  { "ational"  fts5Porter_MGt0  "ate"  }
+  { "tional"   fts5Porter_MGt0  "tion" }
+  { "enci"     fts5Porter_MGt0  "ence" }
+  { "anci"     fts5Porter_MGt0  "ance" }
+  { "izer"     fts5Porter_MGt0  "ize"  }
+  { "logi"     fts5Porter_MGt0  "log"  }
+  { "bli"      fts5Porter_MGt0  "ble"  }
+  { "alli"     fts5Porter_MGt0  "al"   }
+  { "entli"    fts5Porter_MGt0  "ent"  }
+  { "eli"      fts5Porter_MGt0  "e"    }
+  { "ousli"    fts5Porter_MGt0  "ous"  }
+  { "ization"  fts5Porter_MGt0  "ize"  }
+  { "ation"    fts5Porter_MGt0  "ate"  }
+  { "ator"     fts5Porter_MGt0  "ate"  }
+  { "alism"    fts5Porter_MGt0  "al"   }
+  { "iveness"  fts5Porter_MGt0  "ive"  }
+  { "fulness"  fts5Porter_MGt0  "ful"  }
+  { "ousness"  fts5Porter_MGt0  "ous"  }
+  { "aliti"    fts5Porter_MGt0  "al"   }
+  { "iviti"    fts5Porter_MGt0  "ive"  }
+  { "biliti"   fts5Porter_MGt0  "ble"  }
+}
+
+set O(Step3) {
+  { "icate"  fts5Porter_MGt0  "ic" }
+  { "ative"  fts5Porter_MGt0  ""   }
+  { "alize"  fts5Porter_MGt0  "al" }
+  { "iciti"  fts5Porter_MGt0  "ic" }
+  { "ical"   fts5Porter_MGt0  "ic" }
+  { "ful"    fts5Porter_MGt0  ""   }
+  { "ness"   fts5Porter_MGt0  ""   }
+}
+
+set O(Step4) {
+  { "al"     fts5Porter_MGt1             "" }
+  { "ance"   fts5Porter_MGt1             "" }
+  { "ence"   fts5Porter_MGt1             "" }
+  { "er"     fts5Porter_MGt1             "" }
+  { "ic"     fts5Porter_MGt1             "" }
+  { "able"   fts5Porter_MGt1             "" }
+  { "ible"   fts5Porter_MGt1             "" }
+  { "ant"    fts5Porter_MGt1             "" }
+  { "ement"  fts5Porter_MGt1             "" }
+  { "ment"   fts5Porter_MGt1             "" }
+  { "ent"    fts5Porter_MGt1             "" }
+  { "ion"    fts5Porter_MGt1_and_S_or_T  "" }
+  { "ou"     fts5Porter_MGt1             "" }
+  { "ism"    fts5Porter_MGt1             "" }
+  { "ate"    fts5Porter_MGt1             "" }
+  { "iti"    fts5Porter_MGt1             "" }
+  { "ous"    fts5Porter_MGt1             "" }
+  { "ive"    fts5Porter_MGt1             "" }
+  { "ize"    fts5Porter_MGt1             "" }
+}
+
+# Compare two rule entries by the second-to-last character of their suffix
+# (element 0 of each entry). Returns the result of [string compare] on those
+# two characters: -1, 0 or 1.
+#
+# NOTE(review): not invoked anywhere in the visible part of this script; the
+# signature looks like an [lsort -command] comparator - confirm before use.
+proc sort_cb {lhs rhs} {
+  set lhsKey [string index [lindex $lhs 0] end-1]
+  set rhsKey [string index [lindex $rhs 0] end-1]
+  return [string compare $lhsKey $rhsKey]
+}
+
+# Generate the C function fts5Porter<name>() for one table of suffix rules
+# and print it to stdout.
+#
+#   name - Suffix of the generated function name, e.g. Step2.
+#   data - List of rules. Element 0 of a rule is the suffix to match,
+#          element 1 the name of a C condition function (or empty),
+#          element 2 the replacement text (or empty), and the optional
+#          element 3 a boolean that makes the generated function return 1
+#          when the rule is applied.
+#
+# The generated function switches on the second-to-last character of the
+# buffer. Rules whose suffix shares that character are tested in table order
+# as an if / else-if chain. Because each "else" attaches to the outer suffix
+# test, the first rule whose suffix matches ends the chain even when its
+# condition function rejects the stem. An applied rule stores the new buffer
+# length in *pnBuf.
+proc create_step_function {name data} {
+
+ # Skeleton of the generated function. It is expanded with
+ # subst -nocommands, so the square brackets are emitted literally.
+ set T(function) {
+static int fts5Porter${name}(char *aBuf, int *pnBuf){
+ int ret = 0;
+ int nBuf = *pnBuf;
+ switch( aBuf[nBuf-2] ){
+ ${switchbody}
+ }
+ return ret;
+}
+ }
+
+ # One case label per distinct second-to-last suffix character.
+ set T(case) {
+ case '${k}':
+ ${ifstmts}
+ break;
+ }
+
+ # Rule templates, keyed as if_<bCond>_<bMemcpy>_<bRet>:
+ #   bCond   - rule has a condition function
+ #   bMemcpy - rule has a non-empty replacement
+ #   bRet    - rule sets the return value to 1
+ set T(if_0_0_0) {
+ if( ${match} ){
+ *pnBuf = nBuf - $n;
+ }
+ }
+ set T(if_1_0_0) {
+ if( ${match} ){
+ if( ${cond} ){
+ *pnBuf = nBuf - $n;
+ }
+ }
+ }
+ set T(if_0_1_0) {
+ if( ${match} ){
+ ${memcpy}
+ *pnBuf = nBuf - $n + $nRep;
+ }
+ }
+ set T(if_1_1_0) {
+ if( ${match} ){
+ if( ${cond} ){
+ ${memcpy}
+ *pnBuf = nBuf - $n + $nRep;
+ }
+ }
+ }
+ # Unconditional removal that also sets the return value. Without this
+ # template such a rule aborts the script with an unknown array element.
+ set T(if_0_0_1) {
+ if( ${match} ){
+ *pnBuf = nBuf - $n;
+ ret = 1;
+ }
+ }
+ set T(if_1_0_1) {
+ if( ${match} ){
+ if( ${cond} ){
+ *pnBuf = nBuf - $n;
+ ret = 1;
+ }
+ }
+ }
+ set T(if_0_1_1) {
+ if( ${match} ){
+ ${memcpy}
+ *pnBuf = nBuf - $n + $nRep;
+ ret = 1;
+ }
+ }
+ set T(if_1_1_1) {
+ if( ${match} ){
+ if( ${cond} ){
+ ${memcpy}
+ *pnBuf = nBuf - $n + $nRep;
+ ret = 1;
+ }
+ }
+ }
+
+ set switchbody ""
+
+ # Bucket the rules by the second-to-last character of their suffix, which
+ # is the character the generated switch statement dispatches on.
+ foreach I $data {
+ set k [string range [lindex $I 0] end-1 end-1]
+ lappend aCase($k) $I
+ }
+
+ # Emit the case labels in sorted order so the output is stable.
+ foreach k [lsort [array names aCase]] {
+ set ifstmts ""
+ foreach I $aCase($k) {
+ set zSuffix [lindex $I 0] ;# Suffix text for this rule
+ set zRep [lindex $I 2] ;# Replacement text for rule
+ set xCond [lindex $I 1] ;# Condition callback (or "")
+
+ set n [string length $zSuffix]
+ set nRep [string length $zRep]
+
+ # C fragments substituted into the selected template. The match test
+ # requires the buffer to be strictly longer than the suffix.
+ set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
+ set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
+ set cond "${xCond}(aBuf, nBuf-$n)"
+
+ set bMemcpy [expr {$nRep>0}]
+ set bCond [expr {$xCond!=""}]
+ set bRet [expr {[llength $I]>3 && [lindex $I 3]}]
+
+ set t $T(if_${bCond}_${bMemcpy}_${bRet})
+ lappend ifstmts [string trim [subst -nocommands $t]]
+ }
+
+ # Chain the rules of this case into a single if / else-if statement.
+ set ifstmts [join $ifstmts "else "]
+
+ append switchbody [subst -nocommands $T(case)]
+ }
+
+
+ puts [subst -nocommands $T(function)]
+}
+
+
+# Script body: print the opening banner, one generated C function per entry
+# of the O array, then the closing banner. Everything goes to stdout.
+puts [string trim {
+/**************************************************************************
+***************************************************************************
+** GENERATED CODE STARTS HERE (mkportersteps.tcl)
+*/
+}]
+# [array names] returns keys in an unspecified order, so sort them to make
+# the order of the generated functions reproducible from run to run.
+foreach step [lsort [array names O]] {
+ create_step_function $step $O($step)
+}
+puts [string trim {
+/*
+** GENERATED CODE ENDS HERE (mkportersteps.tcl)
+***************************************************************************
+**************************************************************************/
+}]
+
+
+