diff options
Diffstat (limited to 'ext/fts5/tool/loadfts5.tcl')
-rw-r--r-- | ext/fts5/tool/loadfts5.tcl | 172 |
1 files changed, 172 insertions, 0 deletions
# diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl new file mode 100644 index 0000000..96fd692 --- /dev/null +++ b/ext/fts5/tool/loadfts5.tcl @@ -0,0 +1,172 @@
#
# loadfts5.tcl
#
# Usage: loadfts5.tcl ?SWITCHES? DATABASE PATH
#
# Creates a full-text virtual table "t1(path, content)" in DATABASE and
# inserts one row per regular file found by walking PATH recursively.
# Must be run by a Tcl shell that provides the "sqlite3" command.

# Return the entire contents of file $f.
#
# The channel is closed even if the read fails; the original error
# (message, errorInfo and errorCode) is then re-raised to the caller.
proc loadfile {f} {
  set fd [open $f]
  if {[catch {read $fd} data]} {
    # Save the error state before [close] can overwrite it.
    set savedInfo $::errorInfo
    set savedCode $::errorCode
    catch {close $fd}
    return -code error -errorinfo $savedInfo -errorcode $savedCode $data
  }
  close $fd
  return $data
}

# Number of rows inserted so far, and how many rows each progress dot
# printed by load_hierachy represents.
set ::nRow 0
set ::nRowPerDot 1000

# Recursively insert every non-directory entry under $dir into table t1
# as a (path, content) row, using the global database handle "db".
#
# Reads ::O(limit) (stop after this many rows; 0 means no limit) and
# ::O(trans) (commit after this many rows; 0 means never). Increments
# ::nRow once per inserted row.
proc load_hierachy {dir} {
  foreach f [glob -nocomplain -dir $dir *] {
    # The limit is re-checked at every level of the recursion, so a
    # break in a nested call also stops the enclosing loops on their
    # next iteration.
    if {$::O(limit) && $::nRow>=$::O(limit)} break
    if {[file isdir $f]} {
      load_hierachy $f
    } else {
      # $f is bound as an SQL variable; loadfile() is the SQL function
      # registered with "db func" by the main script.
      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      incr ::nRow

      # Every O(trans) rows: commit, run an integrity-check on the
      # index, then open a new transaction.
      if {$::O(trans) && ($::nRow % $::O(trans))==0} {
        db eval { COMMIT }
        db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
        db eval { BEGIN }
      }

      # Progress output: one dot per nRowPerDot rows, with a newline
      # after every 65 dots.
      if {($::nRow % $::nRowPerDot)==0} {
        puts -nonewline .
        if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
        flush stdout
      }
    }
  }
}

# Print the command-line synopsis to stderr and exit with status 1.
proc usage {} {
  puts stderr "Usage: $::argv0 ?SWITCHES? DATABASE PATH"
  puts stderr ""
  puts stderr "Switches are:"
  puts stderr " -fts4 (use fts4 instead of fts5)"
  puts stderr " -fts5 (use fts5)"
  puts stderr " -porter (use porter tokenizer)"
  puts stderr " -delete (delete the database file before starting)"
  puts stderr " -limit N (load no more than N documents)"
  puts stderr " -automerge N (set the automerge parameter to N)"
  puts stderr " -crisismerge N (set the crisismerge parameter to N)"
  puts stderr " -prefix PREFIX (comma separated prefix= argument)"
  puts stderr " -trans N (commit after N inserts - 0 == never)"
  puts stderr " -hashsize N (set the fts5 hashsize parameter to N)"
  puts stderr " -detail MODE (detail mode for fts5 tables)"
  exit 1
}

# Option defaults. A value of -1 means "leave the module's own default
# alone": the corresponding configuration statement below is skipped.
set O(vtab) fts5
set O(tok) ""
set O(limit) 0
set O(delete) 0
set O(automerge) -1
set O(crisismerge) -1
set O(prefix) ""
set O(trans) 0
set O(hashsize) -1
set O(detail) full

# The last two arguments are always DATABASE and PATH; everything
# before them is a switch or a switch value.
#
# Every switch that takes a value stores it in O(<switch name without
# the leading dash>), so those switches share a single arm through
# switch's "-" fall-through syntax. A value switch with no value left
# before DATABASE is a usage error.
if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- $arg {
    -fts4 {
      set O(vtab) fts4
    }

    -fts5 {
      set O(vtab) fts5
    }

    -porter {
      set O(tok) ", tokenize=porter"
    }

    -delete {
      set O(delete) 1
    }

    -limit -
    -trans -
    -automerge -
    -crisismerge -
    -prefix -
    -hashsize -
    -detail {
      if { [incr i]>=$nOpt } usage
      set O([string range $arg 1 end]) [lindex $argv $i]
    }

    default {
      usage
    }
  }
}

set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
# Best effort: load_static_extension may not exist in the running shell,
# so a failure here is ignored.
catch { load_static_extension db fts5 }
db func loadfile loadfile
db eval "PRAGMA page_size=4096"

db eval BEGIN
  # Assemble the optional trailing arguments of CREATE VIRTUAL TABLE.
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  if {$O(vtab)=="fts5"} {
    append pref ", detail=$O(detail)"
  }

  # NOTE(review): errors from both statements are deliberately ignored.
  # Presumably this lets the script append to an existing table and
  # tolerates fts4 rejecting the fts5-style 'pgsz' insert - confirm.
  # A side effect is that a bad -prefix or -detail value only surfaces
  # later, when the first INSERT fails.
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }

  if {$O(hashsize)>=0} {
    catch {
      db eval "INSERT INTO t1(t1, rank) VALUES('hashsize', $O(hashsize));"
    }
  }

  # automerge uses a different configuration syntax for fts5 than for
  # the other module.
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
  }

  # crisismerge is only applied to fts5 tables; for any other module
  # the switch is silently ignored.
  if {$O(crisismerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
    } else {
    }
  }

  load_hierachy [lindex $argv end]
db eval COMMIT
puts ""