summaryrefslogtreecommitdiffstats
path: root/storage/myisam/ftbench
diff options
context:
space:
mode:
Diffstat (limited to 'storage/myisam/ftbench')
-rwxr-xr-xstorage/myisam/ftbench/Ecompare.pl112
-rwxr-xr-xstorage/myisam/ftbench/Ecreate.pl60
-rwxr-xr-xstorage/myisam/ftbench/Ereport.pl65
-rw-r--r--storage/myisam/ftbench/README43
-rwxr-xr-xstorage/myisam/ftbench/ft-test-run.sh116
5 files changed, 396 insertions, 0 deletions
diff --git a/storage/myisam/ftbench/Ecompare.pl b/storage/myisam/ftbench/Ecompare.pl
new file mode 100755
index 00000000..a97f126e
--- /dev/null
+++ b/storage/myisam/ftbench/Ecompare.pl
@@ -0,0 +1,112 @@
+#!/usr/bin/env perl
+
+# Copyright (c) 2003, 2005 MySQL AB
+# Use is subject to license terms
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+# compares out-files (as created by Ereport.pl) from dir1/*.out and dir2/*.out
+# for each effectiveness column computes the probability of the hypothesis
+# "Both files have the same effectiveness"
+
+# sign test is used to verify that test results are statistically
+# significant to support the hypothesis. Function is computed on the fly.
+
+# basic formula is \sum_{r=0}^R C_N^r 2^{-N}
+# As N can be big, we'll work with logarithms
+$log2=log(2);
+
+# probab(N, R) -- cumulative sign-test probability
+#     \sum_{r=0}^R C_N^r 2^{-N}
+# N: total number of trials, R: upper bound of the summation.
+# Every term is built from log-factorials (logfac) and exponentiated only
+# at the very end, so a big N does not overflow.
+# Returns the sum as a plain number.
+sub probab {
+  # "my $N=shift, $R=shift;" declares only $N: "my" binds tighter than the
+  # comma, and the rest silently become package globals. Unpack as a list.
+  my ($N, $R)=@_;
+  my $sum=0;
+
+  # these two do not depend on $r; compute them once. The order of the
+  # subtractions below is the original one, so results are bit-identical.
+  my $logfac_N=logfac($N);
+  my $N_log2=$N*$log2;
+
+  for my $r (0..$R) {
+    $sum+=exp($logfac_N-logfac($r)-logfac($N-$r)-$N_log2);
+  }
+  return $sum;
+}
+
+# logfac(n) = log(n!)
+# For n up to the last index of @logfactab (n <= 20) the exact value is
+# taken from that table; for anything bigger the Stirling approximation
+#     n*log(n) - n + log(2*pi*n)/2
+# is used. Dies when n is negative.
+sub logfac {
+  my ($n)=@_;
+  die "n=$n<0" if $n<0;
+
+  if ($n<=$#logfactab) {
+    return $logfactab[$n];
+  }
+
+  my $stirling=$n*log($n)-$n+log(2*3.14159265358*$n)/2;
+  return $stirling;
+}
+
+# exact values of log(n!), the array index is n (0..20)
+@logfactab=(
+  0,
+  0,
+  0.693147180559945,
+  1.79175946922805,
+  3.17805383034795,
+  4.78749174278205,
+  6.57925121201010,
+  8.52516136106541,
+  10.6046029027453,
+  12.8018274800815,
+  15.1044125730755,
+  17.5023078458739,
+  19.9872144956619,
+  22.5521638531234,
+  25.1912211827387,
+  27.8992713838409,
+  30.6718601060807,
+  33.5050734501369,
+  36.3954452080331,
+  39.3398841871995,
+  42.3356164607535,
+);
+
+############################# main () ###############################
+#$p=shift; $m=shift; $p-=$m;
+#if($p>$m) {
+# print "1 > 2 [+$p-$m]: ", probab($p+$m, $m), "\n";
+#} elsif($p<$m) {
+# print "1 < 2 [+$p-$m]: ", probab($p+$m, $p), "\n";
+#} else {
+# print "1 = 2 [+$p-$m]: ", probab($p+$m, $m), "\n";
+#}
+#exit;
+
+# Usage: Ecompare.pl dir1 dir2
+#
+# For every dir1/NAME.out the file dir2/NAME.out is read in parallel, line
+# by line. The first field of each line is dropped; for each remaining
+# column the lines where dir1 has the bigger value (@p) and the lines where
+# it has the smaller value (@m) are counted. One report line is printed per
+# file and column, then one summary line per column over all files (@pp,
+# @mm). $dir1 and $dir2 stay package globals: rep() prints them.
+die "Use: $0 dir1 dir2\n" unless @ARGV==2 &&
+  -d ($dir1=shift) && -d ($dir2=shift);
+
+# the shell expands the glob for us.
+# NOTE(review): $dir1 reaches the shell unquoted, so a directory name with
+# spaces or shell metacharacters will not work here.
+$_=`cd $dir1; echo *.out`;
+s/\.out\b//g;
+
+# per-column strings collecting what rep() returns for each file; they are
+# used as the "test name" on the summary lines
+@total=();
+
+for $file (split) {
+  # three-argument open: the name is always taken as a plain file to read,
+  # no matter what characters it starts or ends with
+  $out1="$dir1/$file.out";
+  open(OUT1, '<', $out1) || die "Cannot open $out1: $!";
+  $out2="$dir2/$file.out";
+  open(OUT2, '<', $out2) || die "Cannot open $out2: $!";
+
+  @p=@m=();
+  while(!eof(OUT1) || !eof(OUT2)) {
+    $_=<OUT1>; @l1=split; shift @l1;
+    $_=<OUT2>; @l2=split; shift @l2;
+
+    # also fires when one of the files is shorter than the other
+    die "Number of columns differ in line $.\n" unless $#l1 == $#l2;
+
+    for (0..$#l1) {
+      $p[$_]+= $l1[$_] > $l2[$_];
+      $m[$_]+= $l1[$_] < $l2[$_];
+    }
+  }
+
+  # @l1 still holds the last line read, i.e. it gives the column count;
+  # the column number is passed to rep() only when there are several
+  for (0..$#l1) {
+    $pp[$_]+=$p[$_]; $mm[$_]+=$m[$_];
+    $total[$_].=rep($file, ($#l1 ? $_ : undef), $p[$_], $m[$_]);
+  }
+  close OUT1;
+  close OUT2;
+}
+
+# summary over all files, one line per column
+for (0..$#l1) {
+  rep($total[$_], ($#l1 ? $_ : undef), $pp[$_], $mm[$_]);
+}
+
+# rep(test, n, p, m) -- print one report line and return a one-char verdict
+#   test : label for the first output field
+#   n    : column number, or undef to leave the column label out
+#   p, m : number of lines where dir1 was better / worse than dir2
+# The probability printed last is probab(p+m, min(p,m)).
+# Returns "+" when p>m, "-" when p<m and "=" otherwise.
+sub rep {
+  my ($test, $n, $p, $m)=@_;
+  my ($c, $r);
+
+  if ($p>$m)   { $c=">"; $r="+"; }
+  elsif($p<$m) { $c="<"; $r="-"; }
+  else         { $c="="; $r="="; }
+
+  my $label=defined $n ? " $n: " : "";
+
+  # everything variable goes in as an argument, never into the format
+  # itself: a "%" in a directory name must not be read as a conversion
+  printf "%-8s %s %s %s %s [+%03d-%03d]: %16.15f\n",
+    $test, $label, $dir1, $c, $dir2, $p, $m,
+    probab($p+$m, ($p>=$m ? $m : $p));
+  $r;
+}
diff --git a/storage/myisam/ftbench/Ecreate.pl b/storage/myisam/ftbench/Ecreate.pl
new file mode 100755
index 00000000..78962466
--- /dev/null
+++ b/storage/myisam/ftbench/Ecreate.pl
@@ -0,0 +1,60 @@
+#!/usr/bin/env perl
+
+# Copyright (c) 2003, 2005 MySQL AB
+# Use is subject to license terms
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+# Usage: Ecreate.pl testname [option]
+#
+# Reads data/<testname>.d and data/<testname>.q (one item per line) and
+# prints an SQL script on stdout that
+#   - (re)creates table <testname> with a FULLTEXT key and inserts every
+#     line of the .d file, numbered from 1
+#   - prints the number of inserted rows (SELECT $N)
+#   - runs one MATCH ... AGAINST query per line of the .q file, numbered
+#     from 1; [option] is placed after the search string inside AGAINST()
+#   - drops the table
+$test=shift || die "Usage $0 testname [option]";
+$option=shift;
+
+# three-argument open: the test name can never be taken for an open mode
+open(D, '<', "data/$test.d") || die "Cannot open(<data/$test.d): $!";
+open(Q, '<', "data/$test.q") || die "Cannot open(<data/$test.q): $!";
+
+$N=0;
+
+# ENGINE= is the supported spelling of the table option; the old TYPE=
+# synonym is rejected by current servers
+print <<__HEADER__;
+DROP TABLE IF EXISTS $test;
+CREATE TABLE $test (
+ id int(10) unsigned NOT NULL,
+ text text NOT NULL,
+ FULLTEXT KEY text (text)
+) ENGINE=MyISAM CHARSET=latin1;
+
+ALTER TABLE $test DISABLE KEYS;
+__HEADER__
+
+while (<D>) { chomp;
+ # escape backslashes together with quotes: otherwise a backslash in the
+ # data (worst case right before the closing quote) breaks the literal
+ s/(['\\])/\\$1/g; ++$N;
+ print "INSERT $test VALUES ($N, '$_');\n";
+}
+
+print <<__PREP__;
+ALTER TABLE $test ENABLE KEYS;
+SELECT $N;
+__PREP__
+
+$N=0;
+
+while (<Q>) { chomp;
+ s/(['\\])/\\$1/g; ++$N;
+ # the same expression is used as the selected relevance value and as the
+ # WHERE condition
+ $_="MATCH text AGAINST ('$_' $option)";
+ print "SELECT $N, id, $_ FROM $test WHERE $_;\n";
+}
+
+print <<__FOOTER__;
+DROP TABLE $test;
+__FOOTER__
+
+
diff --git a/storage/myisam/ftbench/Ereport.pl b/storage/myisam/ftbench/Ereport.pl
new file mode 100755
index 00000000..a8c7c57e
--- /dev/null
+++ b/storage/myisam/ftbench/Ereport.pl
@@ -0,0 +1,65 @@
+#!/usr/bin/env perl
+
+# Copyright (c) 2003, 2005 MySQL AB
+# Use is subject to license terms
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+# Usage: Ereport.pl eval_output qrels_file
+#
+# eval_output: first line is a number, then lines "qid docid score"
+# qrels_file : lines starting with "qid docid"
+# Both are walked in parallel by increasing query id. For every query that
+# has at least one line in qrels_file one line is printed:
+#   qid Favg(0) Favg(0.5) Favg(1)
+die "Use: $0 eval_output qrels_file\n" unless @ARGV==2;
+
+# three-argument open: the names are always taken as plain files to read
+$eout=shift;
+open(EOUT, '<', $eout) || die "Cannot open $eout: $!";
+$relj=shift;
+open(RELJ, '<', $relj) || die "Cannot open $relj: $!";
+
+$_=<EOUT>;
+die "$eout must start with a number!\n" unless /^[1-9][0-9]*\n/;
+$ndocs=$_+0;
+
+# Favg measure = 1/(a/Pavg+(1-a)/Ravg), 0 if either average is 0;
+# reads the globals $Pavg and $Ravg of the current query
+#   F0 : a=0 -- ignore precision
+#   F5 : a=0.5
+#   F1 : a=1 -- ignore recall
+sub Favg { my $a=shift; $Pavg*$Ravg ? 1/($a/$Pavg+(1-$a)/$Ravg) : 0; }
+
+# query id at the start of a buffered look-ahead line;
+# 0 when there is no line or it does not start with "number number"
+sub pending_qid {
+  my $line=shift;
+  return 0 unless defined $line && $line =~ /^0*(\d+)\s+\d/;
+  return $1+0;
+}
+
+$qid=0;
+$relj_str=<RELJ>;
+$eout_str=<EOUT>;
+
+# Both inner loops read one line ahead. When the files reach EOF the
+# look-ahead buffers can still hold the first line of a query that was not
+# processed yet (e.g. the last query has a single line in each file), so
+# EOF alone must not end the loop: go on while such a line is pending.
+while(!eof(RELJ) || !eof(EOUT) ||
+      pending_qid($relj_str)>$qid || pending_qid($eout_str)>$qid) {
+  ++$qid;
+  %dq=();
+  $A=$B=$AB=0;
+  $Ravg=$Pavg=0;
+
+  # $A: number of qrels lines for this query, %dq: their document ids
+  while($relj_str =~ /^0*$qid\s+(\d+)/) {
+    ++$A;
+    $dq{$1+0}=1;
+    last unless $relj_str=<RELJ>;
+  }
+
+  # $B: results returned so far, $AB: those of them found in %dq;
+  # the running sums are turned into averages below
+  while($eout_str =~ /^$qid\s+(\d+)\s+(\d+(?:\.\d+)?)/) {
+    $B++;
+    $AB++ if $dq{$1+0};
+    $Ravg+=$AB;
+    $Pavg+=$AB/$B;
+    last unless $eout_str=<EOUT>;
+  }
+  # no qrels lines for this query id: nothing to report
+  next unless $A;
+
+  $Ravg/=$B*$A if $B;
+  $Pavg/=$B if $B;
+
+  printf "%5d %1.12f %1.12f %1.12f\n", $qid, Favg(0),Favg(0.5),Favg(1);
+}
+
+exit 0;
+
+
diff --git a/storage/myisam/ftbench/README b/storage/myisam/ftbench/README
new file mode 100644
index 00000000..b1f8b66b
--- /dev/null
+++ b/storage/myisam/ftbench/README
@@ -0,0 +1,43 @@
+1. should be run from myisam/ftbench/
+2. myisam/ftdefs.h should NOT be locked (bk get, not bk edit!)
+3. there should be ./data/ subdir with test collections, files:
+ test1.d
+ test1.q
+ test1.r
+ test2.d
+ test2.q
+ test2.r
+ where test1, test2, etc - are arbitrary test names
+
+ *.[dq] files contain documents/queries one item per line.
+
+ *.r files have the structure:
+ 1 16 .....blablabla
+ 1 09 .....blablabla
+ 2 116 .....blablabla
+ ...
+
+ that is /^\d+\s+\d+/
+ and are sorted by the first number (not necessarily by the second)
+
+4. there should be ./t/ subdir with test directories
+
+ ./t
+ ./t/BEST/
+ ./t/testdir1/
+ ./t/testdir2/
+ ...
+
+ there *must* be ./t/BEST/ - either a real subdir or a symlink to one of the other dirs in ./t
+ all other names (besides BEST) can be arbitrary
+
+ all test results are compared with BEST results.
+
+ test directories may contain ftdefs.h, my.cnf, ft_mode
+ (the last one is used as in ... MATCH ... AGAINST ("..." $ft_mode) ...)
+ NOTE: existing *.out files in test directories are NOT overwritten!
+ delete them to re-run the corresponding tests
+
+5. run ./ft-test-run.sh
+6. go make some coffee
+
diff --git a/storage/myisam/ftbench/ft-test-run.sh b/storage/myisam/ftbench/ft-test-run.sh
new file mode 100755
index 00000000..08294071
--- /dev/null
+++ b/storage/myisam/ftbench/ft-test-run.sh
@@ -0,0 +1,116 @@
+#!/bin/sh
+
+# Copyright (c) 2003, 2005, 2006 MySQL AB
+# Use is subject to license terms
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public
+# License as published by the Free Software Foundation; version 2
+# of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this library; if not, write to the Free
+# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1335 USA
+
+if [ ! -x ./ft-test-run.sh ] ; then  # the script must be started from its own directory
+ echo "Usage: ./ft-test-run.sh"
+ exit 1
+fi
+
+BASE=`pwd`  # the benchmark directory (current directory)
+DATA=$BASE/var  # scratch area: holds the socket, the pid file and the server tmpdir
+ROOT=`cd ../..; pwd`  # NOTE(review): from storage/myisam/ftbench this is storage/, yet sql/ and client/ are looked up under it - confirm
+MYSQLD=$ROOT/sql/mysqld  # server binary; removed and rebuilt for every batch
+MYSQL=$ROOT/client/mysql  # client that runs the generated SQL
+MYSQLADMIN=$ROOT/client/mysqladmin  # used for ping and shutdown
+SOCK=$DATA/mysql.sock
+PID=$DATA/mysql.pid
+H=../ftdefs.h  # header that a batch may replace with its own copy
+OPTS="--no-defaults --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets"  # initial value; reassigned for every batch
+DELAY=10  # seconds to sleep after starting the server, before pinging it
+
+# Stop the test server if there is one: ask for a shutdown through the
+# socket first, then fall back to signals using the pid file.
+stop_myslqd()
+{
+  if [ -S $SOCK ] ; then
+    $MYSQLADMIN $OPTS shutdown
+  fi
+
+  # no pid file: nothing left to signal
+  [ -f $PID ] || return
+  kill `cat $PID` || return
+  sleep 15 || return
+  # pid file still there after the grace period: force it
+  [ -f $PID ] || return
+  kill -9 `cat $PID`
+}
+
+if [ ! -d t/BEST ] ; then  # every batch is compared against t/BEST, so it has to exist
+ echo "No ./t/BEST directory! Aborting..."
+ exit 1
+fi
+rm -f t/BEST/report.txt  # comparison results are appended to this file; start clean
+if [ -w $H ] ; then  # $H gets removed and rewritten below; refuse to touch a writable copy
+ echo "$H is writeable! Aborting..."
+ exit 1
+fi
+
+stop_myslqd  # get rid of a server that may still use the socket/pid file under var/
+rm -rf var > /dev/null 2>&1  # recreate the scratch tree from nothing
+mkdir var
+mkdir var/test
+
+for batch in t/* ; do
+ [ ! -d $batch ] && continue
+ [ $batch -ef t/BEST -a $batch != t/BEST ] && continue
+
+ rm -rf var/test/* > /dev/null 2>&1
+ rm -f $H
+ if [ -f $BASE/$batch/ftdefs.h ] ; then
+ cat $BASE/$batch/ftdefs.h > $H
+ chmod a-wx $H
+ else
+ bk get -q $H
+ fi
+ OPTS="--defaults-file=$BASE/$batch/my.cnf --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets"
+ stop_myslqd
+ rm -f $MYSQLD
+ echo "building $batch"
+ echo "============== $batch ===============" >> var/ft_test.log
+ (cd $ROOT; gmake) >> var/ft_test.log 2>&1
+
+ for prog in $MYSQLD $MYSQL $MYSQLADMIN ; do
+ if [ ! -x $prog ] ; then
+ echo "build failed: no $prog"
+ exit 1
+ fi
+ done
+
+ echo "=====================================" >> var/ft_test.log
+ $MYSQLD $OPTS --basedir=$BASE --pid-file=$PID \
+ --language=$ROOT/sql/share/english \
+ --skip-grant-tables --skip-innodb \
+ --skip-networking --tmpdir=$DATA >> var/ft_test.log 2>&1 &
+
+ sleep $DELAY
+ $MYSQLADMIN $OPTS ping
+ if [ $? != 0 ] ; then
+ echo "$MYSQLD refused to start"
+ exit 1
+ fi
+ for test in `cd data; echo *.r|sed "s/\.r//g"` ; do
+ if [ -f $batch/$test.out ] ; then
+ echo "skipping $batch/$test.out"
+ continue
+ fi
+ echo "testing $batch/$test"
+ FT_MODE=`cat $batch/ft_mode 2>/dev/null`
+ ./Ecreate.pl $test "$FT_MODE" | $MYSQL $OPTS --skip-column-names test >var/$test.eval
+ echo "reporting $batch/$test"
+ ./Ereport.pl var/$test.eval data/$test.r > $batch/$test.out || exit
+ done
+ stop_myslqd
+ rm -f $H
+ bk get -q $H
+ if [ ! $batch -ef t/BEST ] ; then
+ echo "comparing $batch"
+ ./Ecompare.pl t/BEST $batch >> t/BEST/report.txt
+ fi
+done
+