diff options
Diffstat (limited to 'storage/myisam/ftbench')
-rwxr-xr-x | storage/myisam/ftbench/Ecompare.pl | 112 | ||||
-rwxr-xr-x | storage/myisam/ftbench/Ecreate.pl | 60 | ||||
-rwxr-xr-x | storage/myisam/ftbench/Ereport.pl | 65 | ||||
-rw-r--r-- | storage/myisam/ftbench/README | 43 | ||||
-rwxr-xr-x | storage/myisam/ftbench/ft-test-run.sh | 116 |
5 files changed, 396 insertions, 0 deletions
diff --git a/storage/myisam/ftbench/Ecompare.pl b/storage/myisam/ftbench/Ecompare.pl new file mode 100755 index 00000000..a97f126e --- /dev/null +++ b/storage/myisam/ftbench/Ecompare.pl @@ -0,0 +1,112 @@ +#!/usr/bin/env perl + +# Copyright (c) 2003, 2005 MySQL AB +# Use is subject to license terms +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +# compares out-files (as created by Ereport.pl) from dir1/*.out and dir2/*.out +# for each effectiveness column computes the probability of the hypothesis +# "Both files have the same effectiveness" + +# sign test is used to verify that test results are statistically +# significant to support the hypothesis. Function is computed on the fly. + +# basic formula is \sum_{r=0}^R C_N^r 2^{-N} +# As N can be big, we'll work with logarithms +$log2=log(2); +sub probab { + my $N=shift, $R=shift; + + my $r, $sum=0; + + for $r (0..$R) { + $sum+=exp(logfac($N)-logfac($r)-logfac($N-$r)-$N*$log2); + } + return $sum; +} + +# log(N!) +# for N<20 exact value from the table (below) is taken +# otherwise, Stirling approximation for N! is used +sub logfac { + my $n=shift; die "n=$n<0" if $n<0; + return $logfactab[$n] if $n<=$#logfactab; + return $n*log($n)-$n+log(2*3.14159265358*$n)/2; +} +@logfactab=( +0, 0, 0.693147180559945, 1.79175946922805, 3.17805383034795, +4.78749174278205, 6.57925121201010, 8.52516136106541, 10.6046029027453, +12.8018274800815, 15.1044125730755, 17.5023078458739, 19.9872144956619, +22.5521638531234, 25.1912211827387, 27.8992713838409, 30.6718601060807, +33.5050734501369, 36.3954452080331, 39.3398841871995, 42.3356164607535, +); + +############################# main () ############################### +#$p=shift; $m=shift; $p-=$m; +#if($p>$m) { +# print "1 > 2 [+$p-$m]: ", probab($p+$m, $m), "\n"; +#} elsif($p<$m) { +# print "1 < 2 [+$p-$m]: ", probab($p+$m, $p), "\n"; +#} else { +# print "1 = 2 [+$p-$m]: ", probab($p+$m, $m), "\n"; +#} +#exit; + +die "Use: $0 dir1 dir2\n" unless @ARGV==2 && + -d ($dir1=shift) && -d ($dir2=shift); +$_=`cd $dir1; echo *.out`; +s/\.out\b//g; +$total=""; + +for $file (split) { + open(OUT1,$out1="$dir1/$file.out") || die "Cannot open $out1: $!"; + open(OUT2,$out2="$dir2/$file.out") || die "Cannot open $out2: $!"; + + @p=@m=(); + while(!eof(OUT1) || !eof(OUT2)) { + $_=<OUT1>; @l1=split; shift @l1; + $_=<OUT2>; @l2=split; shift @l2; + + die "Number of columns differ in line $.\n" unless $#l1 == $#l2; + + for (0..$#l1) { + $p[$_]+= $l1[$_] > $l2[$_]; + $m[$_]+= $l1[$_] < $l2[$_]; + } + } + + for (0..$#l1) { + $pp[$_]+=$p[$_]; $mm[$_]+=$m[$_]; + $total[$_].=rep($file, ($#l1 ? $_ : undef), $p[$_], $m[$_]); + } + close OUT1; + close OUT2; +} + +for (0..$#l1) { + rep($total[$_], ($#l1 ? $_ : undef), $pp[$_], $mm[$_]); +} + +sub rep { + my ($test, $n, $p, $m, $c, $r)=@_; + + if ($p>$m) { $c=">"; $r="+"; } + elsif($p<$m) { $c="<"; $r="-"; } + else { $c="="; $r="="; } + $n=" $n: " if defined $n; + printf "%-8s $n $dir1 $c $dir2 [+%03d-%03d]: %16.15f\n", + $test, $p, $m, probab($p+$m, ($p>=$m ? $m : $p)); + $r; +} diff --git a/storage/myisam/ftbench/Ecreate.pl b/storage/myisam/ftbench/Ecreate.pl new file mode 100755 index 00000000..78962466 --- /dev/null +++ b/storage/myisam/ftbench/Ecreate.pl @@ -0,0 +1,60 @@ +#!/usr/bin/env perl + +# Copyright (c) 2003, 2005 MySQL AB +# Use is subject to license terms +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +$test=shift || die "Usage $0 testname [option]"; +$option=shift; + +open(D, "<data/$test.d") || die "Cannot open(<data/$test.d): $!"; +open(Q, "<data/$test.q") || die "Cannot open(<data/$test.q): $!"; + +$N=0; + +print <<__HEADER__; +DROP TABLE IF EXISTS $test; +CREATE TABLE $test ( + id int(10) unsigned NOT NULL, + text text NOT NULL, + FULLTEXT KEY text (text) +) TYPE=MyISAM CHARSET=latin1; + +ALTER TABLE $test DISABLE KEYS; +__HEADER__ + +while (<D>) { chomp; + s/'/\\'/g; ++$N; + print "INSERT $test VALUES ($N, '$_');\n"; +} + +print <<__PREP__; +ALTER TABLE $test ENABLE KEYS; +SELECT $N; +__PREP__ + +$N=0; + +while (<Q>) { chomp; + s/'/\\'/g; ++$N; + $_="MATCH text AGAINST ('$_' $option)"; + print "SELECT $N, id, $_ FROM $test WHERE $_;\n"; +} + +print <<__FOOTER__; +DROP TABLE $test; +__FOOTER__ + + diff --git a/storage/myisam/ftbench/Ereport.pl b/storage/myisam/ftbench/Ereport.pl new file mode 100755 index 00000000..a8c7c57e --- /dev/null +++ b/storage/myisam/ftbench/Ereport.pl @@ -0,0 +1,65 @@ +#!/usr/bin/env perl + +# Copyright (c) 2003, 2005 MySQL AB +# Use is subject to license terms +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +die "Use: $0 eval_output qrels_file\n" unless @ARGV==2; + +open(EOUT,$eout=shift) || die "Cannot open $eout: $!"; +open(RELJ,$relj=shift) || die "Cannot open $relj: $!"; + +$_=<EOUT>; +die "$eout must start with a number!\n "unless /^[1-9][0-9]*\n/; +$ndocs=$_+0; + +$qid=0; +$relj_str=<RELJ>; +$eout_str=<EOUT>; + +while(!eof(RELJ) || !eof(EOUT)) { + ++$qid; + %dq=(); + $A=$B=$AB=0; + $Ravg=$Pavg=0; + + while($relj_str =~ /^0*$qid\s+(\d+)/) { + ++$A; + $dq{$1+0}=1; + last unless $relj_str=<RELJ>; + } + # Favg measure = 1/(a/Pavg+(1-a)/Ravg) +sub Favg { my $a=shift; $Pavg*$Ravg ? 1/($a/$Pavg+(1-$a)/$Ravg) : 0; } + # F0 : a=0 -- ignore precision + # F5 : a=0.5 + # F1 : a=1 -- ignore recall + while($eout_str =~ /^$qid\s+(\d+)\s+(\d+(?:\.\d+)?)/) { + $B++; + $AB++ if $dq{$1+0}; + $Ravg+=$AB; + $Pavg+=$AB/$B; + last unless $eout_str=<EOUT>; + } + next unless $A; + + $Ravg/=$B*$A if $B; + $Pavg/=$B if $B; + + printf "%5d %1.12f %1.12f %1.12f\n", $qid, Favg(0),Favg(0.5),Favg(1); +} + +exit 0; + + diff --git a/storage/myisam/ftbench/README b/storage/myisam/ftbench/README new file mode 100644 index 00000000..b1f8b66b --- /dev/null +++ b/storage/myisam/ftbench/README @@ -0,0 +1,43 @@ +1. should be run from myisam/ftbench/ +2. myisam/ftdefs.h should NOT be locked (bk get, not bk edit!) +3. there should be ./data/ subdir with test collections, files: + test1.d + test1.q + test1.r + test2.d + test2.q + test2.r + where test1, test2, etc - are arbitrary test names + + *.[dq] files contain documents/queries one item per line. + + *.r files have the structure: + 1 16 .....blablabla + 1 09 .....blablabla + 2 116 .....blablabla + ... + + that is /^\d+\s+\d+/ + and are sorted by the first number (not necessarily by the second) + +4. there should be ./t/ subdir with test directories + + ./t + ./t/BEST/ + ./t/testdir1/ + ./t/testdir2/ + ... + + there *must* be ./t/BEST/ subdir or a symlink to one of other dirs in ./t + all other names (besides BEST) can be arbitrary + + all test results are compared with BEST results. + + test directories may contain ftdefs.h, my.cnf, ft_mode + (the last one is used as in ... MATCH ... AGAINST ("..." $ft_mode) ...) + NOTE: all *.out files in test directories will NOT be overwritten! + delete them to re-test + +5. run ./ft-test-run.sh +6. go make some coffee + diff --git a/storage/myisam/ftbench/ft-test-run.sh b/storage/myisam/ftbench/ft-test-run.sh new file mode 100755 index 00000000..08294071 --- /dev/null +++ b/storage/myisam/ftbench/ft-test-run.sh @@ -0,0 +1,116 @@ +#!/bin/sh + +# Copyright (c) 2003, 2005, 2006 MySQL AB +# Use is subject to license terms +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; version 2 +# of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the Free +# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1335 USA + +if [ ! -x ./ft-test-run.sh ] ; then + echo "Usage: ./ft-test-run.sh" + exit 1 +fi + +BASE=`pwd` +DATA=$BASE/var +ROOT=`cd ../..; pwd` +MYSQLD=$ROOT/sql/mysqld +MYSQL=$ROOT/client/mysql +MYSQLADMIN=$ROOT/client/mysqladmin +SOCK=$DATA/mysql.sock +PID=$DATA/mysql.pid +H=../ftdefs.h +OPTS="--no-defaults --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets" +DELAY=10 + +stop_myslqd() +{ + [ -S $SOCK ] && $MYSQLADMIN $OPTS shutdown + [ -f $PID ] && kill `cat $PID` && sleep 15 && [ -f $PID ] && kill -9 `cat $PID` +} + +if [ ! -d t/BEST ] ; then + echo "No ./t/BEST directory! Aborting..." + exit 1 +fi +rm -f t/BEST/report.txt +if [ -w $H ] ; then + echo "$H is writeable! Aborting..." + exit 1 +fi + +stop_myslqd +rm -rf var > /dev/null 2>&1 +mkdir var +mkdir var/test + +for batch in t/* ; do + [ ! -d $batch ] && continue + [ $batch -ef t/BEST -a $batch != t/BEST ] && continue + + rm -rf var/test/* > /dev/null 2>&1 + rm -f $H + if [ -f $BASE/$batch/ftdefs.h ] ; then + cat $BASE/$batch/ftdefs.h > $H + chmod a-wx $H + else + bk get -q $H + fi + OPTS="--defaults-file=$BASE/$batch/my.cnf --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets" + stop_myslqd + rm -f $MYSQLD + echo "building $batch" + echo "============== $batch ===============" >> var/ft_test.log + (cd $ROOT; gmake) >> var/ft_test.log 2>&1 + + for prog in $MYSQLD $MYSQL $MYSQLADMIN ; do + if [ ! -x $prog ] ; then + echo "build failed: no $prog" + exit 1 + fi + done + + echo "=====================================" >> var/ft_test.log + $MYSQLD $OPTS --basedir=$BASE --pid-file=$PID \ + --language=$ROOT/sql/share/english \ + --skip-grant-tables --skip-innodb \ + --skip-networking --tmpdir=$DATA >> var/ft_test.log 2>&1 & + + sleep $DELAY + $MYSQLADMIN $OPTS ping + if [ $? != 0 ] ; then + echo "$MYSQLD refused to start" + exit 1 + fi + for test in `cd data; echo *.r|sed "s/\.r//g"` ; do + if [ -f $batch/$test.out ] ; then + echo "skipping $batch/$test.out" + continue + fi + echo "testing $batch/$test" + FT_MODE=`cat $batch/ft_mode 2>/dev/null` + ./Ecreate.pl $test "$FT_MODE" | $MYSQL $OPTS --skip-column-names test >var/$test.eval + echo "reporting $batch/$test" + ./Ereport.pl var/$test.eval data/$test.r > $batch/$test.out || exit + done + stop_myslqd + rm -f $H + bk get -q $H + if [ ! $batch -ef t/BEST ] ; then + echo "comparing $batch" + ./Ecompare.pl t/BEST $batch >> t/BEST/report.txt + fi +done + |