#! /bin/sh
#
# Compare how a documentation page renders before and after being processed
# by po4a-normalize, and sort every checked page into one of the LISTE.*
# result files created in the current directory.
#
# The script invokes ../po4a-normalize with PERLLIB=../lib, so it has to be
# started from a directory where those relative paths resolve.
#
# Usage: see usage() below (or run with -h).

module=man
# Both commands are stored with their options and are expanded unquoted on
# purpose so that the options are split into separate words.
DIFF="diff -uBb"
WDIFF="wdiff -3 -n"

# Print the help text on stdout.
usage() {
  cat <<EOF
Compare how an original man page and its po4a-normalized version are displayed.

Usage: $0
         Check all man pages found in /usr/share/man/man[1-9]
         and /usr/X11R6/man/man[1-9].
       $0 <man page>...
         Check the specified man pages.
       $0 -f <file>
         Check the man pages listed in <file> (one page per line).
       $0 -h
         Display this help.

Man pages can be gzipped.

$0 erases all LISTE.* files in the current directory and creates the
following files:
  LISTE.TOTAL  List of all the checked man pages
  LISTE.OK     List of the man pages for which $DIFF does not see a
               difference.
  LISTE.OK2    $DIFF does not see a difference after converting hyphens to
               minus signs, \`\` to ", and '' to " (in both pages).
  LISTE.WOK1   $WDIFF doesn't see any difference after the same
               modifications
  LISTE.WOK2   This tries to detect changes in the hyphenation of words.
  LISTE.WOK3   This removes minus signs, and thus detects more changes in
               hyphenation. It also masks font changes.
  LISTE.PBS    po4a preferred to stop processing the man page (non
               supported,...)
  LISTE.WDIFF  These are probably bugs in po4a or in the man page
  LISTE.IGN    man pages po4a refused to operate on (e.g. were generated by
               Pod::Man)

You may wish to use stats.sh to compare two runs of $0.
EOF
}

# Render a man page to text.
#   $1 - file to convert; the result is written to $1.txt
# grog prints the groff command line suited to the page; that command line
# is then executed, which is why the substitution is left unquoted.
mantotxt() {
  # shellcheck disable=SC2091
  $(grog -Tutf8 "$1") > "$1.txt"
}

# Render a POD file to text.
#   $1 - file to convert; the result is written to $1.txt
#   $2 - name the file must carry while pod2man runs
# When $1 and $2 differ, $1 temporarily takes the place of $2 (whose content
# is parked in $2.old) and everything is moved back afterwards.
podtotxt() {
  if [ "$1" != "$2" ]; then
    mv "$2" "$2.old"
    mv "$1" "$2"
  fi
  pod2man "$2" > "$2.man"
  man -Pcat -l "$2.man" > "$2.txt"
  if [ "$1" != "$2" ]; then
    mv "$2.txt" "$1.txt"
    mv "$2" "$1"
    mv "$2.old" "$2"
  fi
}

# Log a verdict and append the page to the matching result file.
#   $1 - tag printed as ">tag"
#   $2 - suffix of the LISTE.* file
#   $3 - page name
record() {
  echo ">$1"
  printf '%s\n' "$3" >> "LISTE.$2"
}

# Print the page name and the comparison command, then run the comparison.
#   $1 - page name
#   $2 - comparison command with its options ($DIFF or $WDIFF)
#   $3, $4 - files to compare
# Returns the exit status of the comparison command.
run_cmp() {
  printf '%s\n' "$1"
  printf '%s\n' "$2 $3 $4"
  # shellcheck disable=SC2086
  $2 "$3" "$4"
}

# Replace typographic characters (given as UTF-8 byte sequences) by plain
# equivalents.
#   $1 - input file, $2 - output file
# E2 80 90 -> "-"            (this is safe)
# E2 88 92 -> "-"            (the minus sign in eqn)
# E2 80 98 twice -> E2 80 9C (this can be troublesome: two single quotes
#                             become one double quote instead of two)
# E2 80 99 twice -> E2 80 9D
unify_punctuation() {
  awk 'BEGIN { RS = " "; ORS = " " }
       {
         gsub("\xE2\x80\x90", "-")
         gsub("\xE2\x88\x92", "-")
         gsub("\xE2\x80\x98\xE2\x80\x98", "\xE2\x80\x9C")
         gsub("\xE2\x80\x99\xE2\x80\x99", "\xE2\x80\x9D")
         print
       }' "$1" > "$2"
}

# Undo hyphenation at line ends: a hyphen followed by a newline and blanks
# is replaced by $1.
#   $1 - replacement: "" assumes the hyphen was added by the wrapping (fails
#        if a word containing a hyphen was wrapped), "-" assumes the word was
#        wrapped on one of its own hyphens
#   $2 - input file, $3 - output file
# ";" is used as record separator so that a record can span several lines.
join_wrapped_words() {
  awk -v glue="$1" 'BEGIN { RS = ";"; ORS = ";" }
       { gsub("-\n[[:blank:]]+", glue); print }' "$2" > "$3"
}

# Remove every "character + backspace" pair (this hides font differences?),
# then remove line-end hyphenation and all remaining hyphens.
#   $1 - input file, $2 - output file
strip_fonts_and_hyphens() {
  awk '{ gsub(".\x08", ""); print }' "$1" |
    awk 'BEGIN { RS = ";"; ORS = ";" }
         { gsub("-\n[[:blank:]]+", ""); gsub("-", ""); print }' > "$2"
}

# Compare the rendering of the original page with the rendering of the
# po4a-normalize output, using increasingly tolerant comparisons, and record
# the first one that succeeds (OK, OK2, WOK1, WOK2, WOK3) or WDIFF.
#   $1 - page name as listed in LISTE.TOTAL
#   $2 - base name of the uncompressed copy in $tmp
# Expects po4a-normalize.output in the current directory.
compare_renderings() {
  page=$1
  orig=$tmp/$2
  norm=$tmp/po4a-normalize

  mv po4a-normalize.output "$norm.output"
  $cmdtotxt "$norm.output" "$orig"
  $cmdtotxt "$orig" "$orig"

  if run_cmp "$page" "$DIFF" "$orig.txt" "$norm.output.txt"; then
    record ok OK "$page"
    return
  fi

  unify_punctuation "$orig.txt" "${orig}_ignore"
  unify_punctuation "$norm.output.txt" "$norm.ignore"
  if run_cmp "$page" "$DIFF" "${orig}_ignore" "$norm.ignore"; then
    record ok2 OK2 "$page"
    return
  fi

  if run_cmp "$page" "$WDIFF" "${orig}_ignore" "$norm.ignore"; then
    record wok1 WOK1 "$page"
    return
  fi

  # Two variants of de-hyphenation; both comparisons are always run so that
  # both diffs show up in the log, which may help to classify the difference.
  join_wrapped_words "" "${orig}_ignore" "${orig}_ignore2"
  join_wrapped_words "" "$norm.ignore" "$norm.ignore2"
  join_wrapped_words "-" "${orig}_ignore" "${orig}_ignore2b"
  join_wrapped_words "-" "$norm.ignore" "$norm.ignore2b"
  run_cmp "$page" "$WDIFF" "${orig}_ignore2" "$norm.ignore2"
  ret1=$?
  run_cmp "$page" "$WDIFF" "${orig}_ignore2b" "$norm.ignore2b"
  ret2=$?
  if [ "$ret1" -eq 0 ] || [ "$ret2" -eq 0 ]; then
    record wok2 WOK2 "$page"
    return
  fi

  strip_fonts_and_hyphens "${orig}_ignore" "${orig}_ignore3"
  strip_fonts_and_hyphens "$norm.ignore" "$norm.ignore3"
  if run_cmp "$page" "$WDIFF" "${orig}_ignore3" "$norm.ignore3"; then
    record wok3 WOK3 "$page"
  else
    record wdiff WDIFF "$page"
  fi
}

# Classify a page for which po4a-normalize exited with a non-zero status.
#   $1 - exit status of po4a-normalize
#   $2 - page name as listed in LISTE.TOTAL
#   $3 - base name of the uncompressed copy in $tmp
# Status 254 is recorded as IGN. So is a one-line file whose line matches
# "^.so " (presumably a stub that only sources another page). Anything else
# is recorded as PBS.
classify_failure() {
  if [ "$1" -eq 254 ]; then
    record ign IGN "$2"
    return
  fi
  # Read from stdin so that wc prints only the count; strip the padding some
  # wc implementations add.
  l=$(wc -l < "$tmp/$3" | tr -d '[:blank:]')
  if grep -q '^.so ' "$tmp/$3" 2>/dev/null && [ "$l" = "1" ]; then
    record ign IGN "$2"
  else
    record pbs PBS "$2"
  fi
}

if [ $# -eq 1 ] && [ "$1" = "-h" ]; then
  if [ "$module" = "man" ]; then
    usage
  fi
  exit 0
fi

# Select the text renderer before touching any file, so that a wrong module
# value does not erase previous results.
case $module in
  man) cmdtotxt="mantotxt" ;;
  pod) cmdtotxt="podtotxt" ;;
  *)
    printf '%s: unsupported module: %s\n' "$0" "$module" >&2
    exit 1
    ;;
esac

rm -f LISTE.*
# Explicit list instead of brace expansion, which /bin/sh need not support.
for list in TOTAL OK OK2 WDIFF PBS IGN WOK1 WOK2 WOK3; do
  touch "LISTE.$list"
done

if [ $# -eq 2 ] && [ "$1" = "-f" ]; then
  cat "$2" > LISTE.TOTAL
elif [ $# -ne 0 ]; then
  printf '%s\n' "$@" > LISTE.TOTAL
elif [ "$module" = "man" ]; then
  find /usr/share/man/man[1-9] /usr/X11R6/man/man[1-9] -type f |
    sort > LISTE.TOTAL
elif [ "$module" = "pod" ]; then
  locate pod | grep -E '\.pod$' | sort > LISTE.TOTAL
fi

# Private, unpredictable work directory, removed on every exit path.
tmp=$(mktemp -d "${TMPDIR:-/tmp}/po4a-check-$module-XXXXXX") || exit 1
trap 'rm -rf "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

# The list is read on fd 3 so that commands run inside the loop cannot
# consume it through their standard input.
while IFS= read -r fich <&3 || [ -n "$fich" ]; do
  [ -n "$fich" ] || continue

  case $fich in
    *.gz)
      newfich=$(basename "$fich" .gz)
      gzip -dc "$fich" > "$tmp/$newfich"
      ;;
    *)
      newfich=$(basename "$fich")
      cat "$fich" > "$tmp/$newfich"
      ;;
  esac

  echo "####### $fich"
  rm -f po4a-normalize.*
  PERLLIB=../lib ../po4a-normalize -f "$module" -M iso-8859-1 "$tmp/$newfich" \
    -o groff_code=translate \
    -o noarg=ny0,Sp,Pp,BS,BE,VE,ES,zZ,zY \
    -o translate_each=TA,IN,hN,AS,OP,Tp,Ip,TQ \
    -o no_wrap=Ds:De,Ex:Ee,CS:CE,Vb:Ve \
    -o untranslated=Id,VS,rm \
    -o translate_joined=FN,Sh,iX \
    -o inline=ZN,Pn \
    -o generated 2>&1
  status=$?

  if [ "$status" -eq 0 ]; then
    compare_renderings "$fich" "$newfich"
  else
    classify_failure "$status" "$fich" "$newfich"
  fi

  rm -f "$tmp/$newfich"* "$tmp"/po4a-normalize.*
  echo '-------------------'
done 3< LISTE.TOTAL