#! /bin/sh
. ${srcdir:=.}/t.frame
#
# test common encoding related capabilities, e.g.
# registering messages and database maintenance
#
# initialize
# Empty the scratch directory before doing anything else.
# ${TMPDIR:?} makes the shell abort with an error when TMPDIR is unset or
# empty, instead of letting the glob below expand to /* .
rm -r "${TMPDIR:?}"/* # reset TXN detection
# Pick the transaction option from DB_TXN.  The expansion is quoted (with an
# empty default) so the test stays well-formed when DB_TXN is unset or empty.
# NOTE(review): TXN is not referenced again in this script -- confirm whether
# it is still needed.
if [ "${DB_TXN:-}" = true ]; then
  TXN=--db-transaction=yes
else
  TXN=
fi
# Base name of the test message; the message is stored as "$TMPDIR"/$MSG.txt.
MSG="msg.accents"
cat <<_EOF > "$TMPDIR"/$MSG.txt
Subject: UTF-8 testing
Content-Type: text/html; charset=iso-8859-1
Accented characters:
â château
é Cédric
í Cecília
ó escribió
ü Düsseldorf
Special Characters:
0x92-’-windows-apostrophe
0x93-“-windows-left-quote
0x94-”-windows-right-quote
0xA0- -no-break-space
0xA1-¡-inverted-exclamation-mark
0xA2-¢-cent-sign
0xA3-£-pound-sign
0xA4-¤-yen-sign
0xA7-§-section-sign
0xA9-©-copyright-sign
0xAA-ª-feminine-ordinal-indicator
0xAB-«-left-pointing-double-angle-quotation-mark
0xAC-¬-not-sign
0xAD--soft-hyphen
0xAE-®-registered-sign
0xAF-¯-macron
0xB0-°-degree-sign
0xB1-±-plus-minus-sign
0xB2-²-superscript-two
0xB3-³-superscript-three
0xB5-µ-micro-sign
0xB6-¶-pilcrow-sign
0xB7-·-middle-dot
0xB9-¹-superscript-one
0xBA-º-masculine-ordinal-indicator
0xBB-»-right-pointing-double-angle-quotation-mark
0xBC-¼-inverted-question-mark
0xD7-×-multiplication-sign
0xF7-÷-division-sign
_EOF
# Current date as YYYYMMDD.
DATE=$(date +%Y%m%d)
# Directory that holds the wordlist, and the wordlist file inside it.
BOGODIR="$TMPDIR/wordlist"
WORDLIST="$BOGODIR/wordlist.$DB_EXT"
# Create the wordlist directory unless it already exists.
if [ ! -d "$BOGODIR" ]; then
  mkdir "$BOGODIR"
fi
# Build a wordlist from the test message once per encoding mode and keep a
# sorted dump of each:
#   --unicode=yes  (utf-8)       -> wordlist.yes.txt
#   --unicode=no   (iso-8859-1)  -> wordlist.no.txt
# The wordlist directory is emptied at the start of every pass.
for YN in yes no
do
  rm -fr "$BOGODIR"/*
  $BOGOFILTER -C -y 0 -n -d "$BOGODIR" "--unicode=$YN" \
    < "$TMPDIR/$MSG.txt"
  $BOGOUTIL -C -y 0 -d "$WORDLIST" \
    | sort > "$TMPDIR/wordlist.$YN.txt"
done
# Dump the wordlist currently on disk as the "raw" reference ...
$BOGOUTIL -C -y 0 -d "$WORDLIST" \
  | sort > "$TMPDIR/wordlist.raw.txt"
# ... and derive a utf-8 variant of that dump outside of bogoutil: recode the
# text with iconv and rewrite the ".ENCODING 1" marker to ".ENCODING 2".
iconv -f iso-8859-1 -t utf-8 < "$TMPDIR/wordlist.raw.txt" \
  | sed "s/.ENCODING 1/.ENCODING 2/" \
  | sort > "$TMPDIR/wordlist.iconv.txt"
# Run bogoutil -m on the wordlist with the given --unicode setting, then
# write a sorted dump of the result.
#   $1 - value for --unicode (yes or no)
#   $2 - tag used in the dump file name: "$TMPDIR"/wordlist.<tag>.txt
enc_convert_and_dump() {
  $BOGOUTIL -C -y 0 -m "$WORDLIST" "--unicode=$1"
  $BOGOUTIL -C -y 0 -d "$WORDLIST" | sort > "$TMPDIR/wordlist.$2.txt"
}

# convert from iso-8859-1 to unicode
enc_convert_and_dump yes new
# convert from unicode to iso-8859-1
enc_convert_and_dump no old
# Sanity-check the cksum utility before relying on its output: checksum a
# small file with known content and compare the result with the expected
# "<checksum> <size>" pair.
echo "This is a test." > "$TMPDIR"/empty.txt
if ! cksum > "$TMPDIR"/empty.sum "$TMPDIR"/empty.txt ; then
  echo >&2 'cksum utility not found'
  # NOTE(review): 77 is presumably the harness's "test skipped" status -- confirm.
  exit 77
fi
# Turn tabs into blanks so that cut can split on a single space, then keep
# only the first two fields (checksum and byte count).  The tab is written
# as the escape '\t' so the intent survives whitespace-mangling editors.
tr '\t' ' ' <"$TMPDIR"/empty.sum | cut -d " " -f 1-2 > "$TMPDIR"/empty.sumonly
if ! echo "2711662207 16" | cmp -s - "$TMPDIR"/empty.sumonly
then
  echo >&2 'cksum utility not POSIX compliant!'
  exit 1
fi
# Expected "<checksum> <size> <file name>" lines for the six wordlist dumps.
# The here-document is fed straight to sed, which strips any " $DATE"
# occurrence before the reference file is written.
sed "s/ $DATE//" > "$TMPDIR/cksum.ref" <<EOF
1059676362 909 wordlist.iconv.txt
1059676362 909 wordlist.new.txt
3303802408 880 wordlist.no.txt
3303802408 880 wordlist.old.txt
3303802408 880 wordlist.raw.txt
1059676362 909 wordlist.yes.txt
EOF
# Append one "<checksum> <size> <file name>" line per wordlist dump to
# cksum.out: awk prints the first two cksum fields followed by a blank
# (no newline), and basename completes the line with the bare file name.
for FILE in "$TMPDIR"/wordlist.*.txt
do
  {
    cksum "$FILE" | $AWK '{ printf "%s %s ", $1, $2 }'
    basename "$FILE"
  } >> "$TMPDIR/cksum.out"
done
# Compare the computed checksums with the reference; the exit status of the
# comparison is the last status produced by this script section.
# Quiet mode (verbose = 0) uses cmp; otherwise diff -s is used, which also
# reports when the files are identical.  The expansion is quoted and
# defaults to 0 so the test stays well-formed if verbose is unset or empty.
if [ "${verbose:-0}" -eq 0 ]; then
  cmp "$TMPDIR"/cksum.ref "$TMPDIR"/cksum.out
else
  diff -s "$TMPDIR"/cksum.ref "$TMPDIR"/cksum.out
fi