#!/bin/sh
# Quick and dirty script to audit font use in a package repository
#
# It is slow, it is ugly, and it requires a good network connection
# Function declarations
# Print the command synopsis and a few invocation examples on stderr,
# then terminate the script with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <id> <location>" \
    "<id>: identifier of the package repository to check" \
    "<location>: location of the package repository to check" \
    "Examples:" \
    "$0 tmp /tmp/rpm" \
    "$0 tmp file://tmp/rpm" \
    "$0 rawhide http://example.com/mirrors/fedora.redhat.com/fedora/linux/development/x86_64/os/" \
    >&2
  exit 1
}
# Directory holding the private helpers this script runs (test-info and
# repo-font-audit.mk), located relative to the script itself.
# The path is made absolute because the script later changes to a temporary
# working directory, which would invalidate a path relative to $0.
DATADIR="$(dirname "$0")/../private"
case "$DATADIR" in
  /*) ;;
  *) DATADIR="$PWD/$DATADIR" ;;
esac
# Filenames with whitespace or & + makefile = !!!
# Bracket expression of the characters replaced by "_" in file and
# directory names before they are handed to make.
bad_pattern="[[:space:]&\:]"
# Tools and humans make mistakes; try to fix some obvious ones
# Filter: reads locations on stdin, writes the repaired form on stdout.
#   file:<any number of slashes>path       ->  /path
#   ftp|http|https:<any number of slashes> ->  <scheme>://
fix_url() {
  sed -e 's=^file:\(\/*\)=/=' \
      -e 's=^\(ftp\|http\|https\):\(\/*\)=\1://='
}
# Filter: wraps stdin at word boundaries so that it fits the terminal, and
# prefixes every resulting line with one space.
# The width comes from "tput cols"; when that fails or returns something
# unusable (no $TERM, tput missing, ...) 80 columns are assumed, so the
# text is still printed instead of being lost to a fold error.
pretty_indent() {
  pi_cols=$(tput cols 2>/dev/null)
  case "$pi_cols" in
    ''|*[!0-9]*) pi_cols=80 ;;
  esac
  # Two columns are reserved for the indent; a narrower value cannot work
  [ "$pi_cols" -gt 2 ] || pi_cols=80
  fold -s -w $((pi_cols - 2)) \
    | while read line ; do echo " $line" ; done
}
# $1 = CSV file to evaluate
# Prints one line of the form:
#   files|packages|source packages|total file size (MiB)|total package size (MiB)
# A package is identified by columns 3, 4 and 5; its size (column 6) is
# counted once however many lines mention it. File sizes come from column 17.
tally() {
  awk -F "|" '
    {
      total_file_bytes += $17
      file_count += 1
      pkg_bytes[$3 "-" $4 "." $5] = $6
      source_seen[$2] = 1
    }
    END {
      mib = 1024*1024
      for (pkg in pkg_bytes) {
        pkg_count += 1
        total_pkg_bytes += pkg_bytes[pkg]
      }
      for (src in source_seen) {
        source_count += 1
      }
      total_pkg_mib = total_pkg_bytes / mib
      total_file_mib = total_file_bytes / mib
      print file_count "|" pkg_count "|" source_count "|" total_file_mib "|" total_pkg_mib
    }' "$1"
}
# $1 = CSV file to summarise
# Prints a one-sentence digest of the tally of $1 (or "None!" when the file
# has no line), indented for the terminal and followed by an empty line.
summary() {
  if [ $(cat "$1" | wc -l) -le 0 ] ; then
    echo "⇒ None!" | pretty_indent
  else
    tally "$1" \
      | awk -F '|' '{ print "⇒ " $1 " file(s) (" $4 " MiB) in " $2 " package(s) (" $5 " MiB) generated from " $3 " source package(s)." }' \
      | pretty_indent
  fi
  echo ""
}
# Add the results of one test as a new column of the summary file.
# $1 = summary file (rewritten in place)
# $2 = temporary data file (lines that failed the test)
# $3 = test id (used as the label of the new column)
#
# Lines are matched on a six-field key: columns 1 to 5 plus column 10 of the
# data file against columns 1 to 6 of the summary file.
# - the header line of the summary gets the test id as a new last column;
# - every other summary line is padded to the header width and gets "1" in
#   the new column when its key is present in the data file, nothing otherwise;
# - keys that only exist in the data file are appended as new lines, with
#   empty values for the columns of the previous tests.
# The header stays first; all the other lines are sorted case-insensitively.
# Uses tmp/tmp.ccs.csv (relative to the current directory) as scratch file.
complete_csv_summary() {
# The data file is read first so that "fail" is complete before the summary
# file is processed
awk -F '|' -v t_datafile="$2" -v testlabel="$3" \
'FILENAME==t_datafile { fail[$1"|"$2"|"$3"|"$4"|"$5"|"$10] = 1 }
FILENAME!=t_datafile && FNR==1 {
max = NF + 1
header = $1
for (i = 2 ; i <= max ; i++) {
header = header "|" $i
}
print header testlabel
}
FILENAME!=t_datafile && FNR>1 {
line = $1 "|" $2 "|" $3 "|" $4 "|" $5 "|" $6
newline = $1
for (i = 2 ; i <= max ; i++) {
newline = newline "|" $i
}
print newline fail[line]
processed[line] = 1
}
END {
for ( line in fail ) {
if ( ! processed[line] ) {
newline = line
for (i = 7 ; i <= max ; i++) {
newline = newline "|"
}
print newline fail[line]
}
}
}' "$2" "$1" > tmp/tmp.ccs.csv
# Write the result back: header untouched, body sorted
awk 'FNR==1' tmp/tmp.ccs.csv > "$1"
awk 'FNR>1' tmp/tmp.ccs.csv | sort -f -t '|' >> "$1"
rm tmp/tmp.ccs.csv
}
# $1 = CSV file whose last column is the grouping key
# For every distinct key (in sort order), prints "key|" immediately followed
# by the tally of the lines carrying that key.
# Uses tmp/tmp.ss.csv (relative to the current directory) as scratch file.
substats() {
  ss_datafile="$1"
  awk -F '|' '{ print $NF }' "$ss_datafile" \
    | sort \
    | uniq \
    | while read key
      do
        echo -n "$key|"
        # Isolate the lines of this group, then let tally finish the line
        awk -F '|' -v key="$key" '$NF==key' "$ss_datafile" > tmp/tmp.ss.csv
        tally tmp/tmp.ss.csv
        rm tmp/tmp.ss.csv
      done
}
# $1 = CSV file to sum_up
# Prints the global summary of $1 and, when the file is not empty, two
# tables: one grouped by column 15 (restricted to lines with a non-empty
# column 13) and one grouped by column 5.
# Uses tmp/tmp.s.csv (relative to the current directory) as scratch file.
stats() {
  s_datafile="$1"
  summary "$s_datafile"
  # Nothing to break down when there is no data
  [ $(cat "$s_datafile" | wc -l) -gt 0 ] || return 0
  {
    echo "Format|Files|rpm|srpm|Files (MiB)|rpm (MiB)"
    awk -F '|' '$13 != "" { print $0 "|" $15 }' "$s_datafile" > tmp/tmp.s.csv
    substats tmp/tmp.s.csv
  } | column -t -s '|' | pretty_indent
  echo ""
  {
    echo "Arch|Files|rpm|srpm|Files (MiB)|rpm (MiB)"
    awk -F '|' '{ print $0 "|" $5 }' "$s_datafile" > tmp/tmp.s.csv
    substats tmp/tmp.s.csv
  } | column -t -s '|' | pretty_indent
  echo ""
  rm tmp/tmp.s.csv
}
# $1 = CSV file to sum_up
# Prints a single comma-separated list "name(count)" of the packages
# (column 3) found in $1, sorted by name, where count is the number of lines
# for that package. Names of packages flagged "M" in column 7 are put
# between square brackets. Prints nothing and returns non-zero when $1 has
# no line.
list_rpm() {
  if [ $(cat "$1" | wc -l) -gt 0 ] ; then
    awk -F '|' '
      {
        name = ($7 == "M") ? "[" $3 "]" : $3
        count[name]++
      }
      END {
        n = asorti(count, names)
        for (i = 1; i <= n; i++) {
          out = out names[i] "(" count[names[i]] "), "
        }
        sub(", $", "", out)
        print out
      }' "$1" | pretty_indent
  else
    return 1
  fi
}
# Pipe CSV file to sum_up
# It is assumed to be of the form
# rpm | pattern to match | pattern to display
# patterns may include # as separators
#
# Lines are grouped by "pattern to match". Groups are printed from the
# largest to the smallest (groups of the same size in sorted key order);
# each group is introduced by its key ("#" shown as ", ") and followed by
# its sorted "pattern to display<TAB>rpm" entries and an empty line.
# Requires gawk (asort, gensub).
enum_rpm() {
  awk -F '|' '
    BEGIN { print "" }
    {
      rpm = $1
      key = $2
      pat = $3
      files[key] = files[key] pat "#" rpm "|"
    }
    END {
      # pb[n] = "|"-separated list of the keys that have exactly n entries
      maxtotal = 0
      for (key in files) {
        sub("\\|$", "", files[key])
        total = split(files[key], lines, "|")
        pb[total] = pb[total] key "|"
        if (total > maxtotal) { maxtotal = total }
      }
      # Walk the sizes numerically: sorting the indices of pb with asorti
      # would compare them as strings and put 10 before 2
      for (total = maxtotal; total >= 1; total--) {
        if (!(total in pb)) { continue }
        sub("\\|$", "", pb[total])
        split(pb[total], keys, "|")
        nk = asort(keys, sortedkeys)
        for (k = 1; k <= nk; k++) {
          key = sortedkeys[k]
          print gensub("#", ", ", "g", key)
          split(files[key], lines, "|")
          nl = asort(lines, sortedlines)
          for (l = 1; l <= nl; l++) {
            print " " gensub("#", "\t", "G", sortedlines[l])
          }
          print ""
        }
      }
    }' | pretty_indent
}
# Record the outcome of one test and print its digest.
# $1 = this test id
# $2 = csv file with the results of this test
# $3 = csv file that consolidates the results of all tests
# Note the argument order is reversed when calling complete_csv_summary,
# which takes (summary file, data file, test id).
test_wrapup() {
complete_csv_summary "$3" "$2" "$1"
summary "$2"
}
# Print the score board built from a test summary file.
# $1 = csv file with test results: one header line, then one line per
#      failing file; columns 1 to 6 identify the file, columns 7 and up hold
#      one value per test (lh=7 in the awk programs is that first test column)
#
# When $1 holds more than its header line, three parts are printed:
# 1. a table with one row per package (P#) and one column (t1, t2...) per
#    test whose total is not zero, ending with a "Total" row;
# 2. the legend of the P# numbers (columns 1 to 5 of the package);
# 3. for each test shown in the table, its title and help text as returned
#    by "$DATADIR/test-info".
# Otherwise "⇒ None!" is printed.
# Requires gawk (asorti).
test_score() {
if [ $(cat "$1" | wc -l) -gt 1 ] ; then
# Part 1: score table. Packages are keyed on columns 1 to 5, empty values
# being replaced by a space; asorti then replaces "fail" with the sorted list
# of keys, so that the row number is the package number
awk -F '|' '
BEGIN {
lh=7
headerline = "P#"
}
FNR==1 {
max = NF
}
FNR>1 {
id = ""
for (i = 1 ; i < (lh-1) ; i++) {
if ($i != "") id = id $i "|"
else id = id " |"
}
sub("\\|$","",id)
fail[id] = 1
for (i = lh ; i <= max ; i++) {
score[id"|"i] = score[id"|"i] + $i
totalscore[i] = totalscore[i] + $i
}
}
END {
for (i = lh ; i <= max ; i++) {
if ( totalscore[i] ) { headerline = headerline "|" "t" i+1-lh }
}
print headerline
n = asorti(fail)
for (l = 1; l <= n; l++) {
newline = l
for (c = lh ; c <= max ; c++) {
if ( totalscore[c] ) {
if ( score[fail[l]"|"c] )
{ newline = newline "|" score[fail[l]"|"c] }
else
{ newline = newline "|‧" }
}
}
print newline
}
totalline = "Total"
for (i = lh ; i <= max ; i++) {
if ( totalscore[i] ) { totalline = totalline "|" totalscore[i] }
}
print totalline
}' "$1" | column -t -s '|'
echo ""
# Part 2: package legend, using the same keys and the same numbering as the
# score table
awk -F '|' '
BEGIN { lh=7 }
FNR>1 {
id = ""
for (i = 1 ; i < (lh-1) ; i++) {
if ($i != "") id = id $i "|"
else id = id " |"
}
sub("\\|$","",id)
fail[id] = 1
}
END {
print "P#|Maintainer|SRPM|RPM|EVR|Arch"
n = asorti(fail)
for (l = 1; l <= n; l++) {
print l "|" fail[l]
}
}' "$1" | column -t -s '|'
echo ""
echo "Test explanation:"
echo ""
# Part 3: emit "number|test id" for every test with a non-zero total, then
# let test-info describe each of them
awk -F '|' '
BEGIN { lh = 7 }
FNR==1 {
max = NF
for (i = lh ; i <= max ; i++) {
legend[i] = i+1-lh "|" $i }
}
FNR>1 {
for (i = lh ; i <= max ; i++) {
totalscore[i] = totalscore[i] + $i
}
}
END {
for (i = lh ; i <= max ; i++) {
if ( totalscore[i] ) { print legend[i] }
}
}' "$1" | while read line ; do
testnumber=$(echo $line | awk -F '|' '{ print "t"$1 }')
testid=$(echo $line | awk -F '|' '{ print $2 }')
echo -n "$testnumber. "
"$DATADIR/test-info" $testid title
echo ""
( "$DATADIR/test-info" $testid help ) | pretty_indent
done
else
echo "⇒ None!"
fi
}
# Collect the raw data of the audit.
#
# 1. Ask $REPOQUERY for the packages of the repository that provide font
#    metadata (tag M), ship files with a font extension (tag E) or require
#    libX11 (tag X), and merge the three lists in "tmp/$FL.1".
# 2. Fetch every listed package in its own directory under tmp/, list its
#    content, extract the font files and the binaries of interest, and write
#    one small info file per extracted file for the makefile to work on.
# 3. Run "$DATADIR/repo-font-audit.mk", gather its results in "tmp/$FL.2",
#    move the per-package reports to data/ and drop the work directories.
#
# Globals read: REPOQUERY REPOID REPOURL FPL PWFL XPL FL DATADIR bad_pattern
# Globals set: start_time (read back by pack_data), FREPOURL
# Must be started from the work directory that contains tmp/, and returns
# there. Exits the whole script when no package matches.
collect() {
  start_time=$(date "+%s")
  # This is very ugly but the safest syntax to use with any repoquery version
  FREPOURL=$(echo "$REPOURL" | sed 's=^/=file:///=')
  echo "Looking for packages:"
  # The sed at the end of each query strips the "-version-release" tail (and
  # what follows it) from the source rpm file name, leaving the source name
  echo "— with font metadata…"
  $REPOQUERY --repofrompath=$REPOID,$FREPOURL --repoid=$REPOID \
    --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|M" \
    --whatprovides "font(*)" --quiet \
    | sort | uniq \
    | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g' \
    > "$FPL"
  echo "— that include files with common font extensions…"
  $REPOQUERY --repofrompath=$REPOID,$FREPOURL --repoid=$REPOID \
    --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|E" \
    -f '*.ttf' -f '*.otf' -f '*.ttc' \
    -f '*.pfb' -f '*.pfa' \
    -f '*.pcf.gz' -f '*.pcf' -f '*.bdf' --quiet \
    | sort | uniq \
    | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g' \
    > "$PWFL"
  echo "— that use the core X11 protocol…"
  $REPOQUERY --repofrompath=$REPOID,$FREPOURL --repoid=$REPOID \
    --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|X" \
    --whatrequires 'libX11.so*' \
    | sort | uniq \
    | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g' \
    > "$XPL"
  if [ $(cat "$FPL" "$PWFL" "$XPL" | wc -l) -eq 0 ] ; then
    echo "Nothing to do!"
    exit
  fi
  # One line per package, the tags of the lists it belongs to being joined
  # with commas in a sixth column
  cat "$FPL" "$PWFL" "$XPL" \
    | awk -F '|' '
        { tag[$1 "|" $2 "|" $3 "|" $4 "|" $5] = tag[$1 "|" $2 "|" $3 "|" $4 "|" $5] $6 "," }
        END {
          n = asorti(tag,rpm)
          for (l = 1; l <= n; l++) {
            sub(",$", "", tag[rpm[l]])
            print rpm[l] "|" tag[rpm[l]]
          }
        }' > "tmp/$FL.1"
  echo ""
  echo "Inspecting packages:"
  cd "tmp"
  rm -f "$FL.2"
  cat "$FL.1" | while read line ; do
    rpm=$(echo "$line" | awk -F '|' '{ print $2 "-" $3 "." $4 }')
    rpmline=$(echo "$line" | awk -F '|' '{ print $1 "|" $2 "|" $3 "|" $4 "|" $5 }')
    rpmd=$(echo "$rpm" | sed "s+$bad_pattern+_+g")
    echo "$line" | cut -d "|" -f 6 | grep -q "M" && metadata="M" || metadata=""
    echo -n "– $rpm ◔"
    mkdir "$rpmd"
    cd "$rpmd"
    # Local packages are symlinked, remote ones downloaded
    rpm_loc=$($REPOQUERY --repofrompath=$REPOID,$REPOURL --repoid=$REPOID \
      --location "$rpm" 2>/dev/null \
      | fix_url)
    if $(echo "$rpm_loc" | grep -q -e '^[\./~]') ; then
      ln -s "$rpm_loc" .
    else
      curl -O --silent --show-error --retry 3 --connect-timeout 10 --retry-delay 15 --url "$rpm_loc"
    fi
    echo -ne "\b◑"
    rpm2cpio *.rpm > "$rpm.cpio"
    echo -ne "\b◕"
    cat "$rpm.cpio" | cpio --quiet -it > "$rpm.lst"
    # Number of packaged files that are neither fonts nor in a location where
    # font packages are expected to put files
    nofont_score=$(cat "$rpm.lst" | sed 's=^\./=/=g' \
      | grep -viE -e "^/usr/share/fonts" \
          -e "^/usr/share/fontconfig/conf.avail/" \
          -e "^/etc/fonts/conf.d/" \
          -e "^/usr/share/doc/" \
          -e "^/etc/X11/fontpath.d/" \
          -e '\.((ttf)|(ttc)|(otf)|(pfa)|(pfb)|(bdf)|(pcf)|(pcf\.gz))$' \
      | wc -l)
    echo "$rpmline|$metadata|$nofont_score" > rpm-info.txt
    if $(echo $line | cut -d "|" -f 6 | grep -q "E") ; then
      cat "$rpm.lst" \
        | grep -iE '\.((ttf)|(ttc)|(otf)|(pfa)|(pfb)|(bdf)|(pcf)|(pcf\.gz))$' \
        | sort > "$rpm.fonts.lst"
    fi
    if $(echo $line | cut -d "|" -f 6 | grep -q "X") ; then
      cat "$rpm.lst" \
        | grep -i -e "^./sbin/" \
            -e "^./usr/sbin/" \
            -e "^./usr/kerberos/sbin" \
            -e "^./bin/" \
            -e "^./usr/bin/" \
            -e "^./usr/kerberos/bin/" \
            -e "^./lib.*/" \
            -e "^./usr/lib.*/" \
            -e "^./opt/" \
            -e "^./usr/X11R6/" \
            -e "^./usr/games/" \
            -e "^./usr/local/" \
        | grep -vi -e "^./usr/bin/dmxwininfo" \
            -e "^./usr/bin/Xdmx" \
            -e "^./usr/bin/xfontsel" \
            -e "^./usr/bin/xlsfonts" \
            -e "^./usr/bin/Xnest" \
            -e "^./usr/bin/xprop" \
            -e "^./usr/bin/xsetroot" \
            -e "^./usr/bin/xwininfo" \
            -e "^./usr/bin/x11vnc" \
            -e "^./usr/bin/x2vnc" \
            -e "^./usr/lib.*/libXcursor.so" \
        | sort > "$rpm.bin.lst"
    fi
    # Make sure both lists exist even when the package lacks the E or X tag
    touch "$rpm.fonts.lst" "$rpm.bin.lst"
    cat "$rpm.fonts.lst" "$rpm.bin.lst" | sort | uniq > "$rpm.files.lst"
    mkdir src
    cd src
    cpio -idm --quiet -E "../$rpm.files.lst" < "../$rpm.cpio"
    cd ..
    rm "$rpm.cpio"
    echo -ne "\b● "
    # Classify every font file with libmagic; one progress character is
    # printed per file
    sed 's=^\./==g' "$rpm.fonts.lst" \
      | while read file; do
          unset target
          type=$(file -bzh "src/$file")
          case $(echo "$type" | sed 's+ (\(.*\)++g' \
                              | sed 's+ `\(.*\)++g' \
                              | sed 's+,\(.*\)++g' \
                              | sed 's+\( \)*$++g' ) in
            "TrueType font data")
              echo -n "t"
              ;;
            "TrueType font collection data")
              echo -n "T"
              ;;
            "OpenType font data")
              echo -n "o"
              ;;
            "X11 Portable Compiled Font data")
              echo -n "b"
              ;;
            "X11 BDF font text")
              echo -n "B"
              ;;
            # PostScript files are a mess
            "PostScript Type 1 font text")
              echo -n "P"
              ;;
            "PostScript Type 1 font program data")
              echo -n "p"
              ;;
            "PostScript document text conforming DSC level 3.0")
              type="ignored"
              echo -n "-"
              ;;
            "PostScript document text"|"8086 relocatable")
              echo -n "!" >&2
              ;;
            "symbolic link to"|"broken symbolic link to")
              target=$(readlink -m "src/$file" | sed "s+^$PWD/src++g")
              if $(echo "$target" | grep -q "^/usr/share/fonts") ; then
                type="Link"
                echo -n "l"
              else
                type="ignored"
                echo -n "-"
              fi
              ;;
            *)
              echo -n "?"
              ;;
          esac
          if [ "$type" != "ignored" -a ! -h "src/$file" ] ; then
            # Quoted: the path may contain whitespace at this point, it is
            # only sanitised after the loop
            mkdir -p "$(dirname "tmp/$file")"
            echo "/$file|$type" > "tmp/$file.rfo.fonts.info"
            if [ "$type" != "TrueType font data" -a \
                 "$type" != "OpenType font data" ] ; then
              touch "tmp/$file.rfo.unicover"
            fi
          fi
          if [ "$type" = "Link" ] ; then
            echo "/$file|$target" >> font-links.txt
          fi
        done
    # Among the candidate binaries, only ELF objects are kept
    sed 's=^\./==g' "$rpm.bin.lst" \
      | while read file ; do
          type=$(file -bzh "src/$file")
          if $(echo "$type" | grep -q -e '^ELF ' -e ' ELF ') ; then
            echo -n "‧"
            mkdir -p "$(dirname "tmp/$file")"
            echo "/$file|$type" > "tmp/$file.rfo.core-fonts.info"
          fi
        done
    echo ""
    cd ..
  done
  echo ""
  echo "Analysing files…"
  # make can not cope with the characters of $bad_pattern: move the affected
  # files to sanitised names
  find . -type d | grep "$bad_pattern" \
    | while read dir ; do
        mkdir -p $(echo "$dir" | sed "s+$bad_pattern+_+g")
      done
  find . -type f | grep "$bad_pattern" \
    | while read file ; do
        mv "$file" $(echo "$file"| sed "s+$bad_pattern+_+g")
      done
  # There must be a cleaner way to do this
  parallelism=$(($(cat /proc/cpuinfo | grep processor | wc -l)+1))
  make -s -r -j $parallelism -f "$DATADIR/repo-font-audit.mk"
  find . -name "*\.rfo\.fonts" \
      -o -name "*\.rfo\.core-fonts" \
      -o -name processed-font-links.txt \
    | while read datafile ; do
        cat "$datafile" >> "$FL.2"
      done
  mkdir ../data
  for rpmlint in */rpmlint.txt ; do
    mkdir "../data/$(dirname $rpmlint)"
    mv "$rpmlint" "../data/$rpmlint"
  done
  # Non-empty reports are kept as data/<package>/<flattened file path>.<ext>.txt
  for ext in fontlint unicover fc-query ; do
    find . -name "*\.rfo\.$ext" \
      | while read file ; do
          dest=$(echo "$file" \
            | sed "s+./\([^/]*\)/tmp/\(.*\).rfo.$ext+\1|\2.$ext.txt+" \
            | sed 's+/+_+g' | sed 's+|+/+g')
          [ -s "$file" ] && mv "$file" "../data/$dest"
        done
  done
  # Drop the per-package work directories. The glob must stay unquoted to be
  # expanded; the ./ prefix protects against names starting with a dash
  rm -fr ./*/
  echo " ♻"
  cd ..
}
# Build the consolidated data file "$FL" from the raw results in "tmp/$FL.2",
# and initialise the test summary file "$TSUM" with its header line.
# Globals read: FL, TSUM
# Intermediate files: tmp/$FL.3 (maintainer added), tmp/$FL.4 (symlink
# provider added)
consolidate() {
echo ""
echo "Consolidating data…"
# Header line: it documents the 24 columns of the consolidated file
echo "Maintainer|SRPM|RPM|EVR|Arch|RPM size|RPM metadata|foreign data|\
rpmlint score|filename|family name|face name|fixed family name|\
fixed face name|format (fontconfig)|format (libmagic)|file size|checksum|\
partial scripts|partial blocks|fontlint|symlink target|symlink provider|\
core fonts use" > "$FL"
if [ -s "tmp/$FL.2" -a -r "tmp/$FL.2" ] ; then
# FIXME: make this distro-agnostic
# Prefix every line with the owner of its source package (first column of
# the raw data). The owner stays empty when the lookup tool returns nothing,
# its errors being discarded.
# NOTE(review): presumably fedoradev-pkgowners prints "owner package" pairs;
# confirm against the tool
cut -d "|" -f 1 "tmp/$FL.2" | sort | uniq \
| /usr/bin/fedoradev-pkgowners 2>/dev/null \
| sed 's+ +|+g' \
| awk -F "|" -v file="tmp/$FL.2" '
{ owner[$2]=$1 }
END {
while ((getline < file) > 0) {
print owner[$1] "|" $0
}
}' | sort -df -t "|" -k 1 -k 2 -k 3 > "tmp/$FL.3"
# Append one column: for a symlink line, the "RPM-EVR" of the package that
# ships the file the link points to (column 22 looked up against column 10);
# left empty for the other lines and for links whose target is not found
awk -F "|" '
($16 != "Link") { source[$10] = $3 "-" $4
out[$0] = $0 "|" }
($16 == "Link") { link[$0] = $22 }
END {
for (line in link) {
out[line] = line "|" source[link[line]]
}
n = asort(out)
for (i = 1; i <= n; i++) print out[i]
}' "tmp/$FL.3" > "tmp/$FL.4"
# Final ordering and clean-up: fields that only hold "0" are blanked (the
# substitution runs twice to catch adjacent fields) and trailing separators
# are removed
cat "tmp/$FL.4" \
| sort -df -t "|" -k 1 -k 2 -k 3 -k 4n -k 13 -k 14 -k 10 \
| sed 's=$=|=g' | sed 's=|0|=||=g' | sed 's=|0|=||=g' | sed 's=|\+$==g' \
>> "$FL"
fi
echo "Maintainer|SRPM|RPM|EVR|Arch|File" > "$TSUM"
}
# Run every test against the consolidated data file "$FL".
#
# Each test follows the same pattern: print the test title (obtained from
# "$DATADIR/test-info"), select the offending lines of "$FL" into
# tmp/tmp.csv, print the affected packages (list_rpm or enum_rpm), then
# record the result in "$TSUM" and print a digest (test_wrapup).
#
# Columns of "$FL" used by the tests:
#   3 RPM, 4 EVR, 5 Arch, 7 RPM metadata ("M"), 8 foreign data,
#   9 rpmlint score, 10 filename, 11 family name, 12 face name,
#   13 fixed family name, 14 fixed face name, 15 format (fontconfig),
#   16 format (libmagic, "Link" for symlinks), 18 checksum,
#   19 partial scripts, 20 partial blocks, 21 fontlint, 22 symlink target,
#   23 symlink provider, 24 core fonts use ("X")
# Globals read: DATADIR, FL, TSUM
analyse() {
  echo ""
  echo "Conducting tests:"
  echo ""
  echo -n "— "
  "$DATADIR/test-info" outside-usr-share-fonts title
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    ($10 !~ /^\/usr\/share\/fonts\//)' "$FL" \
    | sort -df -t "|" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup outside-usr-share-fonts tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" without-rpm-metadata title
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    ($7 != "M")' "$FL" \
    | sort -df -t "|" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup without-rpm-metadata tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" family-mixing title
  # Packages whose files declare more than one family name
  awk -F '|' '
    (NR > 1) && ($11 != "") {
      rpm = $3 "-" $4 "." $5
      files[rpm] = files[rpm] $0 "#"
      if ( ! family[rpm] ) {
        family[rpm] = $11
      }
      else {
        if (family[rpm] != $11) {
          mixed[rpm] = 1
        }
      }
    }
    END {
      for (rpm in mixed) {
        sub("#$", "", files[rpm])
        split(files[rpm], lines,"#")
        for (l in lines) {
          print lines[l]
        }
      }
    }' "$FL" \
    | sort -df -t "|" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup family-mixing tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" duplicated-file title
  # Files that share their checksum with at least one other file
  awk -F '|' '
    (NR > 1) && ($24 != "X") && ($16 != "Link") {
      files[$18] = files[$18] $0 "#"
    }
    END {
      for (checksum in files) {
        sub("#$", "", files[checksum])
        count = split(files[checksum], lines,"#")
        if (count > 1) {
          for (l in lines) {
            print lines[l]
          }
        }
      }
    }' "$FL" \
    | sort -g -t "|" -k 18 > tmp/tmp.csv
  [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) { rpm = "[" rpm "]" }
        sub(".*/", "", $10)
        # The label is rebuilt for every file: without this reset a file
        # lacking a family name would inherit the label of the previous one
        sig = ""
        if ( $11 != "" ) { sig = $11 }
        if ( $12 != "") {
          if ( sig != "" ) { sig = sig ", " }
          sig = sig $12
        }
        if ( sig != "" ) { sig = "#(" sig ")" }
        print rpm "|" $18 "|" $10 sig
      }' tmp/tmp.csv | enum_rpm
  test_wrapup duplicated-file tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" duplicated-face-ext title
  # Faces (family name + face name) found in more than one package
  awk -F '|' '
    (NR > 1) && ($11 != "") && ($12 != "") {
      face = $11 "|" $12
      rpm = $3 "-" $4 "." $5
      files[face] = files[face] $0 "#"
      if ( ! loc[face] ) { loc[face] = rpm }
      else {
        if (loc[face] != rpm) {
          duplicated[face] = 1
        }
      }
    }
    END {
      for (face in duplicated) {
        sub("#$", "", files[face])
        split(files[face], lines,"#")
        for (l in lines) {
          print lines[l]
        }
      }
    }' "$FL" \
    | sort -df -t "|" -k 11 -k 12 > tmp/tmp.csv
  [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) { rpm = "[" rpm "]" }
        sub(".*/", "", $10)
        print rpm "|" $11 ", " $12 "|" $10
      }' tmp/tmp.csv | enum_rpm
  test_wrapup duplicated-face-ext tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" fc-query title
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    ($15 == "")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup fc-query tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" libmagic title
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    (tolower(" " $16 " ") !~ / font /)' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup libmagic tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" broken-symlink title
  awk -F '|' '(NR > 1) && ($16 == "Link") && ($23 == "")' "$FL" > tmp/tmp.csv
  [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) { rpm = "[" rpm "]" }
        sub(".*/", "", $10)
        print rpm "|" $10 " ↛ " $22
      }' tmp/tmp.csv | enum_rpm
  test_wrapup broken-symlink tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" rpmlint title
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
    ($9 != "")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup rpmlint tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" mixed-with-non-font-data title
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
    ($8 != "")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup mixed-with-non-font-data tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" arch-package title
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
    ($5 != "noarch")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup arch-package tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" bad-rpm-naming title
  awk -F "|" '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    $3 !~ /^[0-9abcdefghijklmnopqrstuvwxyz\.-]*-fonts$/' "$FL" \
    > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup bad-rpm-naming tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" bad-naming title
  awk -F '|' '(NR > 1) && \
    ((tolower($11)!=tolower($13)) || (tolower($12)!=tolower($14)))' \
    "$FL" > tmp/tmp.csv
  [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) { rpm = "[" rpm "]" }
        sub(".*/", "", $10)
        print rpm "|" $11 ", " $12 " → " $13 ", " $14 "|" $10
      }' tmp/tmp.csv | enum_rpm
  test_wrapup bad-naming tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" core-fonts title
  awk -F '|' '(NR > 1) && ($24=="X")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup core-fonts tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" font-linking title
  awk -F '|' '(NR > 1) && ($16=="Link")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup font-linking tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" duplicated-face-int title
  # Faces found several times within the same package (PCF and Type 1
  # formats excepted)
  awk -F '|' '
    (NR > 1) && ($24!="X") && \
    ($15 != "PCF") && ($15 != "Type 1") && \
    ($11 != "") && ($12 != "") {
      sig = $3 "|" $4 "|" $5 "|" $11 "|" $12
      files[sig] = files[sig] $0 "#"
    }
    END {
      for (sig in files) {
        sub("#$", "", files[sig])
        n = split(files[sig],lines,"#")
        if (n > 1) {
          for (l in lines) { print lines[l] }
        }
      }
    }' "$FL" \
    | sort -df -t "|" -k 11 -k 12 -k 3 -k 2 > tmp/tmp.csv
  [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) { rpm = "[" rpm "]" }
        sub(".*/", "", $10)
        print rpm "|" $11 ", " $12 "|" $10
      }' tmp/tmp.csv | enum_rpm
  test_wrapup duplicated-face-int tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" fontlint title
  awk -F '|' '(NR > 1) && ($24!="X") && ($21 > 0)' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup fontlint tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" no-english-metadata title
  awk -F '|' '(NR > 1) && ($24!="X") && \
    ($15 != "") && (($11 == "") || ($12 == ""))' "$FL" > tmp/tmp.csv
  [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
    awk -F '|' '
      {
        rpm = $3 "-" $4 "." $5
        if ( $7 == "M" ) { rpm = "[" rpm "]" }
        print $10 "|" rpm
      }' tmp/tmp.csv \
    | column -t -s '|' | pretty_indent
  test_wrapup no-english-metadata tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" partial-scripts title
  awk -F '|' '(NR > 1) && ($24!="X") && ($19 > 0)' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup partial-scripts tmp/tmp.csv "$TSUM"
  echo -n "— "
  "$DATADIR/test-info" partial-blocks title
  awk -F '|' '(NR > 1) && ($24!="X") && ($20 > 0)' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup partial-blocks tmp/tmp.csv "$TSUM"
}
# Print the closing report of the audit: statistics on the packages that
# declare font metadata, statistics on the font files found in other
# packages, then the score board of the tests.
# Globals read: FL, TSUM. Uses tmp/tmp.csv as scratch file.
sum_up() {
  printf '\n%s\n\n' "Audit results:"
  echo "– packages that declare font metadata:"
  awk -F '|' '(NR > 1) && ($7 == "M") && ($24!="X")' "$FL" > tmp/tmp.csv
  stats tmp/tmp.csv
  echo "☛ File size is computed as extracted, while rpm is a compressed format." \
    | pretty_indent
  echo "☛ Mid-term, files in legacy PCF or Type1 formats need to be converted or removed." \
    | pretty_indent
  echo ""
  echo "– font files in other packages (we should not find any!)"
  awk -F '|' '(NR > 1) && ($7 != "M") && ($16 != "Link") && ($24!="X")' "$FL" > tmp/tmp.csv
  stats tmp/tmp.csv
  # The warning only makes sense when such files were found
  if [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] ; then
    echo "☛ Bad packaging may result in arched packages or mixed content." \
      | pretty_indent && echo ""
  fi
  printf '%s\n\n' "– errors, warnings and suggestions:"
  test_score "$TSUM"
}
# Prepare one mail bundle per source package listed in "$TSUM".
#
# For each source package a directory tmp/<srpm> is filled with the
# repository report, the lines of "$FL" and "$TSUM" that concern the package,
# the data collected for its binary packages and a message.txt explaining the
# findings. The directory is then archived as spam/<srpm>.tar.xz.
# A send-messages.sh script that mails every bundle with mutt is generated
# in spam/ as well.
# Globals read: TSUM FL TIMESTAMP ID REPOURL CREATEREPO YUM bad_pattern
prepare_spam() {
echo ""
echo "Packing mail data…"
mkdir spam
for srpm in $(awk -F '|' 'FNR>1 { print $2 }' "$TSUM" | uniq) ; do
[ ! -d "tmp/$srpm" ] && mkdir "tmp/$srpm"
cp -p report.txt "tmp/$srpm/repo-report.txt"
# Per-package extracts of the data files: header line + matching lines
for file in "$FL" "$TSUM" ; do
awk 'FNR==1' "$file" > "tmp/$srpm/$file"
awk -F '|' -v srpm="$srpm" 'FNR>1 && $2==srpm' "$file" >> "tmp/$srpm/$file"
done
# Data directories are named after the binary packages, with the characters
# of $bad_pattern replaced by "_"
for rpm in $(awk -F '|' 'FNR>1 { print $3 "-" $4 "." $5 }' "tmp/$srpm/$TSUM" | uniq | sed "s+$bad_pattern+_+g") ; do
cp -pr "data/$rpm" "tmp/$srpm"
done
# The here-document is expanded: variables and the test_score call are
# replaced by their value in the message
cat > tmp/$srpm/message.txt << EOF
Dear packager,
At $TIMESTAMP, your “$srpm” package failed one or more of the tests
I was performing on the “$ID” repository located at:
$REPOURL
There are three different reasons that may cause this message:
1. your package is including one or more font files, but not packaging
them properly;
2. your package is including one or more font files, and I've found
issues in some of them;
3. your package is not shipping any font file, but the way it accesses
fonts in other packages is not satisfying.
To stop receiving this message, you need to:
1. drop the font files or fix their packaging;
2. relay the fonts issues to the fonts upstream to get them revised;
3. work with the code upstream to improve the way it accesses font
files (usually by making it use fontconfig through a higher-level
text library such as pango, pango-cairo, harfbuzz, or QT)
You can self-check your packages at any time by:
1. installing $CREATEREPO and fontpackages-tools:
# $YUM install $CREATEREPO fontpackages-tools
2. putting your packages and any font package they depends on in a
test directory
3. indexing this directory with $CREATEREPO:
$ $CREATEREPO path-to-test-directory
4. running repo-font-audit:
$ repo-font-audit test absolute-path-to-test-directory
A summary of the issues I detected is appended here. For your
convenience a more comprehensive analysis is also attached to this
message.
Errors, warnings and suggestions:
$(test_score "tmp/$srpm/$TSUM")
Please take the appropriate measures to fix the “$srpm” package.
I will warn you again if it is still necessary next time I am ran.
This report was generated by the repo-font-audit command from:
http://fedoraproject.org/wiki/fontpackages
Please post questions, suggestions, patches or bug reports to:
https://admin.fedoraproject.org/mailman/listinfo/fonts
(subscription required)
Your friendly QA robot,
--
repo-font-audit
EOF
cd tmp
tar cf "$srpm.tar" "$srpm"
cd ..
xz -9 "tmp/$srpm.tar"
mv "tmp/$srpm.tar.xz" spam
done
# Generate the sending script. In this here-document the escaped \$srpm is
# left for the generated script to expand, while $ID and the list of source
# packages are expanded now
cat > spam/send-messages.sh << EOF
#!/bin/sh
# Send warnings to problem package owners
# This is a bit distribution-specific, people from other distributions are
# welcome to suggest how to make it more agnostic
#EMAIL="repo-font-audit <your@mail>"
#REPLYTO=another@mail
#export EMAIL REPLYTO
for srpm in \\
$(awk -F '|' 'FNR>1 { print $2 " \\" }' "$TSUM" | uniq)
; do
tar -xf \$srpm.tar.xz */message.txt -O \\
| mutt -s "[RFA] Your \$srpm $ID package did not pass QA" \\
-a \$srpm.tar.xz -- \$srpm-owner@fedoraproject.org
sleep 5
done
EOF
chmod +x spam/send-messages.sh
}
# Archive the results and print the closing message.
#
# Three archives are written to "$ORIGDIR": the complete data ($RES), a short
# summary ($SRES) and the mail bundles ($SPAM, renamed from spam/).
# The closing message gives the run time (measured from start_time, which is
# set by collect), a digest of the processed items and the location of the
# archives.
# Globals read: SPAM RES SRES TSUM FL ORIGDIR start_time
pack_data() {
  echo "Packing result data…"
  mv spam "$SPAM"
  mkdir -p "$RES/data"
  cp "$TSUM" summary.txt report.txt "$FL" "$RES/"
  cp -pr data/* "$RES/data" 2>/dev/null
  mkdir "$SRES/"
  cp "$TSUM" summary.txt report.txt "$SRES/"
  for report in "$RES" "$SRES" "$SPAM" ; do
    tar cf "$report.tar" "$report"
    xz -9 "$report.tar"
    mv "$report.tar.xz" "$ORIGDIR/"
  done
  echo ""
  echo "Audit complete!"
  # Data without its header line, for the final digest
  awk 'NR > 1' "$FL" > "tmp/$FL"
  end_time=$(date "+%s")
  t=$(($end_time-$start_time))
  th=$(($t/(60*60)))
  tm=$((($t/60)-(th*60)))
  ts=$(($t-(($th*60+$tm)*60)))
  # Start from a known value: the variable must not be inherited from the
  # environment of the caller
  elapsed_time=""
  [ $th -gt 0 ] && elapsed_time="$th h"
  if [ $tm -gt 0 ] ; then
    [ "$elapsed_time" != "" ] \
      && elapsed_time="$elapsed_time $tm min" \
      || elapsed_time="$tm min"
  fi
  if [ $ts -gt 0 ] ; then
    [ "$elapsed_time" != "" ] \
      && elapsed_time="$elapsed_time $ts s" \
      || elapsed_time="$ts s"
  fi
  # A run shorter than one second would otherwise print "Run time: ."
  if [ "$elapsed_time" = "" ] ; then
    elapsed_time="0 s"
  fi
  (
  cat << EOF
Run time: $elapsed_time.
Number of items processed:
$(summary "tmp/$FL")
1. Extracted data: $ORIGDIR/$RES.tar.xz
2. Short summary: $ORIGDIR/$SRES.tar.xz
3. Mail data: $ORIGDIR/$SPAM.tar.xz
This report was generated by the repo-font-audit command from:
http://fedoraproject.org/wiki/fontpackages
Please post questions, suggestions, patches or bug reports to:
https://admin.fedoraproject.org/mailman/listinfo/fonts
(subscription required)
EOF
  ) | pretty_indent
}
# End of function declarations
# Main program: check the arguments, set up the names used by the functions
# above, then run the audit steps from a private temporary directory.
[ "$#" -lt "2" ] && usage
TIMESTAMP=$(date -u +%Y%m%dT%H%M%SZ)
ID=$1
REPOID="$1-rfa-$TIMESTAMP"
REPOURL=$(echo "$2" | fix_url)
ORIGDIR="$PWD"
# Names of the data files created in the work directory
FPL="font-packages.csv"
PWFL="packages-with-fonts.csv"
XPL="package-using-x11.csv"
CSL="checksums.csv"
FL="consolidated-data.csv"
TSUM="test-summary.csv"
# Base names of the result archives
RES="repo-font-audit-$ID-$TIMESTAMP"
SRES="repo-font-audit-$ID-$TIMESTAMP-short"
SPAM="repo-font-audit-$ID-$TIMESTAMP-mail"
# Use the dnf tool set when dnf is installed, the yum one otherwise
if [ -x /usr/bin/dnf ] ; then
  CREATEREPO='createrepo_c'
  REPOQUERY='dnf repoquery'
  YUM='dnf'
else
  CREATEREPO='createrepo'
  REPOQUERY='repoquery'
  YUM='yum'
fi
# Everything below writes relative paths: stop right away if the work
# directory can not be created or entered, instead of running the audit in
# whatever directory the shell happens to be in
if ! TMPDIR=$(mktemp -d --tmpdir=/tmp "$RES-XXXXXXXXXX") ; then
  echo "Could not create a temporary work directory in /tmp" >&2
  exit 1
fi
cd "$TMPDIR" || exit 1
mkdir tmp || exit 1
collect
consolidate
analyse | tee report.txt
sum_up | tee summary.txt
prepare_spam
pack_data
cd "$ORIGDIR"
rm -fr "$TMPDIR"
echo "♻"