#!/bin/sh
# Quick and dirty script to audit font use in a package repository
#
# It is slow, it is ugly, and it requires a good network connection

# Function declarations

# Print the command-line help on stderr and abort with status 1.
usage() {
  cat >&2 << EOF_USAGE
Usage:
$0 <id> <location>

<id>:       identifier of the package repository to check
<location>: location of the package repository to check

Examples:
$0 tmp /tmp/rpm
$0 tmp file://tmp/rpm
$0 rawhide http://example.com/mirrors/fedora.redhat.com/fedora/linux/development/x86_64/os/
EOF_USAGE
  exit 1
}

DATADIR="$(dirname "$0")/../private"

# Filenames with whitespace or & + makefile = !!!
bad_pattern="[[:space:]&\:]"

# Tools and humans make mistakes; try to fix some obvious ones
# Filter: reads URLs/paths on stdin, writes the normalised form on stdout.
#   file:<any number of slashes>path  ->  /path
#   ftp:|http:|https: followed by any number of slashes  ->  scheme://
fix_url() {
  sed -e 's=^file:\(\/*\)=/=' \
      -e 's=^\(ftp\|http\|https\):\(\/*\)=\1://='
}

# Filter: wrap stdin to the terminal width (minus 2) and prefix every
# resulting line with one space.
pretty_indent() {
  pi_cols=$(tput cols 2>/dev/null)
  # tput prints nothing usable without a terminal description; a blank or
  # non-numeric width would turn into "fold -w -2", so fall back to 80.
  case "$pi_cols" in
    ''|*[!0-9]*) pi_cols=80 ;;
  esac
  [ "$pi_cols" -gt 2 ] || pi_cols=80
  # Default IFS is kept on purpose: read trims the trailing blank that
  # "fold -s" leaves at each break. -r keeps backslashes intact.
  fold -s -w $((pi_cols - 2)) \
    | while read -r line ; do
        printf ' %s\n' "$line"
      done
}

# $1 = CSV file to evaluate
# Prints: files|packages|source packages|file size (MiB)|package size (MiB)
tally() {
  awk -F "|" '
    {
      filesize   += $17
      filenumber += 1
      # one entry per distinct package / source package
      rpmsz[$3 "-" $4 "." $5] = $6
      srpms[$2] = 1
    }
    END {
      for (rpm in rpmsz) {
        rpmsize   += rpmsz[rpm]
        rpmnumber += 1
      }
      for (srpm in srpms) {
        srpmnumber += 1
      }
      rpmsize  = rpmsize  / (1024*1024)
      filesize = filesize / (1024*1024)
      # "+ 0" forces numeric output so an empty input yields zeros
      print (filenumber + 0) "|" (rpmnumber + 0) "|" (srpmnumber + 0) "|" \
            filesize "|" rpmsize
    }' "$1"
}

# $1 = CSV file to describe
# Prints a one-sentence tally of $1, or "None!" when it has no lines,
# followed by a blank line.
summary() {
  if [ "$(wc -l < "$1")" -gt 0 ] ; then
    tally "$1" | awk -F '|' '{ print "⇒ " $1 " file(s) (" $4 " MiB) in " $2 \
      " package(s) (" $5 " MiB) generated from " $3 " source package(s)." }' \
      | pretty_indent
  else
    echo "⇒ None!" | pretty_indent
  fi
  echo ""
}

# $1 = summary file
# $2 = temporary data file
# $3 = test id
# Appends one column labelled $3 to the summary file: rows whose
# (fields 1-5 + filename) key appears in $2 get "1" in that column, and keys
# that were not in the summary yet are added as new rows. The header stays
# first; the other rows are re-sorted. Uses tmp/tmp.ccs.csv as scratch file.
complete_csv_summary() {
  awk -F '|' -v t_datafile="$2" -v testlabel="$3" '
    # first file: remember the failing (package, file) keys
    FILENAME==t_datafile {
      fail[$1"|"$2"|"$3"|"$4"|"$5"|"$10] = 1
    }
    # summary header: copy it and add the new column label
    FILENAME!=t_datafile && FNR==1 {
      max = NF + 1
      header = $1
      for (i = 2 ; i <= max ; i++) {
        header = header "|" $i
      }
      print header testlabel
    }
    # existing summary rows: pad to the new width and add the result
    FILENAME!=t_datafile && FNR>1 {
      line = $1 "|" $2 "|" $3 "|" $4 "|" $5 "|" $6
      newline = $1
      for (i = 2 ; i <= max ; i++) {
        newline = newline "|" $i
      }
      print newline fail[line]
      processed[line] = 1
    }
    # failures that had no row yet
    END {
      for ( line in fail ) {
        if ( ! processed[line] ) {
          newline = line
          for (i = 7 ; i <= max ; i++) {
            newline = newline "|"
          }
          print newline fail[line]
        }
      }
    }' "$2" "$1" > tmp/tmp.ccs.csv
  awk 'FNR==1' tmp/tmp.ccs.csv > "$1"
  awk 'FNR>1' tmp/tmp.ccs.csv | sort -f -t '|' >> "$1"
  rm tmp/tmp.ccs.csv
}

# $1 = CSV file whose last field is the grouping key
# Prints "key|<tally output>" for every distinct key.
# Uses tmp/tmp.ss.csv as scratch file.
substats() {
  ss_datafile="$1"
  awk -F '|' '{ print $NF }' "$ss_datafile" | sort -u \
    | while read -r key ; do
        # printf rather than "echo -n", which /bin/sh need not support
        printf '%s|' "$key"
        awk -F '|' -v key="$key" '$NF==key' "$ss_datafile" > tmp/tmp.ss.csv
        tally tmp/tmp.ss.csv
        rm tmp/tmp.ss.csv
      done
}

# $1 = CSV file to sum_up
# Prints the summary of $1 followed, when it is not empty, by per-format and
# per-arch tables. Uses tmp/tmp.s.csv as scratch file.
stats() {
  s_datafile="$1"
  summary "$s_datafile"
  if [ "$(wc -l < "$s_datafile")" -gt 0 ] ; then
    (
      echo "Format|Files|rpm|srpm|Files (MiB)|rpm (MiB)"
      awk -F '|' '$13 != "" { print $0 "|" $15 }' "$s_datafile" > tmp/tmp.s.csv
      substats tmp/tmp.s.csv
    ) | column -t -s '|' | pretty_indent
    echo ""
    (
      echo "Arch|Files|rpm|srpm|Files (MiB)|rpm (MiB)"
      awk -F '|' '{ print $0 "|" $5 }' "$s_datafile" > tmp/tmp.s.csv
      substats tmp/tmp.s.csv
    ) | column -t -s '|' | pretty_indent
    echo ""
    rm tmp/tmp.s.csv
  fi
}

# $1 = CSV file to sum_up
# Prints the sorted list of package names found in $1 with their number of
# lines; names of packages tagged "M" in field 7 are put in brackets.
# Prints nothing for an empty file. Needs gawk (asorti).
list_rpm() {
  if [ "$(wc -l < "$1")" -gt 0 ] ; then
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) {
          rpm = "[" rpm "]"
        }
        total[rpm] += 1
      }
      END {
        n = asorti(total, sorted)
        for ( i = 1 ; i <= n ; i++ ) {
          line = line sorted[i] "(" total[sorted[i]] "), "
        }
        sub(", $","",line)
        print line
      }' "$1" | pretty_indent
  fi
}

# Pipe CSV file to sum_up
# It is assumed to be of the
# form
#   rpm | pattern to match | pattern to display
# patterns may include # as separators
# Groups the input lines by "pattern to match" and prints the groups, those
# with the most entries first. Needs gawk (asort, asorti, gensub).
enum_rpm() {
  awk -F '|' '
    BEGIN {
      print ""
    }
    {
      rpm = $1
      key = $2
      pat = $3
      files[key] = files[key] pat "#" rpm "|"
    }
    END {
      for (key in files) {
        sub("\\|$", "", files[key])
        total = split(files[key], lines,"|")
        # asorti() compares indices as strings: zero-pad the count so that
        # 10 entries sort after 2 entries
        idx = sprintf("%09d", total)
        pb[idx] = pb[idx] key "|"
      }
      n = asorti(pb, sorted)
      for (i = n; i >= 1; i--) {
        total = sorted[i]
        sub("\\|$", "", pb[total])
        split(pb[total], keys,"|")
        nk = asort(keys, sortedkeys)
        for ( k = 1 ; k <= nk ; k++ ) {
          key = sortedkeys[k]
          print gensub("#", ", ", "g", key)
          split(files[key], lines, "|")
          nl = asort(lines, sortedlines)
          for ( l = 1 ; l <= nl ; l++ ) {
            # print the sorted copy, not the unsorted source array
            print "  " gensub("#", "\t", "G", sortedlines[l])
          }
          print ""
        }
      }
    }' | pretty_indent
}

# $1 = CSV file
# Succeeds when the file holds at least one line.
has_rows() {
  [ "$(wc -l < "$1")" -gt 0 ]
}

# $1 = test id
# Prints the bullet that starts a test section followed by the test title.
test_title() {
  # printf rather than "echo -n", which /bin/sh need not support
  printf '— '
  "$DATADIR/test-info" "$1" title
}

# $1 = this test id
# $2 = csv file with the results of this test
# $3 = csv file that consolidates the results of all tests
test_wrapup() {
  complete_csv_summary "$3" "$2" "$1"
  summary "$2"
}

# $1 = csv file with test results
# Prints a score table (one row per problem package, one column per test that
# fired), the key that maps row numbers to packages, and the explanation of
# every test that fired. Needs gawk (asorti).
test_score() {
  if [ "$(wc -l < "$1")" -gt 1 ] ; then
    # Score table; lh is the first test column of the summary file
    awk -F '|' '
      BEGIN {
        lh = 7
        headerline = "P#"
      }
      FNR==1 {
        max = NF
      }
      FNR>1 {
        id = ""
        for (i = 1 ; i < (lh-1) ; i++) {
          if ($i != "")
            id = id $i "|"
          else
            id = id " |"
        }
        sub("\\|$","",id)
        fail[id] = 1
        for (i = lh ; i <= max ; i++) {
          score[id"|"i] = score[id"|"i] + $i
          totalscore[i] = totalscore[i] + $i
        }
      }
      END {
        for (i = lh ; i <= max ; i++) {
          if ( totalscore[i] ) {
            headerline = headerline "|" "t" (i + 1 - lh)
          }
        }
        print headerline
        # asorti() with one argument replaces fail[] by its sorted indices
        n = asorti(fail)
        for (l = 1; l <= n; l++) {
          newline = l
          for (c = lh ; c <= max ; c++) {
            if ( totalscore[c] ) {
              if ( score[fail[l]"|"c] ) {
                newline = newline "|" score[fail[l]"|"c]
              } else {
                newline = newline "|‧"
              }
            }
          }
          print newline
        }
        totalline = "Total"
        for (i = lh ; i <= max ; i++) {
          if ( totalscore[i] ) {
            totalline = totalline "|" totalscore[i]
          }
        }
        print totalline
      }' "$1" | column -t -s '|'
    echo ""

    # Key: row number -> package
    awk -F '|' '
      BEGIN {
        lh = 7
      }
      FNR>1 {
        id = ""
        for (i = 1 ; i < (lh-1) ; i++) {
          if ($i != "")
            id = id $i "|"
          else
            id = id " |"
        }
        sub("\\|$","",id)
        fail[id] = 1
      }
      END {
        print "P#|Maintainer|SRPM|RPM|EVR|Arch"
        n = asorti(fail)
        for (l = 1; l <= n; l++) {
          print l "|" fail[l]
        }
      }' "$1" | column -t -s '|'
    echo ""

    echo "Test explanation:"
    echo ""
    # One "number|test id" line per test that fired
    awk -F '|' '
      BEGIN {
        lh = 7
      }
      FNR==1 {
        max = NF
        for (i = lh ; i <= max ; i++) {
          legend[i] = i+1-lh "|" $i
        }
      }
      FNR>1 {
        for (i = lh ; i <= max ; i++) {
          totalscore[i] = totalscore[i] + $i
        }
      }
      END {
        for (i = lh ; i <= max ; i++) {
          if ( totalscore[i] ) {
            print legend[i]
          }
        }
      }' "$1" \
      | while IFS='|' read -r ts_number ts_id ; do
          printf 't%s. ' "$ts_number"
          "$DATADIR/test-info" "$ts_id" title
          echo ""
          ( "$DATADIR/test-info" "$ts_id" help ) | pretty_indent
        done
  else
    echo "⇒ None!"
  fi
}

# Helper for collect(): run one repository query and normalise its output.
# $1  = one-letter tag written as last CSV field
# $2… = repoquery options that select the packages
collect_query() {
  cq_tag="$1"
  shift
  # $REPOQUERY is unquoted on purpose: it may hold "dnf repoquery".
  # The sed expression cuts "-version-release.arch.src.rpm" off the source
  # package name (first field).
  $REPOQUERY "--repofrompath=$REPOID,$FREPOURL" "--repoid=$REPOID" \
    --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|$cq_tag" \
    "$@" \
    | sort -u \
    | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g'
}

# List the packages of interest, fetch and unpack each of them, classify
# their files, run the analysis makefile and gather the results in
# tmp/$FL.2 and data/. Sets start_time, which pack_data reads.
collect() {
  start_time=$(date "+%s")

  # This is very ugly but the safest syntax to use with any repoquery version
  FREPOURL=$(echo "$REPOURL" | sed 's=^/=file:///=')

  echo "Looking for packages:"
  echo "— with font metadata…"
  collect_query M --whatprovides "font(*)" --quiet > "$FPL"

  echo "— that include files with common font extensions…"
  collect_query E \
    -f '*.ttf' -f '*.otf' -f '*.ttc' \
    -f '*.pfb' -f '*.pfa' \
    -f '*.pcf.gz' -f '*.pcf' -f '*.bdf' --quiet > "$PWFL"

  echo "— that use the core X11 protocol…"
  collect_query X --whatrequires 'libX11.so*' > "$XPL"

  if [ "$(cat "$FPL" "$PWFL" "$XPL" | wc -l)" -eq 0 ] ; then
    echo "Nothing to do!"
    exit
  fi

  # Merge the three lists: one line per package, tags joined with commas
  cat "$FPL" "$PWFL" "$XPL" \
    | awk -F '|' '
        {
          tag[$1 "|" $2 "|" $3 "|" $4 "|" $5] = tag[$1 "|" $2 "|" $3 "|" $4 "|" $5] $6 ","
        }
        END {
          n = asorti(tag,rpm)
          for (l = 1; l <= n; l++) {
            sub(",$", "", tag[rpm[l]])
            print rpm[l] "|" tag[rpm[l]]
          }
        }' > "tmp/$FL.1"

  echo ""
  echo "Inspecting packages:"
  # Everything below works with paths relative to tmp/, including a
  # recursive removal: never go on from the wrong directory.
  cd "tmp" || exit 1
  rm -f "$FL.2"
  # The loop stays on the right-hand side of a pipe so that it runs in a
  # subshell: directory changes made per package cannot leak out.
  cat "$FL.1" \
    | while IFS='|' read -r c_srpm c_name c_evr c_arch c_size c_tags ; do
        rpm="$c_name-$c_evr.$c_arch"
        rpmline="$c_srpm|$c_name|$c_evr|$c_arch|$c_size"
        rpmd=$(echo "$rpm" | sed "s+$bad_pattern+_+g")
        case "$c_tags" in
          *M*) metadata="M" ;;
          *)   metadata=""  ;;
        esac
        # printf rather than "echo -n"/"echo -ne", which /bin/sh need not
        # support; \b moves back over the previous progress glyph
        printf '– %s ◔' "$rpm"
        mkdir "$rpmd"
        cd "$rpmd" || continue
        # Same repository URL form as the queries above
        rpm_loc=$($REPOQUERY "--repofrompath=$REPOID,$FREPOURL" "--repoid=$REPOID" \
                    --location "$rpm" 2>/dev/null \
                  | fix_url)
        if echo "$rpm_loc" | grep -q -e '^[\./~]' ; then
          ln -s "$rpm_loc" .
        else
          curl -O --silent --show-error --retry 3 --connect-timeout 10 --retry-delay 15 --url "$rpm_loc"
        fi
        printf '\b◑'
        rpm2cpio *.rpm > "$rpm.cpio"
        printf '\b◕'
        cpio --quiet -it < "$rpm.cpio" > "$rpm.lst"
        # Number of files that are neither fonts, font configuration nor doc
        nofont_score=$(sed 's=^\./=/=g' "$rpm.lst" \
          | grep -cviE -e "^/usr/share/fonts" \
                       -e "^/usr/share/fontconfig/conf.avail/" \
                       -e "^/etc/fonts/conf.d/" \
                       -e "^/usr/share/doc/" \
                       -e "^/etc/X11/fontpath.d/" \
                       -e '\.((ttf)|(ttc)|(otf)|(pfa)|(pfb)|(bdf)|(pcf)|(pcf\.gz))$')
        echo "$rpmline|$metadata|$nofont_score" > rpm-info.txt
        case "$c_tags" in
          *E*)
            grep -iE '\.((ttf)|(ttc)|(otf)|(pfa)|(pfb)|(bdf)|(pcf)|(pcf\.gz))$' "$rpm.lst" \
              | sort > "$rpm.fonts.lst"
            ;;
        esac
        case "$c_tags" in
          *X*)
            grep -i -e "^./sbin/" \
                    -e "^./usr/sbin/" \
                    -e "^./usr/kerberos/sbin" \
                    -e "^./bin/" \
                    -e "^./usr/bin/" \
                    -e "^./usr/kerberos/bin/" \
                    -e "^./lib.*/" \
                    -e "^./usr/lib.*/" \
                    -e "^./opt/" \
                    -e "^./usr/X11R6/" \
                    -e "^./usr/games/" \
                    -e "^./usr/local/" "$rpm.lst" \
              | grep -vi -e "^./usr/bin/dmxwininfo" \
                         -e "^./usr/bin/Xdmx" \
                         -e "^./usr/bin/xfontsel" \
                         -e "^./usr/bin/xlsfonts" \
                         -e "^./usr/bin/Xnest" \
                         -e "^./usr/bin/xprop" \
                         -e "^./usr/bin/xsetroot" \
                         -e "^./usr/bin/xwininfo" \
                         -e "^./usr/bin/x11vnc" \
                         -e "^./usr/bin/x2vnc" \
                         -e "^./usr/lib.*/libXcursor.so" \
              | sort > "$rpm.bin.lst"
            ;;
        esac
        touch "$rpm.fonts.lst" "$rpm.bin.lst"
        cat "$rpm.fonts.lst" "$rpm.bin.lst" | sort -u > "$rpm.files.lst"
        mkdir src
        ( cd src && cpio -idm --quiet -E "../$rpm.files.lst" < "../$rpm.cpio" )
        rm "$rpm.cpio"
        printf '\b● '

        # Font files: one progress letter per file, one .info file per font
        sed 's=^\./==g' "$rpm.fonts.lst" \
          | while IFS= read -r file ; do
              unset target
              type=$(file -bzh "src/$file")
              # Keep only the leading description of the libmagic output
              short_type=$(printf '%s\n' "$type" \
                | sed -e 's+ (\(.*\)++g' \
                      -e 's+ `\(.*\)++g' \
                      -e 's+,\(.*\)++g' \
                      -e 's+\( \)*$++g')
              case "$short_type" in
                "TrueType font data")
                  printf 't'
                  ;;
                "TrueType font collection data")
                  printf 'T'
                  ;;
                "OpenType font data")
                  printf 'o'
                  ;;
                "X11 Portable Compiled Font data")
                  printf 'b'
                  ;;
                "X11 BDF font text")
                  printf 'B'
                  ;;
                # PostScript files are a mess
                "PostScript Type 1 font text")
                  printf 'P'
                  ;;
                "PostScript Type 1 font program data")
                  printf 'p'
                  ;;
                "PostScript document text conforming DSC level 3.0")
                  type="ignored"
                  printf -- '-'
                  ;;
                "PostScript document text"|"8086 relocatable")
                  printf '!' >&2
                  ;;
                "symbolic link to"|"broken symbolic link to")
                  target=$(readlink -m "src/$file")
                  # Prefix removal instead of sed: the path may hold
                  # characters that are special in a sed expression
                  target=${target#"$PWD/src"}
                  if echo "$target" | grep -q "^/usr/share/fonts" ; then
                    type="Link"
                    printf 'l'
                  else
                    type="ignored"
                    printf -- '-'
                  fi
                  ;;
                *)
                  printf '?'
                  ;;
              esac
              if [ "$type" != "ignored" ] && [ ! -h "src/$file" ] ; then
                mkdir -p "$(dirname "tmp/$file")"
                echo "/$file|$type" > "tmp/$file.rfo.fonts.info"
                if [ "$type" != "TrueType font data" ] \
                   && [ "$type" != "OpenType font data" ] ; then
                  touch "tmp/$file.rfo.unicover"
                fi
              fi
              if [ "$type" = "Link" ] ; then
                echo "/$file|$target" >> font-links.txt
              fi
            done

        # Binaries: one dot and one .info file per ELF file
        sed 's=^\./==g' "$rpm.bin.lst" \
          | while IFS= read -r file ; do
              type=$(file -bzh "src/$file")
              if echo "$type" | grep -q -e '^ELF ' -e ' ELF ' ; then
                printf '‧'
                mkdir -p "$(dirname "tmp/$file")"
                echo "/$file|$type" > "tmp/$file.rfo.core-fonts.info"
              fi
            done
        echo ""
        cd ..
      done

  echo ""
  echo "Analysing files…"
  # Rename whatever the makefile could not cope with
  find . -type d | grep "$bad_pattern" \
    | while IFS= read -r dir ; do
        mkdir -p "$(echo "$dir" | sed "s+$bad_pattern+_+g")"
      done
  find . -type f | grep "$bad_pattern" \
    | while IFS= read -r file ; do
        mv "$file" "$(echo "$file" | sed "s+$bad_pattern+_+g")"
      done

  # There must be a cleaner way to do this
  parallelism=$(($(grep -c processor /proc/cpuinfo) + 1))
  make -s -r -j "$parallelism" -f "$DATADIR/repo-font-audit.mk"

  find . -name "*\.rfo\.fonts" \
      -o -name "*\.rfo\.core-fonts" \
      -o -name processed-font-links.txt \
    | while IFS= read -r datafile ; do
        cat "$datafile" >> "$FL.2"
      done

  mkdir ../data
  for rpmlint in */rpmlint.txt ; do
    # an unmatched glob is left as is by the shell
    [ -e "$rpmlint" ] || continue
    mkdir "../data/$(dirname "$rpmlint")"
    mv "$rpmlint" "../data/$rpmlint"
  done

  for ext in fontlint unicover fc-query ; do
    find . -name "*\.rfo\.$ext" \
      | while IFS= read -r file ; do
          # ./<package dir>/tmp/<path>.rfo.<ext>
          #   -> <package dir>/<path with / turned into _>.<ext>.txt
          dest=$(echo "$file" \
            | sed "s+./\([^/]*\)/tmp/\(.*\).rfo.$ext+\1|\2.$ext.txt+" \
            | sed 's+/+_+g' | sed 's+|+/+g')
          if [ -s "$file" ] ; then
            mkdir -p "$(dirname "../data/$dest")"
            mv "$file" "../data/$dest"
          fi
        done
  done

  # The glob must stay outside the quotes, or nothing is removed
  rm -fr -- ./*/
  echo " ♻"
  cd ..
}

# Build the consolidated data file ($FL) out of tmp/$FL.2 and start the test
# summary file ($TSUM).
consolidate() {
  echo ""
  echo "Consolidating data…"

  echo "Maintainer|SRPM|RPM|EVR|Arch|RPM size|RPM metadata|foreign data|\
rpmlint score|filename|family name|face name|fixed family name|\
fixed face name|format (fontconfig)|format (libmagic)|file size|checksum|\
partial scripts|partial blocks|fontlint|symlink target|symlink provider|\
core fonts use" > "$FL"

  if [ -s "tmp/$FL.2" ] && [ -r "tmp/$FL.2" ] ; then
    # Prefix every line with the owner of its source package
    # FIXME: make this distro-agnostic
    cut -d "|" -f 1 "tmp/$FL.2" | sort -u \
      | /usr/bin/fedoradev-pkgowners 2>/dev/null \
      | sed 's+ +|+g' \
      | awk -F "|" -v file="tmp/$FL.2" '
          {
            owner[$2] = $1
          }
          END {
            while ((getline < file) > 0) {
              print owner[$1] "|" $0
            }
          }' \
      | sort -df -t "|" -k 1 -k 2 -k 3 > "tmp/$FL.3"

    # Add the last column: for symlinks, the package that ships the target
    awk -F "|" '
      ($16 != "Link") {
        source[$10] = $3 "-" $4
        out[$0] = $0 "|"
      }
      ($16 == "Link") {
        link[$0] = $22
      }
      END {
        for (line in link) {
          out[line] = line "|" source[link[line]]
        }
        n = asort(out)
        for (i = 1; i <= n; i++)
          print out[i]
      }' "tmp/$FL.3" > "tmp/$FL.4"

    # Blank the zero scores (twice, as matches cannot overlap) and drop the
    # trailing separators
    sort -df -t "|" -k 1 -k 2 -k 3 -k 4n -k 13 -k 14 -k 10 "tmp/$FL.4" \
      | sed -e 's=$=|=g' -e 's=|0|=||=g' -e 's=|0|=||=g' -e 's=|\+$==g' \
      >> "$FL"
  fi

  echo "Maintainer|SRPM|RPM|EVR|Arch|File" > "$TSUM"
}

# Run every test on the consolidated data ($FL): print the offenders and
# record them in the test summary ($TSUM). Field numbers refer to the header
# written by consolidate().
analyse() {
  echo ""
  echo "Conducting tests:"
  echo ""

  test_title outside-usr-share-fonts
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    ($10 !~ /^\/usr\/share\/fonts\//)' "$FL" \
    | sort -df -t "|" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup outside-usr-share-fonts tmp/tmp.csv "$TSUM"

  test_title without-rpm-metadata
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    ($7 != "M")' "$FL" \
    | sort -df -t "|" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup without-rpm-metadata tmp/tmp.csv "$TSUM"

  test_title family-mixing
  awk -F '|' '
    (NR > 1) && ($11 != "") {
      rpm = $3 "-" $4 "." $5
      files[rpm] = files[rpm] $0 "#"
      if ( ! family[rpm] ) {
        family[rpm] = $11
      } else {
        if (family[rpm] != $11) {
          mixed[rpm] = 1
        }
      }
    }
    END {
      for (rpm in mixed) {
        sub("#$", "", files[rpm])
        split(files[rpm], lines,"#")
        for (l in lines) {
          print lines[l]
        }
      }
    }' "$FL" \
    | sort -df -t "|" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup family-mixing tmp/tmp.csv "$TSUM"

  test_title duplicated-file
  awk -F '|' '
    (NR > 1) && ($24 != "X") && ($16 != "Link") {
      files[$18] = files[$18] $0 "#"
    }
    END {
      for (checksum in files) {
        sub("#$", "", files[checksum])
        count = split(files[checksum], lines,"#")
        if (count > 1) {
          for (l in lines) {
            print lines[l]
          }
        }
      }
    }' "$FL" \
    | sort -g -t "|" -k 18 > tmp/tmp.csv
  if has_rows tmp/tmp.csv ; then
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) {
          rpm = "[" rpm "]"
        }
        sub(".*/", "", $10)
        # start afresh, or the names of the previous file leak in
        sig = ""
        if ( $11 != "" ) {
          sig = $11
        }
        if ( $12 != "") {
          if ( sig != "" ) {
            sig = sig ", "
          }
          sig = sig $12
        }
        if ( sig != "" ) {
          sig = "#(" sig ")"
        }
        print rpm "|" $18 "|" $10 sig
      }' tmp/tmp.csv | enum_rpm
  fi
  test_wrapup duplicated-file tmp/tmp.csv "$TSUM"

  test_title duplicated-face-ext
  awk -F '|' '
    (NR > 1) && ($11 != "") && ($12 != "") {
      face = $11 "|" $12
      rpm = $3 "-" $4 "." $5
      files[face] = files[face] $0 "#"
      if ( ! loc[face] ) {
        loc[face] = rpm
      } else {
        if (loc[face] != rpm) {
          duplicated[face] = 1
        }
      }
    }
    END {
      for (face in duplicated) {
        sub("#$", "", files[face])
        split(files[face], lines,"#")
        for (l in lines) {
          print lines[l]
        }
      }
    }' "$FL" \
    | sort -df -t "|" -k 11 -k 12 > tmp/tmp.csv
  if has_rows tmp/tmp.csv ; then
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) {
          rpm = "[" rpm "]"
        }
        sub(".*/", "", $10)
        print rpm "|" $11 ", " $12 "|" $10
      }' tmp/tmp.csv | enum_rpm
  fi
  test_wrapup duplicated-face-ext tmp/tmp.csv "$TSUM"

  test_title fc-query
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    ($15 == "")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup fc-query tmp/tmp.csv "$TSUM"

  test_title libmagic
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    (tolower(" " $16 " ") !~ / font /)' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup libmagic tmp/tmp.csv "$TSUM"

  test_title broken-symlink
  awk -F '|' '(NR > 1) && ($16 == "Link") && ($23 == "")' "$FL" > tmp/tmp.csv
  if has_rows tmp/tmp.csv ; then
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) {
          rpm = "[" rpm "]"
        }
        sub(".*/", "", $10)
        print rpm "|" $10 " ↛ " $22
      }' tmp/tmp.csv | enum_rpm
  fi
  test_wrapup broken-symlink tmp/tmp.csv "$TSUM"

  test_title rpmlint
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
    ($9 != "")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup rpmlint tmp/tmp.csv "$TSUM"

  test_title mixed-with-non-font-data
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
    ($8 != "")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup mixed-with-non-font-data tmp/tmp.csv "$TSUM"

  test_title arch-package
  awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
    ($5 != "noarch")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup arch-package tmp/tmp.csv "$TSUM"

  test_title bad-rpm-naming
  awk -F "|" '(NR > 1) && ($16 != "Link") && ($24!="X") && \
    $3 !~ /^[0-9abcdefghijklmnopqrstuvwxyz\.-]*-fonts$/' "$FL" \
    > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup bad-rpm-naming tmp/tmp.csv "$TSUM"

  test_title bad-naming
  awk -F '|' '(NR > 1) && \
    ((tolower($11)!=tolower($13)) || (tolower($12)!=tolower($14)))' \
    "$FL" > tmp/tmp.csv
  if has_rows tmp/tmp.csv ; then
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) {
          rpm = "[" rpm "]"
        }
        sub(".*/", "", $10)
        print rpm "|" $11 ", " $12 " → " $13 ", " $14 "|" $10
      }' tmp/tmp.csv | enum_rpm
  fi
  test_wrapup bad-naming tmp/tmp.csv "$TSUM"

  test_title core-fonts
  awk -F '|' '(NR > 1) && ($24=="X")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup core-fonts tmp/tmp.csv "$TSUM"

  test_title font-linking
  awk -F '|' '(NR > 1) && ($16=="Link")' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup font-linking tmp/tmp.csv "$TSUM"

  test_title duplicated-face-int
  awk -F '|' '
    (NR > 1) && ($24!="X") && \
    ($15 != "PCF") && ($15 != "Type 1") && \
    ($11 != "") && ($12 != "") {
      sig = $3 "|" $4 "|" $5 "|" $11 "|" $12
      files[sig] = files[sig] $0 "#"
    }
    END {
      for (sig in files) {
        sub("#$", "", files[sig])
        n = split(files[sig],lines,"#")
        if (n > 1) {
          for (l in lines) {
            print lines[l]
          }
        }
      }
    }' "$FL" \
    | sort -df -t "|" -k 11 -k 12 -k 3 -k 2 > tmp/tmp.csv
  if has_rows tmp/tmp.csv ; then
    awk -F '|' '
      {
        rpm = $3
        if ( $7 == "M" ) {
          rpm = "[" rpm "]"
        }
        sub(".*/", "", $10)
        print rpm "|" $11 ", " $12 "|" $10
      }' tmp/tmp.csv | enum_rpm
  fi
  test_wrapup duplicated-face-int tmp/tmp.csv "$TSUM"

  test_title fontlint
  awk -F '|' '(NR > 1) && ($24!="X") && ($21 > 0)' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup fontlint tmp/tmp.csv "$TSUM"

  test_title no-english-metadata
  awk -F '|' '(NR > 1) && ($24!="X") && \
    ($15 != "") && (($11 == "") || ($12 == ""))' "$FL" > tmp/tmp.csv
  if has_rows tmp/tmp.csv ; then
    awk -F '|' '
      {
        rpm = $3 "-" $4 "." $5
        if ( $7 == "M" ) {
          rpm = "[" rpm "]"
        }
        print $10 "|" rpm
      }' tmp/tmp.csv \
      | column -t -s '|' | pretty_indent
  fi
  test_wrapup no-english-metadata tmp/tmp.csv "$TSUM"

  test_title partial-scripts
  awk -F '|' '(NR > 1) && ($24!="X") && ($19 > 0)' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup partial-scripts tmp/tmp.csv "$TSUM"

  test_title partial-blocks
  awk -F '|' '(NR > 1) && ($24!="X") && ($20 > 0)' "$FL" > tmp/tmp.csv
  list_rpm tmp/tmp.csv
  test_wrapup partial-blocks tmp/tmp.csv "$TSUM"
}

# Print the short report: statistics on the font files found, then the test
# scores.
sum_up() {
  echo ""
  echo "Audit results:"
  echo ""
  echo "– packages that declare font metadata:"
  awk -F '|' '(NR > 1) && ($7 == "M") && ($24!="X")' "$FL" > tmp/tmp.csv
  stats tmp/tmp.csv
  echo "☛ File size is computed as extracted, while rpm is a compressed format." \
    | pretty_indent
  echo "☛ Mid-term, files in legacy PCF or Type1 formats need to be converted or removed." \
    | pretty_indent
  echo ""
  echo "– font files in other packages (we should not find any!)"
  awk -F '|' '(NR > 1) && ($7 != "M") && ($16 != "Link") && ($24!="X")' "$FL" > tmp/tmp.csv
  stats tmp/tmp.csv
  if has_rows tmp/tmp.csv ; then
    echo "☛ Bad packaging may result in arched packages or mixed content." \
      | pretty_indent && echo ""
  fi
  echo "– errors, warnings and suggestions:"
  echo ""
  test_score "$TSUM"
}

# Build one archive per source package listed in the test summary (message
# text, data excerpts, per-package files) in spam/, plus the script that
# mails them.
prepare_spam() {
  echo ""
  echo "Packing mail data…"
  mkdir spam
  # sort -u: every source package must be processed exactly once
  for srpm in $(awk -F '|' 'FNR>1 { print $2 }' "$TSUM" | sort -u) ; do
    mkdir -p "tmp/$srpm"
    cp -p report.txt "tmp/$srpm/repo-report.txt"
    for file in "$FL" "$TSUM" ; do
      awk 'FNR==1' "$file" > "tmp/$srpm/$file"
      awk -F '|' -v srpm="$srpm" 'FNR>1 && $2==srpm' "$file" >> "tmp/$srpm/$file"
    done
    for rpm in $(awk -F '|' 'FNR>1 { print $3 "-" $4 "." $5 }' "tmp/$srpm/$TSUM" \
                 | uniq | sed "s+$bad_pattern+_+g") ; do
      cp -pr "data/$rpm" "tmp/$srpm"
    done
    cat > "tmp/$srpm/message.txt" << EOF
Dear packager,

At $TIMESTAMP, your “$srpm” package failed one or more of the tests
I was performing on the “$ID” repository located at:
$REPOURL

There are three different reasons that may cause this message:
1. your package is including one or more font files, but not packaging
   them properly;
2. your package is including one or more font files, and I've found
   issues in some of them;
3. your package is not shipping any font file, but the way it accesses
   fonts in other packages is not satisfying.

To stop receiving this message, you need to:
1. drop the font files or fix their packaging;
2. relay the fonts issues to the fonts upstream to get them revised;
3. work with the code upstream to improve the way it accesses font
   files (usually by making it use fontconfig through a higher-level
   text library such as pango, pango-cairo, harfbuzz, or QT)

You can self-check your packages at any time by:
1. installing $CREATEREPO and fontpackages-tools:
# $YUM install $CREATEREPO fontpackages-tools
2. putting your packages and any font package they depends on in a
   test directory
3. indexing this directory with $CREATEREPO:
$ $CREATEREPO path-to-test-directory
4. running repo-font-audit:
$ repo-font-audit test absolute-path-to-test-directory

A summary of the issues I detected is appended here. For your
convenience a more comprehensive analysis is also attached to this
message.

Errors, warnings and suggestions:

$(test_score "tmp/$srpm/$TSUM")

Please take the appropriate measures to fix the “$srpm” package.
I will warn you again if it is still necessary next time I am ran.

This report was generated by the repo-font-audit command from:
http://fedoraproject.org/wiki/fontpackages

Please post questions, suggestions, patches or bug reports to:
https://admin.fedoraproject.org/mailman/listinfo/fonts
(subscription required)

Your friendly QA robot,

-- 
repo-font-audit
EOF
    ( cd tmp && tar cf "$srpm.tar" "$srpm" )
    xz -9 "tmp/$srpm.tar"
    mv "tmp/$srpm.tar.xz" spam
  done

  # The package list is written on a single line: a list ending in a
  # backslash followed by " ; do" would add a bogus " " item to the loop.
  # The message is extracted by its exact member name rather than a glob.
  cat > spam/send-messages.sh << EOF
#!/bin/sh
# Send warnings to problem package owners

# This is a bit distribution-specific, people from other distributions are
# welcome to suggest how to make it more agnostic

#EMAIL="repo-font-audit "
#REPLYTO=another@mail
#export EMAIL REPLYTO

for srpm in $(awk -F '|' 'FNR>1 { print $2 }' "$TSUM" | sort -u | tr '\n' ' ') ; do
  tar -xOf "\$srpm.tar.xz" "\$srpm/message.txt" \\
    | mutt -s "[RFA] Your \$srpm $ID package did not pass QA" \\
        -a "\$srpm.tar.xz" -- "\$srpm-owner@fedoraproject.org"
  sleep 5
done
EOF
  chmod +x spam/send-messages.sh
}

# Pack the full results, the short summary and the mail data in three
# archives stored in $ORIGDIR, then print the closing report.
# Reads start_time, which collect sets.
pack_data() {
  echo "Packing result data…"
  mv spam "$SPAM"
  mkdir -p "$RES/data"
  cp "$TSUM" summary.txt report.txt "$FL" "$RES/"
  # data/ may be empty
  cp -pr data/* "$RES/data" 2>/dev/null
  mkdir "$SRES/"
  cp "$TSUM" summary.txt report.txt "$SRES/"
  for report in "$RES" "$SRES" "$SPAM" ; do
    tar cf "$report.tar" "$report"
    xz -9 "$report.tar"
    mv "$report.tar.xz" "$ORIGDIR/"
  done

  echo ""
  echo "Audit complete!"
  awk 'NR > 1' "$FL" > "tmp/$FL"

  end_time=$(date "+%s")
  t=$((end_time - start_time))
  th=$((t / (60*60)))
  tm=$(((t / 60) - (th * 60)))
  ts=$((t - ((th * 60 + tm) * 60)))
  elapsed_time=""
  if [ "$th" -gt 0 ] ; then
    elapsed_time="$th h"
  fi
  if [ "$tm" -gt 0 ] ; then
    elapsed_time="${elapsed_time:+$elapsed_time }$tm min"
  fi
  if [ "$ts" -gt 0 ] ; then
    elapsed_time="${elapsed_time:+$elapsed_time }$ts s"
  fi
  # a run shorter than one second would leave the sentence without a value
  [ -n "$elapsed_time" ] || elapsed_time="0 s"

  (
    cat << EOF
Run time: $elapsed_time.

Number of items processed:
$(summary "tmp/$FL")

1. Extracted data: $ORIGDIR/$RES.tar.xz
2. Short summary: $ORIGDIR/$SRES.tar.xz
3. Mail data: $ORIGDIR/$SPAM.tar.xz

This report was generated by the repo-font-audit command from:
http://fedoraproject.org/wiki/fontpackages

Please post questions, suggestions, patches or bug reports to:
https://admin.fedoraproject.org/mailman/listinfo/fonts
(subscription required)
EOF
  ) | pretty_indent
}

# End of function declarations

[ "$#" -lt "2" ] && usage

TIMESTAMP=$(date -u +%Y%m%dT%H%M%SZ)
ID=$1
REPOID="$1-rfa-$TIMESTAMP"
REPOURL=$(echo "$2" | fix_url)
ORIGDIR="$PWD"

FPL="font-packages.csv"
PWFL="packages-with-fonts.csv"
XPL="package-using-x11.csv"
CSL="checksums.csv"
FL="consolidated-data.csv"
TSUM="test-summary.csv"
RES="repo-font-audit-$ID-$TIMESTAMP"
SRES="repo-font-audit-$ID-$TIMESTAMP-short"
SPAM="repo-font-audit-$ID-$TIMESTAMP-mail"

# $REPOQUERY may hold two words and is expanded unquoted where it is used
if [ -x /usr/bin/dnf ] ; then
  CREATEREPO='createrepo_c'
  REPOQUERY='dnf repoquery'
  YUM='dnf'
else
  CREATEREPO='createrepo'
  REPOQUERY='repoquery'
  YUM='yum'
fi

# Everything happens in a private scratch directory. Stop if it cannot be
# created or entered: a bare "cd" would go on in the home directory.
TMPDIR=$(mktemp -d --tmpdir=/tmp "$RES-XXXXXXXXXX") || exit 1
cd "$TMPDIR" || exit 1
# Remove the scratch directory on early exits too
trap 'cd "$ORIGDIR" ; rm -fr -- "$TMPDIR"' EXIT
mkdir tmp

collect
consolidate
analyse | tee report.txt
sum_up | tee summary.txt
prepare_spam
pack_data

cd "$ORIGDIR"
rm -fr "$TMPDIR"
echo "♻"