#!/bin/sh
# Quick and dirty script to audit font use in a package repository
#
# It is slow, it is ugly, and it requires a good network connection

# Function declarations

# Print the command-line synopsis and a few invocation examples on stderr,
# then terminate the script with exit status 1.
usage() {
  {
    cat << EOF_USAGE
Usage: $0 <id> <location>
<id>:       identifier of the package repository to check
<location>: location of the package repository to check

Examples:
$0 tmp /tmp/rpm
$0 tmp file://tmp/rpm
$0 rawhide http://example.com/mirrors/fedora.redhat.com/fedora/linux/development/x86_64/os/

EOF_USAGE
  } >&2
  exit 1
}

# Directory holding the private helpers called by this script (test-info and
# repo-font-audit.mk), resolved relative to the location of the script.
# $0 is quoted so that a script path containing whitespace still resolves.
DATADIR="$(dirname "$0")/../private"

# Filenames with whitespace or & + makefile = !!!
# Bracket expression matching every character that gets replaced by "_" in
# file and directory names before make is run.
bad_pattern="[[:space:]&\:]"

# Tools and humans make mistakes; try to fix some obvious ones
# Filter (stdin to stdout):
#  - a leading "file:" and any slashes that follow it become a single "/";
#  - a leading ftp:, http: or https: scheme followed by any number of slashes
#    is rewritten with exactly "://".
fix_url() {
  sed -e 's=^file:\(\/*\)=/=' \
      -e 's=^\(ftp\|http\|https\):\(\/*\)=\1://='
}

# Filter (stdin to stdout): wrap the text at the terminal width minus two
# columns, breaking at spaces when possible, and prefix every resulting line
# with two spaces. Leading and trailing whitespace of each line is trimmed by
# read before the prefix is added.
pretty_indent() {
  # tput prints nothing when TERM is unset or unknown (cron job, CI, ...).
  # The width would then evaluate to -2, fold would reject it and the whole
  # text would be lost, so fall back to a conventional 80 columns.
  pi_cols=$(tput cols 2>/dev/null)
  case "$pi_cols" in
    ''|*[!0-9]*) pi_cols=80 ;;
  esac
  [ "$pi_cols" -gt 2 ] || pi_cols=80

  # read -r keeps backslashes intact; the extra test keeps a final line that
  # is not newline-terminated instead of dropping it.
  fold -s -w $((pi_cols - 2)) \
  | while read -r line || [ -n "$line" ] ; do printf '  %s\n' "$line" ; done
}

# $1 = CSV file to evaluate
# Print a single "files|rpms|srpms|file MiB|rpm MiB" record for the file:
# number of lines, number of distinct name-EVR.arch packages (fields 3, 4
# and 5), number of distinct source packages (field 2), sum of field 17 and
# sum of field 6 (counted once per package), both divided by 1024*1024.
tally() {
  awk -F "|" '
    {
      bytes_in_files += $17
      seen_files++
      pkg_bytes[$3 "-" $4 "." $5] = $6
      sources[$2] = 1
    }
    END {
      for (pkg in pkg_bytes) {
        bytes_in_pkgs += pkg_bytes[pkg]
        seen_pkgs++
      }
      for (src in sources) seen_sources++

      mib = 1024 * 1024
      files_mib = bytes_in_files / mib
      pkgs_mib  = bytes_in_pkgs  / mib
      print seen_files "|" seen_pkgs "|" seen_sources "|" files_mib "|" pkgs_mib
    }' "$1"
}


# $1 = CSV file to summarise
# Print a one-sentence digest of the tally of $1, or "None!" when the file
# holds no line, followed by an empty line.
summary() {
  sm_lines=$(cat "$1" | wc -l)
  if [ $sm_lines -eq 0 ] ; then
    echo "⇒  None!" | pretty_indent
  else
    tally "$1" \
      | awk -F '|' '{
          files = $1 ; rpms = $2 ; srpms = $3 ; fmib = $4 ; rmib = $5
          print "⇒ " files " file(s) (" fmib " MiB) in " rpms \
                " package(s) (" rmib " MiB) generated from " srpms " source package(s)."
        }' \
      | pretty_indent
  fi
  echo ""
}


# $1 = summary file
# $2 = temporary data file
# $3 = test id
# Append one column named after the test id to the summary file. Every
# package/file listed in the data file (fields 1-5 and 10) gets a 1 in that
# column; entries missing from the summary are added with their earlier
# columns left empty. The header stays first, the other lines are re-sorted.
complete_csv_summary() {
  ccs_scratch=tmp/tmp.ccs.csv

  awk -F '|' -v t_datafile="$2" -v testlabel="$3" '
    # Fields 1..upto of the current record, "|"-separated; fields past NF
    # come out empty, which is what opens the new column.
    function joined(upto,    out, n) {
      out = $1
      for (n = 2 ; n <= upto ; n++) out = out "|" $n
      return out
    }

    # Lines of the data file: remember what failed this test
    FILENAME == t_datafile {
      fail[$1 "|" $2 "|" $3 "|" $4 "|" $5 "|" $10] = 1
      next
    }

    # Header of the summary file
    FNR == 1 {
      max = NF + 1
      print joined(max) testlabel
      next
    }

    # Entries already present in the summary file
    {
      line = joined(6)
      print joined(max) fail[line]
      processed[line] = 1
    }

    # Failures not known to the summary file yet
    END {
      for (line in fail) {
        if (line in processed) continue
        newline = line
        for (i = 7 ; i <= max ; i++) newline = newline "|"
        print newline fail[line]
      }
    }' "$2" "$1" > "$ccs_scratch"

  head -n 1 "$ccs_scratch" > "$1"
  tail -n +2 "$ccs_scratch" | sort -f -t '|' >> "$1"
  rm "$ccs_scratch"
}


# $1 = CSV file whose last field is the grouping key
# For every distinct value of the last field, print "<value>|" immediately
# followed by the tally of the lines carrying that value.
substats() {
  ss_datafile="$1"

  awk -F '|' '{ print $NF }' "$ss_datafile" | sort | uniq \
    | while read key ; do
        awk -F '|' -v key="$key" '$NF==key' "$ss_datafile" > tmp/tmp.ss.csv
        printf '%s|' "$key"
        tally tmp/tmp.ss.csv
        rm tmp/tmp.ss.csv
      done
}


# $1 = CSV file to sum_up
# Print the global summary of the file and, when it is not empty, two
# tables of sub-totals: one keyed on field 15 (restricted to lines with a
# non-empty field 13), one keyed on field 5.
stats() {
  s_datafile="$1"

  summary "$s_datafile"

  if [ $(cat "$s_datafile" | wc -l) -gt 0 ] ; then
    # Sub-totals per format
    awk -F '|' '$13 != "" { print $0 "|" $15 }' "$s_datafile" > tmp/tmp.s.csv
    { echo "Format|Files|rpm|srpm|Files (MiB)|rpm (MiB)"
      substats tmp/tmp.s.csv ; } | column -t -s '|' | pretty_indent
    echo ""

    # Sub-totals per architecture
    awk -F '|' '{ print $0 "|" $5 }' "$s_datafile" > tmp/tmp.s.csv
    { echo "Arch|Files|rpm|srpm|Files (MiB)|rpm (MiB)"
      substats tmp/tmp.s.csv ; } | column -t -s '|' | pretty_indent
    echo ""

    rm tmp/tmp.s.csv
  fi
}


# $1 = CSV file to sum_up
# Print, on one wrapped line, every package name (field 3) found in the file
# with its number of lines: "name(count), name(count), …". Names of packages
# whose field 7 is "M" are shown between square brackets.
# Prints nothing and returns 1 when the file is empty. Requires gawk.
list_rpm() {
  lr_lines=$(cat "$1" | wc -l)
  [ $lr_lines -gt 0 ] || return 1

  awk -F '|' '
    {
      label = ($7 == "M") ? ("[" $3 "]") : $3
      hits[label] += 1
    }
    END {
      count = asorti(hits, names)
      for (pos = 1 ; pos <= count ; pos++) {
        name = names[pos]
        line = line name "(" hits[name] "), "
      }
      sub(", $","",line)
      print line
    }' "$1" | pretty_indent
}

# Pipe CSV file to sum_up
# It is assumed to be of the form
# rpm | pattern to match | pattern to display
# patterns may include # as separators
#
# Lines are grouped by "pattern to match". Groups are printed from the most
# to the least populated; groups of equal size are printed in sorted key
# order. Each group shows its key (with # turned into ", ") followed by one
# sorted "display pattern <tab> rpm" line per member, then an empty line.
# Requires gawk (asort, asorti, gensub).
enum_rpm() {
awk -F '|' '
  BEGIN { print "" }
  {
    rpm = $1
    key = $2
    pat = $3
    files[key] = files[key] pat "#" rpm "|"
  }
  END {
    for (key in files) {
      sub("\\|$", "", files[key])
      total = split(files[key], lines,"|")
      # asorti() compares indices as strings: zero-pad the group size so
      # that a group of 10 ranks above a group of 9
      group = sprintf("%09d", total)
      pb[group] = pb[group] key "|"
    }
    n = asorti(pb, sorted)
    for (i = n; i >= 1; i--) {
      group = sorted[i]
      sub("\\|$", "", pb[group])
      split(pb[group], keys,"|")
      nk = asort(keys, sortedkeys)
      for ( k = 1 ; k <= nk ; k++ ) {
        key = sortedkeys[k]
        print gensub("#", ", ", "g", key)
        split(files[key], lines, "|")
        nl = asort(lines, sortedlines)
        for ( l = 1 ; l <= nl ; l++ ) {
          # Print the sorted copy: the unsorted array used to be printed,
          # which made the asort() call above pointless
          print "  " gensub("#", "\t", "G", sortedlines[l])
        }
        print ""
      }
    }
  }' | pretty_indent
}

# $1 = this test id
# $2 = csv file with the results of this test
# $3 = csv file that consolidates the results of all tests
# Record the results of one test as a new column of the consolidated file,
# then print the summary of these results.
test_wrapup() {
  tw_id="$1"
  tw_results="$2"
  tw_consolidated="$3"

  complete_csv_summary "$tw_consolidated" "$tw_results" "$tw_id"
  summary "$tw_results"
}


# $1 = csv file with test results
# Print the score report for the consolidated test summary $1:
#  1. a table with one row per package (numbered P#) and one column per test
#     that has a non-zero total, closed by a "Total" row;
#  2. a legend giving the Maintainer|SRPM|RPM|EVR|Arch behind each P#;
#  3. the title and help text of each reported test, fetched from
#     "$DATADIR/test-info".
# Prints "None!" when the file holds nothing besides its header line.
# The first five fields identify a package and test columns start at field 7
# (lh). Requires gawk (asorti).
test_score() {
# More than one line = at least one result after the header
if [ $(cat "$1" | wc -l) -gt 1 ] ; then
  # Score table. asorti(fail) replaces fail[] with the sorted package ids,
  # so fail[l] is the id of row number l.
  awk -F '|' '
    BEGIN {
      lh=7
      headerline = "P#"
    }
    FNR==1 {
      max = NF
    }
    FNR>1 {
      id = ""
      for (i = 1 ; i < (lh-1) ; i++) {
        if ($i != "") id = id $i "|"
        else          id = id   " |"
      }
      sub("\\|$","",id)
      fail[id] = 1
      for (i = lh ; i <= max ; i++) {
        score[id"|"i] = score[id"|"i] + $i
        totalscore[i] = totalscore[i] + $i
      }
    }
    END {
      for (i = lh ; i <= max ; i++) {
          if ( totalscore[i] ) { headerline  = headerline "|" "t" i+1-lh }
        }
      print headerline
      n = asorti(fail)
      for (l = 1; l <= n; l++) {
        newline = l
        for (c = lh ; c <= max ; c++) {
          if ( totalscore[c] ) {
            if ( score[fail[l]"|"c] )
              { newline = newline "|" score[fail[l]"|"c] }
            else
              { newline = newline "|‧" }
          }
        }
        print newline
      }
      totalline = "Total"
      for (i = lh ; i <= max ; i++) {
        if ( totalscore[i] ) { totalline = totalline "|"  totalscore[i] }
      }
      print totalline
    }' "$1" | column -t -s '|'

  echo ""

  # Legend: same ids, sorted the same way, so that P# numbers match the
  # rows of the score table
  awk -F '|' '
    BEGIN { lh=7 }
    FNR>1 {
      id = ""
      for (i = 1 ; i < (lh-1) ; i++) {
        if ($i != "") id = id $i "|"
        else          id = id   " |"
      }
      sub("\\|$","",id)
      fail[id] = 1
    }
    END {
      print "P#|Maintainer|SRPM|RPM|EVR|Arch"
      n = asorti(fail)
      for (l = 1; l <= n; l++) {
      print l "|" fail[l]
      }
    }' "$1" | column -t -s '|'

  echo ""
  echo "Test explanation:"
  echo ""

  # Emit "number|test id" for each test column with a non-zero total (the
  # test id is read from the header line), then describe each of these tests
  awk -F '|' '
    BEGIN { lh = 7 }
    FNR==1 {
      max = NF
      for (i = lh ; i <= max ; i++) {
        legend[i] = i+1-lh "|" $i }
      }
    FNR>1 {
      for (i = lh ; i <= max ; i++) {
        totalscore[i] = totalscore[i] + $i
      }
    }
    END {
      for (i = lh ; i <= max ; i++) {
        if ( totalscore[i] ) { print legend[i] }
      }
    }' "$1" | while read line ; do
      testnumber=$(echo $line | awk -F '|' '{ print "t"$1 }')
      testid=$(echo $line | awk -F '|' '{ print $2 }')
      echo -n "$testnumber. "
      "$DATADIR/test-info" $testid title
      echo ""
      ( "$DATADIR/test-info" $testid help ) | pretty_indent
    done

else
  echo "⇒  None!"
fi

}


# Query the repository, fetch and unpack the interesting packages, and run
# the per-file analysis makefile on their content.
#
# Globals read:    REPOQUERY REPOID REPOURL FPL PWFL XPL FL DATADIR
#                  bad_pattern
# Globals written: start_time FREPOURL
# Files produced (relative to the starting directory, which is also the
# working directory on return):
#   $FPL, $PWFL, $XPL  package lists returned by the three repository queries
#   tmp/$FL.1          merged package list, one line per package, with the
#                      M/E/X tags of the queries that matched it
#   tmp/$FL.2          concatenation of the analysis results
#   data/<package>/    rpmlint, fontlint, unicover and fc-query reports
# Exits the whole script when the queries return nothing, or when the tmp
# working directory cannot be entered.
collect() {
start_time=$(date "+%s")

# This is very ugly but the safest syntax to use with any repoquery version
FREPOURL=$(echo "$REPOURL" | sed 's=^/=file:///=')

# In each of the three queries below, the sed expression rewrites the first
# field (the source rpm) by dropping its last two "-"-separated components
# (presumably version and release.src.rpm, leaving the source package name).
echo "Looking for packages:"
echo "— with font metadata…"
$REPOQUERY --repofrompath=$REPOID,$FREPOURL --repoid=$REPOID \
          --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|M" \
          --whatprovides "font(*)" --quiet \
          | sort | uniq \
          | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g' \
          > "$FPL"

echo "— that include files with common font extensions…"
$REPOQUERY --repofrompath=$REPOID,$FREPOURL --repoid=$REPOID \
          --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|E" \
          -f '*.ttf' -f '*.otf' -f '*.ttc' \
          -f '*.pfb' -f '*.pfa' \
          -f '*.pcf.gz' -f '*.pcf' -f '*.bdf' --quiet \
          | sort | uniq \
          | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g' \
          > "$PWFL"

echo "— that use the core X11 protocol…"
$REPOQUERY --repofrompath=$REPOID,$FREPOURL --repoid=$REPOID \
          --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|X" \
          --whatrequires 'libX11.so*' \
          | sort | uniq \
          | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g' \
          > "$XPL"

if [ $(cat "$FPL" "$PWFL" "$XPL" | wc -l) -eq 0 ] ; then
  echo "Nothing to do!"
  exit
fi

# Merge the three lists: one line per package, the sixth field collects the
# comma-separated tags (M, E, X) of every query that returned the package
cat "$FPL" "$PWFL" "$XPL" \
  | awk -F '|' '
      { tag[$1 "|" $2 "|" $3 "|" $4 "|" $5] = tag[$1 "|" $2 "|" $3 "|" $4 "|" $5] $6 "," }
      END {
        n =  asorti(tag,rpm)
        for (l = 1; l <= n; l++) {
          sub(",$", "", tag[rpm[l]])
          print rpm[l] "|" tag[rpm[l]]
        }
      }' > "tmp/$FL.1"

echo ""
echo "Inspecting packages:"
# Everything below, including the final recursive cleanup, assumes that the
# working directory is tmp: bail out rather than work in the wrong place
cd "tmp" || exit 1
rm -f "$FL.2"

# The loop is the last stage of a pipeline and runs in a subshell: its
# directory changes do not leak into the rest of the function
cat "$FL.1" | while read line ; do
  rpm=$(echo "$line" | awk -F '|' '{ print $2 "-" $3 "." $4 }')
  rpmline=$(echo "$line" | awk -F '|' '{ print $1 "|" $2 "|" $3 "|" $4 "|" $5 }')
  rpmd=$(echo "$rpm" | sed "s+$bad_pattern+_+g")
  echo "$line" | cut -d "|" -f 6 | grep -q "M" && metadata="M" || metadata=""
  echo -n "– $rpm ◔"
  mkdir "$rpmd"
  cd "$rpmd"
  # NOTE(review): this query uses $REPOURL while the queries above use
  # $FREPOURL — confirm that the difference is intended
  rpm_loc=$($REPOQUERY --repofrompath=$REPOID,$REPOURL --repoid=$REPOID \
                      --location "$rpm" 2>/dev/null \
              | fix_url)
  # Local packages are symlinked, remote ones are downloaded
  if $(echo "$rpm_loc" | grep -q -e '^[\./~]') ; then
    ln -s "$rpm_loc" .
  else
    curl -O --silent --show-error --retry 3 --connect-timeout 10 --retry-delay 15 --url "$rpm_loc"
  fi
  echo -ne "\b◑"
  rpm2cpio *.rpm > "$rpm.cpio"
  echo -ne "\b◕"
  cat "$rpm.cpio" | cpio --quiet -it > "$rpm.lst"
  # Number of packaged files that are neither in a font-related location
  # nor named with a font extension
  nofont_score=$(cat "$rpm.lst" | sed 's=^\./=/=g' \
    | grep -viE -e "^/usr/share/fonts" \
                -e "^/usr/share/fontconfig/conf.avail/" \
                -e "^/etc/fonts/conf.d/" \
                -e "^/usr/share/doc/" \
                -e "^/etc/X11/fontpath.d/" \
                -e '\.((ttf)|(ttc)|(otf)|(pfa)|(pfb)|(bdf)|(pcf)|(pcf\.gz))$' \
    | wc -l)
  echo "$rpmline|$metadata|$nofont_score" > rpm-info.txt
  # Packages tagged E: list the files with a font extension
  if $(echo "$line" | cut -d "|" -f 6 | grep -q "E") ; then
    cat "$rpm.lst" \
      | grep -iE '\.((ttf)|(ttc)|(otf)|(pfa)|(pfb)|(bdf)|(pcf)|(pcf\.gz))$' \
      | sort > "$rpm.fonts.lst"
  fi
  # Packages tagged X: list the files in binary and library locations,
  # except for a few known files
  if $(echo "$line" | cut -d "|" -f 6 | grep -q "X") ; then
    cat "$rpm.lst" \
      | grep -i  -e "^./sbin/" \
                 -e "^./usr/sbin/" \
                 -e "^./usr/kerberos/sbin" \
                 -e "^./bin/" \
                 -e "^./usr/bin/" \
                 -e "^./usr/kerberos/bin/" \
                 -e "^./lib.*/" \
                 -e "^./usr/lib.*/" \
                 -e "^./opt/" \
                 -e "^./usr/X11R6/" \
                 -e "^./usr/games/" \
                 -e "^./usr/local/" \
      | grep -vi -e "^./usr/bin/dmxwininfo" \
                 -e "^./usr/bin/Xdmx" \
                 -e "^./usr/bin/xfontsel" \
                 -e "^./usr/bin/xlsfonts" \
                 -e "^./usr/bin/Xnest" \
                 -e "^./usr/bin/xprop" \
                 -e "^./usr/bin/xsetroot" \
                 -e "^./usr/bin/xwininfo" \
                 -e "^./usr/bin/x11vnc" \
                 -e "^./usr/bin/x2vnc" \
                 -e "^./usr/lib.*/libXcursor.so" \
      | sort > "$rpm.bin.lst"
  fi
  # Make sure both lists exist, then extract only the listed files in src/
  touch "$rpm.fonts.lst" "$rpm.bin.lst"
  cat "$rpm.fonts.lst" "$rpm.bin.lst"  | sort | uniq > "$rpm.files.lst"
  mkdir src
  cd src
  cpio -idm --quiet -E "../$rpm.files.lst" < "../$rpm.cpio"
  cd ..
  rm "$rpm.cpio"
  echo -ne "\b● "
  # Classify each candidate font file from the leading part of the libmagic
  # description, printing one progress character per file
  sed 's=^\./==g' "$rpm.fonts.lst" \
    | while read file; do
    unset target
    type=$(file -bzh "src/$file")
    case $(echo "$type" | sed 's+ (\(.*\)++g' \
                             | sed 's+ `\(.*\)++g' \
                             | sed 's+,\(.*\)++g' \
                             | sed 's+\( \)*$++g' ) in
      "TrueType font data")
        echo -n "t"
        ;;
      "TrueType font collection data")
        echo -n "T"
        ;;
      "OpenType font data")
        echo -n "o"
        ;;
      "X11 Portable Compiled Font data")
        echo -n "b"
        ;;
      "X11 BDF font text")
        echo -n "B"
        ;;
      # PostScript files are a mess
      "PostScript Type 1 font text")
        echo -n "P"
        ;;
      "PostScript Type 1 font program data")
        echo -n "p"
        ;;
      "PostScript document text conforming DSC level 3.0")
        type="ignored"
        echo -n "-"
        ;;
      "PostScript document text"|"8086 relocatable")
        echo -n "!" >&2
        ;;
      "symbolic link to"|"broken symbolic link to")
        # Only links that point inside /usr/share/fonts are kept
        target=$(readlink -m "src/$file" | sed "s+^$PWD/src++g")
        if $(echo "$target" | grep -q "^/usr/share/fonts") ; then
          type="Link"
          echo -n "l"
        else
          type="ignored"
          echo -n "-"
        fi
        ;;
      *)
        echo -n "?"
        ;;
    esac
    if [ "$type" != "ignored" -a ! -h "src/$file" ] ; then
       # Quoted: a path containing whitespace must yield a single directory
       mkdir -p "$(dirname "tmp/$file")"
       echo "/$file|$type" > "tmp/$file.rfo.fonts.info"
       # Pre-create an empty unicover result for everything that is not
       # TrueType or OpenType font data
       if [ "$type" != "TrueType font data" -a \
            "$type" != "OpenType font data" ] ; then
         touch "tmp/$file.rfo.unicover"
       fi
    fi
    if [ "$type" = "Link" ] ; then
      echo "/$file|$target" >> font-links.txt
    fi
  done

  # Keep the ELF files among the candidate binaries
  sed 's=^\./==g' "$rpm.bin.lst" \
    | while read file ; do
    type=$(file -bzh "src/$file")
    if $(echo "$type" | grep -q -e '^ELF ' -e ' ELF ') ; then
      echo -n "‧"
      mkdir -p "$(dirname "tmp/$file")"
      echo "/$file|$type" > "tmp/$file.rfo.core-fonts.info"
    fi
  done

  echo ""
  cd ..
done

echo ""
echo "Analysing files…"

# Replace the characters make cannot cope with in directory and file names
find . -type d | grep "$bad_pattern" \
  | while read dir ; do
      mkdir -p "$(echo "$dir" | sed "s+$bad_pattern+_+g")"
    done

find . -type f | grep "$bad_pattern" \
  | while read file ; do
      mv "$file" "$(echo "$file"| sed "s+$bad_pattern+_+g")"
    done

# There must be a cleaner way to do this
parallelism=$(($(cat /proc/cpuinfo | grep processor | wc -l)+1))
make -s -r -j $parallelism -f "$DATADIR/repo-font-audit.mk"

# Gather the results in a single file
find .  -name "*\.rfo\.fonts" \
     -o -name "*\.rfo\.core-fonts" \
     -o -name processed-font-links.txt \
  | while read datafile ; do
      cat "$datafile" >> "$FL.2"
    done

# Keep the per-package reports in ../data/<package directory>/
mkdir ../data
for rpmlint in */rpmlint.txt ; do
  # The pattern is left as is when nothing matches it
  [ -e "$rpmlint" ] || continue
  mkdir "../data/$(dirname "$rpmlint")"
  mv "$rpmlint" "../data/$rpmlint"
done

for ext in fontlint unicover fc-query ; do
  find . -name "*\.rfo\.$ext" \
     | while read file ; do
          # ./<package>/tmp/<path>.rfo.<ext> is stored as
          # <package>/<path with / replaced by _>.<ext>.txt
          dest=$(echo "$file" \
            | sed "s+./\([^/]*\)/tmp/\(.*\).rfo.$ext+\1|\2.$ext.txt+" \
            | sed 's+/+_+g' | sed 's+|+/+g')
          [ -s "$file" ] && mv "$file" "../data/$dest"
       done
done

# Drop the per-package working directories. The glob must stay outside the
# quotes: quoted, it designated a single directory literally named "*" and
# nothing was ever cleaned up.
rm -fr -- ./*/
echo " ♻"
cd ..
}


# Build the consolidated data file "$FL" from the raw results in "tmp/$FL.2"
# and initialise the test summary file "$TSUM" with its header line.
# Globals read: FL TSUM
# Intermediate files: tmp/$FL.3 (results prefixed with an owner field) and
# tmp/$FL.4 (results with the symlink provider field appended).
consolidate() {
echo ""
echo "Consolidating data…"

# Header line of the consolidated file; the field numbers used by the awk
# filters of this script refer to this column order
echo "Maintainer|SRPM|RPM|EVR|Arch|RPM size|RPM metadata|foreign data|\
rpmlint score|filename|family name|face name|fixed family name|\
fixed face name|format (fontconfig)|format (libmagic)|file size|checksum|\
partial scripts|partial blocks|fontlint|symlink target|symlink provider|\
core fonts use" > "$FL"

# Only when the raw results exist, are readable and are not empty
if  [ -s "tmp/$FL.2" -a -r "tmp/$FL.2" ] ; then

  # Prefix every result line with the owner of its source package (first
  # field of the raw results), as returned by an external lookup command.
  # NOTE(review): the lookup command of this pipeline is the empty string
  # "". It cannot run, its error is discarded, and every line presumably
  # ends up with an empty owner — confirm which command was meant here.
  # FIXME: make this distro-agnostic
  cut -d "|" -f 1 "tmp/$FL.2" | sort | uniq \
    | "" 2>/dev/null \
    | sed 's+ +|+g' \
    | awk -F "|" -v file="tmp/$FL.2" '
        { owner[$2]=$1 }
        END {
          while ((getline < file) > 0) {
            print owner[$1] "|" $0
          }
        }' | sort -df -t "|" -k 1 -k 2 -k 3 > "tmp/$FL.3"

  # Append one field to every line: empty for regular files; for "Link"
  # entries, the "RPM-EVR" of the line whose filename (field 10) equals the
  # link target (field 22), or empty when there is no such line
  awk -F "|" '
    ($16 != "Link") { source[$10] = $3 "-" $4
                      out[$0] = $0 "|" }
    ($16 == "Link") { link[$0] = $22 }
    END {
      for (line in link) {
        out[line] = line "|" source[link[line]]
      }
      n = asort(out)
      for (i = 1; i <= n; i++) print out[i]
    }' "tmp/$FL.3" > "tmp/$FL.4"

  # Sort, blank the fields that only contain 0 (the substitution is run
  # twice because a single pass cannot match two adjacent |0| fields), and
  # strip the trailing separators
  cat "tmp/$FL.4" \
    | sort -df -t "|" -k 1 -k 2 -k 3 -k 4n -k 13 -k 14 -k 10 \
    | sed 's=$=|=g' | sed 's=|0|=||=g' | sed 's=|0|=||=g' | sed 's=|\+$==g' \
    >> "$FL"

fi

# The test summary starts with the six identification columns; each test
# adds its own column afterwards
echo "Maintainer|SRPM|RPM|EVR|Arch|File" > "$TSUM"
}


# Run every test on the consolidated data file "$FL".
#
# Each test follows the same pattern: print the test title obtained from
# "$DATADIR/test-info", write the lines of "$FL" that fail the test to
# tmp/tmp.csv, list the packages or files involved, then call test_wrapup
# to add the test column to "$TSUM" and print the summary of the test.
# Field numbers refer to the header line of "$FL"; the header itself is
# skipped with NR > 1. Lines whose field 24 is "X" are core fonts users and
# lines whose field 16 is "Link" are symlinks; most tests exclude both.
# Globals read: DATADIR FL TSUM
analyse() {

echo ""
echo "Conducting tests:"
echo ""


# Font files outside /usr/share/fonts/
echo -n "— "
"$DATADIR/test-info" outside-usr-share-fonts title

awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
            ($10 !~ /^\/usr\/share\/fonts\//)' "$FL" \
  | sort -df -t "|" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup outside-usr-share-fonts tmp/tmp.csv "$TSUM"


# Font files in packages that do not declare font metadata
echo -n "— "
"$DATADIR/test-info" without-rpm-metadata title

awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
            ($7 != "M")' "$FL" \
  | sort -df -t "|" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup without-rpm-metadata tmp/tmp.csv "$TSUM"


# Packages that contain more than one font family
echo -n "— "
"$DATADIR/test-info" family-mixing title

awk -F '|' '
  (NR > 1) && ($11 != "") {
    rpm = $3 "-" $4 "." $5
    files[rpm] = files[rpm] $0 "#"
    if ( ! family[rpm] ) {
      family[rpm] = $11
    }
    else {
      if (family[rpm] != $11) {
        mixed[rpm] = 1
      }
    }
  }
  END {
    for (rpm in mixed) {
      sub("#$", "", files[rpm])
      split(files[rpm], lines,"#")
      for (l in lines) {
        print lines[l]
      }
    }
  }' "$FL" \
  | sort -df -t "|" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup family-mixing tmp/tmp.csv "$TSUM"


# Files that share the same checksum
echo -n "— "
"$DATADIR/test-info" duplicated-file title

awk -F '|' '
  (NR > 1) && ($24 != "X") && ($16 != "Link") {
    files[$18] = files[$18] $0 "#"
  }
  END {
    for (checksum in files) {
      sub("#$", "", files[checksum])
      count = split(files[checksum], lines,"#")
      if (count > 1) {
        for (l in lines) {
          print lines[l]
        }
      }
    }
  }' "$FL" \
  | sort -g -t "|" -k 18 > tmp/tmp.csv

[ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
awk -F '|' '
  {
    # Start from an empty signature: without this reset the signature of
    # the previous line leaked into (and got nested in) the current one
    sig = ""
    rpm = $3
    if ( $7 == "M" ) { rpm = "[" rpm "]" }
    sub(".*/", "", $10)
    if ( $11 != "" ) { sig = $11 }
    if ( $12 != "") {
      if ( sig != "" ) { sig = sig ", " }
      sig = sig $12
    }
    if ( sig != "" ) { sig = "#(" sig ")" }
    print rpm "|" $18 "|" $10 sig
  }' tmp/tmp.csv | enum_rpm

test_wrapup duplicated-file tmp/tmp.csv "$TSUM"

# Faces (family name + face name) found in more than one package
echo -n "— "
"$DATADIR/test-info" duplicated-face-ext title

awk -F '|' '
  (NR > 1) && ($11 != "") && ($12 != "") {
    face = $11 "|" $12
    rpm  = $3  "-" $4 "." $5
    files[face] = files[face] $0 "#"
    if ( ! loc[face] ) { loc[face] = rpm }
    else {
      if (loc[face] != rpm) {
        duplicated[face] = 1
      }
    }
  }
  END {
    for (face in duplicated) {
      sub("#$", "", files[face])
      split(files[face], lines,"#")
      for (l in lines) {
        print lines[l]
      }
    }
  }' "$FL" \
  | sort -df -t "|" -k 11 -k 12 > tmp/tmp.csv

[ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
awk -F '|' '
  {
    rpm = $3
    if ( $7 == "M" ) { rpm = "[" rpm "]" }
    sub(".*/", "", $10)
    print rpm "|" $11 ", " $12 "|" $10
  }' tmp/tmp.csv | enum_rpm

test_wrapup duplicated-face-ext tmp/tmp.csv "$TSUM"


# Files with an empty fontconfig format field
echo -n "— "
"$DATADIR/test-info" fc-query title

awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
            ($15 == "")' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup fc-query tmp/tmp.csv "$TSUM"


# Files whose libmagic description does not contain the word "font"
echo -n "— "
"$DATADIR/test-info" libmagic title

awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
            (tolower(" " $16 " ") !~ / font /)' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup libmagic tmp/tmp.csv "$TSUM"


# Symlinks without a known provider for their target
echo -n "— "
"$DATADIR/test-info" broken-symlink title

awk -F '|' '(NR > 1) && ($16 == "Link") && ($23 == "")' "$FL" > tmp/tmp.csv

[ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
awk -F '|' '
  {
    rpm = $3
    if ( $7 == "M" ) { rpm = "[" rpm "]" }
    sub(".*/", "", $10)
    print rpm "|" $10 " ↛ " $22
  }' tmp/tmp.csv | enum_rpm

test_wrapup broken-symlink tmp/tmp.csv "$TSUM"

# Packages with a non-empty rpmlint score
echo -n "— "
"$DATADIR/test-info" rpmlint title

awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
            ($9 != "")' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup rpmlint tmp/tmp.csv "$TSUM"


# Packages with a non-empty foreign data field
echo -n "— "
"$DATADIR/test-info" mixed-with-non-font-data title

awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
            ($8 != "")' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup mixed-with-non-font-data tmp/tmp.csv "$TSUM"


# Font files in packages that are not noarch
echo -n "— "
"$DATADIR/test-info" arch-package title

awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
            ($5 != "noarch")' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup arch-package tmp/tmp.csv "$TSUM"


# Package names that are not made of lowercase letters, digits, dots and
# dashes followed by "-fonts"
echo -n "— "
"$DATADIR/test-info" bad-rpm-naming title

awk -F "|" '(NR > 1) && ($16 != "Link") && ($24!="X") && \
            $3 !~ /^[0-9abcdefghijklmnopqrstuvwxyz\.-]*-fonts$/' "$FL" \
            > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup bad-rpm-naming tmp/tmp.csv "$TSUM"


# Family or face names that differ (case aside) from their fixed version
echo -n "— "
"$DATADIR/test-info" bad-naming title

awk -F '|' '(NR > 1) && \
            ((tolower($11)!=tolower($13)) || (tolower($12)!=tolower($14)))' \
    "$FL" > tmp/tmp.csv

[ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
awk -F '|' '
  {
    rpm = $3
    if ( $7 == "M" ) { rpm = "[" rpm "]" }
    sub(".*/", "", $10)
    print rpm "|" $11 ", " $12 " → " $13 ", " $14 "|" $10
  }' tmp/tmp.csv | enum_rpm

test_wrapup bad-naming tmp/tmp.csv "$TSUM"


# Core fonts users
echo -n "— "
"$DATADIR/test-info" core-fonts title

awk -F '|' '(NR > 1) && ($24=="X")' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup core-fonts tmp/tmp.csv "$TSUM"


# Symlinks to font files
echo -n "— "
"$DATADIR/test-info" font-linking title

awk -F '|' '(NR > 1) && ($16=="Link")' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup font-linking tmp/tmp.csv "$TSUM"


# Faces present several times within a single package (PCF and Type 1
# formats excluded)
echo -n "— "
"$DATADIR/test-info" duplicated-face-int title

awk -F '|' '
  (NR > 1) && ($24!="X") && \
    ($15 != "PCF") && ($15 != "Type 1") && \
    ($11 != "") && ($12 != "") {
    sig = $3 "|" $4 "|" $5 "|" $11 "|" $12
    files[sig] = files[sig] $0 "#"
  }
  END {
    for (sig in files) {
      sub("#$", "", files[sig])
      n = split(files[sig],lines,"#")
      if (n > 1) {
        for (l in lines) { print lines[l] }
      }
    }
  }' "$FL" \
  | sort -df -t "|" -k 11 -k 12 -k 3 -k 2 > tmp/tmp.csv

[ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
awk -F '|' '
  {
    rpm = $3
    if ( $7 == "M" ) { rpm = "[" rpm "]" }
    sub(".*/", "", $10)
    print rpm "|" $11 ", " $12 "|" $10
  }' tmp/tmp.csv | enum_rpm

test_wrapup duplicated-face-int tmp/tmp.csv "$TSUM"


# Files with a positive fontlint field
echo -n "— "
"$DATADIR/test-info" fontlint title

awk -F '|' '(NR > 1) && ($24!="X") && ($21 > 0)' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup fontlint tmp/tmp.csv "$TSUM"


# Files known to fontconfig that lack a family name or a face name
echo -n "— "
"$DATADIR/test-info" no-english-metadata title

awk -F '|' '(NR > 1) && ($24!="X") && \
            ($15 != "") && (($11 == "") || ($12 == ""))' "$FL" > tmp/tmp.csv

[ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
awk -F '|' '
  {
    rpm = $3 "-" $4 "." $5
    if ( $7 == "M" ) { rpm = "[" rpm "]" }
    print $10 "|" rpm
  }' tmp/tmp.csv \
  | column -t -s '|' | pretty_indent

test_wrapup no-english-metadata tmp/tmp.csv "$TSUM"


# Files with a positive partial scripts field
echo -n "— "
"$DATADIR/test-info" partial-scripts title

awk -F '|' '(NR > 1) && ($24!="X") && ($19 > 0)' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup partial-scripts tmp/tmp.csv "$TSUM"


# Files with a positive partial blocks field
echo -n "— "
"$DATADIR/test-info" partial-blocks title

awk -F '|' '(NR > 1) && ($24!="X") && ($20 > 0)' "$FL" > tmp/tmp.csv

list_rpm tmp/tmp.csv
test_wrapup partial-blocks tmp/tmp.csv "$TSUM"
}


# Print the audit results: statistics on the packages that declare font
# metadata, statistics on the font files found in other packages, then the
# test score tables built from "$TSUM".
sum_up() {
  echo ""
  echo "Audit results:"
  echo ""

  echo "– packages that declare font metadata:"

  awk -F '|' '(NR > 1) && ($7 == "M") && ($24!="X")' "$FL" > tmp/tmp.csv
  stats tmp/tmp.csv

  echo "☛ File size is computed as extracted, while rpm is a compressed \
format." | pretty_indent
  echo "☛ Mid-term, files in legacy PCF or Type1 formats need to be converted \
or removed." | pretty_indent
  echo ""

  echo "– font files in other packages (we should not find any!)"

  awk -F '|' '(NR > 1) && ($7 != "M") && ($16 != "Link") && ($24!="X")' "$FL" > tmp/tmp.csv
  stats tmp/tmp.csv

  # The warning only makes sense when such files were found
  if [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] ; then
    echo "☛ Bad packaging may result in arched packages or mixed content." \
      | pretty_indent && echo ""
  fi

  echo "– errors, warnings and suggestions:"
  echo ""

  test_score "$TSUM"
}


# Prepare, in the spam directory, one compressed archive per source package
# listed in "$TSUM" plus a send-messages.sh script to mail them.
# Each archive holds the full report, the lines of "$FL" and "$TSUM" about
# the source package, the data collected for its packages and the text of
# the message.
# Globals read: TSUM FL bad_pattern TIMESTAMP ID REPOURL CREATEREPO YUM
prepare_spam() {
echo ""
echo "Packing mail data…"

mkdir spam

# One pass per source package (second field of the test summary)
for srpm in $(awk -F '|' 'FNR>1 { print $2 }' "$TSUM" | uniq) ; do
   [ ! -d "tmp/$srpm" ] && mkdir "tmp/$srpm"
  cp -p report.txt "tmp/$srpm/repo-report.txt"
  # Header line followed by the lines about this source package
  for file in "$FL" "$TSUM" ; do
    awk 'FNR==1' "$file" > "tmp/$srpm/$file"
    awk -F '|' -v srpm="$srpm" 'FNR>1 && $2==srpm' "$file" >> "tmp/$srpm/$file"
  done
  # Data directories of the packages built from this source package, named
  # like the sanitised name-EVR.arch of each package
  for rpm in $(awk -F '|' 'FNR>1 { print $3 "-" $4 "." $5 }' "tmp/$srpm/$TSUM" | uniq | sed "s+$bad_pattern+_+g") ; do
    cp -pr "data/$rpm" "tmp/$srpm"
  done
  # The here-document is expanded: variables and the test_score command
  # substitution are evaluated when the message is written
  cat > tmp/$srpm/message.txt << EOF
Dear packager,

At $TIMESTAMP, your “$srpm” package failed one or more of the tests
I was performing on the “$ID” repository located at:
$REPOURL

There are three different reasons that may cause this message:
1. your package is including one or more font files, but not packaging
   them properly;
2. your package is including one or more font files, and I've found
   issues in some of them;
3. your package is not shipping any font file, but the way it accesses
   fonts in other packages is not satisfying.

To stop receiving this message, you need to:
1. drop the font files or fix their packaging;
2. relay the fonts issues to the fonts upstream to get them revised;
3. work with the code upstream to improve the way it accesses font
   files (usually by making it use fontconfig through a higher-level
   text library such as pango, pango-cairo, harfbuzz, or QT)

You can self-check your packages at any time by:
1. installing $CREATEREPO and fontpackages-tools:
# $YUM install $CREATEREPO fontpackages-tools
2. putting your packages and any font package they depends on in a
   test directory
3. indexing this directory with $CREATEREPO:
$ $CREATEREPO path-to-test-directory
4. running repo-font-audit:
$ repo-font-audit test absolute-path-to-test-directory

A summary of the issues I detected is appended here. For your
convenience a more comprehensive analysis is also attached to this
message.

Errors, warnings and suggestions:

$(test_score "tmp/$srpm/$TSUM")

Please take the appropriate measures to fix the “$srpm” package.
I will warn you again if it is still necessary next time I am ran.

This report was generated by the repo-font-audit command from:
http://fedoraproject.org/wiki/fontpackages

Please post questions, suggestions, patches or bug reports to:
https://admin.fedoraproject.org/mailman/listinfo/fonts
(subscription required)

Your friendly QA robot,

-- 
repo-font-audit
EOF
  # Archive the directory of the source package and move the compressed
  # archive to the spam directory
  cd tmp
  tar cf "$srpm.tar" "$srpm"
  cd ..
  xz -9  "tmp/$srpm.tar"
  mv "tmp/$srpm.tar.xz" spam
done
# Generate the mailing script. In this expanded here-document, what is
# escaped with a backslash is written literally and evaluated when the
# generated script runs; the rest (source package list, $ID) is expanded now.
cat > spam/send-messages.sh << EOF
#!/bin/sh
# Send warnings to problem package owners
# This is a bit distribution-specific, people from other distributions are
# welcome to suggest how to make it more agnostic

#EMAIL="repo-font-audit <your@mail>"
#REPLYTO=another@mail

#export EMAIL REPLYTO

for srpm in \\
$(awk -F '|' 'FNR>1 { print $2 " \\" }' "$TSUM" | uniq)
; do
  tar -xf \$srpm.tar.xz */message.txt -O \\
    | mutt -s "[RFA] Your \$srpm $ID package did not pass QA" \\
           -a \$srpm.tar.xz -- \$srpm-owner@fedoraproject.org
  sleep 5
done

EOF
chmod +x spam/send-messages.sh
}

# Build the three result archives (full data, short summary, mail data),
# move them to "$ORIGDIR", then print the closing report: run time since
# collect started, number of items processed and location of the archives.
# Globals read: SPAM RES SRES TSUM FL ORIGDIR start_time
pack_data() {
  echo "Packing result data…"


  mv spam "$SPAM"

  # Full results: summaries, consolidated data and per-package data
  mkdir -p "$RES/data"
  cp "$TSUM" summary.txt report.txt "$FL" "$RES/"
  cp -pr data/* "$RES/data" 2>/dev/null

  # Short results: summaries only
  mkdir "$SRES/"
  cp "$TSUM" summary.txt report.txt "$SRES/"

  for report in "$RES" "$SRES" "$SPAM" ; do
    tar cf "$report.tar" "$report"
    xz -9  "$report.tar"
    mv "$report.tar.xz" "$ORIGDIR/"
  done

  echo ""
  echo "Audit complete!"

  # Consolidated data without its header line, for the final count
  awk 'NR > 1' "$FL" > "tmp/$FL"

  # Split the elapsed seconds in hours, minutes and seconds; only the
  # non-zero parts are reported
  end_time=$(date "+%s")
  t=$(($end_time - $start_time))
  th=$(($t / 3600))
  tm=$((($t / 60) % 60))
  ts=$(($t % 60))
  if [ $th -gt 0 ] ; then
    elapsed_time="$th h"
  fi
  if [ $tm -gt 0 ] ; then
    elapsed_time="${elapsed_time:+$elapsed_time }$tm min"
  fi
  if [ $ts -gt 0 ] ; then
    elapsed_time="${elapsed_time:+$elapsed_time }$ts s"
  fi

  cat << EOF | pretty_indent

Run time: $elapsed_time.
Number of items processed:
$(summary "tmp/$FL")

1. Extracted data: $ORIGDIR/$RES.tar.xz
2. Short summary: $ORIGDIR/$SRES.tar.xz
3. Mail data: $ORIGDIR/$SPAM.tar.xz

This report was generated by the repo-font-audit command from:
http://fedoraproject.org/wiki/fontpackages

Please post questions, suggestions, patches or bug reports to:
https://admin.fedoraproject.org/mailman/listinfo/fonts
(subscription required)
EOF

}

# End of function declarations

# Main script: check the arguments, set up the globals shared by the
# functions above, then run every stage of the audit in a private temporary
# directory that is removed at the end.

[ "$#" -lt "2" ] && usage

# Run identification
TIMESTAMP=$(date -u +%Y%m%dT%H%M%SZ)
ID=$1
REPOID="$1-rfa-$TIMESTAMP"
REPOURL=$(echo "$2" | fix_url)
# The result archives are delivered to the directory the script is run from
ORIGDIR="$PWD"

# Names of the working files and of the result archives
FPL="font-packages.csv"
PWFL="packages-with-fonts.csv"
XPL="package-using-x11.csv"
CSL="checksums.csv"
FL="consolidated-data.csv"
TSUM="test-summary.csv"
RES="repo-font-audit-$ID-$TIMESTAMP"
SRES="repo-font-audit-$ID-$TIMESTAMP-short"
SPAM="repo-font-audit-$ID-$TIMESTAMP-mail"

# Use the dnf tools when dnf is installed, the yum ones otherwise
if [ -x /usr/bin/dnf ] ; then
    CREATEREPO='createrepo_c'
    REPOQUERY='dnf repoquery'
    YUM='dnf'
else
    CREATEREPO='createrepo'
    REPOQUERY='repoquery'
    YUM='yum'
fi

# The stages create and delete files with relative paths: stop right away
# if the private working directory cannot be created or entered, instead of
# running the audit in whatever directory the shell ends up in
TMPDIR=$(mktemp -d --tmpdir=/tmp "$RES-XXXXXXXXXX") || {
  echo "Cannot create a temporary directory in /tmp" >&2
  exit 1
}
cd "$TMPDIR" || exit 1

mkdir tmp || exit 1

collect
consolidate
analyse | tee report.txt
sum_up  | tee summary.txt
prepare_spam
pack_data

cd "$ORIGDIR"
rm -fr "$TMPDIR"
echo "♻"