#!/bin/sh
# Implements a font name cleanup heuristic similar to the one described in
# http://blogs.msdn.com/text/attachment/2249036.ashx

# Print the command-line synopsis and an example invocation on stderr,
# then terminate the script with exit status 1.
usage() {
  {
    printf 'Usage: %s <family> <style>\n' "$0"
    printf '%s\n' \
      '<family>: font family to test' \
      '<style>:  font style to test' \
      '' \
      'Example:'
    printf '%s "DejaVu Sans" "Condensed Bold Oblique"\n\n' "$0"
  } >&2
  exit 1
}

# fix_naming FAMILY STYLE
#
# Splits a font family/style pair into a cleaned-up family name and a
# canonical face name and prints them on stdout as "<family><TAB><face>".
#   $1  font family, e.g. "DejaVu Sans"
#   $2  font style,  e.g. "Condensed Bold Oblique"
# The face is built from the recognised stretch, weight and slant qualifiers
# (in that order); when none is found it is the Regular alias that was given,
# or "Regular" when there was none.
# The awk program relies on gensub(), a GNU awk extension, so gawk is used
# when it is installed under that name; otherwise plain "awk" is tried.
fix_naming() {
"$(command -v gawk || echo awk)" --re-interval -v family="$1" -v face="$2" '
  BEGIN {
    # Attribute classes, in processing order.
    # regular is a slant but is special-cased in naming
    attrs = "regular|slant|stretch|weight"

    # Canonical values of every class, in matching priority order.  A digit
    # suffix (Thin1/Thin2, ...) lets one canonical value appear at two
    # different priorities; the digit is stripped before printing.
    blocks["regular"] = "Regular"
    blocks["slant"]   = "Italic|Oblique"
    blocks["stretch"] = "UltraCondensed|ExtraCondensed|SemiCondensed|SemiExpanded|ExtraExpanded|UltraExpanded|Condensed|Expanded"
    blocks["weight"]  = "Thin1|ExtraLight|DemiBold1|ExtraBold1|Bold|Thin2|Light|Medium|ExtraBlack|Black|DemiBold2|ExtraBold2"

    # Spellings accepted for each canonical value (matched case-insensitively).
    aliases["Regular"]  = "Book|Normal|Regular|Roman|Upright"

    aliases["Italic"]  = "ita|ital|Italic|cursive|kursiv"
    aliases["Oblique"] = "inclined|Oblique|backslanted|backslant|slanted"

    aliases["UltraCondensed"] = "extra compressed|ext compressed|ultra compressed|ultra condensed|ultra cond|UltraCondensed"
    aliases["ExtraCondensed"] = "compressed|extra condensed|ext condensed|extra cond|ext cond|ExtraCondensed"
    aliases["SemiCondensed"]  = "narrow|compact|semi condensed|semi cond|SemiCondensed"
    aliases["SemiExpanded"]   = "wide|semi expanded|semi extended|SemiExpanded"
    aliases["ExtraExpanded"]  = "extra expanded|ext expanded|extra extended|ext extended|ExtraExpanded"
    aliases["UltraExpanded"]  = "ultra expanded|ultra extended|UltraExpanded"
    aliases["Condensed"]    = "Condensed|cond"
    aliases["Expanded"]     = "extended|Expanded|extended"

    aliases["Thin1"]    = "extra thin|ext thin|ultra thin"
    aliases["ExtraLight"] = "extra light|ext light|ultra light|UltraLight|ExtraLight"
    aliases["DemiBold1"]  = "semi bold|demi bold|SemiBold|DemiBold"
    aliases["ExtraBold1"] = "extra bold|ext bold|ultra bold|UltraBold|ExtraBold"
    aliases["Bold"]     = "Bold"
    aliases["Thin2"]    = "thin"
    aliases["Light"]    = "Light"
    aliases["Medium"]     = "Medium"
    aliases["ExtraBlack"] = "extra black|ext black|ultra black|UltraBlack|ExtraBlack"
    aliases["Black"]    = "Black|heavy|nord"
    aliases["DemiBold2"]  = "demi"
    aliases["ExtraBold2"] = "ultra"

    # Rewrite every alias list in place: lower-case it and reverse the word
    # order inside each alias ("extra bold" becomes "bold extra") so that it
    # can be matched against the reversed names built below.
    nat = split(attrs,attributes,"|")
    for ( at = 1 ; at <= nat ; at++ ) {
      nb = split(blocks[attributes[at]],b_l,"|")
      for ( b = 1 ; b <= nb ; b++ ) {
        nal = split(tolower(aliases[b_l[b]]),al_l,"|")
        aliases[b_l[b]] = ""
        for ( al = 1 ; al <= nal ; al++ ) {
          # WWS uses reverse-matching everywhere
          nw = split(al_l[al], words," ")
          for (w = nw ; w >= 1 ; w-- ) {
            aliases[b_l[b]] = aliases[b_l[b]] words[w] " "
          }
          sub(" $", "|", aliases[b_l[b]])
        }
        sub("\\|$", "", aliases[b_l[b]])
      }
    }

    # Family and face are taken as they are: joining them with a separator
    # and splitting again would misplace the face whenever the family itself
    # contains that separator.
    s[1] = family
    s[2] = face
    nf = 2

    # Normalise both strings, then store them with their words in reverse
    # order and a space on each side (" w3 w2 w1 ").
    # This is more aggressive than demanded by WWS
    for ( i = 1 ; i <= nf ; i++ ) {
      fs[i] = gensub("&", " and ", "g", s[i])
      # NOTE(review): as written this pattern is an escaped "(" followed by
      # one digit and an unbalanced ")" carrying the {3} interval; it looks
      # like it was meant to strip a parenthesised number.  Left untouched
      # until the intended pattern is confirmed.
      fs[i] = gensub("\\([[:digit:]]){3}", " ", "g", fs[i])
      # set digit runs apart as words of their own
      fs[i] = gensub("([[:digit:]]+)", " \\1 ", "g", fs[i])
      # split CamelCase: a space goes before an upper-case letter that sits
      # between two lower-case ones
      fs[i] = gensub("([[:lower:]])([[:upper:]])([[:lower:]])", "\\1 \\2\\3", "g", fs[i])
      # blanks, tabs, underscores, dots and hyphens all become one space
      fs[i] = gensub("[ \t_\\.-]+", " ", "g", fs[i])
      fs[i] = tolower(fs[i])
      rfs[i] = ""
      nw = split(fs[i], words, " ")
      for (w = nw ; w >= 1 ; w-- ) {
        rfs[i] = rfs[i] words[w] " "
      }
      sub("^"," ",rfs[i])
    }

    rffamily = rfs[1]
    rfface = rfs[2]

    # Remove the rightmost occurrence of a regular face name
    # (leftmost in the reversed string).  Both loops stop at the first alias
    # whose removal changes the string.
    nrfface = rfface
    control = nrfface
    nb = split(blocks[attributes[1]],b_l,"|")
    b = 1
    while ((b <= nb) && (nrfface == control)) {
      nal = split(aliases[b_l[b]],al_l,"|")
      al = 1
      while ((al <= nal) && (nrfface == control)) {
        sub(" " al_l[al] " ", " ", nrfface)
        al++
      }
      b++
    }
    # We should canonicalize Regular too but too many fonts get it wrong and
    # it is not terribly important
    if (nrfface != control) {
      # al was incremented once past the alias that matched
      canonical_attribute["regular"] = toupper(substr(al_l[al-1], 1,1)) substr(al_l[al-1], 2)
    }

    # Append FaceName to FontFamily, first dropping one copy of the face from
    # the family when the family already contains it.  The face is arbitrary
    # user text, so it is located with index() rather than being compiled as
    # a regular expression (which fails on, or mismatches, text holding
    # characters such as parentheses).
    pos = index(rffamily, nrfface)
    if (pos > 0) {
      rffamily = substr(rffamily, 1, pos - 1) " " substr(rffamily, pos + length(nrfface))
    }
    nrffont = gensub("  ", " ", "g", nrfface rffamily)

    # Process other qualifiers: for slant, stretch and weight, remove the
    # first alias found (blocks and aliases are tried in priority order) and
    # remember the canonical value it belongs to.
    for ( at = 2 ; at <= nat ; at++ ) {
      canonical_attribute[attributes[at]] = ""
      control = nrffont
      nb = split(blocks[attributes[at]],b_l,"|")
      b = 1
      while ((b <= nb) && (nrffont == control)) {
        nal = split(aliases[b_l[b]],al_l,"|")
        al = 1
        while ((al <= nal) && (nrffont == control)) {
          sub(" " al_l[al] " ", " ", nrffont)
          al++
        }
        b++
      }
      if (nrffont != control) {
        # b was incremented once past the block that matched; drop the
        # priority digit from its name
        canonical_attribute[attributes[at]] = gensub("[[:digit:]]", "", "g", b_l[b-1])
      }
    }

    # Canonical face: stretch, weight, slant, without the blanks left behind
    # by empty components.
    cnface = canonical_attribute["stretch"] " " canonical_attribute["weight"] " " canonical_attribute["slant"]
    sub("  ", " ", cnface)
    sub(" $", "", cnface)
    sub("^ ", "", cnface)
    if (cnface == "") {
      if ( canonical_attribute["regular"] != "" )
           { cnface = canonical_attribute["regular"] }
      else { cnface = "Regular" }
    }

    # Make sure there are no stray attributes left
    for ( at = 1 ; at <= nat ; at++ ) {
      nb = split(blocks[attributes[at]],b_l,"|")
      for ( b = 1 ; b <= nb ; b++ ) {
        nal = split(aliases[b_l[b]],al_l,"|")
        for ( al = 1 ; al <= nal ; al++ ) {
          sub(" " al_l[al] " ", " ", nrffont)
        }
      }
    }

    # What is left is the family: put the words back in their original order
    # and capitalise the first character of each.
    cnfamily = ""
    nw = split(nrffont, words, " ")
    for (w = nw ; w >= 1 ; w-- ) {
      cnfamily = cnfamily toupper(substr(words[w], 1,1)) substr(words[w], 2) " "
    }
    sub(" $", "", cnfamily)

    # Keep the original family name if it matches since some of the
    # operations performed before, while necessary for attribute detection,
    # can be needlessly destructive on legitimate family names
    nfamily = gensub("([[:digit:]]+)", " \\1 ", "g", family)
    nfamily = gensub("([[:lower:]])([[:upper:]])([[:lower:]])", "\\1 \\2\\3", "g", nfamily)
    # every hyphen and every run of blanks has to be normalised, not only the
    # first one, or names with two hyphens or with digits never compare equal
    gsub("-", " ", nfamily)
    gsub(" +", " ", nfamily)
    sub(" $", "", nfamily)
    sub("^ ", "", nfamily)

    if ( tolower(nfamily) == tolower(cnfamily) ) { cnfamily = family }

    print cnfamily "\t" cnface
  }'

}

# Both a family and a style are required; without them show the help text
# (which ends the script), otherwise clean up the given pair.
if [ "$#" -lt "2" ]; then
  usage
fi
fix_naming "$1" "$2"