Blame awklib/eg/prog/uniq.awk

Packit 575503
# uniq.awk --- do uniq in awk
Packit 575503
#
Packit 575503
# Requires getopt() and join() library functions
Packit 575503
#
Packit 575503
# Arnold Robbins, arnold@skeeve.com, Public Domain
Packit 575503
# May 1993
Packit 575503
Packit 575503
# usage --- report the accepted command-line syntax on standard error
#           and terminate the program with exit status 1
function usage()
{
    print "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]" > "/dev/stderr"
    exit 1
}
Packit 575503
Packit 575503
# -c    count lines. overrides -d and -u
Packit 575503
# -d    only repeated lines
Packit 575503
# -u    only nonrepeated lines
Packit 575503
# -n    skip n fields
Packit 575503
# +n    skip n characters, skip fields first
Packit 575503
Packit 575503
# Command-line processing.  Sets the globals read by the other rules:
#   count              size of the current run of equal lines (starts at 1)
#   outputfile         where results are written ("/dev/stdout" by default)
#   do_count           nonzero when -c was given
#   repeated_only      nonzero when -d was given (or neither -d nor -u)
#   non_repeated_only  nonzero when -u was given (or neither -d nor -u)
#   fcount             number of leading fields to skip (-n)
#   charcount          number of leading characters to skip (+n)
BEGIN {
    outputfile = "/dev/stdout"
    count = 1
    opts = "udc0:1:2:3:4:5:6:7:8:9:"

    for (;;) {
        c = getopt(ARGC, ARGV, opts)
        if (c == -1)
            break

        if (c == "c") {
            do_count++
            continue
        }
        if (c == "d") {
            repeated_only++
            continue
        }
        if (c == "u") {
            non_repeated_only++
            continue
        }

        # Anything that is not a digit option is an error.
        if (index("0123456789", c) == 0)
            usage()

        # The digits are declared to getopt() as options that take an
        # argument, so for a one-digit count such as -5 it grabs the
        # following command-line word as Optarg.  When Optarg is not all
        # digits, use the single digit and step Optind back so that the
        # word is looked at again.
        if (Optarg !~ /^[[:digit:]]+$/) {
            fcount = c + 0
            Optind--
        } else
            fcount = (c Optarg) + 0
    }

    # An optional +n right after the options gives the character skip count.
    if (ARGV[Optind] ~ /^\+[[:digit:]]+$/) {
        charcount = substr(ARGV[Optind], 2) + 0
        Optind++
    }

    # Blank out everything consumed so far so that awk does not treat
    # the options as input file names.
    i = 1
    while (i < Optind)
        ARGV[i++] = ""

    # With neither -d nor -u, print both repeated and nonrepeated lines.
    if (! (repeated_only || non_repeated_only)) {
        non_repeated_only = 1
        repeated_only = 1
    }

    # Exactly two remaining operands: the second one is the output file.
    if (Optind + 2 == ARGC) {
        outputfile = ARGV[ARGC - 1]
        ARGV[ARGC - 1] = ""
    }
}
Packit 575503
# are_equal --- compare the saved line `last' with the current record $0,
#               honoring the field skip count (fcount) and the character
#               skip count (charcount).  Fields are skipped first, then
#               characters.
#
# Returns 1 if the compared portions are identical strings, 0 otherwise.
#
# All comparisons are forced to be string comparisons by concatenating
# the empty string to both operands.  `last' is assigned from $0, and
# elements produced by split() are numeric strings as well, so a plain
# `==' would compare numerically whenever both sides look like numbers
# and would treat lines such as "1", "1.0" and "01" as duplicates.
#
# n, m, clast, cline, alast, and aline are local variables.
function are_equal(    n, m, clast, cline, alast, aline)
{
    # Nothing to skip: compare the whole lines.
    if (fcount == 0 && charcount == 0)
        return ((last "") == ($0 ""))

    if (fcount > 0) {
        # Drop the first fcount fields and rejoin what remains.
        n = split(last, alast)
        m = split($0, aline)
        clast = join(alast, fcount+1, n)
        cline = join(aline, fcount+1, m)
    } else {
        clast = last
        cline = $0
    }

    if (charcount) {
        # Drop the first charcount characters of what is left.
        clast = substr(clast, charcount + 1)
        cline = substr(cline, charcount + 1)
    }

    return ((clast "") == (cline ""))
}
Packit 575503
# The very first record has nothing to be compared with: remember it as
# the start of the first run and go on to the next record.
NR == 1 { last = $0; next }
Packit 575503
Packit 575503
# Every record after the first: either it extends the current run of
# equal lines, or it ends that run, in which case the run is reported
# according to the options and a new run begins with this record.
{
    equal = are_equal()

    # Same as the saved line: the run just gets longer.
    if (equal) {
        count++
        next
    }

    # The run that ended is described by `last' and `count'.
    if (do_count)    # -c overrides -d and -u
        printf("%4d %s\n", count, last) > outputfile
    else if (count > 1 ? repeated_only : non_repeated_only)
        print last > outputfile

    # Start a new run with the current record.
    last = $0
    count = 1
}
Packit 575503
Packit 575503
# Report the final run of lines, which no following record terminated.
#
# When no record was read at all (empty input, or usage() called exit
# from the BEGIN rule, which still runs this END rule) there is no run
# to report.  Printing unconditionally would produce a spurious empty
# line (or "   1 " with -c), so a run is reported only when NR > 0.
END {
    if (NR == 0)
        # Write nothing, but still open the output file so that an
        # empty input yields an empty output file.
        printf "" > outputfile
    else if (do_count)
        printf("%4d %s\n", count, last) > outputfile
    else if ((repeated_only && count > 1) ||
            (non_repeated_only && count == 1))
        print last > outputfile
    close(outputfile)
}