# uniq.awk --- do uniq in awk
#
# Requires getopt() and join() library functions
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993

# usage --- report the accepted command-line syntax on standard error
# and terminate the program with exit status 1.
function usage()
{
    print "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]" > "/dev/stderr"
    exit 1
}

# -c    count lines. overrides -d and -u
# -d    only repeated lines
# -u    only nonrepeated lines
# -n    skip n fields
# +n    skip n characters, skip fields first

# Startup: parse the command line and set the globals the rest of the
# program reads:
#   non_repeated_only, repeated_only, do_count  -- the -u / -d / -c flags
#   fcount      -- number of leading fields to skip   (-n)
#   charcount   -- number of leading characters to skip (+n)
#   outputfile  -- where results are written (default "/dev/stdout")
#   count       -- running size of the current group of equal lines
BEGIN {
    outputfile = "/dev/stdout"
    count = 1

    # Every digit is declared as an option taking an argument so that a
    # multi-digit field count such as -12 can be recognized.
    opts = "udc0:1:2:3:4:5:6:7:8:9:"
    while ((c = getopt(ARGC, ARGV, opts)) != -1) {
        if (c == "c")
            do_count++
        else if (c == "d")
            repeated_only++
        else if (c == "u")
            non_repeated_only++
        else if (index("0123456789", c) > 0) {
            # getopt() insists on an argument for these options, which
            # gets in the way for a plain single digit such as -5.
            if (Optarg !~ /^[[:digit:]]+$/) {
                # The "argument" is not the rest of the number: use the
                # digit alone and step Optind back (presumably so the
                # argument getopt() consumed is looked at again).
                fcount = c + 0
                Optind--
            } else
                fcount = (c Optarg) + 0
        } else
            usage()
    }

    # An operand of the form +n directly after the options gives the
    # number of characters to skip.
    if (ARGV[Optind] ~ /^\+[[:digit:]]+$/) {
        charcount = substr(ARGV[Optind], 2) + 0
        Optind++
    }

    # Blank out everything already consumed so that it is not treated
    # as an input file name.
    i = 1
    while (i < Optind)
        ARGV[i++] = ""

    # With neither -d nor -u, print both repeated and unique lines.
    if (! repeated_only && ! non_repeated_only) {
        non_repeated_only = 1
        repeated_only = 1
    }

    # Exactly two operands left: the second one is the output file.
    if (Optind + 2 == ARGC) {
        outputfile = ARGV[ARGC - 1]
        ARGV[ARGC - 1] = ""
    }
}
|
|
Packit |
575503 |
# are_equal --- compare the saved line (global `last') with the current
# record ($0), honoring the global skip settings `fcount' (leading
# fields to drop) and `charcount' (leading characters to drop, applied
# after the fields have been dropped).
#
# Returns 1 if the compared portions are textually identical, else 0.
# All parameters are local scratch variables; callers pass nothing.
#
# The comparisons are forced to be string comparisons by concatenating
# "" to both operands.  Without that, two input lines that both look
# like numbers (e.g. "1" and "1.0", or "10" and "1e1") are compared by
# numeric value and wrongly reported as duplicates.
function are_equal(    n, m, clast, cline, alast, aline)
{
    # Nothing to skip: compare the complete lines.
    if (fcount == 0 && charcount == 0)
        return ((last "") == ($0 ""))

    if (fcount > 0) {
        # Drop the first fcount fields by rebuilding each line from
        # field fcount+1 through its last field.
        n = split(last, alast)
        m = split($0, aline)
        clast = join(alast, fcount+1, n)
        cline = join(aline, fcount+1, m)
    } else {
        clast = last
        cline = $0
    }

    if (charcount) {
        # Drop the first charcount characters of what remains.
        clast = substr(clast, charcount + 1)
        cline = substr(cline, charcount + 1)
    }

    # join() may hand back a single split() element unchanged, so force
    # a string comparison here as well.
    return ((clast "") == (cline ""))
}
|
|
Packit |
575503 |
# First record: there is nothing to compare it with yet, so just save
# it in `last' as the start of the first group and read the next record.
NR == 1 {
    last = $0
    next
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
# Every record after the first: either extend the current group of
# equal lines or, when the record differs, emit the finished group
# according to the selected mode and start a new group with this record.
{
    equal = are_equal()

    if (! equal) {
        if (do_count)    # overrides -d and -u
            printf("%4d %s\n", count, last) > outputfile
        else if ((repeated_only && count > 1) ||
                 (non_repeated_only && count == 1))
            print last > outputfile

        # The current record opens the next group.
        last = $0
        count = 1
    } else
        count++

    # In counting mode processing of the record stops here.
    if (do_count)
        next
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
# End of input: emit the last pending group, then close the output.
#
# The NR > 0 guard matters because `count' starts at 1 and `last'
# starts empty.  If no record was ever read (empty input, or exit
# called during BEGIN, which still runs this rule) there is no pending
# group, and printing would emit a bogus empty or "   1 " line.
END {
    if (NR > 0) {
        if (do_count)
            printf("%4d %s\n", count, last) > outputfile
        else if ((repeated_only && count > 1) ||
                 (non_repeated_only && count == 1))
            print last > outputfile
    } else {
        # No input at all: write nothing, but still open the output so
        # that a named output file ends up existing and empty.
        printf "" > outputfile
    }
    close(outputfile)
}
|