# Check an awk-level reimplementation of patsplit() against gawk's built-in patsplit().
# Test driver: split each data[i] with fpat[i] using both mypatsplit() and
# patsplit(), print the results side by side, and exit with status 1
# (after a message on /dev/stderr) at the first disagreement.
BEGIN {
    false = 0
    true = 1

    # fpat[i] is the field pattern applied to data[i].
    fpat[1] = "([^,]*)|(\"[^\"]+\")"
    fpat[2] = fpat[1]
    fpat[3] = fpat[1]
    fpat[4] = "aa+"
    fpat[5] = fpat[4]
    fpat[6] = "[a-z]"

    data[1] = "Robbins,,Arnold,"
    data[2] = "Smith,,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
    data[3] = "Robbins,Arnold,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
    data[4] = "bbbaaacccdddaaaaaqqqq"
    data[5] = "bbbaaacccdddaaaaaqqqqa"    # should get trailing qqqqa
    data[6] = "aAbBcC"

    # Walk the test cases in index order; stop at the first missing index.
    for (i = 1; i in data; i++) {
        printf("Splitting: <%s>\n", data[i])
        n = mypatsplit(data[i], fields, fpat[i], seps)
        m = patsplit(data[i], fields2, fpat[i], seps2)
        print "n =", n, "m =", m
        if (n != m) {
            printf("ERROR: counts wrong!\n") > "/dev/stderr"
            exit 1
        }

        # Field-by-field comparison.
        for (j = 1; j <= n; j++) {
            printf("fields[%d] = <%s>\tfields2[%d] = <%s>\n", j, fields[j], j, fields2[j])
            if (fields[j] != fields2[j]) {
                printf("ERROR: data %d, field %d mismatch!\n", i, j) > "/dev/stderr"
                exit 1
            }
        }

        # Separator comparison; separators are indexed from 0, and only the
        # indices present in seps (the mypatsplit() result) are visited.
        for (j = 0; j in seps; j++) {
            printf("seps[%d] = <%s>\tseps2[%d] = <%s>\n", j, seps[j], j, seps2[j])
            if (seps[j] != seps2[j]) {
                printf("ERROR: data %d, separator %d mismatch!\n", i, j) > "/dev/stderr"
                exit 1
            }
        }
    }
}

# mypatsplit --- split a string into the pieces that match a pattern
#
# Parameters:
#   string   text to split
#   array    cleared, then filled from index 1 with each matched piece
#   pattern  regexp (as a string) describing what a piece looks like
#   seps     cleared, then filled with the unmatched text: seps[0] is
#            whatever precedes the first piece, and seps[k] is text
#            that follows array[k]
#   eosflag, non_empty, nf   locals
#
# Returns the number of elements stored in array (0 for an empty string).
function mypatsplit(string, array, pattern, seps,
            eosflag, non_empty, nf)    # locals
{
    delete array
    delete seps
    if (length(string) == 0)
        return 0

    # Literal 0/1 flags: the function must not rely on the caller having
    # defined global true/false variables.
    eosflag = non_empty = 0
    nf = 0

    # Each iteration matches against what is left of string and then
    # chops the consumed prefix off.
    while (match(string, pattern)) {
        if (RLENGTH > 0) {    # easy case
            non_empty = 1
            if (! (nf in seps)) {
                # Guarded so substr() is never asked for a zero-length piece.
                if (RSTART == 1)    # match at front of string
                    seps[nf] = ""
                else
                    seps[nf] = substr(string, 1, RSTART - 1)
            }
            array[++nf] = substr(string, RSTART, RLENGTH)
            string = substr(string, RSTART + RLENGTH)
            if (length(string) == 0)
                break
        } else if (non_empty) {
            # last match was non-empty, and at the
            # current character we get a zero length match,
            # which we don't want, so skip over it
            non_empty = 0
            seps[nf] = substr(string, 1, 1)
            string = substr(string, 2)
        } else {
            # 0 length match; non_empty is already false on this path
            if (! (nf in seps)) {
                if (RSTART == 1)
                    seps[nf] = ""
                else
                    seps[nf] = substr(string, 1, RSTART - 1)
            }
            array[++nf] = ""
            if (! eosflag)    # prev was empty
                seps[nf] = substr(string, 1, 1)
            # Step past one character beyond the match position
            # (for RSTART == 1 this is substr(string, 2)).
            string = substr(string, RSTART + 1)
        }

        # Allow exactly one more pass once the string is exhausted, so a
        # zero-length match at the very end can still be recorded.
        if (length(string) == 0) {
            if (eosflag)
                break
            eosflag = 1
        }
    }

    # Whatever is left over is the trailing separator.
    if (length(string) > 0)
        seps[nf] = string

    return length(array)
}