# test/fpat4.awk

# Driver: split every sample record twice -- once with the awk-level
# mypatsplit() and once with the built-in patsplit() -- using the pattern
# with the same index, then check that both produce the same field count,
# the same fields, and the same separators.  Progress goes to standard
# output; the first mismatch is reported on /dev/stderr and ends the run
# with exit status 1.
BEGIN {
	# Named boolean constants (awk has none built in)
	false = 0
	true = 1

	# fpat[i] is the field pattern applied to data[i].
	# Patterns 1-3: a run of non-comma characters (possibly empty)
	# or a double-quoted string.
	fpat[1] = "([^,]*)|(\"[^\"]+\")"
	fpat[2] = fpat[1]
	fpat[3] = fpat[1]
	# Patterns 4-5: two or more consecutive "a" characters
	fpat[4] = "aa+"
	fpat[5] = fpat[4]
	# Pattern 6: a single lowercase letter
	fpat[6] = "[a-z]"

	data[1] = "Robbins,,Arnold,"
	data[2] = "Smith,,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
	data[3] = "Robbins,Arnold,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
	data[4] = "bbbaaacccdddaaaaaqqqq"
	data[5] = "bbbaaacccdddaaaaaqqqqa" # should get trailing qqqa
	data[6] = "aAbBcC"

	# Walk the consecutive indices 1, 2, ... until one is missing from data
	for (i = 1; i in data; i++) {
		printf("Splitting: <%s>\n", data[i])
		n = mypatsplit(data[i], fields, fpat[i], seps)
		m = patsplit(data[i], fields2, fpat[i], seps2)
		print "n =", n, "m =", m
		if (n != m) {
			printf("ERROR: counts wrong!\n") > "/dev/stderr"
			exit 1
		}

		# Fields are numbered from 1
		for (j = 1; j <= n; j++) {
			printf("fields[%d] = <%s>\tfields2[%d] = <%s>\n", j, fields[j], j, fields2[j])
			if (fields[j] != fields2[j]) {
				printf("ERROR: data %d, field %d mismatch!\n", i, j) > "/dev/stderr"
				exit 1
			}
		}

		# Separators are numbered from 0 (seps[0] precedes the first field).
		# NOTE(review): only indices present in seps are visited, so an
		# extra trailing entry in seps2 would go unnoticed -- confirm
		# whether that is intended.
		for (j = 0; j in seps; j++) {
			printf("seps[%d] = <%s>\tseps2[%d] = <%s>\n", j, seps[j], j, seps2[j])
			if (seps[j] != seps2[j]) {
				printf("ERROR: data %d, separator %d mismatch!\n", i, j) > "/dev/stderr"
				exit 1
			}
		}
	}
}

# mypatsplit --- awk-level emulation of the built-in patsplit()
#
# Repeatedly applies match() to the not-yet-consumed tail of STRING and
# stores each piece matched by PATTERN in ARRAY[1], ARRAY[2], ...  The text
# between matches is stored in SEPS: SEPS[0] is the text before the first
# field and SEPS[k] is the text following field k (an index is left unset
# when nothing was recorded for it).
#
# Parameters:
#	string	 text to split (only the local copy is consumed)
#	array	 output: the fields; cleared first
#	pattern	 regular expression describing a field
#	seps	 output: the separators; cleared first
#
# Returns the number of fields stored in ARRAY (0 for an empty STRING).
#
# Uses literal 0/1 for its flags so that it does not depend on any
# global "true"/"false" variables having been set by the caller.
function mypatsplit(string, array, pattern, seps,
			eosflag, non_empty, nf) # locals
{
	delete array
	delete seps
	if (length(string) == 0)
		return 0

	# eosflag:   the end of the string has already been reached once
	# non_empty: the previous match consumed at least one character
	eosflag = non_empty = 0
	nf = 0
	while (match(string, pattern)) {
		if (RLENGTH > 0) {	# easy case
			non_empty = 1
			# Whatever precedes the match is the separator in front of
			# this field, unless one was already recorded for this slot
			if (! (nf in seps)) {
				if (RSTART == 1)	# match at front of string
					seps[nf] = ""
				else
					seps[nf] = substr(string, 1, RSTART - 1)
			}
			array[++nf] = substr(string, RSTART, RLENGTH)
			string = substr(string, RSTART + RLENGTH)
			if (length(string) == 0)
				break
		} else if (non_empty) {
			# last match was non-empty, and at the
			# current character we get a zero length match,
			# which we don't want, so skip over it:
			# the skipped character becomes the separator
			non_empty = 0
			seps[nf] = substr(string, 1, 1)
			string = substr(string, 2)
		} else {
			# 0 length match following an empty match (or at the
			# very start): it counts as an empty field
			if (! (nf in seps)) {
				if (RSTART == 1)
					seps[nf] = ""
				else
					seps[nf] = substr(string, 1, RSTART - 1)
			}
			array[++nf] = ""
			# non_empty is known to be false in this branch, so only
			# the end-of-string flag decides whether the next
			# character is recorded as the following separator
			if (! eosflag)
				seps[nf] = substr(string, 1, 1)
			# Step one character past the empty match; for RSTART == 1
			# this is substr(string, 2)
			string = substr(string, RSTART + 1)
		}
		if (length(string) == 0) {
			# Allow exactly one more pass after the text runs out so a
			# pattern that matches the empty string can yield a final
			# empty field; stop the second time around
			if (eosflag)
				break
			eosflag = 1
		}
	}
	# Anything left over that the pattern cannot match is the final separator
	if (length(string) > 0)
		seps[nf] = string

	# nf was incremented once per element stored in array
	return nf
}