Blame field.c

Packit Service f629e6
/*
Packit Service f629e6
 * field.c - routines for dealing with fields and record parsing
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * Copyright (C) 1986, 1988, 1989, 1991-2018 the Free Software Foundation, Inc.
Packit Service f629e6
 *
Packit Service f629e6
 * This file is part of GAWK, the GNU implementation of the
Packit Service f629e6
 * AWK Programming Language.
Packit Service f629e6
 *
Packit Service f629e6
 * GAWK is free software; you can redistribute it and/or modify
Packit Service f629e6
 * it under the terms of the GNU General Public License as published by
Packit Service f629e6
 * the Free Software Foundation; either version 3 of the License, or
Packit Service f629e6
 * (at your option) any later version.
Packit Service f629e6
 *
Packit Service f629e6
 * GAWK is distributed in the hope that it will be useful,
Packit Service f629e6
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service f629e6
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit Service f629e6
 * GNU General Public License for more details.
Packit Service f629e6
 *
Packit Service f629e6
 * You should have received a copy of the GNU General Public License
Packit Service f629e6
 * along with this program; if not, write to the Free Software
Packit Service f629e6
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
#include "awk.h"
Packit Service f629e6
Packit Service f629e6
/*
 * In case that the system doesn't have isblank().
 * Don't bother with autoconf ifdef junk, just force it.
 * See dfa.c and regex_internal.h and regcomp.c. Bleah.
 */

/* is_blank --- return nonzero if c is a space or a horizontal tab */

static int
is_blank(int c)
{
	return c == ' ' || c == '\t';
}
Packit Service f629e6
Packit Service f629e6
typedef void (* Setfunc)(long, char *, long, NODE *);
Packit Service f629e6
Packit Service f629e6
/* is the API currently overriding the default parsing mechanism? */
Packit Service f629e6
static bool api_parser_override = false;
Packit Service f629e6
typedef long (*parse_field_func_t)(long, char **, int, NODE *,
Packit Service f629e6
			     Regexp *, Setfunc, NODE *, NODE *, bool);
Packit Service f629e6
static parse_field_func_t parse_field;
Packit Service f629e6
/*
Packit Service f629e6
 * N.B. The normal_parse_field function pointer contains the parse_field value
Packit Service f629e6
 * that should be used except when API field parsing is overriding the default
Packit Service f629e6
 * field parsing mechanism.
Packit Service f629e6
 */
Packit Service f629e6
static parse_field_func_t normal_parse_field;
Packit Service f629e6
static long re_parse_field(long, char **, int, NODE *,
Packit Service f629e6
			     Regexp *, Setfunc, NODE *, NODE *, bool);
Packit Service f629e6
static long def_parse_field(long, char **, int, NODE *,
Packit Service f629e6
			      Regexp *, Setfunc, NODE *, NODE *, bool);
Packit Service f629e6
static long null_parse_field(long, char **, int, NODE *,
Packit Service f629e6
			     Regexp *, Setfunc, NODE *, NODE *, bool);
Packit Service f629e6
static long sc_parse_field(long, char **, int, NODE *,
Packit Service f629e6
			     Regexp *, Setfunc, NODE *, NODE *, bool);
Packit Service f629e6
static long fw_parse_field(long, char **, int, NODE *,
Packit Service f629e6
			     Regexp *, Setfunc, NODE *, NODE *, bool);
Packit Service f629e6
static const awk_fieldwidth_info_t *api_fw = NULL;
Packit Service f629e6
static long fpat_parse_field(long, char **, int, NODE *,
Packit Service f629e6
			     Regexp *, Setfunc, NODE *, NODE *, bool);
Packit Service f629e6
static void set_element(long num, char * str, long len, NODE *arr);
Packit Service f629e6
static void grow_fields_arr(long num);
Packit Service f629e6
static void set_field(long num, char *str, long len, NODE *dummy);
Packit Service f629e6
static void purge_record(void);
Packit Service f629e6
Packit Service f629e6
static char *parse_extent;	/* marks where to restart parse of record */
Packit Service f629e6
static long parse_high_water = 0; /* field number that we have parsed so far */
Packit Service f629e6
static long nf_high_water = 0;	/* size of fields_arr */
Packit Service f629e6
static bool resave_fs;
Packit Service f629e6
static NODE *save_FS;		/* save current value of FS when line is read,
Packit Service f629e6
				 * to be used in deferred parsing
Packit Service f629e6
				 */
Packit Service f629e6
static awk_fieldwidth_info_t *FIELDWIDTHS = NULL;
Packit Service f629e6
Packit Service f629e6
NODE **fields_arr;		/* array of pointers to the field nodes */
Packit Service f629e6
bool field0_valid;		/* $(>0) has not been changed yet */
Packit Service f629e6
int default_FS;			/* true when FS == " " */
Packit Service f629e6
Regexp *FS_re_yes_case = NULL;
Packit Service f629e6
Regexp *FS_re_no_case = NULL;
Packit Service f629e6
Regexp *FS_regexp = NULL;
Packit Service f629e6
Regexp *FPAT_re_yes_case = NULL;
Packit Service f629e6
Regexp *FPAT_re_no_case = NULL;
Packit Service f629e6
Regexp *FPAT_regexp = NULL;
Packit Service f629e6
NODE *Null_field = NULL;
Packit Service f629e6
Packit Service f629e6
/* init_fields --- set up the fields array to start with */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
init_fields()
Packit Service f629e6
{
Packit Service f629e6
	emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
Packit Service f629e6
Packit Service f629e6
	getnode(fields_arr[0]);
Packit Service f629e6
	*fields_arr[0] = *Nnull_string;
Packit Service f629e6
	fields_arr[0]->flags |= NULL_FIELD;
Packit Service f629e6
Packit Service f629e6
	parse_extent = fields_arr[0]->stptr;
Packit Service f629e6
	save_FS = dupnode(FS_node->var_value);
Packit Service f629e6
Packit Service f629e6
	getnode(Null_field);
Packit Service f629e6
	*Null_field = *Nnull_string;
Packit Service f629e6
	Null_field->valref = 1;
Packit Service f629e6
	Null_field->flags = (STRCUR|STRING|NULL_FIELD); /* do not set MALLOC */
Packit Service f629e6
Packit Service f629e6
	field0_valid = true;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* grow_fields --- acquire new fields as needed */
Packit Service f629e6
Packit Service f629e6
static void
Packit Service f629e6
grow_fields_arr(long num)
Packit Service f629e6
{
Packit Service f629e6
	int t;
Packit Service f629e6
	NODE *n;
Packit Service f629e6
Packit Service f629e6
	erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "grow_fields_arr");
Packit Service f629e6
	for (t = nf_high_water + 1; t <= num; t++) {
Packit Service f629e6
		getnode(n);
Packit Service f629e6
		*n = *Null_field;
Packit Service f629e6
		fields_arr[t] = n;
Packit Service f629e6
	}
Packit Service f629e6
	nf_high_water = num;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* set_field --- set the value of a particular field */
Packit Service f629e6
Packit Service f629e6
/*ARGSUSED*/
Packit Service f629e6
static void
Packit Service f629e6
set_field(long num,
Packit Service f629e6
	char *str,
Packit Service f629e6
	long len,
Packit Service f629e6
	NODE *dummy ATTRIBUTE_UNUSED)	/* just to make interface same as set_element */
Packit Service f629e6
{
Packit Service f629e6
	NODE *n;
Packit Service f629e6
Packit Service f629e6
	if (num > nf_high_water)
Packit Service f629e6
		grow_fields_arr(num);
Packit Service f629e6
	n = fields_arr[num];
Packit Service f629e6
	n->stptr = str;
Packit Service f629e6
	n->stlen = len;
Packit Service f629e6
	n->flags = (STRCUR|STRING|USER_INPUT);	/* do not set MALLOC */
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* rebuild_record --- Someone assigned a value to $(something).
Packit Service f629e6
			Fix up $0 to be right */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
rebuild_record()
Packit Service f629e6
{
Packit Service f629e6
	/*
Packit Service f629e6
	 * use explicit unsigned longs for lengths, in case
Packit Service f629e6
	 * a size_t isn't big enough.
Packit Service f629e6
	 */
Packit Service f629e6
	unsigned long tlen;
Packit Service f629e6
	NODE *tmp;
Packit Service f629e6
	char *ops;
Packit Service f629e6
	char *cops;
Packit Service f629e6
	long i;
Packit Service f629e6
Packit Service f629e6
	assert(NF != -1);
Packit Service f629e6
Packit Service f629e6
	tlen = 0;
Packit Service f629e6
	for (i = NF; i > 0; i--) {
Packit Service f629e6
		tmp = fields_arr[i];
Packit Service f629e6
		tmp = force_string(tmp);
Packit Service f629e6
		tlen += tmp->stlen;
Packit Service f629e6
	}
Packit Service f629e6
	tlen += (NF - 1) * OFSlen;
Packit Service f629e6
	if ((long) tlen < 0)
Packit Service f629e6
		tlen = 0;
Packit Service f629e6
	emalloc(ops, char *, tlen + 1, "rebuild_record");
Packit Service f629e6
	cops = ops;
Packit Service f629e6
	ops[0] = '\0';
Packit Service f629e6
	for (i = 1;  i <= NF; i++) {
Packit Service f629e6
		free_wstr(fields_arr[i]);
Packit Service f629e6
		tmp = fields_arr[i];
Packit Service f629e6
		/* copy field */
Packit Service f629e6
		if (tmp->stlen == 1)
Packit Service f629e6
			*cops++ = tmp->stptr[0];
Packit Service f629e6
		else if (tmp->stlen != 0) {
Packit Service f629e6
			memcpy(cops, tmp->stptr, tmp->stlen);
Packit Service f629e6
			cops += tmp->stlen;
Packit Service f629e6
		}
Packit Service f629e6
		/* copy OFS */
Packit Service f629e6
		if (i != NF) {
Packit Service f629e6
			if (OFSlen == 1)
Packit Service f629e6
				*cops++ = *OFS;
Packit Service f629e6
			else if (OFSlen != 0) {
Packit Service f629e6
				memcpy(cops, OFS, OFSlen);
Packit Service f629e6
				cops += OFSlen;
Packit Service f629e6
			}
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
	tmp = make_str_node(ops, tlen, ALREADY_MALLOCED);
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Since we are about to unref fields_arr[0], we want to find
Packit Service f629e6
	 * any fields that still point into it, and have them point
Packit Service f629e6
	 * into the new field zero.  This has to be done intelligently,
Packit Service f629e6
	 * so that unrefing a field doesn't try to unref into the old $0.
Packit Service f629e6
	 */
Packit Service f629e6
	for (cops = ops, i = 1; i <= NF; i++) {
Packit Service f629e6
		NODE *r = fields_arr[i];
Packit Service f629e6
		/*
Packit Service f629e6
		 * There is no reason to copy malloc'ed fields to point into
Packit Service f629e6
		 * the new $0 buffer, although that's how previous versions did
Packit Service f629e6
		 * it. It seems faster to leave the malloc'ed fields in place.
Packit Service f629e6
		 */
Packit Service f629e6
		if (r->stlen > 0 && (r->flags & MALLOC) == 0) {
Packit Service f629e6
			NODE *n;
Packit Service f629e6
			getnode(n);
Packit Service f629e6
Packit Service f629e6
			*n = *r;
Packit Service f629e6
			if (r->valref > 1) {
Packit Service f629e6
				/*
Packit Service f629e6
				 * This can and does happen.  It seems clear that
Packit Service f629e6
				 * we can't leave r's stptr pointing into the
Packit Service f629e6
				 * old $0 buffer that we are about to unref.
Packit Service f629e6
				 */
Packit Service f629e6
				emalloc(r->stptr, char *, r->stlen + 1, "rebuild_record");
Packit Service f629e6
				memcpy(r->stptr, cops, r->stlen);
Packit Service f629e6
				r->stptr[r->stlen] = '\0';
Packit Service f629e6
				r->flags |= MALLOC;
Packit Service f629e6
Packit Service f629e6
				n->valref = 1;	// reset in the new field to start it off correctly!
Packit Service f629e6
			}
Packit Service f629e6
Packit Service f629e6
			n->stptr = cops;
Packit Service f629e6
			unref(r);
Packit Service f629e6
			fields_arr[i] = n;
Packit Service f629e6
			assert((n->flags & WSTRCUR) == 0);
Packit Service f629e6
		}
Packit Service f629e6
		cops += fields_arr[i]->stlen + OFSlen;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	assert((fields_arr[0]->flags & MALLOC) == 0
Packit Service f629e6
		? fields_arr[0]->valref == 1
Packit Service f629e6
		: true);
Packit Service f629e6
Packit Service f629e6
	unref(fields_arr[0]);
Packit Service f629e6
Packit Service f629e6
	fields_arr[0] = tmp;
Packit Service f629e6
	field0_valid = true;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * set_record:
Packit Service f629e6
 * setup $0, but defer parsing rest of line until reference is made to $(>0)
Packit Service f629e6
 * or to NF.  At that point, parse only as much as necessary.
Packit Service f629e6
 *
Packit Service f629e6
 * Manage a private buffer for the contents of $0.  Doing so keeps us safe
Packit Service f629e6
 * if `getline var' decides to rearrange the contents of the IOBUF that
Packit Service f629e6
 * $0 might have been pointing into.  The cost is the copying of the buffer;
Packit Service f629e6
 * but better correct than fast.
Packit Service f629e6
 */
Packit Service f629e6
void
Packit Service f629e6
set_record(const char *buf, int cnt, const awk_fieldwidth_info_t *fw)
Packit Service f629e6
{
Packit Service f629e6
	NODE *n;
Packit Service f629e6
	static char *databuf;
Packit Service f629e6
	static unsigned long databuf_size;
Packit Service f629e6
#define INITIAL_SIZE	512
Packit Service f629e6
#define MAX_SIZE	((unsigned long) ~0)	/* maximally portable ... */
Packit Service f629e6
Packit Service f629e6
	purge_record();
Packit Service f629e6
Packit Service f629e6
	/* buffer management: */
Packit Service f629e6
	if (databuf_size == 0) {	/* first time */
Packit Service f629e6
		ezalloc(databuf, char *, INITIAL_SIZE, "set_record");
Packit Service f629e6
		databuf_size = INITIAL_SIZE;
Packit Service f629e6
	}
Packit Service f629e6
	/*
Packit Service f629e6
	 * Make sure there's enough room. Since we sometimes need
Packit Service f629e6
	 * to place a sentinel at the end, we make sure
Packit Service f629e6
	 * databuf_size is > cnt after allocation.
Packit Service f629e6
	 */
Packit Service f629e6
	if (cnt >= databuf_size) {
Packit Service f629e6
		do {
Packit Service f629e6
			if (databuf_size > MAX_SIZE/2)
Packit Service f629e6
				fatal(_("input record too large"));
Packit Service f629e6
			databuf_size *= 2;
Packit Service f629e6
		} while (cnt >= databuf_size);
Packit Service f629e6
		erealloc(databuf, char *, databuf_size, "set_record");
Packit Service f629e6
		memset(databuf, '\0', databuf_size);
Packit Service f629e6
	}
Packit Service f629e6
	/* copy the data */
Packit Service f629e6
	memcpy(databuf, buf, cnt);
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Add terminating '\0' so that C library routines
Packit Service f629e6
	 * will know when to stop.
Packit Service f629e6
	 */
Packit Service f629e6
	databuf[cnt] = '\0';
Packit Service f629e6
Packit Service f629e6
	/* manage field 0: */
Packit Service f629e6
	assert((fields_arr[0]->flags & MALLOC) == 0
Packit Service f629e6
		? fields_arr[0]->valref == 1
Packit Service f629e6
		: true);
Packit Service f629e6
Packit Service f629e6
	unref(fields_arr[0]);
Packit Service f629e6
	getnode(n);
Packit Service f629e6
	n->stptr = databuf;
Packit Service f629e6
	n->stlen = cnt;
Packit Service f629e6
	n->valref = 1;
Packit Service f629e6
	n->type = Node_val;
Packit Service f629e6
	n->stfmt = STFMT_UNUSED;
Packit Service f629e6
#ifdef HAVE_MPFR
Packit Service f629e6
	n->strndmode = MPFR_round_mode;
Packit Service f629e6
#endif
Packit Service f629e6
	n->flags = (STRING|STRCUR|USER_INPUT);	/* do not set MALLOC */
Packit Service f629e6
	fields_arr[0] = n;
Packit Service f629e6
	if (fw != api_fw) {
Packit Service f629e6
		if ((api_fw = fw) != NULL) {
Packit Service f629e6
			if (! api_parser_override) {
Packit Service f629e6
				api_parser_override = true;
Packit Service f629e6
				parse_field = fw_parse_field;
Packit Service f629e6
				update_PROCINFO_str("FS", "API");
Packit Service f629e6
			}
Packit Service f629e6
		} else if (api_parser_override) {
Packit Service f629e6
			api_parser_override = false;
Packit Service f629e6
			parse_field = normal_parse_field;
Packit Service f629e6
			update_PROCINFO_str("FS", current_field_sep_str());
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
#undef INITIAL_SIZE
Packit Service f629e6
#undef MAX_SIZE
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* reset_record --- start over again with current $0 */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
reset_record()
Packit Service f629e6
{
Packit Service f629e6
	fields_arr[0] = force_string(fields_arr[0]);
Packit Service f629e6
	purge_record();
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
static void
Packit Service f629e6
purge_record()
Packit Service f629e6
{
Packit Service f629e6
	int i;
Packit Service f629e6
Packit Service f629e6
	NF = -1;
Packit Service f629e6
	for (i = 1; i <= parse_high_water; i++) {
Packit Service f629e6
		NODE *n;
Packit Service f629e6
		NODE *r = fields_arr[i];
Packit Service f629e6
		if ((r->flags & MALLOC) == 0 && r->valref > 1) {
Packit Service f629e6
			/* This can and does happen. We must copy the string! */
Packit Service f629e6
			const char *save = r->stptr;
Packit Service f629e6
			emalloc(r->stptr, char *, r->stlen + 1, "purge_record");
Packit Service f629e6
			memcpy(r->stptr, save, r->stlen);
Packit Service f629e6
			r->stptr[r->stlen] = '\0';
Packit Service f629e6
			r->flags |= MALLOC;
Packit Service f629e6
		}
Packit Service f629e6
		unref(r);
Packit Service f629e6
		getnode(n);
Packit Service f629e6
		*n = *Null_field;
Packit Service f629e6
		fields_arr[i] = n;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	parse_high_water = 0;
Packit Service f629e6
	/*
Packit Service f629e6
	 * $0 = $0 should resplit using the current value of FS.
Packit Service f629e6
	 */
Packit Service f629e6
	if (resave_fs) {
Packit Service f629e6
		resave_fs = false;
Packit Service f629e6
		unref(save_FS);
Packit Service f629e6
		save_FS = dupnode(FS_node->var_value);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	field0_valid = true;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* set_NF --- handle what happens to $0 and fields when NF is changed */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
set_NF()
Packit Service f629e6
{
Packit Service f629e6
	int i;
Packit Service f629e6
	long nf;
Packit Service f629e6
	NODE *n;
Packit Service f629e6
Packit Service f629e6
	assert(NF != -1);
Packit Service f629e6
Packit Service f629e6
	(void) force_number(NF_node->var_value);
Packit Service f629e6
	nf = get_number_si(NF_node->var_value);
Packit Service f629e6
	if (nf < 0)
Packit Service f629e6
		fatal(_("NF set to negative value"));
Packit Service f629e6
	NF = nf;
Packit Service f629e6
Packit Service f629e6
	if (NF > nf_high_water)
Packit Service f629e6
		grow_fields_arr(NF);
Packit Service f629e6
	if (parse_high_water < NF) {
Packit Service f629e6
		for (i = parse_high_water + 1; i >= 0 && i <= NF; i++) {
Packit Service f629e6
			unref(fields_arr[i]);
Packit Service f629e6
			getnode(n);
Packit Service f629e6
			*n = *Null_field;
Packit Service f629e6
			fields_arr[i] = n;
Packit Service f629e6
		}
Packit Service f629e6
		parse_high_water = NF;
Packit Service f629e6
	} else if (parse_high_water > 0) {
Packit Service f629e6
		for (i = NF + 1; i >= 0 && i <= parse_high_water; i++) {
Packit Service f629e6
			unref(fields_arr[i]);
Packit Service f629e6
			getnode(n);
Packit Service f629e6
			*n = *Null_field;
Packit Service f629e6
			fields_arr[i] = n;
Packit Service f629e6
		}
Packit Service f629e6
		parse_high_water = NF;
Packit Service f629e6
	}
Packit Service f629e6
	field0_valid = false;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * re_parse_field --- parse fields using a regexp.
Packit Service f629e6
 *
Packit Service f629e6
 * This is called both from get_field() and from do_split()
Packit Service f629e6
 * via (*parse_field)().  This variation is for when FS is a regular
Packit Service f629e6
 * expression -- either user-defined or because RS=="" and FS==" "
Packit Service f629e6
 */
Packit Service f629e6
static long
Packit Service f629e6
re_parse_field(long up_to,	/* parse only up to this field number */
Packit Service f629e6
	char **buf,	/* on input: string to parse; on output: point to start next */
Packit Service f629e6
	int len,
Packit Service f629e6
	NODE *fs ATTRIBUTE_UNUSED,
Packit Service f629e6
	Regexp *rp,
Packit Service f629e6
	Setfunc set,	/* routine to set the value of the parsed field */
Packit Service f629e6
	NODE *n,
Packit Service f629e6
	NODE *sep_arr,  /* array of field separators (maybe NULL) */
Packit Service f629e6
	bool in_middle)
Packit Service f629e6
{
Packit Service f629e6
	char *scan = *buf;
Packit Service f629e6
	long nf = parse_high_water;
Packit Service f629e6
	char *field;
Packit Service f629e6
	char *end = scan + len;
Packit Service f629e6
	int regex_flags = RE_NEED_START;
Packit Service f629e6
	char *sep;
Packit Service f629e6
	size_t mbclen = 0;
Packit Service f629e6
	mbstate_t mbs;
Packit Service f629e6
Packit Service f629e6
	memset(&mbs, 0, sizeof(mbstate_t));
Packit Service f629e6
Packit Service f629e6
	if (in_middle)
Packit Service f629e6
		regex_flags |= RE_NO_BOL;
Packit Service f629e6
Packit Service f629e6
	if (up_to == UNLIMITED)
Packit Service f629e6
		nf = 0;
Packit Service f629e6
	if (len == 0)
Packit Service f629e6
		return nf;
Packit Service f629e6
Packit Service f629e6
	if (RS_is_null && default_FS) {
Packit Service f629e6
		sep = scan;
Packit Service f629e6
		while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
Packit Service f629e6
			scan++;
Packit Service f629e6
		if (sep_arr != NULL && sep < scan)
Packit Service f629e6
			set_element(nf, sep, (long)(scan - sep), sep_arr);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	if (rp == NULL) /* use FS */
Packit Service f629e6
		rp = FS_regexp;
Packit Service f629e6
Packit Service f629e6
	field = scan;
Packit Service f629e6
	while (scan < end
Packit Service f629e6
	       && research(rp, scan, 0, (end - scan), regex_flags) != -1
Packit Service f629e6
	       && nf < up_to) {
Packit Service f629e6
		regex_flags |= RE_NO_BOL;
Packit Service f629e6
		if (REEND(rp, scan) == RESTART(rp, scan)) {   /* null match */
Packit Service f629e6
			if (gawk_mb_cur_max > 1)	{
Packit Service f629e6
				mbclen = mbrlen(scan, end-scan, &mbs);
Packit Service f629e6
				if ((mbclen == 1) || (mbclen == (size_t) -1)
Packit Service f629e6
					|| (mbclen == (size_t) -2) || (mbclen == 0)) {
Packit Service f629e6
					/* We treat it as a singlebyte character.  */
Packit Service f629e6
					mbclen = 1;
Packit Service f629e6
				}
Packit Service f629e6
				scan += mbclen;
Packit Service f629e6
			} else
Packit Service f629e6
				scan++;
Packit Service f629e6
			if (scan == end) {
Packit Service f629e6
				(*set)(++nf, field, (long)(scan - field), n);
Packit Service f629e6
				up_to = nf;
Packit Service f629e6
				break;
Packit Service f629e6
			}
Packit Service f629e6
			continue;
Packit Service f629e6
		}
Packit Service f629e6
		(*set)(++nf, field,
Packit Service f629e6
		       (long)(scan + RESTART(rp, scan) - field), n);
Packit Service f629e6
		if (sep_arr != NULL)
Packit Service f629e6
	    		set_element(nf, scan + RESTART(rp, scan),
Packit Service f629e6
           			(long) (REEND(rp, scan) - RESTART(rp, scan)), sep_arr);
Packit Service f629e6
		scan += REEND(rp, scan);
Packit Service f629e6
		field = scan;
Packit Service f629e6
		if (scan == end)	/* FS at end of record */
Packit Service f629e6
			(*set)(++nf, field, 0L, n);
Packit Service f629e6
	}
Packit Service f629e6
	if (nf != up_to && scan < end) {
Packit Service f629e6
		(*set)(++nf, scan, (long)(end - scan), n);
Packit Service f629e6
		scan = end;
Packit Service f629e6
	}
Packit Service f629e6
	*buf = scan;
Packit Service f629e6
	return nf;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * def_parse_field --- default field parsing.
Packit Service f629e6
 *
Packit Service f629e6
 * This is called both from get_field() and from do_split()
Packit Service f629e6
 * via (*parse_field)().  This variation is for when FS is a single space
Packit Service f629e6
 * character.
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
static long
Packit Service f629e6
def_parse_field(long up_to,	/* parse only up to this field number */
Packit Service f629e6
	char **buf,	/* on input: string to parse; on output: point to start next */
Packit Service f629e6
	int len,
Packit Service f629e6
	NODE *fs,
Packit Service f629e6
	Regexp *rp ATTRIBUTE_UNUSED,
Packit Service f629e6
	Setfunc set,	/* routine to set the value of the parsed field */
Packit Service f629e6
	NODE *n,
Packit Service f629e6
	NODE *sep_arr,  /* array of field separators (maybe NULL) */
Packit Service f629e6
	bool in_middle ATTRIBUTE_UNUSED)
Packit Service f629e6
{
Packit Service f629e6
	char *scan = *buf;
Packit Service f629e6
	long nf = parse_high_water;
Packit Service f629e6
	char *field;
Packit Service f629e6
	char *end = scan + len;
Packit Service f629e6
	char sav;
Packit Service f629e6
	char *sep;
Packit Service f629e6
Packit Service f629e6
	if (up_to == UNLIMITED)
Packit Service f629e6
		nf = 0;
Packit Service f629e6
	if (len == 0)
Packit Service f629e6
		return nf;
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Nasty special case. If FS set to "", return whole record
Packit Service f629e6
	 * as first field. This is not worth a separate function.
Packit Service f629e6
	 */
Packit Service f629e6
	if (fs->stlen == 0) {
Packit Service f629e6
		(*set)(++nf, *buf, len, n);
Packit Service f629e6
		*buf += len;
Packit Service f629e6
		return nf;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/* before doing anything save the char at *end */
Packit Service f629e6
	sav = *end;
Packit Service f629e6
	/* because it will be destroyed now: */
Packit Service f629e6
Packit Service f629e6
	*end = ' ';	/* sentinel character */
Packit Service f629e6
	sep = scan;
Packit Service f629e6
	for (; nf < up_to; scan++) {
Packit Service f629e6
		/*
Packit Service f629e6
		 * special case:  fs is single space, strip leading whitespace
Packit Service f629e6
		 */
Packit Service f629e6
		while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
Packit Service f629e6
			scan++;
Packit Service f629e6
Packit Service f629e6
		if (sep_arr != NULL && scan > sep)
Packit Service f629e6
			set_element(nf, sep, (long) (scan - sep), sep_arr);
Packit Service f629e6
Packit Service f629e6
		if (scan >= end)
Packit Service f629e6
			break;
Packit Service f629e6
Packit Service f629e6
		field = scan;
Packit Service f629e6
Packit Service f629e6
		while (*scan != ' ' && *scan != '\t' && *scan != '\n')
Packit Service f629e6
			scan++;
Packit Service f629e6
Packit Service f629e6
		(*set)(++nf, field, (long)(scan - field), n);
Packit Service f629e6
Packit Service f629e6
		if (scan == end)
Packit Service f629e6
			break;
Packit Service f629e6
Packit Service f629e6
		sep = scan;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/* everything done, restore original char at *end */
Packit Service f629e6
	*end = sav;
Packit Service f629e6
Packit Service f629e6
	*buf = scan;
Packit Service f629e6
	return nf;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * null_parse_field --- each character is a separate field
Packit Service f629e6
 *
Packit Service f629e6
 * This is called both from get_field() and from do_split()
Packit Service f629e6
 * via (*parse_field)().  This variation is for when FS is the null string.
Packit Service f629e6
 */
Packit Service f629e6
static long
Packit Service f629e6
null_parse_field(long up_to,	/* parse only up to this field number */
Packit Service f629e6
	char **buf,	/* on input: string to parse; on output: point to start next */
Packit Service f629e6
	int len,
Packit Service f629e6
	NODE *fs ATTRIBUTE_UNUSED,
Packit Service f629e6
	Regexp *rp ATTRIBUTE_UNUSED,
Packit Service f629e6
	Setfunc set,	/* routine to set the value of the parsed field */
Packit Service f629e6
	NODE *n,
Packit Service f629e6
	NODE *sep_arr,  /* array of field separators (maybe NULL) */
Packit Service f629e6
	bool in_middle ATTRIBUTE_UNUSED)
Packit Service f629e6
{
Packit Service f629e6
	char *scan = *buf;
Packit Service f629e6
	long nf = parse_high_water;
Packit Service f629e6
	char *end = scan + len;
Packit Service f629e6
Packit Service f629e6
	if (up_to == UNLIMITED)
Packit Service f629e6
		nf = 0;
Packit Service f629e6
	if (len == 0)
Packit Service f629e6
		return nf;
Packit Service f629e6
Packit Service f629e6
	if (gawk_mb_cur_max > 1) {
Packit Service f629e6
		mbstate_t mbs;
Packit Service f629e6
		memset(&mbs, 0, sizeof(mbstate_t));
Packit Service f629e6
		for (; nf < up_to && scan < end;) {
Packit Service f629e6
			size_t mbclen = mbrlen(scan, end-scan, &mbs);
Packit Service f629e6
			if ((mbclen == 1) || (mbclen == (size_t) -1)
Packit Service f629e6
				|| (mbclen == (size_t) -2) || (mbclen == 0)) {
Packit Service f629e6
				/* We treat it as a singlebyte character.  */
Packit Service f629e6
				mbclen = 1;
Packit Service f629e6
			}
Packit Service f629e6
			if (sep_arr != NULL && nf > 0)
Packit Service f629e6
				set_element(nf, scan, 0L, sep_arr);
Packit Service f629e6
			(*set)(++nf, scan, mbclen, n);
Packit Service f629e6
			scan += mbclen;
Packit Service f629e6
		}
Packit Service f629e6
	} else {
Packit Service f629e6
		for (; nf < up_to && scan < end; scan++) {
Packit Service f629e6
			if (sep_arr != NULL && nf > 0)
Packit Service f629e6
				set_element(nf, scan, 0L, sep_arr);
Packit Service f629e6
			(*set)(++nf, scan, 1L, n);
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	*buf = scan;
Packit Service f629e6
	return nf;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * sc_parse_field --- single character field separator
Packit Service f629e6
 *
Packit Service f629e6
 * This is called both from get_field() and from do_split()
Packit Service f629e6
 * via (*parse_field)().  This variation is for when FS is a single character
Packit Service f629e6
 * other than space.
Packit Service f629e6
 */
Packit Service f629e6
static long
Packit Service f629e6
sc_parse_field(long up_to,	/* parse only up to this field number */
Packit Service f629e6
	char **buf,	/* on input: string to parse; on output: point to start next */
Packit Service f629e6
	int len,
Packit Service f629e6
	NODE *fs,
Packit Service f629e6
	Regexp *rp ATTRIBUTE_UNUSED,
Packit Service f629e6
	Setfunc set,	/* routine to set the value of the parsed field */
Packit Service f629e6
	NODE *n,
Packit Service f629e6
	NODE *sep_arr,  /* array of field separators (maybe NULL) */
Packit Service f629e6
	bool in_middle ATTRIBUTE_UNUSED)
Packit Service f629e6
{
Packit Service f629e6
	char *scan = *buf;
Packit Service f629e6
	char fschar;
Packit Service f629e6
	long nf = parse_high_water;
Packit Service f629e6
	char *field;
Packit Service f629e6
	char *end = scan + len;
Packit Service f629e6
	char sav;
Packit Service f629e6
	size_t mbclen = 0;
Packit Service f629e6
	mbstate_t mbs;
Packit Service f629e6
Packit Service f629e6
	memset(&mbs, 0, sizeof(mbstate_t));
Packit Service f629e6
Packit Service f629e6
	if (up_to == UNLIMITED)
Packit Service f629e6
		nf = 0;
Packit Service f629e6
	if (len == 0)
Packit Service f629e6
		return nf;
Packit Service f629e6
Packit Service f629e6
	if (RS_is_null && fs->stlen == 0)
Packit Service f629e6
		fschar = '\n';
Packit Service f629e6
	else
Packit Service f629e6
		fschar = fs->stptr[0];
Packit Service f629e6
Packit Service f629e6
	/* before doing anything save the char at *end */
Packit Service f629e6
	sav = *end;
Packit Service f629e6
	/* because it will be destroyed now: */
Packit Service f629e6
	*end = fschar;	/* sentinel character */
Packit Service f629e6
Packit Service f629e6
	for (; nf < up_to;) {
Packit Service f629e6
		field = scan;
Packit Service f629e6
		if (gawk_mb_cur_max > 1) {
Packit Service f629e6
			while (*scan != fschar) {
Packit Service f629e6
				mbclen = mbrlen(scan, end-scan, &mbs);
Packit Service f629e6
				if ((mbclen == 1) || (mbclen == (size_t) -1)
Packit Service f629e6
					|| (mbclen == (size_t) -2) || (mbclen == 0)) {
Packit Service f629e6
					/* We treat it as a singlebyte character.  */
Packit Service f629e6
					mbclen = 1;
Packit Service f629e6
				}
Packit Service f629e6
				scan += mbclen;
Packit Service f629e6
			}
Packit Service f629e6
		} else {
Packit Service f629e6
			while (*scan != fschar)
Packit Service f629e6
				scan++;
Packit Service f629e6
		}
Packit Service f629e6
		(*set)(++nf, field, (long)(scan - field), n);
Packit Service f629e6
		if (scan == end)
Packit Service f629e6
			break;
Packit Service f629e6
		if (sep_arr != NULL)
Packit Service f629e6
			set_element(nf, scan, 1L, sep_arr);
Packit Service f629e6
		scan++;
Packit Service f629e6
		if (scan == end) {	/* FS at end of record */
Packit Service f629e6
			(*set)(++nf, field, 0L, n);
Packit Service f629e6
			break;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/* everything done, restore original char at *end */
Packit Service f629e6
	*end = sav;
Packit Service f629e6
Packit Service f629e6
	*buf = scan;
Packit Service f629e6
	return nf;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
 * calc_mbslen --- calculate the length in bytes of a multi-byte string
 * containing len characters.
 *
 * Counting starts at scan and never goes past end, so the result may
 * cover fewer than len characters.  mbs is the conversion state passed
 * to mbrlen() and is updated by it.
 */

static size_t
calc_mbslen(char *scan, char *end, size_t len, mbstate_t *mbs)
{
	char *cp;
	size_t todo;

	for (cp = scan, todo = len; todo > 0 && cp < end; todo--) {
		size_t avail = (size_t) (end - cp);
		size_t step = mbrlen(cp, avail, mbs);

		/*
		 * Anything that is not a usable length (0, or the
		 * (size_t) -1 and (size_t) -2 error codes, which are
		 * both larger than avail) counts as a single byte.
		 */
		if (step == 0 || step > avail)
			step = 1;
		cp += step;
	}
	return (size_t) (cp - scan);
}
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * fw_parse_field --- field parsing using FIELDWIDTHS spec
Packit Service f629e6
 *
Packit Service f629e6
 * This is called from get_field() via (*parse_field)().
Packit Service f629e6
 * This variation is for fields are fixed widths.
Packit Service f629e6
 */
Packit Service f629e6
static long
Packit Service f629e6
fw_parse_field(long up_to,	/* parse only up to this field number */
Packit Service f629e6
	char **buf,	/* on input: string to parse; on output: point to start next */
Packit Service f629e6
	int len,
Packit Service f629e6
	NODE *fs ATTRIBUTE_UNUSED,
Packit Service f629e6
	Regexp *rp ATTRIBUTE_UNUSED,
Packit Service f629e6
	Setfunc set,	/* routine to set the value of the parsed field */
Packit Service f629e6
	NODE *n,
Packit Service f629e6
	NODE *dummy ATTRIBUTE_UNUSED, /* sep_arr not needed here: hence dummy */
Packit Service f629e6
	bool in_middle ATTRIBUTE_UNUSED)
Packit Service f629e6
{
Packit Service f629e6
	char *scan = *buf;
Packit Service f629e6
	long nf = parse_high_water;
Packit Service f629e6
	char *end = scan + len;
Packit Service f629e6
	const awk_fieldwidth_info_t *fw;
Packit Service f629e6
	mbstate_t mbs;
Packit Service f629e6
	size_t skiplen;
Packit Service f629e6
	size_t flen;
Packit Service f629e6
Packit Service f629e6
	fw = (api_parser_override ? api_fw : FIELDWIDTHS);
Packit Service f629e6
Packit Service f629e6
	if (up_to == UNLIMITED)
Packit Service f629e6
		nf = 0;
Packit Service f629e6
	if (len == 0)
Packit Service f629e6
		return nf;
Packit Service f629e6
	if (gawk_mb_cur_max > 1 && fw->use_chars) {
Packit Service f629e6
		/*
Packit Service f629e6
		 * Reset the shift state. Arguably, the shift state should
Packit Service f629e6
		 * be part of the file state and carried forward at all times,
Packit Service f629e6
		 * but nobody has complained so far, so this may not matter
Packit Service f629e6
		 * in practice.
Packit Service f629e6
		 */
Packit Service f629e6
		memset(&mbs, 0, sizeof(mbstate_t));
Packit Service f629e6
		while (nf < up_to && scan < end) {
Packit Service f629e6
			if (nf >= fw->nf) {
Packit Service f629e6
				*buf = end;
Packit Service f629e6
				return nf;
Packit Service f629e6
			}
Packit Service f629e6
			scan += calc_mbslen(scan, end, fw->fields[nf].skip, &mbs);
Packit Service f629e6
			flen = calc_mbslen(scan, end, fw->fields[nf].len, &mbs);
Packit Service f629e6
			(*set)(++nf, scan, (long) flen, n);
Packit Service f629e6
			scan += flen;
Packit Service f629e6
		}
Packit Service f629e6
	} else {
Packit Service f629e6
		while (nf < up_to && scan < end) {
Packit Service f629e6
			if (nf >= fw->nf) {
Packit Service f629e6
				*buf = end;
Packit Service f629e6
				return nf;
Packit Service f629e6
			}
Packit Service f629e6
			skiplen = fw->fields[nf].skip;
Packit Service f629e6
			if (skiplen > end - scan)
Packit Service f629e6
				skiplen = end - scan;
Packit Service f629e6
			scan += skiplen;
Packit Service f629e6
			flen = fw->fields[nf].len;
Packit Service f629e6
			if (flen > end - scan)
Packit Service f629e6
				flen = end - scan;
Packit Service f629e6
			(*set)(++nf, scan, (long) flen, n);
Packit Service f629e6
			scan += flen;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
	*buf = scan;
Packit Service f629e6
	return nf;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
 * invalidate_field0 --- $0 needs reconstruction
 *
 * Clears field0_valid.  get_field() rebuilds the record the next time
 * field 0 is requested while the flag is false, and it hands this
 * routine back through its assign argument for fields other than 0.
 */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
invalidate_field0()
Packit Service f629e6
{
Packit Service f629e6
	field0_valid = false;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* get_field --- return a particular $n */
Packit Service f629e6
Packit Service f629e6
/* assign is not NULL if this field is on the LHS of an assign */
Packit Service f629e6
Packit Service f629e6
NODE **
Packit Service f629e6
get_field(long requested, Func_ptr *assign)
Packit Service f629e6
{
Packit Service f629e6
	bool in_middle = false;
Packit Service f629e6
	/*
Packit Service f629e6
	 * if requesting whole line but some other field has been altered,
Packit Service f629e6
	 * then the whole line must be rebuilt
Packit Service f629e6
	 */
Packit Service f629e6
	if (requested == 0) {
Packit Service f629e6
		if (! field0_valid) {
Packit Service f629e6
			/* first, parse remainder of input record */
Packit Service f629e6
			if (NF == -1) {
Packit Service f629e6
				NF = (*parse_field)(UNLIMITED - 1, &parse_extent,
Packit Service f629e6
		    			fields_arr[0]->stlen -
Packit Service f629e6
					(parse_extent - fields_arr[0]->stptr),
Packit Service f629e6
		    			save_FS, FS_regexp, set_field,
Packit Service f629e6
					(NODE *) NULL,
Packit Service f629e6
					(NODE *) NULL,
Packit Service f629e6
					in_middle);
Packit Service f629e6
				parse_high_water = NF;
Packit Service f629e6
			}
Packit Service f629e6
			rebuild_record();
Packit Service f629e6
		}
Packit Service f629e6
		if (assign != NULL)
Packit Service f629e6
			*assign = reset_record;
Packit Service f629e6
		return &fields_arr[0];
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/* assert(requested > 0); */
Packit Service f629e6
Packit Service f629e6
#if 0
Packit Service f629e6
	if (assign != NULL)
Packit Service f629e6
		field0_valid = false;		/* $0 needs reconstruction */
Packit Service f629e6
#else
Packit Service f629e6
	/*
Packit Service f629e6
	 * Keep things uniform. Also, mere intention of assigning something
Packit Service f629e6
	 * to $n should not make $0 invalid. Makes sense to invalidate $0
Packit Service f629e6
	 * after the actual assignment is performed. Not a real issue in
Packit Service f629e6
	 * the interpreter otherwise, but causes problem in the
Packit Service f629e6
	 * debugger when watching or printing fields.
Packit Service f629e6
	 */
Packit Service f629e6
Packit Service f629e6
	if (assign != NULL)
Packit Service f629e6
		*assign = invalidate_field0;	/* $0 needs reconstruction */
Packit Service f629e6
#endif
Packit Service f629e6
Packit Service f629e6
	if (requested <= parse_high_water)	/* already parsed this field */
Packit Service f629e6
		return &fields_arr[requested];
Packit Service f629e6
Packit Service f629e6
	if (NF == -1) {	/* have not yet parsed to end of record */
Packit Service f629e6
		/*
Packit Service f629e6
		 * parse up to requested fields, calling set_field() for each,
Packit Service f629e6
		 * saving in parse_extent the point where the parse left off
Packit Service f629e6
		 */
Packit Service f629e6
		if (parse_high_water == 0)	/* starting at the beginning */
Packit Service f629e6
			parse_extent = fields_arr[0]->stptr;
Packit Service f629e6
		else
Packit Service f629e6
			in_middle = true;
Packit Service f629e6
		parse_high_water = (*parse_field)(requested, &parse_extent,
Packit Service f629e6
		     fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr),
Packit Service f629e6
		     save_FS, NULL, set_field, (NODE *) NULL, (NODE *) NULL, in_middle);
Packit Service f629e6
Packit Service f629e6
		/*
Packit Service f629e6
		 * if we reached the end of the record, set NF to the number of
Packit Service f629e6
		 * fields so far.  Note that requested might actually refer to
Packit Service f629e6
		 * a field that is beyond the end of the record, but we won't
Packit Service f629e6
		 * set NF to that value at this point, since this is only a
Packit Service f629e6
		 * reference to the field and NF only gets set if the field
Packit Service f629e6
		 * is assigned to -- this case is handled below
Packit Service f629e6
		 */
Packit Service f629e6
		if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
Packit Service f629e6
			NF = parse_high_water;
Packit Service f629e6
		else if (parse_field == fpat_parse_field) {
Packit Service f629e6
			/* FPAT parsing is weird, isolate the special cases */
Packit Service f629e6
			char *rec_start = fields_arr[0]->stptr;
Packit Service f629e6
			char *rec_end = fields_arr[0]->stptr + fields_arr[0]->stlen;
Packit Service f629e6
Packit Service f629e6
			if (    parse_extent > rec_end
Packit Service f629e6
			    || (parse_extent > rec_start && parse_extent < rec_end && requested == UNLIMITED-1))
Packit Service f629e6
				NF = parse_high_water;
Packit Service f629e6
			else if (parse_extent == rec_start) /* could be no match for FPAT */
Packit Service f629e6
				NF = 0;
Packit Service f629e6
		}
Packit Service f629e6
		if (requested == UNLIMITED - 1)	/* UNLIMITED-1 means set NF */
Packit Service f629e6
			requested = parse_high_water;
Packit Service f629e6
	}
Packit Service f629e6
	if (parse_high_water < requested) { /* requested beyond end of record */
Packit Service f629e6
		if (assign != NULL) {	/* expand record */
Packit Service f629e6
			if (requested > nf_high_water)
Packit Service f629e6
				grow_fields_arr(requested);
Packit Service f629e6
Packit Service f629e6
			NF = requested;
Packit Service f629e6
			parse_high_water = requested;
Packit Service f629e6
		} else
Packit Service f629e6
			return &Null_field;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	return &fields_arr[requested];
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* set_element --- set an array element, used by do_split() */
Packit Service f629e6
Packit Service f629e6
static void
Packit Service f629e6
set_element(long num, char *s, long len, NODE *n)
Packit Service f629e6
{
Packit Service f629e6
	NODE *it;
Packit Service f629e6
	NODE **lhs;
Packit Service f629e6
	NODE *sub;
Packit Service f629e6
Packit Service f629e6
	it = make_string(s, len);
Packit Service f629e6
	it->flags |= USER_INPUT;
Packit Service f629e6
	sub = make_number((AWKNUM) (num));
Packit Service f629e6
	lhs = assoc_lookup(n, sub);
Packit Service f629e6
	unref(*lhs);
Packit Service f629e6
	*lhs = it;
Packit Service f629e6
        if (n->astore != NULL)
Packit Service f629e6
                (*n->astore)(n, sub);
Packit Service f629e6
	unref(sub);
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* do_split --- implement split(), semantics are same as for field splitting */
Packit Service f629e6
Packit Service f629e6
NODE *
Packit Service f629e6
do_split(int nargs)
Packit Service f629e6
{
Packit Service f629e6
	NODE *src, *arr, *sep, *fs, *tmp, *sep_arr = NULL;
Packit Service f629e6
	char *s;
Packit Service f629e6
	long (*parseit)(long, char **, int, NODE *,
Packit Service f629e6
			 Regexp *, Setfunc, NODE *, NODE *, bool);
Packit Service f629e6
	Regexp *rp = NULL;
Packit Service f629e6
Packit Service f629e6
	if (nargs == 4) {
Packit Service f629e6
		static bool warned = false;
Packit Service f629e6
Packit Service f629e6
		if (do_traditional || do_posix) {
Packit Service f629e6
			fatal(_("split: fourth argument is a gawk extension"));
Packit Service f629e6
		}
Packit Service f629e6
		sep_arr = POP_PARAM();
Packit Service f629e6
		if (sep_arr->type != Node_var_array)
Packit Service f629e6
			fatal(_("split: fourth argument is not an array"));
Packit Service f629e6
		if ((do_lint || do_lint_old) && ! warned) {
Packit Service f629e6
			warned = true;
Packit Service f629e6
			lintwarn(_("split: fourth argument is a gawk extension"));
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	sep = POP();
Packit Service f629e6
	arr = POP_PARAM();
Packit Service f629e6
	if (arr->type != Node_var_array)
Packit Service f629e6
		fatal(_("split: second argument is not an array"));
Packit Service f629e6
Packit Service f629e6
	if (sep_arr != NULL) {
Packit Service f629e6
		if (sep_arr == arr)
Packit Service f629e6
			fatal(_("split: cannot use the same array for second and fourth args"));
Packit Service f629e6
Packit Service f629e6
		/* This checks need to be done before clearing any of the arrays */
Packit Service f629e6
		for (tmp = sep_arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
Packit Service f629e6
			if (tmp == arr)
Packit Service f629e6
				fatal(_("split: cannot use a subarray of second arg for fourth arg"));
Packit Service f629e6
		for (tmp = arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
Packit Service f629e6
			if (tmp == sep_arr)
Packit Service f629e6
				fatal(_("split: cannot use a subarray of fourth arg for second arg"));
Packit Service f629e6
		assoc_clear(sep_arr);
Packit Service f629e6
	}
Packit Service f629e6
	assoc_clear(arr);
Packit Service f629e6
Packit Service f629e6
	src = TOP_STRING();
Packit Service f629e6
	if (src->stlen == 0) {
Packit Service f629e6
		/*
Packit Service f629e6
		 * Skip the work if first arg is the null string.
Packit Service f629e6
		 */
Packit Service f629e6
		tmp = POP_SCALAR();
Packit Service f629e6
		DEREF(tmp);
Packit Service f629e6
		return make_number((AWKNUM) 0);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	if ((sep->flags & REGEX) != 0)
Packit Service f629e6
		sep = sep->typed_re;
Packit Service f629e6
Packit Service f629e6
	if (   (sep->re_flags & FS_DFLT) != 0
Packit Service f629e6
	    && current_field_sep() == Using_FS
Packit Service f629e6
	    && ! RS_is_null) {
Packit Service f629e6
		parseit = parse_field;
Packit Service f629e6
		fs = force_string(FS_node->var_value);
Packit Service f629e6
		rp = FS_regexp;
Packit Service f629e6
	} else {
Packit Service f629e6
		fs = sep->re_exp;
Packit Service f629e6
Packit Service f629e6
		if (fs->stlen == 0) {
Packit Service f629e6
			static bool warned = false;
Packit Service f629e6
Packit Service f629e6
			parseit = null_parse_field;
Packit Service f629e6
Packit Service f629e6
			if (do_lint && ! warned) {
Packit Service f629e6
				warned = true;
Packit Service f629e6
				lintwarn(_("split: null string for third arg is a gawk extension"));
Packit Service f629e6
			}
Packit Service f629e6
		} else if (fs->stlen == 1 && (sep->re_flags & CONSTANT) == 0) {
Packit Service f629e6
			if (fs->stptr[0] == ' ') {
Packit Service f629e6
				parseit = def_parse_field;
Packit Service f629e6
			} else
Packit Service f629e6
				parseit = sc_parse_field;
Packit Service f629e6
		} else {
Packit Service f629e6
			parseit = re_parse_field;
Packit Service f629e6
			rp = re_update(sep);
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	s = src->stptr;
Packit Service f629e6
	tmp = make_number((AWKNUM) (*parseit)(UNLIMITED, &s, (int) src->stlen,
Packit Service f629e6
					     fs, rp, set_element, arr, sep_arr, false));
Packit Service f629e6
Packit Service f629e6
	src = POP_SCALAR();	/* really pop off stack */
Packit Service f629e6
	DEREF(src);
Packit Service f629e6
	return tmp;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * do_patsplit --- implement patsplit(), semantics are same as for field
Packit Service f629e6
 *		   splitting with FPAT.
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
NODE *
Packit Service f629e6
do_patsplit(int nargs)
Packit Service f629e6
{
Packit Service f629e6
	NODE *src, *arr, *sep, *fpat, *tmp, *sep_arr = NULL;
Packit Service f629e6
	char *s;
Packit Service f629e6
	Regexp *rp = NULL;
Packit Service f629e6
Packit Service f629e6
	if (nargs == 4) {
Packit Service f629e6
		sep_arr = POP_PARAM();
Packit Service f629e6
		if (sep_arr->type != Node_var_array)
Packit Service f629e6
			fatal(_("patsplit: fourth argument is not an array"));
Packit Service f629e6
	}
Packit Service f629e6
	sep = POP();
Packit Service f629e6
	arr = POP_PARAM();
Packit Service f629e6
	if (arr->type != Node_var_array)
Packit Service f629e6
		fatal(_("patsplit: second argument is not an array"));
Packit Service f629e6
Packit Service f629e6
	src = TOP_STRING();
Packit Service f629e6
Packit Service f629e6
	if ((sep->flags & REGEX) != 0)
Packit Service f629e6
		sep = sep->typed_re;
Packit Service f629e6
Packit Service f629e6
	fpat = sep->re_exp;
Packit Service f629e6
	if (fpat->stlen == 0)
Packit Service f629e6
		fatal(_("patsplit: third argument must be non-null"));
Packit Service f629e6
Packit Service f629e6
	if (sep_arr != NULL) {
Packit Service f629e6
		if (sep_arr == arr)
Packit Service f629e6
			fatal(_("patsplit: cannot use the same array for second and fourth args"));
Packit Service f629e6
Packit Service f629e6
		/* These checks need to be done before clearing any of the arrays */
Packit Service f629e6
		for (tmp = sep_arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
Packit Service f629e6
			if (tmp == arr)
Packit Service f629e6
				fatal(_("patsplit: cannot use a subarray of second arg for fourth arg"));
Packit Service f629e6
		for (tmp = arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
Packit Service f629e6
			if (tmp == sep_arr)
Packit Service f629e6
				fatal(_("patsplit: cannot use a subarray of fourth arg for second arg"));
Packit Service f629e6
		assoc_clear(sep_arr);
Packit Service f629e6
	}
Packit Service f629e6
	assoc_clear(arr);
Packit Service f629e6
Packit Service f629e6
	if (src->stlen == 0) {
Packit Service f629e6
		/*
Packit Service f629e6
		 * Skip the work if first arg is the null string.
Packit Service f629e6
		 */
Packit Service f629e6
		tmp =  make_number((AWKNUM) 0);
Packit Service f629e6
	} else {
Packit Service f629e6
		rp = re_update(sep);
Packit Service f629e6
		s = src->stptr;
Packit Service f629e6
		tmp = make_number((AWKNUM) fpat_parse_field(UNLIMITED, &s,
Packit Service f629e6
				(int) src->stlen, fpat, rp,
Packit Service f629e6
				set_element, arr, sep_arr, false));
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	src = POP_SCALAR();	/* really pop off stack */
Packit Service f629e6
	DEREF(src);
Packit Service f629e6
	return tmp;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* set_parser --- update the current (non-API) parser */
Packit Service f629e6
Packit Service f629e6
static void
Packit Service f629e6
set_parser(parse_field_func_t func)
Packit Service f629e6
{
Packit Service f629e6
	normal_parse_field = func;
Packit Service f629e6
	if (! api_parser_override && parse_field != func) {
Packit Service f629e6
		parse_field = func;
Packit Service f629e6
	        update_PROCINFO_str("FS", current_field_sep_str());
Packit Service f629e6
	}
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
set_FIELDWIDTHS()
Packit Service f629e6
{
Packit Service f629e6
	char *scan;
Packit Service f629e6
	char *end;
Packit Service f629e6
	int i;
Packit Service f629e6
	static int fw_alloc = 4;
Packit Service f629e6
	static bool warned = false;
Packit Service f629e6
	bool fatal_error = false;
Packit Service f629e6
	NODE *tmp;
Packit Service f629e6
Packit Service f629e6
	if (do_lint && ! warned) {
Packit Service f629e6
		warned = true;
Packit Service f629e6
		lintwarn(_("`FIELDWIDTHS' is a gawk extension"));
Packit Service f629e6
	}
Packit Service f629e6
	if (do_traditional)	/* quick and dirty, does the trick */
Packit Service f629e6
		return;
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * If changing the way fields are split, obey least-surprise
Packit Service f629e6
	 * semantics, and force $0 to be split totally.
Packit Service f629e6
	 */
Packit Service f629e6
	if (fields_arr != NULL)
Packit Service f629e6
		(void) get_field(UNLIMITED - 1, 0);
Packit Service f629e6
Packit Service f629e6
	set_parser(fw_parse_field);
Packit Service f629e6
	tmp = force_string(FIELDWIDTHS_node->var_value);
Packit Service f629e6
	scan = tmp->stptr;
Packit Service f629e6
Packit Service f629e6
	if (FIELDWIDTHS == NULL) {
Packit Service f629e6
		emalloc(FIELDWIDTHS, awk_fieldwidth_info_t *, awk_fieldwidth_info_size(fw_alloc), "set_FIELDWIDTHS");
Packit Service f629e6
		FIELDWIDTHS->use_chars = awk_true;
Packit Service f629e6
	}
Packit Service f629e6
	FIELDWIDTHS->nf = 0;
Packit Service f629e6
	for (i = 0; ; i++) {
Packit Service f629e6
		unsigned long int tmp;
Packit Service f629e6
		if (i >= fw_alloc) {
Packit Service f629e6
			fw_alloc *= 2;
Packit Service f629e6
			erealloc(FIELDWIDTHS, awk_fieldwidth_info_t *, awk_fieldwidth_info_size(fw_alloc), "set_FIELDWIDTHS");
Packit Service f629e6
		}
Packit Service f629e6
		/* Ensure that there is no leading `-' sign.  Otherwise,
Packit Service f629e6
		   strtoul would accept it and return a bogus result.  */
Packit Service f629e6
		while (is_blank(*scan)) {
Packit Service f629e6
			++scan;
Packit Service f629e6
		}
Packit Service f629e6
		if (*scan == '-') {
Packit Service f629e6
			fatal_error = true;
Packit Service f629e6
			break;
Packit Service f629e6
		}
Packit Service f629e6
		if (*scan == '\0')
Packit Service f629e6
			break;
Packit Service f629e6
Packit Service f629e6
		// Look for skip value. We allow N:M and N:*.
Packit Service f629e6
		/*
Packit Service f629e6
		 * Detect an invalid base-10 integer, a valid value that
Packit Service f629e6
		 * is followed by something other than a blank or '\0',
Packit Service f629e6
		 * or a value that is not in the range [1..UINT_MAX].
Packit Service f629e6
		 */
Packit Service f629e6
		errno = 0;
Packit Service f629e6
		tmp = strtoul(scan, &end, 10);
Packit Service f629e6
		if (errno == 0 && *end == ':' && (0 < tmp && tmp <= UINT_MAX)) {
Packit Service f629e6
			FIELDWIDTHS->fields[i].skip = tmp;
Packit Service f629e6
			scan = end + 1;
Packit Service f629e6
			if (*scan == '-' || is_blank(*scan)) {
Packit Service f629e6
				fatal_error = true;
Packit Service f629e6
				break;
Packit Service f629e6
			}
Packit Service f629e6
			// try scanning for field width
Packit Service f629e6
			tmp = strtoul(scan, &end, 10);
Packit Service f629e6
		}
Packit Service f629e6
		else
Packit Service f629e6
			FIELDWIDTHS->fields[i].skip = 0;
Packit Service f629e6
Packit Service f629e6
		if (errno != 0
Packit Service f629e6
		    	|| (*end != '\0' && ! is_blank(*end))
Packit Service f629e6
				|| !(0 < tmp && tmp <= UINT_MAX)
Packit Service f629e6
		) {
Packit Service f629e6
			if (*scan == '*') {
Packit Service f629e6
				for (scan++; is_blank(*scan); scan++)
Packit Service f629e6
					continue;
Packit Service f629e6
Packit Service f629e6
				if (*scan != '\0')
Packit Service f629e6
					fatal(_("`*' must be the last designator in FIELDWIDTHS"));
Packit Service f629e6
Packit Service f629e6
				FIELDWIDTHS->fields[i].len = UINT_MAX;
Packit Service f629e6
				FIELDWIDTHS->nf = i+1;
Packit Service f629e6
			}
Packit Service f629e6
			else
Packit Service f629e6
				fatal_error = true;
Packit Service f629e6
			break;
Packit Service f629e6
		}
Packit Service f629e6
		FIELDWIDTHS->fields[i].len = tmp;
Packit Service f629e6
		FIELDWIDTHS->nf = i+1;
Packit Service f629e6
		scan = end;
Packit Service f629e6
		/* Skip past any trailing blanks.  */
Packit Service f629e6
		while (is_blank(*scan)) {
Packit Service f629e6
			++scan;
Packit Service f629e6
		}
Packit Service f629e6
		if (*scan == '\0')
Packit Service f629e6
			break;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	if (fatal_error)
Packit Service f629e6
		fatal(_("invalid FIELDWIDTHS value, for field %d, near `%s'"),
Packit Service f629e6
			      i + 1, scan);
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* set_FS --- handle things when FS is assigned to */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
set_FS()
Packit Service f629e6
{
Packit Service f629e6
	char buf[10];
Packit Service f629e6
	NODE *fs;
Packit Service f629e6
	static NODE *save_fs = NULL;
Packit Service f629e6
	static NODE *save_rs = NULL;
Packit Service f629e6
	bool remake_re = true;
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * If changing the way fields are split, obey least-surprise
Packit Service f629e6
	 * semantics, and force $0 to be split totally.
Packit Service f629e6
	 */
Packit Service f629e6
	if (fields_arr != NULL)
Packit Service f629e6
		(void) get_field(UNLIMITED - 1, 0);
Packit Service f629e6
Packit Service f629e6
	/* It's possible that only IGNORECASE changed, or FS = FS */
Packit Service f629e6
	/*
Packit Service f629e6
	 * This comparison can't use cmp_nodes(), which pays attention
Packit Service f629e6
	 * to IGNORECASE, and that's not what we want.
Packit Service f629e6
	 */
Packit Service f629e6
	if (save_fs
Packit Service f629e6
		&& FS_node->var_value->stlen == save_fs->stlen
Packit Service f629e6
		&& memcmp(FS_node->var_value->stptr, save_fs->stptr, save_fs->stlen) == 0
Packit Service f629e6
		&& save_rs
Packit Service f629e6
		&& RS_node->var_value->stlen == save_rs->stlen
Packit Service f629e6
		&& memcmp(RS_node->var_value->stptr, save_rs->stptr, save_rs->stlen) == 0) {
Packit Service f629e6
		if (FS_regexp != NULL)
Packit Service f629e6
			FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
Packit Service f629e6
Packit Service f629e6
		/* FS = FS */
Packit Service f629e6
		if (current_field_sep() == Using_FS) {
Packit Service f629e6
			return;
Packit Service f629e6
		} else {
Packit Service f629e6
			remake_re = false;
Packit Service f629e6
			goto choose_fs_function;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	unref(save_fs);
Packit Service f629e6
	save_fs = dupnode(FS_node->var_value);
Packit Service f629e6
	unref(save_rs);
Packit Service f629e6
	save_rs = dupnode(RS_node->var_value);
Packit Service f629e6
	resave_fs = true;
Packit Service f629e6
Packit Service f629e6
	/* If FS_re_no_case assignment is fatal (make_regexp in remake_re)
Packit Service f629e6
	 * FS_regexp will be NULL with a non-null FS_re_yes_case.
Packit Service f629e6
	 * refree() handles null argument; no need for `if (FS_regexp != NULL)' below.
Packit Service f629e6
	 * Please do not remerge.
Packit Service f629e6
	 */
Packit Service f629e6
	refree(FS_re_yes_case);
Packit Service f629e6
	refree(FS_re_no_case);
Packit Service f629e6
	FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
choose_fs_function:
Packit Service f629e6
	buf[0] = '\0';
Packit Service f629e6
	default_FS = false;
Packit Service f629e6
	fs = force_string(FS_node->var_value);
Packit Service f629e6
Packit Service f629e6
	if (! do_traditional && fs->stlen == 0) {
Packit Service f629e6
		static bool warned = false;
Packit Service f629e6
Packit Service f629e6
		set_parser(null_parse_field);
Packit Service f629e6
Packit Service f629e6
		if (do_lint && ! warned) {
Packit Service f629e6
			warned = true;
Packit Service f629e6
			lintwarn(_("null string for `FS' is a gawk extension"));
Packit Service f629e6
		}
Packit Service f629e6
	} else if (fs->stlen > 1) {
Packit Service f629e6
		if (do_lint_old)
Packit Service f629e6
			warning(_("old awk does not support regexps as value of `FS'"));
Packit Service f629e6
		set_parser(re_parse_field);
Packit Service f629e6
	} else if (RS_is_null) {
Packit Service f629e6
		/* we know that fs->stlen <= 1 */
Packit Service f629e6
		set_parser(sc_parse_field);
Packit Service f629e6
		if (fs->stlen == 1) {
Packit Service f629e6
			if (fs->stptr[0] == ' ') {
Packit Service f629e6
				default_FS = true;
Packit Service f629e6
				strcpy(buf, "[ \t\n]+");
Packit Service f629e6
			} else if (fs->stptr[0] == '\\') {
Packit Service f629e6
				/* yet another special case */
Packit Service f629e6
				strcpy(buf, "[\\\\\n]");
Packit Service f629e6
			} else if (fs->stptr[0] == '\0') {
Packit Service f629e6
				/* and yet another special case */
Packit Service f629e6
				strcpy(buf, "[\\000\n]");
Packit Service f629e6
			} else if (fs->stptr[0] != '\n') {
Packit Service f629e6
				sprintf(buf, "[%c\n]", fs->stptr[0]);
Packit Service f629e6
			}
Packit Service f629e6
		}
Packit Service f629e6
	} else {
Packit Service f629e6
		set_parser(def_parse_field);
Packit Service f629e6
Packit Service f629e6
		if (fs->stlen == 1) {
Packit Service f629e6
			if (fs->stptr[0] == ' ')
Packit Service f629e6
				default_FS = true;
Packit Service f629e6
			else if (fs->stptr[0] == '\\')
Packit Service f629e6
				/* same special case */
Packit Service f629e6
				strcpy(buf, "[\\\\]");
Packit Service f629e6
			else
Packit Service f629e6
				set_parser(sc_parse_field);
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
	if (remake_re) {
Packit Service f629e6
		refree(FS_re_yes_case);
Packit Service f629e6
		refree(FS_re_no_case);
Packit Service f629e6
		FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
Packit Service f629e6
Packit Service f629e6
		if (buf[0] != '\0') {
Packit Service f629e6
			FS_re_yes_case = make_regexp(buf, strlen(buf), false, true, true);
Packit Service f629e6
			FS_re_no_case = make_regexp(buf, strlen(buf), true, true, true);
Packit Service f629e6
			FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
Packit Service f629e6
			set_parser(re_parse_field);
Packit Service f629e6
		} else if (parse_field == re_parse_field) {
Packit Service f629e6
			FS_re_yes_case = make_regexp(fs->stptr, fs->stlen, false, true, true);
Packit Service f629e6
			FS_re_no_case = make_regexp(fs->stptr, fs->stlen, true, true, true);
Packit Service f629e6
			FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
Packit Service f629e6
		} else
Packit Service f629e6
			FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * For FS = "c", we don't use IGNORECASE. But we must use
Packit Service f629e6
	 * re_parse_field to get the character and the newline as
Packit Service f629e6
	 * field separators.
Packit Service f629e6
	 */
Packit Service f629e6
	if (fs->stlen == 1 && parse_field == re_parse_field)
Packit Service f629e6
		FS_regexp = FS_re_yes_case;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* current_field_sep --- return the field separator type */
Packit Service f629e6
Packit Service f629e6
field_sep_type
Packit Service f629e6
current_field_sep()
Packit Service f629e6
{
Packit Service f629e6
	if (api_parser_override)
Packit Service f629e6
		return Using_API;
Packit Service f629e6
	else if (parse_field == fw_parse_field)
Packit Service f629e6
		return Using_FIELDWIDTHS;
Packit Service f629e6
	else if (parse_field == fpat_parse_field)
Packit Service f629e6
		return Using_FPAT;
Packit Service f629e6
	else
Packit Service f629e6
		return Using_FS;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* current_field_sep_str --- return the field separator type as a string */
Packit Service f629e6
Packit Service f629e6
const char *
Packit Service f629e6
current_field_sep_str()
Packit Service f629e6
{
Packit Service f629e6
	if (api_parser_override)
Packit Service f629e6
		return "API";
Packit Service f629e6
	else if (parse_field == fw_parse_field)
Packit Service f629e6
		return "FIELDWIDTHS";
Packit Service f629e6
	else if (parse_field == fpat_parse_field)
Packit Service f629e6
		return "FPAT";
Packit Service f629e6
	else
Packit Service f629e6
		return "FS";
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* update_PROCINFO_str --- update PROCINFO[sub] with string value */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
update_PROCINFO_str(const char *subscript, const char *str)
Packit Service f629e6
{
Packit Service f629e6
	NODE **aptr;
Packit Service f629e6
	NODE *tmp;
Packit Service f629e6
Packit Service f629e6
	if (PROCINFO_node == NULL)
Packit Service f629e6
		return;
Packit Service f629e6
	tmp = make_string(subscript, strlen(subscript));
Packit Service f629e6
	aptr = assoc_lookup(PROCINFO_node, tmp);
Packit Service f629e6
	unref(tmp);
Packit Service f629e6
	unref(*aptr);
Packit Service f629e6
	*aptr = make_string(str, strlen(str));
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* update_PROCINFO_num --- update PROCINFO[sub] with numeric value */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
update_PROCINFO_num(const char *subscript, AWKNUM val)
Packit Service f629e6
{
Packit Service f629e6
	NODE **aptr;
Packit Service f629e6
	NODE *tmp;
Packit Service f629e6
Packit Service f629e6
	if (PROCINFO_node == NULL)
Packit Service f629e6
		return;
Packit Service f629e6
	tmp = make_string(subscript, strlen(subscript));
Packit Service f629e6
	aptr = assoc_lookup(PROCINFO_node, tmp);
Packit Service f629e6
	unref(tmp);
Packit Service f629e6
	unref(*aptr);
Packit Service f629e6
	*aptr = make_number(val);
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* set_FPAT --- handle an assignment to FPAT */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
set_FPAT()
Packit Service f629e6
{
Packit Service f629e6
	static bool warned = false;
Packit Service f629e6
	static NODE *save_fpat = NULL;
Packit Service f629e6
	bool remake_re = true;
Packit Service f629e6
	NODE *fpat;
Packit Service f629e6
Packit Service f629e6
	if (do_lint && ! warned) {
Packit Service f629e6
		warned = true;
Packit Service f629e6
		lintwarn(_("`FPAT' is a gawk extension"));
Packit Service f629e6
	}
Packit Service f629e6
	if (do_traditional)	/* quick and dirty, does the trick */
Packit Service f629e6
		return;
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * If changing the way fields are split, obey least-suprise
Packit Service f629e6
	 * semantics, and force $0 to be split totally.
Packit Service f629e6
	 */
Packit Service f629e6
	if (fields_arr != NULL)
Packit Service f629e6
		(void) get_field(UNLIMITED - 1, 0);
Packit Service f629e6
Packit Service f629e6
	/* It's possible that only IGNORECASE changed, or FPAT = FPAT */
Packit Service f629e6
	/*
Packit Service f629e6
	 * This comparison can't use cmp_nodes(), which pays attention
Packit Service f629e6
	 * to IGNORECASE, and that's not what we want.
Packit Service f629e6
	 */
Packit Service f629e6
	if (save_fpat
Packit Service f629e6
		&& FPAT_node->var_value->stlen == save_fpat->stlen
Packit Service f629e6
		&& memcmp(FPAT_node->var_value->stptr, save_fpat->stptr, save_fpat->stlen) == 0) {
Packit Service f629e6
		if (FPAT_regexp != NULL)
Packit Service f629e6
			FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case);
Packit Service f629e6
Packit Service f629e6
		/* FPAT = FPAT */
Packit Service f629e6
		if (current_field_sep() == Using_FPAT) {
Packit Service f629e6
			return;
Packit Service f629e6
		} else {
Packit Service f629e6
			remake_re = false;
Packit Service f629e6
			goto set_fpat_function;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	unref(save_fpat);
Packit Service f629e6
	save_fpat = dupnode(FPAT_node->var_value);
Packit Service f629e6
	refree(FPAT_re_yes_case);
Packit Service f629e6
	refree(FPAT_re_no_case);
Packit Service f629e6
	FPAT_re_yes_case = FPAT_re_no_case = FPAT_regexp = NULL;
Packit Service f629e6
Packit Service f629e6
set_fpat_function:
Packit Service f629e6
	fpat = force_string(FPAT_node->var_value);
Packit Service f629e6
	set_parser(fpat_parse_field);
Packit Service f629e6
Packit Service f629e6
	if (remake_re) {
Packit Service f629e6
		refree(FPAT_re_yes_case);
Packit Service f629e6
		refree(FPAT_re_no_case);
Packit Service f629e6
		FPAT_re_yes_case = FPAT_re_no_case = FPAT_regexp = NULL;
Packit Service f629e6
Packit Service f629e6
		FPAT_re_yes_case = make_regexp(fpat->stptr, fpat->stlen, false, true, true);
Packit Service f629e6
		FPAT_re_no_case = make_regexp(fpat->stptr, fpat->stlen, true, true, true);
Packit Service f629e6
		FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case);
Packit Service f629e6
	}
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * increment_scan --- macro to move scan pointer ahead by one character.
Packit Service f629e6
 * 			Implementation varies if doing MBS or not.
 *
 * The macro forwards to incr_scan(), passing the address of a variable
 * named `mbs'. It does not declare that variable itself, so one must be
 * in scope wherever the macro is used.
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
#define increment_scan(scanp, len) incr_scan(scanp, len, & mbs)
Packit Service f629e6
Packit Service f629e6
/* incr_scan --- MBS version of increment_scan() */
Packit Service f629e6
Packit Service f629e6
static void
Packit Service f629e6
incr_scan(char **scanp, size_t len, mbstate_t *mbs)
Packit Service f629e6
{
Packit Service f629e6
	size_t mbclen = 0;
Packit Service f629e6
Packit Service f629e6
	if (gawk_mb_cur_max > 1) {
Packit Service f629e6
		mbclen = mbrlen(*scanp, len, mbs);
Packit Service f629e6
		if (   (mbclen == 1)
Packit Service f629e6
		    || (mbclen == (size_t) -1)
Packit Service f629e6
		    || (mbclen == (size_t) -2)
Packit Service f629e6
		    || (mbclen == 0)) {
Packit Service f629e6
			/* We treat it as a singlebyte character.  */
Packit Service f629e6
			mbclen = 1;
Packit Service f629e6
		}
Packit Service f629e6
		*scanp += mbclen;
Packit Service f629e6
	} else
Packit Service f629e6
		(*scanp)++;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * fpat_parse_field --- parse fields using a regexp.
Packit Service f629e6
 *
Packit Service f629e6
 * This is called both from get_field() and from do_patsplit()
Packit Service f629e6
 * via (*parse_field)().  This variation is for when FPAT is a regular
Packit Service f629e6
 * expression -- use the value to find field contents.
Packit Service f629e6
 *
Packit Service f629e6
 * The FPAT parsing logic is a bit difficult to specify. In particular
Packit Service f629e6
 * to allow null fields at certain locations. To make the code as robust
Packit Service f629e6
 * as possible, an awk reference implementation was written and tested
Packit Service f629e6
 * as a first step, and later recoded in C, preserving its structure as
Packit Service f629e6
 * much as possible.
 *
 * Parsing starts at *buf and covers len bytes. Each match is handed to
 * (*set)() together with its field number and n; when sep_arr is not
 * NULL, the text between matches is stored there with set_element().
 * On a normal return *buf points just past the text consumed and the
 * result is the running field count. That count starts from
 * parse_high_water, or from zero when up_to is UNLIMITED.
Packit Service f629e6
 *
Packit Service f629e6
 * # Reference implementation of the FPAT record parsing.
Packit Service f629e6
 * #
Packit Service f629e6
 * # Each loop iteration identifies a (separator[n-1],field[n]) pair.
Packit Service f629e6
 * # Each loop iteration must consume some characters, except for the first field.
Packit Service f629e6
 * # So a null field is only valid as a first field or after a non-null separator.
Packit Service f629e6
 * # A null record has no fields (not a single null field).
Packit Service f629e6
 *
Packit Service f629e6
 * function refpatsplit(string, fields, pattern, seps,
Packit Service f629e6
 *         parse_start, sep_start, field_start, field_length, field_found, nf) # locals
Packit Service f629e6
 * {
Packit Service f629e6
 *     # Local state variables:
Packit Service f629e6
 *     # - parse_start: pointer to the first not yet consumed character
Packit Service f629e6
 *     # - sep_start: pointer to the beginning of the parsed separator
Packit Service f629e6
 *     # - field_start: pointer to the beginning of the parsed field
Packit Service f629e6
 *     # - field_length: length of the parsed field
Packit Service f629e6
 *     # - field_found: flag for successful field match
Packit Service f629e6
 *     # - nf: Number of fields found so far
Packit Service f629e6
 *
Packit Service f629e6
 *     # Prepare for parsing
Packit Service f629e6
 *     parse_start = 1   # first not yet parsed char
Packit Service f629e6
 *     nf = 0            # fields found so far
Packit Service f629e6
 *     delete fields
Packit Service f629e6
 *     delete seps
Packit Service f629e6
 *
Packit Service f629e6
 *     # Loop that consumes the whole record
Packit Service f629e6
 *     while (parse_start <= length(string)) {  # still something to parse
Packit Service f629e6
 *
Packit Service f629e6
 *         # first attempt to match the next field
Packit Service f629e6
 *         sep_start = parse_start
Packit Service f629e6
 *         field_found = match(substr(string, parse_start), pattern)
Packit Service f629e6
 *
Packit Service f629e6
 *         # check for an invalid null field and retry one character away
Packit Service f629e6
 *         if (nf > 0 && field_found && RSTART==1 && RLENGTH==0) {
Packit Service f629e6
 *             parse_start++
Packit Service f629e6
 *             field_found = match(substr(string, parse_start), pattern)
Packit Service f629e6
 *         }
Packit Service f629e6
 *
Packit Service f629e6
 *         # store the (sep[n-1],field[n]) pair
Packit Service f629e6
 *         if (field_found) {
Packit Service f629e6
 *             field_start = parse_start + RSTART - 1
Packit Service f629e6
 *             field_length = RLENGTH
Packit Service f629e6
 *             seps[nf] = substr(string, sep_start, field_start-sep_start)
Packit Service f629e6
 *             fields[++nf] = substr(string, field_start, field_length)
Packit Service f629e6
 *             parse_start = field_start + field_length
Packit Service f629e6
 *
Packit Service f629e6
 *         # store the final extra sep after the last field
Packit Service f629e6
 *         } else {
Packit Service f629e6
 *             seps[nf] = substr(string, sep_start)
Packit Service f629e6
 *             parse_start = length(string) + 1
Packit Service f629e6
 *         }
Packit Service f629e6
 *     }
Packit Service f629e6
 *
Packit Service f629e6
 *     return nf
Packit Service f629e6
 * }
Packit Service f629e6
 */
Packit Service f629e6
static long
Packit Service f629e6
fpat_parse_field(long up_to,	/* parse only up to this field number */
Packit Service f629e6
	char **buf,	/* on input: string to parse; on output: point to start next */
Packit Service f629e6
	int len,	/* number of bytes to parse, starting at *buf */
Packit Service f629e6
	NODE *fs ATTRIBUTE_UNUSED,
Packit Service f629e6
	Regexp *rp,	/* regexp describing a field; NULL means use FPAT_regexp */
Packit Service f629e6
	Setfunc set,	/* routine to set the value of the parsed field */
Packit Service f629e6
	NODE *n,	/* handed unchanged to (*set)() as its last argument */
Packit Service f629e6
	NODE *sep_arr,  /* array of field separators (may be NULL) */
Packit Service f629e6
	bool in_middle)	/* not referenced in this routine */
Packit Service f629e6
{
Packit Service f629e6
	char *scan = *buf;
Packit Service f629e6
	long nf = parse_high_water;
Packit Service f629e6
	char *start;
Packit Service f629e6
	char *end = scan + len;
Packit Service f629e6
	int regex_flags = RE_NEED_START;
Packit Service f629e6
	mbstate_t mbs;	/* referenced by name inside the increment_scan() macro */
Packit Service f629e6
	char* field_start;
Packit Service f629e6
	bool field_found = false;
Packit Service f629e6
Packit Service f629e6
	/* begin with a zeroed multibyte conversion state */
	memset(&mbs, 0, sizeof(mbstate_t));
Packit Service f629e6
Packit Service f629e6
	if (up_to == UNLIMITED)
Packit Service f629e6
		nf = 0;
Packit Service f629e6
Packit Service f629e6
	/* nothing to parse; note that *buf is not updated on this path */
	if (len == 0)
Packit Service f629e6
		return nf;
Packit Service f629e6
Packit Service f629e6
	if (rp == NULL) /* use FPAT */
Packit Service f629e6
		rp = FPAT_regexp;
Packit Service f629e6
Packit Service f629e6
	while (scan < end && nf < up_to) {  /* still something to parse */
Packit Service f629e6
Packit Service f629e6
		/* first attempt to match the next field */
Packit Service f629e6
		start = scan;	/* the separator, if any, begins here */
Packit Service f629e6
		field_found = research(rp, scan, 0, (end - scan), regex_flags) != -1;
Packit Service f629e6
Packit Service f629e6
		/* check for an invalid null field and retry one character away */ 
		/* NOTE(review): presumably REEND() is the match's end offset relative to scan -- confirm */
Packit Service f629e6
		if (nf > 0 && field_found && REEND(rp, scan) == 0) { /* invalid null field */
Packit Service f629e6
			increment_scan(& scan, end - scan);
Packit Service f629e6
			field_found = research(rp, scan, 0, (end - scan), regex_flags) != -1;
Packit Service f629e6
		}
Packit Service f629e6
Packit Service f629e6
		/* store the (sep[n-1],field[n]) pair */
Packit Service f629e6
		if (field_found) {
Packit Service f629e6
			field_start = scan + RESTART(rp, scan);
Packit Service f629e6
			if (sep_arr != NULL) { /* store the separator */
Packit Service f629e6
				if (field_start == start) /* match at front */
Packit Service f629e6
					set_element(nf, start, 0L, sep_arr);
Packit Service f629e6
				else
Packit Service f629e6
					set_element(nf,
Packit Service f629e6
						start,
Packit Service f629e6
						(long) (field_start - start),
Packit Service f629e6
						sep_arr);
Packit Service f629e6
			}
Packit Service f629e6
			/* field is text that matched */
Packit Service f629e6
			(*set)(++nf,
Packit Service f629e6
				field_start,
Packit Service f629e6
				(long)(REEND(rp, scan) - RESTART(rp, scan)),
Packit Service f629e6
				n);
Packit Service f629e6
			/* resume right after the field just stored */
			scan += REEND(rp, scan);
Packit Service f629e6
Packit Service f629e6
		} else {
Packit Service f629e6
			/*
Packit Service f629e6
			 * No match, store the final extra separator after
Packit Service f629e6
			 * the last field.
Packit Service f629e6
			 */
Packit Service f629e6
			if (sep_arr != NULL)
Packit Service f629e6
				set_element(nf, start, (long) (end - start), sep_arr);
Packit Service f629e6
			scan = end;	/* everything has been consumed; ends the loop */
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * If the last field extends up to the end of the record, generate
Packit Service f629e6
	 * a null trailing separator
Packit Service f629e6
	 */
Packit Service f629e6
	if (sep_arr != NULL && scan == end && field_found) 
Packit Service f629e6
		set_element(nf, scan, 0L, sep_arr);
Packit Service f629e6
Packit Service f629e6
	/* tell the caller where parsing stopped */
	*buf = scan;
Packit Service f629e6
	return nf;
Packit Service f629e6
}