|
Packit |
575503 |
/*
 * field.c - routines for dealing with fields and record parsing
 */

/*
 * Copyright (C) 1986, 1988, 1989, 1991-2018 the Free Software Foundation, Inc.
 *
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 *
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
#include "awk.h"
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
 * In case that the system doesn't have isblank().
 * Don't bother with autoconf ifdef junk, just force it.
 * See dfa.c and regex_internal.h and regcomp.c. Bleah.
 */

/* is_blank --- return nonzero if c is a space or a horizontal tab */

static int
is_blank(int c)
{
	return c == ' ' || c == '\t';
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
typedef void (* Setfunc)(long, char *, long, NODE *);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* is the API currently overriding the default parsing mechanism? */
|
|
Packit |
575503 |
static bool api_parser_override = false;
|
|
Packit |
575503 |
typedef long (*parse_field_func_t)(long, char **, int, NODE *,
|
|
Packit |
575503 |
Regexp *, Setfunc, NODE *, NODE *, bool);
|
|
Packit |
575503 |
static parse_field_func_t parse_field;
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* N.B. The normal_parse_field function pointer contains the parse_field value
|
|
Packit |
575503 |
* that should be used except when API field parsing is overriding the default
|
|
Packit |
575503 |
* field parsing mechanism.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
static parse_field_func_t normal_parse_field;
|
|
Packit |
575503 |
static long re_parse_field(long, char **, int, NODE *,
|
|
Packit |
575503 |
Regexp *, Setfunc, NODE *, NODE *, bool);
|
|
Packit |
575503 |
static long def_parse_field(long, char **, int, NODE *,
|
|
Packit |
575503 |
Regexp *, Setfunc, NODE *, NODE *, bool);
|
|
Packit |
575503 |
static long null_parse_field(long, char **, int, NODE *,
|
|
Packit |
575503 |
Regexp *, Setfunc, NODE *, NODE *, bool);
|
|
Packit |
575503 |
static long sc_parse_field(long, char **, int, NODE *,
|
|
Packit |
575503 |
Regexp *, Setfunc, NODE *, NODE *, bool);
|
|
Packit |
575503 |
static long fw_parse_field(long, char **, int, NODE *,
|
|
Packit |
575503 |
Regexp *, Setfunc, NODE *, NODE *, bool);
|
|
Packit |
575503 |
static const awk_fieldwidth_info_t *api_fw = NULL;
|
|
Packit |
575503 |
static long fpat_parse_field(long, char **, int, NODE *,
|
|
Packit |
575503 |
Regexp *, Setfunc, NODE *, NODE *, bool);
|
|
Packit |
575503 |
static void set_element(long num, char * str, long len, NODE *arr);
|
|
Packit |
575503 |
static void grow_fields_arr(long num);
|
|
Packit |
575503 |
static void set_field(long num, char *str, long len, NODE *dummy);
|
|
Packit |
575503 |
static void purge_record(void);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
static char *parse_extent; /* marks where to restart parse of record */
|
|
Packit |
575503 |
static long parse_high_water = 0; /* field number that we have parsed so far */
|
|
Packit |
575503 |
static long nf_high_water = 0; /* size of fields_arr */
|
|
Packit |
575503 |
static bool resave_fs;
|
|
Packit |
575503 |
static NODE *save_FS; /* save current value of FS when line is read,
|
|
Packit |
575503 |
* to be used in deferred parsing
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
static awk_fieldwidth_info_t *FIELDWIDTHS = NULL;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
NODE **fields_arr; /* array of pointers to the field nodes */
|
|
Packit |
575503 |
bool field0_valid; /* $(>0) has not been changed yet */
|
|
Packit |
575503 |
int default_FS; /* true when FS == " " */
|
|
Packit |
575503 |
Regexp *FS_re_yes_case = NULL;
|
|
Packit |
575503 |
Regexp *FS_re_no_case = NULL;
|
|
Packit |
575503 |
Regexp *FS_regexp = NULL;
|
|
Packit |
575503 |
Regexp *FPAT_re_yes_case = NULL;
|
|
Packit |
575503 |
Regexp *FPAT_re_no_case = NULL;
|
|
Packit |
575503 |
Regexp *FPAT_regexp = NULL;
|
|
Packit |
575503 |
NODE *Null_field = NULL;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* init_fields --- set up the fields array to start with */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
init_fields()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
|
|
Packit |
575503 |
|
|
Packit |
575503 |
getnode(fields_arr[0]);
|
|
Packit |
575503 |
*fields_arr[0] = *Nnull_string;
|
|
Packit |
575503 |
fields_arr[0]->flags |= NULL_FIELD;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
parse_extent = fields_arr[0]->stptr;
|
|
Packit |
575503 |
save_FS = dupnode(FS_node->var_value);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
getnode(Null_field);
|
|
Packit |
575503 |
*Null_field = *Nnull_string;
|
|
Packit |
575503 |
Null_field->valref = 1;
|
|
Packit |
575503 |
Null_field->flags = (STRCUR|STRING|NULL_FIELD); /* do not set MALLOC */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
field0_valid = true;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* grow_fields --- acquire new fields as needed */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
static void
|
|
Packit |
575503 |
grow_fields_arr(long num)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
int t;
|
|
Packit |
575503 |
NODE *n;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "grow_fields_arr");
|
|
Packit |
575503 |
for (t = nf_high_water + 1; t <= num; t++) {
|
|
Packit |
575503 |
getnode(n);
|
|
Packit |
575503 |
*n = *Null_field;
|
|
Packit |
575503 |
fields_arr[t] = n;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
nf_high_water = num;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* set_field --- set the value of a particular field */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*ARGSUSED*/
|
|
Packit |
575503 |
static void
|
|
Packit |
575503 |
set_field(long num,
|
|
Packit |
575503 |
char *str,
|
|
Packit |
575503 |
long len,
|
|
Packit |
575503 |
NODE *dummy ATTRIBUTE_UNUSED) /* just to make interface same as set_element */
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
NODE *n;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (num > nf_high_water)
|
|
Packit |
575503 |
grow_fields_arr(num);
|
|
Packit |
575503 |
n = fields_arr[num];
|
|
Packit |
575503 |
n->stptr = str;
|
|
Packit |
575503 |
n->stlen = len;
|
|
Packit |
575503 |
n->flags = (STRCUR|STRING|USER_INPUT); /* do not set MALLOC */
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* rebuild_record --- Someone assigned a value to $(something).
|
|
Packit |
575503 |
Fix up $0 to be right */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
rebuild_record()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* use explicit unsigned longs for lengths, in case
|
|
Packit |
575503 |
* a size_t isn't big enough.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
unsigned long tlen;
|
|
Packit |
575503 |
NODE *tmp;
|
|
Packit |
575503 |
char *ops;
|
|
Packit |
575503 |
char *cops;
|
|
Packit |
575503 |
long i;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
assert(NF != -1);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
tlen = 0;
|
|
Packit |
575503 |
for (i = NF; i > 0; i--) {
|
|
Packit |
575503 |
tmp = fields_arr[i];
|
|
Packit |
575503 |
tmp = force_string(tmp);
|
|
Packit |
575503 |
tlen += tmp->stlen;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
tlen += (NF - 1) * OFSlen;
|
|
Packit |
575503 |
if ((long) tlen < 0)
|
|
Packit |
575503 |
tlen = 0;
|
|
Packit |
575503 |
emalloc(ops, char *, tlen + 1, "rebuild_record");
|
|
Packit |
575503 |
cops = ops;
|
|
Packit |
575503 |
ops[0] = '\0';
|
|
Packit |
575503 |
for (i = 1; i <= NF; i++) {
|
|
Packit |
575503 |
free_wstr(fields_arr[i]);
|
|
Packit |
575503 |
tmp = fields_arr[i];
|
|
Packit |
575503 |
/* copy field */
|
|
Packit |
575503 |
if (tmp->stlen == 1)
|
|
Packit |
575503 |
*cops++ = tmp->stptr[0];
|
|
Packit |
575503 |
else if (tmp->stlen != 0) {
|
|
Packit |
575503 |
memcpy(cops, tmp->stptr, tmp->stlen);
|
|
Packit |
575503 |
cops += tmp->stlen;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
/* copy OFS */
|
|
Packit |
575503 |
if (i != NF) {
|
|
Packit |
575503 |
if (OFSlen == 1)
|
|
Packit |
575503 |
*cops++ = *OFS;
|
|
Packit |
575503 |
else if (OFSlen != 0) {
|
|
Packit |
575503 |
memcpy(cops, OFS, OFSlen);
|
|
Packit |
575503 |
cops += OFSlen;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
tmp = make_str_node(ops, tlen, ALREADY_MALLOCED);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* Since we are about to unref fields_arr[0], we want to find
|
|
Packit |
575503 |
* any fields that still point into it, and have them point
|
|
Packit |
575503 |
* into the new field zero. This has to be done intelligently,
|
|
Packit |
575503 |
* so that unrefing a field doesn't try to unref into the old $0.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
for (cops = ops, i = 1; i <= NF; i++) {
|
|
Packit |
575503 |
NODE *r = fields_arr[i];
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* There is no reason to copy malloc'ed fields to point into
|
|
Packit |
575503 |
* the new $0 buffer, although that's how previous versions did
|
|
Packit |
575503 |
* it. It seems faster to leave the malloc'ed fields in place.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (r->stlen > 0 && (r->flags & MALLOC) == 0) {
|
|
Packit |
575503 |
NODE *n;
|
|
Packit |
575503 |
getnode(n);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
*n = *r;
|
|
Packit |
575503 |
if (r->valref > 1) {
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* This can and does happen. It seems clear that
|
|
Packit |
575503 |
* we can't leave r's stptr pointing into the
|
|
Packit |
575503 |
* old $0 buffer that we are about to unref.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
emalloc(r->stptr, char *, r->stlen + 1, "rebuild_record");
|
|
Packit |
575503 |
memcpy(r->stptr, cops, r->stlen);
|
|
Packit |
575503 |
r->stptr[r->stlen] = '\0';
|
|
Packit |
575503 |
r->flags |= MALLOC;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
n->valref = 1; // reset in the new field to start it off correctly!
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
n->stptr = cops;
|
|
Packit |
575503 |
unref(r);
|
|
Packit |
575503 |
fields_arr[i] = n;
|
|
Packit |
575503 |
assert((n->flags & WSTRCUR) == 0);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
cops += fields_arr[i]->stlen + OFSlen;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
assert((fields_arr[0]->flags & MALLOC) == 0
|
|
Packit |
575503 |
? fields_arr[0]->valref == 1
|
|
Packit |
575503 |
: true);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
unref(fields_arr[0]);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
fields_arr[0] = tmp;
|
|
Packit |
575503 |
field0_valid = true;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* set_record:
|
|
Packit |
575503 |
* setup $0, but defer parsing rest of line until reference is made to $(>0)
|
|
Packit |
575503 |
* or to NF. At that point, parse only as much as necessary.
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* Manage a private buffer for the contents of $0. Doing so keeps us safe
|
|
Packit |
575503 |
* if `getline var' decides to rearrange the contents of the IOBUF that
|
|
Packit |
575503 |
* $0 might have been pointing into. The cost is the copying of the buffer;
|
|
Packit |
575503 |
* but better correct than fast.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
set_record(const char *buf, int cnt, const awk_fieldwidth_info_t *fw)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
NODE *n;
|
|
Packit |
575503 |
static char *databuf;
|
|
Packit |
575503 |
static unsigned long databuf_size;
|
|
Packit |
575503 |
#define INITIAL_SIZE 512
|
|
Packit |
575503 |
#define MAX_SIZE ((unsigned long) ~0) /* maximally portable ... */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
purge_record();
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* buffer management: */
|
|
Packit |
575503 |
if (databuf_size == 0) { /* first time */
|
|
Packit |
575503 |
ezalloc(databuf, char *, INITIAL_SIZE, "set_record");
|
|
Packit |
575503 |
databuf_size = INITIAL_SIZE;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* Make sure there's enough room. Since we sometimes need
|
|
Packit |
575503 |
* to place a sentinel at the end, we make sure
|
|
Packit |
575503 |
* databuf_size is > cnt after allocation.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (cnt >= databuf_size) {
|
|
Packit |
575503 |
do {
|
|
Packit |
575503 |
if (databuf_size > MAX_SIZE/2)
|
|
Packit |
575503 |
fatal(_("input record too large"));
|
|
Packit |
575503 |
databuf_size *= 2;
|
|
Packit |
575503 |
} while (cnt >= databuf_size);
|
|
Packit |
575503 |
erealloc(databuf, char *, databuf_size, "set_record");
|
|
Packit |
575503 |
memset(databuf, '\0', databuf_size);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
/* copy the data */
|
|
Packit |
575503 |
memcpy(databuf, buf, cnt);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* Add terminating '\0' so that C library routines
|
|
Packit |
575503 |
* will know when to stop.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
databuf[cnt] = '\0';
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* manage field 0: */
|
|
Packit |
575503 |
assert((fields_arr[0]->flags & MALLOC) == 0
|
|
Packit |
575503 |
? fields_arr[0]->valref == 1
|
|
Packit |
575503 |
: true);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
unref(fields_arr[0]);
|
|
Packit |
575503 |
getnode(n);
|
|
Packit |
575503 |
n->stptr = databuf;
|
|
Packit |
575503 |
n->stlen = cnt;
|
|
Packit |
575503 |
n->valref = 1;
|
|
Packit |
575503 |
n->type = Node_val;
|
|
Packit |
575503 |
n->stfmt = STFMT_UNUSED;
|
|
Packit |
575503 |
#ifdef HAVE_MPFR
|
|
Packit |
575503 |
n->strndmode = MPFR_round_mode;
|
|
Packit |
575503 |
#endif
|
|
Packit |
575503 |
n->flags = (STRING|STRCUR|USER_INPUT); /* do not set MALLOC */
|
|
Packit |
575503 |
fields_arr[0] = n;
|
|
Packit |
575503 |
if (fw != api_fw) {
|
|
Packit |
575503 |
if ((api_fw = fw) != NULL) {
|
|
Packit |
575503 |
if (! api_parser_override) {
|
|
Packit |
575503 |
api_parser_override = true;
|
|
Packit |
575503 |
parse_field = fw_parse_field;
|
|
Packit |
575503 |
update_PROCINFO_str("FS", "API");
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
} else if (api_parser_override) {
|
|
Packit |
575503 |
api_parser_override = false;
|
|
Packit |
575503 |
parse_field = normal_parse_field;
|
|
Packit |
575503 |
update_PROCINFO_str("FS", current_field_sep_str());
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
#undef INITIAL_SIZE
|
|
Packit |
575503 |
#undef MAX_SIZE
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* reset_record --- start over again with current $0 */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
reset_record()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
fields_arr[0] = force_string(fields_arr[0]);
|
|
Packit |
575503 |
purge_record();
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
static void
|
|
Packit |
575503 |
purge_record()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
int i;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
NF = -1;
|
|
Packit |
575503 |
for (i = 1; i <= parse_high_water; i++) {
|
|
Packit |
575503 |
NODE *n;
|
|
Packit |
575503 |
NODE *r = fields_arr[i];
|
|
Packit |
575503 |
if ((r->flags & MALLOC) == 0 && r->valref > 1) {
|
|
Packit |
575503 |
/* This can and does happen. We must copy the string! */
|
|
Packit |
575503 |
const char *save = r->stptr;
|
|
Packit |
575503 |
emalloc(r->stptr, char *, r->stlen + 1, "purge_record");
|
|
Packit |
575503 |
memcpy(r->stptr, save, r->stlen);
|
|
Packit |
575503 |
r->stptr[r->stlen] = '\0';
|
|
Packit |
575503 |
r->flags |= MALLOC;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
unref(r);
|
|
Packit |
575503 |
getnode(n);
|
|
Packit |
575503 |
*n = *Null_field;
|
|
Packit |
575503 |
fields_arr[i] = n;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
parse_high_water = 0;
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* $0 = $0 should resplit using the current value of FS.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (resave_fs) {
|
|
Packit |
575503 |
resave_fs = false;
|
|
Packit |
575503 |
unref(save_FS);
|
|
Packit |
575503 |
save_FS = dupnode(FS_node->var_value);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
field0_valid = true;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* set_NF --- handle what happens to $0 and fields when NF is changed */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
set_NF()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
int i;
|
|
Packit |
575503 |
long nf;
|
|
Packit |
575503 |
NODE *n;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
assert(NF != -1);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
(void) force_number(NF_node->var_value);
|
|
Packit |
575503 |
nf = get_number_si(NF_node->var_value);
|
|
Packit |
575503 |
if (nf < 0)
|
|
Packit |
575503 |
fatal(_("NF set to negative value"));
|
|
Packit |
575503 |
NF = nf;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (NF > nf_high_water)
|
|
Packit |
575503 |
grow_fields_arr(NF);
|
|
Packit |
575503 |
if (parse_high_water < NF) {
|
|
Packit |
575503 |
for (i = parse_high_water + 1; i >= 0 && i <= NF; i++) {
|
|
Packit |
575503 |
unref(fields_arr[i]);
|
|
Packit |
575503 |
getnode(n);
|
|
Packit |
575503 |
*n = *Null_field;
|
|
Packit |
575503 |
fields_arr[i] = n;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
parse_high_water = NF;
|
|
Packit |
575503 |
} else if (parse_high_water > 0) {
|
|
Packit |
575503 |
for (i = NF + 1; i >= 0 && i <= parse_high_water; i++) {
|
|
Packit |
575503 |
unref(fields_arr[i]);
|
|
Packit |
575503 |
getnode(n);
|
|
Packit |
575503 |
*n = *Null_field;
|
|
Packit |
575503 |
fields_arr[i] = n;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
parse_high_water = NF;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
field0_valid = false;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* re_parse_field --- parse fields using a regexp.
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* This is called both from get_field() and from do_split()
|
|
Packit |
575503 |
* via (*parse_field)(). This variation is for when FS is a regular
|
|
Packit |
575503 |
* expression -- either user-defined or because RS=="" and FS==" "
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
static long
|
|
Packit |
575503 |
re_parse_field(long up_to, /* parse only up to this field number */
|
|
Packit |
575503 |
char **buf, /* on input: string to parse; on output: point to start next */
|
|
Packit |
575503 |
int len,
|
|
Packit |
575503 |
NODE *fs ATTRIBUTE_UNUSED,
|
|
Packit |
575503 |
Regexp *rp,
|
|
Packit |
575503 |
Setfunc set, /* routine to set the value of the parsed field */
|
|
Packit |
575503 |
NODE *n,
|
|
Packit |
575503 |
NODE *sep_arr, /* array of field separators (maybe NULL) */
|
|
Packit |
575503 |
bool in_middle)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
char *scan = *buf;
|
|
Packit |
575503 |
long nf = parse_high_water;
|
|
Packit |
575503 |
char *field;
|
|
Packit |
575503 |
char *end = scan + len;
|
|
Packit |
575503 |
int regex_flags = RE_NEED_START;
|
|
Packit |
575503 |
char *sep;
|
|
Packit |
575503 |
size_t mbclen = 0;
|
|
Packit |
575503 |
mbstate_t mbs;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
memset(&mbs, 0, sizeof(mbstate_t));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (in_middle)
|
|
Packit |
575503 |
regex_flags |= RE_NO_BOL;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (up_to == UNLIMITED)
|
|
Packit |
575503 |
nf = 0;
|
|
Packit |
575503 |
if (len == 0)
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (RS_is_null && default_FS) {
|
|
Packit |
575503 |
sep = scan;
|
|
Packit |
575503 |
while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
|
|
Packit |
575503 |
scan++;
|
|
Packit |
575503 |
if (sep_arr != NULL && sep < scan)
|
|
Packit |
575503 |
set_element(nf, sep, (long)(scan - sep), sep_arr);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (rp == NULL) /* use FS */
|
|
Packit |
575503 |
rp = FS_regexp;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
field = scan;
|
|
Packit |
575503 |
while (scan < end
|
|
Packit |
575503 |
&& research(rp, scan, 0, (end - scan), regex_flags) != -1
|
|
Packit |
575503 |
&& nf < up_to) {
|
|
Packit |
575503 |
regex_flags |= RE_NO_BOL;
|
|
Packit |
575503 |
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
|
|
Packit |
575503 |
if (gawk_mb_cur_max > 1) {
|
|
Packit |
575503 |
mbclen = mbrlen(scan, end-scan, &mbs);
|
|
Packit |
575503 |
if ((mbclen == 1) || (mbclen == (size_t) -1)
|
|
Packit |
575503 |
|| (mbclen == (size_t) -2) || (mbclen == 0)) {
|
|
Packit |
575503 |
/* We treat it as a singlebyte character. */
|
|
Packit |
575503 |
mbclen = 1;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
scan += mbclen;
|
|
Packit |
575503 |
} else
|
|
Packit |
575503 |
scan++;
|
|
Packit |
575503 |
if (scan == end) {
|
|
Packit |
575503 |
(*set)(++nf, field, (long)(scan - field), n);
|
|
Packit |
575503 |
up_to = nf;
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
continue;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
(*set)(++nf, field,
|
|
Packit |
575503 |
(long)(scan + RESTART(rp, scan) - field), n);
|
|
Packit |
575503 |
if (sep_arr != NULL)
|
|
Packit |
575503 |
set_element(nf, scan + RESTART(rp, scan),
|
|
Packit |
575503 |
(long) (REEND(rp, scan) - RESTART(rp, scan)), sep_arr);
|
|
Packit |
575503 |
scan += REEND(rp, scan);
|
|
Packit |
575503 |
field = scan;
|
|
Packit |
575503 |
if (scan == end) /* FS at end of record */
|
|
Packit |
575503 |
(*set)(++nf, field, 0L, n);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (nf != up_to && scan < end) {
|
|
Packit |
575503 |
(*set)(++nf, scan, (long)(end - scan), n);
|
|
Packit |
575503 |
scan = end;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
*buf = scan;
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* def_parse_field --- default field parsing.
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* This is called both from get_field() and from do_split()
|
|
Packit |
575503 |
* via (*parse_field)(). This variation is for when FS is a single space
|
|
Packit |
575503 |
* character.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
|
|
Packit |
575503 |
static long
|
|
Packit |
575503 |
def_parse_field(long up_to, /* parse only up to this field number */
|
|
Packit |
575503 |
char **buf, /* on input: string to parse; on output: point to start next */
|
|
Packit |
575503 |
int len,
|
|
Packit |
575503 |
NODE *fs,
|
|
Packit |
575503 |
Regexp *rp ATTRIBUTE_UNUSED,
|
|
Packit |
575503 |
Setfunc set, /* routine to set the value of the parsed field */
|
|
Packit |
575503 |
NODE *n,
|
|
Packit |
575503 |
NODE *sep_arr, /* array of field separators (maybe NULL) */
|
|
Packit |
575503 |
bool in_middle ATTRIBUTE_UNUSED)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
char *scan = *buf;
|
|
Packit |
575503 |
long nf = parse_high_water;
|
|
Packit |
575503 |
char *field;
|
|
Packit |
575503 |
char *end = scan + len;
|
|
Packit |
575503 |
char sav;
|
|
Packit |
575503 |
char *sep;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (up_to == UNLIMITED)
|
|
Packit |
575503 |
nf = 0;
|
|
Packit |
575503 |
if (len == 0)
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* Nasty special case. If FS set to "", return whole record
|
|
Packit |
575503 |
* as first field. This is not worth a separate function.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (fs->stlen == 0) {
|
|
Packit |
575503 |
(*set)(++nf, *buf, len, n);
|
|
Packit |
575503 |
*buf += len;
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* before doing anything save the char at *end */
|
|
Packit |
575503 |
sav = *end;
|
|
Packit |
575503 |
/* because it will be destroyed now: */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
*end = ' '; /* sentinel character */
|
|
Packit |
575503 |
sep = scan;
|
|
Packit |
575503 |
for (; nf < up_to; scan++) {
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* special case: fs is single space, strip leading whitespace
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
|
|
Packit |
575503 |
scan++;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (sep_arr != NULL && scan > sep)
|
|
Packit |
575503 |
set_element(nf, sep, (long) (scan - sep), sep_arr);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (scan >= end)
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
field = scan;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
while (*scan != ' ' && *scan != '\t' && *scan != '\n')
|
|
Packit |
575503 |
scan++;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
(*set)(++nf, field, (long)(scan - field), n);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (scan == end)
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
sep = scan;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* everything done, restore original char at *end */
|
|
Packit |
575503 |
*end = sav;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
*buf = scan;
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* null_parse_field --- each character is a separate field
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* This is called both from get_field() and from do_split()
|
|
Packit |
575503 |
* via (*parse_field)(). This variation is for when FS is the null string.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
static long
|
|
Packit |
575503 |
null_parse_field(long up_to, /* parse only up to this field number */
|
|
Packit |
575503 |
char **buf, /* on input: string to parse; on output: point to start next */
|
|
Packit |
575503 |
int len,
|
|
Packit |
575503 |
NODE *fs ATTRIBUTE_UNUSED,
|
|
Packit |
575503 |
Regexp *rp ATTRIBUTE_UNUSED,
|
|
Packit |
575503 |
Setfunc set, /* routine to set the value of the parsed field */
|
|
Packit |
575503 |
NODE *n,
|
|
Packit |
575503 |
NODE *sep_arr, /* array of field separators (maybe NULL) */
|
|
Packit |
575503 |
bool in_middle ATTRIBUTE_UNUSED)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
char *scan = *buf;
|
|
Packit |
575503 |
long nf = parse_high_water;
|
|
Packit |
575503 |
char *end = scan + len;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (up_to == UNLIMITED)
|
|
Packit |
575503 |
nf = 0;
|
|
Packit |
575503 |
if (len == 0)
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (gawk_mb_cur_max > 1) {
|
|
Packit |
575503 |
mbstate_t mbs;
|
|
Packit |
575503 |
memset(&mbs, 0, sizeof(mbstate_t));
|
|
Packit |
575503 |
for (; nf < up_to && scan < end;) {
|
|
Packit |
575503 |
size_t mbclen = mbrlen(scan, end-scan, &mbs);
|
|
Packit |
575503 |
if ((mbclen == 1) || (mbclen == (size_t) -1)
|
|
Packit |
575503 |
|| (mbclen == (size_t) -2) || (mbclen == 0)) {
|
|
Packit |
575503 |
/* We treat it as a singlebyte character. */
|
|
Packit |
575503 |
mbclen = 1;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (sep_arr != NULL && nf > 0)
|
|
Packit |
575503 |
set_element(nf, scan, 0L, sep_arr);
|
|
Packit |
575503 |
(*set)(++nf, scan, mbclen, n);
|
|
Packit |
575503 |
scan += mbclen;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
for (; nf < up_to && scan < end; scan++) {
|
|
Packit |
575503 |
if (sep_arr != NULL && nf > 0)
|
|
Packit |
575503 |
set_element(nf, scan, 0L, sep_arr);
|
|
Packit |
575503 |
(*set)(++nf, scan, 1L, n);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
*buf = scan;
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* sc_parse_field --- single character field separator
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* This is called both from get_field() and from do_split()
|
|
Packit |
575503 |
* via (*parse_field)(). This variation is for when FS is a single character
|
|
Packit |
575503 |
* other than space.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
static long
|
|
Packit |
575503 |
sc_parse_field(long up_to, /* parse only up to this field number */
|
|
Packit |
575503 |
char **buf, /* on input: string to parse; on output: point to start next */
|
|
Packit |
575503 |
int len,
|
|
Packit |
575503 |
NODE *fs,
|
|
Packit |
575503 |
Regexp *rp ATTRIBUTE_UNUSED,
|
|
Packit |
575503 |
Setfunc set, /* routine to set the value of the parsed field */
|
|
Packit |
575503 |
NODE *n,
|
|
Packit |
575503 |
NODE *sep_arr, /* array of field separators (maybe NULL) */
|
|
Packit |
575503 |
bool in_middle ATTRIBUTE_UNUSED)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
char *scan = *buf;
|
|
Packit |
575503 |
char fschar;
|
|
Packit |
575503 |
long nf = parse_high_water;
|
|
Packit |
575503 |
char *field;
|
|
Packit |
575503 |
char *end = scan + len;
|
|
Packit |
575503 |
char sav;
|
|
Packit |
575503 |
size_t mbclen = 0;
|
|
Packit |
575503 |
mbstate_t mbs;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
memset(&mbs, 0, sizeof(mbstate_t));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (up_to == UNLIMITED)
|
|
Packit |
575503 |
nf = 0;
|
|
Packit |
575503 |
if (len == 0)
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (RS_is_null && fs->stlen == 0)
|
|
Packit |
575503 |
fschar = '\n';
|
|
Packit |
575503 |
else
|
|
Packit |
575503 |
fschar = fs->stptr[0];
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* before doing anything save the char at *end */
|
|
Packit |
575503 |
sav = *end;
|
|
Packit |
575503 |
/* because it will be destroyed now: */
|
|
Packit |
575503 |
*end = fschar; /* sentinel character */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
for (; nf < up_to;) {
|
|
Packit |
575503 |
field = scan;
|
|
Packit |
575503 |
if (gawk_mb_cur_max > 1) {
|
|
Packit |
575503 |
while (*scan != fschar) {
|
|
Packit |
575503 |
mbclen = mbrlen(scan, end-scan, &mbs);
|
|
Packit |
575503 |
if ((mbclen == 1) || (mbclen == (size_t) -1)
|
|
Packit |
575503 |
|| (mbclen == (size_t) -2) || (mbclen == 0)) {
|
|
Packit |
575503 |
/* We treat it as a singlebyte character. */
|
|
Packit |
575503 |
mbclen = 1;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
scan += mbclen;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
while (*scan != fschar)
|
|
Packit |
575503 |
scan++;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
(*set)(++nf, field, (long)(scan - field), n);
|
|
Packit |
575503 |
if (scan == end)
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
if (sep_arr != NULL)
|
|
Packit |
575503 |
set_element(nf, scan, 1L, sep_arr);
|
|
Packit |
575503 |
scan++;
|
|
Packit |
575503 |
if (scan == end) { /* FS at end of record */
|
|
Packit |
575503 |
(*set)(++nf, field, 0L, n);
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* everything done, restore original char at *end */
|
|
Packit |
575503 |
*end = sav;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
*buf = scan;
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
 * calc_mbslen --- calculate the length in bytes of a multi-byte string
 * containing len characters.
 *
 * Walks at most len characters forward from scan without going past
 * end, using mbrlen() with the caller's conversion state mbs, and
 * returns the number of bytes covered.
 */

static size_t
calc_mbslen(char *scan, char *end, size_t len, mbstate_t *mbs)
{

	size_t mbclen;
	char *mbscan = scan;

	while (len-- > 0 && mbscan < end) {
		mbclen = mbrlen(mbscan, end - mbscan, mbs);
		if (!(mbclen > 0 && mbclen <= (size_t)(end - mbscan)))
			/*
			 * We treat it as a singlebyte character. This should
			 * catch error codes 0, (size_t) -1, and (size_t) -2.
			 */
			mbclen = 1;
		mbscan += mbclen;
	}
	return mbscan - scan;
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* fw_parse_field --- field parsing using FIELDWIDTHS spec
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* This is called from get_field() via (*parse_field)().
|
|
Packit |
575503 |
* This variation is for fields are fixed widths.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
static long
|
|
Packit |
575503 |
fw_parse_field(long up_to, /* parse only up to this field number */
|
|
Packit |
575503 |
char **buf, /* on input: string to parse; on output: point to start next */
|
|
Packit |
575503 |
int len,
|
|
Packit |
575503 |
NODE *fs ATTRIBUTE_UNUSED,
|
|
Packit |
575503 |
Regexp *rp ATTRIBUTE_UNUSED,
|
|
Packit |
575503 |
Setfunc set, /* routine to set the value of the parsed field */
|
|
Packit |
575503 |
NODE *n,
|
|
Packit |
575503 |
NODE *dummy ATTRIBUTE_UNUSED, /* sep_arr not needed here: hence dummy */
|
|
Packit |
575503 |
bool in_middle ATTRIBUTE_UNUSED)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
char *scan = *buf;
|
|
Packit |
575503 |
long nf = parse_high_water;
|
|
Packit |
575503 |
char *end = scan + len;
|
|
Packit |
575503 |
const awk_fieldwidth_info_t *fw;
|
|
Packit |
575503 |
mbstate_t mbs;
|
|
Packit |
575503 |
size_t skiplen;
|
|
Packit |
575503 |
size_t flen;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
fw = (api_parser_override ? api_fw : FIELDWIDTHS);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (up_to == UNLIMITED)
|
|
Packit |
575503 |
nf = 0;
|
|
Packit |
575503 |
if (len == 0)
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
if (gawk_mb_cur_max > 1 && fw->use_chars) {
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* Reset the shift state. Arguably, the shift state should
|
|
Packit |
575503 |
* be part of the file state and carried forward at all times,
|
|
Packit |
575503 |
* but nobody has complained so far, so this may not matter
|
|
Packit |
575503 |
* in practice.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
memset(&mbs, 0, sizeof(mbstate_t));
|
|
Packit |
575503 |
while (nf < up_to && scan < end) {
|
|
Packit |
575503 |
if (nf >= fw->nf) {
|
|
Packit |
575503 |
*buf = end;
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
scan += calc_mbslen(scan, end, fw->fields[nf].skip, &mbs);
|
|
Packit |
575503 |
flen = calc_mbslen(scan, end, fw->fields[nf].len, &mbs);
|
|
Packit |
575503 |
(*set)(++nf, scan, (long) flen, n);
|
|
Packit |
575503 |
scan += flen;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
while (nf < up_to && scan < end) {
|
|
Packit |
575503 |
if (nf >= fw->nf) {
|
|
Packit |
575503 |
*buf = end;
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
skiplen = fw->fields[nf].skip;
|
|
Packit |
575503 |
if (skiplen > end - scan)
|
|
Packit |
575503 |
skiplen = end - scan;
|
|
Packit |
575503 |
scan += skiplen;
|
|
Packit |
575503 |
flen = fw->fields[nf].len;
|
|
Packit |
575503 |
if (flen > end - scan)
|
|
Packit |
575503 |
flen = end - scan;
|
|
Packit |
575503 |
(*set)(++nf, scan, (long) flen, n);
|
|
Packit |
575503 |
scan += flen;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
*buf = scan;
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* invalidate_field0 --- $0 needs reconstruction */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
invalidate_field0()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
field0_valid = false;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* get_field --- return a particular $n */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* assign is not NULL if this field is on the LHS of an assign */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
NODE **
|
|
Packit |
575503 |
get_field(long requested, Func_ptr *assign)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
bool in_middle = false;
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* if requesting whole line but some other field has been altered,
|
|
Packit |
575503 |
* then the whole line must be rebuilt
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (requested == 0) {
|
|
Packit |
575503 |
if (! field0_valid) {
|
|
Packit |
575503 |
/* first, parse remainder of input record */
|
|
Packit |
575503 |
if (NF == -1) {
|
|
Packit |
575503 |
NF = (*parse_field)(UNLIMITED - 1, &parse_extent,
|
|
Packit |
575503 |
fields_arr[0]->stlen -
|
|
Packit |
575503 |
(parse_extent - fields_arr[0]->stptr),
|
|
Packit |
575503 |
save_FS, FS_regexp, set_field,
|
|
Packit |
575503 |
(NODE *) NULL,
|
|
Packit |
575503 |
(NODE *) NULL,
|
|
Packit |
575503 |
in_middle);
|
|
Packit |
575503 |
parse_high_water = NF;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
rebuild_record();
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (assign != NULL)
|
|
Packit |
575503 |
*assign = reset_record;
|
|
Packit |
575503 |
return &fields_arr[0];
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* assert(requested > 0); */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
#if 0
|
|
Packit |
575503 |
if (assign != NULL)
|
|
Packit |
575503 |
field0_valid = false; /* $0 needs reconstruction */
|
|
Packit |
575503 |
#else
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* Keep things uniform. Also, mere intention of assigning something
|
|
Packit |
575503 |
* to $n should not make $0 invalid. Makes sense to invalidate $0
|
|
Packit |
575503 |
* after the actual assignment is performed. Not a real issue in
|
|
Packit |
575503 |
* the interpreter otherwise, but causes problem in the
|
|
Packit |
575503 |
* debugger when watching or printing fields.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (assign != NULL)
|
|
Packit |
575503 |
*assign = invalidate_field0; /* $0 needs reconstruction */
|
|
Packit |
575503 |
#endif
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (requested <= parse_high_water) /* already parsed this field */
|
|
Packit |
575503 |
return &fields_arr[requested];
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (NF == -1) { /* have not yet parsed to end of record */
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* parse up to requested fields, calling set_field() for each,
|
|
Packit |
575503 |
* saving in parse_extent the point where the parse left off
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (parse_high_water == 0) /* starting at the beginning */
|
|
Packit |
575503 |
parse_extent = fields_arr[0]->stptr;
|
|
Packit |
575503 |
else
|
|
Packit |
575503 |
in_middle = true;
|
|
Packit |
575503 |
parse_high_water = (*parse_field)(requested, &parse_extent,
|
|
Packit |
575503 |
fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr),
|
|
Packit |
575503 |
save_FS, NULL, set_field, (NODE *) NULL, (NODE *) NULL, in_middle);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* if we reached the end of the record, set NF to the number of
|
|
Packit |
575503 |
* fields so far. Note that requested might actually refer to
|
|
Packit |
575503 |
* a field that is beyond the end of the record, but we won't
|
|
Packit |
575503 |
* set NF to that value at this point, since this is only a
|
|
Packit |
575503 |
* reference to the field and NF only gets set if the field
|
|
Packit |
575503 |
* is assigned to -- this case is handled below
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
|
|
Packit |
575503 |
NF = parse_high_water;
|
|
Packit |
575503 |
else if (parse_field == fpat_parse_field) {
|
|
Packit |
575503 |
/* FPAT parsing is weird, isolate the special cases */
|
|
Packit |
575503 |
char *rec_start = fields_arr[0]->stptr;
|
|
Packit |
575503 |
char *rec_end = fields_arr[0]->stptr + fields_arr[0]->stlen;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if ( parse_extent > rec_end
|
|
Packit |
575503 |
|| (parse_extent > rec_start && parse_extent < rec_end && requested == UNLIMITED-1))
|
|
Packit |
575503 |
NF = parse_high_water;
|
|
Packit |
575503 |
else if (parse_extent == rec_start) /* could be no match for FPAT */
|
|
Packit |
575503 |
NF = 0;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (requested == UNLIMITED - 1) /* UNLIMITED-1 means set NF */
|
|
Packit |
575503 |
requested = parse_high_water;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (parse_high_water < requested) { /* requested beyond end of record */
|
|
Packit |
575503 |
if (assign != NULL) { /* expand record */
|
|
Packit |
575503 |
if (requested > nf_high_water)
|
|
Packit |
575503 |
grow_fields_arr(requested);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
NF = requested;
|
|
Packit |
575503 |
parse_high_water = requested;
|
|
Packit |
575503 |
} else
|
|
Packit |
575503 |
return &Null_field;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
return &fields_arr[requested];
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* set_element --- set an array element, used by do_split() */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
static void
|
|
Packit |
575503 |
set_element(long num, char *s, long len, NODE *n)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
NODE *it;
|
|
Packit |
575503 |
NODE **lhs;
|
|
Packit |
575503 |
NODE *sub;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
it = make_string(s, len);
|
|
Packit |
575503 |
it->flags |= USER_INPUT;
|
|
Packit |
575503 |
sub = make_number((AWKNUM) (num));
|
|
Packit |
575503 |
lhs = assoc_lookup(n, sub);
|
|
Packit |
575503 |
unref(*lhs);
|
|
Packit |
575503 |
*lhs = it;
|
|
Packit |
575503 |
if (n->astore != NULL)
|
|
Packit |
575503 |
(*n->astore)(n, sub);
|
|
Packit |
575503 |
unref(sub);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* do_split --- implement split(), semantics are same as for field splitting */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
NODE *
|
|
Packit |
575503 |
do_split(int nargs)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
NODE *src, *arr, *sep, *fs, *tmp, *sep_arr = NULL;
|
|
Packit |
575503 |
char *s;
|
|
Packit |
575503 |
long (*parseit)(long, char **, int, NODE *,
|
|
Packit |
575503 |
Regexp *, Setfunc, NODE *, NODE *, bool);
|
|
Packit |
575503 |
Regexp *rp = NULL;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (nargs == 4) {
|
|
Packit |
575503 |
static bool warned = false;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (do_traditional || do_posix) {
|
|
Packit |
575503 |
fatal(_("split: fourth argument is a gawk extension"));
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
sep_arr = POP_PARAM();
|
|
Packit |
575503 |
if (sep_arr->type != Node_var_array)
|
|
Packit |
575503 |
fatal(_("split: fourth argument is not an array"));
|
|
Packit |
575503 |
if ((do_lint || do_lint_old) && ! warned) {
|
|
Packit |
575503 |
warned = true;
|
|
Packit |
575503 |
lintwarn(_("split: fourth argument is a gawk extension"));
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
sep = POP();
|
|
Packit |
575503 |
arr = POP_PARAM();
|
|
Packit |
575503 |
if (arr->type != Node_var_array)
|
|
Packit |
575503 |
fatal(_("split: second argument is not an array"));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (sep_arr != NULL) {
|
|
Packit |
575503 |
if (sep_arr == arr)
|
|
Packit |
575503 |
fatal(_("split: cannot use the same array for second and fourth args"));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* This checks need to be done before clearing any of the arrays */
|
|
Packit |
575503 |
for (tmp = sep_arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
|
|
Packit |
575503 |
if (tmp == arr)
|
|
Packit |
575503 |
fatal(_("split: cannot use a subarray of second arg for fourth arg"));
|
|
Packit |
575503 |
for (tmp = arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
|
|
Packit |
575503 |
if (tmp == sep_arr)
|
|
Packit |
575503 |
fatal(_("split: cannot use a subarray of fourth arg for second arg"));
|
|
Packit |
575503 |
assoc_clear(sep_arr);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
assoc_clear(arr);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
src = TOP_STRING();
|
|
Packit |
575503 |
if (src->stlen == 0) {
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* Skip the work if first arg is the null string.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
tmp = POP_SCALAR();
|
|
Packit |
575503 |
DEREF(tmp);
|
|
Packit |
575503 |
return make_number((AWKNUM) 0);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if ((sep->flags & REGEX) != 0)
|
|
Packit |
575503 |
sep = sep->typed_re;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if ( (sep->re_flags & FS_DFLT) != 0
|
|
Packit |
575503 |
&& current_field_sep() == Using_FS
|
|
Packit |
575503 |
&& ! RS_is_null) {
|
|
Packit |
575503 |
parseit = parse_field;
|
|
Packit |
575503 |
fs = force_string(FS_node->var_value);
|
|
Packit |
575503 |
rp = FS_regexp;
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
fs = sep->re_exp;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (fs->stlen == 0) {
|
|
Packit |
575503 |
static bool warned = false;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
parseit = null_parse_field;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (do_lint && ! warned) {
|
|
Packit |
575503 |
warned = true;
|
|
Packit |
575503 |
lintwarn(_("split: null string for third arg is a gawk extension"));
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
} else if (fs->stlen == 1 && (sep->re_flags & CONSTANT) == 0) {
|
|
Packit |
575503 |
if (fs->stptr[0] == ' ') {
|
|
Packit |
575503 |
parseit = def_parse_field;
|
|
Packit |
575503 |
} else
|
|
Packit |
575503 |
parseit = sc_parse_field;
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
parseit = re_parse_field;
|
|
Packit |
575503 |
rp = re_update(sep);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
s = src->stptr;
|
|
Packit |
575503 |
tmp = make_number((AWKNUM) (*parseit)(UNLIMITED, &s, (int) src->stlen,
|
|
Packit |
575503 |
fs, rp, set_element, arr, sep_arr, false));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
src = POP_SCALAR(); /* really pop off stack */
|
|
Packit |
575503 |
DEREF(src);
|
|
Packit |
575503 |
return tmp;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* do_patsplit --- implement patsplit(), semantics are same as for field
|
|
Packit |
575503 |
* splitting with FPAT.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
|
|
Packit |
575503 |
NODE *
|
|
Packit |
575503 |
do_patsplit(int nargs)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
NODE *src, *arr, *sep, *fpat, *tmp, *sep_arr = NULL;
|
|
Packit |
575503 |
char *s;
|
|
Packit |
575503 |
Regexp *rp = NULL;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (nargs == 4) {
|
|
Packit |
575503 |
sep_arr = POP_PARAM();
|
|
Packit |
575503 |
if (sep_arr->type != Node_var_array)
|
|
Packit |
575503 |
fatal(_("patsplit: fourth argument is not an array"));
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
sep = POP();
|
|
Packit |
575503 |
arr = POP_PARAM();
|
|
Packit |
575503 |
if (arr->type != Node_var_array)
|
|
Packit |
575503 |
fatal(_("patsplit: second argument is not an array"));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
src = TOP_STRING();
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if ((sep->flags & REGEX) != 0)
|
|
Packit |
575503 |
sep = sep->typed_re;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
fpat = sep->re_exp;
|
|
Packit |
575503 |
if (fpat->stlen == 0)
|
|
Packit |
575503 |
fatal(_("patsplit: third argument must be non-null"));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (sep_arr != NULL) {
|
|
Packit |
575503 |
if (sep_arr == arr)
|
|
Packit |
575503 |
fatal(_("patsplit: cannot use the same array for second and fourth args"));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* These checks need to be done before clearing any of the arrays */
|
|
Packit |
575503 |
for (tmp = sep_arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
|
|
Packit |
575503 |
if (tmp == arr)
|
|
Packit |
575503 |
fatal(_("patsplit: cannot use a subarray of second arg for fourth arg"));
|
|
Packit |
575503 |
for (tmp = arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
|
|
Packit |
575503 |
if (tmp == sep_arr)
|
|
Packit |
575503 |
fatal(_("patsplit: cannot use a subarray of fourth arg for second arg"));
|
|
Packit |
575503 |
assoc_clear(sep_arr);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
assoc_clear(arr);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (src->stlen == 0) {
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* Skip the work if first arg is the null string.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
tmp = make_number((AWKNUM) 0);
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
rp = re_update(sep);
|
|
Packit |
575503 |
s = src->stptr;
|
|
Packit |
575503 |
tmp = make_number((AWKNUM) fpat_parse_field(UNLIMITED, &s,
|
|
Packit |
575503 |
(int) src->stlen, fpat, rp,
|
|
Packit |
575503 |
set_element, arr, sep_arr, false));
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
src = POP_SCALAR(); /* really pop off stack */
|
|
Packit |
575503 |
DEREF(src);
|
|
Packit |
575503 |
return tmp;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* set_parser --- update the current (non-API) parser */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
static void
|
|
Packit |
575503 |
set_parser(parse_field_func_t func)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
normal_parse_field = func;
|
|
Packit |
575503 |
if (! api_parser_override && parse_field != func) {
|
|
Packit |
575503 |
parse_field = func;
|
|
Packit |
575503 |
update_PROCINFO_str("FS", current_field_sep_str());
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
set_FIELDWIDTHS()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
char *scan;
|
|
Packit |
575503 |
char *end;
|
|
Packit |
575503 |
int i;
|
|
Packit |
575503 |
static int fw_alloc = 4;
|
|
Packit |
575503 |
static bool warned = false;
|
|
Packit |
575503 |
bool fatal_error = false;
|
|
Packit |
575503 |
NODE *tmp;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (do_lint && ! warned) {
|
|
Packit |
575503 |
warned = true;
|
|
Packit |
575503 |
lintwarn(_("`FIELDWIDTHS' is a gawk extension"));
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (do_traditional) /* quick and dirty, does the trick */
|
|
Packit |
575503 |
return;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* If changing the way fields are split, obey least-surprise
|
|
Packit |
575503 |
* semantics, and force $0 to be split totally.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (fields_arr != NULL)
|
|
Packit |
575503 |
(void) get_field(UNLIMITED - 1, 0);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
set_parser(fw_parse_field);
|
|
Packit |
575503 |
tmp = force_string(FIELDWIDTHS_node->var_value);
|
|
Packit |
575503 |
scan = tmp->stptr;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (FIELDWIDTHS == NULL) {
|
|
Packit |
575503 |
emalloc(FIELDWIDTHS, awk_fieldwidth_info_t *, awk_fieldwidth_info_size(fw_alloc), "set_FIELDWIDTHS");
|
|
Packit |
575503 |
FIELDWIDTHS->use_chars = awk_true;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
FIELDWIDTHS->nf = 0;
|
|
Packit |
575503 |
for (i = 0; ; i++) {
|
|
Packit |
575503 |
unsigned long int tmp;
|
|
Packit |
575503 |
if (i >= fw_alloc) {
|
|
Packit |
575503 |
fw_alloc *= 2;
|
|
Packit |
575503 |
erealloc(FIELDWIDTHS, awk_fieldwidth_info_t *, awk_fieldwidth_info_size(fw_alloc), "set_FIELDWIDTHS");
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
/* Ensure that there is no leading `-' sign. Otherwise,
|
|
Packit |
575503 |
strtoul would accept it and return a bogus result. */
|
|
Packit |
575503 |
while (is_blank(*scan)) {
|
|
Packit |
575503 |
++scan;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (*scan == '-') {
|
|
Packit |
575503 |
fatal_error = true;
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (*scan == '\0')
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
// Look for skip value. We allow N:M and N:*.
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* Detect an invalid base-10 integer, a valid value that
|
|
Packit |
575503 |
* is followed by something other than a blank or '\0',
|
|
Packit |
575503 |
* or a value that is not in the range [1..UINT_MAX].
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
errno = 0;
|
|
Packit |
575503 |
tmp = strtoul(scan, &end, 10);
|
|
Packit |
575503 |
if (errno == 0 && *end == ':' && (0 < tmp && tmp <= UINT_MAX)) {
|
|
Packit |
575503 |
FIELDWIDTHS->fields[i].skip = tmp;
|
|
Packit |
575503 |
scan = end + 1;
|
|
Packit |
575503 |
if (*scan == '-' || is_blank(*scan)) {
|
|
Packit |
575503 |
fatal_error = true;
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
// try scanning for field width
|
|
Packit |
575503 |
tmp = strtoul(scan, &end, 10);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
else
|
|
Packit |
575503 |
FIELDWIDTHS->fields[i].skip = 0;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (errno != 0
|
|
Packit |
575503 |
|| (*end != '\0' && ! is_blank(*end))
|
|
Packit |
575503 |
|| !(0 < tmp && tmp <= UINT_MAX)
|
|
Packit |
575503 |
) {
|
|
Packit |
575503 |
if (*scan == '*') {
|
|
Packit |
575503 |
for (scan++; is_blank(*scan); scan++)
|
|
Packit |
575503 |
continue;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (*scan != '\0')
|
|
Packit |
575503 |
fatal(_("`*' must be the last designator in FIELDWIDTHS"));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
FIELDWIDTHS->fields[i].len = UINT_MAX;
|
|
Packit |
575503 |
FIELDWIDTHS->nf = i+1;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
else
|
|
Packit |
575503 |
fatal_error = true;
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
FIELDWIDTHS->fields[i].len = tmp;
|
|
Packit |
575503 |
FIELDWIDTHS->nf = i+1;
|
|
Packit |
575503 |
scan = end;
|
|
Packit |
575503 |
/* Skip past any trailing blanks. */
|
|
Packit |
575503 |
while (is_blank(*scan)) {
|
|
Packit |
575503 |
++scan;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (*scan == '\0')
|
|
Packit |
575503 |
break;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (fatal_error)
|
|
Packit |
575503 |
fatal(_("invalid FIELDWIDTHS value, for field %d, near `%s'"),
|
|
Packit |
575503 |
i + 1, scan);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* set_FS --- handle things when FS is assigned to */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
set_FS()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
char buf[10];
|
|
Packit |
575503 |
NODE *fs;
|
|
Packit |
575503 |
static NODE *save_fs = NULL;
|
|
Packit |
575503 |
static NODE *save_rs = NULL;
|
|
Packit |
575503 |
bool remake_re = true;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* If changing the way fields are split, obey least-surprise
|
|
Packit |
575503 |
* semantics, and force $0 to be split totally.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (fields_arr != NULL)
|
|
Packit |
575503 |
(void) get_field(UNLIMITED - 1, 0);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* It's possible that only IGNORECASE changed, or FS = FS */
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* This comparison can't use cmp_nodes(), which pays attention
|
|
Packit |
575503 |
* to IGNORECASE, and that's not what we want.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (save_fs
|
|
Packit |
575503 |
&& FS_node->var_value->stlen == save_fs->stlen
|
|
Packit |
575503 |
&& memcmp(FS_node->var_value->stptr, save_fs->stptr, save_fs->stlen) == 0
|
|
Packit |
575503 |
&& save_rs
|
|
Packit |
575503 |
&& RS_node->var_value->stlen == save_rs->stlen
|
|
Packit |
575503 |
&& memcmp(RS_node->var_value->stptr, save_rs->stptr, save_rs->stlen) == 0) {
|
|
Packit |
575503 |
if (FS_regexp != NULL)
|
|
Packit |
575503 |
FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* FS = FS */
|
|
Packit |
575503 |
if (current_field_sep() == Using_FS) {
|
|
Packit |
575503 |
return;
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
remake_re = false;
|
|
Packit |
575503 |
goto choose_fs_function;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
unref(save_fs);
|
|
Packit |
575503 |
save_fs = dupnode(FS_node->var_value);
|
|
Packit |
575503 |
unref(save_rs);
|
|
Packit |
575503 |
save_rs = dupnode(RS_node->var_value);
|
|
Packit |
575503 |
resave_fs = true;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* If FS_re_no_case assignment is fatal (make_regexp in remake_re)
|
|
Packit |
575503 |
* FS_regexp will be NULL with a non-null FS_re_yes_case.
|
|
Packit |
575503 |
* refree() handles null argument; no need for `if (FS_regexp != NULL)' below.
|
|
Packit |
575503 |
* Please do not remerge.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
refree(FS_re_yes_case);
|
|
Packit |
575503 |
refree(FS_re_no_case);
|
|
Packit |
575503 |
FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
|
|
Packit |
575503 |
choose_fs_function:
|
|
Packit |
575503 |
buf[0] = '\0';
|
|
Packit |
575503 |
default_FS = false;
|
|
Packit |
575503 |
fs = force_string(FS_node->var_value);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (! do_traditional && fs->stlen == 0) {
|
|
Packit |
575503 |
static bool warned = false;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
set_parser(null_parse_field);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (do_lint && ! warned) {
|
|
Packit |
575503 |
warned = true;
|
|
Packit |
575503 |
lintwarn(_("null string for `FS' is a gawk extension"));
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
} else if (fs->stlen > 1) {
|
|
Packit |
575503 |
if (do_lint_old)
|
|
Packit |
575503 |
warning(_("old awk does not support regexps as value of `FS'"));
|
|
Packit |
575503 |
set_parser(re_parse_field);
|
|
Packit |
575503 |
} else if (RS_is_null) {
|
|
Packit |
575503 |
/* we know that fs->stlen <= 1 */
|
|
Packit |
575503 |
set_parser(sc_parse_field);
|
|
Packit |
575503 |
if (fs->stlen == 1) {
|
|
Packit |
575503 |
if (fs->stptr[0] == ' ') {
|
|
Packit |
575503 |
default_FS = true;
|
|
Packit |
575503 |
strcpy(buf, "[ \t\n]+");
|
|
Packit |
575503 |
} else if (fs->stptr[0] == '\\') {
|
|
Packit |
575503 |
/* yet another special case */
|
|
Packit |
575503 |
strcpy(buf, "[\\\\\n]");
|
|
Packit |
575503 |
} else if (fs->stptr[0] == '\0') {
|
|
Packit |
575503 |
/* and yet another special case */
|
|
Packit |
575503 |
strcpy(buf, "[\\000\n]");
|
|
Packit |
575503 |
} else if (fs->stptr[0] != '\n') {
|
|
Packit |
575503 |
sprintf(buf, "[%c\n]", fs->stptr[0]);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
set_parser(def_parse_field);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (fs->stlen == 1) {
|
|
Packit |
575503 |
if (fs->stptr[0] == ' ')
|
|
Packit |
575503 |
default_FS = true;
|
|
Packit |
575503 |
else if (fs->stptr[0] == '\\')
|
|
Packit |
575503 |
/* same special case */
|
|
Packit |
575503 |
strcpy(buf, "[\\\\]");
|
|
Packit |
575503 |
else
|
|
Packit |
575503 |
set_parser(sc_parse_field);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (remake_re) {
|
|
Packit |
575503 |
refree(FS_re_yes_case);
|
|
Packit |
575503 |
refree(FS_re_no_case);
|
|
Packit |
575503 |
FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (buf[0] != '\0') {
|
|
Packit |
575503 |
FS_re_yes_case = make_regexp(buf, strlen(buf), false, true, true);
|
|
Packit |
575503 |
FS_re_no_case = make_regexp(buf, strlen(buf), true, true, true);
|
|
Packit |
575503 |
FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
|
|
Packit |
575503 |
set_parser(re_parse_field);
|
|
Packit |
575503 |
} else if (parse_field == re_parse_field) {
|
|
Packit |
575503 |
FS_re_yes_case = make_regexp(fs->stptr, fs->stlen, false, true, true);
|
|
Packit |
575503 |
FS_re_no_case = make_regexp(fs->stptr, fs->stlen, true, true, true);
|
|
Packit |
575503 |
FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
|
|
Packit |
575503 |
} else
|
|
Packit |
575503 |
FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* For FS = "c", we don't use IGNORECASE. But we must use
|
|
Packit |
575503 |
* re_parse_field to get the character and the newline as
|
|
Packit |
575503 |
* field separators.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (fs->stlen == 1 && parse_field == re_parse_field)
|
|
Packit |
575503 |
FS_regexp = FS_re_yes_case;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* current_field_sep --- return the field separator type */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
field_sep_type
|
|
Packit |
575503 |
current_field_sep()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
if (api_parser_override)
|
|
Packit |
575503 |
return Using_API;
|
|
Packit |
575503 |
else if (parse_field == fw_parse_field)
|
|
Packit |
575503 |
return Using_FIELDWIDTHS;
|
|
Packit |
575503 |
else if (parse_field == fpat_parse_field)
|
|
Packit |
575503 |
return Using_FPAT;
|
|
Packit |
575503 |
else
|
|
Packit |
575503 |
return Using_FS;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* current_field_sep_str --- return the field separator type as a string */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
const char *
|
|
Packit |
575503 |
current_field_sep_str()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
if (api_parser_override)
|
|
Packit |
575503 |
return "API";
|
|
Packit |
575503 |
else if (parse_field == fw_parse_field)
|
|
Packit |
575503 |
return "FIELDWIDTHS";
|
|
Packit |
575503 |
else if (parse_field == fpat_parse_field)
|
|
Packit |
575503 |
return "FPAT";
|
|
Packit |
575503 |
else
|
|
Packit |
575503 |
return "FS";
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* update_PROCINFO_str --- update PROCINFO[sub] with string value */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
update_PROCINFO_str(const char *subscript, const char *str)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
NODE **aptr;
|
|
Packit |
575503 |
NODE *tmp;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (PROCINFO_node == NULL)
|
|
Packit |
575503 |
return;
|
|
Packit |
575503 |
tmp = make_string(subscript, strlen(subscript));
|
|
Packit |
575503 |
aptr = assoc_lookup(PROCINFO_node, tmp);
|
|
Packit |
575503 |
unref(tmp);
|
|
Packit |
575503 |
unref(*aptr);
|
|
Packit |
575503 |
*aptr = make_string(str, strlen(str));
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* update_PROCINFO_num --- update PROCINFO[sub] with numeric value */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
update_PROCINFO_num(const char *subscript, AWKNUM val)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
NODE **aptr;
|
|
Packit |
575503 |
NODE *tmp;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (PROCINFO_node == NULL)
|
|
Packit |
575503 |
return;
|
|
Packit |
575503 |
tmp = make_string(subscript, strlen(subscript));
|
|
Packit |
575503 |
aptr = assoc_lookup(PROCINFO_node, tmp);
|
|
Packit |
575503 |
unref(tmp);
|
|
Packit |
575503 |
unref(*aptr);
|
|
Packit |
575503 |
*aptr = make_number(val);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* set_FPAT --- handle an assignment to FPAT */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
void
|
|
Packit |
575503 |
set_FPAT()
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
static bool warned = false;
|
|
Packit |
575503 |
static NODE *save_fpat = NULL;
|
|
Packit |
575503 |
bool remake_re = true;
|
|
Packit |
575503 |
NODE *fpat;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (do_lint && ! warned) {
|
|
Packit |
575503 |
warned = true;
|
|
Packit |
575503 |
lintwarn(_("`FPAT' is a gawk extension"));
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
if (do_traditional) /* quick and dirty, does the trick */
|
|
Packit |
575503 |
return;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* If changing the way fields are split, obey least-suprise
|
|
Packit |
575503 |
* semantics, and force $0 to be split totally.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (fields_arr != NULL)
|
|
Packit |
575503 |
(void) get_field(UNLIMITED - 1, 0);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* It's possible that only IGNORECASE changed, or FPAT = FPAT */
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* This comparison can't use cmp_nodes(), which pays attention
|
|
Packit |
575503 |
* to IGNORECASE, and that's not what we want.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (save_fpat
|
|
Packit |
575503 |
&& FPAT_node->var_value->stlen == save_fpat->stlen
|
|
Packit |
575503 |
&& memcmp(FPAT_node->var_value->stptr, save_fpat->stptr, save_fpat->stlen) == 0) {
|
|
Packit |
575503 |
if (FPAT_regexp != NULL)
|
|
Packit |
575503 |
FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* FPAT = FPAT */
|
|
Packit |
575503 |
if (current_field_sep() == Using_FPAT) {
|
|
Packit |
575503 |
return;
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
remake_re = false;
|
|
Packit |
575503 |
goto set_fpat_function;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
unref(save_fpat);
|
|
Packit |
575503 |
save_fpat = dupnode(FPAT_node->var_value);
|
|
Packit |
575503 |
refree(FPAT_re_yes_case);
|
|
Packit |
575503 |
refree(FPAT_re_no_case);
|
|
Packit |
575503 |
FPAT_re_yes_case = FPAT_re_no_case = FPAT_regexp = NULL;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
set_fpat_function:
|
|
Packit |
575503 |
fpat = force_string(FPAT_node->var_value);
|
|
Packit |
575503 |
set_parser(fpat_parse_field);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (remake_re) {
|
|
Packit |
575503 |
refree(FPAT_re_yes_case);
|
|
Packit |
575503 |
refree(FPAT_re_no_case);
|
|
Packit |
575503 |
FPAT_re_yes_case = FPAT_re_no_case = FPAT_regexp = NULL;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
FPAT_re_yes_case = make_regexp(fpat->stptr, fpat->stlen, false, true, true);
|
|
Packit |
575503 |
FPAT_re_no_case = make_regexp(fpat->stptr, fpat->stlen, true, true, true);
|
|
Packit |
575503 |
FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* increment_scan --- macro to move scan pointer ahead by one character.
|
|
Packit |
575503 |
* Implementation varies if doing MBS or not.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
|
|
Packit |
575503 |
#define increment_scan(scanp, len) incr_scan(scanp, len, & mbs)
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* incr_scan --- MBS version of increment_scan() */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
static void
|
|
Packit |
575503 |
incr_scan(char **scanp, size_t len, mbstate_t *mbs)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
size_t mbclen = 0;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (gawk_mb_cur_max > 1) {
|
|
Packit |
575503 |
mbclen = mbrlen(*scanp, len, mbs);
|
|
Packit |
575503 |
if ( (mbclen == 1)
|
|
Packit |
575503 |
|| (mbclen == (size_t) -1)
|
|
Packit |
575503 |
|| (mbclen == (size_t) -2)
|
|
Packit |
575503 |
|| (mbclen == 0)) {
|
|
Packit |
575503 |
/* We treat it as a singlebyte character. */
|
|
Packit |
575503 |
mbclen = 1;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
*scanp += mbclen;
|
|
Packit |
575503 |
} else
|
|
Packit |
575503 |
(*scanp)++;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* fpat_parse_field --- parse fields using a regexp.
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* This is called both from get_field() and from do_patsplit()
|
|
Packit |
575503 |
* via (*parse_field)(). This variation is for when FPAT is a regular
|
|
Packit |
575503 |
* expression -- use the value to find field contents.
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* The FPAT parsing logic is a bit difficult to specify. In particular
|
|
Packit |
575503 |
* to allow null fields at certain locations. To make the code as robust
|
|
Packit |
575503 |
* as possible, an awk reference implementation was written and tested
|
|
Packit |
575503 |
* as a first step, and later recoded in C, preserving its structure as
|
|
Packit |
575503 |
* much as possible.
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* # Reference implementation of the FPAT record parsing.
|
|
Packit |
575503 |
* #
|
|
Packit |
575503 |
* # Each loop iteration identifies a (separator[n-1],field[n]) pair.
|
|
Packit |
575503 |
* # Each loop iteration must consume some characters, except for the first field.
|
|
Packit |
575503 |
* # So a null field is only valid as a first field or after a non-null separator.
|
|
Packit |
575503 |
* # A null record has no fields (not a single null field).
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* function refpatsplit(string, fields, pattern, seps,
|
|
Packit |
575503 |
* parse_start, sep_start, field_start, field_length, field_found, nf) # locals
|
|
Packit |
575503 |
* {
|
|
Packit |
575503 |
* # Local state variables:
|
|
Packit |
575503 |
* # - parse_start: pointer to the first not yet consumed character
|
|
Packit |
575503 |
* # - sep_start: pointer to the beginning of the parsed separator
|
|
Packit |
575503 |
* # - field start: pointer to the beginning of the parsed field
|
|
Packit |
575503 |
* # - field length: length of the parsed field
|
|
Packit |
575503 |
* # - field_found: flag for succesful field match
|
|
Packit |
575503 |
* # - nf: Number of fields found so far
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* # Prepare for parsing
|
|
Packit |
575503 |
* parse_start = 1 # first not yet parsed char
|
|
Packit |
575503 |
* nf = 0 # fields found so far
|
|
Packit |
575503 |
* delete fields
|
|
Packit |
575503 |
* delete seps
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* # Loop that consumes the whole record
|
|
Packit |
575503 |
* while (parse_start <= length(string)) { # still something to parse
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* # first attempt to match the next field
|
|
Packit |
575503 |
* sep_start = parse_start
|
|
Packit |
575503 |
* field_found = match(substr(string, parse_start), pattern)
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* # check for an invalid null field and retry one character away
|
|
Packit |
575503 |
* if (nf > 0 && field_found && RSTART==1 && RLENGTH==0) {
|
|
Packit |
575503 |
* parse_start++
|
|
Packit |
575503 |
* field_found = match(substr(string, parse_start), pattern)
|
|
Packit |
575503 |
* }
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* # store the (sep[n-1],field[n]) pair
|
|
Packit |
575503 |
* if (field_found) {
|
|
Packit |
575503 |
* field_start = parse_start + RSTART - 1
|
|
Packit |
575503 |
* field_length = RLENGTH
|
|
Packit |
575503 |
* seps[nf] = substr(string, sep_start, field_start-sep_start)
|
|
Packit |
575503 |
* fields[++nf] = substr(string, field_start, field_length)
|
|
Packit |
575503 |
* parse_start = field_start + field_length
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* # store the final extra sep after the last field
|
|
Packit |
575503 |
* } else {
|
|
Packit |
575503 |
* seps[nf] = substr(string, sep_start)
|
|
Packit |
575503 |
* parse_start = length(string) + 1
|
|
Packit |
575503 |
* }
|
|
Packit |
575503 |
* }
|
|
Packit |
575503 |
*
|
|
Packit |
575503 |
* return nf
|
|
Packit |
575503 |
* }
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
static long
|
|
Packit |
575503 |
fpat_parse_field(long up_to, /* parse only up to this field number */
|
|
Packit |
575503 |
char **buf, /* on input: string to parse; on output: point to start next */
|
|
Packit |
575503 |
int len,
|
|
Packit |
575503 |
NODE *fs ATTRIBUTE_UNUSED,
|
|
Packit |
575503 |
Regexp *rp,
|
|
Packit |
575503 |
Setfunc set, /* routine to set the value of the parsed field */
|
|
Packit |
575503 |
NODE *n,
|
|
Packit |
575503 |
NODE *sep_arr, /* array of field separators (may be NULL) */
|
|
Packit |
575503 |
bool in_middle)
|
|
Packit |
575503 |
{
|
|
Packit |
575503 |
char *scan = *buf;
|
|
Packit |
575503 |
long nf = parse_high_water;
|
|
Packit |
575503 |
char *start;
|
|
Packit |
575503 |
char *end = scan + len;
|
|
Packit |
575503 |
int regex_flags = RE_NEED_START;
|
|
Packit |
575503 |
mbstate_t mbs;
|
|
Packit |
575503 |
char* field_start;
|
|
Packit |
575503 |
bool field_found = false;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
memset(&mbs, 0, sizeof(mbstate_t));
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (up_to == UNLIMITED)
|
|
Packit |
575503 |
nf = 0;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (len == 0)
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
if (rp == NULL) /* use FPAT */
|
|
Packit |
575503 |
rp = FPAT_regexp;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
while (scan < end && nf < up_to) { /* still something to parse */
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* first attempt to match the next field */
|
|
Packit |
575503 |
start = scan;
|
|
Packit |
575503 |
field_found = research(rp, scan, 0, (end - scan), regex_flags) != -1;
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* check for an invalid null field and retry one character away */
|
|
Packit |
575503 |
if (nf > 0 && field_found && REEND(rp, scan) == 0) { /* invalid null field */
|
|
Packit |
575503 |
increment_scan(& scan, end - scan);
|
|
Packit |
575503 |
field_found = research(rp, scan, 0, (end - scan), regex_flags) != -1;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/* store the (sep[n-1],field[n]) pair */
|
|
Packit |
575503 |
if (field_found) {
|
|
Packit |
575503 |
field_start = scan + RESTART(rp, scan);
|
|
Packit |
575503 |
if (sep_arr != NULL) { /* store the separator */
|
|
Packit |
575503 |
if (field_start == start) /* match at front */
|
|
Packit |
575503 |
set_element(nf, start, 0L, sep_arr);
|
|
Packit |
575503 |
else
|
|
Packit |
575503 |
set_element(nf,
|
|
Packit |
575503 |
start,
|
|
Packit |
575503 |
(long) (field_start - start),
|
|
Packit |
575503 |
sep_arr);
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
/* field is text that matched */
|
|
Packit |
575503 |
(*set)(++nf,
|
|
Packit |
575503 |
field_start,
|
|
Packit |
575503 |
(long)(REEND(rp, scan) - RESTART(rp, scan)),
|
|
Packit |
575503 |
n);
|
|
Packit |
575503 |
scan += REEND(rp, scan);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
} else {
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* No match, store the final extra separator after
|
|
Packit |
575503 |
* the last field.
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (sep_arr != NULL)
|
|
Packit |
575503 |
set_element(nf, start, (long) (end - start), sep_arr);
|
|
Packit |
575503 |
scan = end;
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
}
|
|
Packit |
575503 |
|
|
Packit |
575503 |
/*
|
|
Packit |
575503 |
* If the last field extends up to the end of the record, generate
|
|
Packit |
575503 |
* a null trailing separator
|
|
Packit |
575503 |
*/
|
|
Packit |
575503 |
if (sep_arr != NULL && scan == end && field_found)
|
|
Packit |
575503 |
set_element(nf, scan, 0L, sep_arr);
|
|
Packit |
575503 |
|
|
Packit |
575503 |
*buf = scan;
|
|
Packit |
575503 |
return nf;
|
|
Packit |
575503 |
}
|