Blame node.c

Packit 575503
/*
Packit 575503
 * node.c -- routines for node management
Packit 575503
 */
Packit 575503
Packit 575503
/*
Packit 575503
 * Copyright (C) 1986, 1988, 1989, 1991-2001, 2003-2015, 2017, 2018,
Packit 575503
 * the Free Software Foundation, Inc.
Packit 575503
 *
Packit 575503
 * This file is part of GAWK, the GNU implementation of the
Packit 575503
 * AWK Programming Language.
Packit 575503
 *
Packit 575503
 * GAWK is free software; you can redistribute it and/or modify
Packit 575503
 * it under the terms of the GNU General Public License as published by
Packit 575503
 * the Free Software Foundation; either version 3 of the License, or
Packit 575503
 * (at your option) any later version.
Packit 575503
 *
Packit 575503
 * GAWK is distributed in the hope that it will be useful,
Packit 575503
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 575503
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 575503
 * GNU General Public License for more details.
Packit 575503
 *
Packit 575503
 * You should have received a copy of the GNU General Public License
Packit 575503
 * along with this program; if not, write to the Free Software
Packit 575503
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
Packit 575503
 */
Packit 575503
Packit 575503
#include "awk.h"
Packit 575503
#include "math.h"
Packit 575503
#include "floatmagic.h"	/* definition of isnan */
Packit 575503
Packit 575503
static int is_ieee_magic_val(const char *val);
Packit 575503
static NODE *r_make_number(double x);
Packit 575503
static AWKNUM get_ieee_magic_val(char *val);
Packit 575503
extern NODE **fmt_list;          /* declared in eval.c */
Packit 575503
Packit 575503
NODE *(*make_number)(double) = r_make_number;
Packit 575503
NODE *(*str2number)(NODE *) = r_force_number;
Packit 575503
NODE *(*format_val)(const char *, int, NODE *) = r_format_val;
Packit 575503
int (*cmp_numbers)(const NODE *, const NODE *) = cmp_awknums;
Packit 575503
Packit 575503
/*
 * is_hex --- report whether the text in [str, cpend) begins with an
 * optional sign followed by "0x" or "0X"
 *
 * The caller guarantees that the text holds at least one character,
 * so looking at the first byte is always safe.
 */

static bool
is_hex(const char *str, const char *cpend)
{
	const char *digits = str;

	/* step over a single leading sign, if present */
	if (*digits == '+' || *digits == '-')
		digits++;

	/* the prefix itself needs two characters before cpend */
	if (digits + 1 >= cpend)
		return false;

	if (digits[0] != '0')
		return false;

	return digits[1] == 'x' || digits[1] == 'X';
}
Packit 575503
Packit 575503
/* force_number --- force a value to be numeric */
Packit 575503
Packit 575503
NODE *
Packit 575503
r_force_number(NODE *n)
Packit 575503
{
Packit 575503
	char *cp;
Packit 575503
	char *cpend;
Packit 575503
	char save;
Packit 575503
	char *ptr;
Packit 575503
	extern double strtod();
Packit 575503
Packit 575503
	if ((n->flags & NUMCUR) != 0)
Packit 575503
		return n;
Packit 575503
Packit 575503
	/*
Packit 575503
	 * We should always set NUMCUR. If USER_INPUT is set and it's a
Packit 575503
	 * numeric string, we clear STRING and enable NUMBER, but if it's not
Packit 575503
	 * numeric, we disable USER_INPUT.
Packit 575503
	 */
Packit 575503
Packit 575503
	/* All the conditionals are an attempt to avoid the expensive strtod */
Packit 575503
Packit 575503
	n->flags |= NUMCUR;
Packit 575503
	n->numbr = 0.0;
Packit 575503
Packit 575503
	/* Trim leading white space, bailing out if there's nothing else */
Packit 575503
	for (cp = n->stptr, cpend = cp + n->stlen;
Packit 575503
	     cp < cpend && isspace((unsigned char) *cp); cp++)
Packit 575503
		continue;
Packit 575503
Packit 575503
	if (cp == cpend)
Packit 575503
		goto badnum;
Packit 575503
Packit 575503
	/* At this point, we know the string is not entirely white space */
Packit 575503
	/* Trim trailing white space */
Packit 575503
	while (isspace((unsigned char) cpend[-1]))
Packit 575503
		cpend--;
Packit 575503
Packit 575503
	/*
Packit 575503
	 * 2/2007:
Packit 575503
	 * POSIX, by way of severe language lawyering, seems to
Packit 575503
	 * allow things like "inf" and "nan" to mean something.
Packit 575503
	 * So if do_posix, the user gets what he deserves.
Packit 575503
	 * This also allows hexadecimal floating point. Ugh.
Packit 575503
	 */
Packit 575503
	if (! do_posix) {
Packit 575503
		if (is_alpha((unsigned char) *cp))
Packit 575503
			goto badnum;
Packit 575503
		else if (cpend == cp+4 && is_ieee_magic_val(cp)) {
Packit 575503
			n->numbr = get_ieee_magic_val(cp);
Packit 575503
			goto goodnum;
Packit 575503
		}
Packit 575503
		/* else
Packit 575503
			fall through */
Packit 575503
	}
Packit 575503
	/* else POSIX, so
Packit 575503
		fall through */
Packit 575503
Packit 575503
	if (   (! do_posix		/* not POSIXLY paranoid and */
Packit 575503
	        && (is_alpha((unsigned char) *cp)	/* letter, or */
Packit 575503
					/* CANNOT do non-decimal and saw 0x */
Packit 575503
		    || (! do_non_decimal_data && is_hex(cp, cpend))))) {
Packit 575503
		goto badnum;
Packit 575503
	}
Packit 575503
Packit 575503
	if (cpend - cp == 1) {		/* only one character */
Packit 575503
		if (isdigit((unsigned char) *cp)) {	/* it's a digit! */
Packit 575503
			n->numbr = (AWKNUM)(*cp - '0');
Packit 575503
			if (n->stlen == 1)		/* no white space */
Packit 575503
				n->flags |= NUMINT;
Packit 575503
			goto goodnum;
Packit 575503
		}
Packit 575503
		goto badnum;
Packit 575503
	}
Packit 575503
Packit 575503
	errno = 0;
Packit 575503
	if (do_non_decimal_data		/* main.c assures false if do_posix */
Packit 575503
		&& ! do_traditional && get_numbase(cp, cpend - cp, true) != 10) {
Packit 575503
		/* nondec2awknum() saves and restores the byte after the string itself */
Packit 575503
		n->numbr = nondec2awknum(cp, cpend - cp, &ptr);
Packit 575503
	} else {
Packit 575503
		save = *cpend;
Packit 575503
		*cpend = '\0';
Packit 575503
		n->numbr = (AWKNUM) strtod((const char *) cp, &ptr);
Packit 575503
		*cpend = save;
Packit 575503
	}
Packit 575503
Packit 575503
	if (errno == 0) {
Packit 575503
		if (ptr == cpend)
Packit 575503
			goto goodnum;
Packit 575503
		/* else keep the leading numeric value without updating flags */
Packit 575503
		/* fall through to badnum */
Packit 575503
	} else {
Packit 575503
		errno = 0;
Packit 575503
		/*
Packit 575503
		 * N.B. For subnormal values, strtod may return the
Packit 575503
		 * floating-point representation while setting errno to ERANGE.
Packit 575503
		 * We force the numeric value to 0 in such cases.
Packit 575503
		 */
Packit 575503
		n->numbr = 0;
Packit 575503
		/*
Packit 575503
		 * Or should we accept it as a NUMBER even though strtod
Packit 575503
		 * threw an error?
Packit 575503
		 */
Packit 575503
		/* fall through to badnum */
Packit 575503
	}
Packit 575503
badnum:
Packit 575503
	n->flags &= ~USER_INPUT;
Packit 575503
	return n;
Packit 575503
Packit 575503
goodnum:
Packit 575503
	if ((n->flags & USER_INPUT) != 0) {
Packit 575503
		/* leave USER_INPUT enabled to indicate that this is a strnum */
Packit 575503
		n->flags &= ~STRING;
Packit 575503
		n->flags |= NUMBER;
Packit 575503
	}
Packit 575503
	return n;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/*
 * Preformatted strings for the one-digit integers 0 through 9.
 * r_format_val() indexes this table instead of formatting such values.
 * (More complicated) variations on this theme didn't seem to pay off,
 * but systematic testing might be in order at some point.
 */
static const char *values[] = {
	"0", "1", "2", "3", "4",
	"5", "6", "7", "8", "9",
};
#define	NVAL	(sizeof(values)/sizeof(values[0]))
Packit 575503
Packit 575503
/* r_format_val --- format a numeric value based on format */
Packit 575503
Packit 575503
NODE *
Packit 575503
r_format_val(const char *format, int index, NODE *s)
Packit 575503
{
Packit 575503
	char buf[BUFSIZ];
Packit 575503
	char *sp = buf;
Packit 575503
	double val;
Packit 575503
Packit 575503
	/*
Packit 575503
	 * 2/2007: Simplify our lives here. Instead of worrying about
Packit 575503
	 * whether or not the value will fit into a long just so we
Packit 575503
	 * can use sprintf("%ld", val) on it, always format it ourselves.
Packit 575503
	 * The only thing to worry about is that integral values always
Packit 575503
	 * format as integers. %.0f does that very well.
Packit 575503
	 *
Packit 575503
	 * 6/2008: Would that things were so simple. Always using %.0f
Packit 575503
	 * imposes a notable performance penalty for applications that
Packit 575503
	 * do a lot of conversion of integers to strings. So, we reinstate
Packit 575503
	 * the old code, but use %.0f for integral values that are outside
Packit 575503
	 * the range of a long.  This seems a reasonable compromise.
Packit 575503
	 *
Packit 575503
	 * 12/2009: Use <= and >= in the comparisons with LONG_xxx instead of
Packit 575503
	 * < and > so that things work correctly on systems with 64 bit integers.
Packit 575503
	 */
Packit 575503
Packit 575503
	/* not an integral value, or out of range */
Packit 575503
	if ((val = double_to_int(s->numbr)) != s->numbr
Packit 575503
			|| val <= LONG_MIN || val >= LONG_MAX
Packit 575503
	) {
Packit 575503
		/*
Packit 575503
		 * Once upon a time, we just blindly did this:
Packit 575503
		 *	sprintf(sp, format, s->numbr);
Packit 575503
		 *	s->stlen = strlen(sp);
Packit 575503
		 *	s->stfmt = index;
Packit 575503
		 * but that's no good if, e.g., OFMT is %s. So we punt,
Packit 575503
		 * and just always format the value ourselves.
Packit 575503
		 */
Packit 575503
Packit 575503
		NODE *dummy[2], *r;
Packit 575503
		unsigned int oflags;
Packit 575503
Packit 575503
		/* create dummy node for a sole use of format_tree */
Packit 575503
		dummy[1] = s;
Packit 575503
		oflags = s->flags;
Packit 575503
Packit 575503
		if (val == s->numbr) {
Packit 575503
			/* integral value, but outside range of %ld, use %.0f */
Packit 575503
			r = format_tree("%.0f", 4, dummy, 2);
Packit 575503
			s->stfmt = STFMT_UNUSED;
Packit 575503
		} else {
Packit 575503
			r = format_tree(format, fmt_list[index]->stlen, dummy, 2);
Packit 575503
			assert(r != NULL);
Packit 575503
			s->stfmt = index;
Packit 575503
		}
Packit 575503
		s->flags = oflags;
Packit 575503
		s->stlen = r->stlen;
Packit 575503
		if ((s->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR))
Packit 575503
			efree(s->stptr);
Packit 575503
		s->stptr = r->stptr;
Packit 575503
#ifdef HAVE_MPFR
Packit 575503
		s->strndmode = MPFR_round_mode;
Packit 575503
#endif
Packit 575503
		freenode(r);	/* Do not unref(r)! We want to keep s->stptr == r->stpr.  */
Packit 575503
Packit 575503
		goto no_malloc;
Packit 575503
	} else {
Packit 575503
		/*
Packit 575503
		 * integral value; force conversion to long only once.
Packit 575503
		 */
Packit 575503
		long num = (long) val;
Packit 575503
Packit 575503
		if (num < NVAL && num >= 0) {
Packit 575503
			sp = (char *) values[num];
Packit 575503
			s->stlen = 1;
Packit 575503
		} else {
Packit 575503
			(void) sprintf(sp, "%ld", num);
Packit 575503
			s->stlen = strlen(sp);
Packit 575503
		}
Packit 575503
		s->stfmt = STFMT_UNUSED;
Packit 575503
		if ((s->flags & INTIND) != 0) {
Packit 575503
			s->flags &= ~(INTIND|NUMBER);
Packit 575503
			s->flags |= STRING;
Packit 575503
		}
Packit 575503
#ifdef HAVE_MPFR
Packit 575503
		s->strndmode = MPFR_round_mode;
Packit 575503
#endif
Packit 575503
	}
Packit 575503
	if ((s->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR))
Packit 575503
		efree(s->stptr);
Packit 575503
	emalloc(s->stptr, char *, s->stlen + 1, "format_val");
Packit 575503
	memcpy(s->stptr, sp, s->stlen + 1);
Packit 575503
no_malloc:
Packit 575503
	s->flags |= STRCUR;
Packit 575503
	free_wstr(s);
Packit 575503
	return s;
Packit 575503
}
Packit 575503
Packit 575503
/* r_dupnode --- duplicate a node */
Packit 575503
Packit 575503
NODE *
Packit 575503
r_dupnode(NODE *n)
Packit 575503
{
Packit 575503
	NODE *r;
Packit 575503
Packit 575503
	assert(n->type == Node_val);
Packit 575503
Packit 575503
#ifdef GAWKDEBUG
Packit 575503
	if ((n->flags & MALLOC) != 0) {
Packit 575503
		n->valref++;
Packit 575503
		return n;
Packit 575503
	}
Packit 575503
#endif
Packit 575503
Packit 575503
	getnode(r);
Packit 575503
	*r = *n;
Packit 575503
	r->flags |= MALLOC;
Packit 575503
	r->valref = 1;
Packit 575503
	/*
Packit 575503
	 * DON'T call free_wstr(r) here!
Packit 575503
	 * r->wstptr still points at n->wstptr's value, and we
Packit 575503
	 * don't want to free it!
Packit 575503
	 */
Packit 575503
	r->wstptr = NULL;
Packit 575503
	r->wstlen = 0;
Packit 575503
Packit 575503
	if ((n->flags & STRCUR) != 0) {
Packit 575503
		emalloc(r->stptr, char *, n->stlen + 1, "r_dupnode");
Packit 575503
		memcpy(r->stptr, n->stptr, n->stlen);
Packit 575503
		r->stptr[n->stlen] = '\0';
Packit 575503
		if ((n->flags & WSTRCUR) != 0) {
Packit 575503
			r->wstlen = n->wstlen;
Packit 575503
			emalloc(r->wstptr, wchar_t *, sizeof(wchar_t) * (n->wstlen + 1), "r_dupnode");
Packit 575503
			memcpy(r->wstptr, n->wstptr, n->wstlen * sizeof(wchar_t));
Packit 575503
			r->wstptr[n->wstlen] = L'\0';
Packit 575503
			r->flags |= WSTRCUR;
Packit 575503
		}
Packit 575503
	}
Packit 575503
Packit 575503
	return r;
Packit 575503
}
Packit 575503
Packit 575503
/* r_make_number --- allocate a node with defined number */
Packit 575503
Packit 575503
static NODE *
Packit 575503
r_make_number(double x)
Packit 575503
{
Packit 575503
	NODE *r = make_number_node(0);
Packit 575503
	r->numbr = x;
Packit 575503
	return r;
Packit 575503
}
Packit 575503
Packit 575503
/* cmp_awknums --- compare two AWKNUMs */
Packit 575503
Packit 575503
int
Packit 575503
cmp_awknums(const NODE *t1, const NODE *t2)
Packit 575503
{
Packit 575503
	/*
Packit 575503
	 * This routine is also used to sort numeric array indices or values.
Packit 575503
	 * For the purposes of sorting, NaN is considered greater than
Packit 575503
	 * any other value, and all NaN values are considered equivalent and equal.
Packit 575503
	 * This isn't in compliance with IEEE standard, but compliance w.r.t. NaN
Packit 575503
	 * comparison at the awk level is a different issue, and needs to be dealt
Packit 575503
	 * with in the interpreter for each opcode seperately.
Packit 575503
	 */
Packit 575503
Packit 575503
	if (isnan(t1->numbr))
Packit 575503
		return ! isnan(t2->numbr);
Packit 575503
	if (isnan(t2->numbr))
Packit 575503
		return -1;
Packit 575503
	/* don't subtract, in case one or both are infinite */
Packit 575503
	if (t1->numbr == t2->numbr)
Packit 575503
		return 0;
Packit 575503
	if (t1->numbr < t2->numbr)
Packit 575503
		return -1;
Packit 575503
	return 1;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* make_str_node --- make a string node */
Packit 575503
Packit 575503
NODE *
Packit 575503
make_str_node(const char *s, size_t len, int flags)
Packit 575503
{
Packit 575503
	NODE *r;
Packit 575503
	getnode(r);
Packit 575503
	r->type = Node_val;
Packit 575503
	r->numbr = 0;
Packit 575503
	r->flags = (MALLOC|STRING|STRCUR);
Packit 575503
	r->valref = 1;
Packit 575503
	r->stfmt = STFMT_UNUSED;
Packit 575503
#ifdef HAVE_MPFR
Packit 575503
	r->strndmode = MPFR_round_mode;
Packit 575503
#endif
Packit 575503
	r->wstptr = NULL;
Packit 575503
	r->wstlen = 0;
Packit 575503
Packit 575503
	if ((flags & ALREADY_MALLOCED) != 0)
Packit 575503
		r->stptr = (char *) s;
Packit 575503
	else {
Packit 575503
		emalloc(r->stptr, char *, len + 1, "make_str_node");
Packit 575503
		memcpy(r->stptr, s, len);
Packit 575503
	}
Packit 575503
	r->stptr[len] = '\0';
Packit 575503
Packit 575503
	if ((flags & SCAN) != 0) {	/* scan for escape sequences */
Packit 575503
		const char *pf;
Packit 575503
		char *ptm;
Packit 575503
		int c;
Packit 575503
		const char *end;
Packit 575503
		mbstate_t cur_state;
Packit 575503
Packit 575503
		memset(& cur_state, 0, sizeof(cur_state));
Packit 575503
Packit 575503
		end = &(r->stptr[len]);
Packit 575503
		for (pf = ptm = r->stptr; pf < end;) {
Packit 575503
			/*
Packit 575503
			 * Keep multibyte characters together. This avoids
Packit 575503
			 * problems if a subsequent byte of a multibyte
Packit 575503
			 * character happens to be a backslash.
Packit 575503
			 */
Packit 575503
			if (gawk_mb_cur_max > 1) {
Packit 575503
				int mblen = mbrlen(pf, end-pf, &cur_state);
Packit 575503
Packit 575503
				if (mblen > 1) {
Packit 575503
					int i;
Packit 575503
Packit 575503
					for (i = 0; i < mblen; i++)
Packit 575503
						*ptm++ = *pf++;
Packit 575503
					continue;
Packit 575503
				}
Packit 575503
			}
Packit 575503
Packit 575503
			c = *pf++;
Packit 575503
			if (c == '\\') {
Packit 575503
				c = parse_escape(&pf);
Packit 575503
				if (c < 0) {
Packit 575503
					if (do_lint)
Packit 575503
						lintwarn(_("backslash at end of string"));
Packit 575503
					c = '\\';
Packit 575503
				}
Packit 575503
				*ptm++ = c;
Packit 575503
			} else
Packit 575503
				*ptm++ = c;
Packit 575503
		}
Packit 575503
		len = ptm - r->stptr;
Packit 575503
		erealloc(r->stptr, char *, len + 1, "make_str_node");
Packit 575503
		r->stptr[len] = '\0';
Packit 575503
	}
Packit 575503
	r->stlen = len;
Packit 575503
Packit 575503
	return r;
Packit 575503
}
Packit 575503
Packit 575503
/* make_typed_regex --- make a typed regex node */
Packit 575503
Packit 575503
NODE *
Packit 575503
make_typed_regex(const char *re, size_t len)
Packit 575503
{
Packit 575503
	NODE *n, *exp, *n2;
Packit 575503
Packit 575503
	exp = make_str_node(re, len, ALREADY_MALLOCED);
Packit 575503
	n = make_regnode(Node_regex, exp);
Packit 575503
	if (n == NULL)
Packit 575503
		fatal(_("could not make typed regex"));
Packit 575503
Packit 575503
	n2 = make_string(re, len);
Packit 575503
	n2->typed_re = n;
Packit 575503
	n2->numbr = 0;
Packit 575503
	n2->flags |= NUMCUR|STRCUR|REGEX; 
Packit 575503
	n2->flags &= ~(STRING|NUMBER);
Packit 575503
Packit 575503
	return n2;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* unref --- remove reference to a particular node */
Packit 575503
Packit 575503
void
Packit 575503
r_unref(NODE *tmp)
Packit 575503
{
Packit 575503
#ifdef GAWKDEBUG
Packit 575503
	if (tmp == NULL)
Packit 575503
		return;
Packit 575503
	if ((tmp->flags & MALLOC) != 0) {
Packit 575503
		if (tmp->valref > 1) {
Packit 575503
			tmp->valref--;
Packit 575503
			return;
Packit 575503
		}
Packit 575503
		if ((tmp->flags & STRCUR) != 0)
Packit 575503
			efree(tmp->stptr);
Packit 575503
	}
Packit 575503
#else
Packit 575503
	if ((tmp->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR))
Packit 575503
		efree(tmp->stptr);
Packit 575503
#endif
Packit 575503
Packit 575503
	mpfr_unset(tmp);
Packit 575503
Packit 575503
	free_wstr(tmp);
Packit 575503
	freenode(tmp);
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/*
Packit 575503
 * parse_escape:
Packit 575503
 *
Packit 575503
 * Parse a C escape sequence.  STRING_PTR points to a variable containing a
Packit 575503
 * pointer to the string to parse.  That pointer is updated past the
Packit 575503
 * characters we use.  The value of the escape sequence is returned.
Packit 575503
 *
Packit 575503
 * A negative value means the sequence \ newline was seen, which is supposed to
Packit 575503
 * be equivalent to nothing at all.
Packit 575503
 *
Packit 575503
 * If \ is followed by a null character, we return a negative value and leave
Packit 575503
 * the string pointer pointing at the null character.
Packit 575503
 *
Packit 575503
 * If \ is followed by 000, we return 0 and leave the string pointer after the
Packit 575503
 * zeros.  A value of 0 does not mean end of string.
Packit 575503
 *
Packit 575503
 * POSIX doesn't allow \x.
Packit 575503
 */
Packit 575503
Packit 575503
int
Packit 575503
parse_escape(const char **string_ptr)
Packit 575503
{
Packit 575503
	int c = *(*string_ptr)++;
Packit 575503
	int i;
Packit 575503
	int count;
Packit 575503
	int j;
Packit 575503
	const char *start;
Packit 575503
Packit 575503
	if (do_lint_old) {
Packit 575503
		switch (c) {
Packit 575503
		case 'a':
Packit 575503
		case 'b':
Packit 575503
		case 'f':
Packit 575503
		case 'r':
Packit 575503
			warning(_("old awk does not support the `\\%c' escape sequence"), c);
Packit 575503
			break;
Packit 575503
		}
Packit 575503
	}
Packit 575503
Packit 575503
	switch (c) {
Packit 575503
	case 'a':
Packit 575503
		return '\a';
Packit 575503
	case 'b':
Packit 575503
		return '\b';
Packit 575503
	case 'f':
Packit 575503
		return '\f';
Packit 575503
	case 'n':
Packit 575503
		return '\n';
Packit 575503
	case 'r':
Packit 575503
		return '\r';
Packit 575503
	case 't':
Packit 575503
		return '\t';
Packit 575503
	case 'v':
Packit 575503
		return '\v';
Packit 575503
	case '\n':
Packit 575503
		return -2;
Packit 575503
	case 0:
Packit 575503
		(*string_ptr)--;
Packit 575503
		return -1;
Packit 575503
	case '0':
Packit 575503
	case '1':
Packit 575503
	case '2':
Packit 575503
	case '3':
Packit 575503
	case '4':
Packit 575503
	case '5':
Packit 575503
	case '6':
Packit 575503
	case '7':
Packit 575503
		i = c - '0';
Packit 575503
		count = 0;
Packit 575503
		while (++count < 3) {
Packit 575503
			if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
Packit 575503
				i *= 8;
Packit 575503
				i += c - '0';
Packit 575503
			} else {
Packit 575503
				(*string_ptr)--;
Packit 575503
				break;
Packit 575503
			}
Packit 575503
		}
Packit 575503
		return i;
Packit 575503
	case 'x':
Packit 575503
		if (do_lint) {
Packit 575503
			static bool warned = false;
Packit 575503
Packit 575503
			if (! warned) {
Packit 575503
				warned = true;
Packit 575503
				lintwarn(_("POSIX does not allow `\\x' escapes"));
Packit 575503
			}
Packit 575503
		}
Packit 575503
		if (do_posix)
Packit 575503
			return ('x');
Packit 575503
		if (! isxdigit((unsigned char) (*string_ptr)[0])) {
Packit 575503
			warning(_("no hex digits in `\\x' escape sequence"));
Packit 575503
			return ('x');
Packit 575503
		}
Packit 575503
		start = *string_ptr;
Packit 575503
		for (i = j = 0; j < 2; j++) {
Packit 575503
			/* do outside test to avoid multiple side effects */
Packit 575503
			c = *(*string_ptr)++;
Packit 575503
			if (isxdigit(c)) {
Packit 575503
				i *= 16;
Packit 575503
				if (isdigit(c))
Packit 575503
					i += c - '0';
Packit 575503
				else if (isupper(c))
Packit 575503
					i += c - 'A' + 10;
Packit 575503
				else
Packit 575503
					i += c - 'a' + 10;
Packit 575503
			} else {
Packit 575503
				(*string_ptr)--;
Packit 575503
				break;
Packit 575503
			}
Packit 575503
		}
Packit 575503
		if (do_lint && j > 2)
Packit 575503
			lintwarn(_("hex escape \\x%.*s of %d characters probably not interpreted the way you expect"), j, start, j);
Packit 575503
		return i;
Packit 575503
	case '\\':
Packit 575503
	case '"':
Packit 575503
		return c;
Packit 575503
	default:
Packit 575503
	{
Packit 575503
		static bool warned[256];
Packit 575503
		unsigned char uc = (unsigned char) c;
Packit 575503
Packit 575503
		/* N.B.: use unsigned char here to avoid Latin-1 problems */
Packit 575503
Packit 575503
		if (! warned[uc]) {
Packit 575503
			warned[uc] = true;
Packit 575503
Packit 575503
			warning(_("escape sequence `\\%c' treated as plain `%c'"), uc, uc);
Packit 575503
		}
Packit 575503
	}
Packit 575503
		return c;
Packit 575503
	}
Packit 575503
}
Packit 575503
Packit 575503
/*
 * get_numbase --- return the base to use for the number in 's'
 *
 * s and len give the text.  The result is 16 for a leading 0x or 0X,
 * 8 for a leading 0 followed by an octal digit, and 10 otherwise.
 * When use_locale is true and HAVE_LOCALE_H is defined, the locale's
 * decimal point is recognized instead of '.'.
 */

int
get_numbase(const char *s, size_t len, bool use_locale)
{
	int dec_point = '.';
	size_t i;

#if defined(HAVE_LOCALE_H)
	/*
	 * loc.decimal_point may not have been initialized yet,
	 * so double check it before using it.
	 */
	if (use_locale && loc.decimal_point != NULL && loc.decimal_point[0] != '\0')
		dec_point = loc.decimal_point[0];	/* XXX --- assumes one char */
#endif

	/* anything without a leading zero and a second character is decimal */
	if (len < 2 || s[0] != '0')
		return 10;

	/* leading 0x or 0X */
	if (s[1] == 'x' || s[1] == 'X')
		return 16;

	/*
	 * Numbers with '.', 'e', or 'E' are decimal.
	 * Have to check so that things like 00.34 are handled right.
	 *
	 * These beasts can have trailing whitespace, so the scan
	 * simply stops at the first character that is not a digit.
	 */
	for (i = 0; i < len; i++) {
		const char ch = s[i];

		if (ch == 'e' || ch == 'E' || ch == dec_point)
			return 10;
		if (! isdigit((unsigned char) ch))
			break;
	}

	/* octal only if the character after the zero is an octal digit */
	if (isdigit((unsigned char) s[1]) && s[1] != '8' && s[1] != '9')
		return 8;

	return 10;
}
Packit 575503
Packit 575503
/* str2wstr --- convert a multibyte string to a wide string */
Packit 575503
Packit 575503
NODE *
Packit 575503
str2wstr(NODE *n, size_t **ptr)
Packit 575503
{
Packit 575503
	size_t i, count, src_count;
Packit 575503
	char *sp;
Packit 575503
	mbstate_t mbs;
Packit 575503
	wchar_t wc, *wsp;
Packit 575503
	static bool warned = false;
Packit 575503
Packit 575503
	assert((n->flags & (STRING|STRCUR)) != 0);
Packit 575503
Packit 575503
	/*
Packit 575503
	 * Don't convert global null string or global null field
Packit 575503
	 * variables to a wide string. They are both zero-length anyway.
Packit 575503
	 * This also avoids future double-free errors while releasing
Packit 575503
	 * shallow copies, eg. *tmp = *Null_field; free_wstr(tmp);
Packit 575503
	 */
Packit 575503
	if (n == Nnull_string || n == Null_field)
Packit 575503
		return n;
Packit 575503
Packit 575503
	if ((n->flags & WSTRCUR) != 0) {
Packit 575503
		if (ptr == NULL)
Packit 575503
			return n;
Packit 575503
		/* otherwise
Packit 575503
			fall through and recompute to fill in the array */
Packit 575503
		free_wstr(n);
Packit 575503
	}
Packit 575503
Packit 575503
	/*
Packit 575503
	 * After consideration and consultation, this
Packit 575503
	 * code trades space for time. We allocate
Packit 575503
	 * an array of wchar_t that is n->stlen long.
Packit 575503
	 * This is needed in the worst case anyway, where
Packit 575503
	 * each input byte maps to one wchar_t.  The
Packit 575503
	 * advantage is that we only have to convert the string
Packit 575503
	 * once, instead of twice, once to find out how many
Packit 575503
	 * wide characters, and then again to actually fill in
Packit 575503
	 * the info.  If there's a lot left over, we can
Packit 575503
	 * realloc the wide string down in size.
Packit 575503
	 */
Packit 575503
Packit 575503
	emalloc(n->wstptr, wchar_t *, sizeof(wchar_t) * (n->stlen + 1), "str2wstr");
Packit 575503
	wsp = n->wstptr;
Packit 575503
Packit 575503
	/*
Packit 575503
	 * For use by do_match, create and fill in an array.
Packit 575503
	 * For each byte `i' in n->stptr (the original string),
Packit 575503
	 * a[i] is equal to `j', where `j' is the corresponding wchar_t
Packit 575503
	 * in the converted wide string.
Packit 575503
	 *
Packit 575503
	 * Create the array.
Packit 575503
	 */
Packit 575503
	if (ptr != NULL) {
Packit 575503
		ezalloc(*ptr, size_t *, sizeof(size_t) * n->stlen, "str2wstr");
Packit 575503
	}
Packit 575503
Packit 575503
	sp = n->stptr;
Packit 575503
	src_count = n->stlen;
Packit 575503
	memset(& mbs, 0, sizeof(mbs));
Packit 575503
	for (i = 0; src_count > 0; i++) {
Packit 575503
		/*
Packit 575503
		 * 9/2010: Check the current byte; if it's a valid character,
Packit 575503
		 * then it doesn't start a multibyte sequence. This brings a
Packit 575503
		 * big speed up. Thanks to Ulrich Drepper for the tip.
Packit 575503
		 * 11/2010: Thanks to Paolo Bonzini for some even faster code.
Packit 575503
		 */
Packit 575503
		if (is_valid_character(*sp)) {
Packit 575503
			count = 1;
Packit 575503
			wc = btowc_cache(*sp);
Packit 575503
		} else
Packit 575503
			count = mbrtowc(& wc, sp, src_count, & mbs);
Packit 575503
		switch (count) {
Packit 575503
		case (size_t) -2:
Packit 575503
		case (size_t) -1:
Packit 575503
			/*
Packit 575503
			 * mbrtowc(3) says the state of mbs becomes undefined
Packit 575503
			 * after a bad character, so reset it.
Packit 575503
			 */
Packit 575503
			memset(& mbs, 0, sizeof(mbs));
Packit 575503
Packit 575503
			/* Warn the user something's wrong */
Packit 575503
			if (! warned) {
Packit 575503
				warned = true;
Packit 575503
				warning(_("Invalid multibyte data detected. There may be a mismatch between your data and your locale."));
Packit 575503
			}
Packit 575503
Packit 575503
			/*
Packit 575503
			 * 8/2015: If we're using UTF, then instead of just
Packit 575503
			 * skipping the character, plug in the Unicode
Packit 575503
			 * replacement character. In most cases this gives
Packit 575503
			 * us "better" results, in that character counts
Packit 575503
			 * and string lengths tend to make more sense.
Packit 575503
			 *
Packit 575503
			 * Otherwise, just skip the bad byte and keep going,
Packit 575503
			 * so that we get a more-or-less full string, instead of
Packit 575503
			 * stopping early. This is particularly important
Packit 575503
			 * for match() where we need to build the indices.
Packit 575503
			 */
Packit 575503
			if (using_utf8()) {
Packit 575503
				count = 1;
Packit 575503
				wc = 0xFFFD;	/* unicode replacement character */
Packit 575503
				goto set_wc;
Packit 575503
			} else {
Packit 575503
				/* skip it and keep going */
Packit 575503
				sp++;
Packit 575503
				src_count--;
Packit 575503
			}
Packit 575503
			break;
Packit 575503
Packit 575503
		case 0:
Packit 575503
			count = 1;
Packit 575503
			/* fall through */
Packit 575503
		default:
Packit 575503
		set_wc:
Packit 575503
			*wsp++ = wc;
Packit 575503
			src_count -= count;
Packit 575503
			while (count--)  {
Packit 575503
				if (ptr != NULL)
Packit 575503
					(*ptr)[sp - n->stptr] = i;
Packit 575503
				sp++;
Packit 575503
			}
Packit 575503
			break;
Packit 575503
		}
Packit 575503
	}
Packit 575503
Packit 575503
	*wsp = L'\0';
Packit 575503
	n->wstlen = wsp - n->wstptr;
Packit 575503
	n->flags |= WSTRCUR;
Packit 575503
#define ARBITRARY_AMOUNT_TO_GIVE_BACK 100
Packit 575503
	if (n->stlen - n->wstlen > ARBITRARY_AMOUNT_TO_GIVE_BACK)
Packit 575503
		erealloc(n->wstptr, wchar_t *, sizeof(wchar_t) * (n->wstlen + 1), "str2wstr");
Packit 575503
Packit 575503
	return n;
Packit 575503
}
Packit 575503
Packit 575503
/* wstr2str --- convert a wide string back into multibyte one */
Packit 575503
Packit 575503
NODE *
Packit 575503
wstr2str(NODE *n)
Packit 575503
{
Packit 575503
	size_t result;
Packit 575503
	size_t length;
Packit 575503
	wchar_t *wp;
Packit 575503
	mbstate_t mbs;
Packit 575503
	char *newval, *cp;
Packit 575503
Packit 575503
	assert(n->valref == 1);
Packit 575503
	assert((n->flags & WSTRCUR) != 0);
Packit 575503
Packit 575503
	/*
Packit 575503
	 * Convert the wide chars in t1->wstptr back into m.b. chars.
Packit 575503
	 * This is pretty grotty, but it's the most straightforward
Packit 575503
	 * way to do things.
Packit 575503
	 */
Packit 575503
	memset(& mbs, 0, sizeof(mbs));
Packit 575503
Packit 575503
	length = n->wstlen;
Packit 575503
	emalloc(newval, char *, (length * gawk_mb_cur_max) + 1, "wstr2str");
Packit 575503
Packit 575503
	wp = n->wstptr;
Packit 575503
	for (cp = newval; length > 0; length--) {
Packit 575503
		result = wcrtomb(cp, *wp, & mbs);
Packit 575503
		if (result == (size_t) -1)	/* what to do? break seems best */
Packit 575503
			break;
Packit 575503
		cp += result;
Packit 575503
		wp++;
Packit 575503
	}
Packit 575503
	*cp = '\0';
Packit 575503
Packit 575503
	/* N.B. caller just created n with make_string, so this free is safe */
Packit 575503
	efree(n->stptr);
Packit 575503
	n->stptr = newval;
Packit 575503
	n->stlen = cp - newval;
Packit 575503
Packit 575503
	return n;
Packit 575503
}
Packit 575503
Packit 575503
/* free_wstr --- release the wide string part of a node */
Packit 575503
Packit 575503
void
Packit 575503
r_free_wstr(NODE *n)
Packit 575503
{
Packit 575503
	assert(n->type == Node_val);
Packit 575503
Packit 575503
	if ((n->flags & WSTRCUR) != 0) {
Packit 575503
		assert(n->wstptr != NULL);
Packit 575503
		efree(n->wstptr);
Packit 575503
	}
Packit 575503
	n->wstptr = NULL;
Packit 575503
	n->wstlen = 0;
Packit 575503
	n->flags &= ~WSTRCUR;
Packit 575503
}
Packit 575503
Packit 575503
/* dump_wstr --- write len wide characters from str to fp; debugging aid */

static void __attribute__ ((unused))
dump_wstr(FILE *fp, const wchar_t *str, size_t len)
{
	size_t k;

	/* nothing to print for a missing string; len == 0 skips the loop */
	if (str == NULL)
		return;

	for (k = 0; k < len; k++)
		putwc(str[k], fp);
}
Packit 575503
Packit 575503
/*
 * wstrstr --- walk haystack, looking for needle, wide char version
 *
 * Both strings are given by pointer and length; neither needs to be
 * terminated.  Returns a pointer to the first occurrence of needle
 * inside haystack, or NULL if there is none, if either pointer is NULL,
 * or if needle is longer than haystack.  An empty needle matches at the
 * start of haystack, as with strstr().
 */

const wchar_t *
wstrstr(const wchar_t *haystack, size_t hs_len,
	const wchar_t *needle, size_t needle_len)
{
	size_t i, last_start;

	if (haystack == NULL || needle == NULL || needle_len > hs_len)
		return NULL;

	/*
	 * Handle the empty needle up front: the scan below indexes
	 * needle[needle_len-1], which would be out of bounds.
	 */
	if (needle_len == 0)
		return haystack;

	/* a match cannot start after this index and still fit */
	last_start = hs_len - needle_len;

	for (i = 0; i <= last_start; i++) {
		/* cheap first & last char test before comparing the whole string */
		if (haystack[i] == needle[0]
		    && haystack[i+needle_len-1] == needle[needle_len-1]
		    && memcmp(haystack+i, needle, sizeof(wchar_t) * needle_len) == 0)
			return haystack + i;
	}

	return NULL;
}
Packit 575503
Packit 575503
/*
 * wcasestrstr --- walk haystack, nocase look for needle, wide char version
 *
 * Both strings are given by pointer and length; neither needs to be
 * terminated.  Characters are compared after folding with towlower().
 * Returns a pointer to the first case-insensitive occurrence of needle
 * inside haystack, or NULL if there is none, if either pointer is NULL,
 * or if needle is longer than haystack.  An empty needle matches at the
 * start of haystack, as with strstr().
 */

const wchar_t *
wcasestrstr(const wchar_t *haystack, size_t hs_len,
	const wchar_t *needle, size_t needle_len)
{
	size_t i, j, last_start;
	wint_t first, last;

	if (haystack == NULL || needle == NULL || needle_len > hs_len)
		return NULL;

	/*
	 * Handle the empty needle up front: the scan below indexes
	 * needle[needle_len-1], which would be out of bounds.
	 */
	if (needle_len == 0)
		return haystack;

	/* fold the needle's end characters once, not on every iteration */
	first = towlower(needle[0]);
	last = towlower(needle[needle_len-1]);

	/* a match cannot start after this index and still fit */
	last_start = hs_len - needle_len;

	for (i = 0; i <= last_start; i++) {
		/* cheap first & last char test before comparing the whole string */
		if (towlower(haystack[i]) != first
		    || towlower(haystack[i+needle_len-1]) != last)
			continue;

		for (j = 0; j < needle_len; j++) {
			if (towlower(haystack[i+j]) != towlower(needle[j]))
				break;
		}
		if (j == needle_len)
			return haystack + i;
	}

	return NULL;
}
Packit 575503
Packit 575503
/*
 * is_ieee_magic_val --- return true for +inf, -inf, +nan, -nan
 *
 * The match is case-insensitive and looks at val[0] through val[3] only.
 */

static int
is_ieee_magic_val(const char *val)
{
	/*
	 * Avoid strncasecmp: it mishandles ASCII bytes in some locales.
	 * Assume the length is 4, as the caller checks this.
	 */

	/* a sign is mandatory */
	if (val[0] != '+' && val[0] != '-')
		return 0;

	/* "inf" in any mix of case */
	if (val[1] == 'i' || val[1] == 'I')
		return (val[2] == 'n' || val[2] == 'N')
			&& (val[3] == 'f' || val[3] == 'F');

	/* "nan" in any mix of case */
	if (val[1] == 'n' || val[1] == 'N')
		return (val[2] == 'a' || val[2] == 'A')
			&& (val[3] == 'n' || val[3] == 'N');

	return 0;
}
Packit 575503
Packit 575503
/* get_ieee_magic_val --- return magic value for string */
Packit 575503
Packit 575503
static AWKNUM
Packit 575503
get_ieee_magic_val(char *val)
Packit 575503
{
Packit 575503
	static bool first = true;
Packit 575503
	static AWKNUM inf;
Packit 575503
	static AWKNUM nan;
Packit 575503
	char save;
Packit 575503
Packit 575503
	char *ptr;
Packit 575503
	save = val[4];
Packit 575503
	val[4] = '\0';
Packit 575503
	AWKNUM v = strtod(val, &ptr);
Packit 575503
	val[4] = save;
Packit 575503
Packit 575503
	if (val == ptr) { /* Older strtod implementations don't support inf or nan. */
Packit 575503
		if (first) {
Packit 575503
			first = false;
Packit 575503
			nan = sqrt(-1.0);
Packit 575503
			inf = -log(0.0);
Packit 575503
		}
Packit 575503
Packit 575503
		v = ((val[1] == 'i' || val[1] == 'I') ? inf : nan);
Packit 575503
		if (val[0] == '-')
Packit 575503
			v = -v;
Packit 575503
	}
Packit 575503
Packit 575503
	return v;
Packit 575503
}
Packit 575503
Packit 575503
/* btowc_cache --- btowc() results, indexed by byte value */

wint_t btowc_cache[256];

/*
 * init_btowc_cache --- initialize the cache
 *
 * Stores btowc(i) in btowc_cache[i] for every byte value i.
 */

void
init_btowc_cache(void)
{
	int i;
	const int nslots = (int) (sizeof(btowc_cache) / sizeof(btowc_cache[0]));

	/* bound taken from the array itself so the last slot (255) is filled too */
	for (i = 0; i < nslots; i++)
		btowc_cache[i] = btowc(i);
}
Packit 575503
Packit 575503
#define BLOCKCHUNK 100
Packit 575503
Packit 575503
struct block_header nextfree[BLOCK_MAX] = {
Packit 575503
	{ NULL, sizeof(NODE) },
Packit 575503
	{ NULL, sizeof(BUCKET) },
Packit 575503
#ifdef HAVE_MPFR
Packit 575503
	{ NULL, sizeof(mpfr_t) },
Packit 575503
	{ NULL, sizeof(mpz_t) },
Packit 575503
#endif
Packit 575503
};
Packit 575503
Packit 575503
Packit 575503
/* more_blocks --- get more blocks of memory and add to the free list;
Packit 575503
	size of a block must be >= sizeof(struct block_item)
Packit 575503
 */
Packit 575503
Packit 575503
void *
Packit 575503
more_blocks(int id)
Packit 575503
{
Packit 575503
	struct block_item *freep, *np, *next;
Packit 575503
	char *p, *endp;
Packit 575503
	size_t size;
Packit 575503
Packit 575503
	size = nextfree[id].size;
Packit 575503
Packit 575503
	assert(size >= sizeof(struct block_item));
Packit 575503
	emalloc(freep, struct block_item *, BLOCKCHUNK * size, "more_blocks");
Packit 575503
	p = (char *) freep;
Packit 575503
	endp = p + BLOCKCHUNK * size;
Packit 575503
Packit 575503
	for (np = freep; ; np = next) {
Packit 575503
		next = (struct block_item *) (p += size);
Packit 575503
		if (p >= endp) {
Packit 575503
			np->freep = NULL;
Packit 575503
			break;
Packit 575503
		}
Packit 575503
		np->freep = next;
Packit 575503
	}
Packit 575503
	nextfree[id].freep = freep->freep;
Packit 575503
	return freep;
Packit 575503
}