Blob Blame History Raw
/*
 * node.c -- routines for node management
 */

/*
 * Copyright (C) 1986, 1988, 1989, 1991-2001, 2003-2015, 2017, 2018,
 * the Free Software Foundation, Inc.
 *
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 *
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */

#include "awk.h"
#include "math.h"
#include "floatmagic.h"	/* definition of isnan */

static int is_ieee_magic_val(const char *val);
static NODE *r_make_number(double x);
static AWKNUM get_ieee_magic_val(char *val);
extern NODE **fmt_list;          /* declared in eval.c */

NODE *(*make_number)(double) = r_make_number;
NODE *(*str2number)(NODE *) = r_force_number;
NODE *(*format_val)(const char *, int, NODE *) = r_format_val;
int (*cmp_numbers)(const NODE *, const NODE *) = cmp_awknums;

/* is_hex --- return true if a string looks like a hex value */

static bool
is_hex(const char *str, const char *cpend)
{
	/* on entry, we know the string length is >= 1 */
	if (*str == '-' || *str == '+')
		str++;

	if (str + 1 < cpend && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
		return true;

	return false;
}

/* force_number --- force a value to be numeric */

NODE *
r_force_number(NODE *n)
{
	char *cp;
	char *cpend;
	char save;
	char *ptr;
	extern double strtod();

	if ((n->flags & NUMCUR) != 0)
		return n;

	/*
	 * We should always set NUMCUR. If USER_INPUT is set and it's a
	 * numeric string, we clear STRING and enable NUMBER, but if it's not
	 * numeric, we disable USER_INPUT.
	 */

	/* All the conditionals are an attempt to avoid the expensive strtod */

	n->flags |= NUMCUR;
	n->numbr = 0.0;

	/* Trim leading white space, bailing out if there's nothing else */
	for (cp = n->stptr, cpend = cp + n->stlen;
	     cp < cpend && isspace((unsigned char) *cp); cp++)
		continue;

	if (cp == cpend)
		goto badnum;

	/* At this point, we know the string is not entirely white space */
	/* Trim trailing white space */
	while (isspace((unsigned char) cpend[-1]))
		cpend--;

	/*
	 * 2/2007:
	 * POSIX, by way of severe language lawyering, seems to
	 * allow things like "inf" and "nan" to mean something.
	 * So if do_posix, the user gets what he deserves.
	 * This also allows hexadecimal floating point. Ugh.
	 */
	if (! do_posix) {
		if (is_alpha((unsigned char) *cp))
			goto badnum;
		else if (cpend == cp+4 && is_ieee_magic_val(cp)) {
			n->numbr = get_ieee_magic_val(cp);
			goto goodnum;
		}
		/* else
			fall through */
	}
	/* else POSIX, so
		fall through */

	if (   (! do_posix		/* not POSIXLY paranoid and */
	        && (is_alpha((unsigned char) *cp)	/* letter, or */
					/* CANNOT do non-decimal and saw 0x */
		    || (! do_non_decimal_data && is_hex(cp, cpend))))) {
		goto badnum;
	}

	if (cpend - cp == 1) {		/* only one character */
		if (isdigit((unsigned char) *cp)) {	/* it's a digit! */
			n->numbr = (AWKNUM)(*cp - '0');
			if (n->stlen == 1)		/* no white space */
				n->flags |= NUMINT;
			goto goodnum;
		}
		goto badnum;
	}

	errno = 0;
	if (do_non_decimal_data		/* main.c assures false if do_posix */
		&& ! do_traditional && get_numbase(cp, cpend - cp, true) != 10) {
		/* nondec2awknum() saves and restores the byte after the string itself */
		n->numbr = nondec2awknum(cp, cpend - cp, &ptr);
	} else {
		save = *cpend;
		*cpend = '\0';
		n->numbr = (AWKNUM) strtod((const char *) cp, &ptr);
		*cpend = save;
	}

	if (errno == 0) {
		if (ptr == cpend)
			goto goodnum;
		/* else keep the leading numeric value without updating flags */
		/* fall through to badnum */
	} else {
		errno = 0;
		/*
		 * N.B. For subnormal values, strtod may return the
		 * floating-point representation while setting errno to ERANGE.
		 * We force the numeric value to 0 in such cases.
		 */
		n->numbr = 0;
		/*
		 * Or should we accept it as a NUMBER even though strtod
		 * threw an error?
		 */
		/* fall through to badnum */
	}
badnum:
	n->flags &= ~USER_INPUT;
	return n;

goodnum:
	if ((n->flags & USER_INPUT) != 0) {
		/* leave USER_INPUT enabled to indicate that this is a strnum */
		n->flags &= ~STRING;
		n->flags |= NUMBER;
	}
	return n;
}


/*
 * The following lookup table is used as an optimization in force_string;
 * (more complicated) variations on this theme didn't seem to pay off, but
 * systematic testing might be in order at some point.
 */
static const char *values[] = {
	"0",
	"1",
	"2",
	"3",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9",
};
#define	NVAL	(sizeof(values)/sizeof(values[0]))

/* r_format_val --- format a numeric value based on format */

NODE *
r_format_val(const char *format, int index, NODE *s)
{
	char buf[BUFSIZ];
	char *sp = buf;
	double val;

	/*
	 * 2/2007: Simplify our lives here. Instead of worrying about
	 * whether or not the value will fit into a long just so we
	 * can use sprintf("%ld", val) on it, always format it ourselves.
	 * The only thing to worry about is that integral values always
	 * format as integers. %.0f does that very well.
	 *
	 * 6/2008: Would that things were so simple. Always using %.0f
	 * imposes a notable performance penalty for applications that
	 * do a lot of conversion of integers to strings. So, we reinstate
	 * the old code, but use %.0f for integral values that are outside
	 * the range of a long.  This seems a reasonable compromise.
	 *
	 * 12/2009: Use <= and >= in the comparisons with LONG_xxx instead of
	 * < and > so that things work correctly on systems with 64 bit integers.
	 */

	/* not an integral value, or out of range */
	if ((val = double_to_int(s->numbr)) != s->numbr
			|| val <= LONG_MIN || val >= LONG_MAX
	) {
		/*
		 * Once upon a time, we just blindly did this:
		 *	sprintf(sp, format, s->numbr);
		 *	s->stlen = strlen(sp);
		 *	s->stfmt = index;
		 * but that's no good if, e.g., OFMT is %s. So we punt,
		 * and just always format the value ourselves.
		 */

		NODE *dummy[2], *r;
		unsigned int oflags;

		/* create dummy node for a sole use of format_tree */
		dummy[1] = s;
		oflags = s->flags;

		if (val == s->numbr) {
			/* integral value, but outside range of %ld, use %.0f */
			r = format_tree("%.0f", 4, dummy, 2);
			s->stfmt = STFMT_UNUSED;
		} else {
			r = format_tree(format, fmt_list[index]->stlen, dummy, 2);
			assert(r != NULL);
			s->stfmt = index;
		}
		s->flags = oflags;
		s->stlen = r->stlen;
		if ((s->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR))
			efree(s->stptr);
		s->stptr = r->stptr;
#ifdef HAVE_MPFR
		s->strndmode = MPFR_round_mode;
#endif
		freenode(r);	/* Do not unref(r)! We want to keep s->stptr == r->stpr.  */

		goto no_malloc;
	} else {
		/*
		 * integral value; force conversion to long only once.
		 */
		long num = (long) val;

		if (num < NVAL && num >= 0) {
			sp = (char *) values[num];
			s->stlen = 1;
		} else {
			(void) sprintf(sp, "%ld", num);
			s->stlen = strlen(sp);
		}
		s->stfmt = STFMT_UNUSED;
		if ((s->flags & INTIND) != 0) {
			s->flags &= ~(INTIND|NUMBER);
			s->flags |= STRING;
		}
#ifdef HAVE_MPFR
		s->strndmode = MPFR_round_mode;
#endif
	}
	if ((s->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR))
		efree(s->stptr);
	emalloc(s->stptr, char *, s->stlen + 1, "format_val");
	memcpy(s->stptr, sp, s->stlen + 1);
no_malloc:
	s->flags |= STRCUR;
	free_wstr(s);
	return s;
}

/* r_dupnode --- duplicate a node */

NODE *
r_dupnode(NODE *n)
{
	NODE *r;

	assert(n->type == Node_val);

#ifdef GAWKDEBUG
	if ((n->flags & MALLOC) != 0) {
		n->valref++;
		return n;
	}
#endif

	getnode(r);
	*r = *n;
	r->flags |= MALLOC;
	r->valref = 1;
	/*
	 * DON'T call free_wstr(r) here!
	 * r->wstptr still points at n->wstptr's value, and we
	 * don't want to free it!
	 */
	r->wstptr = NULL;
	r->wstlen = 0;

	if ((n->flags & STRCUR) != 0) {
		emalloc(r->stptr, char *, n->stlen + 1, "r_dupnode");
		memcpy(r->stptr, n->stptr, n->stlen);
		r->stptr[n->stlen] = '\0';
		if ((n->flags & WSTRCUR) != 0) {
			r->wstlen = n->wstlen;
			emalloc(r->wstptr, wchar_t *, sizeof(wchar_t) * (n->wstlen + 1), "r_dupnode");
			memcpy(r->wstptr, n->wstptr, n->wstlen * sizeof(wchar_t));
			r->wstptr[n->wstlen] = L'\0';
			r->flags |= WSTRCUR;
		}
	}

	return r;
}

/* r_make_number --- allocate a node with defined number */

static NODE *
r_make_number(double x)
{
	NODE *r = make_number_node(0);
	r->numbr = x;
	return r;
}

/* cmp_awknums --- compare two AWKNUMs */

int
cmp_awknums(const NODE *t1, const NODE *t2)
{
	/*
	 * This routine is also used to sort numeric array indices or values.
	 * For the purposes of sorting, NaN is considered greater than
	 * any other value, and all NaN values are considered equivalent and equal.
	 * This isn't in compliance with IEEE standard, but compliance w.r.t. NaN
	 * comparison at the awk level is a different issue, and needs to be dealt
	 * with in the interpreter for each opcode seperately.
	 */

	if (isnan(t1->numbr))
		return ! isnan(t2->numbr);
	if (isnan(t2->numbr))
		return -1;
	/* don't subtract, in case one or both are infinite */
	if (t1->numbr == t2->numbr)
		return 0;
	if (t1->numbr < t2->numbr)
		return -1;
	return 1;
}


/* make_str_node --- make a string node */

NODE *
make_str_node(const char *s, size_t len, int flags)
{
	NODE *r;
	getnode(r);
	r->type = Node_val;
	r->numbr = 0;
	r->flags = (MALLOC|STRING|STRCUR);
	r->valref = 1;
	r->stfmt = STFMT_UNUSED;
#ifdef HAVE_MPFR
	r->strndmode = MPFR_round_mode;
#endif
	r->wstptr = NULL;
	r->wstlen = 0;

	if ((flags & ALREADY_MALLOCED) != 0)
		r->stptr = (char *) s;
	else {
		emalloc(r->stptr, char *, len + 1, "make_str_node");
		memcpy(r->stptr, s, len);
	}
	r->stptr[len] = '\0';

	if ((flags & SCAN) != 0) {	/* scan for escape sequences */
		const char *pf;
		char *ptm;
		int c;
		const char *end;
		mbstate_t cur_state;

		memset(& cur_state, 0, sizeof(cur_state));

		end = &(r->stptr[len]);
		for (pf = ptm = r->stptr; pf < end;) {
			/*
			 * Keep multibyte characters together. This avoids
			 * problems if a subsequent byte of a multibyte
			 * character happens to be a backslash.
			 */
			if (gawk_mb_cur_max > 1) {
				int mblen = mbrlen(pf, end-pf, &cur_state);

				if (mblen > 1) {
					int i;

					for (i = 0; i < mblen; i++)
						*ptm++ = *pf++;
					continue;
				}
			}

			c = *pf++;
			if (c == '\\') {
				c = parse_escape(&pf);
				if (c < 0) {
					if (do_lint)
						lintwarn(_("backslash at end of string"));
					c = '\\';
				}
				*ptm++ = c;
			} else
				*ptm++ = c;
		}
		len = ptm - r->stptr;
		erealloc(r->stptr, char *, len + 1, "make_str_node");
		r->stptr[len] = '\0';
	}
	r->stlen = len;

	return r;
}

/* make_typed_regex --- make a typed regex node */

NODE *
make_typed_regex(const char *re, size_t len)
{
	NODE *n, *exp, *n2;

	exp = make_str_node(re, len, ALREADY_MALLOCED);
	n = make_regnode(Node_regex, exp);
	if (n == NULL)
		fatal(_("could not make typed regex"));

	n2 = make_string(re, len);
	n2->typed_re = n;
	n2->numbr = 0;
	n2->flags |= NUMCUR|STRCUR|REGEX; 
	n2->flags &= ~(STRING|NUMBER);

	return n2;
}


/* unref --- remove reference to a particular node */

void
r_unref(NODE *tmp)
{
#ifdef GAWKDEBUG
	if (tmp == NULL)
		return;
	if ((tmp->flags & MALLOC) != 0) {
		if (tmp->valref > 1) {
			tmp->valref--;
			return;
		}
		if ((tmp->flags & STRCUR) != 0)
			efree(tmp->stptr);
	}
#else
	if ((tmp->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR))
		efree(tmp->stptr);
#endif

	mpfr_unset(tmp);

	free_wstr(tmp);
	freenode(tmp);
}


/*
 * parse_escape:
 *
 * Parse a C escape sequence.  STRING_PTR points to a variable containing a
 * pointer to the string to parse.  That pointer is updated past the
 * characters we use.  The value of the escape sequence is returned.
 *
 * A negative value means the sequence \ newline was seen, which is supposed to
 * be equivalent to nothing at all.
 *
 * If \ is followed by a null character, we return a negative value and leave
 * the string pointer pointing at the null character.
 *
 * If \ is followed by 000, we return 0 and leave the string pointer after the
 * zeros.  A value of 0 does not mean end of string.
 *
 * POSIX doesn't allow \x.
 */

int
parse_escape(const char **string_ptr)
{
	int c = *(*string_ptr)++;
	int i;
	int count;
	int j;
	const char *start;

	if (do_lint_old) {
		switch (c) {
		case 'a':
		case 'b':
		case 'f':
		case 'r':
			warning(_("old awk does not support the `\\%c' escape sequence"), c);
			break;
		}
	}

	switch (c) {
	case 'a':
		return '\a';
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case 'v':
		return '\v';
	case '\n':
		return -2;
	case 0:
		(*string_ptr)--;
		return -1;
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		i = c - '0';
		count = 0;
		while (++count < 3) {
			if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
				i *= 8;
				i += c - '0';
			} else {
				(*string_ptr)--;
				break;
			}
		}
		return i;
	case 'x':
		if (do_lint) {
			static bool warned = false;

			if (! warned) {
				warned = true;
				lintwarn(_("POSIX does not allow `\\x' escapes"));
			}
		}
		if (do_posix)
			return ('x');
		if (! isxdigit((unsigned char) (*string_ptr)[0])) {
			warning(_("no hex digits in `\\x' escape sequence"));
			return ('x');
		}
		start = *string_ptr;
		for (i = j = 0; j < 2; j++) {
			/* do outside test to avoid multiple side effects */
			c = *(*string_ptr)++;
			if (isxdigit(c)) {
				i *= 16;
				if (isdigit(c))
					i += c - '0';
				else if (isupper(c))
					i += c - 'A' + 10;
				else
					i += c - 'a' + 10;
			} else {
				(*string_ptr)--;
				break;
			}
		}
		if (do_lint && j > 2)
			lintwarn(_("hex escape \\x%.*s of %d characters probably not interpreted the way you expect"), j, start, j);
		return i;
	case '\\':
	case '"':
		return c;
	default:
	{
		static bool warned[256];
		unsigned char uc = (unsigned char) c;

		/* N.B.: use unsigned char here to avoid Latin-1 problems */

		if (! warned[uc]) {
			warned[uc] = true;

			warning(_("escape sequence `\\%c' treated as plain `%c'"), uc, uc);
		}
	}
		return c;
	}
}

/* get_numbase --- return the base to use for the number in 's' */

int
get_numbase(const char *s, size_t len, bool use_locale)
{
	int dec_point = '.';
	const char *str = s;

#if defined(HAVE_LOCALE_H)
	/*
	 * loc.decimal_point may not have been initialized yet,
	 * so double check it before using it.
	 */
	if (use_locale && loc.decimal_point != NULL && loc.decimal_point[0] != '\0')
		dec_point = loc.decimal_point[0];	/* XXX --- assumes one char */
#endif

	if (len < 2 || str[0] != '0')
		return 10;

	/* leading 0x or 0X */
	if (str[1] == 'x' || str[1] == 'X')
		return 16;

	/*
	 * Numbers with '.', 'e', or 'E' are decimal.
	 * Have to check so that things like 00.34 are handled right.
	 *
	 * These beasts can have trailing whitespace. Deal with that too.
	 */
	for (; len > 0; len--, str++) {
		if (*str == 'e' || *str == 'E' || *str == dec_point)
			return 10;
		else if (! isdigit((unsigned char) *str))
			break;
	}

	if (! isdigit((unsigned char) s[1])
			|| s[1] == '8' || s[1] == '9'
	)
		return 10;
	return 8;
}

/* str2wstr --- convert a multibyte string to a wide string */

NODE *
str2wstr(NODE *n, size_t **ptr)
{
	size_t i, count, src_count;
	char *sp;
	mbstate_t mbs;
	wchar_t wc, *wsp;
	static bool warned = false;

	assert((n->flags & (STRING|STRCUR)) != 0);

	/*
	 * Don't convert global null string or global null field
	 * variables to a wide string. They are both zero-length anyway.
	 * This also avoids future double-free errors while releasing
	 * shallow copies, eg. *tmp = *Null_field; free_wstr(tmp);
	 */
	if (n == Nnull_string || n == Null_field)
		return n;

	if ((n->flags & WSTRCUR) != 0) {
		if (ptr == NULL)
			return n;
		/* otherwise
			fall through and recompute to fill in the array */
		free_wstr(n);
	}

	/*
	 * After consideration and consultation, this
	 * code trades space for time. We allocate
	 * an array of wchar_t that is n->stlen long.
	 * This is needed in the worst case anyway, where
	 * each input byte maps to one wchar_t.  The
	 * advantage is that we only have to convert the string
	 * once, instead of twice, once to find out how many
	 * wide characters, and then again to actually fill in
	 * the info.  If there's a lot left over, we can
	 * realloc the wide string down in size.
	 */

	emalloc(n->wstptr, wchar_t *, sizeof(wchar_t) * (n->stlen + 1), "str2wstr");
	wsp = n->wstptr;

	/*
	 * For use by do_match, create and fill in an array.
	 * For each byte `i' in n->stptr (the original string),
	 * a[i] is equal to `j', where `j' is the corresponding wchar_t
	 * in the converted wide string.
	 *
	 * Create the array.
	 */
	if (ptr != NULL) {
		ezalloc(*ptr, size_t *, sizeof(size_t) * n->stlen, "str2wstr");
	}

	sp = n->stptr;
	src_count = n->stlen;
	memset(& mbs, 0, sizeof(mbs));
	for (i = 0; src_count > 0; i++) {
		/*
		 * 9/2010: Check the current byte; if it's a valid character,
		 * then it doesn't start a multibyte sequence. This brings a
		 * big speed up. Thanks to Ulrich Drepper for the tip.
		 * 11/2010: Thanks to Paolo Bonzini for some even faster code.
		 */
		if (is_valid_character(*sp)) {
			count = 1;
			wc = btowc_cache(*sp);
		} else
			count = mbrtowc(& wc, sp, src_count, & mbs);
		switch (count) {
		case (size_t) -2:
		case (size_t) -1:
			/*
			 * mbrtowc(3) says the state of mbs becomes undefined
			 * after a bad character, so reset it.
			 */
			memset(& mbs, 0, sizeof(mbs));

			/* Warn the user something's wrong */
			if (! warned) {
				warned = true;
				warning(_("Invalid multibyte data detected. There may be a mismatch between your data and your locale."));
			}

			/*
			 * 8/2015: If we're using UTF, then instead of just
			 * skipping the character, plug in the Unicode
			 * replacement character. In most cases this gives
			 * us "better" results, in that character counts
			 * and string lengths tend to make more sense.
			 *
			 * Otherwise, just skip the bad byte and keep going,
			 * so that we get a more-or-less full string, instead of
			 * stopping early. This is particularly important
			 * for match() where we need to build the indices.
			 */
			if (using_utf8()) {
				count = 1;
				wc = 0xFFFD;	/* unicode replacement character */
				goto set_wc;
			} else {
				/* skip it and keep going */
				sp++;
				src_count--;
			}
			break;

		case 0:
			count = 1;
			/* fall through */
		default:
		set_wc:
			*wsp++ = wc;
			src_count -= count;
			while (count--)  {
				if (ptr != NULL)
					(*ptr)[sp - n->stptr] = i;
				sp++;
			}
			break;
		}
	}

	*wsp = L'\0';
	n->wstlen = wsp - n->wstptr;
	n->flags |= WSTRCUR;
#define ARBITRARY_AMOUNT_TO_GIVE_BACK 100
	if (n->stlen - n->wstlen > ARBITRARY_AMOUNT_TO_GIVE_BACK)
		erealloc(n->wstptr, wchar_t *, sizeof(wchar_t) * (n->wstlen + 1), "str2wstr");

	return n;
}

/* wstr2str --- convert a wide string back into multibyte one */

NODE *
wstr2str(NODE *n)
{
	size_t result;
	size_t length;
	wchar_t *wp;
	mbstate_t mbs;
	char *newval, *cp;

	assert(n->valref == 1);
	assert((n->flags & WSTRCUR) != 0);

	/*
	 * Convert the wide chars in t1->wstptr back into m.b. chars.
	 * This is pretty grotty, but it's the most straightforward
	 * way to do things.
	 */
	memset(& mbs, 0, sizeof(mbs));

	length = n->wstlen;
	emalloc(newval, char *, (length * gawk_mb_cur_max) + 1, "wstr2str");

	wp = n->wstptr;
	for (cp = newval; length > 0; length--) {
		result = wcrtomb(cp, *wp, & mbs);
		if (result == (size_t) -1)	/* what to do? break seems best */
			break;
		cp += result;
		wp++;
	}
	*cp = '\0';

	/* N.B. caller just created n with make_string, so this free is safe */
	efree(n->stptr);
	n->stptr = newval;
	n->stlen = cp - newval;

	return n;
}

/* free_wstr --- release the wide string part of a node */

void
r_free_wstr(NODE *n)
{
	assert(n->type == Node_val);

	if ((n->flags & WSTRCUR) != 0) {
		assert(n->wstptr != NULL);
		efree(n->wstptr);
	}
	n->wstptr = NULL;
	n->wstlen = 0;
	n->flags &= ~WSTRCUR;
}

static void __attribute__ ((unused))
dump_wstr(FILE *fp, const wchar_t *str, size_t len)
{
	if (str == NULL || len == 0)
		return;

	for (; len--; str++)
		putwc(*str, fp);
}

/* wstrstr --- walk haystack, looking for needle, wide char version */

const wchar_t *
wstrstr(const wchar_t *haystack, size_t hs_len,
	const wchar_t *needle, size_t needle_len)
{
	size_t i;

	if (haystack == NULL || needle == NULL || needle_len > hs_len)
		return NULL;

	for (i = 0; i < hs_len; i++) {
		if (haystack[i] == needle[0]
		    && i+needle_len-1 < hs_len
		    && haystack[i+needle_len-1] == needle[needle_len-1]) {
			/* first & last chars match, check string */
			if (memcmp(haystack+i, needle, sizeof(wchar_t) * needle_len) == 0) {
				return haystack + i;
			}
		}
	}

	return NULL;
}

/* wcasestrstr --- walk haystack, nocase look for needle, wide char version */

const wchar_t *
wcasestrstr(const wchar_t *haystack, size_t hs_len,
	const wchar_t *needle, size_t needle_len)
{
	size_t i, j;

	if (haystack == NULL || needle == NULL || needle_len > hs_len)
		return NULL;

	for (i = 0; i < hs_len; i++) {
		if (towlower(haystack[i]) == towlower(needle[0])
		    && i+needle_len-1 < hs_len
		    && towlower(haystack[i+needle_len-1]) == towlower(needle[needle_len-1])) {
			/* first & last chars match, check string */
			const wchar_t *start;

			start = haystack+i;
			for (j = 0; j < needle_len; j++, start++) {
				wchar_t h, n;

				h = towlower(*start);
				n = towlower(needle[j]);
				if (h != n)
					goto out;
			}
			return haystack + i;
		}
out:	;
	}

	return NULL;
}

/* is_ieee_magic_val --- return true for +inf, -inf, +nan, -nan */

static int
is_ieee_magic_val(const char *val)
{
	/*
	 * Avoid strncasecmp: it mishandles ASCII bytes in some locales.
	 * Assume the length is 4, as the caller checks this.
	 */
	return (   (val[0] == '+' || val[0] == '-')
		&& (   (   (val[1] == 'i' || val[1] == 'I')
			&& (val[2] == 'n' || val[2] == 'N')
			&& (val[3] == 'f' || val[3] == 'F'))
		    || (   (val[1] == 'n' || val[1] == 'N')
			&& (val[2] == 'a' || val[2] == 'A')
			&& (val[3] == 'n' || val[3] == 'N'))));
}

/* get_ieee_magic_val --- return magic value for string */

static AWKNUM
get_ieee_magic_val(char *val)
{
	static bool first = true;
	static AWKNUM inf;
	static AWKNUM nan;
	char save;

	char *ptr;
	save = val[4];
	val[4] = '\0';
	AWKNUM v = strtod(val, &ptr);
	val[4] = save;

	if (val == ptr) { /* Older strtod implementations don't support inf or nan. */
		if (first) {
			first = false;
			nan = sqrt(-1.0);
			inf = -log(0.0);
		}

		v = ((val[1] == 'i' || val[1] == 'I') ? inf : nan);
		if (val[0] == '-')
			v = -v;
	}

	return v;
}

wint_t btowc_cache[256];

/* init_btowc_cache --- initialize the cache */

void init_btowc_cache()
{
	int i;

	for (i = 0; i < 255; i++) {
		btowc_cache[i] = btowc(i);
	}
}

#define BLOCKCHUNK 100

struct block_header nextfree[BLOCK_MAX] = {
	{ NULL, sizeof(NODE) },
	{ NULL, sizeof(BUCKET) },
#ifdef HAVE_MPFR
	{ NULL, sizeof(mpfr_t) },
	{ NULL, sizeof(mpz_t) },
#endif
};


/* more_blocks --- get more blocks of memory and add to the free list;
	size of a block must be >= sizeof(struct block_item)
 */

void *
more_blocks(int id)
{
	struct block_item *freep, *np, *next;
	char *p, *endp;
	size_t size;

	size = nextfree[id].size;

	assert(size >= sizeof(struct block_item));
	emalloc(freep, struct block_item *, BLOCKCHUNK * size, "more_blocks");
	p = (char *) freep;
	endp = p + BLOCKCHUNK * size;

	for (np = freep; ; np = next) {
		next = (struct block_item *) (p += size);
		if (p >= endp) {
			np->freep = NULL;
			break;
		}
		np->freep = next;
	}
	nextfree[id].freep = freep->freep;
	return freep;
}