Blame str_array.c

Packit Service f629e6
/*
Packit Service f629e6
 * str_array.c - routines for associative arrays of string indices.
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2018,
Packit Service f629e6
 * the Free Software Foundation, Inc.
Packit Service f629e6
 *
Packit Service f629e6
 * This file is part of GAWK, the GNU implementation of the
Packit Service f629e6
 * AWK Programming Language.
Packit Service f629e6
 *
Packit Service f629e6
 * GAWK is free software; you can redistribute it and/or modify
Packit Service f629e6
 * it under the terms of the GNU General Public License as published by
Packit Service f629e6
 * the Free Software Foundation; either version 3 of the License, or
Packit Service f629e6
 * (at your option) any later version.
Packit Service f629e6
 *
Packit Service f629e6
 * GAWK is distributed in the hope that it will be useful,
Packit Service f629e6
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service f629e6
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit Service f629e6
 * GNU General Public License for more details.
Packit Service f629e6
 *
Packit Service f629e6
 * You should have received a copy of the GNU General Public License
Packit Service f629e6
 * along with this program; if not, write to the Free Software
Packit Service f629e6
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
#include "awk.h"
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * Tree walks (``for (iggy in foo)'') and array deletions use expensive
Packit Service f629e6
 * linear searching.  So what we do is start out with small arrays and
Packit Service f629e6
 * grow them as needed, so that our arrays are hopefully small enough,
Packit Service f629e6
 * most of the time, that they're pretty full and we're not looking at
Packit Service f629e6
 * wasted space.
Packit Service f629e6
 *
Packit Service f629e6
 * The decision is made to grow the array if the average chain length is
Packit Service f629e6
 * ``too big''. This is defined as the total number of entries in the table
Packit Service f629e6
 * divided by the size of the array being greater than some constant.
Packit Service f629e6
 *
Packit Service f629e6
 * 11/2002: We make the constant a variable, so that it can be tweaked
Packit Service f629e6
 * via environment variable.
Packit Service f629e6
 * 11/2002: Modern machines are bigger, cut this down from 10.
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
static size_t STR_CHAIN_MAX = 2;
Packit Service f629e6
Packit Service f629e6
extern FILE *output_fp;
Packit Service f629e6
extern void indent(int indent_level);
Packit Service f629e6
Packit Service f629e6
static NODE **str_array_init(NODE *symbol, NODE *subs);
Packit Service f629e6
static NODE **str_lookup(NODE *symbol, NODE *subs);
Packit Service f629e6
static NODE **str_exists(NODE *symbol, NODE *subs);
Packit Service f629e6
static NODE **str_clear(NODE *symbol, NODE *subs);
Packit Service f629e6
static NODE **str_remove(NODE *symbol, NODE *subs);
Packit Service f629e6
static NODE **str_list(NODE *symbol, NODE *subs);
Packit Service f629e6
static NODE **str_copy(NODE *symbol, NODE *newsymb);
Packit Service f629e6
static NODE **str_dump(NODE *symbol, NODE *ndump);
Packit Service f629e6
Packit Service f629e6
afunc_t str_array_func[] = {
Packit Service f629e6
	str_array_init,
Packit Service f629e6
	(afunc_t) 0,
Packit Service f629e6
	null_length,
Packit Service f629e6
	str_lookup,
Packit Service f629e6
	str_exists,
Packit Service f629e6
	str_clear,
Packit Service f629e6
	str_remove,
Packit Service f629e6
	str_list,
Packit Service f629e6
	str_copy,
Packit Service f629e6
	str_dump,
Packit Service f629e6
	(afunc_t) 0,
Packit Service f629e6
};
Packit Service f629e6
Packit Service f629e6
static NODE **env_remove(NODE *symbol, NODE *subs);
Packit Service f629e6
static NODE **env_store(NODE *symbol, NODE *subs);
Packit Service f629e6
static NODE **env_clear(NODE *symbol, NODE *subs);
Packit Service f629e6
Packit Service f629e6
/* special case for ENVIRON */
Packit Service f629e6
afunc_t env_array_func[] = {
Packit Service f629e6
	str_array_init,
Packit Service f629e6
	(afunc_t) 0,
Packit Service f629e6
	null_length,
Packit Service f629e6
	str_lookup,
Packit Service f629e6
	str_exists,
Packit Service f629e6
	env_clear,
Packit Service f629e6
	env_remove,
Packit Service f629e6
	str_list,
Packit Service f629e6
	str_copy,
Packit Service f629e6
	str_dump,
Packit Service f629e6
	env_store,
Packit Service f629e6
};
Packit Service f629e6
Packit Service f629e6
static inline NODE **str_find(NODE *symbol, NODE *s1, size_t code1, unsigned long hash1);
Packit Service f629e6
static void grow_table(NODE *symbol);
Packit Service f629e6
Packit Service f629e6
static unsigned long gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code);
Packit Service f629e6
static unsigned long scramble(unsigned long x);
Packit Service f629e6
static unsigned long awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code);
Packit Service f629e6
Packit Service f629e6
unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code) = awk_hash;
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/* str_array_init --- array initialization routine */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
str_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
Packit Service f629e6
{
Packit Service f629e6
	if (symbol == NULL) {		/* first time */
Packit Service f629e6
		long newval;
Packit Service f629e6
		const char *val;
Packit Service f629e6
Packit Service f629e6
		/* check relevant environment variables */
Packit Service f629e6
		if ((newval = getenv_long("STR_CHAIN_MAX")) > 0)
Packit Service f629e6
			STR_CHAIN_MAX = newval;
Packit Service f629e6
		if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0)
Packit Service f629e6
			hash = gst_hash_string;
Packit Service f629e6
	} else
Packit Service f629e6
		null_array(symbol);
Packit Service f629e6
Packit Service f629e6
	return & success_node;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
 * assoc_lookup:
Packit Service f629e6
 * Find SYMBOL[SUBS] in the assoc array.  Install it with value "" if it
Packit Service f629e6
 * isn't there. Returns a pointer ala get_lhs to where its value is stored.
Packit Service f629e6
 *
Packit Service f629e6
 * SYMBOL is the address of the node (or other pointer) being dereferenced.
Packit Service f629e6
 * SUBS is a number or string used as the subscript.
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
str_lookup(NODE *symbol, NODE *subs)
Packit Service f629e6
{
Packit Service f629e6
	unsigned long hash1;
Packit Service f629e6
	NODE **lhs;
Packit Service f629e6
	BUCKET *b;
Packit Service f629e6
	size_t code1;
Packit Service f629e6
Packit Service f629e6
	subs = force_string(subs);
Packit Service f629e6
Packit Service f629e6
	if (symbol->buckets == NULL)
Packit Service f629e6
		grow_table(symbol);
Packit Service f629e6
	hash1 = hash(subs->stptr, subs->stlen,
Packit Service f629e6
			(unsigned long) symbol->array_size, & code1);
Packit Service f629e6
	if ((lhs = str_find(symbol, subs, code1, hash1)) != NULL)
Packit Service f629e6
		return lhs;
Packit Service f629e6
Packit Service f629e6
	/* It's not there, install it. */
Packit Service f629e6
	/* first see if we would need to grow the array, before installing */
Packit Service f629e6
Packit Service f629e6
	symbol->table_size++;
Packit Service f629e6
	if ((symbol->flags & ARRAYMAXED) == 0
Packit Service f629e6
			&& (symbol->table_size / symbol->array_size) > STR_CHAIN_MAX) {
Packit Service f629e6
		grow_table(symbol);
Packit Service f629e6
		/* have to recompute hash value for new size */
Packit Service f629e6
		hash1 = code1 % (unsigned long) symbol->array_size;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Repeat after me: "Array indices are always strings."
Packit Service f629e6
	 * "Array indices are always strings."
Packit Service f629e6
	 * "Array indices are always strings."
Packit Service f629e6
	 * "Array indices are always strings."
Packit Service f629e6
	 * ....
Packit Service f629e6
	 */
Packit Service f629e6
	// Special cases:
Packit Service f629e6
	// 1. The string was generated using CONVFMT.
Packit Service f629e6
	// 2. The string was from an unassigned variable.
Packit Service f629e6
	// 3. The string was from an unassigned field.
Packit Service f629e6
	if (   subs->stfmt != STFMT_UNUSED
Packit Service f629e6
	    || subs == Nnull_string
Packit Service f629e6
	    || (subs->flags & NULL_FIELD) != 0) {
Packit Service f629e6
		NODE *tmp;
Packit Service f629e6
Packit Service f629e6
		/*
Packit Service f629e6
		 * Need to freeze this string value --- it must never
Packit Service f629e6
		 * change, no matter what happens to the value
Packit Service f629e6
		 * that created it or to CONVFMT, etc.; So, get
Packit Service f629e6
		 * a private copy.
Packit Service f629e6
		 */
Packit Service f629e6
Packit Service f629e6
		tmp = make_string(subs->stptr, subs->stlen);
Packit Service f629e6
Packit Service f629e6
		/*
Packit Service f629e6
		* Set the numeric value for the index if it's  available. Useful
Packit Service f629e6
		* for numeric sorting by index.  Do this only if the numeric
Packit Service f629e6
		* value is available, instead of all the time, since doing it
Packit Service f629e6
		* all the time is a big performance hit for something that may
Packit Service f629e6
		* never be used.
Packit Service f629e6
		*/
Packit Service f629e6
Packit Service f629e6
		if ((subs->flags & (MPFN|MPZN|NUMCUR)) == NUMCUR) {
Packit Service f629e6
			tmp->numbr = subs->numbr;
Packit Service f629e6
			tmp->flags |= NUMCUR;
Packit Service f629e6
		}
Packit Service f629e6
		subs = tmp;
Packit Service f629e6
	} else {
Packit Service f629e6
		/* string value already "frozen" */
Packit Service f629e6
Packit Service f629e6
		subs = dupnode(subs);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	getbucket(b);
Packit Service f629e6
	b->ahnext = symbol->buckets[hash1];
Packit Service f629e6
	symbol->buckets[hash1] = b;
Packit Service f629e6
	b->ahname = subs;
Packit Service f629e6
	b->ahname_str = subs->stptr;
Packit Service f629e6
	b->ahname_len = subs->stlen;
Packit Service f629e6
	b->ahvalue = dupnode(Nnull_string);
Packit Service f629e6
	b->ahcode = code1;
Packit Service f629e6
	return & (b->ahvalue);
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* str_exists --- test whether the array element symbol[subs] exists or not,
Packit Service f629e6
 * 		return pointer to value if it does.
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
str_exists(NODE *symbol, NODE *subs)
Packit Service f629e6
{
Packit Service f629e6
	unsigned long hash1;
Packit Service f629e6
	size_t code1;
Packit Service f629e6
Packit Service f629e6
	if (symbol->table_size == 0)
Packit Service f629e6
		return NULL;
Packit Service f629e6
Packit Service f629e6
	subs = force_string(subs);
Packit Service f629e6
	hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size, & code1);
Packit Service f629e6
	return str_find(symbol, subs, code1, hash1);
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* str_clear --- flush all the values in symbol[] */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
str_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
Packit Service f629e6
{
Packit Service f629e6
	unsigned long i;
Packit Service f629e6
	BUCKET *b, *next;
Packit Service f629e6
	NODE *r;
Packit Service f629e6
Packit Service f629e6
	for (i = 0; i < symbol->array_size; i++) {
Packit Service f629e6
		for (b = symbol->buckets[i]; b != NULL; b = next) {
Packit Service f629e6
			next = b->ahnext;
Packit Service f629e6
			r = b->ahvalue;
Packit Service f629e6
			if (r->type == Node_var_array) {
Packit Service f629e6
				assoc_clear(r);	/* recursively clear all sub-arrays */
Packit Service f629e6
				efree(r->vname);
Packit Service f629e6
				freenode(r);
Packit Service f629e6
			} else
Packit Service f629e6
				unref(r);
Packit Service f629e6
			unref(b->ahname);
Packit Service f629e6
			freebucket(b);
Packit Service f629e6
		}
Packit Service f629e6
		symbol->buckets[i] = NULL;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	if (symbol->buckets != NULL)
Packit Service f629e6
		efree(symbol->buckets);
Packit Service f629e6
	symbol->ainit(symbol, NULL);	/* re-initialize symbol */
Packit Service f629e6
	return NULL;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/* str_remove --- If SUBS is already in the table, remove it. */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
str_remove(NODE *symbol, NODE *subs)
Packit Service f629e6
{
Packit Service f629e6
	unsigned long hash1;
Packit Service f629e6
	BUCKET *b, *prev;
Packit Service f629e6
	NODE *s2;
Packit Service f629e6
	size_t s1_len;
Packit Service f629e6
Packit Service f629e6
	if (symbol->table_size == 0)
Packit Service f629e6
		return NULL;
Packit Service f629e6
Packit Service f629e6
	s2 = force_string(subs);
Packit Service f629e6
	hash1 = hash(s2->stptr, s2->stlen, (unsigned long) symbol->array_size, NULL);
Packit Service f629e6
Packit Service f629e6
	for (b = symbol->buckets[hash1], prev = NULL; b != NULL;
Packit Service f629e6
				prev = b, b = b->ahnext) {
Packit Service f629e6
Packit Service f629e6
		/* Array indexes are strings; compare as such, always! */
Packit Service f629e6
		s1_len = b->ahname_len;
Packit Service f629e6
Packit Service f629e6
		if (s1_len != s2->stlen)
Packit Service f629e6
			continue;
Packit Service f629e6
		if (s1_len == 0		/* "" is a valid index */
Packit Service f629e6
			    || memcmp(b->ahname_str, s2->stptr, s1_len) == 0) {
Packit Service f629e6
			/* item found */
Packit Service f629e6
Packit Service f629e6
			unref(b->ahname);
Packit Service f629e6
			if (prev != NULL)
Packit Service f629e6
				prev->ahnext = b->ahnext;
Packit Service f629e6
			else
Packit Service f629e6
				symbol->buckets[hash1] = b->ahnext;
Packit Service f629e6
Packit Service f629e6
			/* delete bucket */
Packit Service f629e6
			freebucket(b);
Packit Service f629e6
Packit Service f629e6
			/* one less element in array */
Packit Service f629e6
			if (--symbol->table_size == 0) {
Packit Service f629e6
				if (symbol->buckets != NULL)
Packit Service f629e6
					efree(symbol->buckets);
Packit Service f629e6
				symbol->ainit(symbol, NULL);	/* re-initialize symbol */
Packit Service f629e6
			}
Packit Service f629e6
Packit Service f629e6
			return & success_node;	/* return success */
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	return NULL;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/* str_copy --- duplicate input array "symbol" */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
str_copy(NODE *symbol, NODE *newsymb)
Packit Service f629e6
{
Packit Service f629e6
	BUCKET **old, **new, **pnew;
Packit Service f629e6
	BUCKET *chain, *newchain;
Packit Service f629e6
	unsigned long cursize, i;
Packit Service f629e6
Packit Service f629e6
	assert(symbol->table_size > 0);
Packit Service f629e6
Packit Service f629e6
	/* find the current hash size */
Packit Service f629e6
	cursize = symbol->array_size;
Packit Service f629e6
Packit Service f629e6
	/* allocate new table */
Packit Service f629e6
	ezalloc(new, BUCKET **, cursize * sizeof(BUCKET *), "str_copy");
Packit Service f629e6
Packit Service f629e6
	old = symbol->buckets;
Packit Service f629e6
Packit Service f629e6
	for (i = 0; i < cursize; i++) {
Packit Service f629e6
		for (chain = old[i], pnew = & new[i]; chain != NULL;
Packit Service f629e6
				chain = chain->ahnext
Packit Service f629e6
		) {
Packit Service f629e6
			NODE *oldval, *newsubs;
Packit Service f629e6
Packit Service f629e6
			getbucket(newchain);
Packit Service f629e6
Packit Service f629e6
			/*
Packit Service f629e6
			 * copy the corresponding name and
Packit Service f629e6
			 * value from the original input list
Packit Service f629e6
			 */
Packit Service f629e6
Packit Service f629e6
			newsubs = newchain->ahname = dupnode(chain->ahname);
Packit Service f629e6
			newchain->ahname_str = newsubs->stptr;
Packit Service f629e6
			newchain->ahname_len = newsubs->stlen;
Packit Service f629e6
Packit Service f629e6
			oldval = chain->ahvalue;
Packit Service f629e6
			if (oldval->type == Node_val)
Packit Service f629e6
				newchain->ahvalue = dupnode(oldval);
Packit Service f629e6
			else {
Packit Service f629e6
				NODE *r;
Packit Service f629e6
Packit Service f629e6
				r = make_array();
Packit Service f629e6
				r->vname = estrdup(oldval->vname, strlen(oldval->vname));
Packit Service f629e6
				r->parent_array = newsymb;
Packit Service f629e6
				newchain->ahvalue = assoc_copy(oldval, r);
Packit Service f629e6
			}
Packit Service f629e6
			newchain->ahcode = chain->ahcode;
Packit Service f629e6
Packit Service f629e6
			*pnew = newchain;
Packit Service f629e6
			newchain->ahnext = NULL;
Packit Service f629e6
			pnew = & newchain->ahnext;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	newsymb->table_size = symbol->table_size;
Packit Service f629e6
	newsymb->buckets = new;
Packit Service f629e6
	newsymb->array_size = cursize;
Packit Service f629e6
	newsymb->flags = symbol->flags;
Packit Service f629e6
	return NULL;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/* str_list --- return a list of array items */
Packit Service f629e6
Packit Service f629e6
static NODE**
Packit Service f629e6
str_list(NODE *symbol, NODE *t)
Packit Service f629e6
{
Packit Service f629e6
	NODE **list;
Packit Service f629e6
	NODE *subs, *val;
Packit Service f629e6
	BUCKET *b;
Packit Service f629e6
	unsigned long num_elems, list_size, i, k = 0;
Packit Service f629e6
	int elem_size = 1;
Packit Service f629e6
	assoc_kind_t assoc_kind;
Packit Service f629e6
Packit Service f629e6
	if (symbol->table_size == 0)
Packit Service f629e6
		return NULL;
Packit Service f629e6
Packit Service f629e6
	assoc_kind = (assoc_kind_t) t->flags;
Packit Service f629e6
	if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
Packit Service f629e6
		elem_size = 2;
Packit Service f629e6
Packit Service f629e6
	/* allocate space for array */
Packit Service f629e6
	num_elems = symbol->table_size;
Packit Service f629e6
	if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
Packit Service f629e6
		num_elems = 1;
Packit Service f629e6
	list_size =  elem_size * num_elems;
Packit Service f629e6
Packit Service f629e6
	emalloc(list, NODE **, list_size * sizeof(NODE *), "str_list");
Packit Service f629e6
Packit Service f629e6
	/* populate it */
Packit Service f629e6
Packit Service f629e6
	for (i = 0; i < symbol->array_size; i++) {
Packit Service f629e6
		for (b = symbol->buckets[i]; b != NULL;	b = b->ahnext) {
Packit Service f629e6
			/* index */
Packit Service f629e6
			subs = b->ahname;
Packit Service f629e6
			if ((assoc_kind & AINUM) != 0)
Packit Service f629e6
				(void) force_number(subs);
Packit Service f629e6
			list[k++] = dupnode(subs);
Packit Service f629e6
Packit Service f629e6
			/* value */
Packit Service f629e6
			if ((assoc_kind & AVALUE) != 0) {
Packit Service f629e6
				val = b->ahvalue;
Packit Service f629e6
				if (val->type == Node_val) {
Packit Service f629e6
					if ((assoc_kind & AVNUM) != 0)
Packit Service f629e6
						(void) force_number(val);
Packit Service f629e6
					else if ((assoc_kind & AVSTR) != 0)
Packit Service f629e6
						val = force_string(val);
Packit Service f629e6
				}
Packit Service f629e6
				list[k++] = val;
Packit Service f629e6
			}
Packit Service f629e6
			if (k >= list_size)
Packit Service f629e6
				return list;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
	return list;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/* str_kilobytes --- calculate memory consumption of the assoc array */
Packit Service f629e6
Packit Service f629e6
AWKNUM
Packit Service f629e6
str_kilobytes(NODE *symbol)
Packit Service f629e6
{
Packit Service f629e6
	unsigned long bucket_cnt;
Packit Service f629e6
	AWKNUM kb;
Packit Service f629e6
Packit Service f629e6
	bucket_cnt = symbol->table_size;
Packit Service f629e6
Packit Service f629e6
	/* This does not include extra memory for indices with stfmt != STFMT_UNUSED */
Packit Service f629e6
	kb = (((AWKNUM) bucket_cnt) * sizeof (BUCKET) +
Packit Service f629e6
		((AWKNUM) symbol->array_size) * sizeof (BUCKET *)) / 1024.0;
Packit Service f629e6
	return kb;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/* str_dump --- dump array info */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
str_dump(NODE *symbol, NODE *ndump)
Packit Service f629e6
{
Packit Service f629e6
#define HCNT	31
Packit Service f629e6
Packit Service f629e6
	int indent_level;
Packit Service f629e6
	unsigned long i, bucket_cnt;
Packit Service f629e6
	BUCKET *b;
Packit Service f629e6
	static size_t hash_dist[HCNT + 1];
Packit Service f629e6
Packit Service f629e6
	indent_level = ndump->alevel;
Packit Service f629e6
Packit Service f629e6
	if ((symbol->flags & XARRAY) == 0)
Packit Service f629e6
		fprintf(output_fp, "%s `%s'\n",
Packit Service f629e6
				(symbol->parent_array == NULL) ? "array" : "sub-array",
Packit Service f629e6
				array_vname(symbol));
Packit Service f629e6
	indent_level++;
Packit Service f629e6
	indent(indent_level);
Packit Service f629e6
	fprintf(output_fp, "array_func: str_array_func\n");
Packit Service f629e6
	if (symbol->flags != 0) {
Packit Service f629e6
		indent(indent_level);
Packit Service f629e6
		fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
Packit Service f629e6
	}
Packit Service f629e6
	indent(indent_level);
Packit Service f629e6
	fprintf(output_fp, "STR_CHAIN_MAX: %lu\n", (unsigned long) STR_CHAIN_MAX);
Packit Service f629e6
	indent(indent_level);
Packit Service f629e6
	fprintf(output_fp, "array_size: %lu\n", (unsigned long) symbol->array_size);
Packit Service f629e6
	indent(indent_level);
Packit Service f629e6
	fprintf(output_fp, "table_size: %lu\n", (unsigned long) symbol->table_size);
Packit Service f629e6
	indent(indent_level);
Packit Service f629e6
	fprintf(output_fp, "Avg # of items per chain: %.2g\n",
Packit Service f629e6
				((AWKNUM) symbol->table_size) / symbol->array_size);
Packit Service f629e6
Packit Service f629e6
	indent(indent_level);
Packit Service f629e6
	fprintf(output_fp, "memory: %.2g kB\n", str_kilobytes(symbol));
Packit Service f629e6
Packit Service f629e6
	/* hash value distribution */
Packit Service f629e6
Packit Service f629e6
	memset(hash_dist, '\0', (HCNT + 1) * sizeof(size_t));
Packit Service f629e6
	for (i = 0; i < symbol->array_size; i++) {
Packit Service f629e6
		bucket_cnt = 0;
Packit Service f629e6
		for (b = symbol->buckets[i]; b != NULL;	b = b->ahnext)
Packit Service f629e6
			bucket_cnt++;
Packit Service f629e6
		if (bucket_cnt >= HCNT)
Packit Service f629e6
			bucket_cnt = HCNT;
Packit Service f629e6
		hash_dist[bucket_cnt]++;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	indent(indent_level);
Packit Service f629e6
	fprintf(output_fp, "Hash distribution:\n");
Packit Service f629e6
	indent_level++;
Packit Service f629e6
	for (i = 0; i <= HCNT; i++) {
Packit Service f629e6
		if (hash_dist[i] > 0) {
Packit Service f629e6
			indent(indent_level);
Packit Service f629e6
			if (i == HCNT)
Packit Service f629e6
				fprintf(output_fp, "[>=%lu]:%lu\n",
Packit Service f629e6
					(unsigned long) HCNT, (unsigned long) hash_dist[i]);
Packit Service f629e6
			else
Packit Service f629e6
				fprintf(output_fp, "[%lu]:%lu\n",
Packit Service f629e6
					(unsigned long) i, (unsigned long) hash_dist[i]);
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
	indent_level--;
Packit Service f629e6
Packit Service f629e6
	/* dump elements */
Packit Service f629e6
Packit Service f629e6
	if (ndump->adepth >= 0) {
Packit Service f629e6
		const char *aname;
Packit Service f629e6
Packit Service f629e6
		fprintf(output_fp, "\n");
Packit Service f629e6
		aname = make_aname(symbol);
Packit Service f629e6
		for (i = 0; i < symbol->array_size; i++) {
Packit Service f629e6
			for (b = symbol->buckets[i]; b != NULL;	b = b->ahnext)
Packit Service f629e6
				assoc_info(b->ahname, b->ahvalue, ndump, aname);
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	return NULL;
Packit Service f629e6
Packit Service f629e6
#undef HCNT
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/* awk_hash --- calculate the hash function of the string in subs */
Packit Service f629e6
Packit Service f629e6
static unsigned long
Packit Service f629e6
awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
Packit Service f629e6
{
Packit Service f629e6
	unsigned long h = 0;
Packit Service f629e6
	unsigned long htmp;
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Ozan Yigit's original sdbm hash, copied from Margo Seltzers
Packit Service f629e6
	 * db package.
Packit Service f629e6
	 *
Packit Service f629e6
	 * This is INCREDIBLY ugly, but fast.  We break the string up into
Packit Service f629e6
	 * 8 byte units.  On the first time through the loop we get the
Packit Service f629e6
	 * "leftover bytes" (strlen % 8).  On every other iteration, we
Packit Service f629e6
	 * perform 8 HASHC's so we handle all 8 bytes.  Essentially, this
Packit Service f629e6
	 * saves us 7 cmp & branch instructions.  If this routine is
Packit Service f629e6
	 * heavily used enough, it's worth the ugly coding.
Packit Service f629e6
	 */
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Even more speed:
Packit Service f629e6
	 * #define HASHC   h = *s++ + 65599 * h
Packit Service f629e6
	 * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
Packit Service f629e6
	 *
Packit Service f629e6
	 * 4/2011: Force the results to 32 bits, to get the same
Packit Service f629e6
	 * result on both 32- and 64-bit systems. This may be a
Packit Service f629e6
	 * bad idea.
Packit Service f629e6
	 */
Packit Service f629e6
#define HASHC   htmp = (h << 6);  \
Packit Service f629e6
		h = *s++ + htmp + (htmp << 10) - h ; \
Packit Service f629e6
		htmp &= 0xFFFFFFFF; \
Packit Service f629e6
		h &= 0xFFFFFFFF
Packit Service f629e6
Packit Service f629e6
	h = 0;
Packit Service f629e6
Packit Service f629e6
	/* "Duff's Device" */
Packit Service f629e6
	if (len > 0) {
Packit Service f629e6
		size_t loop = (len + 8 - 1) >> 3;
Packit Service f629e6
Packit Service f629e6
		switch (len & (8 - 1)) {
Packit Service f629e6
		case 0:
Packit Service f629e6
			do {	/* All fall throughs */
Packit Service f629e6
				HASHC;
Packit Service f629e6
		case 7:		HASHC;
Packit Service f629e6
		case 6:		HASHC;
Packit Service f629e6
		case 5:		HASHC;
Packit Service f629e6
		case 4:		HASHC;
Packit Service f629e6
		case 3:		HASHC;
Packit Service f629e6
		case 2:		HASHC;
Packit Service f629e6
		case 1:		HASHC;
Packit Service f629e6
			} while (--loop);
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	if (code != NULL)
Packit Service f629e6
		*code = h;
Packit Service f629e6
Packit Service f629e6
	if (h >= hsize)
Packit Service f629e6
		h %= hsize;
Packit Service f629e6
	return h;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/* str_find --- locate symbol[subs] */
Packit Service f629e6
Packit Service f629e6
static inline NODE **
Packit Service f629e6
str_find(NODE *symbol, NODE *s1, size_t code1, unsigned long hash1)
Packit Service f629e6
{
Packit Service f629e6
	BUCKET *b;
Packit Service f629e6
	size_t s2_len;
Packit Service f629e6
Packit Service f629e6
	for (b = symbol->buckets[hash1]; b != NULL; b = b->ahnext) {
Packit Service f629e6
		/*
Packit Service f629e6
		 * This used to use cmp_nodes() here.  That's wrong.
Packit Service f629e6
		 * Array indexes are strings; compare as such, always!
Packit Service f629e6
	 	 */
Packit Service f629e6
		s2_len = b->ahname_len;
Packit Service f629e6
Packit Service f629e6
		if (code1 == b->ahcode
Packit Service f629e6
			&& s1->stlen == s2_len
Packit Service f629e6
			&& (s2_len == 0		/* "" is a valid index */
Packit Service f629e6
				|| memcmp(s1->stptr, b->ahname_str, s2_len) == 0)
Packit Service f629e6
		)
Packit Service f629e6
			return & (b->ahvalue);
Packit Service f629e6
	}
Packit Service f629e6
	return NULL;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/* grow_table --- grow a hash table */
Packit Service f629e6
Packit Service f629e6
static void
Packit Service f629e6
grow_table(NODE *symbol)
Packit Service f629e6
{
Packit Service f629e6
	BUCKET **old, **new;
Packit Service f629e6
	BUCKET *chain, *next;
Packit Service f629e6
	int i, j;
Packit Service f629e6
	unsigned long oldsize, newsize, k;
Packit Service f629e6
	unsigned long hash1;
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * This is an array of primes. We grow the table by an order of
Packit Service f629e6
	 * magnitude each time (not just doubling) so that growing is a
Packit Service f629e6
	 * rare operation. We expect, on average, that it won't happen
Packit Service f629e6
	 * more than twice.  The final size is also chosen to be small
Packit Service f629e6
	 * enough so that MS-DOG mallocs can handle it. When things are
Packit Service f629e6
	 * very large (> 8K), we just double more or less, instead of
Packit Service f629e6
	 * just jumping from 8K to 64K.
Packit Service f629e6
	 */
Packit Service f629e6
Packit Service f629e6
	static const unsigned long sizes[] = {
Packit Service f629e6
		13, 127, 1021, 8191, 16381, 32749, 65497,
Packit Service f629e6
		131101, 262147, 524309, 1048583, 2097169,
Packit Service f629e6
		4194319, 8388617, 16777259, 33554467,
Packit Service f629e6
		67108879, 134217757, 268435459, 536870923,
Packit Service f629e6
		1073741827
Packit Service f629e6
	};
Packit Service f629e6
Packit Service f629e6
	/* find next biggest hash size */
Packit Service f629e6
	newsize = oldsize = symbol->array_size;
Packit Service f629e6
Packit Service f629e6
	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
Packit Service f629e6
		if (oldsize < sizes[i]) {
Packit Service f629e6
			newsize = sizes[i];
Packit Service f629e6
			break;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
	if (newsize == oldsize) {	/* table already at max (!) */
Packit Service f629e6
		symbol->flags |= ARRAYMAXED;
Packit Service f629e6
		return;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/* allocate new table */
Packit Service f629e6
	ezalloc(new, BUCKET **, newsize * sizeof(BUCKET *), "grow_table");
Packit Service f629e6
Packit Service f629e6
	old = symbol->buckets;
Packit Service f629e6
	symbol->buckets = new;
Packit Service f629e6
	symbol->array_size = newsize;
Packit Service f629e6
Packit Service f629e6
	/* brand new hash table, set things up and return */
Packit Service f629e6
	if (old == NULL) {
Packit Service f629e6
		symbol->table_size = 0;
Packit Service f629e6
		return;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/* old hash table there, move stuff to new, free old */
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * note that symbol->table_size does not change if an old array,
Packit Service f629e6
	 * and is explicitly set to 0 if a new one.
Packit Service f629e6
	 */
Packit Service f629e6
Packit Service f629e6
	for (k = 0; k < oldsize; k++) {
Packit Service f629e6
		for (chain = old[k]; chain != NULL; chain = next) {
Packit Service f629e6
			next = chain->ahnext;
Packit Service f629e6
			hash1 = chain->ahcode % newsize;
Packit Service f629e6
Packit Service f629e6
			/* remove from old list, add to new */
Packit Service f629e6
			chain->ahnext = new[hash1];
Packit Service f629e6
			new[hash1] = chain;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
	efree(old);
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
Packit Service f629e6
/*
Packit Service f629e6
From bonzini@gnu.org  Mon Oct 28 16:05:26 2002
Packit Service f629e6
Date: Mon, 28 Oct 2002 13:33:03 +0100
Packit Service f629e6
From: Paolo Bonzini <bonzini@gnu.org>
Packit Service f629e6
To: arnold@skeeve.com
Packit Service f629e6
Subject: Hash function
Packit Service f629e6
Message-ID: <20021028123303.GA6832@biancaneve>
Packit Service f629e6
Packit Service f629e6
Here is the hash function I'm using in GNU Smalltalk.  The scrambling is
Packit Service f629e6
needed if you use powers of two as the table sizes.  If you use primes it
Packit Service f629e6
is not needed.
Packit Service f629e6
Packit Service f629e6
To use double-hashing with power-of-two size, you should use the
Packit Service f629e6
_gst_hash_string(str, len) as the primary hash and
Packit Service f629e6
scramble(_gst_hash_string (str, len)) | 1 as the secondary hash.
Packit Service f629e6
Packit Service f629e6
Paolo
Packit Service f629e6
Packit Service f629e6
*/
Packit Service f629e6
/*
Packit Service f629e6
 * ADR: Slightly modified to work w/in the context of gawk.
Packit Service f629e6
 */
Packit Service f629e6
Packit Service f629e6
static unsigned long
Packit Service f629e6
gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code)
Packit Service f629e6
{
Packit Service f629e6
	unsigned long hashVal = 1497032417;    /* arbitrary value */
Packit Service f629e6
	unsigned long ret;
Packit Service f629e6
Packit Service f629e6
	while (len--) {
Packit Service f629e6
		hashVal += *str++;
Packit Service f629e6
		hashVal += (hashVal << 10);
Packit Service f629e6
		hashVal ^= (hashVal >> 6);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	ret = scramble(hashVal);
Packit Service f629e6
Packit Service f629e6
	if (code != NULL)
Packit Service f629e6
		*code = ret;
Packit Service f629e6
Packit Service f629e6
	if (ret >= hsize)
Packit Service f629e6
		ret %= hsize;
Packit Service f629e6
Packit Service f629e6
	return ret;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
static unsigned long
Packit Service f629e6
scramble(unsigned long x)
Packit Service f629e6
{
Packit Service f629e6
	if (sizeof(long) == 4) {
Packit Service f629e6
		int y = ~x;
Packit Service f629e6
Packit Service f629e6
		x += (y << 10) | (y >> 22);
Packit Service f629e6
		x += (x << 6)  | (x >> 26);
Packit Service f629e6
		x -= (x << 16) | (x >> 16);
Packit Service f629e6
	} else {
Packit Service f629e6
		x ^= (~x) >> 31;
Packit Service f629e6
		x += (x << 21) | (x >> 11);
Packit Service f629e6
		x += (x << 5) | (x >> 27);
Packit Service f629e6
		x += (x << 27) | (x >> 5);
Packit Service f629e6
		x += (x << 31);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	return x;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* env_remove --- for ENVIRON, remove value from real environment */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
env_remove(NODE *symbol, NODE *subs)
Packit Service f629e6
{
Packit Service f629e6
	NODE **val = str_remove(symbol, subs);
Packit Service f629e6
	char save;
Packit Service f629e6
Packit Service f629e6
	if (val != NULL) {
Packit Service f629e6
		str_terminate(subs, save);
Packit Service f629e6
		(void) unsetenv(subs->stptr);
Packit Service f629e6
		str_restore(subs, save);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	return val;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* env_clear --- clear out the environment when ENVIRON is deleted */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
env_clear(NODE *symbol, NODE *subs)
Packit Service f629e6
{
Packit Service f629e6
	extern char **environ;
Packit Service f629e6
	NODE **val = str_clear(symbol, subs);
Packit Service f629e6
Packit Service f629e6
	environ = NULL;	/* ZAP! */
Packit Service f629e6
Packit Service f629e6
	/* str_clear zaps the vtable, reset it */
Packit Service f629e6
	symbol->array_funcs = env_array_func;
Packit Service f629e6
Packit Service f629e6
	return val;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* env_store --- post assign function for ENVIRON, put new value into env */
Packit Service f629e6
Packit Service f629e6
static NODE **
Packit Service f629e6
env_store(NODE *symbol, NODE *subs)
Packit Service f629e6
{
Packit Service f629e6
	NODE **val = str_exists(symbol, subs);
Packit Service f629e6
	const char *newval;
Packit Service f629e6
Packit Service f629e6
	assert(val != NULL);
Packit Service f629e6
Packit Service f629e6
	newval = (*val)->stptr;
Packit Service f629e6
	if (newval == NULL)
Packit Service f629e6
		newval = "";
Packit Service f629e6
Packit Service f629e6
	(void) setenv(subs->stptr, newval, 1);
Packit Service f629e6
Packit Service f629e6
	return val;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* init_env_array --- set up the pointers for ENVIRON. A bit hacky. */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
init_env_array(NODE *env_node)
Packit Service f629e6
{
Packit Service f629e6
	/* If POSIX simply don't reset the vtable and things work as before */
Packit Service f629e6
	if (do_posix)
Packit Service f629e6
		return;
Packit Service f629e6
Packit Service f629e6
	env_node->array_funcs = env_array_func;
Packit Service f629e6
}