Blame str_array.c

Packit 575503
/*
Packit 575503
 * str_array.c - routines for associative arrays of string indices.
Packit 575503
 */
Packit 575503
Packit 575503
/*
Packit 575503
 * Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2018,
Packit 575503
 * the Free Software Foundation, Inc.
Packit 575503
 *
Packit 575503
 * This file is part of GAWK, the GNU implementation of the
Packit 575503
 * AWK Programming Language.
Packit 575503
 *
Packit 575503
 * GAWK is free software; you can redistribute it and/or modify
Packit 575503
 * it under the terms of the GNU General Public License as published by
Packit 575503
 * the Free Software Foundation; either version 3 of the License, or
Packit 575503
 * (at your option) any later version.
Packit 575503
 *
Packit 575503
 * GAWK is distributed in the hope that it will be useful,
Packit 575503
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 575503
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 575503
 * GNU General Public License for more details.
Packit 575503
 *
Packit 575503
 * You should have received a copy of the GNU General Public License
Packit 575503
 * along with this program; if not, write to the Free Software
Packit 575503
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
Packit 575503
 */
Packit 575503
Packit 575503
#include "awk.h"
Packit 575503
Packit 575503
/*
Packit 575503
 * Tree walks (``for (iggy in foo)'') and array deletions use expensive
Packit 575503
 * linear searching.  So what we do is start out with small arrays and
Packit 575503
 * grow them as needed, so that our arrays are hopefully small enough,
Packit 575503
 * most of the time, that they're pretty full and we're not looking at
Packit 575503
 * wasted space.
Packit 575503
 *
Packit 575503
 * The decision is made to grow the array if the average chain length is
Packit 575503
 * ``too big''. This is defined as the total number of entries in the table
Packit 575503
 * divided by the size of the array being greater than some constant.
Packit 575503
 *
Packit 575503
 * 11/2002: We make the constant a variable, so that it can be tweaked
Packit 575503
 * via environment variable.
Packit 575503
 * 11/2002: Modern machines are bigger, cut this down from 10.
Packit 575503
 */
Packit 575503
Packit 575503
static size_t STR_CHAIN_MAX = 2;
Packit 575503
Packit 575503
extern FILE *output_fp;
Packit 575503
extern void indent(int indent_level);
Packit 575503
Packit 575503
static NODE **str_array_init(NODE *symbol, NODE *subs);
Packit 575503
static NODE **str_lookup(NODE *symbol, NODE *subs);
Packit 575503
static NODE **str_exists(NODE *symbol, NODE *subs);
Packit 575503
static NODE **str_clear(NODE *symbol, NODE *subs);
Packit 575503
static NODE **str_remove(NODE *symbol, NODE *subs);
Packit 575503
static NODE **str_list(NODE *symbol, NODE *subs);
Packit 575503
static NODE **str_copy(NODE *symbol, NODE *newsymb);
Packit 575503
static NODE **str_dump(NODE *symbol, NODE *ndump);
Packit 575503
Packit 575503
afunc_t str_array_func[] = {
Packit 575503
	str_array_init,
Packit 575503
	(afunc_t) 0,
Packit 575503
	null_length,
Packit 575503
	str_lookup,
Packit 575503
	str_exists,
Packit 575503
	str_clear,
Packit 575503
	str_remove,
Packit 575503
	str_list,
Packit 575503
	str_copy,
Packit 575503
	str_dump,
Packit 575503
	(afunc_t) 0,
Packit 575503
};
Packit 575503
Packit 575503
static NODE **env_remove(NODE *symbol, NODE *subs);
Packit 575503
static NODE **env_store(NODE *symbol, NODE *subs);
Packit 575503
static NODE **env_clear(NODE *symbol, NODE *subs);
Packit 575503
Packit 575503
/* special case for ENVIRON */
Packit 575503
afunc_t env_array_func[] = {
Packit 575503
	str_array_init,
Packit 575503
	(afunc_t) 0,
Packit 575503
	null_length,
Packit 575503
	str_lookup,
Packit 575503
	str_exists,
Packit 575503
	env_clear,
Packit 575503
	env_remove,
Packit 575503
	str_list,
Packit 575503
	str_copy,
Packit 575503
	str_dump,
Packit 575503
	env_store,
Packit 575503
};
Packit 575503
Packit 575503
static inline NODE **str_find(NODE *symbol, NODE *s1, size_t code1, unsigned long hash1);
Packit 575503
static void grow_table(NODE *symbol);
Packit 575503
Packit 575503
static unsigned long gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code);
Packit 575503
static unsigned long scramble(unsigned long x);
Packit 575503
static unsigned long awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code);
Packit 575503
Packit 575503
unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code) = awk_hash;
Packit 575503
Packit 575503
Packit 575503
/* str_array_init --- array initialization routine */
Packit 575503
Packit 575503
static NODE **
Packit 575503
str_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
Packit 575503
{
Packit 575503
	if (symbol == NULL) {		/* first time */
Packit 575503
		long newval;
Packit 575503
		const char *val;
Packit 575503
Packit 575503
		/* check relevant environment variables */
Packit 575503
		if ((newval = getenv_long("STR_CHAIN_MAX")) > 0)
Packit 575503
			STR_CHAIN_MAX = newval;
Packit 575503
		if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0)
Packit 575503
			hash = gst_hash_string;
Packit 575503
	} else
Packit 575503
		null_array(symbol);
Packit 575503
Packit 575503
	return & success_node;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/*
Packit 575503
 * assoc_lookup:
Packit 575503
 * Find SYMBOL[SUBS] in the assoc array.  Install it with value "" if it
Packit 575503
 * isn't there. Returns a pointer ala get_lhs to where its value is stored.
Packit 575503
 *
Packit 575503
 * SYMBOL is the address of the node (or other pointer) being dereferenced.
Packit 575503
 * SUBS is a number or string used as the subscript.
Packit 575503
 */
Packit 575503
Packit 575503
static NODE **
Packit 575503
str_lookup(NODE *symbol, NODE *subs)
Packit 575503
{
Packit 575503
	unsigned long hash1;
Packit 575503
	NODE **lhs;
Packit 575503
	BUCKET *b;
Packit 575503
	size_t code1;
Packit 575503
Packit 575503
	subs = force_string(subs);
Packit 575503
Packit 575503
	if (symbol->buckets == NULL)
Packit 575503
		grow_table(symbol);
Packit 575503
	hash1 = hash(subs->stptr, subs->stlen,
Packit 575503
			(unsigned long) symbol->array_size, & code1);
Packit 575503
	if ((lhs = str_find(symbol, subs, code1, hash1)) != NULL)
Packit 575503
		return lhs;
Packit 575503
Packit 575503
	/* It's not there, install it. */
Packit 575503
	/* first see if we would need to grow the array, before installing */
Packit 575503
Packit 575503
	symbol->table_size++;
Packit 575503
	if ((symbol->flags & ARRAYMAXED) == 0
Packit 575503
			&& (symbol->table_size / symbol->array_size) > STR_CHAIN_MAX) {
Packit 575503
		grow_table(symbol);
Packit 575503
		/* have to recompute hash value for new size */
Packit 575503
		hash1 = code1 % (unsigned long) symbol->array_size;
Packit 575503
	}
Packit 575503
Packit 575503
Packit 575503
	/*
Packit 575503
	 * Repeat after me: "Array indices are always strings."
Packit 575503
	 * "Array indices are always strings."
Packit 575503
	 * "Array indices are always strings."
Packit 575503
	 * "Array indices are always strings."
Packit 575503
	 * ....
Packit 575503
	 */
Packit 575503
	// Special cases:
Packit 575503
	// 1. The string was generated using CONVFMT.
Packit 575503
	// 2. The string was from an unassigned variable.
Packit 575503
	// 3. The string was from an unassigned field.
Packit 575503
	if (   subs->stfmt != STFMT_UNUSED
Packit 575503
	    || subs == Nnull_string
Packit 575503
	    || (subs->flags & NULL_FIELD) != 0) {
Packit 575503
		NODE *tmp;
Packit 575503
Packit 575503
		/*
Packit 575503
		 * Need to freeze this string value --- it must never
Packit 575503
		 * change, no matter what happens to the value
Packit 575503
		 * that created it or to CONVFMT, etc.; So, get
Packit 575503
		 * a private copy.
Packit 575503
		 */
Packit 575503
Packit 575503
		tmp = make_string(subs->stptr, subs->stlen);
Packit 575503
Packit 575503
		/*
Packit 575503
		* Set the numeric value for the index if it's  available. Useful
Packit 575503
		* for numeric sorting by index.  Do this only if the numeric
Packit 575503
		* value is available, instead of all the time, since doing it
Packit 575503
		* all the time is a big performance hit for something that may
Packit 575503
		* never be used.
Packit 575503
		*/
Packit 575503
Packit 575503
		if ((subs->flags & (MPFN|MPZN|NUMCUR)) == NUMCUR) {
Packit 575503
			tmp->numbr = subs->numbr;
Packit 575503
			tmp->flags |= NUMCUR;
Packit 575503
		}
Packit 575503
		subs = tmp;
Packit 575503
	} else {
Packit 575503
		/* string value already "frozen" */
Packit 575503
Packit 575503
		subs = dupnode(subs);
Packit 575503
	}
Packit 575503
Packit 575503
	getbucket(b);
Packit 575503
	b->ahnext = symbol->buckets[hash1];
Packit 575503
	symbol->buckets[hash1] = b;
Packit 575503
	b->ahname = subs;
Packit 575503
	b->ahname_str = subs->stptr;
Packit 575503
	b->ahname_len = subs->stlen;
Packit 575503
	b->ahvalue = dupnode(Nnull_string);
Packit 575503
	b->ahcode = code1;
Packit 575503
	return & (b->ahvalue);
Packit 575503
}
Packit 575503
Packit 575503
/* str_exists --- test whether the array element symbol[subs] exists or not,
Packit 575503
 * 		return pointer to value if it does.
Packit 575503
 */
Packit 575503
Packit 575503
static NODE **
Packit 575503
str_exists(NODE *symbol, NODE *subs)
Packit 575503
{
Packit 575503
	unsigned long hash1;
Packit 575503
	size_t code1;
Packit 575503
Packit 575503
	if (symbol->table_size == 0)
Packit 575503
		return NULL;
Packit 575503
Packit 575503
	subs = force_string(subs);
Packit 575503
	hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size, & code1);
Packit 575503
	return str_find(symbol, subs, code1, hash1);
Packit 575503
}
Packit 575503
Packit 575503
/* str_clear --- flush all the values in symbol[] */
Packit 575503
Packit 575503
static NODE **
Packit 575503
str_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
Packit 575503
{
Packit 575503
	unsigned long i;
Packit 575503
	BUCKET *b, *next;
Packit 575503
	NODE *r;
Packit 575503
Packit 575503
	for (i = 0; i < symbol->array_size; i++) {
Packit 575503
		for (b = symbol->buckets[i]; b != NULL; b = next) {
Packit 575503
			next = b->ahnext;
Packit 575503
			r = b->ahvalue;
Packit 575503
			if (r->type == Node_var_array) {
Packit 575503
				assoc_clear(r);	/* recursively clear all sub-arrays */
Packit 575503
				efree(r->vname);
Packit 575503
				freenode(r);
Packit 575503
			} else
Packit 575503
				unref(r);
Packit 575503
			unref(b->ahname);
Packit 575503
			freebucket(b);
Packit 575503
		}
Packit 575503
		symbol->buckets[i] = NULL;
Packit 575503
	}
Packit 575503
Packit 575503
	if (symbol->buckets != NULL)
Packit 575503
		efree(symbol->buckets);
Packit 575503
	symbol->ainit(symbol, NULL);	/* re-initialize symbol */
Packit 575503
	return NULL;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* str_remove --- If SUBS is already in the table, remove it. */
Packit 575503
Packit 575503
static NODE **
Packit 575503
str_remove(NODE *symbol, NODE *subs)
Packit 575503
{
Packit 575503
	unsigned long hash1;
Packit 575503
	BUCKET *b, *prev;
Packit 575503
	NODE *s2;
Packit 575503
	size_t s1_len;
Packit 575503
Packit 575503
	if (symbol->table_size == 0)
Packit 575503
		return NULL;
Packit 575503
Packit 575503
	s2 = force_string(subs);
Packit 575503
	hash1 = hash(s2->stptr, s2->stlen, (unsigned long) symbol->array_size, NULL);
Packit 575503
Packit 575503
	for (b = symbol->buckets[hash1], prev = NULL; b != NULL;
Packit 575503
				prev = b, b = b->ahnext) {
Packit 575503
Packit 575503
		/* Array indexes are strings; compare as such, always! */
Packit 575503
		s1_len = b->ahname_len;
Packit 575503
Packit 575503
		if (s1_len != s2->stlen)
Packit 575503
			continue;
Packit 575503
		if (s1_len == 0		/* "" is a valid index */
Packit 575503
			    || memcmp(b->ahname_str, s2->stptr, s1_len) == 0) {
Packit 575503
			/* item found */
Packit 575503
Packit 575503
			unref(b->ahname);
Packit 575503
			if (prev != NULL)
Packit 575503
				prev->ahnext = b->ahnext;
Packit 575503
			else
Packit 575503
				symbol->buckets[hash1] = b->ahnext;
Packit 575503
Packit 575503
			/* delete bucket */
Packit 575503
			freebucket(b);
Packit 575503
Packit 575503
			/* one less element in array */
Packit 575503
			if (--symbol->table_size == 0) {
Packit 575503
				if (symbol->buckets != NULL)
Packit 575503
					efree(symbol->buckets);
Packit 575503
				symbol->ainit(symbol, NULL);	/* re-initialize symbol */
Packit 575503
			}
Packit 575503
Packit 575503
			return & success_node;	/* return success */
Packit 575503
		}
Packit 575503
	}
Packit 575503
Packit 575503
	return NULL;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* str_copy --- duplicate input array "symbol" */
Packit 575503
Packit 575503
static NODE **
Packit 575503
str_copy(NODE *symbol, NODE *newsymb)
Packit 575503
{
Packit 575503
	BUCKET **old, **new, **pnew;
Packit 575503
	BUCKET *chain, *newchain;
Packit 575503
	unsigned long cursize, i;
Packit 575503
Packit 575503
	assert(symbol->table_size > 0);
Packit 575503
Packit 575503
	/* find the current hash size */
Packit 575503
	cursize = symbol->array_size;
Packit 575503
Packit 575503
	/* allocate new table */
Packit 575503
	ezalloc(new, BUCKET **, cursize * sizeof(BUCKET *), "str_copy");
Packit 575503
Packit 575503
	old = symbol->buckets;
Packit 575503
Packit 575503
	for (i = 0; i < cursize; i++) {
Packit 575503
		for (chain = old[i], pnew = & new[i]; chain != NULL;
Packit 575503
				chain = chain->ahnext
Packit 575503
		) {
Packit 575503
			NODE *oldval, *newsubs;
Packit 575503
Packit 575503
			getbucket(newchain);
Packit 575503
Packit 575503
			/*
Packit 575503
			 * copy the corresponding name and
Packit 575503
			 * value from the original input list
Packit 575503
			 */
Packit 575503
Packit 575503
			newsubs = newchain->ahname = dupnode(chain->ahname);
Packit 575503
			newchain->ahname_str = newsubs->stptr;
Packit 575503
			newchain->ahname_len = newsubs->stlen;
Packit 575503
Packit 575503
			oldval = chain->ahvalue;
Packit 575503
			if (oldval->type == Node_val)
Packit 575503
				newchain->ahvalue = dupnode(oldval);
Packit 575503
			else {
Packit 575503
				NODE *r;
Packit 575503
Packit 575503
				r = make_array();
Packit 575503
				r->vname = estrdup(oldval->vname, strlen(oldval->vname));
Packit 575503
				r->parent_array = newsymb;
Packit 575503
				newchain->ahvalue = assoc_copy(oldval, r);
Packit 575503
			}
Packit 575503
			newchain->ahcode = chain->ahcode;
Packit 575503
Packit 575503
			*pnew = newchain;
Packit 575503
			newchain->ahnext = NULL;
Packit 575503
			pnew = & newchain->ahnext;
Packit 575503
		}
Packit 575503
	}
Packit 575503
Packit 575503
	newsymb->table_size = symbol->table_size;
Packit 575503
	newsymb->buckets = new;
Packit 575503
	newsymb->array_size = cursize;
Packit 575503
	newsymb->flags = symbol->flags;
Packit 575503
	return NULL;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* str_list --- return a list of array items */
Packit 575503
Packit 575503
static NODE**
Packit 575503
str_list(NODE *symbol, NODE *t)
Packit 575503
{
Packit 575503
	NODE **list;
Packit 575503
	NODE *subs, *val;
Packit 575503
	BUCKET *b;
Packit 575503
	unsigned long num_elems, list_size, i, k = 0;
Packit 575503
	int elem_size = 1;
Packit 575503
	assoc_kind_t assoc_kind;
Packit 575503
Packit 575503
	if (symbol->table_size == 0)
Packit 575503
		return NULL;
Packit 575503
Packit 575503
	assoc_kind = (assoc_kind_t) t->flags;
Packit 575503
	if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
Packit 575503
		elem_size = 2;
Packit 575503
Packit 575503
	/* allocate space for array */
Packit 575503
	num_elems = symbol->table_size;
Packit 575503
	if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
Packit 575503
		num_elems = 1;
Packit 575503
	list_size =  elem_size * num_elems;
Packit 575503
Packit 575503
	emalloc(list, NODE **, list_size * sizeof(NODE *), "str_list");
Packit 575503
Packit 575503
	/* populate it */
Packit 575503
Packit 575503
	for (i = 0; i < symbol->array_size; i++) {
Packit 575503
		for (b = symbol->buckets[i]; b != NULL;	b = b->ahnext) {
Packit 575503
			/* index */
Packit 575503
			subs = b->ahname;
Packit 575503
			if ((assoc_kind & AINUM) != 0)
Packit 575503
				(void) force_number(subs);
Packit 575503
			list[k++] = dupnode(subs);
Packit 575503
Packit 575503
			/* value */
Packit 575503
			if ((assoc_kind & AVALUE) != 0) {
Packit 575503
				val = b->ahvalue;
Packit 575503
				if (val->type == Node_val) {
Packit 575503
					if ((assoc_kind & AVNUM) != 0)
Packit 575503
						(void) force_number(val);
Packit 575503
					else if ((assoc_kind & AVSTR) != 0)
Packit 575503
						val = force_string(val);
Packit 575503
				}
Packit 575503
				list[k++] = val;
Packit 575503
			}
Packit 575503
			if (k >= list_size)
Packit 575503
				return list;
Packit 575503
		}
Packit 575503
	}
Packit 575503
	return list;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* str_kilobytes --- calculate memory consumption of the assoc array */
Packit 575503
Packit 575503
AWKNUM
Packit 575503
str_kilobytes(NODE *symbol)
Packit 575503
{
Packit 575503
	unsigned long bucket_cnt;
Packit 575503
	AWKNUM kb;
Packit 575503
Packit 575503
	bucket_cnt = symbol->table_size;
Packit 575503
Packit 575503
	/* This does not include extra memory for indices with stfmt != STFMT_UNUSED */
Packit 575503
	kb = (((AWKNUM) bucket_cnt) * sizeof (BUCKET) +
Packit 575503
		((AWKNUM) symbol->array_size) * sizeof (BUCKET *)) / 1024.0;
Packit 575503
	return kb;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* str_dump --- dump array info */
Packit 575503
Packit 575503
static NODE **
Packit 575503
str_dump(NODE *symbol, NODE *ndump)
Packit 575503
{
Packit 575503
#define HCNT	31
Packit 575503
Packit 575503
	int indent_level;
Packit 575503
	unsigned long i, bucket_cnt;
Packit 575503
	BUCKET *b;
Packit 575503
	static size_t hash_dist[HCNT + 1];
Packit 575503
Packit 575503
	indent_level = ndump->alevel;
Packit 575503
Packit 575503
	if ((symbol->flags & XARRAY) == 0)
Packit 575503
		fprintf(output_fp, "%s `%s'\n",
Packit 575503
				(symbol->parent_array == NULL) ? "array" : "sub-array",
Packit 575503
				array_vname(symbol));
Packit 575503
	indent_level++;
Packit 575503
	indent(indent_level);
Packit 575503
	fprintf(output_fp, "array_func: str_array_func\n");
Packit 575503
	if (symbol->flags != 0) {
Packit 575503
		indent(indent_level);
Packit 575503
		fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
Packit 575503
	}
Packit 575503
	indent(indent_level);
Packit 575503
	fprintf(output_fp, "STR_CHAIN_MAX: %lu\n", (unsigned long) STR_CHAIN_MAX);
Packit 575503
	indent(indent_level);
Packit 575503
	fprintf(output_fp, "array_size: %lu\n", (unsigned long) symbol->array_size);
Packit 575503
	indent(indent_level);
Packit 575503
	fprintf(output_fp, "table_size: %lu\n", (unsigned long) symbol->table_size);
Packit 575503
	indent(indent_level);
Packit 575503
	fprintf(output_fp, "Avg # of items per chain: %.2g\n",
Packit 575503
				((AWKNUM) symbol->table_size) / symbol->array_size);
Packit 575503
Packit 575503
	indent(indent_level);
Packit 575503
	fprintf(output_fp, "memory: %.2g kB\n", str_kilobytes(symbol));
Packit 575503
Packit 575503
	/* hash value distribution */
Packit 575503
Packit 575503
	memset(hash_dist, '\0', (HCNT + 1) * sizeof(size_t));
Packit 575503
	for (i = 0; i < symbol->array_size; i++) {
Packit 575503
		bucket_cnt = 0;
Packit 575503
		for (b = symbol->buckets[i]; b != NULL;	b = b->ahnext)
Packit 575503
			bucket_cnt++;
Packit 575503
		if (bucket_cnt >= HCNT)
Packit 575503
			bucket_cnt = HCNT;
Packit 575503
		hash_dist[bucket_cnt]++;
Packit 575503
	}
Packit 575503
Packit 575503
	indent(indent_level);
Packit 575503
	fprintf(output_fp, "Hash distribution:\n");
Packit 575503
	indent_level++;
Packit 575503
	for (i = 0; i <= HCNT; i++) {
Packit 575503
		if (hash_dist[i] > 0) {
Packit 575503
			indent(indent_level);
Packit 575503
			if (i == HCNT)
Packit 575503
				fprintf(output_fp, "[>=%lu]:%lu\n",
Packit 575503
					(unsigned long) HCNT, (unsigned long) hash_dist[i]);
Packit 575503
			else
Packit 575503
				fprintf(output_fp, "[%lu]:%lu\n",
Packit 575503
					(unsigned long) i, (unsigned long) hash_dist[i]);
Packit 575503
		}
Packit 575503
	}
Packit 575503
	indent_level--;
Packit 575503
Packit 575503
	/* dump elements */
Packit 575503
Packit 575503
	if (ndump->adepth >= 0) {
Packit 575503
		const char *aname;
Packit 575503
Packit 575503
		fprintf(output_fp, "\n");
Packit 575503
		aname = make_aname(symbol);
Packit 575503
		for (i = 0; i < symbol->array_size; i++) {
Packit 575503
			for (b = symbol->buckets[i]; b != NULL;	b = b->ahnext)
Packit 575503
				assoc_info(b->ahname, b->ahvalue, ndump, aname);
Packit 575503
		}
Packit 575503
	}
Packit 575503
Packit 575503
	return NULL;
Packit 575503
Packit 575503
#undef HCNT
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* awk_hash --- calculate the hash function of the string in subs */
Packit 575503
Packit 575503
static unsigned long
Packit 575503
awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
Packit 575503
{
Packit 575503
	unsigned long h = 0;
Packit 575503
	unsigned long htmp;
Packit 575503
Packit 575503
	/*
Packit 575503
	 * Ozan Yigit's original sdbm hash, copied from Margo Seltzers
Packit 575503
	 * db package.
Packit 575503
	 *
Packit 575503
	 * This is INCREDIBLY ugly, but fast.  We break the string up into
Packit 575503
	 * 8 byte units.  On the first time through the loop we get the
Packit 575503
	 * "leftover bytes" (strlen % 8).  On every other iteration, we
Packit 575503
	 * perform 8 HASHC's so we handle all 8 bytes.  Essentially, this
Packit 575503
	 * saves us 7 cmp & branch instructions.  If this routine is
Packit 575503
	 * heavily used enough, it's worth the ugly coding.
Packit 575503
	 */
Packit 575503
Packit 575503
	/*
Packit 575503
	 * Even more speed:
Packit 575503
	 * #define HASHC   h = *s++ + 65599 * h
Packit 575503
	 * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
Packit 575503
	 *
Packit 575503
	 * 4/2011: Force the results to 32 bits, to get the same
Packit 575503
	 * result on both 32- and 64-bit systems. This may be a
Packit 575503
	 * bad idea.
Packit 575503
	 */
Packit 575503
#define HASHC   htmp = (h << 6);  \
Packit 575503
		h = *s++ + htmp + (htmp << 10) - h ; \
Packit 575503
		htmp &= 0xFFFFFFFF; \
Packit 575503
		h &= 0xFFFFFFFF
Packit 575503
Packit 575503
	h = 0;
Packit 575503
Packit 575503
	/* "Duff's Device" */
Packit 575503
	if (len > 0) {
Packit 575503
		size_t loop = (len + 8 - 1) >> 3;
Packit 575503
Packit 575503
		switch (len & (8 - 1)) {
Packit 575503
		case 0:
Packit 575503
			do {	/* All fall throughs */
Packit 575503
				HASHC;
Packit 575503
		case 7:		HASHC;
Packit 575503
		case 6:		HASHC;
Packit 575503
		case 5:		HASHC;
Packit 575503
		case 4:		HASHC;
Packit 575503
		case 3:		HASHC;
Packit 575503
		case 2:		HASHC;
Packit 575503
		case 1:		HASHC;
Packit 575503
			} while (--loop);
Packit 575503
		}
Packit 575503
	}
Packit 575503
Packit 575503
	if (code != NULL)
Packit 575503
		*code = h;
Packit 575503
Packit 575503
	if (h >= hsize)
Packit 575503
		h %= hsize;
Packit 575503
	return h;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* str_find --- locate symbol[subs] */
Packit 575503
Packit 575503
static inline NODE **
Packit 575503
str_find(NODE *symbol, NODE *s1, size_t code1, unsigned long hash1)
Packit 575503
{
Packit 575503
	BUCKET *b;
Packit 575503
	size_t s2_len;
Packit 575503
Packit 575503
	for (b = symbol->buckets[hash1]; b != NULL; b = b->ahnext) {
Packit 575503
		/*
Packit 575503
		 * This used to use cmp_nodes() here.  That's wrong.
Packit 575503
		 * Array indexes are strings; compare as such, always!
Packit 575503
	 	 */
Packit 575503
		s2_len = b->ahname_len;
Packit 575503
Packit 575503
		if (code1 == b->ahcode
Packit 575503
			&& s1->stlen == s2_len
Packit 575503
			&& (s2_len == 0		/* "" is a valid index */
Packit 575503
				|| memcmp(s1->stptr, b->ahname_str, s2_len) == 0)
Packit 575503
		)
Packit 575503
			return & (b->ahvalue);
Packit 575503
	}
Packit 575503
	return NULL;
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
/* grow_table --- grow a hash table */
Packit 575503
Packit 575503
static void
Packit 575503
grow_table(NODE *symbol)
Packit 575503
{
Packit 575503
	BUCKET **old, **new;
Packit 575503
	BUCKET *chain, *next;
Packit 575503
	int i, j;
Packit 575503
	unsigned long oldsize, newsize, k;
Packit 575503
	unsigned long hash1;
Packit 575503
Packit 575503
	/*
Packit 575503
	 * This is an array of primes. We grow the table by an order of
Packit 575503
	 * magnitude each time (not just doubling) so that growing is a
Packit 575503
	 * rare operation. We expect, on average, that it won't happen
Packit 575503
	 * more than twice.  The final size is also chosen to be small
Packit 575503
	 * enough so that MS-DOG mallocs can handle it. When things are
Packit 575503
	 * very large (> 8K), we just double more or less, instead of
Packit 575503
	 * just jumping from 8K to 64K.
Packit 575503
	 */
Packit 575503
Packit 575503
	static const unsigned long sizes[] = {
Packit 575503
		13, 127, 1021, 8191, 16381, 32749, 65497,
Packit 575503
		131101, 262147, 524309, 1048583, 2097169,
Packit 575503
		4194319, 8388617, 16777259, 33554467,
Packit 575503
		67108879, 134217757, 268435459, 536870923,
Packit 575503
		1073741827
Packit 575503
	};
Packit 575503
Packit 575503
	/* find next biggest hash size */
Packit 575503
	newsize = oldsize = symbol->array_size;
Packit 575503
Packit 575503
	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
Packit 575503
		if (oldsize < sizes[i]) {
Packit 575503
			newsize = sizes[i];
Packit 575503
			break;
Packit 575503
		}
Packit 575503
	}
Packit 575503
	if (newsize == oldsize) {	/* table already at max (!) */
Packit 575503
		symbol->flags |= ARRAYMAXED;
Packit 575503
		return;
Packit 575503
	}
Packit 575503
Packit 575503
	/* allocate new table */
Packit 575503
	ezalloc(new, BUCKET **, newsize * sizeof(BUCKET *), "grow_table");
Packit 575503
Packit 575503
	old = symbol->buckets;
Packit 575503
	symbol->buckets = new;
Packit 575503
	symbol->array_size = newsize;
Packit 575503
Packit 575503
	/* brand new hash table, set things up and return */
Packit 575503
	if (old == NULL) {
Packit 575503
		symbol->table_size = 0;
Packit 575503
		return;
Packit 575503
	}
Packit 575503
Packit 575503
	/* old hash table there, move stuff to new, free old */
Packit 575503
Packit 575503
	/*
Packit 575503
	 * note that symbol->table_size does not change if an old array,
Packit 575503
	 * and is explicitly set to 0 if a new one.
Packit 575503
	 */
Packit 575503
Packit 575503
	for (k = 0; k < oldsize; k++) {
Packit 575503
		for (chain = old[k]; chain != NULL; chain = next) {
Packit 575503
			next = chain->ahnext;
Packit 575503
			hash1 = chain->ahcode % newsize;
Packit 575503
Packit 575503
			/* remove from old list, add to new */
Packit 575503
			chain->ahnext = new[hash1];
Packit 575503
			new[hash1] = chain;
Packit 575503
		}
Packit 575503
	}
Packit 575503
	efree(old);
Packit 575503
}
Packit 575503
Packit 575503
Packit 575503
Packit 575503
/*
Packit 575503
From bonzini@gnu.org  Mon Oct 28 16:05:26 2002
Packit 575503
Date: Mon, 28 Oct 2002 13:33:03 +0100
Packit 575503
From: Paolo Bonzini <bonzini@gnu.org>
Packit 575503
To: arnold@skeeve.com
Packit 575503
Subject: Hash function
Packit 575503
Message-ID: <20021028123303.GA6832@biancaneve>
Packit 575503
Packit 575503
Here is the hash function I'm using in GNU Smalltalk.  The scrambling is
Packit 575503
needed if you use powers of two as the table sizes.  If you use primes it
Packit 575503
is not needed.
Packit 575503
Packit 575503
To use double-hashing with power-of-two size, you should use the
Packit 575503
_gst_hash_string(str, len) as the primary hash and
Packit 575503
scramble(_gst_hash_string (str, len)) | 1 as the secondary hash.
Packit 575503
Packit 575503
Paolo
Packit 575503
Packit 575503
*/
Packit 575503
/*
Packit 575503
 * ADR: Slightly modified to work w/in the context of gawk.
Packit 575503
 */
Packit 575503
Packit 575503
static unsigned long
Packit 575503
gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code)
Packit 575503
{
Packit 575503
	unsigned long hashVal = 1497032417;    /* arbitrary value */
Packit 575503
	unsigned long ret;
Packit 575503
Packit 575503
	while (len--) {
Packit 575503
		hashVal += *str++;
Packit 575503
		hashVal += (hashVal << 10);
Packit 575503
		hashVal ^= (hashVal >> 6);
Packit 575503
	}
Packit 575503
Packit 575503
	ret = scramble(hashVal);
Packit 575503
Packit 575503
	if (code != NULL)
Packit 575503
		*code = ret;
Packit 575503
Packit 575503
	if (ret >= hsize)
Packit 575503
		ret %= hsize;
Packit 575503
Packit 575503
	return ret;
Packit 575503
}
Packit 575503
Packit 575503
static unsigned long
Packit 575503
scramble(unsigned long x)
Packit 575503
{
Packit 575503
	if (sizeof(long) == 4) {
Packit 575503
		int y = ~x;
Packit 575503
Packit 575503
		x += (y << 10) | (y >> 22);
Packit 575503
		x += (x << 6)  | (x >> 26);
Packit 575503
		x -= (x << 16) | (x >> 16);
Packit 575503
	} else {
Packit 575503
		x ^= (~x) >> 31;
Packit 575503
		x += (x << 21) | (x >> 11);
Packit 575503
		x += (x << 5) | (x >> 27);
Packit 575503
		x += (x << 27) | (x >> 5);
Packit 575503
		x += (x << 31);
Packit 575503
	}
Packit 575503
Packit 575503
	return x;
Packit 575503
}
Packit 575503
Packit 575503
/* env_remove --- for ENVIRON, remove value from real environment */
Packit 575503
Packit 575503
static NODE **
Packit 575503
env_remove(NODE *symbol, NODE *subs)
Packit 575503
{
Packit 575503
	NODE **val = str_remove(symbol, subs);
Packit 575503
	char save;
Packit 575503
Packit 575503
	if (val != NULL) {
Packit 575503
		str_terminate(subs, save);
Packit 575503
		(void) unsetenv(subs->stptr);
Packit 575503
		str_restore(subs, save);
Packit 575503
	}
Packit 575503
Packit 575503
	return val;
Packit 575503
}
Packit 575503
Packit 575503
/* env_clear --- clear out the environment when ENVIRON is deleted */
Packit 575503
Packit 575503
static NODE **
Packit 575503
env_clear(NODE *symbol, NODE *subs)
Packit 575503
{
Packit 575503
	extern char **environ;
Packit 575503
	NODE **val = str_clear(symbol, subs);
Packit 575503
Packit 575503
	environ = NULL;	/* ZAP! */
Packit 575503
Packit 575503
	/* str_clear zaps the vtable, reset it */
Packit 575503
	symbol->array_funcs = env_array_func;
Packit 575503
Packit 575503
	return val;
Packit 575503
}
Packit 575503
Packit 575503
/* env_store --- post assign function for ENVIRON, put new value into env */
Packit 575503
Packit 575503
static NODE **
Packit 575503
env_store(NODE *symbol, NODE *subs)
Packit 575503
{
Packit 575503
	NODE **val = str_exists(symbol, subs);
Packit 575503
	const char *newval;
Packit 575503
Packit 575503
	assert(val != NULL);
Packit 575503
Packit 575503
	newval = (*val)->stptr;
Packit 575503
	if (newval == NULL)
Packit 575503
		newval = "";
Packit 575503
Packit 575503
	(void) setenv(subs->stptr, newval, 1);
Packit 575503
Packit 575503
	return val;
Packit 575503
}
Packit 575503
Packit 575503
/* init_env_array --- set up the pointers for ENVIRON. A bit hacky. */
Packit 575503
Packit 575503
void
Packit 575503
init_env_array(NODE *env_node)
Packit 575503
{
Packit 575503
	/* If POSIX simply don't reset the vtable and things work as before */
Packit 575503
	if (do_posix)
Packit 575503
		return;
Packit 575503
Packit 575503
	env_node->array_funcs = env_array_func;
Packit 575503
}