Blob Blame History Raw
/*
 * cint_array.c - routines for arrays of (mostly) consecutive positive integer indices.
 */

/*
 * Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017,
 * the Free Software Foundation, Inc.
 *
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 *
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */

#include "awk.h"

#define INT32_BIT 32

extern FILE *output_fp;
extern void indent(int indent_level);
extern NODE **is_integer(NODE *symbol, NODE *subs);

/*
 * NHAT         ---  maximum size of a leaf array (2^NHAT).
 * THRESHOLD    ---  Maximum capacity waste; THRESHOLD >= 2^(NHAT + 1).
 */

static int NHAT = 10;
static long THRESHOLD;

/*
 * What is the optimium NHAT ? timing results suggest that 10 is a good choice,
 * although differences aren't that significant for > 10.
 */


static NODE **cint_array_init(NODE *symbol, NODE *subs);
static NODE **is_uinteger(NODE *symbol, NODE *subs);
static NODE **cint_lookup(NODE *symbol, NODE *subs);
static NODE **cint_exists(NODE *symbol, NODE *subs);
static NODE **cint_clear(NODE *symbol, NODE *subs);
static NODE **cint_remove(NODE *symbol, NODE *subs);
static NODE **cint_list(NODE *symbol, NODE *t);
static NODE **cint_copy(NODE *symbol, NODE *newsymb);
static NODE **cint_dump(NODE *symbol, NODE *ndump);
#ifdef ARRAYDEBUG
static void cint_print(NODE *symbol);
#endif

afunc_t cint_array_func[] = {
	cint_array_init,
	is_uinteger,
	null_length,
	cint_lookup,
	cint_exists,
	cint_clear,
	cint_remove,
	cint_list,
	cint_copy,
	cint_dump,
	(afunc_t) 0,
};

static inline int cint_hash(long k);
static inline NODE **cint_find(NODE *symbol, long k, int h1);

static inline NODE *make_node(NODETYPE type);

static NODE **tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base);
static NODE **tree_exists(NODE *tree, long k);
static void tree_clear(NODE *tree);
static int tree_remove(NODE *symbol, NODE *tree, long k);
static void tree_copy(NODE *newsymb, NODE *tree, NODE *newtree);
static long tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind);
static inline NODE **tree_find(NODE *tree, long k, int i);
static void tree_info(NODE *tree, NODE *ndump, const char *aname);
static size_t tree_kilobytes(NODE *tree);
#ifdef ARRAYDEBUG
static void tree_print(NODE *tree, size_t bi, int indent_level);
#endif

static inline NODE **leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base);
static inline NODE **leaf_exists(NODE *array, long k);
static void leaf_clear(NODE *array);
static int leaf_remove(NODE *symbol, NODE *array, long k);
static void leaf_copy(NODE *newsymb, NODE *array, NODE *newarray);
static long leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind);
static void leaf_info(NODE *array, NODE *ndump, const char *aname);
#ifdef ARRAYDEBUG
static void leaf_print(NODE *array, size_t bi, int indent_level);
#endif

/* powers of 2 table upto 2^30 */
static const long power_two_table[] = {
	1, 2, 4, 8, 16, 32, 64,
	128, 256, 512, 1024, 2048, 4096,
	8192, 16384, 32768, 65536, 131072, 262144,
	524288, 1048576, 2097152, 4194304, 8388608, 16777216,
	33554432, 67108864, 134217728, 268435456, 536870912, 1073741824
};


#define ISUINT(a, s)	((((s)->flags & NUMINT) != 0 || is_integer(a, s) != NULL) \
                                    && (s)->numbr >= 0)

/*
 * To store 2^n integers, allocate top-level array of size n, elements
 * of which are 1-Dimensional (leaf-array) of geometrically increasing
 * size (power of 2).
 *
 *  [0]   -->  [ 0 ]
 *  [1]   -->  [ 1 ]
 *  |2|   -->  [ 2 | 3 ]
 *  |3|   -->  [ 4 | 5 | 6 | 7 ]
 *  |.|
 *  |k|   -->  [ 2^(k - 1)| ...  | 2^k - 1 ]
 *  ...
 *
 * For a given integer n (> 0), the leaf-array is at 1 + floor(log2(n)).
 *
 * The idea for the geometrically increasing array sizes is from:
 * 	Fast Functional Lists, Hash-Lists, Deques and Variable Length Arrays.
 * 	Bagwell, Phil (2002).
 * 	http://infoscience.epfl.ch/record/64410/files/techlists.pdf
 *
 * Disadvantage:
 * Worst case memory waste > 99% and will happen when each of the
 * leaf arrays contains only a single element. Even with consecutive
 * integers, memory waste can be as high as 50%.
 *
 * Solution: Hashed Array Trees (HATs).
 *
 */

/* cint_array_init ---  array initialization routine */

static NODE **
cint_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
{
	if (symbol == NULL) {
		long newval;
		size_t nelems = (sizeof(power_two_table) / sizeof(power_two_table[0]));

		/* check relevant environment variables */
		if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT)
			NHAT = newval;
		/* don't allow overflow off the end of the table */
		if (NHAT >= nelems)
			NHAT = nelems - 2;
		THRESHOLD = power_two_table[NHAT + 1];
	} else
		null_array(symbol);

	return & success_node;
}


/* is_uinteger --- test if the subscript is an integer >= 0 */

NODE **
is_uinteger(NODE *symbol, NODE *subs)
{
	if (is_integer(symbol, subs) != NULL && subs->numbr >= 0)
		return & success_node;
	return NULL;
}


/* cint_lookup --- Find the subscript in the array; Install it if it isn't there. */

static NODE **
cint_lookup(NODE *symbol, NODE *subs)
{
	NODE **lhs;
	long k;
	int h1 = -1, m, li;
	NODE *tn, *xn;
	long cint_size, capacity;

	k = -1;
	if (ISUINT(symbol, subs)) {
		k = subs->numbr;	/* k >= 0 */
		h1 = cint_hash(k);	/* h1 >= NHAT */
		if ((lhs = cint_find(symbol, k, h1)) != NULL)
			return lhs;
	}
	xn = symbol->xarray;
	if (xn != NULL && (lhs = xn->aexists(xn, subs)) != NULL)
		return lhs;

	/* It's not there, install it */

	if (k < 0)
		goto xinstall;

	m = h1 - 1;	/* m >= (NHAT- 1) */

	/* Estimate capacity upper bound.
	 * capacity upper bound = current capacity + leaf array size.
	 */
	li = m > NHAT ? m : NHAT;
	while (li >= NHAT) {
		/* leaf-array of a HAT */
		li = (li + 1) / 2;
	}
	capacity = symbol->array_capacity + power_two_table[li];

	cint_size = (xn == NULL) ? symbol->table_size
				: (symbol->table_size - xn->table_size);
	assert(cint_size >= 0);
	if ((capacity - cint_size) > THRESHOLD)
		goto xinstall;

	if (symbol->nodes == NULL) {
		symbol->array_capacity = 0;
		assert(symbol->table_size == 0);

		/* nodes[0] .. nodes[NHAT- 1] not used */
		ezalloc(symbol->nodes, NODE **, INT32_BIT * sizeof(NODE *), "cint_lookup");
	}

	symbol->table_size++;	/* one more element in array */

	tn = symbol->nodes[h1];
	if (tn == NULL) {
		tn = make_node(Node_array_tree);
		symbol->nodes[h1] = tn;
	}

	if (m < NHAT)
		return tree_lookup(symbol, tn, k, NHAT, 0);
	return tree_lookup(symbol, tn, k, m, power_two_table[m]);

xinstall:

	symbol->table_size++;
	if (xn == NULL) {
		xn = symbol->xarray = make_array();
		xn->vname = symbol->vname;	/* shallow copy */

		/*
		 * Avoid using assoc_lookup(xn, subs) which may lead
		 * to infinite recursion.
		 */

		if (is_integer(xn, subs))
			xn->array_funcs = int_array_func;
		else
			xn->array_funcs = str_array_func;
		xn->flags |= XARRAY;
	}
	return xn->alookup(xn, subs);
}


/* cint_exists --- test whether an index is in the array or not. */

static NODE **
cint_exists(NODE *symbol, NODE *subs)
{
	NODE *xn;

	if (ISUINT(symbol, subs)) {
		long k = subs->numbr;
		NODE **lhs;
		if ((lhs = cint_find(symbol, k, cint_hash(k))) != NULL)
			return lhs;
	}
	if ((xn = symbol->xarray) == NULL)
		return NULL;
	return xn->aexists(xn, subs);
}


/* cint_clear --- flush all the values in symbol[] */

static NODE **
cint_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
{
	size_t i;
	NODE *tn;

	assert(symbol->nodes != NULL);

	if (symbol->xarray != NULL) {
		NODE *xn = symbol->xarray;
		assoc_clear(xn);
		freenode(xn);
		symbol->xarray = NULL;
	}

	for (i = NHAT; i < INT32_BIT; i++) {
		tn = symbol->nodes[i];
		if (tn != NULL) {
			tree_clear(tn);
			freenode(tn);
		}
	}

	efree(symbol->nodes);
	symbol->ainit(symbol, NULL);	/* re-initialize symbol */
	return NULL;
}


/* cint_remove --- remove an index from the array */

static NODE **
cint_remove(NODE *symbol, NODE *subs)
{
	long k;
	int h1;
	NODE *tn, *xn = symbol->xarray;

	if (symbol->table_size == 0)
		return NULL;

	if (! ISUINT(symbol, subs))
		goto xremove;

	assert(symbol->nodes != NULL);

	k = subs->numbr;
	h1 = cint_hash(k);
	tn = symbol->nodes[h1];
	if (tn == NULL || ! tree_remove(symbol, tn, k))
		goto xremove;

	if (tn->table_size == 0) {
		freenode(tn);
		symbol->nodes[h1] = NULL;
	}

	symbol->table_size--;

	if (xn == NULL && symbol->table_size == 0) {
		efree(symbol->nodes);
		symbol->ainit(symbol, NULL);	/* re-initialize array 'symbol' */
	} else if(xn != NULL && symbol->table_size == xn->table_size) {
		/* promote xn to symbol */

		xn->flags &= ~XARRAY;
		xn->parent_array = symbol->parent_array;
		efree(symbol->nodes);
		*symbol = *xn;
		freenode(xn);
	}

	return & success_node;

xremove:
	xn = symbol->xarray;
	if (xn == NULL || xn->aremove(xn, subs) == NULL)
		return NULL;
	if (xn->table_size == 0) {
		freenode(xn);
		symbol->xarray = NULL;
	}
	symbol->table_size--;
	assert(symbol->table_size > 0);

	return & success_node;
}


/* cint_copy --- duplicate input array "symbol" */

static NODE **
cint_copy(NODE *symbol, NODE *newsymb)
{
	NODE **old, **new;
	size_t i;

	assert(symbol->nodes != NULL);

	/* allocate new table */
	ezalloc(new, NODE **, INT32_BIT * sizeof(NODE *), "cint_copy");

	old = symbol->nodes;
	for (i = NHAT; i < INT32_BIT; i++) {
		if (old[i] == NULL)
			continue;
		new[i] = make_node(Node_array_tree);
		tree_copy(newsymb, old[i], new[i]);
	}

	if (symbol->xarray != NULL) {
		NODE *xn, *n;
		xn = symbol->xarray;
		n = make_array();
		n->vname = newsymb->vname;
		(void) xn->acopy(xn, n);
		newsymb->xarray = n;
	} else
		newsymb->xarray = NULL;

	newsymb->nodes = new;
	newsymb->table_size = symbol->table_size;
	newsymb->array_capacity = symbol->array_capacity;
	newsymb->flags = symbol->flags;

	return NULL;
}


/* cint_list --- return a list of items */

static NODE**
cint_list(NODE *symbol, NODE *t)
{
	NODE **list = NULL;
	NODE *tn, *xn;
	unsigned long k = 0, num_elems, list_size;
	size_t j, ja, jd;
	int elem_size = 1;
	assoc_kind_t assoc_kind;

	num_elems = symbol->table_size;
	if (num_elems == 0)
		return NULL;
	assoc_kind = (assoc_kind_t) t->flags;
	if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
		num_elems = 1;

	if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
		elem_size = 2;
	list_size = num_elems * elem_size;

	if (symbol->xarray != NULL) {
		xn = symbol->xarray;
		list = xn->alist(xn, t);
		assert(list != NULL);
		assoc_kind &= ~(AASC|ADESC);
		t->flags = (unsigned int) assoc_kind;
		if (num_elems == 1 || num_elems == xn->table_size)
			return list;
		erealloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
		k = elem_size * xn->table_size;
	} else
		emalloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");

	if ((assoc_kind & AINUM) == 0) {
		/* not sorting by "index num" */
		assoc_kind &= ~(AASC|ADESC);
		t->flags = (unsigned int) assoc_kind;
	}

	/* populate it with index in ascending or descending order */

	for (ja = NHAT, jd = INT32_BIT - 1; ja < INT32_BIT && jd >= NHAT; ) {
		j = (assoc_kind & ADESC) != 0 ? jd-- : ja++;
		tn = symbol->nodes[j];
		if (tn == NULL)
			continue;
		k += tree_list(tn, list + k, assoc_kind);
		if (k >= list_size)
			return list;
	}
	return list;
}


/* cint_dump --- dump array info */

static NODE **
cint_dump(NODE *symbol, NODE *ndump)
{
	NODE *tn, *xn = NULL;
	int indent_level;
	size_t i;
	long cint_size = 0, xsize = 0;
	AWKNUM kb = 0;
	extern AWKNUM int_kilobytes(NODE *symbol);
	extern AWKNUM str_kilobytes(NODE *symbol);

	indent_level = ndump->alevel;

	if (symbol->xarray != NULL) {
		xn = symbol->xarray;
		xsize = xn->table_size;
	}
	cint_size = symbol->table_size - xsize;

	if ((symbol->flags & XARRAY) == 0)
		fprintf(output_fp, "%s `%s'\n",
			(symbol->parent_array == NULL) ? "array" : "sub-array",
			array_vname(symbol));
	indent_level++;
	indent(indent_level);
	fprintf(output_fp, "array_func: cint_array_func\n");
	if (symbol->flags != 0) {
		indent(indent_level);
		fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
	}
	indent(indent_level);
	fprintf(output_fp, "NHAT: %d\n", NHAT);
	indent(indent_level);
	fprintf(output_fp, "THRESHOLD: %ld\n", THRESHOLD);
	indent(indent_level);
	fprintf(output_fp, "table_size: %ld (total), %ld (cint), %ld (int + str)\n",
				symbol->table_size, cint_size, xsize);
	indent(indent_level);
	fprintf(output_fp, "array_capacity: %lu\n", (unsigned long) symbol->array_capacity);
	indent(indent_level);
	fprintf(output_fp, "Load Factor: %.2g\n", (AWKNUM) cint_size / symbol->array_capacity);

	for (i = NHAT; i < INT32_BIT; i++) {
		tn = symbol->nodes[i];
		if (tn == NULL)
			continue;
		/* Node_array_tree  + HAT */
		kb += (sizeof(NODE) + tree_kilobytes(tn)) / 1024.0;
	}
	kb += (INT32_BIT * sizeof(NODE *)) / 1024.0;	/* symbol->nodes */
	kb += (symbol->array_capacity * sizeof(NODE *)) / 1024.0;	/* value nodes in Node_array_leaf(s) */
	if (xn != NULL) {
		if (xn->array_funcs == int_array_func)
			kb += int_kilobytes(xn);
		else
			kb += str_kilobytes(xn);
	}

	indent(indent_level);
	fprintf(output_fp, "memory: %.2g kB (total)\n", kb);

	/* dump elements */

	if (ndump->adepth >= 0) {
		const char *aname;

		fprintf(output_fp, "\n");
		aname = make_aname(symbol);
		for (i = NHAT; i < INT32_BIT; i++) {
			tn = symbol->nodes[i];
			if (tn != NULL)
				tree_info(tn, ndump, aname);
		}
	}

	if (xn != NULL) {
		fprintf(output_fp, "\n");
		xn->adump(xn, ndump);
	}

#ifdef ARRAYDEBUG
	if (ndump->adepth < -999)
		cint_print(symbol);
#endif

	return NULL;
}


/* cint_hash --- locate the HAT for a given number 'k' */

static inline int
cint_hash(long k)
{
	uint32_t num, r, shift;

	assert(k >= 0);
	if (k == 0)
		return NHAT;
	num = k;

	/* Find the Floor(log base 2 of 32-bit integer) */

	/*
	 * Warren Jr., Henry S. (2002). Hacker's Delight.
	 * Addison Wesley. pp. pp. 215. ISBN 978-0201914658.
	 *
	 *	r = 0;
	 *	if (num >= 1<<16) { num >>= 16;	r += 16; }
	 *	if (num >= 1<< 8) { num >>=  8;	r +=  8; }
	 *	if (num >= 1<< 4) { num >>=  4;	r +=  4; }
	 *	if (num >= 1<< 2) { num >>=  2;	r +=  2; }
	 *	if (num >= 1<< 1) {		r +=  1; }
	 */


	/*
	 * Slightly different code copied from:
	 *
	 * http://www-graphics.stanford.edu/~seander/bithacks.html
	 * Bit Twiddling Hacks
	 * By Sean Eron Anderson
	 * seander@cs.stanford.edu
	 * Individually, the code snippets here are in the public domain
	 * (unless otherwise noted) --- feel free to use them however you please.
	 * The aggregate collection and descriptions are (C) 1997-2005
	 * Sean Eron Anderson. The code and descriptions are distributed in the
	 * hope that they will be useful, but WITHOUT ANY WARRANTY and without
	 * even the implied warranty of merchantability or fitness for a particular
	 * purpose.
	 *
	 */

	r = (num > 0xFFFF) << 4; num >>= r;
	shift = (num > 0xFF) << 3; num >>= shift; r |= shift;
	shift = (num > 0x0F) << 2; num >>= shift; r |= shift;
	shift = (num > 0x03) << 1; num >>= shift; r |= shift;
	r |= (num >> 1);

	/* We use a single HAT for 0 <= num < 2^NHAT */
	if (r < NHAT)
		return NHAT;

	return (1 + r);
}


/* cint_find --- locate the integer subscript */

static inline NODE **
cint_find(NODE *symbol, long k, int h1)
{
	NODE *tn;

	if (symbol->nodes == NULL || (tn = symbol->nodes[h1]) == NULL)
		return NULL;
	return tree_exists(tn, k);
}


#ifdef ARRAYDEBUG

/* cint_print --- print structural info */

static void
cint_print(NODE *symbol)
{
	NODE *tn;
	size_t i;

	fprintf(output_fp, "I[%4lu:%-4lu]\n", (unsigned long) INT32_BIT,
				(unsigned long) symbol->table_size);
	for (i = NHAT; i < INT32_BIT; i++) {
		tn = symbol->nodes[i];
		if (tn == NULL)
			continue;
		tree_print(tn, i, 1);
	}
}

#endif


/*------------------------ Hashed Array Trees -----------------------------*/

/*
 * HATs: Hashed Array Trees
 * Fast variable-length arrays
 * Edward Sitarski
 * http://www.drdobbs.com/architecture-and-design/184409965
 *
 *  HAT has a top-level array containing a power of two
 *  number of leaf arrays. All leaf arrays are the same size as the
 *  top-level array. A full HAT can hold n^2 elements,
 *  where n (some power of 2) is the size of each leaf array.
 *  [i/n][i & (n - 1)] locates the `i th' element in a HAT.
 *
 */

/*
 *  A half HAT is defined here as a HAT with a top-level array of size n^2/2
 *  and holds the first n^2/2 elements.
 *
 *   1. 2^8 elements can be stored in a full HAT of size 2^4.
 *   2. 2^9 elements can be stored in a half HAT of size 2^5.
 *   3. When the number of elements is some power of 2, it
 *      can be stored in a full or a half HAT.
 *   4. When the number of elements is some power of 2, it
 *      can be stored in a HAT (full or half) with HATs as leaf elements
 *      (full or half),  and so on (e.g. 2^8 elements in a HAT of size 2^4 (top-level
 *      array dimension) with each leaf array being a HAT of size 2^2).
 *
 *  IMPLEMENTATION DETAILS:
 *    1. A HAT of 2^12 elements needs 2^6 house-keeping NODEs
 *       of Node_array_leaf.
 *
 *    2. A HAT of HATS of 2^12 elements needs
 *       2^6 * (1 Node_array_tree + 2^3 Node_array_leaf)
 *       ~ 2^9 house-keeping NODEs.
 *
 *    3. When a leaf array (or leaf HAT) becomes empty, the memory
 *       is deallocated, and when there is no leaf array (or leaf HAT) left,
 *       the HAT is deleted.
 *
 *    4. A HAT stores the base (first) element, and locates the leaf array/HAT
 *       for the `i th' element using integer division
 *       (i - base)/n where n is the size of the top-level array.
 *
 */

/* make_node --- initialize a NODE */

static inline NODE *
make_node(NODETYPE type)
{
	NODE *n;
	getnode(n);
	memset(n, '\0', sizeof(NODE));
	n->type = type;
	return n;
}


/* tree_lookup --- Find an integer subscript in a HAT; Install it if it isn't there */

static NODE **
tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base)
{
	NODE **lhs;
	NODE *tn;
	int i, n;
	size_t size;
	long num = k;

	/*
	 * HAT size (size of Top & Leaf array) = 2^n
	 * where n = Floor ((m + 1)/2). For an odd value of m,
	 * only the first half of the HAT is needed.
	 */

	n = (m + 1) / 2;

	if (tree->table_size == 0) {
		size_t actual_size;
		NODE **table;

		assert(tree->nodes == NULL);

		/* initialize top-level array */
		size = actual_size = power_two_table[n];
		tree->array_base = base;
		tree->array_size = size;
		tree->table_size = 0;	/* # of elements in the array */
		if (n > m/2) {
			/* only first half of the array used */
			actual_size /= 2;
			tree->flags |= HALFHAT;
		}
		ezalloc(table, NODE **, actual_size * sizeof(NODE *), "tree_lookup");
		tree->nodes = table;
	} else
		size = tree->array_size;

	num -= tree->array_base;
	i = num / size;	/* top-level array index */
	assert(i >= 0);

	if ((lhs = tree_find(tree, k, i)) != NULL)
		return lhs;

	/* It's not there, install it */

	tree->table_size++;
	base += (size * i);
	tn = tree->nodes[i];
	if (n > NHAT) {
		if (tn == NULL)
			tn = tree->nodes[i] = make_node(Node_array_tree);
		return tree_lookup(symbol, tn, k, n, base);
	} else {
		if (tn == NULL)
			tn = tree->nodes[i] = make_node(Node_array_leaf);
		return leaf_lookup(symbol, tn, k, size, base);
	}
}


/* tree_exists --- test whether integer subscript `k' exists or not */

static NODE **
tree_exists(NODE *tree, long k)
{
	int i;
	NODE *tn;

	i = (k - tree->array_base) / tree->array_size;
	assert(i >= 0);
	tn = tree->nodes[i];
	if (tn == NULL)
		return NULL;
	if (tn->type == Node_array_tree)
		return tree_exists(tn, k);
	return leaf_exists(tn, k);
}

/* tree_clear --- flush all the values */

static void
tree_clear(NODE *tree)
{
	NODE *tn;
	size_t	j, hsize;

	hsize = tree->array_size;
	if ((tree->flags & HALFHAT) != 0)
		hsize /= 2;

	for (j = 0; j < hsize; j++) {
		tn = tree->nodes[j];
		if (tn == NULL)
			continue;
		if (tn->type == Node_array_tree)
			tree_clear(tn);
		else
			leaf_clear(tn);
		freenode(tn);
	}

	efree(tree->nodes);
	memset(tree, '\0', sizeof(NODE));
	tree->type = Node_array_tree;
}


/* tree_remove --- If the integer subscript is in the HAT, remove it */

static int
tree_remove(NODE *symbol, NODE *tree, long k)
{
	int i;
	NODE *tn;

	i = (k - tree->array_base) / tree->array_size;
	assert(i >= 0);
	tn = tree->nodes[i];
	if (tn == NULL)
		return false;

	if (tn->type == Node_array_tree
			&& ! tree_remove(symbol, tn, k))
		return false;
	else if (tn->type == Node_array_leaf
			&& ! leaf_remove(symbol, tn, k))
		return false;

	if (tn->table_size == 0) {
		freenode(tn);
		tree->nodes[i] = NULL;
	}

	/* one less item in array */
	if (--tree->table_size == 0) {
		efree(tree->nodes);
		memset(tree, '\0', sizeof(NODE));
		tree->type = Node_array_tree;
	}
	return true;
}


/* tree_find --- locate an interger subscript in the HAT */

static inline NODE **
tree_find(NODE *tree, long k, int i)
{
	NODE *tn;

	assert(tree->nodes != NULL);
	tn = tree->nodes[i];
	if (tn != NULL) {
		if (tn->type == Node_array_tree)
			return tree_exists(tn, k);
		return leaf_exists(tn, k);
	}
	return NULL;
}


/* tree_list --- return a list of items in the HAT */

static long
tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind)
{
	NODE *tn;
	size_t j, cj, hsize;
	long k = 0;

	assert(list != NULL);

	hsize = tree->array_size;
	if ((tree->flags & HALFHAT) != 0)
		hsize /= 2;

	for (j = 0; j < hsize; j++) {
		cj = (assoc_kind & ADESC) != 0 ? (hsize - 1 - j) : j;
		tn = tree->nodes[cj];
		if (tn == NULL)
			continue;
		if (tn->type == Node_array_tree)
			k += tree_list(tn, list + k, assoc_kind);
		else
			k += leaf_list(tn, list + k, assoc_kind);
		if ((assoc_kind & ADELETE) != 0 && k >= 1)
			return k;
	}
	return k;
}


/* tree_copy --- duplicate a HAT */

static void
tree_copy(NODE *newsymb, NODE *tree, NODE *newtree)
{
	NODE **old, **new;
	size_t j, hsize;

	hsize = tree->array_size;
	if ((tree->flags & HALFHAT) != 0)
		hsize /= 2;

	ezalloc(new, NODE **, hsize * sizeof(NODE *), "tree_copy");
	newtree->nodes = new;
	newtree->array_base = tree->array_base;
	newtree->array_size = tree->array_size;
	newtree->table_size = tree->table_size;
	newtree->flags = tree->flags;

	old = tree->nodes;
	for (j = 0; j < hsize; j++) {
		if (old[j] == NULL)
			continue;
		if (old[j]->type == Node_array_tree) {
			new[j] = make_node(Node_array_tree);
			tree_copy(newsymb, old[j], new[j]);
		} else {
			new[j] = make_node(Node_array_leaf);
			leaf_copy(newsymb, old[j], new[j]);
		}
	}
}


/* tree_info --- print index, value info */

static void
tree_info(NODE *tree, NODE *ndump, const char *aname)
{
	NODE *tn;
	size_t j, hsize;

	hsize = tree->array_size;
	if ((tree->flags & HALFHAT) != 0)
		hsize /= 2;

	for (j = 0; j < hsize; j++) {
		tn = tree->nodes[j];
		if (tn == NULL)
			continue;
		if (tn->type == Node_array_tree)
			tree_info(tn, ndump, aname);
		else
			leaf_info(tn, ndump, aname);
	}
}


/* tree_kilobytes --- calculate memory consumption of a HAT */

static size_t
tree_kilobytes(NODE *tree)
{
	NODE *tn;
	size_t j, hsize;
	size_t sz = 0;

	hsize = tree->array_size;
	if ((tree->flags & HALFHAT) != 0)
		hsize /= 2;
	for (j = 0; j < hsize; j++) {
		tn = tree->nodes[j];
		if (tn == NULL)
			continue;
		sz += sizeof(NODE);	/* Node_array_tree or Node_array_leaf */
		if (tn->type == Node_array_tree)
			sz += tree_kilobytes(tn);
	}
	sz += hsize * sizeof(NODE *);	/* tree->nodes */
	return sz;
}

#ifdef ARRAYDEBUG

/* tree_print --- print the HAT structures */

static void
tree_print(NODE *tree, size_t bi, int indent_level)
{
	NODE *tn;
	size_t j, hsize;

	indent(indent_level);

	hsize = tree->array_size;
	if ((tree->flags & HALFHAT) != 0)
		hsize /= 2;
	fprintf(output_fp, "%4lu:%s[%4lu:%-4lu]\n",
			(unsigned long) bi,
			(tree->flags & HALFHAT) != 0 ? "HH" : "H",
			(unsigned long) hsize, (unsigned long) tree->table_size);

	for (j = 0; j < hsize; j++) {
		tn = tree->nodes[j];
		if (tn == NULL)
			continue;
		if (tn->type == Node_array_tree)
			tree_print(tn, j, indent_level + 1);
		else
			leaf_print(tn, j, indent_level + 1);
	}
}
#endif

/*--------------------- leaf (linear 1-D) array --------------------*/

/*
 * leaf_lookup --- find an integer subscript in the array; Install it if
 *	it isn't there.
 */

static inline NODE **
leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base)
{
	NODE **lhs;

	if (array->nodes == NULL) {
		array->table_size = 0;	/* sanity */
		array->array_size = size;
		array->array_base = base;
		ezalloc(array->nodes, NODE **, size * sizeof(NODE *), "leaf_lookup");
		symbol->array_capacity += size;
	}

	lhs = array->nodes + (k - base); /* leaf element */
	if (*lhs == NULL) {
		array->table_size++;	/* one more element in leaf array */
		*lhs = dupnode(Nnull_string);
	}
	return lhs;
}


/* leaf_exists --- check if the array contains an integer subscript */

static inline NODE **
leaf_exists(NODE *array, long k)
{
	NODE **lhs;
	lhs = array->nodes + (k - array->array_base);
	return (*lhs != NULL) ? lhs : NULL;
}


/* leaf_clear --- flush all values in the array */

static void
leaf_clear(NODE *array)
{
	long i, size = array->array_size;
	NODE *r;

	for (i = 0; i < size; i++) {
		r = array->nodes[i];
		if (r == NULL)
			continue;
		if (r->type == Node_var_array) {
			assoc_clear(r);		/* recursively clear all sub-arrays */
			efree(r->vname);
			freenode(r);
		} else
			unref(r);
	}
	efree(array->nodes);
	array->nodes = NULL;
	array->array_size = array->table_size = 0;
}


/* leaf_remove --- remove an integer subscript from the array */

static int
leaf_remove(NODE *symbol, NODE *array, long k)
{
	NODE **lhs;

	lhs = array->nodes + (k - array->array_base);
	if (*lhs == NULL)
		return false;
	*lhs = NULL;
	if (--array->table_size == 0) {
		efree(array->nodes);
		array->nodes = NULL;
		symbol->array_capacity -= array->array_size;
		array->array_size = 0;	/* sanity */
	}
	return true;
}


/* leaf_copy --- duplicate a leaf array */

static void
leaf_copy(NODE *newsymb, NODE *array, NODE *newarray)
{
	NODE **old, **new;
	long size, i;

	size = array->array_size;
	ezalloc(new, NODE **, size * sizeof(NODE *), "leaf_copy");
	newarray->nodes = new;
	newarray->array_size = size;
	newarray->array_base = array->array_base;
	newarray->flags = array->flags;
	newarray->table_size = array->table_size;

	old = array->nodes;
	for (i = 0; i < size; i++) {
		if (old[i] == NULL)
			continue;
		if (old[i]->type == Node_val)
			new[i] = dupnode(old[i]);
		else {
			NODE *r;
			r = make_array();
			r->vname = estrdup(old[i]->vname, strlen(old[i]->vname));
			r->parent_array = newsymb;
			new[i] = assoc_copy(old[i], r);
		}
	}
}


/* leaf_list --- return a list of items */

static long
leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind)
{
	NODE *r, *subs;
	long num, i, ci, k = 0;
	long size = array->array_size;
	static char buf[100];

	for (i = 0; i < size; i++) {
		ci = (assoc_kind & ADESC) != 0 ? (size - 1 - i) : i;
		r = array->nodes[ci];
		if (r == NULL)
			continue;

		/* index */
		num = array->array_base + ci;
		if ((assoc_kind & AISTR) != 0) {
			sprintf(buf, "%ld", num);
			subs = make_string(buf, strlen(buf));
			subs->numbr = num;
			subs->flags |= (NUMCUR|NUMINT);
		} else {
			subs = make_number((AWKNUM) num);
			subs->flags |= (INTIND|NUMINT);
		}
		list[k++] = subs;

		/* value */
		if ((assoc_kind & AVALUE) != 0) {
			if (r->type == Node_val) {
				if ((assoc_kind & AVNUM) != 0)
					(void) force_number(r);
				else if ((assoc_kind & AVSTR) != 0)
					r = force_string(r);
			}
			list[k++] = r;
		}
		if ((assoc_kind & ADELETE) != 0 && k >= 1)
			return k;
	}

	return k;
}


/* leaf_info --- print index, value info */

static void
leaf_info(NODE *array, NODE *ndump, const char *aname)
{
	NODE *subs, *val;
	size_t i, size;

	size = array->array_size;

	subs = make_number((AWKNUM) 0.0);
	subs->flags |= (INTIND|NUMINT);
	for (i = 0; i < size; i++) {
		val = array->nodes[i];
		if (val == NULL)
			continue;
		subs->numbr = array->array_base + i;
		assoc_info(subs, val, ndump, aname);
	}
	unref(subs);
}

#ifdef ARRAYDEBUG

/* leaf_print --- print the leaf-array structure */


static void
leaf_print(NODE *array, size_t bi, int indent_level)
{
	indent(indent_level);
	fprintf(output_fp, "%4lu:L[%4lu:%-4lu]\n",
			(unsigned long) bi,
			(unsigned long) array->array_size,
			(unsigned long) array->table_size);
}
#endif