/*
* array.c - routines for awk arrays.
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991-2014, 2016, 2018,
* the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "awk.h"
extern FILE *output_fp;
extern NODE **fmt_list; /* declared in eval.c */
NODE *success_node;
static size_t SUBSEPlen;
static char *SUBSEP;
static char indent_char[] = " ";
static NODE **null_lookup(NODE *symbol, NODE *subs);
static NODE **null_dump(NODE *symbol, NODE *subs);
static afunc_t null_array_func[] = {
(afunc_t) 0,
(afunc_t) 0,
null_length,
null_lookup,
null_afunc,
null_afunc,
null_afunc,
null_afunc,
null_afunc,
null_dump,
(afunc_t) 0,
};
#define MAX_ATYPE 10
static afunc_t *array_types[MAX_ATYPE];
static int num_array_types = 0;
/* array func to index mapping */
#define AFUNC(F) (F ## _ind)
/* register_array_func --- add routines to handle arrays */
int
register_array_func(afunc_t *afunc)
{
if (afunc && num_array_types < MAX_ATYPE) {
if (afunc != str_array_func && ! afunc[AFUNC(atypeof)])
return false;
array_types[num_array_types++] = afunc;
if (afunc[AFUNC(ainit)]) /* execute init routine if any */
(void) (*afunc[AFUNC(ainit)])(NULL, NULL);
return true;
}
return false;
}
/* array_init --- register all builtin array types */
void
array_init()
{
(void) register_array_func(str_array_func); /* the default */
if (! do_mpfr) {
(void) register_array_func(int_array_func);
(void) register_array_func(cint_array_func);
}
}
/* make_array --- create an array node */
NODE *
make_array()
{
NODE *array;
getnode(array);
memset(array, '\0', sizeof(NODE));
array->type = Node_var_array;
array->array_funcs = null_array_func;
/* vname, flags, and parent_array not set here */
return array;
}
/* null_array --- force symbol to be an empty typeless array */
void
null_array(NODE *symbol)
{
symbol->type = Node_var_array;
symbol->array_funcs = null_array_func;
symbol->buckets = NULL;
symbol->table_size = symbol->array_size = 0;
symbol->array_capacity = 0;
symbol->flags = 0;
assert(symbol->xarray == NULL);
/* vname, parent_array not (re)initialized */
}
/* null_lookup --- assign type to an empty array. */
static NODE **
null_lookup(NODE *symbol, NODE *subs)
{
int i;
afunc_t *afunc = NULL;
assert(symbol->table_size == 0);
/*
* Check which array type wants to accept this sub; traverse
* array type list in reverse order.
*/
for (i = num_array_types - 1; i >= 1; i--) {
afunc = array_types[i];
if (afunc[AFUNC(atypeof)](symbol, subs) != NULL)
break;
}
if (i == 0 || afunc == NULL)
afunc = array_types[0]; /* default is str_array_func */
symbol->array_funcs = afunc;
/* We have the right type of array; install the subscript */
return symbol->alookup(symbol, subs);
}
/* null_length --- default function for array length interface */
NODE **
null_length(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
{
static NODE *tmp;
tmp = symbol;
return & tmp;
}
/* null_afunc --- default function for array interface */
NODE **
null_afunc(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
{
return NULL;
}
/* null_dump --- dump function for an empty array */
static NODE **
null_dump(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
{
fprintf(output_fp, "array `%s' is empty\n", array_vname(symbol));
return NULL;
}
/* assoc_copy --- duplicate input array "symbol" */
NODE *
assoc_copy(NODE *symbol, NODE *newsymb)
{
assert(newsymb->vname != NULL);
assoc_clear(newsymb);
(void) symbol->acopy(symbol, newsymb);
newsymb->array_funcs = symbol->array_funcs;
newsymb->flags = symbol->flags;
return newsymb;
}
/* assoc_dump --- dump array */
void
assoc_dump(NODE *symbol, NODE *ndump)
{
if (symbol->adump)
(void) symbol->adump(symbol, ndump);
}
/* make_aname --- construct a 'vname' for a (sub)array */
const char *
make_aname(const NODE *symbol)
{
static char *aname = NULL;
static size_t alen;
static size_t max_alen;
#define SLEN 256
if (symbol->parent_array != NULL) {
size_t slen;
(void) make_aname(symbol->parent_array);
slen = strlen(symbol->vname); /* subscript in parent array */
if (alen + slen + 4 > max_alen) { /* sizeof("[\"\"]") = 4 */
max_alen = alen + slen + 4 + SLEN;
erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
}
alen += sprintf(aname + alen, "[\"%s\"]", symbol->vname);
} else {
alen = strlen(symbol->vname);
if (aname == NULL) {
max_alen = alen + SLEN;
emalloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
} else if (alen > max_alen) {
max_alen = alen + SLEN;
erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
}
memcpy(aname, symbol->vname, alen + 1);
}
return aname;
}
#undef SLEN
/*
* array_vname --- print the name of the array
*
* Returns a pointer to a statically maintained dynamically allocated string.
* It's appropriate for printing the name once; if the caller wants
* to save it, they have to make a copy.
*/
const char *
array_vname(const NODE *symbol)
{
static char *message = NULL;
static size_t msglen = 0;
char *s;
size_t len;
int n;
const NODE *save_symbol = symbol;
const char *from = _("from %s");
const char *aname;
if (symbol->type != Node_array_ref
|| symbol->orig_array->type != Node_var_array
) {
if (symbol->type != Node_var_array || symbol->parent_array == NULL)
return symbol->vname;
return make_aname(symbol);
}
/* First, we have to compute the length of the string: */
len = 2; /* " (" */
n = 0;
while (symbol->type == Node_array_ref) {
len += strlen(symbol->vname);
n++;
symbol = symbol->prev_array;
}
/* Get the (sub)array name */
if (symbol->parent_array == NULL)
aname = symbol->vname;
else
aname = make_aname(symbol);
len += strlen(aname);
/*
* Each node contributes by strlen(from) minus the length
* of "%s" in the translation (which is at least 2)
* plus 2 for ", " or ")\0"; this adds up to strlen(from).
*/
len += n * strlen(from);
/* (Re)allocate memory: */
if (message == NULL) {
emalloc(message, char *, len, "array_vname");
msglen = len;
} else if (len > msglen) {
erealloc(message, char *, len, "array_vname");
msglen = len;
} /* else
current buffer can hold new name */
/* We're ready to print: */
symbol = save_symbol;
s = message;
/*
* Ancient systems have sprintf() returning char *, not int.
* If you have one of those, use sprintf(..); s += strlen(s) instead.
*/
s += sprintf(s, "%s (", symbol->vname);
for (;;) {
symbol = symbol->prev_array;
if (symbol->type != Node_array_ref)
break;
s += sprintf(s, from, symbol->vname);
s += sprintf(s, ", ");
}
s += sprintf(s, from, aname);
strcpy(s, ")");
return message;
}
/*
* force_array --- proceed to the actual Node_var_array,
* change Node_var_new to an array.
* If canfatal and type isn't good, die fatally,
* otherwise return the final actual value.
*/
NODE *
force_array(NODE *symbol, bool canfatal)
{
NODE *save_symbol = symbol;
bool isparam = false;
if (symbol->type == Node_param_list) {
save_symbol = symbol = GET_PARAM(symbol->param_cnt);
isparam = true;
if (symbol->type == Node_array_ref)
symbol = symbol->orig_array;
}
switch (symbol->type) {
case Node_var_new:
symbol->xarray = NULL; /* make sure union is as it should be */
null_array(symbol);
symbol->parent_array = NULL; /* main array has no parent */
/* fall through */
case Node_var_array:
break;
case Node_array_ref:
default:
/* notably Node_var but catches also e.g. a[1] = "x"; a[1][1] = "y" */
if (canfatal) {
if (symbol->type == Node_val)
fatal(_("attempt to use a scalar value as array"));
if (isparam)
fatal(_("attempt to use scalar parameter `%s' as an array"),
save_symbol->vname);
else
fatal(_("attempt to use scalar `%s' as an array"),
save_symbol->vname);
} else
break;
}
return symbol;
}
/* set_SUBSEP --- update SUBSEP related variables when SUBSEP assigned to */
void
set_SUBSEP()
{
SUBSEP_node->var_value = force_string(SUBSEP_node->var_value);
SUBSEP = SUBSEP_node->var_value->stptr;
SUBSEPlen = SUBSEP_node->var_value->stlen;
}
/* concat_exp --- concatenate expression list into a single string */
NODE *
concat_exp(int nargs, bool do_subsep)
{
/* do_subsep is false for Op_concat */
NODE *r;
char *str;
char *s;
size_t len;
size_t subseplen = 0;
int i;
extern NODE **args_array;
if (nargs == 1)
return POP_STRING();
if (do_subsep)
subseplen = SUBSEPlen;
len = 0;
for (i = 1; i <= nargs; i++) {
r = TOP();
if (r->type == Node_var_array) {
while (--i > 0)
DEREF(args_array[i]); /* avoid memory leak */
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(r));
}
r = POP_STRING();
args_array[i] = r;
len += r->stlen;
}
len += (nargs - 1) * subseplen;
emalloc(str, char *, len + 1, "concat_exp");
r = args_array[nargs];
memcpy(str, r->stptr, r->stlen);
s = str + r->stlen;
DEREF(r);
for (i = nargs - 1; i > 0; i--) {
if (subseplen == 1)
*s++ = *SUBSEP;
else if (subseplen > 0) {
memcpy(s, SUBSEP, subseplen);
s += subseplen;
}
r = args_array[i];
memcpy(s, r->stptr, r->stlen);
s += r->stlen;
DEREF(r);
}
return make_str_node(str, len, ALREADY_MALLOCED);
}
/*
* adjust_fcall_stack: remove subarray(s) of symbol[] from
* function call stack.
*/
static void
adjust_fcall_stack(NODE *symbol, int nsubs)
{
NODE *func, *r, *n;
NODE **sp;
int pcount;
/*
* Solve the nasty problem of disappearing subarray arguments:
*
* function f(c, d) { delete c; .. use non-existent array d .. }
* BEGIN { a[0][0] = 1; f(a, a[0]); .. }
*
* The fix is to convert 'd' to a local empty array; This has
* to be done before clearing the parent array to avoid referring to
* already free-ed memory.
*
* Similar situations exist for builtins accepting more than
* one array argument: split, patsplit, asort and asorti. For example:
*
* BEGIN { a[0][0] = 1; split("abc", a, "", a[0]) }
*
* These cases do not involve the function call stack, and are
* handled individually in their respective routines.
*/
func = frame_ptr->func_node;
if (func == NULL) /* in main */
return;
pcount = func->param_cnt;
sp = frame_ptr->stack;
for (; pcount > 0; pcount--) {
r = *sp++;
if (r->type != Node_array_ref
|| r->orig_array->type != Node_var_array)
continue;
n = r->orig_array;
/* Case 1 */
if (n == symbol
&& symbol->parent_array != NULL
&& nsubs > 0
) {
/*
* 'symbol' is a subarray, and 'r' is the same subarray:
*
* function f(c, d) { delete c[0]; .. }
* BEGIN { a[0][0] = 1; f(a, a[0]); .. }
*
* But excludes cases like (nsubs = 0):
*
* function f(c, d) { delete c; ..}
* BEGIN { a[0][0] = 1; f(a[0], a[0]); ...}
*/
null_array(r);
r->parent_array = NULL;
continue;
}
/* Case 2 */
for (n = n->parent_array; n != NULL; n = n->parent_array) {
assert(n->type == Node_var_array);
if (n == symbol) {
/*
* 'r' is a subarray of 'symbol':
*
* function f(c, d) { delete c; .. use d as array .. }
* BEGIN { a[0][0] = 1; f(a, a[0]); .. }
* OR
* BEGIN { a[0][0][0][0] = 1; f(a[0], a[0][0][0]); .. }
*
*/
null_array(r);
r->parent_array = NULL;
break;
}
}
}
}
/* do_delete --- perform `delete array[s]' */
/*
* `symbol' is array
* `nsubs' is no of subscripts
*/
void
do_delete(NODE *symbol, int nsubs)
{
NODE *val, *subs;
int i;
assert(symbol->type == Node_var_array);
subs = val = NULL; /* silence the compiler */
/*
* The force_string() call is needed to make sure that
* the string subscript is reasonable. For example, with it:
*
* $ ./gawk --posix 'BEGIN { CONVFMT="%ld"; delete a[1.233]}'
* gawk: cmd. line:1: fatal: `%l' is not permitted in POSIX awk formats
*
* Without it, the code does not fail.
*/
#define free_subs(n) do { \
NODE *s = PEEK(n - 1); \
if (s->type == Node_val) { \
(void) force_string(s); /* may have side effects. */ \
DEREF(s); \
} \
} while (--n > 0)
if (nsubs == 0) {
/* delete array */
adjust_fcall_stack(symbol, 0); /* fix function call stack; See above. */
assoc_clear(symbol);
return;
}
/* NB: subscripts are in reverse order on stack */
for (i = nsubs; i > 0; i--) {
subs = PEEK(i - 1);
if (subs->type != Node_val) {
free_subs(i);
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(subs));
}
val = in_array(symbol, subs);
if (val == NULL) {
if (do_lint) {
subs = force_string(subs);
lintwarn(_("delete: index `%.*s' not in array `%s'"),
(int) subs->stlen, subs->stptr, array_vname(symbol));
}
/* avoid memory leak, free all subs */
free_subs(i);
return;
}
if (i > 1) {
if (val->type != Node_var_array) {
/* e.g.: a[1] = 1; delete a[1][1] */
free_subs(i);
subs = force_string(subs);
fatal(_("attempt to use scalar `%s[\"%.*s\"]' as an array"),
array_vname(symbol),
(int) subs->stlen,
subs->stptr);
}
symbol = val;
DEREF(subs);
}
}
if (val->type == Node_var_array) {
adjust_fcall_stack(val, nsubs); /* fix function call stack; See above. */
assoc_clear(val);
/* cleared a sub-array, free Node_var_array */
efree(val->vname);
freenode(val);
} else
unref(val);
(void) assoc_remove(symbol, subs);
DEREF(subs);
#undef free_subs
}
/* do_delete_loop --- simulate ``for (iggy in foo) delete foo[iggy]'' */
/*
* The primary hassle here is that `iggy' needs to have some arbitrary
* array index put in it before we can clear the array, we can't
* just replace the loop with `delete foo'.
*/
void
do_delete_loop(NODE *symbol, NODE **lhs)
{
NODE **list;
NODE akind;
akind.flags = AINDEX|ADELETE; /* need a single index */
list = symbol->alist(symbol, & akind);
if (assoc_empty(symbol))
return;
unref(*lhs);
*lhs = list[0];
efree(list);
/* blast the array in one shot */
adjust_fcall_stack(symbol, 0);
assoc_clear(symbol);
}
/* value_info --- print scalar node info */
static void
value_info(NODE *n)
{
#define PREC_NUM -1
if (n == Nnull_string || n == Null_field) {
fprintf(output_fp, "<(null)>");
return;
}
if ((n->flags & (STRING|STRCUR)) != 0) {
fprintf(output_fp, "<");
fprintf(output_fp, "\"%.*s\"", (int) n->stlen, n->stptr);
if ((n->flags & (NUMBER|NUMCUR)) != 0) {
#ifdef HAVE_MPFR
if (is_mpg_float(n))
fprintf(output_fp, ":%s",
mpg_fmt("%.*R*g", PREC_NUM, ROUND_MODE, n->mpg_numbr));
else if (is_mpg_integer(n))
fprintf(output_fp, ":%s", mpg_fmt("%Zd", n->mpg_i));
else
#endif
fprintf(output_fp, ":%.*g", PREC_NUM, n->numbr);
}
fprintf(output_fp, ">");
} else {
#ifdef HAVE_MPFR
if (is_mpg_float(n))
fprintf(output_fp, "<%s>",
mpg_fmt("%.*R*g", PREC_NUM, ROUND_MODE, n->mpg_numbr));
else if (is_mpg_integer(n))
fprintf(output_fp, "<%s>", mpg_fmt("%Zd", n->mpg_i));
else
#endif
fprintf(output_fp, "<%.*g>", PREC_NUM, n->numbr);
}
fprintf(output_fp, ":%s", flags2str(n->flags));
if ((n->flags & MALLOC) != 0)
fprintf(output_fp, ":%ld", n->valref);
else
fprintf(output_fp, ":");
if ((n->flags & (STRING|STRCUR)) == STRCUR) {
size_t len;
fprintf(output_fp, "][");
fprintf(output_fp, "stfmt=%d, ", n->stfmt);
/*
* If not STFMT_UNUSED, could be CONVFMT or OFMT if last
* used in a print statement. If immutable, could be that it
* was originally set as a string, or it's a number that has
* an integer value.
*/
len = fmt_list[n->stfmt]->stlen;
fmt_list[n->stfmt]->stptr[len] = '\0';
fprintf(output_fp, "FMT=\"%s\"",
n->stfmt == STFMT_UNUSED ? "<unused>"
: fmt_list[n->stfmt]->stptr);
#ifdef HAVE_MPFR
fprintf(output_fp, ", RNDMODE=\"%c\"", n->strndmode);
#endif
}
#undef PREC_NUM
}
void
indent(int indent_level)
{
int i;
for (i = 0; i < indent_level; i++)
fprintf(output_fp, "%s", indent_char);
}
/* assoc_info --- print index, value info */
void
assoc_info(NODE *subs, NODE *val, NODE *ndump, const char *aname)
{
int indent_level = ndump->alevel;
indent_level++;
indent(indent_level);
fprintf(output_fp, "I: [%s:", aname);
if ((subs->flags & (MPFN|MPZN|INTIND)) == INTIND)
fprintf(output_fp, "<%ld>", (long) subs->numbr);
else
value_info(subs);
fprintf(output_fp, "]\n");
indent(indent_level);
if (val->type == Node_val) {
fprintf(output_fp, "V: [scalar: ");
value_info(val);
} else {
fprintf(output_fp, "V: [");
ndump->alevel++;
ndump->adepth--;
assoc_dump(val, ndump);
ndump->adepth++;
ndump->alevel--;
indent(indent_level);
}
fprintf(output_fp, "]\n");
}
/* do_adump --- dump an array: interface to assoc_dump */
NODE *
do_adump(int nargs)
{
NODE *symbol, *tmp;
static NODE ndump;
long depth = 0;
/*
* depth < 0, no index and value info.
* = 0, main array index and value info; does not descend into sub-arrays.
* > 0, descends into 'depth' sub-arrays, and prints index and value info.
*/
if (nargs == 2) {
tmp = POP_NUMBER();
depth = get_number_si(tmp);
DEREF(tmp);
}
symbol = POP_PARAM();
if (symbol->type != Node_var_array)
fatal(_("adump: first argument not an array"));
ndump.type = Node_dump_array;
ndump.adepth = depth;
ndump.alevel = 0;
assoc_dump(symbol, & ndump);
return make_number((AWKNUM) 0);
}
/* asort_actual --- do the actual work to sort the input array */
static NODE *
asort_actual(int nargs, sort_context_t ctxt)
{
NODE *array, *dest = NULL, *result;
NODE *r, *subs, *s;
NODE **list = NULL, **ptr, **lhs;
unsigned long num_elems, i;
const char *sort_str;
char save;
if (nargs == 3) /* 3rd optional arg */
s = POP_STRING();
else
s = dupnode(Nnull_string); /* "" => default sorting */
s = force_string(s);
sort_str = s->stptr;
save = s->stptr[s->stlen];
s->stptr[s->stlen] = '\0';
if (s->stlen == 0) { /* default sorting */
if (ctxt == ASORT)
sort_str = "@val_type_asc";
else
sort_str = "@ind_str_asc";
}
if (nargs >= 2) { /* 2nd optional arg */
dest = POP_PARAM();
if (dest->type != Node_var_array) {
fatal(ctxt == ASORT ?
_("asort: second argument not an array") :
_("asorti: second argument not an array"));
}
}
array = POP_PARAM();
if (array->type != Node_var_array) {
fatal(ctxt == ASORT ?
_("asort: first argument not an array") :
_("asorti: first argument not an array"));
}
if (dest != NULL) {
for (r = dest->parent_array; r != NULL; r = r->parent_array) {
if (r == array)
fatal(ctxt == ASORT ?
_("asort: cannot use a subarray of first arg for second arg") :
_("asorti: cannot use a subarray of first arg for second arg"));
}
for (r = array->parent_array; r != NULL; r = r->parent_array) {
if (r == dest)
fatal(ctxt == ASORT ?
_("asort: cannot use a subarray of second arg for first arg") :
_("asorti: cannot use a subarray of second arg for first arg"));
}
}
/* sorting happens inside assoc_list */
list = assoc_list(array, sort_str, ctxt);
s->stptr[s->stlen] = save;
DEREF(s);
num_elems = assoc_length(array);
if (num_elems == 0 || list == NULL) {
/* source array is empty */
if (dest != NULL && dest != array)
assoc_clear(dest);
if (list != NULL)
efree(list);
return make_number((AWKNUM) 0);
}
/*
* Must not assoc_clear() the source array before constructing
* the output array. assoc_list() does not duplicate array values
* which are needed for asort().
*/
if (dest != NULL && dest != array) {
assoc_clear(dest);
result = dest;
} else {
/* use 'result' as a temporary destination array */
result = make_array();
result->vname = array->vname;
result->parent_array = array->parent_array;
}
if (ctxt == ASORTI) {
/* We want the indices of the source array. */
for (i = 1, ptr = list; i <= num_elems; i++, ptr += 2) {
subs = make_number(i);
lhs = assoc_lookup(result, subs);
unref(*lhs);
*lhs = *ptr;
if (result->astore != NULL)
(*result->astore)(result, subs);
unref(subs);
}
} else {
/* We want the values of the source array. */
for (i = 1, ptr = list; i <= num_elems; i++) {
subs = make_number(i);
/* free index node */
r = *ptr++;
unref(r);
/* value node */
r = *ptr++;
if (r->type == Node_val) {
lhs = assoc_lookup(result, subs);
unref(*lhs);
*lhs = dupnode(r);
} else {
NODE *arr;
arr = make_array();
subs = force_string(subs);
arr->vname = subs->stptr;
arr->vname[subs->stlen] = '\0';
subs->stptr = NULL;
subs->flags &= ~STRCUR;
arr->parent_array = array; /* actual parent, not the temporary one. */
lhs = assoc_lookup(result, subs);
unref(*lhs);
*lhs = assoc_copy(r, arr);
}
if (result->astore != NULL)
(*result->astore)(result, subs);
unref(subs);
}
}
efree(list);
if (result != dest) {
/* dest == NULL or dest == array */
assoc_clear(array);
*array = *result; /* copy result into array */
freenode(result);
} /* else
result == dest
dest != NULL and dest != array */
return make_number((AWKNUM) num_elems);
}
/* do_asort --- sort array by value */
NODE *
do_asort(int nargs)
{
return asort_actual(nargs, ASORT);
}
/* do_asorti --- sort array by index */
NODE *
do_asorti(int nargs)
{
return asort_actual(nargs, ASORTI);
}
/*
* cmp_strings --- compare two strings; logic similar to cmp_nodes() in eval.c
* except the extra case-sensitive comparison when the case-insensitive
* result is a match.
*/
static int
cmp_strings(const NODE *n1, const NODE *n2)
{
char *s1, *s2;
size_t len1, len2;
int ret;
size_t lmin;
s1 = n1->stptr;
len1 = n1->stlen;
s2 = n2->stptr;
len2 = n2->stlen;
if (len1 == 0)
return len2 == 0 ? 0 : -1;
if (len2 == 0)
return 1;
/* len1 > 0 && len2 > 0 */
lmin = len1 < len2 ? len1 : len2;
if (IGNORECASE) {
const unsigned char *cp1 = (const unsigned char *) s1;
const unsigned char *cp2 = (const unsigned char *) s2;
if (gawk_mb_cur_max > 1) {
ret = strncasecmpmbs((const unsigned char *) cp1,
(const unsigned char *) cp2, lmin);
} else {
for (ret = 0; lmin-- > 0 && ret == 0; cp1++, cp2++)
ret = casetable[*cp1] - casetable[*cp2];
}
if (ret != 0)
return ret;
/*
* If case insensitive result is "they're the same",
* use case sensitive comparison to force distinct order.
*/
}
ret = memcmp(s1, s2, lmin);
if (ret != 0 || len1 == len2)
return ret;
return (len1 < len2) ? -1 : 1;
}
/* sort_up_index_string --- qsort comparison function; ascending index strings. */
static int
sort_up_index_string(const void *p1, const void *p2)
{
const NODE *t1, *t2;
/* Array indices are strings */
t1 = *((const NODE *const *) p1);
t2 = *((const NODE *const *) p2);
return cmp_strings(t1, t2);
}
/* sort_down_index_str --- qsort comparison function; descending index strings. */
static int
sort_down_index_string(const void *p1, const void *p2)
{
/*
* Negation versus transposed arguments: when all keys are
* distinct, as with array indices here, either method will
* transform an ascending sort into a descending one. But if
* there are equal keys--such as when IGNORECASE is honored--
* that get disambiguated into a determisitc order, negation
* will reverse those but transposed arguments would retain
* their relative order within the rest of the reversed sort.
*/
return -sort_up_index_string(p1, p2);
}
/* sort_up_index_number --- qsort comparison function; ascending index numbers. */
static int
sort_up_index_number(const void *p1, const void *p2)
{
const NODE *t1, *t2;
int ret;
t1 = *((const NODE *const *) p1);
t2 = *((const NODE *const *) p2);
ret = cmp_numbers(t1, t2);
if (ret != 0)
return ret;
/* break a tie with the index string itself */
t1 = force_string((NODE *) t1);
t2 = force_string((NODE *) t2);
return cmp_strings(t1, t2);
}
/* sort_down_index_number --- qsort comparison function; descending index numbers */
static int
sort_down_index_number(const void *p1, const void *p2)
{
return -sort_up_index_number(p1, p2);
}
/* sort_up_value_string --- qsort comparison function; ascending value string */
static int
sort_up_value_string(const void *p1, const void *p2)
{
const NODE *t1, *t2;
t1 = *((const NODE *const *) p1 + 1);
t2 = *((const NODE *const *) p2 + 1);
if (t1->type == Node_var_array) {
/* return 0 if t2 is a sub-array too, else return 1 */
return (t2->type != Node_var_array);
}
if (t2->type == Node_var_array)
return -1; /* t1 (scalar) < t2 (sub-array) */
/* t1 and t2 both have string values */
return cmp_strings(t1, t2);
}
/* sort_down_value_string --- qsort comparison function; descending value string */
static int
sort_down_value_string(const void *p1, const void *p2)
{
return -sort_up_value_string(p1, p2);
}
/* sort_up_value_number --- qsort comparison function; ascending value number */
static int
sort_up_value_number(const void *p1, const void *p2)
{
NODE *t1, *t2;
int ret;
t1 = *((NODE *const *) p1 + 1);
t2 = *((NODE *const *) p2 + 1);
if (t1->type == Node_var_array) {
/* return 0 if t2 is a sub-array too, else return 1 */
return (t2->type != Node_var_array);
}
if (t2->type == Node_var_array)
return -1; /* t1 (scalar) < t2 (sub-array) */
ret = cmp_numbers(t1, t2);
if (ret != 0)
return ret;
/*
* Use string value to guarantee same sort order on all
* versions of qsort().
*/
t1 = force_string(t1);
t2 = force_string(t2);
return cmp_strings(t1, t2);
}
/* sort_down_value_number --- qsort comparison function; descending value number */
static int
sort_down_value_number(const void *p1, const void *p2)
{
return -sort_up_value_number(p1, p2);
}
/* sort_up_value_type --- qsort comparison function; ascending value type */
static int
sort_up_value_type(const void *p1, const void *p2)
{
NODE *n1, *n2;
/* we want to compare the element values */
n1 = *((NODE *const *) p1 + 1);
n2 = *((NODE *const *) p2 + 1);
/* 1. Arrays vs. scalar, scalar is less than array */
if (n1->type == Node_var_array) {
/* return 0 if n2 is a sub-array too, else return 1 */
return (n2->type != Node_var_array);
}
if (n2->type == Node_var_array) {
return -1; /* n1 (scalar) < n2 (sub-array) */
}
/* two scalars */
(void) fixtype(n1);
(void) fixtype(n2);
if ((n1->flags & NUMBER) != 0 && (n2->flags & NUMBER) != 0) {
return cmp_numbers(n1, n2);
}
/* 3. All numbers are less than all strings. This is aribitrary. */
if ((n1->flags & NUMBER) != 0 && (n2->flags & STRING) != 0) {
return -1;
} else if ((n1->flags & STRING) != 0 && (n2->flags & NUMBER) != 0) {
return 1;
}
/* 4. Two strings */
return cmp_strings(n1, n2);
}
/* sort_down_value_type --- qsort comparison function; descending value type */
static int
sort_down_value_type(const void *p1, const void *p2)
{
return -sort_up_value_type(p1, p2);
}
/* sort_user_func --- user defined qsort comparison function */
static int
sort_user_func(const void *p1, const void *p2)
{
NODE *idx1, *idx2, *val1, *val2, *r;
int ret;
INSTRUCTION *code;
idx1 = *((NODE *const *) p1);
idx2 = *((NODE *const *) p2);
val1 = *((NODE *const *) p1 + 1);
val2 = *((NODE *const *) p2 + 1);
code = TOP()->code_ptr; /* comparison function call instructions */
/* setup 4 arguments to comp_func() */
UPREF(idx1);
PUSH(idx1);
if (val1->type == Node_val)
UPREF(val1);
PUSH(val1);
UPREF(idx2);
PUSH(idx2);
if (val2->type == Node_val)
UPREF(val2);
PUSH(val2);
/* execute the comparison function */
(void) (*interpret)(code);
/* return value of the comparison function */
r = POP_NUMBER();
#ifdef HAVE_MPFR
/*
* mpfr_sgn(mpz_sgn): Returns a positive value if op > 0,
* zero if op = 0, and a negative value if op < 0.
*/
if (is_mpg_float(r))
ret = mpfr_sgn(r->mpg_numbr);
else if (is_mpg_integer(r))
ret = mpz_sgn(r->mpg_i);
else
#endif
ret = (r->numbr < 0.0) ? -1 : (r->numbr > 0.0);
DEREF(r);
return ret;
}
/* assoc_list -- construct, and optionally sort, a list of array elements */
NODE **
assoc_list(NODE *symbol, const char *sort_str, sort_context_t sort_ctxt)
{
typedef int (*qsort_compfunc)(const void *, const void *);
static const struct qsort_funcs {
const char *name;
qsort_compfunc comp_func;
assoc_kind_t kind;
} sort_funcs[] = {
{ "@ind_str_asc", sort_up_index_string, AINDEX|AISTR|AASC },
{ "@ind_num_asc", sort_up_index_number, AINDEX|AINUM|AASC },
{ "@val_str_asc", sort_up_value_string, AVALUE|AVSTR|AASC },
{ "@val_num_asc", sort_up_value_number, AVALUE|AVNUM|AASC },
{ "@ind_str_desc", sort_down_index_string, AINDEX|AISTR|ADESC },
{ "@ind_num_desc", sort_down_index_number, AINDEX|AINUM|ADESC },
{ "@val_str_desc", sort_down_value_string, AVALUE|AVSTR|ADESC },
{ "@val_num_desc", sort_down_value_number, AVALUE|AVNUM|ADESC },
{ "@val_type_asc", sort_up_value_type, AVALUE|AASC },
{ "@val_type_desc", sort_down_value_type, AVALUE|ADESC },
{ "@unsorted", 0, AINDEX },
};
/*
* N.B.: AASC and ADESC are hints to the specific array types.
* See cint_list() in cint_array.c.
*/
NODE **list;
NODE akind;
unsigned long num_elems, j;
int elem_size, qi;
qsort_compfunc cmp_func = 0;
INSTRUCTION *code = NULL;
extern int currule;
int save_rule = 0;
assoc_kind_t assoc_kind = ANONE;
elem_size = 1;
for (qi = 0, j = sizeof(sort_funcs)/sizeof(sort_funcs[0]); qi < j; qi++) {
if (strcmp(sort_funcs[qi].name, sort_str) == 0)
break;
}
if (qi < j) {
cmp_func = sort_funcs[qi].comp_func;
assoc_kind = sort_funcs[qi].kind;
if (symbol->array_funcs != cint_array_func)
assoc_kind &= ~(AASC|ADESC);
if (sort_ctxt != SORTED_IN || (assoc_kind & AVALUE) != 0) {
/* need index and value pair in the list */
assoc_kind |= (AINDEX|AVALUE);
elem_size = 2;
}
} else { /* unrecognized */
NODE *f;
const char *sp;
for (sp = sort_str; *sp != '\0' && ! isspace((unsigned char) *sp); sp++)
continue;
/* empty string or string with space(s) not valid as function name */
if (sp == sort_str || *sp != '\0')
fatal(_("`%s' is invalid as a function name"), sort_str);
f = lookup(sort_str);
if (f == NULL || f->type != Node_func)
fatal(_("sort comparison function `%s' is not defined"), sort_str);
cmp_func = sort_user_func;
/* need index and value pair in the list */
assoc_kind |= (AVALUE|AINDEX);
elem_size = 2;
/* make function call instructions */
code = bcalloc(Op_func_call, 2, 0);
code->func_body = f;
code->func_name = NULL; /* not needed, func_body already assigned */
(code + 1)->expr_count = 4; /* function takes 4 arguments */
code->nexti = bcalloc(Op_stop, 1, 0);
/*
* make non-redirected getline, exit, `next' and `nextfile' fatal in
* callback function by setting currule in interpret()
* to undefined (0).
*/
save_rule = currule; /* save current rule */
currule = 0;
PUSH_CODE(code);
}
akind.flags = (unsigned int) assoc_kind; /* kludge */
list = symbol->alist(symbol, & akind);
assoc_kind = (assoc_kind_t) akind.flags; /* symbol->alist can modify it */
/* check for empty list or unsorted, or list already sorted */
if (list != NULL && cmp_func != NULL && (assoc_kind & (AASC|ADESC)) == 0) {
num_elems = assoc_length(symbol);
qsort(list, num_elems, elem_size * sizeof(NODE *), cmp_func); /* shazzam! */
if (sort_ctxt == SORTED_IN && (assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) {
/* relocate all index nodes to the first half of the list. */
for (j = 1; j < num_elems; j++)
list[j] = list[2 * j];
/* give back extra memory */
erealloc(list, NODE **, num_elems * sizeof(NODE *), "assoc_list");
}
}
if (cmp_func == sort_user_func) {
code = POP_CODE();
currule = save_rule; /* restore current rule */
bcfree(code->nexti); /* Op_stop */
bcfree(code); /* Op_func_call */
}
return list;
}