/*
* symbol.c - routines for symbol table management and code allocation
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991-2015, 2017, 2018,
* the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "awk.h"
extern SRCFILE *srcfiles;
extern INSTRUCTION *rule_list;
#define HASHSIZE 1021
static int func_count; /* total number of functions */
static int var_count; /* total number of global variables and functions */
static NODE *symbol_list;
static void (*install_func)(NODE *) = NULL;
static NODE *make_symbol(const char *name, NODETYPE type);
static NODE *install(const char *name, NODE *parm, NODETYPE type);
static void free_bcpool(INSTRUCTION_POOL *pl);
static AWK_CONTEXT *curr_ctxt = NULL;
static int ctxt_level;
static NODE *global_table, *param_table;
NODE *symbol_table, *func_table;
/* Use a flag to avoid a strcmp() call inside install() */
static bool installing_specials = false;
/* init_symbol_table --- make sure the symbol tables are initialized */
void
init_symbol_table()
{
getnode(global_table);
memset(global_table, '\0', sizeof(NODE));
null_array(global_table);
getnode(param_table);
memset(param_table, '\0', sizeof(NODE));
null_array(param_table);
installing_specials = true;
func_table = install_symbol(estrdup("FUNCTAB", 7), Node_var_array);
symbol_table = install_symbol(estrdup("SYMTAB", 6), Node_var_array);
installing_specials = false;
}
/*
* install_symbol:
* Install a global name in the symbol table, even if it is already there.
* Caller must check against redefinition if that is desired.
*/
NODE *
install_symbol(const char *name, NODETYPE type)
{
return install(name, NULL, type);
}
/*
* lookup --- find the most recent global or param node for name
* installed by install_symbol
*/
NODE *
lookup(const char *name)
{
NODE *n;
NODE *tmp;
NODE *tables[5]; /* manual init below, for z/OS */
int i;
/* ``It's turtles, all the way down.'' */
tables[0] = param_table; /* parameters shadow everything */
tables[1] = global_table; /* SYMTAB and FUNCTAB found first, can't be redefined */
tables[2] = func_table; /* then functions */
tables[3] = symbol_table; /* then globals */
tables[4] = NULL;
tmp = make_string(name, strlen(name));
n = NULL;
for (i = 0; tables[i] != NULL; i++) {
if (tables[i]->table_size == 0)
continue;
if ((do_posix || do_traditional) && tables[i] == global_table)
continue;
n = in_array(tables[i], tmp);
if (n != NULL)
break;
}
unref(tmp);
if (n == NULL || n->type == Node_val) /* non-variable in SYMTAB */
return NULL;
return n; /* new place */
}
/* make_params --- allocate function parameters for the symbol table */
NODE *
make_params(char **pnames, int pcount)
{
NODE *p, *parms;
int i;
if (pcount <= 0 || pnames == NULL)
return NULL;
ezalloc(parms, NODE *, pcount * sizeof(NODE), "make_params");
for (i = 0, p = parms; i < pcount; i++, p++) {
p->type = Node_param_list;
p->param = pnames[i]; /* shadows pname and vname */
p->param_cnt = i;
}
return parms;
}
/* install_params --- install function parameters into the symbol table */
void
install_params(NODE *func)
{
int i, pcount;
NODE *parms;
if (func == NULL)
return;
assert(func->type == Node_func);
if ( (pcount = func->param_cnt) <= 0
|| (parms = func->fparms) == NULL)
return;
for (i = 0; i < pcount; i++)
(void) install(parms[i].param, parms + i, Node_param_list);
}
/*
* remove_params --- remove function parameters out of the symbol table.
*/
void
remove_params(NODE *func)
{
NODE *parms, *p;
int i, pcount;
if (func == NULL)
return;
assert(func->type == Node_func);
if ( (pcount = func->param_cnt) <= 0
|| (parms = func->fparms) == NULL)
return;
for (i = pcount - 1; i >= 0; i--) {
NODE *tmp;
NODE *tmp2;
p = parms + i;
assert(p->type == Node_param_list);
tmp = make_string(p->vname, strlen(p->vname));
tmp2 = in_array(param_table, tmp);
if (tmp2 != NULL && tmp2->dup_ent != NULL)
tmp2->dup_ent = tmp2->dup_ent->dup_ent;
else
(void) assoc_remove(param_table, tmp);
unref(tmp);
}
assoc_clear(param_table); /* shazzam! */
}
/* remove_symbol --- remove a symbol from the symbol table */
NODE *
remove_symbol(NODE *r)
{
NODE *n = in_array(symbol_table, r);
if (n == NULL)
return n;
n = dupnode(n);
(void) assoc_remove(symbol_table, r);
return n;
}
/*
* destroy_symbol --- remove a symbol from symbol table
* and free all associated memory.
*/
void
destroy_symbol(NODE *r)
{
r = remove_symbol(r);
if (r == NULL)
return;
switch (r->type) {
case Node_func:
if (r->param_cnt > 0) {
NODE *n;
int i;
int pcount = r->param_cnt;
/* function parameters of type Node_param_list */
for (i = 0; i < pcount; i++) {
n = r->fparms + i;
efree(n->param);
}
efree(r->fparms);
}
break;
case Node_ext_func:
bcfree(r->code_ptr);
break;
case Node_var_array:
assoc_clear(r);
break;
case Node_var:
unref(r->var_value);
break;
default:
/* Node_param_list -- YYABORT */
break; /* use break so that storage is freed */
}
efree(r->vname);
freenode(r);
}
/* make_symbol --- allocates a global symbol for the symbol table. */
static NODE *
make_symbol(const char *name, NODETYPE type)
{
NODE *r;
getnode(r);
memset(r, '\0', sizeof(NODE));
if (type == Node_var_array)
null_array(r);
else if (type == Node_var)
r->var_value = dupnode(Nnull_string);
r->vname = (char *) name;
r->type = type;
return r;
}
/* install --- install a global name or function parameter in the symbol table */
static NODE *
install(const char *name, NODE *parm, NODETYPE type)
{
NODE *r;
NODE **aptr;
NODE *table;
NODE *n_name;
NODE *prev;
n_name = make_string(name, strlen(name));
table = symbol_table;
if (type == Node_param_list) {
table = param_table;
} else if ( type == Node_func
|| type == Node_ext_func
|| type == Node_builtin_func) {
table = func_table;
} else if (installing_specials) {
table = global_table;
}
if (parm != NULL)
r = parm;
else {
/* global symbol */
r = make_symbol(name, type);
if (type == Node_func)
func_count++;
if (type != Node_ext_func && type != Node_builtin_func && table != global_table)
var_count++; /* total, includes Node_func */
}
if (type == Node_param_list) {
prev = in_array(table, n_name);
if (prev == NULL)
goto simple;
r->dup_ent = prev->dup_ent;
prev->dup_ent = r;
} else {
simple:
/* the simple case */
aptr = assoc_lookup(table, n_name);
unref(*aptr);
*aptr = r;
}
unref(n_name);
if (install_func)
(*install_func)(r);
return r;
}
/* comp_symbol --- compare two (variable or function) names */
static int
comp_symbol(const void *v1, const void *v2)
{
const NODE *const *npp1, *const *npp2;
const NODE *n1, *n2;
npp1 = (const NODE *const *) v1;
npp2 = (const NODE *const *) v2;
n1 = *npp1;
n2 = *npp2;
return strcmp(n1->vname, n2->vname);
}
typedef enum { FUNCTION = 1, VARIABLE } SYMBOL_TYPE;
/* get_symbols --- return a list of optionally sorted symbols */
static NODE **
get_symbols(SYMBOL_TYPE what, bool sort)
{
int i;
NODE **table;
NODE **list;
NODE *r;
long count = 0;
long max;
NODE *the_table;
/*
* assoc_list() returns an array with two elements per awk array
* element. Elements i and i+1 in the C array represent the key
* and value of element j in the awk array. Thus the loops use += 2
* to go through the awk array.
*/
if (what == FUNCTION) {
the_table = func_table;
max = the_table->table_size * 2;
list = assoc_list(the_table, "@unsorted", ASORTI);
emalloc(table, NODE **, (func_count + 1) * sizeof(NODE *), "get_symbols");
for (i = count = 0; i < max; i += 2) {
r = list[i+1];
if (r->type == Node_ext_func || r->type == Node_builtin_func)
continue;
assert(r->type == Node_func);
table[count++] = r;
}
} else { /* what == VARIABLE */
update_global_values();
the_table = symbol_table;
max = the_table->table_size * 2;
list = assoc_list(the_table, "@unsorted", ASORTI);
/* add three: one for FUNCTAB, one for SYMTAB, and one for a final NULL */
emalloc(table, NODE **, (var_count + 1 + 1 + 1) * sizeof(NODE *), "get_symbols");
for (i = count = 0; i < max; i += 2) {
r = list[i+1];
if (r->type == Node_val) /* non-variable in SYMTAB */
continue;
table[count++] = r;
}
table[count++] = func_table;
table[count++] = symbol_table;
}
efree(list);
if (sort && count > 1)
qsort(table, count, sizeof(NODE *), comp_symbol); /* Shazzam! */
table[count] = NULL; /* null terminate the list */
return table;
}
/* variable_list --- list of global variables */
NODE **
variable_list()
{
return get_symbols(VARIABLE, true);
}
/* function_list --- list of functions */
NODE **
function_list(bool sort)
{
return get_symbols(FUNCTION, sort);
}
/* print_vars --- print names and values of global variables */
void
print_vars(NODE **table, int (*print_func)(FILE *, const char *, ...), FILE *fp)
{
int i;
NODE *r;
assert(table != NULL);
for (i = 0; (r = table[i]) != NULL; i++) {
if (r->type == Node_func || r->type == Node_ext_func)
continue;
print_func(fp, "%s: ", r->vname);
if (r->type == Node_var_array)
print_func(fp, "array, %ld elements\n", assoc_length(r));
else if (r->type == Node_var_new)
print_func(fp, "untyped variable\n");
else if (r->type == Node_var)
valinfo(r->var_value, print_func, fp);
}
}
/* foreach_func --- execute given function for each awk function in table. */
int
foreach_func(NODE **table, int (*pfunc)(INSTRUCTION *, void *), void *data)
{
int i;
NODE *r;
int ret = 0;
assert(table != NULL);
for (i = 0; (r = table[i]) != NULL; i++) {
if ((ret = pfunc(r->code_ptr, data)) != 0)
break;
}
return ret;
}
/* release_all_vars --- free all variable memory */
void
release_all_vars()
{
assoc_clear(symbol_table);
assoc_clear(func_table);
assoc_clear(global_table);
}
/* append_symbol --- append symbol to the list of symbols
* installed in the symbol table.
*/
void
append_symbol(NODE *r)
{
NODE *p;
getnode(p);
p->lnode = r;
p->rnode = symbol_list->rnode;
symbol_list->rnode = p;
}
/* release_symbols --- free symbol list and optionally remove symbol from symbol table */
void
release_symbols(NODE *symlist, int keep_globals)
{
NODE *p, *next;
for (p = symlist->rnode; p != NULL; p = next) {
if (! keep_globals) {
/*
* destroys globals, function, and params
* if still in symbol table
*/
destroy_symbol(p->lnode);
}
next = p->rnode;
freenode(p);
}
symlist->rnode = NULL;
}
/* load_symbols --- fill in symbols' information */
void
load_symbols()
{
NODE *r;
NODE *tmp;
NODE *sym_array;
NODE **aptr;
long i, j, max;
NODE *user, *extension, *untyped, *scalar, *array, *built_in;
NODE **list;
NODE *tables[4];
if (PROCINFO_node == NULL)
return;
tables[0] = func_table;
tables[1] = symbol_table;
tables[2] = global_table;
tables[3] = NULL;
tmp = make_string("identifiers", 11);
aptr = assoc_lookup(PROCINFO_node, tmp);
getnode(sym_array);
memset(sym_array, '\0', sizeof(NODE)); /* PPC Mac OS X wants this */
null_array(sym_array);
unref(tmp);
unref(*aptr);
*aptr = sym_array;
sym_array->parent_array = PROCINFO_node;
sym_array->vname = estrdup("identifiers", 11);
user = make_string("user", 4);
extension = make_string("extension", 9);
scalar = make_string("scalar", 6);
untyped = make_string("untyped", 7);
array = make_string("array", 5);
built_in = make_string("builtin", 7);
for (i = 0; tables[i] != NULL; i++) {
list = assoc_list(tables[i], "@unsorted", ASORTI);
max = tables[i]->table_size * 2;
if (max == 0)
continue;
for (j = 0; j < max; j += 2) {
r = list[j+1];
if ( r->type == Node_ext_func
|| r->type == Node_func
|| r->type == Node_builtin_func
|| r->type == Node_var
|| r->type == Node_var_array
|| r->type == Node_var_new) {
tmp = make_string(r->vname, strlen(r->vname));
aptr = assoc_lookup(sym_array, tmp);
unref(tmp);
unref(*aptr);
switch (r->type) {
case Node_ext_func:
*aptr = dupnode(extension);
break;
case Node_func:
*aptr = dupnode(user);
break;
case Node_builtin_func:
*aptr = dupnode(built_in);
break;
case Node_var:
*aptr = dupnode(scalar);
break;
case Node_var_array:
*aptr = dupnode(array);
break;
case Node_var_new:
*aptr = dupnode(untyped);
break;
default:
cant_happen();
break;
}
}
}
efree(list);
}
unref(user);
unref(extension);
unref(scalar);
unref(untyped);
unref(array);
}
/* check_param_names --- make sure no parameter is the name of a function */
bool
check_param_names(void)
{
int i, j;
NODE **list;
NODE *f;
long max;
bool result = true;
NODE n;
if (func_table->table_size == 0)
return result;
max = func_table->table_size * 2;
memset(& n, 0, sizeof n);
n.type = Node_val;
n.flags = STRING|STRCUR;
n.stfmt = STFMT_UNUSED;
#ifdef HAVE_MPFR
n.strndmode = MPFR_round_mode;
#endif
/*
* assoc_list() returns an array with two elements per awk array
* element. Elements i and i+1 in the C array represent the key
* and value of element j in the awk array. Thus the loops use += 2
* to go through the awk array.
*
* In this case, the name is in list[i], and the function is
* in list[i+1]. Just what we need.
*/
list = assoc_list(func_table, "@unsorted", ASORTI);
for (i = 0; i < max; i += 2) {
f = list[i+1];
if (f->type == Node_builtin_func || f->param_cnt == 0)
continue;
/* loop over each param in function i */
for (j = 0; j < f->param_cnt; j++) {
/* compare to function names */
/* use a fake node to avoid malloc/free of make_string */
n.stptr = f->fparms[j].param;
n.stlen = strlen(f->fparms[j].param);
if (in_array(func_table, & n)) {
error(
_("function `%s': can't use function `%s' as a parameter name"),
list[i]->stptr,
f->fparms[j].param);
result = false;
}
}
}
efree(list);
return result;
}
static INSTRUCTION_POOL *pools;
/*
* For best performance, the INSTR_CHUNK value should be divisible by all
* possible sizes, i.e. 1 through MAX_INSTRUCTION_ALLOC. Otherwise, there
* will be wasted space at the end of the block.
*/
#define INSTR_CHUNK (2*3*21)
struct instruction_block {
struct instruction_block *next;
INSTRUCTION i[INSTR_CHUNK];
};
/* bcfree --- deallocate instruction */
void
bcfree(INSTRUCTION *cp)
{
assert(cp->pool_size >= 1 && cp->pool_size <= MAX_INSTRUCTION_ALLOC);
cp->opcode = Op_illegal;
cp->nexti = pools->pool[cp->pool_size - 1].free_list;
pools->pool[cp->pool_size - 1].free_list = cp;
}
/* bcalloc --- allocate a new instruction */
INSTRUCTION *
bcalloc(OPCODE op, int size, int srcline)
{
INSTRUCTION *cp;
struct instruction_mem_pool *pool;
assert(size >= 1 && size <= MAX_INSTRUCTION_ALLOC);
pool = &pools->pool[size - 1];
if (pool->free_list != NULL) {
cp = pool->free_list;
pool->free_list = cp->nexti;
} else if (pool->free_space && pool->free_space + size <= & pool->block_list->i[INSTR_CHUNK]) {
cp = pool->free_space;
pool->free_space += size;
} else {
struct instruction_block *block;
emalloc(block, struct instruction_block *, sizeof(struct instruction_block), "bcalloc");
block->next = pool->block_list;
pool->block_list = block;
cp = &block->i[0];
pool->free_space = &block->i[size];
}
memset(cp, 0, size * sizeof(INSTRUCTION));
cp->pool_size = size;
cp->opcode = op;
cp->source_line = srcline;
return cp;
}
/* new_context --- create a new execution context. */
AWK_CONTEXT *
new_context()
{
AWK_CONTEXT *ctxt;
ezalloc(ctxt, AWK_CONTEXT *, sizeof(AWK_CONTEXT), "new_context");
ctxt->srcfiles.next = ctxt->srcfiles.prev = & ctxt->srcfiles;
ctxt->rule_list.opcode = Op_list;
ctxt->rule_list.lasti = & ctxt->rule_list;
return ctxt;
}
/* set_context --- change current execution context. */
static void
set_context(AWK_CONTEXT *ctxt)
{
pools = & ctxt->pools;
symbol_list = & ctxt->symbols;
srcfiles = & ctxt->srcfiles;
rule_list = & ctxt->rule_list;
install_func = ctxt->install_func;
curr_ctxt = ctxt;
}
/*
* push_context:
*
* Switch to the given context after saving the current one. The set
* of active execution contexts forms a stack; the global or main context
* is at the bottom of the stack.
*/
void
push_context(AWK_CONTEXT *ctxt)
{
ctxt->prev = curr_ctxt;
/* save current source and sourceline */
if (curr_ctxt != NULL) {
curr_ctxt->sourceline = sourceline;
curr_ctxt->source = source;
}
sourceline = 0;
source = NULL;
set_context(ctxt);
ctxt_level++;
}
/* pop_context --- switch to previous execution context. */
void
pop_context()
{
AWK_CONTEXT *ctxt;
assert(curr_ctxt != NULL);
if (curr_ctxt->prev == NULL)
fatal(_("can not pop main context"));
ctxt = curr_ctxt->prev;
/* restore source and sourceline */
sourceline = ctxt->sourceline;
source = ctxt->source;
set_context(ctxt);
ctxt_level--;
}
/* in_main_context --- are we in the main context ? */
int
in_main_context()
{
assert(ctxt_level > 0);
return (ctxt_level == 1);
}
/* free_context --- free context structure and related data. */
void
free_context(AWK_CONTEXT *ctxt, bool keep_globals)
{
SRCFILE *s, *sn;
if (ctxt == NULL)
return;
assert(curr_ctxt != ctxt);
/* free all code including function codes */
free_bcpool(& ctxt->pools);
/* free symbols */
release_symbols(& ctxt->symbols, keep_globals);
/* free srcfiles */
for (s = & ctxt->srcfiles; s != & ctxt->srcfiles; s = sn) {
sn = s->next;
if (s->stype != SRC_CMDLINE && s->stype != SRC_STDIN)
efree(s->fullpath);
efree(s->src);
efree(s);
}
efree(ctxt);
}
/* free_bc_internal --- free internal memory of an instruction. */
static void
free_bc_internal(INSTRUCTION *cp)
{
NODE *m;
switch(cp->opcode) {
case Op_func_call:
if (cp->func_name != NULL)
efree(cp->func_name);
break;
case Op_push_re:
case Op_match_rec:
case Op_match:
case Op_nomatch:
m = cp->memory;
if (m->re_reg[0] != NULL)
refree(m->re_reg[0]);
if (m->re_reg[1] != NULL)
refree(m->re_reg[1]);
if (m->re_exp != NULL)
unref(m->re_exp);
if (m->re_text != NULL)
unref(m->re_text);
freenode(m);
break;
case Op_token:
/* token lost during error recovery in yyparse */
if (cp->lextok != NULL)
efree(cp->lextok);
break;
case Op_push_i:
m = cp->memory;
unref(m);
break;
case Op_store_var:
m = cp->initval;
if (m != NULL)
unref(m);
break;
case Op_illegal:
cant_happen();
default:
break;
}
}
/* free_bc_mempool --- free a single pool */
static void
free_bc_mempool(struct instruction_mem_pool *pool, int size)
{
bool first = true;
struct instruction_block *block, *next;
for (block = pool->block_list; block; block = next) {
INSTRUCTION *cp, *end;
end = (first ? pool->free_space : & block->i[INSTR_CHUNK]);
for (cp = & block->i[0]; cp + size <= end; cp += size) {
if (cp->opcode != Op_illegal)
free_bc_internal(cp);
}
next = block->next;
efree(block);
first = false;
}
}
/* free_bcpool --- free list of instruction memory pools */
static void
free_bcpool(INSTRUCTION_POOL *pl)
{
int i;
for (i = 0; i < MAX_INSTRUCTION_ALLOC; i++)
free_bc_mempool(& pl->pool[i], i + 1);
}