/* * awkgram.y --- yacc/bison parser */ /* * Copyright (C) 1986, 1988, 1989, 1991-2018 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. * * GAWK is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * GAWK is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ %{ #ifdef GAWKDEBUG #define YYDEBUG 12 #endif #include "awk.h" #if defined(__STDC__) && __STDC__ < 1 /* VMS weirdness, maybe elsewhere */ #define signed /**/ #endif static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1; static void error_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2; static void lintwarn_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2; static void warning_ln(int line, const char *m, ...) 
ATTRIBUTE_PRINTF_2; static char *get_src_buf(void); static int yylex(void); int yyparse(void); static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op); static char **check_params(char *fname, int pcount, INSTRUCTION *list); static int install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist); static NODE *mk_rexp(INSTRUCTION *exp); static void param_sanity(INSTRUCTION *arglist); static int parms_shadow(INSTRUCTION *pc, bool *shadow); #ifndef NO_LINT static int isnoeffect(OPCODE type); #endif static INSTRUCTION *make_assignable(INSTRUCTION *ip); static void dumpintlstr(const char *str, size_t len); static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2); static int include_source(INSTRUCTION *file); static int load_library(INSTRUCTION *file); static void next_sourcefile(void); static char *tokexpand(void); static NODE *set_profile_text(NODE *n, const char *str, size_t len); #define instruction(t) bcalloc(t, 1, 0) static INSTRUCTION *mk_program(void); static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action); static INSTRUCTION *mk_function(INSTRUCTION *fi, INSTRUCTION *def); static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch, INSTRUCTION *elsep, INSTRUCTION *false_branch); static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1); static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond, INSTRUCTION *incr, INSTRUCTION *body); static void fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target); static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op); static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op); static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op); static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype); static int 
count_expressions(INSTRUCTION **list, bool isarg); static INSTRUCTION *optimize_assignment(INSTRUCTION *exp); static void add_lint(INSTRUCTION *list, LINTTYPE linttype); enum defref { FUNC_DEFINE, FUNC_USE, FUNC_EXT }; static void func_use(const char *name, enum defref how); static void check_funcs(void); static ssize_t read_one_line(int fd, void *buffer, size_t count); static int one_line_close(int fd); static void split_comment(void); static void check_comment(void); static void add_sign_to_num(NODE *n, char sign); static bool at_seen = false; static bool want_source = false; static bool want_regexp = false; /* lexical scanning kludge */ static enum { FUNC_HEADER, FUNC_BODY, DONT_CHECK } want_param_names = DONT_CHECK; /* ditto */ static char *in_function; /* parsing kludge */ static int rule = 0; const char *const ruletab[] = { "?", "BEGIN", "Rule", "END", "BEGINFILE", "ENDFILE", }; static bool in_print = false; /* lexical scanning kludge for print */ static int in_parens = 0; /* lexical scanning kludge for print */ static int sub_counter = 0; /* array dimension counter for use in delete */ static char *lexptr; /* pointer to next char during parsing */ static char *lexend; /* end of buffer */ static char *lexptr_begin; /* keep track of where we were for error msgs */ static char *lexeme; /* beginning of lexeme for debugging */ static bool lexeof; /* seen EOF for current source? 
*/ static char *thisline = NULL; static int in_braces = 0; /* count braces for firstline, lastline in an 'action' */ static int lastline = 0; static int firstline = 0; static SRCFILE *sourcefile = NULL; /* current program source */ static int lasttok = 0; static bool eof_warned = false; /* GLOBAL: want warning for each file */ static int break_allowed; /* kludge for break */ static int continue_allowed; /* kludge for continue */ #define END_FILE -1000 #define END_SRC -2000 #define YYDEBUG_LEXER_TEXT (lexeme) static char *tokstart = NULL; static char *tok = NULL; static char *tokend; static int errcount = 0; extern char *source; extern int sourceline; extern SRCFILE *srcfiles; extern INSTRUCTION *rule_list; extern int max_args; extern NODE **args_array; static INSTRUCTION *rule_block[sizeof(ruletab)]; static INSTRUCTION *ip_rec; static INSTRUCTION *ip_newfile; static INSTRUCTION *ip_atexit = NULL; static INSTRUCTION *ip_end; static INSTRUCTION *ip_endfile; static INSTRUCTION *ip_beginfile; INSTRUCTION *main_beginfile; static INSTRUCTION *comment = NULL; static INSTRUCTION *prior_comment = NULL; static INSTRUCTION *comment_to_save = NULL; static INSTRUCTION *program_comment = NULL; static INSTRUCTION *function_comment = NULL; static INSTRUCTION *block_comment = NULL; static bool func_first = true; static bool first_rule = true; static inline INSTRUCTION *list_create(INSTRUCTION *x); static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x); static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x); static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2); static inline INSTRUCTION *add_pending_comment(INSTRUCTION *stmt); extern double fmod(double x, double y); #define YYSTYPE INSTRUCTION * %} %token FUNC_CALL NAME REGEXP FILENAME %token YNUMBER YSTRING TYPED_REGEXP %token RELOP IO_OUT IO_IN %token ASSIGNOP ASSIGN MATCHOP CONCAT_OP %token SUBSCRIPT %token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE %token 
LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE %token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION %token LEX_BEGINFILE LEX_ENDFILE %token LEX_GETLINE LEX_NEXTFILE %token LEX_IN %token LEX_AND LEX_OR INCREMENT DECREMENT %token LEX_BUILTIN LEX_LENGTH %token LEX_EOF %token LEX_INCLUDE LEX_EVAL LEX_LOAD %token NEWLINE /* Lowest to highest */ %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL %right '?' ':' %left LEX_OR %left LEX_AND %left LEX_GETLINE %nonassoc LEX_IN %left FUNC_CALL LEX_BUILTIN LEX_LENGTH %nonassoc ',' %left MATCHOP %nonassoc RELOP '<' '>' IO_IN IO_OUT %left CONCAT_OP %left YSTRING YNUMBER TYPED_REGEXP %left '+' '-' %left '*' '/' '%' %right '!' UNARY %right '^' %left INCREMENT DECREMENT %left '$' %left '(' ')' %% program : /* empty */ | program rule { rule = 0; yyerrok; } | program nls | program LEX_EOF { next_sourcefile(); } | program error { rule = 0; /* * If errors, give up, don't produce an infinite * stream of syntax error messages. */ /* yyerrok; */ } ; rule : pattern action { (void) append_rule($1, $2); first_rule = false; } | pattern statement_term { if (rule != Rule) { msg(_("%s blocks must have an action part"), ruletab[rule]); errcount++; } else if ($1 == NULL) { msg(_("each rule must have a pattern or an action part")); errcount++; } else /* pattern rule with non-empty pattern */ (void) append_rule($1, NULL); } | function_prologue action { in_function = NULL; (void) mk_function($1, $2); want_param_names = DONT_CHECK; yyerrok; } | '@' LEX_INCLUDE source statement_term { want_source = false; at_seen = false; yyerrok; } | '@' LEX_LOAD library statement_term { want_source = false; at_seen = false; yyerrok; } ; source : FILENAME { if (include_source($1) < 0) YYABORT; efree($1->lextok); bcfree($1); $$ = NULL; } | FILENAME error { $$ = NULL; } | error { $$ = NULL; } ; library : FILENAME { if (load_library($1) < 0) YYABORT; efree($1->lextok); bcfree($1); $$ = NULL; } | FILENAME error { $$ = NULL; } | error { $$ = 
NULL; } ; pattern : /* empty */ { rule = Rule; if (comment != NULL) { $$ = list_create(comment); comment = NULL; } else $$ = NULL; } | exp { rule = Rule; if (comment != NULL) { $$ = list_prepend($1, comment); comment = NULL; } else $$ = $1; } | exp ',' opt_nls exp { INSTRUCTION *tp; add_lint($1, LINT_assign_in_cond); add_lint($4, LINT_assign_in_cond); tp = instruction(Op_no_op); list_prepend($1, bcalloc(Op_line_range, !!do_pretty_print + 1, 0)); $1->nexti->triggered = false; $1->nexti->target_jmp = $4->nexti; list_append($1, instruction(Op_cond_pair)); $1->lasti->line_range = $1->nexti; $1->lasti->target_jmp = tp; list_append($4, instruction(Op_cond_pair)); $4->lasti->line_range = $1->nexti; $4->lasti->target_jmp = tp; if (do_pretty_print) { ($1->nexti + 1)->condpair_left = $1->lasti; ($1->nexti + 1)->condpair_right = $4->lasti; } if (comment != NULL) { $$ = list_append(list_merge(list_prepend($1, comment), $4), tp); comment = NULL; } else $$ = list_append(list_merge($1, $4), tp); rule = Rule; } | LEX_BEGIN { static int begin_seen = 0; func_first = false; if (do_lint_old && ++begin_seen == 2) warning_ln($1->source_line, _("old awk does not support multiple `BEGIN' or `END' rules")); $1->in_rule = rule = BEGIN; $1->source_file = source; check_comment(); $$ = $1; } | LEX_END { static int end_seen = 0; func_first = false; if (do_lint_old && ++end_seen == 2) warning_ln($1->source_line, _("old awk does not support multiple `BEGIN' or `END' rules")); $1->in_rule = rule = END; $1->source_file = source; check_comment(); $$ = $1; } | LEX_BEGINFILE { func_first = false; $1->in_rule = rule = BEGINFILE; $1->source_file = source; check_comment(); $$ = $1; } | LEX_ENDFILE { func_first = false; $1->in_rule = rule = ENDFILE; $1->source_file = source; check_comment(); $$ = $1; } ; action : l_brace statements r_brace opt_semi opt_nls { INSTRUCTION *ip; if ($2 == NULL) ip = list_create(instruction(Op_no_op)); else ip = $2; $$ = ip; } ; func_name : NAME { $$ = $1; } | FUNC_CALL { $$ = 
$1; } | lex_builtin { yyerror(_("`%s' is a built-in function, it cannot be redefined"), tokstart); YYABORT; } | '@' LEX_EVAL { $$ = $2; at_seen = false; } ; lex_builtin : LEX_BUILTIN | LEX_LENGTH ; function_prologue : LEX_FUNCTION func_name '(' { want_param_names = FUNC_HEADER; } opt_param_list r_paren opt_nls { /* * treat any comments between BOF and the first function * definition (with no intervening BEGIN etc block) as * program comments. Special kludge: iff there are more * than one such comments, treat the last as a function * comment. */ if (prior_comment != NULL) { comment_to_save = prior_comment; prior_comment = NULL; } else if (comment != NULL) { comment_to_save = comment; comment = NULL; } else comment_to_save = NULL; if (comment_to_save != NULL && func_first && strstr(comment_to_save->memory->stptr, "\n\n") != NULL) split_comment(); /* save any other pre-function comment as function comment */ if (comment_to_save != NULL) { function_comment = comment_to_save; comment_to_save = NULL; } func_first = false; $1->source_file = source; if (install_function($2->lextok, $1, $5) < 0) YYABORT; in_function = $2->lextok; $2->lextok = NULL; bcfree($2); /* $5 already free'd in install_function */ $$ = $1; want_param_names = FUNC_BODY; } ; regexp /* * In this rule, want_regexp tells yylex that the next thing * is a regexp so it should read up to the closing slash. */ : a_slash { want_regexp = true; } REGEXP /* The terminating '/' is consumed by yylex(). 
*/ { NODE *n, *exp; char *re; size_t len; re = $3->lextok; $3->lextok = NULL; len = strlen(re); if (do_lint) { if (len == 0) lintwarn_ln($3->source_line, _("regexp constant `//' looks like a C++ comment, but is not")); else if (re[0] == '*' && re[len-1] == '*') /* possible C comment */ lintwarn_ln($3->source_line, _("regexp constant `/%s/' looks like a C comment, but is not"), re); } exp = make_str_node(re, len, ALREADY_MALLOCED); n = make_regnode(Node_regex, exp); if (n == NULL) { unref(exp); YYABORT; } $$ = $3; $$->opcode = Op_match_rec; $$->memory = n; } ; typed_regexp : TYPED_REGEXP { char *re; size_t len; re = $1->lextok; $1->lextok = NULL; len = strlen(re); $$ = $1; $$->opcode = Op_push_re; $$->memory = make_typed_regex(re, len); } a_slash : '/' { bcfree($1); } | SLASH_BEFORE_EQUAL ; statements : /* empty */ { if (prior_comment != NULL) { $$ = list_create(prior_comment); prior_comment = NULL; } else if (comment != NULL) { $$ = list_create(comment); comment = NULL; } else $$ = NULL; } | statements statement { if ($2 == NULL) { if (prior_comment != NULL) { $$ = list_append($1, prior_comment); prior_comment = NULL; if (comment != NULL) { $$ = list_append($$, comment); comment = NULL; } } else if (comment != NULL) { $$ = list_append($1, comment); comment = NULL; } else $$ = $1; } else { add_lint($2, LINT_no_effect); if ($1 == NULL) { if (prior_comment != NULL) { $$ = list_append($2, prior_comment); prior_comment = NULL; if (comment != NULL) { $$ = list_append($$, comment); comment = NULL; } } else if (comment != NULL) { $$ = list_append($2, comment); comment = NULL; } else $$ = $2; } else { if (prior_comment != NULL) { list_append($2, prior_comment); prior_comment = NULL; if (comment != NULL) { list_append($2, comment); comment = NULL; } } else if (comment != NULL) { list_append($2, comment); comment = NULL; } $$ = list_merge($1, $2); } } yyerrok; } | statements error { $$ = NULL; } ; statement_term : nls | semi opt_nls ; statement : semi opt_nls { $$ = NULL; } | 
l_brace statements r_brace { $$ = $2; } | if_statement { if (do_pretty_print) $$ = list_prepend($1, instruction(Op_exec_count)); else $$ = $1; } | LEX_SWITCH '(' exp r_paren opt_nls l_brace case_statements opt_nls r_brace { INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt; INSTRUCTION *ip, *nextc, *tbreak; const char **case_values = NULL; int maxcount = 128; int case_count = 0; int i; tbreak = instruction(Op_no_op); cstmt = list_create(tbreak); cexp = list_create(instruction(Op_pop)); dflt = instruction(Op_jmp); dflt->target_jmp = tbreak; /* if no case match and no explicit default */ if ($7 != NULL) { curr = $7->nexti; bcfree($7); /* Op_list */ } /* else curr = NULL; */ for (; curr != NULL; curr = nextc) { INSTRUCTION *caseexp = curr->case_exp; INSTRUCTION *casestmt = curr->case_stmt; nextc = curr->nexti; if (curr->opcode == Op_K_case) { if (caseexp->opcode == Op_push_i) { /* a constant scalar */ char *caseval; caseval = force_string(caseexp->memory)->stptr; for (i = 0; i < case_count; i++) { if (strcmp(caseval, case_values[i]) == 0) error_ln(curr->source_line, _("duplicate case values in switch body: %s"), caseval); } if (case_values == NULL) emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement"); else if (case_count >= maxcount) { maxcount += 128; erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement"); } case_values[case_count++] = caseval; } else { /* match a constant regex against switch expression. 
*/ (curr + 1)->match_exp = true; } curr->stmt_start = casestmt->nexti; curr->stmt_end = casestmt->lasti; (void) list_prepend(cexp, curr); (void) list_prepend(cexp, caseexp); } else { if (dflt->target_jmp != tbreak) error_ln(curr->source_line, _("duplicate `default' detected in switch body")); else dflt->target_jmp = casestmt->nexti; if (do_pretty_print) { curr->stmt_start = casestmt->nexti; curr->stmt_end = casestmt->lasti; (void) list_prepend(cexp, curr); } else bcfree(curr); } cstmt = list_merge(casestmt, cstmt); } if (case_values != NULL) efree(case_values); ip = $3; if (do_pretty_print) { (void) list_prepend(ip, $1); (void) list_prepend(ip, instruction(Op_exec_count)); $1->target_break = tbreak; ($1 + 1)->switch_start = cexp->nexti; ($1 + 1)->switch_end = cexp->lasti; }/* else $1 is NULL */ (void) list_append(cexp, dflt); (void) list_merge(ip, cexp); $$ = list_merge(ip, cstmt); break_allowed--; fix_break_continue(ip, tbreak, NULL); } | LEX_WHILE '(' exp r_paren opt_nls statement { /* * ----------------- * tc: * cond * ----------------- * [Op_jmp_false tb ] * ----------------- * body * ----------------- * [Op_jmp tc ] * tb:[Op_no_op ] */ INSTRUCTION *ip, *tbreak, *tcont; tbreak = instruction(Op_no_op); add_lint($3, LINT_assign_in_cond); tcont = $3->nexti; ip = list_append($3, instruction(Op_jmp_false)); ip->lasti->target_jmp = tbreak; if (do_pretty_print) { (void) list_append(ip, instruction(Op_exec_count)); $1->target_break = tbreak; $1->target_continue = tcont; ($1 + 1)->while_body = ip->lasti; (void) list_prepend(ip, $1); }/* else $1 is NULL */ if ($6 != NULL) (void) list_merge(ip, $6); (void) list_append(ip, instruction(Op_jmp)); ip->lasti->target_jmp = tcont; $$ = list_append(ip, tbreak); break_allowed--; continue_allowed--; fix_break_continue(ip, tbreak, tcont); } | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls { /* * ----------------- * z: * body * ----------------- * tc: * cond * ----------------- * [Op_jmp_true | z ] * tb:[Op_no_op ] */ 
INSTRUCTION *ip, *tbreak, *tcont; tbreak = instruction(Op_no_op); tcont = $6->nexti; add_lint($6, LINT_assign_in_cond); if ($3 != NULL) ip = list_merge($3, $6); else ip = list_prepend($6, instruction(Op_no_op)); if (do_pretty_print) (void) list_prepend(ip, instruction(Op_exec_count)); (void) list_append(ip, instruction(Op_jmp_true)); ip->lasti->target_jmp = ip->nexti; $$ = list_append(ip, tbreak); break_allowed--; continue_allowed--; fix_break_continue(ip, tbreak, tcont); if (do_pretty_print) { $1->target_break = tbreak; $1->target_continue = tcont; ($1 + 1)->doloop_cond = tcont; $$ = list_prepend(ip, $1); bcfree($4); } /* else $1 and $4 are NULLs */ } | LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement { INSTRUCTION *ip; char *var_name = $3->lextok; if ($8 != NULL && $8->lasti->opcode == Op_K_delete && $8->lasti->expr_count == 1 && $8->nexti->opcode == Op_push && ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update)) && strcmp($8->nexti->memory->vname, var_name) == 0 ) { /* Efficiency hack. Recognize the special case of * * for (iggy in foo) * delete foo[iggy] * * and treat it as if it were * * delete foo * * Check that the body is a `delete a[i]' statement, * and that both the loop var and array names match. 
*/ NODE *arr = NULL; ip = $8->nexti->nexti; if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti) arr = $5->nexti->memory; if (arr != NULL && ip->opcode == Op_no_op && ip->nexti->opcode == Op_push_array && strcmp(ip->nexti->memory->vname, arr->vname) == 0 && ip->nexti->nexti == $8->lasti ) { (void) make_assignable($8->nexti); $8->lasti->opcode = Op_K_delete_loop; $8->lasti->expr_count = 0; if ($1 != NULL) bcfree($1); efree(var_name); bcfree($3); bcfree($4); bcfree($5); $$ = $8; } else goto regular_loop; } else { INSTRUCTION *tbreak, *tcont; /* [ Op_push_array a ] * [ Op_arrayfor_init | ib ] * ic:[ Op_arrayfor_incr | ib ] * [ Op_var_assign if any ] * * body * * [Op_jmp | ic ] * ib:[Op_arrayfor_final ] */ regular_loop: ip = $5; ip->nexti->opcode = Op_push_array; tbreak = instruction(Op_arrayfor_final); $4->opcode = Op_arrayfor_incr; $4->array_var = variable($3->source_line, var_name, Node_var); $4->target_jmp = tbreak; tcont = $4; $3->opcode = Op_arrayfor_init; $3->target_jmp = tbreak; (void) list_append(ip, $3); if (do_pretty_print) { $1->opcode = Op_K_arrayfor; $1->target_continue = tcont; $1->target_break = tbreak; (void) list_append(ip, $1); } /* else $1 is NULL */ /* add update_FOO instruction if necessary */ if ($4->array_var->type == Node_var && $4->array_var->var_update) { (void) list_append(ip, instruction(Op_var_update)); ip->lasti->update_var = $4->array_var->var_update; } (void) list_append(ip, $4); /* add set_FOO instruction if necessary */ if ($4->array_var->type == Node_var && $4->array_var->var_assign) { (void) list_append(ip, instruction(Op_var_assign)); ip->lasti->assign_var = $4->array_var->var_assign; } if (do_pretty_print) { (void) list_append(ip, instruction(Op_exec_count)); ($1 + 1)->forloop_cond = $4; ($1 + 1)->forloop_body = ip->lasti; } if ($8 != NULL) (void) list_merge(ip, $8); (void) list_append(ip, instruction(Op_jmp)); ip->lasti->target_jmp = $4; $$ = list_append(ip, tbreak); fix_break_continue(ip, tbreak, tcont); } 
break_allowed--; continue_allowed--; } | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement { $$ = mk_for_loop($1, $3, $6, $9, $12); break_allowed--; continue_allowed--; } | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement { $$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11); break_allowed--; continue_allowed--; } | non_compound_stmt { if (do_pretty_print) $$ = list_prepend($1, instruction(Op_exec_count)); else $$ = $1; $$ = add_pending_comment($$); } ; non_compound_stmt : LEX_BREAK statement_term { if (! break_allowed) error_ln($1->source_line, _("`break' is not allowed outside a loop or switch")); $1->target_jmp = NULL; $$ = list_create($1); $$ = add_pending_comment($$); } | LEX_CONTINUE statement_term { if (! continue_allowed) error_ln($1->source_line, _("`continue' is not allowed outside a loop")); $1->target_jmp = NULL; $$ = list_create($1); $$ = add_pending_comment($$); } | LEX_NEXT statement_term { /* if inside function (rule = 0), resolve context at run-time */ if (rule && rule != Rule) error_ln($1->source_line, _("`next' used in %s action"), ruletab[rule]); $1->target_jmp = ip_rec; $$ = list_create($1); $$ = add_pending_comment($$); } | LEX_NEXTFILE statement_term { /* if inside function (rule = 0), resolve context at run-time */ if (rule == BEGIN || rule == END || rule == ENDFILE) error_ln($1->source_line, _("`nextfile' used in %s action"), ruletab[rule]); $1->target_newfile = ip_newfile; $1->target_endfile = ip_endfile; $$ = list_create($1); $$ = add_pending_comment($$); } | LEX_EXIT opt_exp statement_term { /* Initialize the two possible jump targets, the actual target * is resolved at run-time. 
*/
		$1->target_end = ip_end;	/* first instruction in end_block */
		$1->target_atexit = ip_atexit;	/* cleanup and go home */

		if ($2 == NULL) {
			/* bare `exit': put an Op_push_i of Nnull_string ahead of it */
			$$ = list_create($1);
			(void) list_prepend($$, instruction(Op_push_i));
			$$->nexti->memory = dupnode(Nnull_string);
		} else
			$$ = list_append($2, $1);
		$$ = add_pending_comment($$);
	  }
	| LEX_RETURN
	  {
		/* Mid-rule action: diagnose `return' as soon as the keyword is seen. */
		if (! in_function)
			yyerror(_("`return' used outside function context"));
	  } opt_exp statement_term {
		/* $1 is the return instruction, $3 the optional expression list. */
		if ($3 == NULL) {
			/* bare `return': put an Op_push_i of Nnull_string ahead of it */
			$$ = list_create($1);
			(void) list_prepend($$, instruction(Op_push_i));
			$$->nexti->memory = dupnode(Nnull_string);
		} else {
			/*
			 * The mid-rule action above reports a `return' outside
			 * a function but contains no YYABORT, so this action can
			 * still be reached with in_function == NULL.  Passing a
			 * null pointer to strcmp() is undefined behavior, hence
			 * the explicit test before the comparison.
			 */
			if (do_optimize
				&& in_function != NULL
				&& $3->lasti->opcode == Op_func_call
				&& strcmp($3->lasti->func_name, in_function) == 0
			) {
				/* Do tail recursion optimization. Tail
				 * call without a return value is recognized
				 * in mk_function().
				 */
				($3->lasti + 1)->tail_call = true;
			}
			$$ = list_append($3, $1);
		}
		$$ = add_pending_comment($$);
	  }
	| simple_stmt statement_term
	;

/*
 * A simple_stmt exists to satisfy a constraint in the POSIX
 * grammar allowing them to occur as the 1st and 3rd parts
 * in a `for (...;...;...)' loop.  This is a historical oddity
 * inherited from Unix awk, not at all documented in the AK&W
 * awk book.  We support it, as this was reported as a bug.
 * We don't bother to document it though. So there.
 */

simple_stmt
	: print { in_print = true; in_parens = 0; } print_expression_list output_redir
	  {
		/*
		 * Optimization: plain `print' has no expression list, so $3 is null.
		 * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec,
		 * which is faster for these two cases.
*/
		/*
		 * $1 is the print/printf instruction, $3 the expression list
		 * (or NULL), $4 the output redirection list (or NULL).
		 *
		 * The fast path is taken only for Op_K_print whose argument list
		 * is either absent or has the shape
		 *	Op_list -> Op_push_i (Node_val) -> Op_field_spec
		 * i.e. a field reference with a constant field number.
		 */
		if ($1->opcode == Op_K_print &&
			($3 == NULL
				|| ($3->lasti->opcode == Op_field_spec
					&& $3->nexti->nexti->nexti == $3->lasti
					&& $3->nexti->nexti->opcode == Op_push_i
					&& $3->nexti->nexti->memory->type == Node_val)
			)
		) {
			/* the lint message below is issued at most once per run */
			static bool warned = false;
			/*   -----------------
			 *      output_redir
			 *    [ redirect exp ]
			 *   -----------------
			 *     expression_list
			 *   ------------------
			 *    [Op_K_print_rec | NULL | redir_type | expr_count]
			 */

			if ($3 != NULL) {
				NODE *n = $3->nexti->nexti->memory;

				/* a non-zero constant field number is not $0: use the general path */
				if (! iszero(n))
					goto regular_print;

				/* `print $0': the argument bytecode is not needed, release it */
				bcfree($3->lasti);			/* Op_field_spec */
				unref(n);				/* Node_val */
				bcfree($3->nexti->nexti);		/* Op_push_i */
				bcfree($3->nexti);			/* Op_list */
				bcfree($3);				/* Op_list */
			} else {
				if (do_lint && (rule == BEGIN || rule == END) && ! warned) {
					warned = true;
					lintwarn_ln($1->source_line,
		_("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
				}
			}

			$1->expr_count = 0;
			$1->opcode = Op_K_print_rec;
			if ($4 == NULL) {    /* no redirection */
				$1->redir_type = redirect_none;
				$$ = list_create($1);
			} else {
				/*
				 * The first instruction of the redirection list
				 * carries the redir_type: copy it into the print
				 * instruction, unlink that instruction, free it.
				 */
				INSTRUCTION *ip;
				ip = $4->nexti;
				$1->redir_type = ip->redir_type;
				$4->nexti = ip->nexti;
				bcfree(ip);
				$$ = list_append($4, $1);
			}
		} else {
			/*   -----------------
			 *    [ output_redir    ]
			 *    [ redirect exp    ]
			 *   -----------------
			 *    [ expression_list ]
			 *   ------------------
			 *    [$1 | NULL | redir_type | expr_count]
			 *
			 */
			/* also entered by goto from the fast path above */
regular_print:
			if ($4 == NULL) {		/* no redirection */
				if ($3 == NULL) {	/* printf without arg */
					$1->expr_count = 0;
					$1->redir_type = redirect_none;
					$$ = list_create($1);
				} else {
					/* count_expressions() receives &t and may update it */
					INSTRUCTION *t = $3;
					$1->expr_count = count_expressions(&t, false);
					$1->redir_type = redirect_none;
					$$ = list_append(t, $1);
				}
			} else {
				/* same redir_type hand-over as in the fast path */
				INSTRUCTION *ip;
				ip = $4->nexti;
				$1->redir_type = ip->redir_type;
				$4->nexti = ip->nexti;
				bcfree(ip);
				if ($3 == NULL) {
					$1->expr_count = 0;
					$$ = list_append($4, $1);
				} else {
					/* redirection bytecode first, then the arguments, then print */
					INSTRUCTION *t = $3;
					$1->expr_count = count_expressions(&t, false);
					$$ = list_append(list_merge($4, t), $1);
				}
			}
		}
		$$ = add_pending_comment($$);
	  }

	| LEX_DELETE NAME { sub_counter = 0; }
delete_subscript_list { char *arr = $2->lextok; $2->opcode = Op_push_array; $2->memory = variable($2->source_line, arr, Node_var_new); if (! do_posix && ! do_traditional) { if ($2->memory == symbol_table) fatal(_("`delete' is not allowed with SYMTAB")); else if ($2->memory == func_table) fatal(_("`delete' is not allowed with FUNCTAB")); } if ($4 == NULL) { /* * As of September 2012, POSIX has added support * for `delete array'. See: * http://austingroupbugs.net/view.php?id=544 * * Thanks to Nathan Weeks for the initiative. * * Thus we no longer warn or check do_posix. * Also, since BWK awk supports it, we don't have to * check do_traditional either. */ $1->expr_count = 0; $$ = list_append(list_create($2), $1); } else { $1->expr_count = sub_counter; $$ = list_append(list_append($4, $2), $1); } $$ = add_pending_comment($$); } | LEX_DELETE '(' NAME ')' /* * this is for tawk compatibility. maybe the warnings * should always be done. */ { static bool warned = false; char *arr = $3->lextok; if (do_lint && ! warned) { warned = true; lintwarn_ln($1->source_line, _("`delete(array)' is a non-portable tawk extension")); } if (do_traditional) { error_ln($1->source_line, _("`delete(array)' is a non-portable tawk extension")); } $3->memory = variable($3->source_line, arr, Node_var_new); $3->opcode = Op_push_array; $1->expr_count = 0; $$ = list_append(list_create($3), $1); if (! do_posix && ! 
do_traditional) {
			if ($3->memory == symbol_table)
				fatal(_("`delete' is not allowed with SYMTAB"));
			else if ($3->memory == func_table)
				fatal(_("`delete' is not allowed with FUNCTAB"));
		}
		$$ = add_pending_comment($$);
	  }
	| exp
	  {
		$$ = optimize_assignment($1);
		$$ = add_pending_comment($$);
	  }
	;

/* A simple_stmt or nothing; the empty alternative yields NULL. */
opt_simple_stmt
	: /* empty */
	  { $$ = NULL; }
	| simple_stmt
	  { $$ = $1; }
	;

/*
 * Zero or more case/default clauses of a switch body.  The first
 * clause starts a new list; each later one is added with
 * list_prepend().  A syntax error discards the list (NULL).
 */
case_statements
	: /* empty */
	  { $$ = NULL; }
	| case_statements case_statement
	  {
		if ($1 == NULL)
			$$ = list_create($2);
		else
			$$ = list_prepend($1, $2);
	  }
	| case_statements error
	  { $$ = NULL; }
	;

/*
 * One `case VALUE:' or `default:' clause.  The clause's keyword
 * instruction ($1) is returned with case_stmt set to the statement
 * list (an Op_no_op list when the body is empty) and, for `case',
 * case_exp set to the value instruction.  The colon token is freed.
 */
case_statement
	: LEX_CASE case_value colon opt_nls statements
	  {
		INSTRUCTION *casestmt = $5;
		if ($5 == NULL)
			casestmt = list_create(instruction(Op_no_op));
		if (do_pretty_print)
			(void) list_prepend(casestmt, instruction(Op_exec_count));
		$1->case_exp = $2;
		$1->case_stmt = casestmt;
		bcfree($3);
		$$ = $1;
	  }
	| LEX_DEFAULT colon opt_nls statements
	  {
		INSTRUCTION *casestmt = $4;
		if ($4 == NULL)
			casestmt = list_create(instruction(Op_no_op));
		if (do_pretty_print)
			(void) list_prepend(casestmt, instruction(Op_exec_count));
		bcfree($2);
		$1->case_stmt = casestmt;
		$$ = $1;
	  }
	;

/*
 * The constant after `case'.  Every alternative yields a single
 * instruction, not an instruction list: the value NODE is reached
 * directly through ->memory.
 */
case_value
	: YNUMBER
	  {	$$ = $1; }
	| '-' YNUMBER    %prec UNARY
	  {
		NODE *n = $2->memory;
		(void) force_number(n);
		negate_num(n);
		bcfree($1);
		$$ = $2;
	  }
	| '+' YNUMBER    %prec UNARY
	  {
		/*
		 * $2 is the bare YNUMBER instruction, exactly as in the
		 * '-' alternative above, so its NODE is $2->memory.
		 * Going through ->lasti here would treat it as a list
		 * header, which it is not.
		 */
		NODE *n = $2->memory;
		bcfree($1);
		add_sign_to_num(n, '+');
		$$ = $2;
	  }
	| YSTRING
	  {	$$ = $1; }
	| regexp
	  {
		/* pick the push opcode according to the node type behind the regexp */
		if ($1->memory->type == Node_regex)
			$1->opcode = Op_push_re;
		else
			$1->opcode = Op_push;
		$$ = $1;
	  }
	| typed_regexp
	  {
		assert(($1->memory->flags & REGEX) == REGEX);
		$1->opcode = Op_push_re;
		$$ = $1;
	  }
	;

/* The output keyword; its instruction is passed through unchanged. */
print
	: LEX_PRINT
	  { $$ = $1; }
	| LEX_PRINTF
	  { $$ = $1; }
	;

	/*
	 * Note: ``print(x)'' is already parsed by the first rule,
	 * so there is no good in covering it by the second one too.
*/

/* Argument list of print/printf, optionally wrapped in parentheses. */
print_expression_list
	: opt_expression_list
	| '(' expression_list r_paren
	  { $$ = $2; }
	;

/*
 * Optional output redirection after print/printf.  Both alternatives
 * clear the in_print/in_parens scanner flags; the second does so in a
 * mid-rule action, before the target expression is parsed.
 */
output_redir
	: /* empty */
	  {
		in_print = false;
		in_parens = 0;
		$$ = NULL;
	  }
	| IO_OUT { in_print = false; in_parens = 0; } common_exp
	  {
		/* reject a two-way pipe whose target is itself a two-way getline */
		if ($1->redir_type == redirect_twoway
		    	&& $3->lasti->opcode == Op_K_getline_redir
		   	 	&& $3->lasti->redir_type == redirect_twoway)
			yyerror(_("multistage two-way pipelines don't work"));
		/* the IO_OUT instruction goes in front of the target expression */
		$$ = list_prepend($3, $1);
	  }
	;

/* if / if-else; the bytecode is assembled by mk_condition(). */
if_statement
	: LEX_IF '(' exp r_paren opt_nls statement
	  {
		$$ = mk_condition($3, $1, $6, NULL, NULL);
	  }
	| LEX_IF '(' exp r_paren opt_nls statement
	     LEX_ELSE opt_nls statement
	  {
		$$ = mk_condition($3, $1, $6, $7, $9);
	  }
	;

/* one or more newlines */
nls
	: NEWLINE
	| nls NEWLINE
	;

/* zero or more newlines */
opt_nls
	: /* empty */
	| nls
	;

/* Optional `< expr' input redirection; the '<' token itself is freed. */
input_redir
	: /* empty */
	  { $$ = NULL; }
	| '<' simp_exp
	  {
		bcfree($1);
		$$ = $2;
	  }
	;

/* Formal parameter list of a function definition, possibly empty (NULL). */
opt_param_list
	: /* empty */
	  { $$ = NULL; }
	| param_list
	  { $$ = $1; }
	;

/*
 * Comma-separated parameter names.  param_count is set to 0 on the
 * first NAME and to the previous entry's param_count + 1 on each
 * later one.
 */
param_list
	: NAME
	  {
		$1->param_count = 0;
		$$ = list_create($1);
	  }
	| param_list comma NAME
	  {
		if ($1 != NULL && $3 != NULL) {
			$3->param_count =  $1->lasti->param_count + 1;
			$$ = list_append($1, $3);
			yyerrok;
		} else
			$$ = NULL;
	  }
	| error
	  { $$ = NULL; }
	| param_list error
	  { $$ = $1; }
	| param_list comma error
	  { $$ = $1; }
	;

/* optional expression, as in for loop */
opt_exp
	: /* empty */
	  { $$ = NULL; }
	| exp
	  { $$ = $1; }
	;

/* Expression list, possibly empty (NULL). */
opt_expression_list
	: /* empty */
	  { $$ = NULL; }
	| expression_list
	  { $$ = $1; }
	;

/* Comma-separated expressions, accumulated through mk_expression_list(). */
expression_list
	: exp
	  {	$$ = mk_expression_list(NULL, $1); }
	| expression_list comma exp
	  {
		$$ = mk_expression_list($1, $3);
		yyerrok;
	  }
	| error
	  { $$ = NULL; }
	| expression_list error
	  {
		/*
		 * Returning the expression list instead of NULL lets
		 * snode get a list of arguments that it can count.
*/ $$ = $1; } | expression_list error exp { /* Ditto */ $$ = mk_expression_list($1, $3); } | expression_list comma error { /* Ditto */ $$ = $1; } ; opt_fcall_expression_list : /* empty */ { $$ = NULL; } | fcall_expression_list { $$ = $1; } ; fcall_expression_list : fcall_exp { $$ = mk_expression_list(NULL, $1); } | fcall_expression_list comma fcall_exp { $$ = mk_expression_list($1, $3); yyerrok; } | error { $$ = NULL; } | fcall_expression_list error { /* * Returning the expression list instead of NULL lets * snode get a list of arguments that it can count. */ $$ = $1; } | fcall_expression_list error fcall_exp { /* Ditto */ $$ = mk_expression_list($1, $3); } | fcall_expression_list comma error { /* Ditto */ $$ = $1; } ; fcall_exp : exp { $$ = $1; } | typed_regexp { $$ = list_create($1); } ; /* Expressions, not including the comma operator. */ exp : variable assign_operator exp %prec ASSIGNOP { if (do_lint && $3->lasti->opcode == Op_match_rec) lintwarn_ln($2->source_line, _("regular expression on right of assignment")); $$ = mk_assignment($1, $3, $2); } | variable ASSIGN typed_regexp %prec ASSIGNOP { $$ = mk_assignment($1, list_create($3), $2); } | exp LEX_AND exp { $$ = mk_boolean($1, $3, $2); } | exp LEX_OR exp { $$ = mk_boolean($1, $3, $2); } | exp MATCHOP typed_regexp { if ($1->lasti->opcode == Op_match_rec) warning_ln($2->source_line, _("regular expression on left of `~' or `!~' operator")); assert($3->opcode == Op_push_re && ($3->memory->flags & REGEX) != 0); /* RHS is @/.../ */ $2->memory = $3->memory; bcfree($3); $$ = list_append($1, $2); } | exp MATCHOP exp { if ($1->lasti->opcode == Op_match_rec) warning_ln($2->source_line, _("regular expression on left of `~' or `!~' operator")); if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) { /* RHS is /.../ */ $2->memory = $3->nexti->memory; bcfree($3->nexti); /* Op_match_rec */ bcfree($3); /* Op_list */ $$ = list_append($1, $2); } else { $2->memory = make_regnode(Node_dynregex, NULL); $$ = 
list_append(list_merge($1, $3), $2);
		}
	  }
	/* `expr in array' membership test */
	| exp LEX_IN simple_variable
	  {
		if (do_lint_old)
			warning_ln($2->source_line,
				_("old awk does not support the keyword `in' except after `for'"));
		$3->nexti->opcode = Op_push_array;
		$2->opcode = Op_in_array;
		$2->expr_count = 1;
		$$ = list_append(list_merge($1, $3), $2);
	  }
	| exp a_relop exp %prec RELOP
	  {
		if (do_lint && $3->lasti->opcode == Op_match_rec)
			lintwarn_ln($2->source_line,
				_("regular expression on right of comparison"));
		$$ = list_append(list_merge($1, $3), $2);
	  }
	| exp '?' exp ':' exp
	  {
		$$ = mk_condition($1, $2, $3, $4, $5);
	  }
	| common_exp
	  {
		$$ = $1;
	  }
	;

/*
 * Assignment operators.  `/=' arrives as SLASH_BEFORE_EQUAL followed by
 * ASSIGN; the ASSIGN instruction is retagged as Op_assign_quotient.
 */
assign_operator
	: ASSIGN
	  {
		$$ = $1;
	  }
	| ASSIGNOP
	  {
		$$ = $1;
	  }
	| SLASH_BEFORE_EQUAL ASSIGN   /* `/=' */
	  {
		$2->opcode = Op_assign_quotient;
		$$ = $2;
	  }
	;

relop_or_less
	: RELOP
	  {
		$$ = $1;
	  }
	| '<'
	  {
		$$ = $1;
	  }
	;

/* Any relational operator, including '>'. */
a_relop
	: relop_or_less
	  {
		$$ = $1;
	  }
	| '>'
	  {
		$$ = $1;
	  }
	;

/* Expressions that may take part in concatenation by juxtaposition. */
common_exp
	: simp_exp
	  {
		$$ = $1;
	  }
	| simp_exp_nc
	  {
		$$ = $1;
	  }
	| common_exp simp_exp %prec CONCAT_OP
	  {
		int count = 2;
		bool is_simple_var = false;

		if ($1->lasti->opcode == Op_concat) {
			/* multiple (> 2) adjacent strings optimization */
			/* reuse the count of the earlier concat and neutralize it */
			is_simple_var = ($1->lasti->concat_flag & CSVAR) != 0;
			count = $1->lasti->expr_count + 1;
			$1->lasti->opcode = Op_no_op;
		} else {
			is_simple_var = ($1->nexti->opcode == Op_push
					&& $1->lasti == $1->nexti); /* first exp. is a simple
					                             * variable?; kludge for use
					                             * in Op_assign_concat.
					                             */
		}

		/* both operands are single constant pushes: try to fold them */
		if (do_optimize
			&& $1->nexti == $1->lasti && $1->nexti->opcode == Op_push_i
			&& $2->nexti == $2->lasti && $2->nexti->opcode == Op_push_i
		) {
			NODE *n1 = $1->nexti->memory;
			NODE *n2 = $2->nexti->memory;
			size_t nlen;

			// 1.5 ""   # can't fold this if program mucks with CONVFMT.
			// See test #12 in test/posix.awk.
			// Also can't fold if one or the other is translatable.
if ((n1->flags & (NUMBER|NUMINT|INTLSTR)) != 0
				|| (n2->flags & (NUMBER|NUMINT|INTLSTR)) != 0)
				goto plain_concat;

			/* append the text of n2 to n1 and drop the second push */
			n1 = force_string(n1);
			n2 = force_string(n2);
			nlen = n1->stlen + n2->stlen;
			erealloc(n1->stptr, char *, nlen + 1, "constant fold");
			memcpy(n1->stptr + n1->stlen, n2->stptr, n2->stlen);
			n1->stlen = nlen;
			n1->stptr[nlen] = '\0';
			n1->flags &= ~(NUMCUR|NUMBER|NUMINT);
			n1->flags |= (STRING|STRCUR);
			unref(n2);
			bcfree($2->nexti);
			bcfree($2);
			$$ = $1;
		} else {
	plain_concat:
			/* emit a run-time Op_concat and track the largest operand count */
			$$ = list_append(list_merge($1, $2), instruction(Op_concat));
			$$->lasti->concat_flag = (is_simple_var ? CSVAR : 0);
			$$->lasti->expr_count = count;
			if (count > max_args)
				max_args = count;
		}
	  }
	;

/* Simple expressions: arithmetic, plain getline, post-increment, (i,j) in arr. */
simp_exp
	: non_post_simp_exp
	/* Binary operators in order of decreasing precedence.  */
	| simp_exp '^' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp '*' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp '/' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp '%' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp '+' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp '-' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| LEX_GETLINE opt_variable input_redir
	  {
		/*
		 * In BEGINFILE/ENDFILE, allow `getline [var] < file'
		 */

		if ((rule == BEGINFILE || rule == ENDFILE) && $3 == NULL)
			error_ln($1->source_line,
				 _("non-redirected `getline' invalid inside `%s' rule"), ruletab[rule]);
		if (do_lint && rule == END && $3 == NULL)
			lintwarn_ln($1->source_line,
				_("non-redirected `getline' undefined inside END action"));
		$$ = mk_getline($1, $2, $3, redirect_input);
	  }
	| variable INCREMENT
	  {
		$2->opcode = Op_postincrement;
		$$ = mk_assignment($1, NULL, $2);
	  }
	| variable DECREMENT
	  {
		$2->opcode = Op_postdecrement;
		$$ = mk_assignment($1, NULL, $2);
	  }
	/* multidimensional membership test: (i, j, ...) in array */
	| '(' expression_list r_paren LEX_IN simple_variable
	  {
		if (do_lint_old) {
		    warning_ln($4->source_line,
				_("old awk does not support the keyword `in' except after `for'"));
		    warning_ln($4->source_line,
				_("old awk does not support multidimensional arrays"));
		}
		$5->nexti->opcode =
Op_push_array;
		$4->opcode = Op_in_array;
		if ($2 == NULL) {	/* error */
			errcount++;
			$4->expr_count = 0;
			$$ = list_merge($5, $4);
		} else {
			INSTRUCTION *t = $2;
			$4->expr_count = count_expressions(&t, false);
			$$ = list_append(list_merge(t, $5), $4);
		}
	  }
	;

/* Expressions containing "| getline" lose the ability to be on the
   right-hand side of a concatenation. */
simp_exp_nc
	: common_exp IO_IN LEX_GETLINE opt_variable
		{
		  /* the IO_IN instruction only supplies the redirection type */
		  $$ = mk_getline($3, $4, $1, $2->redir_type);
		  bcfree($2);
		}
	/* Binary operators in order of decreasing precedence.  */
	| simp_exp_nc '^' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp_nc '*' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp_nc '/' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp_nc '%' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp_nc '+' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	| simp_exp_nc '-' simp_exp
	  {
		$$ = mk_binary($1, $3, $2);
	  }
	;

/* Primary expressions and prefix (unary) operators. */
non_post_simp_exp
	: regexp
	  {
		$$ = list_create($1);
	  }
	| '!' simp_exp %prec UNARY
	  {
		if ($2->opcode == Op_match_rec) {
			/* rewritten as: push 0, Op_field_spec, Op_nomatch */
			$2->opcode = Op_nomatch;
			$1->opcode = Op_push_i;
			$1->memory = set_profile_text(make_number(0.0), "0", 1);
			$$ = list_append(list_append(list_create($1),
						instruction(Op_field_spec)), $2);
		} else {
			if (do_optimize && $2->nexti == $2->lasti
					&& $2->nexti->opcode == Op_push_i
					&& ($2->nexti->memory->flags & (MPFN|MPZN|INTLSTR)) == 0
			) {
				/* operand is a single constant: negate it at parse time */
				NODE *n = $2->nexti->memory;
				if ((n->flags & STRING) != 0) {
					/* a string constant is false only when empty */
					n->numbr = (AWKNUM) (n->stlen == 0);
					n->flags &= ~(STRCUR|STRING);
					n->flags |= (NUMCUR|NUMBER);
					efree(n->stptr);
					n->stptr = NULL;
					n->stlen = 0;
				} else
					n->numbr = (AWKNUM) (n->numbr == 0.0);
				bcfree($1);
				$$ = $2;
			} else {
				$1->opcode = Op_not;
				add_lint($2, LINT_assign_in_cond);
				$$ = list_append($2, $1);
			}
		}
	   }
	| '(' exp r_paren
	  {
		/* Op_parens is appended only when pretty-printing */
		if (do_pretty_print)
			$$ = list_append($2, bcalloc(Op_parens, 1, sourceline));
		else
			$$ = $2;
	  }
	| LEX_BUILTIN '(' opt_fcall_expression_list r_paren
	  {
		/* a NULL result from snode() aborts the parse */
		$$ = snode($3, $1);
		if ($$ == NULL)
			YYABORT;
	  }
	| LEX_LENGTH '(' opt_fcall_expression_list r_paren
	  {
		$$ = snode($3, $1);
		if
($$ == NULL)
			YYABORT;
	  }
	/* `length' without parentheses; the lint warning is issued only once */
	| LEX_LENGTH
	  {
		static bool warned = false;

		if (do_lint && ! warned) {
			warned = true;
			lintwarn_ln($1->source_line,
				_("call of `length' without parentheses is not portable"));
		}
		$$ = snode(NULL, $1);
		if ($$ == NULL)
			YYABORT;
	  }
	| func_call
	| variable
	| INCREMENT variable
	  {
		$1->opcode = Op_preincrement;
		$$ = mk_assignment($2, NULL, $1);
	  }
	| DECREMENT variable
	  {
		$1->opcode = Op_predecrement;
		$$ = mk_assignment($2, NULL, $1);
	  }
	| YNUMBER
	  {
		$$ = list_create($1);
	  }
	| YSTRING
	  {
		$$ = list_create($1);
	  }
	| '-' simp_exp    %prec UNARY
	  {
		/* a non-string constant operand is negated in place at parse time */
		if ($2->lasti->opcode == Op_push_i
			&& ($2->lasti->memory->flags & STRING) == 0
		) {
			NODE *n = $2->lasti->memory;
			(void) force_number(n);
			negate_num(n);
			$$ = $2;
			bcfree($1);
		} else {
			$1->opcode = Op_unary_minus;
			$$ = list_append($2, $1);
		}
	  }
	| '+' simp_exp    %prec UNARY
	  {
		/*
		 * For a non-string constant carrying NUMCONSTSTR only the
		 * saved text gets a leading '+'; no instruction is emitted.
		 */
		if ($2->lasti->opcode == Op_push_i
			&& ($2->lasti->memory->flags & STRING) == 0
			&& ($2->lasti->memory->flags & NUMCONSTSTR) != 0) {
			NODE *n = $2->lasti->memory;
			add_sign_to_num(n, '+');
			$$ = $2;
			bcfree($1);
		} else {
			/*
			 * was: $$ = $2
			 * POSIX semantics: force a conversion to numeric type
			 */
			$1->opcode = Op_unary_plus;
			$$ = list_append($2, $1);
		}
	  }
	;

/* Function calls: direct, or indirect through a variable (`@name(...)'). */
func_call
	: direct_func_call
	  {
		func_use($1->lasti->func_name, FUNC_USE);
		$$ = $1;
	  }
	| '@' direct_func_call
	  {
		/* indirect function call */
		INSTRUCTION *f, *t;
		char *name;
		NODE *indirect_var;
		static bool warned = false;
		const char *msg = _("indirect function calls are a gawk extension");

		if (do_traditional || do_posix)
			yyerror("%s", msg);
		else if (do_lint && !
warned) {
			warned = true;
			lintwarn("%s", msg);
		}

		/* the call instruction is last in the list; retag it as indirect */
		f = $2->lasti;
		f->opcode = Op_indirect_func_call;
		name = estrdup(f->func_name, strlen(f->func_name));
		if (is_std_var(name))
			yyerror(_("can not use special variable `%s' for indirect function call"), name);
		indirect_var = variable(f->source_line, name, Node_var_new);
		t = instruction(Op_push);
		t->memory = indirect_var;

		/* prepend indirect var instead of appending to arguments (opt_expression_list),
		 * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case:
		 *		f = "fun"
		 *		@f(f="real_fun")
		 */

		$$ = list_prepend($2, t);
		at_seen = false;
	  }
	;

/*
 * name(args): the argument code comes first and the call instruction last.
 * The argument count is stored in the instruction slot after the call.
 */
direct_func_call
	: FUNC_CALL '(' opt_fcall_expression_list r_paren
	  {
		NODE *n;

		/* the non-function check is skipped while at_seen is set */
		if (! at_seen) {
			n = lookup($1->func_name);
			if (n != NULL && n->type != Node_func
			    && n->type != Node_ext_func) {
				error_ln($1->source_line,
					_("attempt to use non-function `%s' in function call"),
						$1->func_name);
			}
		}
		param_sanity($3);
		$1->opcode = Op_func_call;
		$1->func_body = NULL;
		if ($3 == NULL) {	/* no argument or error */
			($1 + 1)->expr_count = 0;
			$$ = list_create($1);
		} else {
			INSTRUCTION *t = $3;
			($1 + 1)->expr_count = count_expressions(&t, true);
			$$ = list_append(t, $1);
		}
	  }
	;

opt_variable
	: /* empty */
	  {
		$$ = NULL;
	  }
	| variable
	  {
		$$ = $1;
	  }
	;

delete_subscript_list
	: /* empty */
	  {
		$$ = NULL;
	  }
	| delete_subscript SUBSCRIPT
	  {
		$$ = $1;
	  }
	;

/* One or more bracketed subscript groups in a delete statement. */
delete_subscript
	: delete_exp_list
	  {
		$$ = $1;
	  }
	| delete_subscript delete_exp_list
	  {
		$$ = list_merge($1, $2);
	  }
	;

delete_exp_list
	: bracketed_exp_list
	  {
		INSTRUCTION *ip = $1->lasti;
		int count = ip->sub_count;	/* # of SUBSEP-separated expressions */
		if (count > 1) {
			/* change Op_subscript or Op_sub_array to Op_concat */
			ip->opcode = Op_concat;
			ip->concat_flag = CSUBSEP;
			ip->expr_count = count;
		} else
			ip->opcode = Op_no_op;
		sub_counter++;	/* count # of dimensions */
		$$ = $1;
	  }
	;

/* `[' expression_list `]'; the closing bracket instruction carries sub_count. */
bracketed_exp_list
	: '[' expression_list ']'
	  {
		INSTRUCTION *t = $2;
		if ($2 == NULL) {
			error_ln($3->source_line,
				_("invalid subscript expression"));
			/* install Null string as
subscript. */
			t = list_create(instruction(Op_push_i));
			t->nexti->memory = dupnode(Nnull_string);
			$3->sub_count = 1;
		} else
			$3->sub_count = count_expressions(&t, false);
		$$ = list_append(t, $3);
	  }
	;

/* One or more bracketed subscript groups. */
subscript
	: bracketed_exp_list
	  {
		$$ = $1;
	  }
	| subscript bracketed_exp_list
	  {
		$$ = list_merge($1, $2);
	  }
	;

subscript_list
	: subscript SUBSCRIPT
	  {
		$$ = $1;
	  }
	;

/*
 * A plain name (Op_push) or a subscripted name (Op_push_array placed in
 * front of the subscript code).  Both look the name up via variable()
 * with Node_var_new.
 */
simple_variable
	: NAME
	  {
		char *var_name = $1->lextok;

		$1->opcode = Op_push;
		$1->memory = variable($1->source_line, var_name, Node_var_new);
		$$ = list_create($1);
	  }
	| NAME subscript_list
	  {
		char *arr = $1->lextok;
		$1->memory = variable($1->source_line, arr, Node_var_new);
		$1->opcode = Op_push_array;
		$$ = list_prepend($2, $1);
	  }
	;

/* A simple variable or a field reference, optionally post-inc/decremented. */
variable
	: simple_variable
	  {
		INSTRUCTION *ip = $1->nexti;
		/* a Node_var with a var_update hook gets Op_var_update prepended */
		if (ip->opcode == Op_push
			&& ip->memory->type == Node_var
			&& ip->memory->var_update
		) {
			$$ = list_prepend($1, instruction(Op_var_update));
			$$->nexti->update_var = ip->memory->var_update;
		} else
			$$ = $1;
	  }
	| '$' non_post_simp_exp opt_incdec
	  {
		/* the '$' instruction follows the code for the field index */
		$$ = list_append($2, $1);
		if ($3 != NULL)
			mk_assignment($2, NULL, $3);
	  }
	;

/*
 * Optional trailing ++ or --.  The two operator actions only retag the
 * token; NOTE(review): they presumably rely on the parser generator's
 * default semantic value for the result -- confirm.
 */
opt_incdec
	: INCREMENT
	  {
		$1->opcode = Op_postincrement;
	  }
	| DECREMENT
	  {
		$1->opcode = Op_postdecrement;
	  }
	| /* empty */
	  {
		$$ = NULL;
	  }
	;

/* Punctuation helpers; most of them reset error recovery with yyerrok. */
l_brace
	: '{' opt_nls
	;

r_brace
	: '}' opt_nls	{ yyerrok; }
	;

r_paren
	: ')' { yyerrok; }
	;

opt_semi
	: /* empty */
	| semi
	;

semi
	: ';'	{ yyerrok; }
	;

colon
	: ':'	{ $$ = $1; yyerrok; }
	;

comma
	: ',' opt_nls	{ yyerrok; }
	;
%%

/* One entry of the keyword and builtin-function table. */
struct token {
	const char *operator;	/* text to match */
	OPCODE value;			/*  type */
	int class;				/* lexical class */
	unsigned flags;			/* # of args.
allowed and compatability */ # define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */ # define A(n) (1<<(n)) # define VERSION_MASK 0xFF00 /* old awk is zero */ # define NOT_OLD 0x0100 /* feature not in old awk */ # define NOT_POSIX 0x0200 /* feature not in POSIX */ # define GAWKX 0x0400 /* gawk extension */ # define BREAK 0x0800 /* break allowed inside */ # define CONTINUE 0x1000 /* continue allowed inside */ # define DEBUG_USE 0x2000 /* for use by developers */ NODE *(*ptr)(int); /* function that implements this keyword */ NODE *(*ptr2)(int); /* alternate arbitrary-precision function */ }; #ifdef USE_EBCDIC /* tokcompare --- lexicographically compare token names for sorting */ static int tokcompare(const void *l, const void *r) { struct token *lhs, *rhs; lhs = (struct token *) l; rhs = (struct token *) r; return strcmp(lhs->operator, rhs->operator); } #endif /* * Tokentab is sorted ASCII ascending order, so it can be binary searched. * See check_special(), which sorts the table on EBCDIC systems. * Function pointers come from declarations in awk.h. 
*/

/*
 * MPF(F) expands to do_mpfr_F when HAVE_MPFR is defined and to 0 otherwise;
 * it fills the last initializer of the table entries below.
 */
#ifdef HAVE_MPFR
#define MPF(F) do_mpfr_##F
#else
#define MPF(F) 0
#endif

/* Initializers are in struct token order: operator, value, class, flags, ptr, ptr2. */
static const struct token tokentab[] = {
{"BEGIN",	Op_rule,	 LEX_BEGIN,	0,		0,	0},
{"BEGINFILE",	Op_rule,	 LEX_BEGINFILE,	GAWKX,		0,	0},
{"END",		Op_rule,	 LEX_END,	0,		0,	0},
{"ENDFILE",		Op_rule,	 LEX_ENDFILE,	GAWKX,		0,	0},
#ifdef ARRAYDEBUG
{"adump",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(1)|A(2)|DEBUG_USE,	do_adump,	0},
#endif
{"and",		Op_builtin,    LEX_BUILTIN,	GAWKX,		do_and,	MPF(and)},
{"asort",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3),	do_asort,	0},
{"asorti",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3),	do_asorti,	0},
{"atan2",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2),	do_atan2,	MPF(atan2)},
{"bindtextdomain",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2),	do_bindtextdomain,	0},
{"break",	Op_K_break,	 LEX_BREAK,	0,		0,	0},
{"case",	Op_K_case,	 LEX_CASE,	GAWKX,		0,	0},
{"close",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1)|A(2),	do_close,	0},
{"compl",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(1),	do_compl,	MPF(compl)},
{"continue",	Op_K_continue, LEX_CONTINUE,	0,		0,	0},
{"cos",		Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_cos,	MPF(cos)},
{"dcgettext",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3),	do_dcgettext,	0},
{"dcngettext",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3)|A(4)|A(5),	do_dcngettext,	0},
{"default",	Op_K_default,	 LEX_DEFAULT,	GAWKX,		0,	0},
{"delete",	Op_K_delete,	 LEX_DELETE,	NOT_OLD,	0,	0},
{"do",		Op_K_do,	 LEX_DO,	NOT_OLD|BREAK|CONTINUE,	0,	0},
{"else",	Op_K_else,	 LEX_ELSE,	0,		0,	0},
{"eval",	Op_symbol,	 LEX_EVAL,	0,		0,	0},
{"exit",	Op_K_exit,	 LEX_EXIT,	0,		0,	0},
{"exp",		Op_builtin,	 LEX_BUILTIN,	A(1),		do_exp,	MPF(exp)},
{"fflush",	Op_builtin,	 LEX_BUILTIN,	A(0)|A(1), do_fflush,	0},
{"for",		Op_K_for,	 LEX_FOR,	BREAK|CONTINUE,	0,	0},
{"func",	Op_func, LEX_FUNCTION,	NOT_POSIX|NOT_OLD,	0,	0},
{"function",Op_func, LEX_FUNCTION,	NOT_OLD,	0,	0},
{"gensub",	Op_sub_builtin,	 LEX_BUILTIN,	GAWKX|A(3)|A(4), 0,	0},
{"getline",	Op_K_getline_redir,	 LEX_GETLINE,	NOT_OLD,	0,	0},
{"gsub",	Op_sub_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), 0,	0},
{"if",		Op_K_if,	 LEX_IF,	0,		0,	0},
{"in",		Op_symbol,	 LEX_IN,	0,		0,	0},
{"include",	Op_symbol,	 LEX_INCLUDE,	GAWKX,	0,	0},
{"index",	Op_builtin,	 LEX_BUILTIN,	A(2),		do_index,	0},
{"int",		Op_builtin,	 LEX_BUILTIN,	A(1),		do_int,	MPF(int)},
/* this entry is compiled in only when SUPPLY_INTDIV is defined */
#ifdef SUPPLY_INTDIV
{"intdiv0",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(3),	do_intdiv,	MPF(intdiv)},
#endif
{"isarray",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1),	do_isarray,	0},
{"length",	Op_builtin,	 LEX_LENGTH,	A(0)|A(1),	do_length,	0},
{"load",  	Op_symbol,	 LEX_LOAD,	GAWKX,		0,	0},
{"log",		Op_builtin,	 LEX_BUILTIN,	A(1),		do_log,	MPF(log)},
{"lshift",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(2),	do_lshift,	MPF(lshift)},
{"match",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), do_match,	0},
{"mktime",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2), do_mktime, 0},
{"next",	Op_K_next,	 LEX_NEXT,	0,		0,	0},
{"nextfile",	Op_K_nextfile, LEX_NEXTFILE,	0,		0,	0},
{"or",		Op_builtin,    LEX_BUILTIN,	GAWKX,		do_or,	MPF(or)},
{"patsplit",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(2)|A(3)|A(4), do_patsplit,	0},
{"print",	Op_K_print,	 LEX_PRINT,	0,		0,	0},
{"printf",	Op_K_printf,	 LEX_PRINTF,	0,		0,	0},
{"rand",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(0),	do_rand,	MPF(rand)},
{"return",	Op_K_return,	 LEX_RETURN,	NOT_OLD,	0,	0},
{"rshift",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(2),	do_rshift,	MPF(rshift)},
{"sin",		Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_sin,	MPF(sin)},
{"split",	Op_builtin,	 LEX_BUILTIN,	A(2)|A(3)|A(4),	do_split,	0},
{"sprintf",	Op_builtin,	 LEX_BUILTIN,	0,		do_sprintf,	0},
{"sqrt",	Op_builtin,	 LEX_BUILTIN,	A(1),		do_sqrt,	MPF(sqrt)},
{"srand",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(0)|A(1), do_srand,	MPF(srand)},
#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ...
*/ {"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|DEBUG_USE, stopme, 0}, #endif {"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime, 0}, {"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum, MPF(strtonum)}, {"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0, 0}, {"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr, 0}, {"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0, 0}, {"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system, 0}, {"systime", Op_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime, 0}, {"tolower", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower, 0}, {"toupper", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper, 0}, {"typeof", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_typeof, 0}, {"while", Op_K_while, LEX_WHILE, BREAK|CONTINUE, 0, 0}, {"xor", Op_builtin, LEX_BUILTIN, GAWKX, do_xor, MPF(xor)}, }; /* Variable containing the current shift state. */ static mbstate_t cur_mbstate; /* Ring buffer containing current characters. */ #define MAX_CHAR_IN_RING_BUFFER 8 #define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX) static char cur_char_ring[RING_BUFFER_SIZE]; /* Index for ring buffers. */ static int cur_ring_idx; /* This macro means that last nextc() return a singlebyte character or 1st byte of a multibyte character. */ #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1) /* getfname --- return name of a builtin function (for pretty printing) */ const char * getfname(NODE *(*fptr)(int)) { int i, j; j = sizeof(tokentab) / sizeof(tokentab[0]); /* linear search, no other way to do it */ for (i = 0; i < j; i++) if (tokentab[i].ptr == fptr || tokentab[i].ptr2 == fptr) return tokentab[i].operator; return NULL; } /* negate_num --- negate a number in NODE */ void negate_num(NODE *n) { #ifdef HAVE_MPFR int tval = 0; #endif add_sign_to_num(n, '-'); if (! is_mpg_number(n)) { n->numbr = -n->numbr; return; } #ifdef HAVE_MPFR if (is_mpg_integer(n)) { if (! 
iszero(n)) { mpz_neg(n->mpg_i, n->mpg_i); return; } /* * 0 --> -0 conversion. Requires turning the MPG integer * into an MPFR float. */ mpz_clear(n->mpg_i); /* release the integer storage */ /* Convert and fall through. */ tval = mpfr_set_d(n->mpg_numbr, 0.0, ROUND_MODE); IEEE_FMT(n->mpg_numbr, tval); n->flags &= ~MPZN; n->flags |= MPFN; } /* mpfr float case */ tval = mpfr_neg(n->mpg_numbr, n->mpg_numbr, ROUND_MODE); IEEE_FMT(n->mpg_numbr, tval); #endif } /* add_sign_to_num --- make a constant unary plus or minus for profiling */ static void add_sign_to_num(NODE *n, char sign) { if ((n->flags & NUMCONSTSTR) != 0) { char *s; s = n->stptr; memmove(& s[1], & s[0], n->stlen + 1); s[0] = sign; n->stlen++; } } /* print_included_from --- print `Included from ..' file names and locations */ static void print_included_from() { int saveline, line; SRCFILE *s; /* suppress current file name, line # from `.. included from ..' msgs */ saveline = sourceline; sourceline = 0; for (s = sourcefile; s != NULL && s->stype == SRC_INC; ) { s = s->next; if (s == NULL || s->fd <= INVALID_HANDLE) continue; line = s->srclines; /* if last token is NEWLINE, line number is off by 1. */ if (s->lasttok == NEWLINE) line--; msg("%s %s:%d%c", s->prev == sourcefile ? "In file included from" : " from", (s->stype == SRC_INC || s->stype == SRC_FILE) ? s->src : "cmd. line", line, s->stype == SRC_INC ? ',' : ':' ); } sourceline = saveline; } /* warning_ln --- print a warning message with location */ static void warning_ln(int line, const char *mesg, ...) { va_list args; int saveline; saveline = sourceline; sourceline = line; print_included_from(); va_start(args, mesg); err(false, _("warning: "), mesg, args); va_end(args); sourceline = saveline; } /* lintwarn_ln --- print a lint warning and location */ static void lintwarn_ln(int line, const char *mesg, ...) 
{ va_list args; int saveline; saveline = sourceline; sourceline = line; print_included_from(); va_start(args, mesg); if (lintfunc == r_fatal) err(true, _("fatal: "), mesg, args); else err(false, _("warning: "), mesg, args); va_end(args); sourceline = saveline; if (lintfunc == r_fatal) gawk_exit(EXIT_FATAL); } /* error_ln --- print an error message and location */ static void error_ln(int line, const char *m, ...) { va_list args; int saveline; saveline = sourceline; sourceline = line; print_included_from(); errcount++; va_start(args, m); err(false, "error: ", m, args); va_end(args); sourceline = saveline; } /* yyerror --- print a syntax error message, show where */ static void yyerror(const char *m, ...) { va_list args; const char *mesg = NULL; char *bp, *cp; char *scan; char *buf; int count; static char end_of_file_line[] = "(END OF FILE)"; print_included_from(); errcount++; /* Find the current line in the input file */ if (lexptr && lexeme) { if (thisline == NULL) { cp = lexeme; if (*cp == '\n') { if (cp > lexptr_begin) cp--; mesg = _("unexpected newline or end of string"); } for (; cp != lexptr_begin && *cp != '\n'; --cp) continue; if (*cp == '\n') cp++; thisline = cp; } /* NL isn't guaranteed */ bp = lexeme; if (bp < thisline) bp = thisline + 1; while (bp < lexend && *bp && *bp != '\n') bp++; } else { thisline = end_of_file_line; bp = thisline + strlen(thisline); } msg("%.*s", (int) (bp - thisline), thisline); va_start(args, m); if (mesg == NULL) mesg = m; count = strlen(mesg) + 1; if (lexptr != NULL) count += (lexeme - thisline) + 2; ezalloc(buf, char *, count+1, "yyerror"); bp = buf; if (lexptr != NULL) { scan = thisline; while (scan < lexeme) if (*scan++ == '\t') *bp++ = '\t'; else *bp++ = ' '; *bp++ = '^'; *bp++ = ' '; } strcpy(bp, mesg); err(false, "", buf, args); va_end(args); efree(buf); } /* mk_program --- create a single list of instructions */ static INSTRUCTION * mk_program() { INSTRUCTION *cp, *tmp; #define begin_block rule_block[BEGIN] #define 
end_block rule_block[END] #define prog_block rule_block[Rule] #define beginfile_block rule_block[BEGINFILE] #define endfile_block rule_block[ENDFILE] if (end_block == NULL) end_block = list_create(ip_end); else (void) list_prepend(end_block, ip_end); if (! in_main_context()) { if (begin_block != NULL && prog_block != NULL) cp = list_merge(begin_block, prog_block); else cp = (begin_block != NULL) ? begin_block : prog_block; if (cp != NULL) (void) list_merge(cp, end_block); else cp = end_block; (void) list_append(cp, instruction(Op_stop)); goto out; } if (endfile_block == NULL) endfile_block = list_create(ip_endfile); else { ip_rec->has_endfile = true; (void) list_prepend(endfile_block, ip_endfile); } if (beginfile_block == NULL) beginfile_block = list_create(ip_beginfile); else (void) list_prepend(beginfile_block, ip_beginfile); if (prog_block == NULL) { if (end_block->nexti == end_block->lasti && beginfile_block->nexti == beginfile_block->lasti && endfile_block->nexti == endfile_block->lasti ) { /* no pattern-action and (real) end, beginfile or endfile blocks */ bcfree(ip_rec); bcfree(ip_newfile); ip_rec = ip_newfile = NULL; list_append(beginfile_block, instruction(Op_after_beginfile)); (void) list_append(endfile_block, instruction(Op_after_endfile)); if (begin_block == NULL) /* no program at all */ cp = end_block; else cp = list_merge(begin_block, end_block); if (program_comment != NULL) { (void) list_prepend(cp, program_comment); } if (comment != NULL) (void) list_append(cp, comment); (void) list_append(cp, ip_atexit); (void) list_append(cp, instruction(Op_stop)); /* append beginfile_block and endfile_block for sole use * in getline without redirection (Op_K_getline). 
*/ (void) list_merge(cp, beginfile_block); (void) list_merge(cp, endfile_block); goto out; } else { /* install a do-nothing prog block */ prog_block = list_create(instruction(Op_no_op)); } } (void) list_append(endfile_block, instruction(Op_after_endfile)); (void) list_prepend(prog_block, ip_rec); (void) list_append(prog_block, instruction(Op_jmp)); prog_block->lasti->target_jmp = ip_rec; list_append(beginfile_block, instruction(Op_after_beginfile)); cp = list_merge(beginfile_block, prog_block); (void) list_prepend(cp, ip_newfile); (void) list_merge(cp, endfile_block); (void) list_merge(cp, end_block); if (begin_block != NULL) cp = list_merge(begin_block, cp); if (program_comment != NULL) { (void) list_prepend(cp, program_comment); } if (comment != NULL) { (void) list_append(cp, comment); } (void) list_append(cp, ip_atexit); (void) list_append(cp, instruction(Op_stop)); out: /* delete the Op_list, not needed */ tmp = cp->nexti; bcfree(cp); /* these variables are not used again but zap them anyway. */ comment = NULL; function_comment = NULL; program_comment = NULL; return tmp; #undef begin_block #undef end_block #undef prog_block #undef beginfile_block #undef endfile_block } /* parse_program --- read in the program and convert into a list of instructions */ int parse_program(INSTRUCTION **pcode) { int ret; /* pre-create non-local jump targets * ip_end (Op_no_op) -- used as jump target for `exit' * outside an END block. */ ip_end = instruction(Op_no_op); if (! 
in_main_context())
		/* outside the main context none of the per-file targets exist */
		ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
	else {
		ip_endfile = instruction(Op_no_op);
		main_beginfile = ip_beginfile = instruction(Op_no_op);
		ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */
		/* two consecutive instruction slots; the second holds target_get_record */
		ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */
		ip_newfile->target_jmp = ip_end;
		ip_newfile->target_endfile = ip_endfile;
		(ip_newfile + 1)->target_get_record = ip_rec;
		ip_rec->target_newfile = ip_newfile;
		ip_atexit = instruction(Op_atexit);	/* target for `exit' in END block */
	}

	/* start at the first source that is not a shared library */
	for (sourcefile = srcfiles->next; sourcefile->stype == SRC_EXTLIB; sourcefile = sourcefile->next)
		;

	lexeof = false;
	lexptr = NULL;
	lasttok = 0;
	/*
	 * NOTE(review): the size is sizeof(ruletab) * sizeof(INSTRUCTION *);
	 * confirm that rule_block is declared with sizeof(ruletab) elements.
	 */
	memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *));
	errcount = 0;
	tok = tokstart != NULL ? tokstart : tokexpand();

	ret = yyparse();
	*pcode = mk_program();

	/* avoid false source indications */
	source = NULL;
	sourceline = 0;
	if (ret == 0)	/* avoid spurious warning if parser aborted with YYABORT */
		check_funcs();

	if (do_posix && ! check_param_names())
		errcount++;

	/* args_array is (re)sized to max_args + 2 entries */
	if (args_array == NULL)
		emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
	else
		erealloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");

	/* nonzero if the parser failed or any error was counted */
	return (ret || errcount);
}

/*
 * free_srcfile --- free a SRCFILE struct
 *
 * Releases the src string and the struct itself; no other member is freed
 * here.
 */

void
free_srcfile(SRCFILE *thisfile)
{
	efree(thisfile->src);
	efree(thisfile);
}

/*
 * do_add_srcfile --- add one item to srcfiles
 *
 * Allocates a zeroed SRCFILE, stores a copy of `src' and the `path'
 * pointer as given, and links the new entry into the doubly linked list
 * immediately before `thisfile'.  Returns the new entry.
 */

static SRCFILE *
do_add_srcfile(enum srctype stype, char *src, char *path, SRCFILE *thisfile)
{
	SRCFILE *s;

	ezalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile");
	s->src = estrdup(src, strlen(src));
	s->fullpath = path;
	s->stype = stype;
	s->fd = INVALID_HANDLE;
	/* splice s in between thisfile->prev and thisfile */
	s->next = thisfile;
	s->prev = thisfile->prev;
	thisfile->prev->next = s;
	thisfile->prev = s;
	return s;
}

/* add_srcfile --- add one item to srcfiles after checking if
 *				a source file exists and not already in list.
*/

/*
 * stype            kind of source being added
 * src              name as given by the user
 * thisfile         list entry the new one is inserted before
 * already_included if non-NULL, set to true when a duplicate @include or
 *                  @load is dropped, false otherwise
 * errcode          if non-NULL, receives the error number when the file
 *                  cannot be found and NULL is returned; if NULL, that
 *                  failure is fatal
 *
 * Returns the new entry, or NULL for a lookup failure or dropped duplicate.
 */
SRCFILE *
add_srcfile(enum srctype stype, char *src, SRCFILE *thisfile, bool *already_included, int *errcode)
{
	SRCFILE *s;
	struct stat sbuf;
	char *path;
	int errno_val = 0;

	if (already_included)
		*already_included = false;
	if (errcode)
		*errcode = 0;
	/* command-line text and stdin need no file lookup */
	if (stype == SRC_CMDLINE || stype == SRC_STDIN)
		return do_add_srcfile(stype, src, NULL, thisfile);

	path = find_source(src, & sbuf, & errno_val, stype == SRC_EXTLIB);
	if (path == NULL) {
		if (errcode) {
			*errcode = errno_val;
			return NULL;
		}
		/* use full messages to ease translation */
		fatal(stype != SRC_EXTLIB
			? _("can't open source file `%s' for reading (%s)")
			: _("can't open shared library `%s' for reading (%s)"),
				src,
				errno_val ? strerror(errno_val) : _("reason unknown"));
	}

	/* N.B. We do not eliminate duplicate SRC_FILE (-f) programs. */
	for (s = srcfiles->next; s != srcfiles; s = s->next) {
		if ((s->stype == SRC_FILE || s->stype == SRC_INC || s->stype == SRC_EXTLIB) && files_are_same(path, s)) {
			if (stype == SRC_INC || stype == SRC_EXTLIB) {
				/* eliminate duplicates */
				if ((stype == SRC_INC) && (s->stype == SRC_FILE))
					fatal(_("can't include `%s' and use it as a program file"), src);

				if (do_lint) {
					int line = sourceline;
					/* Kludge: the line number may be off for `@include file'.
					 * Since, this function is also used for '-f file' in main.c,
					 * sourceline > 1 check ensures that the call is at
					 * parse time.
					 */
					if (sourceline > 1 && lasttok == NEWLINE)
						line--;
					lintwarn_ln(line,
						    stype != SRC_EXTLIB
						      ? _("already included source file `%s'")
						      : _("already loaded shared library `%s'"),
						    src);
				}
				/* the duplicate is dropped, so its path copy is released */
				efree(path);
				if (already_included)
					*already_included = true;
				return NULL;
			} else {
				/* duplicates are allowed for -f */
				if (s->stype == SRC_INC)
					fatal(_("can't include `%s' and use it as a program file"), src);
				/* no need to scan for further matches, since
				 * they must be of homogeneous type */
				break;
			}
		}
	}

	/* the new entry keeps `path' and a copy of the stat data */
	s = do_add_srcfile(stype, src, path, thisfile);
	s->sbuf = sbuf;
	s->mtime = sbuf.st_mtime;
	return s;
}

/*
 * include_source --- read program from source included using `@include'
 *
 * Returns 0 on success and when there is nothing to do (empty name, file
 * already included); returns -1 after reporting an error.
 */

static int
include_source(INSTRUCTION *file)
{
	SRCFILE *s;
	char *src = file->lextok;
	int errcode;
	bool already_included;

	if (do_traditional || do_posix) {
		error_ln(file->source_line, _("@include is a gawk extension"));
		return -1;
	}

	if (strlen(src) == 0) {
		if (do_lint)
			lintwarn_ln(file->source_line, _("empty filename after @include"));
		return 0;
	}

	s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode);
	if (s == NULL) {
		if (already_included)
			return 0;
		error_ln(file->source_line,
			_("can't open source file `%s' for reading (%s)"),
			src, errcode ?
strerror(errcode) : _("reason unknown")); return -1; } /* save scanner state for the current sourcefile */ sourcefile->srclines = sourceline; sourcefile->lexptr = lexptr; sourcefile->lexend = lexend; sourcefile->lexptr_begin = lexptr_begin; sourcefile->lexeme = lexeme; sourcefile->lasttok = lasttok; /* included file becomes the current source */ sourcefile = s; lexptr = NULL; sourceline = 0; source = NULL; lasttok = 0; lexeof = false; eof_warned = false; return 0; } /* load_library --- load a shared library */ static int load_library(INSTRUCTION *file) { SRCFILE *s; char *src = file->lextok; int errcode; bool already_included; if (do_traditional || do_posix) { error_ln(file->source_line, _("@load is a gawk extension")); return -1; } if (strlen(src) == 0) { if (do_lint) lintwarn_ln(file->source_line, _("empty filename after @load")); return 0; } s = add_srcfile(SRC_EXTLIB, src, sourcefile, &already_included, &errcode); if (s == NULL) { if (already_included) return 0; error_ln(file->source_line, _("can't open shared library `%s' for reading (%s)"), src, errcode ? strerror(errcode) : _("reason unknown")); return -1; } load_ext(s->fullpath); return 0; } /* next_sourcefile --- read program from the next source in srcfiles */ static void next_sourcefile() { static int (*closefunc)(int fd) = NULL; if (closefunc == NULL) { char *cp = getenv("AWKREADFUNC"); /* If necessary, one day, test value for different functions. */ if (cp == NULL) closefunc = close; else closefunc = one_line_close; } /* * This won't be true if there's an invalid character in * the source file or source string (e.g., user typo). * Previous versions of gawk did not core dump in such a * case. 
* * assert(lexeof == true); */ lexeof = false; eof_warned = false; sourcefile->srclines = sourceline; /* total no of lines in current file */ if (sourcefile->fd > INVALID_HANDLE) { if (sourcefile->fd != fileno(stdin)) /* safety */ (*closefunc)(sourcefile->fd); sourcefile->fd = INVALID_HANDLE; } if (sourcefile->buf != NULL) { efree(sourcefile->buf); sourcefile->buf = NULL; sourcefile->lexptr_begin = NULL; } while ((sourcefile = sourcefile->next) != NULL) { if (sourcefile == srcfiles) return; if (sourcefile->stype != SRC_EXTLIB) break; } if (sourcefile->lexptr_begin != NULL) { /* resume reading from already opened file (postponed to process '@include') */ lexptr = sourcefile->lexptr; lexend = sourcefile->lexend; lasttok = sourcefile->lasttok; lexptr_begin = sourcefile->lexptr_begin; lexeme = sourcefile->lexeme; sourceline = sourcefile->srclines; source = sourcefile->src; } else { lexptr = NULL; sourceline = 0; source = NULL; lasttok = 0; } } /* get_src_buf --- read the next buffer of source program */ static char * get_src_buf() { int n; char *scan; bool newfile; int savelen; struct stat sbuf; /* * No argument prototype on readfunc on purpose, * avoids problems with some ancient systems where * the types of arguments to read() aren't up to date. */ static ssize_t (*readfunc)() = 0; if (readfunc == NULL) { char *cp = getenv("AWKREADFUNC"); /* If necessary, one day, test value for different functions. */ if (cp == NULL) /* * cast is to remove warnings on systems with * different return types for read. */ readfunc = ( ssize_t(*)() ) read; else readfunc = read_one_line; } newfile = false; if (sourcefile == srcfiles) return NULL; if (sourcefile->stype == SRC_CMDLINE) { if (sourcefile->bufsize == 0) { sourcefile->bufsize = strlen(sourcefile->src); lexptr = lexptr_begin = lexeme = sourcefile->src; lexend = lexptr + sourcefile->bufsize; sourceline = 1; if (sourcefile->bufsize == 0) { /* * Yet Another Special case: * gawk '' /path/name * Sigh. 
*/ static bool warned = false; if (do_lint && ! warned) { warned = true; lintwarn(_("empty program text on command line")); } lexeof = true; } } else if (sourcefile->buf == NULL && *(lexptr-1) != '\n') { /* * The following goop is to ensure that the source * ends with a newline and that the entire current * line is available for error messages. */ int offset; char *buf; offset = lexptr - lexeme; for (scan = lexeme; scan > lexptr_begin; scan--) if (*scan == '\n') { scan++; break; } savelen = lexptr - scan; emalloc(buf, char *, savelen + 1, "get_src_buf"); memcpy(buf, scan, savelen); thisline = buf; lexptr = buf + savelen; *lexptr = '\n'; lexeme = lexptr - offset; lexptr_begin = buf; lexend = lexptr + 1; sourcefile->buf = buf; } else lexeof = true; return lexptr; } if (sourcefile->fd <= INVALID_HANDLE) { int fd; int l; source = sourcefile->src; if (source == NULL) return NULL; fd = srcopen(sourcefile); if (fd <= INVALID_HANDLE) { char *in; /* suppress file name and line no. in error mesg */ in = source; source = NULL; error(_("can't open source file `%s' for reading (%s)"), in, strerror(errno)); errcount++; lexeof = true; return sourcefile->src; } sourcefile->fd = fd; l = optimal_bufsize(fd, &sbuf); /* * Make sure that something silly like * AWKBUFSIZE=8 make check * works ok. */ #define A_DECENT_BUFFER_SIZE 128 if (l < A_DECENT_BUFFER_SIZE) l = A_DECENT_BUFFER_SIZE; #undef A_DECENT_BUFFER_SIZE sourcefile->bufsize = l; newfile = true; emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf"); lexptr = lexptr_begin = lexeme = sourcefile->buf; savelen = 0; sourceline = 1; thisline = NULL; } else { /* * Here, we retain the current source line in the beginning of the buffer. 
*/ int offset; for (scan = lexeme; scan > lexptr_begin; scan--) if (*scan == '\n') { scan++; break; } savelen = lexptr - scan; offset = lexptr - lexeme; if (savelen > 0) { /* * Need to make sure we have room left for reading new text; * grow the buffer (by doubling, an arbitrary choice), if the retained line * takes up more than a certain percentage (50%, again an arbitrary figure) * of the available space. */ if (savelen > sourcefile->bufsize / 2) { /* long line or token */ sourcefile->bufsize *= 2; erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf"); scan = sourcefile->buf + (scan - lexptr_begin); lexptr_begin = sourcefile->buf; } thisline = lexptr_begin; memmove(thisline, scan, savelen); lexptr = thisline + savelen; lexeme = lexptr - offset; } else { savelen = 0; lexptr = lexeme = lexptr_begin; thisline = NULL; } } n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen); if (n == -1) { error(_("can't read sourcefile `%s' (%s)"), source, strerror(errno)); errcount++; lexeof = true; } else { lexend = lexptr + n; if (n == 0) { static bool warned = false; if (do_lint && newfile && ! warned) { warned = true; sourceline = 0; lintwarn(_("source file `%s' is empty"), source); } lexeof = true; } } return sourcefile->buf; } /* tokadd --- add a character to the token buffer */ #define tokadd(x) (*tok++ = (x), tok == tokend ? 
tokexpand() : tok) /* tokexpand --- grow the token buffer */ static char * tokexpand() { static int toksize; int tokoffset; if (tokstart != NULL) { tokoffset = tok - tokstart; toksize *= 2; erealloc(tokstart, char *, toksize, "tokexpand"); tok = tokstart + tokoffset; } else { toksize = 60; emalloc(tokstart, char *, toksize, "tokexpand"); tok = tokstart; } tokend = tokstart + toksize; return tok; } /* check_bad_char --- fatal if c isn't allowed in gawk source code */ /* * The error message was inspired by someone who decided to put * a physical \0 byte into the source code to see what would * happen and then filed a bug report about it. Sigh. */ static void check_bad_char(int c) { /* allow escapes. needed for autoconf. bleah. */ switch (c) { case '\a': case '\b': case '\f': case '\n': case '\r': case '\t': return; default: break; } if (iscntrl(c) && ! isspace(c)) fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c & 0xFF); } /* nextc --- get the next input character */ static int nextc(bool check_for_bad) { if (gawk_mb_cur_max > 1) { again: #ifdef NO_CONTINUE_SOURCE_STRINGS if (lexeof) return END_FILE; #else if (lexeof) { if (sourcefile->next == srcfiles) return END_FILE; else next_sourcefile(); } #endif if (lexptr == NULL || lexptr >= lexend) { if (get_src_buf()) goto again; return END_SRC; } /* Update the buffer index. */ cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 : cur_ring_idx + 1; /* Did we already check the current character? */ if (cur_char_ring[cur_ring_idx] == 0) { /* No, we need to check the next character on the buffer. */ int idx, work_ring_idx = cur_ring_idx; mbstate_t tmp_state; size_t mbclen; for (idx = 0; lexptr + idx < lexend; idx++) { tmp_state = cur_mbstate; mbclen = mbrlen(lexptr, idx + 1, &tmp_state); if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) { /* It is a singlebyte character, non-complete multibyte character or EOF. We treat it as a singlebyte character. 
*/ cur_char_ring[work_ring_idx] = 1; break; } else if (mbclen == (size_t)-2) { /* It is not a complete multibyte character. */ cur_char_ring[work_ring_idx] = idx + 1; } else { /* mbclen > 1 */ cur_char_ring[work_ring_idx] = mbclen; break; } work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)? 0 : work_ring_idx + 1; } cur_mbstate = tmp_state; /* Put a mark on the position on which we write next character. */ work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)? 0 : work_ring_idx + 1; cur_char_ring[work_ring_idx] = 0; } if (check_for_bad || *lexptr == '\0') check_bad_char(*lexptr); return (int) (unsigned char) *lexptr++; } else { do { #ifdef NO_CONTINUE_SOURCE_STRINGS if (lexeof) return END_FILE; #else if (lexeof) { if (sourcefile->next == srcfiles) return END_FILE; else next_sourcefile(); } #endif if (lexptr && lexptr < lexend) { if (check_for_bad || *lexptr == '\0') check_bad_char(*lexptr); return ((int) (unsigned char) *lexptr++); } } while (get_src_buf()); return END_SRC; } } /* pushback --- push a character back on the input */ static inline void pushback(void) { if (gawk_mb_cur_max > 1) cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 : cur_ring_idx - 1; (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); } /* check_comment --- check for block comment */ void check_comment(void) { if (comment != NULL) { if (first_rule) { program_comment = comment; } else block_comment = comment; comment = NULL; } first_rule = false; } /* * get_comment --- collect comment text. * Flag = EOL_COMMENT for end-of-line comments. * Flag = FULL_COMMENT for self-contained comments. */ int get_comment(int flag) { int c; int sl; tok = tokstart; tokadd('#'); sl = sourceline; char *p1; char *p2; while (true) { while ((c = nextc(false)) != '\n' && c != END_FILE) { /* ignore \r characters */ if (c != '\r') tokadd(c); } if (flag == EOL_COMMENT) { /* comment at end of line. 
*/ if (c == '\n') tokadd(c); break; } if (c == '\n') { tokadd(c); sourceline++; do { c = nextc(false); if (c == '\n') { sourceline++; tokadd(c); } } while (isspace(c) && c != END_FILE); if (c == END_FILE) break; else if (c != '#') { pushback(); sourceline--; break; } else tokadd(c); } else break; } if (comment != NULL) prior_comment = comment; /* remove any trailing blank lines (consecutive \n) from comment */ p1 = tok - 1; p2 = tok - 2; while (*p1 == '\n' && *p2 == '\n') { p1--; p2--; tok--; } comment = bcalloc(Op_comment, 1, sl); comment->source_file = source; comment->memory = make_str_node(tokstart, tok - tokstart, 0); comment->memory->comment_type = flag; return c; } /* split_comment --- split initial comment text into program and function parts */ static void split_comment(void) { char *p; int l; NODE *n; p = comment_to_save->memory->stptr; l = comment_to_save->memory->stlen - 3; /* have at least two comments so split at last blank line (\n\n) */ while (l >= 0) { if (p[l] == '\n' && p[l+1] == '\n') { function_comment = comment_to_save; n = function_comment->memory; function_comment->memory = make_string(p + l + 2, n->stlen - l - 2); /* create program comment */ program_comment = bcalloc(Op_comment, 1, sourceline); program_comment->source_file = comment_to_save->source_file; p[l + 2] = 0; program_comment->memory = make_str_node(p, l + 2, 0); comment_to_save = NULL; freenode(n); break; } else l--; } } /* allow_newline --- allow newline after &&, ||, ? and : */ static void allow_newline(void) { int c; for (;;) { c = nextc(true); if (c == END_FILE) { pushback(); break; } if (c == '#') { if (do_pretty_print && ! do_profile) { /* collect comment byte code iff doing pretty print but not profiling. */ c = get_comment(EOL_COMMENT); } else { while ((c = nextc(false)) != '\n' && c != END_FILE) continue; } if (c == END_FILE) { pushback(); break; } } if (c == '\n') sourceline++; if (! 
isspace(c)) { pushback(); break; } } } /* newline_eof --- return newline or EOF as needed and adjust variables */ /* * This routine used to be a macro, however GCC 4.6.2 warned about * the result of a computation not being used. Converting to a function * removes the warnings. */ static int newline_eof() { /* NB: a newline at end does not start a source line. */ if (lasttok != NEWLINE) { pushback(); if (do_lint && ! eof_warned) { lintwarn(_("source file does not end in newline")); eof_warned = true; } sourceline++; return NEWLINE; } sourceline--; eof_warned = false; return LEX_EOF; } /* yylex --- Read the input and turn it into tokens. */ static int #ifdef USE_EBCDIC yylex_ebcdic(void) #else yylex(void) #endif { int c; bool seen_e = false; /* These are for numbers */ bool seen_point = false; bool esc_seen; /* for literal strings */ int mid; int base; static bool did_newline = false; char *tokkey; bool inhex = false; bool intlstr = false; AWKNUM d; bool collecting_typed_regexp = false; #define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline) #define NEWLINE_EOF newline_eof() yylval = (INSTRUCTION *) NULL; if (lasttok == SUBSCRIPT) { lasttok = 0; return SUBSCRIPT; } if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */ return 0; c = nextc(! want_regexp); if (c == END_SRC) return 0; if (c == END_FILE) return lasttok = NEWLINE_EOF; pushback(); #if defined __EMX__ /* * added for OS/2's extproc feature of cmd.exe * (like #! in BSD sh) */ if (strncasecmp(lexptr, "extproc ", 8) == 0) { while (*lexptr && *lexptr != '\n') lexptr++; } #endif lexeme = lexptr; thisline = NULL; collect_regexp: if (want_regexp) { int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ int b_index = -1; int cur_index = 0; /* * Here is what's ok with brackets: * * [..[..] []] [^]] [.../...] * [...\[...] [...\]...] [...\/...] * * (Remember that all of the above are inside /.../) * * The code for \ handles \[, \] and \/. 
* * Otherwise, track the first open [ position, and if * an embedded ] occurs, allow it to pass through * if it's right after the first [ or after [^. * * Whew! */ want_regexp = false; tok = tokstart; for (;;) { c = nextc(false); cur_index = tok - tokstart; if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { case '[': if (nextc(false) == ':' || in_brack == 0) { in_brack++; if (in_brack == 1) b_index = tok - tokstart; } pushback(); break; case ']': if (in_brack > 0 && (cur_index == b_index + 1 || (cur_index == b_index + 2 && tok[-1] == '^'))) ; /* do nothing */ else { in_brack--; if (in_brack == 0) b_index = -1; } break; case '\\': if ((c = nextc(false)) == END_FILE) { pushback(); yyerror(_("unterminated regexp ends with `\\' at end of file")); goto end_regexp; /* kludge */ } if (c == '\r') /* allow MS-DOS files. bleah */ c = nextc(true); if (c == '\n') { sourceline++; continue; } else { tokadd('\\'); tokadd(c); continue; } break; case '/': /* end of the regexp */ if (in_brack > 0) break; end_regexp: yylval = GET_INSTRUCTION(Op_token); yylval->lextok = estrdup(tokstart, tok - tokstart); if (do_lint) { int peek = nextc(true); pushback(); if (peek == 'i' || peek == 's') { if (source) lintwarn( _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"), source, sourceline, peek); else lintwarn( _("tawk regex modifier `/.../%c' doesn't work in gawk"), peek); } } if (collecting_typed_regexp) { collecting_typed_regexp = false; lasttok = TYPED_REGEXP; } else lasttok = REGEXP; return lasttok; case '\n': pushback(); yyerror(_("unterminated regexp")); goto end_regexp; /* kludge */ case END_FILE: pushback(); yyerror(_("unterminated regexp at end of file")); goto end_regexp; /* kludge */ } tokadd(c); } } retry: /* skipping \r is a hack, but windows is just too pervasive. sigh. */ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r') continue; lexeme = lexptr ? 
lexptr - 1 : lexptr; thisline = NULL; tok = tokstart; if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { case END_SRC: return 0; case END_FILE: return lasttok = NEWLINE_EOF; case '\n': sourceline++; return lasttok = NEWLINE; case '#': /* it's a comment */ if (do_pretty_print && ! do_profile) { /* * Collect comment byte code iff doing pretty print * but not profiling. */ if (lasttok == NEWLINE || lasttok == 0) c = get_comment(FULL_COMMENT); else c = get_comment(EOL_COMMENT); if (c == END_FILE) return lasttok = NEWLINE_EOF; } else { while ((c = nextc(false)) != '\n') { if (c == END_FILE) return lasttok = NEWLINE_EOF; } } sourceline++; return lasttok = NEWLINE; case '@': c = nextc(true); if (c == '/') { want_regexp = true; collecting_typed_regexp = true; goto collect_regexp; } pushback(); at_seen = true; return lasttok = '@'; case '\\': #ifdef RELAXED_CONTINUATION /* * This code purports to allow comments and/or whitespace * after the `\' at the end of a line used for continuation. * Use it at your own risk. We think it's a bad idea, which * is why it's not on by default. */ if (! do_traditional) { /* strip trailing white-space and/or comment */ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r') continue; if (c == '#') { static bool warned = false; if (do_lint && ! warned) { warned = true; lintwarn( _("use of `\\ #...' line continuation is not portable")); } if (do_pretty_print && ! do_profile) c = get_comment(EOL_COMMENT); else { while ((c = nextc(false)) != '\n') if (c == END_FILE) break; } } pushback(); } #endif /* RELAXED_CONTINUATION */ c = nextc(true); if (c == '\r') /* allow MS-DOS files. bleah */ c = nextc(true); if (c == '\n') { sourceline++; goto retry; } else { yyerror(_("backslash not last character on line")); return lasttok = LEX_EOF; } break; case ':': case '?': yylval = GET_INSTRUCTION(Op_cond_exp); if (! 
do_posix) allow_newline(); return lasttok = c; /* * in_parens is undefined unless we are parsing a print * statement (in_print), but why bother with a check? */ case ')': in_parens--; return lasttok = c; case '(': in_parens++; return lasttok = c; case '$': yylval = GET_INSTRUCTION(Op_field_spec); return lasttok = c; case '{': if (++in_braces == 1) firstline = sourceline; case ';': case ',': case '[': return lasttok = c; case ']': c = nextc(true); pushback(); if (c == '[') { if (do_traditional) fatal(_("multidimensional arrays are a gawk extension")); if (do_lint) lintwarn(_("multidimensional arrays are a gawk extension")); yylval = GET_INSTRUCTION(Op_sub_array); lasttok = ']'; } else { yylval = GET_INSTRUCTION(Op_subscript); lasttok = SUBSCRIPT; /* end of subscripts */ } return ']'; case '*': if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_assign_times); return lasttok = ASSIGNOP; } else if (do_posix) { pushback(); yylval = GET_INSTRUCTION(Op_times); return lasttok = '*'; } else if (c == '*') { /* make ** and **= aliases for ^ and ^= */ static bool did_warn_op = false, did_warn_assgn = false; if (nextc(true) == '=') { if (! did_warn_assgn) { did_warn_assgn = true; if (do_lint) lintwarn(_("POSIX does not allow operator `**='")); if (do_lint_old) warning(_("old awk does not support operator `**='")); } yylval = GET_INSTRUCTION(Op_assign_exp); return ASSIGNOP; } else { pushback(); if (! 
did_warn_op) { did_warn_op = true; if (do_lint) lintwarn(_("POSIX does not allow operator `**'")); if (do_lint_old) warning(_("old awk does not support operator `**'")); } yylval = GET_INSTRUCTION(Op_exp); return lasttok = '^'; } } pushback(); yylval = GET_INSTRUCTION(Op_times); return lasttok = '*'; case '/': if (nextc(false) == '=') { pushback(); return lasttok = SLASH_BEFORE_EQUAL; } pushback(); yylval = GET_INSTRUCTION(Op_quotient); return lasttok = '/'; case '%': if (nextc(true) == '=') { yylval = GET_INSTRUCTION(Op_assign_mod); return lasttok = ASSIGNOP; } pushback(); yylval = GET_INSTRUCTION(Op_mod); return lasttok = '%'; case '^': { static bool did_warn_op = false, did_warn_assgn = false; if (nextc(true) == '=') { if (do_lint_old && ! did_warn_assgn) { did_warn_assgn = true; warning(_("operator `^=' is not supported in old awk")); } yylval = GET_INSTRUCTION(Op_assign_exp); return lasttok = ASSIGNOP; } pushback(); if (do_lint_old && ! did_warn_op) { did_warn_op = true; warning(_("operator `^' is not supported in old awk")); } yylval = GET_INSTRUCTION(Op_exp); return lasttok = '^'; } case '+': if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_assign_plus); return lasttok = ASSIGNOP; } if (c == '+') { yylval = GET_INSTRUCTION(Op_symbol); return lasttok = INCREMENT; } pushback(); yylval = GET_INSTRUCTION(Op_plus); return lasttok = '+'; case '!': if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_notequal); return lasttok = RELOP; } if (c == '~') { yylval = GET_INSTRUCTION(Op_nomatch); return lasttok = MATCHOP; } pushback(); yylval = GET_INSTRUCTION(Op_symbol); return lasttok = '!'; case '<': if (nextc(true) == '=') { yylval = GET_INSTRUCTION(Op_leq); return lasttok = RELOP; } yylval = GET_INSTRUCTION(Op_less); pushback(); return lasttok = '<'; case '=': if (nextc(true) == '=') { yylval = GET_INSTRUCTION(Op_equal); return lasttok = RELOP; } yylval = GET_INSTRUCTION(Op_assign); pushback(); return lasttok = ASSIGN; case '>': if ((c = 
nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_geq); return lasttok = RELOP; } else if (c == '>') { yylval = GET_INSTRUCTION(Op_symbol); yylval->redir_type = redirect_append; return lasttok = IO_OUT; } pushback(); if (in_print && in_parens == 0) { yylval = GET_INSTRUCTION(Op_symbol); yylval->redir_type = redirect_output; return lasttok = IO_OUT; } yylval = GET_INSTRUCTION(Op_greater); return lasttok = '>'; case '~': yylval = GET_INSTRUCTION(Op_match); return lasttok = MATCHOP; case '}': /* * Added did newline stuff. Easier than * hacking the grammar. */ if (did_newline) { did_newline = false; if (--in_braces == 0) lastline = sourceline; return lasttok = c; } did_newline = true; --lexptr; /* pick up } next time */ return lasttok = NEWLINE; case '"': string: esc_seen = false; /* * Allow any kind of junk in quoted string, * so pass false to nextc(). */ while ((c = nextc(false)) != '"') { if (c == '\n') { pushback(); yyerror(_("unterminated string")); return lasttok = LEX_EOF; } if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) && c == '\\') { c = nextc(true); if (c == '\r') /* allow MS-DOS files. bleah */ c = nextc(true); if (c == '\n') { sourceline++; continue; } esc_seen = true; if (! want_source || c != '"') tokadd('\\'); } if (c == END_FILE) { pushback(); yyerror(_("unterminated string")); return lasttok = LEX_EOF; } tokadd(c); } yylval = GET_INSTRUCTION(Op_token); if (want_source) { yylval->lextok = estrdup(tokstart, tok - tokstart); return lasttok = FILENAME; } yylval->opcode = Op_push_i; yylval->memory = make_str_node(tokstart, tok - tokstart, esc_seen ? 
SCAN : 0); if (intlstr) { yylval->memory->flags |= INTLSTR; intlstr = false; if (do_intl) dumpintlstr(yylval->memory->stptr, yylval->memory->stlen); } return lasttok = YSTRING; case '-': if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_assign_minus); return lasttok = ASSIGNOP; } if (c == '-') { yylval = GET_INSTRUCTION(Op_symbol); return lasttok = DECREMENT; } pushback(); yylval = GET_INSTRUCTION(Op_minus); return lasttok = '-'; case '.': c = nextc(true); pushback(); if (! isdigit(c)) return lasttok = '.'; else c = '.'; /* FALL THROUGH */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* It's a number */ for (;;) { bool gotnumber = false; tokadd(c); switch (c) { case 'x': case 'X': if (do_traditional) goto done; if (tok == tokstart + 2) { int peek = nextc(true); if (isxdigit(peek)) { inhex = true; pushback(); /* following digit */ } else { pushback(); /* x or X */ goto done; } } break; case '.': /* period ends exponent part of floating point number */ if (seen_point || seen_e) { gotnumber = true; break; } seen_point = true; break; case 'e': case 'E': if (inhex) break; if (seen_e) { gotnumber = true; break; } seen_e = true; if ((c = nextc(true)) == '-' || c == '+') { int c2 = nextc(true); if (isdigit(c2)) { tokadd(c); tokadd(c2); } else { pushback(); /* non-digit after + or - */ pushback(); /* + or - */ pushback(); /* e or E */ } } else if (! isdigit(c)) { pushback(); /* character after e or E */ pushback(); /* e or E */ } else { pushback(); /* digit */ } break; case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': case 'D': case 'd': case 'f': case 'F': if (do_traditional || ! inhex) goto done; /* fall through */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; default: done: gotnumber = true; } if (gotnumber) break; c = nextc(true); } pushback(); tokadd('\0'); yylval = GET_INSTRUCTION(Op_push_i); base = 10; if (! 
do_traditional) { base = get_numbase(tokstart, strlen(tokstart)-1, false); if (do_lint) { if (base == 8) lintwarn("numeric constant `%.*s' treated as octal", (int) strlen(tokstart)-1, tokstart); else if (base == 16) lintwarn("numeric constant `%.*s' treated as hexadecimal", (int) strlen(tokstart)-1, tokstart); } } #ifdef HAVE_MPFR if (do_mpfr) { NODE *r; if (! seen_point && ! seen_e) { r = mpg_integer(); mpg_strtoui(r->mpg_i, tokstart, strlen(tokstart), NULL, base); errno = 0; } else { int tval; r = mpg_float(); tval = mpfr_strtofr(r->mpg_numbr, tokstart, NULL, base, ROUND_MODE); errno = 0; IEEE_FMT(r->mpg_numbr, tval); } yylval->memory = set_profile_text(r, tokstart, strlen(tokstart)-1); return lasttok = YNUMBER; } #endif if (base != 10) d = nondec2awknum(tokstart, strlen(tokstart)-1, NULL); else d = atof(tokstart); yylval->memory = set_profile_text(make_number(d), tokstart, strlen(tokstart) - 1); if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d) yylval->memory->flags |= NUMINT; return lasttok = YNUMBER; case '&': if ((c = nextc(true)) == '&') { yylval = GET_INSTRUCTION(Op_and); allow_newline(); return lasttok = LEX_AND; } pushback(); yylval = GET_INSTRUCTION(Op_symbol); return lasttok = '&'; case '|': if ((c = nextc(true)) == '|') { yylval = GET_INSTRUCTION(Op_or); allow_newline(); return lasttok = LEX_OR; } else if (! do_traditional && c == '&') { yylval = GET_INSTRUCTION(Op_symbol); yylval->redir_type = redirect_twoway; return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN); } pushback(); if (in_print && in_parens == 0) { yylval = GET_INSTRUCTION(Op_symbol); yylval->redir_type = redirect_pipe; return lasttok = IO_OUT; } else { yylval = GET_INSTRUCTION(Op_symbol); yylval->redir_type = redirect_pipein; return lasttok = IO_IN; } } if (! is_letter(c)) { yyerror(_("invalid char '%c' in expression"), c); return lasttok = LEX_EOF; } /* * Lots of fog here. 
Consider: * * print "xyzzy"$_"foo" * * Without the check for ` lasttok != '$' ', this is parsed as * * print "xxyzz" $(_"foo") * * With the check, it is "correctly" parsed as three * string concatenations. Sigh. This seems to be * "more correct", but this is definitely one of those * occasions where the interactions are funny. */ if (! do_traditional && c == '_' && lasttok != '$') { if ((c = nextc(true)) == '"') { intlstr = true; goto string; } pushback(); c = '_'; } /* it's some type of name-type-thing. Find its length. */ tok = tokstart; while (c != END_FILE && is_identchar(c)) { tokadd(c); c = nextc(true); } tokadd('\0'); pushback(); /* See if it is a special token. */ if ((mid = check_special(tokstart)) >= 0) { static int warntab[sizeof(tokentab) / sizeof(tokentab[0])]; int class = tokentab[mid].class; if ((class == LEX_INCLUDE || class == LEX_LOAD || class == LEX_EVAL) && lasttok != '@') goto out; /* allow parameter names to shadow the names of gawk extension built-ins */ if ((tokentab[mid].flags & GAWKX) != 0) { NODE *f; switch (want_param_names) { case FUNC_HEADER: /* in header, defining parameter names */ goto out; case FUNC_BODY: /* in body, name must be in symbol table for it to be a parameter */ if ((f = lookup(tokstart)) != NULL) { if (f->type == Node_builtin_func) break; else goto out; } /* else fall through */ case DONT_CHECK: /* regular code */ break; default: cant_happen(); break; } } if (do_lint) { if ((tokentab[mid].flags & GAWKX) != 0 && (warntab[mid] & GAWKX) == 0) { lintwarn(_("`%s' is a gawk extension"), tokentab[mid].operator); warntab[mid] |= GAWKX; } if ((tokentab[mid].flags & NOT_POSIX) != 0 && (warntab[mid] & NOT_POSIX) == 0) { lintwarn(_("POSIX does not allow `%s'"), tokentab[mid].operator); warntab[mid] |= NOT_POSIX; } } if (do_lint_old && (tokentab[mid].flags & NOT_OLD) != 0 && (warntab[mid] & NOT_OLD) == 0 ) { warning(_("`%s' is not supported in old awk"), tokentab[mid].operator); warntab[mid] |= NOT_OLD; } if ((tokentab[mid].flags & 
BREAK) != 0) break_allowed++; if ((tokentab[mid].flags & CONTINUE) != 0) continue_allowed++; switch (class) { case LEX_INCLUDE: case LEX_LOAD: want_source = true; break; case LEX_EVAL: if (in_main_context()) goto out; emalloc(tokkey, char *, tok - tokstart + 1, "yylex"); tokkey[0] = '@'; memcpy(tokkey + 1, tokstart, tok - tokstart); yylval = GET_INSTRUCTION(Op_token); yylval->lextok = tokkey; break; case LEX_FUNCTION: case LEX_BEGIN: case LEX_END: case LEX_BEGINFILE: case LEX_ENDFILE: yylval = bcalloc(tokentab[mid].value, 3, sourceline); break; case LEX_FOR: case LEX_WHILE: case LEX_DO: case LEX_SWITCH: if (! do_pretty_print) return lasttok = class; /* fall through */ case LEX_CASE: yylval = bcalloc(tokentab[mid].value, 2, sourceline); break; /* * These must be checked here, due to the LALR nature of the parser, * the rules for continue and break may not be reduced until after * a token that increments the xxx_allowed varibles is seen. Bleah. */ case LEX_CONTINUE: if (! continue_allowed) { error_ln(sourceline, _("`continue' is not allowed outside a loop")); errcount++; } goto make_instruction; case LEX_BREAK: if (! break_allowed) { error_ln(sourceline, _("`break' is not allowed outside a loop or switch")); errcount++; } goto make_instruction; default: make_instruction: yylval = GET_INSTRUCTION(tokentab[mid].value); if (class == LEX_BUILTIN || class == LEX_LENGTH) yylval->builtin_idx = mid; break; } return lasttok = class; } out: tokkey = estrdup(tokstart, tok - tokstart); if (*lexptr == '(') { yylval = bcalloc(Op_token, 2, sourceline); yylval->lextok = tokkey; return lasttok = FUNC_CALL; } else { static bool goto_warned = false; yylval = GET_INSTRUCTION(Op_token); yylval->lextok = tokkey; #define SMART_ALECK 1 if (SMART_ALECK && do_lint && ! 
goto_warned && strcasecmp(tokkey, "goto") == 0) { goto_warned = true; lintwarn(_("`goto' considered harmful!\n")); } return lasttok = NAME; } #undef GET_INSTRUCTION #undef NEWLINE_EOF } /* It's EBCDIC in a Bison grammar, run for the hills! Or, convert single-character tokens coming out of yylex() from EBCDIC to ASCII values on-the-fly so that the parse tables need not be regenerated for EBCDIC systems. */ #ifdef USE_EBCDIC static int yylex(void) { static char etoa_xlate[256]; static int do_etoa_init = 1; int tok; if (do_etoa_init) { for (tok = 0; tok < 256; tok++) etoa_xlate[tok] = (char) tok; #ifdef HAVE___ETOA_L /* IBM helpfully provides this function. */ __etoa_l(etoa_xlate, sizeof(etoa_xlate)); #else # error "An EBCDIC-to-ASCII translation function is needed for this system" #endif do_etoa_init = 0; } tok = yylex_ebcdic(); if (tok >= 0 && tok <= 0xFF) tok = etoa_xlate[tok]; return tok; } #endif /* USE_EBCDIC */ /* snode --- instructions for builtin functions. Checks for arg. count and supplies defaults where possible. */ static INSTRUCTION * snode(INSTRUCTION *subn, INSTRUCTION *r) { INSTRUCTION *arg; INSTRUCTION *ip; NODE *n; int nexp = 0; int args_allowed; int idx = r->builtin_idx; if (subn != NULL) { INSTRUCTION *tp; for (tp = subn->nexti; tp; tp = tp->nexti) { tp = tp->lasti; nexp++; } assert(nexp > 0); } /* check against how many args. 
are allowed for this builtin */ args_allowed = tokentab[idx].flags & ARGS; if (args_allowed && (args_allowed & A(nexp)) == 0) { yyerror(_("%d is invalid as number of arguments for %s"), nexp, tokentab[idx].operator); return NULL; } /* special processing for sub, gsub and gensub */ if (tokentab[idx].value == Op_sub_builtin) { const char *operator = tokentab[idx].operator; r->sub_flags = 0; arg = subn->nexti; /* first arg list */ (void) mk_rexp(arg); if (strcmp(operator, "gensub") != 0) { /* sub and gsub */ if (strcmp(operator, "gsub") == 0) r->sub_flags |= GSUB; arg = arg->lasti->nexti; /* 2nd arg list */ if (nexp == 2) { INSTRUCTION *expr; expr = list_create(instruction(Op_push_i)); expr->nexti->memory = set_profile_text(make_number(0.0), "0", 1); (void) mk_expression_list(subn, list_append(expr, instruction(Op_field_spec))); } arg = arg->lasti->nexti; /* third arg list */ ip = arg->lasti; if (ip->opcode == Op_push_i) { if (do_lint) lintwarn(_("%s: string literal as last arg of substitute has no effect"), operator); r->sub_flags |= LITERAL; } else { if (make_assignable(ip) == NULL) yyerror(_("%s third parameter is not a changeable object"), operator); else ip->do_reference = true; } r->expr_count = count_expressions(&subn, false); ip = subn->lasti; (void) list_append(subn, r); /* add after_assign code */ if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { (void) list_append(subn, instruction(Op_var_assign)); subn->lasti->assign_ctxt = Op_sub_builtin; subn->lasti->assign_var = ip->memory->var_assign; } else if (ip->opcode == Op_field_spec_lhs) { (void) list_append(subn, instruction(Op_field_assign)); subn->lasti->assign_ctxt = Op_sub_builtin; subn->lasti->field_assign = (Func_ptr) 0; ip->target_assign = subn->lasti; } else if (ip->opcode == Op_subscript_lhs) { (void) list_append(subn, instruction(Op_subscript_assign)); subn->lasti->assign_ctxt = Op_sub_builtin; } return subn; } else { /* gensub */ r->sub_flags |= GENSUB; if 
(nexp == 3) { ip = instruction(Op_push_i); ip->memory = set_profile_text(make_number(0.0), "0", 1); (void) mk_expression_list(subn, list_append(list_create(ip), instruction(Op_field_spec))); } r->expr_count = count_expressions(&subn, false); return list_append(subn, r); } } #ifdef HAVE_MPFR /* N.B.: If necessary, add special processing for alternate builtin, below */ if (do_mpfr && tokentab[idx].ptr2) r->builtin = tokentab[idx].ptr2; else #endif r->builtin = tokentab[idx].ptr; /* special case processing for a few builtins */ if (r->builtin == do_length) { if (nexp == 0) { /* no args. Use $0 */ INSTRUCTION *list; r->expr_count = 1; list = list_create(r); (void) list_prepend(list, instruction(Op_field_spec)); (void) list_prepend(list, instruction(Op_push_i)); list->nexti->memory = set_profile_text(make_number(0.0), "0", 1); return list; } else { arg = subn->nexti; if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push) arg->nexti->opcode = Op_push_arg; /* argument may be array */ } } else if (r->builtin == do_isarray || r->builtin == do_typeof) { arg = subn->nexti; if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push) arg->nexti->opcode = Op_push_arg_untyped; /* argument may be untyped */ #ifdef SUPPLY_INTDIV } else if (r->builtin == do_intdiv #ifdef HAVE_MPFR || r->builtin == MPF(intdiv) #endif ) { arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */ ip = arg->lasti; if (ip->opcode == Op_push) ip->opcode = Op_push_array; #endif /* SUPPLY_INTDIV */ } else if (r->builtin == do_match) { static bool warned = false; arg = subn->nexti->lasti->nexti; /* 2nd arg list */ (void) mk_rexp(arg); if (nexp == 3) { /* 3rd argument there */ if (do_lint && ! 
warned) { warned = true; lintwarn(_("match: third argument is a gawk extension")); } if (do_traditional) { yyerror(_("match: third argument is a gawk extension")); return NULL; } arg = arg->lasti->nexti; /* third arg list */ ip = arg->lasti; if (/*ip == arg->nexti && */ ip->opcode == Op_push) ip->opcode = Op_push_array; } } else if (r->builtin == do_split) { arg = subn->nexti->lasti->nexti; /* 2nd arg list */ ip = arg->lasti; if (ip->opcode == Op_push) ip->opcode = Op_push_array; if (nexp == 2) { INSTRUCTION *expr; expr = list_create(instruction(Op_push)); expr->nexti->memory = FS_node; (void) mk_expression_list(subn, expr); } arg = arg->lasti->nexti; n = mk_rexp(arg); if (nexp == 2) n->re_flags |= FS_DFLT; if (nexp == 4) { arg = arg->lasti->nexti; ip = arg->lasti; if (ip->opcode == Op_push) ip->opcode = Op_push_array; } } else if (r->builtin == do_patsplit) { arg = subn->nexti->lasti->nexti; /* 2nd arg list */ ip = arg->lasti; if (ip->opcode == Op_push) ip->opcode = Op_push_array; if (nexp == 2) { INSTRUCTION *expr; expr = list_create(instruction(Op_push)); expr->nexti->memory = FPAT_node; (void) mk_expression_list(subn, expr); } arg = arg->lasti->nexti; n = mk_rexp(arg); if (nexp == 4) { arg = arg->lasti->nexti; ip = arg->lasti; if (ip->opcode == Op_push) ip->opcode = Op_push_array; } } else if (r->builtin == do_close) { static bool warned = false; if (nexp == 2) { if (do_lint && ! warned) { warned = true; lintwarn(_("close: second argument is a gawk extension")); } if (do_traditional) { yyerror(_("close: second argument is a gawk extension")); return NULL; } } } else if (do_intl /* --gen-po */ && r->builtin == do_dcgettext /* dcgettext(...) */ && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */ && (subn->nexti->lasti->memory->flags & STRING) != 0) { /* it's a string constant */ /* ala xgettext, dcgettext("some string" ...) 
dumps the string */ NODE *str = subn->nexti->lasti->memory; if ((str->flags & INTLSTR) != 0) warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore")); /* don't dump it, the lexer already did */ else dumpintlstr(str->stptr, str->stlen); } else if (do_intl /* --gen-po */ && r->builtin == do_dcngettext /* dcngettext(...) */ && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */ && (subn->nexti->lasti->memory->flags & STRING) != 0 /* it's a string constant */ && subn->nexti->lasti->nexti->lasti->opcode == Op_push_i /* 2nd arg is constant too */ && (subn->nexti->lasti->nexti->lasti->memory->flags & STRING) != 0) { /* it's a string constant */ /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */ NODE *str1 = subn->nexti->lasti->memory; NODE *str2 = subn->nexti->lasti->nexti->lasti->memory; if (((str1->flags | str2->flags) & INTLSTR) != 0) warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore")); else dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen); } else if (r->builtin == do_asort || r->builtin == do_asorti) { arg = subn->nexti; /* 1st arg list */ ip = arg->lasti; if (ip->opcode == Op_push) ip->opcode = Op_push_array; if (nexp >= 2) { arg = ip->nexti; ip = arg->lasti; if (ip->opcode == Op_push) ip->opcode = Op_push_array; } } else if (r->builtin == do_index) { arg = subn->nexti->lasti->nexti; /* 2nd arg list */ ip = arg->lasti; if (ip->opcode == Op_match_rec) fatal(_("index: regexp constant as second argument is not allowed")); } #ifdef ARRAYDEBUG else if (r->builtin == do_adump) { ip = subn->nexti->lasti; if (ip->opcode == Op_push) ip->opcode = Op_push_array; } #endif if (subn != NULL) { r->expr_count = count_expressions(&subn, false); return list_append(subn, r); } r->expr_count = 0; return list_create(r); } /* parms_shadow --- check if parameters shadow globals */ static int parms_shadow(INSTRUCTION *pc, bool *shadow) { int pcount, i; bool ret = false; NODE 
*func, *fp; char *fname; func = pc->func_body; fname = func->vname; fp = func->fparms; #if 0 /* can't happen, already exited if error ? */ if (fname == NULL || func == NULL) /* error earlier */ return false; #endif pcount = func->param_cnt; if (pcount == 0) /* no args, no problem */ return 0; source = pc->source_file; sourceline = pc->source_line; /* * Use warning() and not lintwarn() so that can warn * about all shadowed parameters. */ for (i = 0; i < pcount; i++) { if (lookup(fp[i].param) != NULL) { warning( _("function `%s': parameter `%s' shadows global variable"), fname, fp[i].param); ret = true; } } *shadow |= ret; return 0; } /* valinfo --- dump var info */ void valinfo(NODE *n, Func_print print_func, FILE *fp) { if (n == Nnull_string) print_func(fp, "uninitialized scalar\n"); else if ((n->flags & REGEX) != 0) print_func(fp, "@/%.*s/\n", n->stlen, n->stptr); else if ((n->flags & STRING) != 0) { pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false); print_func(fp, "\n"); } else if ((n->flags & NUMBER) != 0) { #ifdef HAVE_MPFR if (is_mpg_float(n)) print_func(fp, "%s\n", mpg_fmt("%.17R*g", ROUND_MODE, n->mpg_numbr)); else if (is_mpg_integer(n)) print_func(fp, "%s\n", mpg_fmt("%Zd", n->mpg_i)); else #endif print_func(fp, "%.17g\n", n->numbr); } else print_func(fp, "?? 
flags %s\n", flags2str(n->flags)); } /* dump_vars --- dump the symbol table */ void dump_vars(const char *fname) { FILE *fp; NODE **vars; if (fname == NULL) fp = stderr; else if (strcmp(fname, "-") == 0) fp = stdout; else if ((fp = fopen(fname, "w")) == NULL) { warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno)); warning(_("sending variable list to standard error")); fp = stderr; } vars = variable_list(); print_vars(vars, fprintf, fp); efree(vars); if (fp != stdout && fp != stderr && fclose(fp) != 0) warning(_("%s: close failed (%s)"), fname, strerror(errno)); } /* dump_funcs --- print all functions */ void dump_funcs() { NODE **funcs; funcs = function_list(true); (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) pp_func, (void *) 0); efree(funcs); } /* shadow_funcs --- check all functions for parameters that shadow globals */ void shadow_funcs() { static int calls = 0; bool shadow = false; NODE **funcs; if (calls++ != 0) fatal(_("shadow_funcs() called twice!")); funcs = function_list(true); (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) parms_shadow, & shadow); efree(funcs); /* End with fatal if the user requested it. */ if (shadow && lintfunc == r_fatal) lintwarn(_("there were shadowed variables.")); } /* mk_function --- finalize function definition node; remove parameters * out of the symbol table. */ static INSTRUCTION * mk_function(INSTRUCTION *fi, INSTRUCTION *def) { NODE *thisfunc; thisfunc = fi->func_body; assert(thisfunc != NULL); if (do_optimize && def->lasti->opcode == Op_pop) { /* tail call which does not return any value. 
*/ INSTRUCTION *t; for (t = def->nexti; t->nexti != def->lasti; t = t->nexti) ; if (t->opcode == Op_func_call && strcmp(t->func_name, thisfunc->vname) == 0) (t + 1)->tail_call = true; } /* add any pre-function comment to start of action for profile.c */ if (function_comment != NULL) { function_comment->source_line = 0; (void) list_prepend(def, function_comment); function_comment = NULL; } /* add an implicit return at end; * also used by 'return' command in debugger */ (void) list_append(def, instruction(Op_push_i)); def->lasti->memory = dupnode(Nnull_string); (void) list_append(def, instruction(Op_K_return)); if (do_pretty_print) (void) list_prepend(def, instruction(Op_exec_count)); /* fi->opcode = Op_func */ (fi + 1)->firsti = def->nexti; (fi + 1)->lasti = def->lasti; (fi + 2)->first_line = fi->source_line; (fi + 2)->last_line = lastline; fi->nexti = def->nexti; bcfree(def); (void) list_append(rule_list, fi + 1); /* debugging */ /* update lint table info */ func_use(thisfunc->vname, FUNC_DEFINE); /* remove params from symbol table */ remove_params(thisfunc); return fi; } /* * install_function: * install function name in the symbol table. * Extra work, build up and install a list of the parameter names. */ static int install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist) { NODE *r, *f; int pcount = 0; r = lookup(fname); if (r != NULL) { error_ln(fi->source_line, _("function name `%s' previously defined"), fname); return -1; } if (plist != NULL) pcount = plist->lasti->param_count + 1; f = install_symbol(fname, Node_func); fi->func_body = f; f->param_cnt = pcount; f->code_ptr = fi; f->fparms = NULL; if (pcount > 0) { char **pnames; pnames = check_params(fname, pcount, plist); /* frees plist */ f->fparms = make_params(pnames, pcount); efree(pnames); install_params(f); } return 0; } /* check_params --- build a list of function parameter names after * making sure that the names are valid and there are no duplicates. 
*/ static char ** check_params(char *fname, int pcount, INSTRUCTION *list) { INSTRUCTION *p, *np; int i, j; char *name; char **pnames; assert(pcount > 0); emalloc(pnames, char **, pcount * sizeof(char *), "check_params"); for (i = 0, p = list->nexti; p != NULL; i++, p = np) { np = p->nexti; name = p->lextok; p->lextok = NULL; if (strcmp(name, fname) == 0) { /* check for function foo(foo) { ... }. bleah. */ error_ln(p->source_line, _("function `%s': can't use function name as parameter name"), fname); } else if (is_std_var(name)) { error_ln(p->source_line, _("function `%s': can't use special variable `%s' as a function parameter"), fname, name); } /* check for duplicate parameters */ for (j = 0; j < i; j++) { if (strcmp(name, pnames[j]) == 0) { error_ln(p->source_line, _("function `%s': parameter #%d, `%s', duplicates parameter #%d"), fname, i + 1, name, j + 1); } } pnames[i] = name; bcfree(p); } bcfree(list); return pnames; } #ifdef HASHSIZE undef HASHSIZE #endif #define HASHSIZE 1021 static struct fdesc { char *name; short used; short defined; short extension; struct fdesc *next; } *ftable[HASHSIZE]; /* func_use --- track uses and definitions of functions */ static void func_use(const char *name, enum defref how) { struct fdesc *fp; int len; int ind; len = strlen(name); ind = hash(name, len, HASHSIZE, NULL); for (fp = ftable[ind]; fp != NULL; fp = fp->next) if (strcmp(fp->name, name) == 0) goto update_value; /* not in the table, fall through to allocate a new one */ ezalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use"); emalloc(fp->name, char *, len + 1, "func_use"); strcpy(fp->name, name); fp->next = ftable[ind]; ftable[ind] = fp; update_value: if (how == FUNC_DEFINE) fp->defined++; else if (how == FUNC_EXT) { fp->defined++; fp->extension++; } else fp->used++; } /* track_ext_func --- add an extension function to the table */ void track_ext_func(const char *name) { func_use(name, FUNC_EXT); } /* check_funcs --- verify functions that are called but not 
defined */ static void check_funcs() { struct fdesc *fp, *next; int i; if (! in_main_context()) goto free_mem; for (i = 0; i < HASHSIZE; i++) { for (fp = ftable[i]; fp != NULL; fp = fp->next) { #ifdef REALLYMEAN /* making this the default breaks old code. sigh. */ if (fp->defined == 0 && ! fp->extension) { error( _("function `%s' called but never defined"), fp->name); errcount++; } #else if (do_lint && fp->defined == 0 && ! fp->extension) lintwarn( _("function `%s' called but never defined"), fp->name); #endif if (do_lint && fp->used == 0 && ! fp->extension) { lintwarn(_("function `%s' defined but never called directly"), fp->name); } } } free_mem: /* now let's free all the memory */ for (i = 0; i < HASHSIZE; i++) { for (fp = ftable[i]; fp != NULL; fp = next) { next = fp->next; efree(fp->name); efree(fp); } ftable[i] = NULL; } } /* param_sanity --- look for parameters that are regexp constants */ static void param_sanity(INSTRUCTION *arglist) { INSTRUCTION *argl, *arg; int i = 1; if (arglist == NULL) return; for (argl = arglist->nexti; argl; ) { arg = argl->lasti; if (arg->opcode == Op_match_rec) warning_ln(arg->source_line, _("regexp constant for parameter #%d yields boolean value"), i); argl = arg->nexti; i++; } } /* variable --- make sure NAME is in the symbol table */ NODE * variable(int location, char *name, NODETYPE type) { NODE *r; if ((r = lookup(name)) != NULL) { if (r->type == Node_func || r->type == Node_ext_func ) error_ln(location, _("function `%s' called with space between name and `(',\nor used as a variable or an array"), r->vname); } else { /* not found */ return install_symbol(name, type); } efree(name); return r; } /* make_regnode --- make a regular expression node */ NODE * make_regnode(int type, NODE *exp) { NODE *n; assert(type == Node_regex || type == Node_dynregex); getnode(n); memset(n, 0, sizeof(NODE)); n->type = type; n->re_cnt = 1; if (type == Node_regex) { n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false); if 
(n->re_reg[0] == NULL) { freenode(n); return NULL; } n->re_reg[1] = make_regexp(exp->stptr, exp->stlen, true, true, false); if (n->re_reg[1] == NULL) { refree(n->re_reg[0]); freenode(n); return NULL; } n->re_exp = exp; n->re_flags = CONSTANT; } return n; } /* mk_rexp --- make a regular expression constant */ static NODE * mk_rexp(INSTRUCTION *list) { INSTRUCTION *ip; ip = list->nexti; if (ip == list->lasti && ip->opcode == Op_match_rec) ip->opcode = Op_push_re; else if (ip == list->lasti && ip->opcode == Op_push_re) ; /* do nothing --- @/.../ */ else { ip = instruction(Op_push_re); ip->memory = make_regnode(Node_dynregex, NULL); ip->nexti = list->lasti->nexti; list->lasti->nexti = ip; list->lasti = ip; } return ip->memory; } #ifndef NO_LINT /* isnoeffect --- when used as a statement, has no side effects */ static int isnoeffect(OPCODE type) { switch (type) { case Op_times: case Op_times_i: case Op_quotient: case Op_quotient_i: case Op_mod: case Op_mod_i: case Op_plus: case Op_plus_i: case Op_minus: case Op_minus_i: case Op_subscript: case Op_concat: case Op_exp: case Op_exp_i: case Op_unary_minus: case Op_field_spec: case Op_and_final: case Op_or_final: case Op_equal: case Op_notequal: case Op_less: case Op_greater: case Op_leq: case Op_geq: case Op_match: case Op_nomatch: case Op_match_rec: case Op_not: case Op_in_array: return true; default: break; /* keeps gcc -Wall happy */ } return false; } #endif /* NO_LINT */ /* make_assignable --- make this operand an assignable one if posiible */ static INSTRUCTION * make_assignable(INSTRUCTION *ip) { switch (ip->opcode) { case Op_push: ip->opcode = Op_push_lhs; return ip; case Op_field_spec: ip->opcode = Op_field_spec_lhs; return ip; case Op_subscript: ip->opcode = Op_subscript_lhs; return ip; default: break; /* keeps gcc -Wall happy */ } return NULL; } /* stopme --- for debugging */ NODE * stopme(int nargs ATTRIBUTE_UNUSED) { return make_number(0.0); } /* dumpintlstr --- write out an initial .po file entry for the string 
*/ static void dumpintlstr(const char *str, size_t len) { char *cp; /* See the GNU gettext distribution for details on the file format */ if (source != NULL) { /* ala the gettext sources, remove leading `./'s */ for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2) continue; printf("#: %s:%d\n", cp, sourceline); } printf("msgid "); pp_string_fp(fprintf, stdout, str, len, '"', true); putchar('\n'); printf("msgstr \"\"\n\n"); fflush(stdout); } /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */ static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2) { char *cp; /* See the GNU gettext distribution for details on the file format */ if (source != NULL) { /* ala the gettext sources, remove leading `./'s */ for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2) continue; printf("#: %s:%d\n", cp, sourceline); } printf("msgid "); pp_string_fp(fprintf, stdout, str1, len1, '"', true); putchar('\n'); printf("msgid_plural "); pp_string_fp(fprintf, stdout, str2, len2, '"', true); putchar('\n'); printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n"); fflush(stdout); } /* mk_binary --- instructions for binary operators */ static INSTRUCTION * mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op) { INSTRUCTION *ip1,*ip2; AWKNUM res; ip2 = s2->nexti; if (s2->lasti == ip2 && ip2->opcode == Op_push_i) { /* do any numeric constant folding */ ip1 = s1->nexti; if (do_optimize && ip1 == s1->lasti && ip1->opcode == Op_push_i && (ip1->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0 && (ip2->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0 ) { NODE *n1 = ip1->memory, *n2 = ip2->memory; res = force_number(n1)->numbr; (void) force_number(n2); switch (op->opcode) { case Op_times: res *= n2->numbr; break; case Op_quotient: if (n2->numbr == 0.0) { /* don't fatalize, allow parsing rest of the input */ error_ln(op->source_line, _("division by zero attempted")); goto regular; } res /= n2->numbr; break; case Op_mod: if (n2->numbr == 
0.0) { /* don't fatalize, allow parsing rest of the input */ error_ln(op->source_line, _("division by zero attempted in `%%'")); goto regular; } #ifdef HAVE_FMOD res = fmod(res, n2->numbr); #else /* ! HAVE_FMOD */ (void) modf(res / n2->numbr, &res); res = n1->numbr - res * n2->numbr; #endif /* ! HAVE_FMOD */ break; case Op_plus: res += n2->numbr; break; case Op_minus: res -= n2->numbr; break; case Op_exp: res = calc_exp(res, n2->numbr); break; default: goto regular; } op->opcode = Op_push_i; // We don't need to call set_profile_text() here since // optimizing is disabled when doing pretty printing. op->memory = make_number(res); unref(n1); unref(n2); bcfree(ip1); bcfree(ip2); bcfree(s1); bcfree(s2); return list_create(op); } else { /* do basic arithmetic optimisation */ /* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */ switch (op->opcode) { case Op_times: op->opcode = Op_times_i; break; case Op_quotient: op->opcode = Op_quotient_i; break; case Op_mod: op->opcode = Op_mod_i; break; case Op_plus: op->opcode = Op_plus_i; break; case Op_minus: op->opcode = Op_minus_i; break; case Op_exp: op->opcode = Op_exp_i; break; default: goto regular; } op->memory = ip2->memory; bcfree(ip2); bcfree(s2); /* Op_list */ return list_append(s1, op); } } regular: /* append lists s1, s2 and add `op' bytecode */ (void) list_merge(s1, s2); return list_append(s1, op); } /* mk_boolean --- instructions for boolean and, or */ static INSTRUCTION * mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op) { INSTRUCTION *tp; OPCODE opc, final_opc; opc = op->opcode; /* Op_and or Op_or */ final_opc = (opc == Op_or) ? 
Op_or_final : Op_and_final; add_lint(right, LINT_assign_in_cond); tp = left->lasti; if (tp->opcode != final_opc) { /* x || y */ list_append(right, instruction(final_opc)); add_lint(left, LINT_assign_in_cond); (void) list_append(left, op); left->lasti->target_jmp = right->lasti; /* NB: target_stmt points to previous Op_and(Op_or) in a chain; * target_stmt only used in the parser (see below). */ left->lasti->target_stmt = left->lasti; right->lasti->target_stmt = left->lasti; } else { /* optimization for x || y || z || ... */ INSTRUCTION *ip; op->opcode = final_opc; (void) list_append(right, op); op->target_stmt = tp; tp->opcode = opc; tp->target_jmp = op; /* update jump targets */ for (ip = tp->target_stmt; ; ip = ip->target_stmt) { assert(ip->opcode == opc); assert(ip->target_jmp == tp); /* if (ip->opcode == opc && ip->target_jmp == tp) */ ip->target_jmp = op; if (ip->target_stmt == ip) break; } } return list_merge(left, right); } /* mk_condition --- if-else and conditional */ static INSTRUCTION * mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch, INSTRUCTION *elsep, INSTRUCTION *false_branch) { /* * ---------------- * cond * ---------------- * t: [Op_jmp_false f ] * ---------------- * true_branch * * ---------------- * [Op_jmp y] * ---------------- * f: * false_branch * ---------------- * y: [Op_no_op] * ---------------- */ INSTRUCTION *ip; bool setup_else_part = true; if (false_branch == NULL) { false_branch = list_create(instruction(Op_no_op)); if (elsep == NULL) { /* else { } */ setup_else_part = false; } } else { /* assert(elsep != NULL); */ /* avoid a series of no_op's: if .. else if .. else if .. 
*/ if (false_branch->lasti->opcode != Op_no_op) (void) list_append(false_branch, instruction(Op_no_op)); } if (setup_else_part) { if (do_pretty_print) { (void) list_prepend(false_branch, elsep); false_branch->nexti->branch_end = false_branch->lasti; (void) list_prepend(false_branch, instruction(Op_exec_count)); } else bcfree(elsep); } (void) list_prepend(false_branch, instruction(Op_jmp)); false_branch->nexti->target_jmp = false_branch->lasti; add_lint(cond, LINT_assign_in_cond); ip = list_append(cond, instruction(Op_jmp_false)); ip->lasti->target_jmp = false_branch->nexti->nexti; if (do_pretty_print) { (void) list_prepend(ip, ifp); (void) list_append(ip, instruction(Op_exec_count)); ip->nexti->branch_if = ip->lasti; ip->nexti->branch_else = false_branch->nexti; } else bcfree(ifp); if (true_branch != NULL) list_merge(ip, true_branch); return list_merge(ip, false_branch); } enum defline { FIRST_LINE, LAST_LINE }; /* find_line -- find the first(last) line in a list of (pattern) instructions */ static int find_line(INSTRUCTION *pattern, enum defline what) { INSTRUCTION *ip; int lineno = 0; for (ip = pattern->nexti; ip; ip = ip->nexti) { if (what == LAST_LINE) { if (ip->source_line > lineno) lineno = ip->source_line; } else { /* FIRST_LINE */ if (ip->source_line > 0 && (lineno == 0 || ip->source_line < lineno)) lineno = ip->source_line; } if (ip == pattern->lasti) break; } assert(lineno > 0); return lineno; } /* append_rule --- pattern-action instructions */ static INSTRUCTION * append_rule(INSTRUCTION *pattern, INSTRUCTION *action) { /* * ---------------- * pattern * ---------------- * [Op_jmp_false f ] * ---------------- * action * ---------------- * f: [Op_no_op ] * ---------------- */ INSTRUCTION *rp; INSTRUCTION *tp; INSTRUCTION *ip; if (rule != Rule) { rp = pattern; if (do_pretty_print) (void) list_append(action, instruction(Op_no_op)); (rp + 1)->firsti = action->nexti; (rp + 1)->lasti = action->lasti; (rp + 2)->first_line = pattern->source_line; (rp + 
2)->last_line = lastline; if (block_comment != NULL) { ip = list_prepend(list_prepend(action, block_comment), rp); block_comment = NULL; } else ip = list_prepend(action, rp); } else { rp = bcalloc(Op_rule, 3, 0); rp->in_rule = Rule; rp->source_file = source; tp = instruction(Op_no_op); if (pattern == NULL) { /* assert(action != NULL); */ if (do_pretty_print) (void) list_prepend(action, instruction(Op_exec_count)); (rp + 1)->firsti = action->nexti; (rp + 1)->lasti = tp; (rp + 2)->first_line = firstline; (rp + 2)->last_line = lastline; rp->source_line = firstline; ip = list_prepend(list_append(action, tp), rp); } else { (void) list_append(pattern, instruction(Op_jmp_false)); pattern->lasti->target_jmp = tp; (rp + 2)->first_line = find_line(pattern, FIRST_LINE); rp->source_line = (rp + 2)->first_line; if (action == NULL) { (rp + 2)->last_line = find_line(pattern, LAST_LINE); action = list_create(instruction(Op_K_print_rec)); if (do_pretty_print) (void) list_prepend(action, instruction(Op_exec_count)); } else (rp + 2)->last_line = lastline; if (do_pretty_print) { (void) list_prepend(pattern, instruction(Op_exec_count)); (void) list_prepend(action, instruction(Op_exec_count)); } (rp + 1)->firsti = action->nexti; (rp + 1)->lasti = tp; ip = list_append( list_merge(list_prepend(pattern, rp), action), tp); } } list_append(rule_list, rp + 1); if (rule_block[rule] == NULL) rule_block[rule] = ip; else (void) list_merge(rule_block[rule], ip); return rule_block[rule]; } /* mk_assignment --- assignment bytecodes */ static INSTRUCTION * mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op) { INSTRUCTION *tp; INSTRUCTION *ip; tp = lhs->lasti; switch (tp->opcode) { case Op_field_spec: tp->opcode = Op_field_spec_lhs; break; case Op_subscript: tp->opcode = Op_subscript_lhs; break; case Op_push: case Op_push_array: tp->opcode = Op_push_lhs; break; case Op_field_assign: yyerror(_("cannot assign a value to the result of a field post-increment expression")); break; default: 
yyerror(_("invalid target of assignment (opcode %s)"), opcode2str(tp->opcode)); break; } tp->do_reference = (op->opcode != Op_assign); /* check for uninitialized reference */ if (rhs != NULL) ip = list_merge(rhs, lhs); else ip = lhs; (void) list_append(ip, op); if (tp->opcode == Op_push_lhs && tp->memory->type == Node_var && tp->memory->var_assign ) { tp->do_reference = false; /* no uninitialized reference checking * for a special variable. */ (void) list_append(ip, instruction(Op_var_assign)); ip->lasti->assign_var = tp->memory->var_assign; } else if (tp->opcode == Op_field_spec_lhs) { (void) list_append(ip, instruction(Op_field_assign)); ip->lasti->field_assign = (Func_ptr) 0; tp->target_assign = ip->lasti; } else if (tp->opcode == Op_subscript_lhs) { (void) list_append(ip, instruction(Op_subscript_assign)); } return ip; } /* optimize_assignment --- peephole optimization for assignment */ static INSTRUCTION * optimize_assignment(INSTRUCTION *exp) { INSTRUCTION *i1, *i2, *i3; /* * Optimize assignment statements array[subs] = x; var = x; $n = x; * string concatenation of the form s = s t. * * 1) Array element assignment array[subs] = x: * Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop * with single instruction Op_store_sub. * Limitation: 1 dimension and sub is simple var/value. * * 2) Simple variable assignment var = x: * Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var. * * 3) Field assignment $n = x: * Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop * with Op_store_field. * * 4) Optimization for string concatenation: * For cases like x = x y, uses realloc to include y in x; * also eliminates instructions Op_push_lhs and Op_pop. */ /* * N.B.: do not append Op_pop instruction to the returned * instruction list if optimized. None of these * optimized instructions pushes the r-value of assignment * onto the runtime stack. 
*/ i2 = NULL; i1 = exp->lasti; if ( i1->opcode != Op_assign && i1->opcode != Op_field_assign) return list_append(exp, instruction(Op_pop)); for (i2 = exp->nexti; i2 != i1; i2 = i2->nexti) { switch (i2->opcode) { case Op_concat: if (i2->nexti->opcode == Op_push_lhs /* l.h.s is a simple variable */ && (i2->concat_flag & CSVAR) != 0 /* 1st exp in r.h.s is a simple variable; * see Op_concat in the grammer above. */ && i2->nexti->memory == exp->nexti->memory /* and the same as in l.h.s */ && i2->nexti->nexti == i1 && i1->opcode == Op_assign ) { /* s = s ... optimization */ /* avoid stuff like x = x (x = y) or x = x gsub(/./, "b", x); * check for l-value reference to this variable in the r.h.s. * Also, avoid function calls in general to guard against * global variable assignment. */ for (i3 = exp->nexti->nexti; i3 != i2; i3 = i3->nexti) { if ((i3->opcode == Op_push_lhs && i3->memory == i2->nexti->memory) || i3->opcode == Op_func_call) return list_append(exp, instruction(Op_pop)); /* no optimization */ } /* remove the variable from r.h.s */ i3 = exp->nexti; exp->nexti = i3->nexti; bcfree(i3); if (--i2->expr_count == 1) /* one less expression in Op_concat */ i2->opcode = Op_no_op; i3 = i2->nexti; assert(i3->opcode == Op_push_lhs); i3->opcode = Op_assign_concat; /* change Op_push_lhs to Op_assign_concat */ i3->nexti = NULL; bcfree(i1); /* Op_assign */ exp->lasti = i3; /* update Op_list */ return exp; } break; case Op_field_spec_lhs: if (i2->nexti->opcode == Op_assign && i2->nexti->nexti == i1 && i1->opcode == Op_field_assign ) { /* $n = .. */ i2->opcode = Op_store_field; bcfree(i2->nexti); /* Op_assign */ i2->nexti = NULL; bcfree(i1); /* Op_field_assign */ exp->lasti = i2; /* update Op_list */ return exp; } break; case Op_push_array: if (i2->nexti->nexti->opcode == Op_subscript_lhs) { i3 = i2->nexti->nexti; if (i3->sub_count == 1 && i3->nexti == i1 && i1->opcode == Op_assign ) { /* array[sub] = .. 
*/ i3->opcode = Op_store_sub; i3->memory = i2->memory; i3->expr_count = 1; /* sub_count shadows memory, * so use expr_count instead. */ i3->nexti = NULL; i2->opcode = Op_no_op; bcfree(i1); /* Op_assign */ exp->lasti = i3; /* update Op_list */ return exp; } } break; case Op_push_lhs: if (i2->nexti == i1 && i1->opcode == Op_assign ) { /* var = .. */ i2->opcode = Op_store_var; i2->nexti = NULL; bcfree(i1); /* Op_assign */ exp->lasti = i2; /* update Op_list */ i3 = exp->nexti; if (i3->opcode == Op_push_i && (i3->memory->flags & INTLSTR) == 0 && i3->nexti == i2 ) { /* constant initializer */ i2->initval = i3->memory; bcfree(i3); exp->nexti = i2; } else i2->initval = NULL; return exp; } break; default: break; } } /* no optimization */ return list_append(exp, instruction(Op_pop)); } /* mk_getline --- make instructions for getline */ static INSTRUCTION * mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, int redirtype) { INSTRUCTION *ip; INSTRUCTION *tp; INSTRUCTION *asgn = NULL; /* * getline [var] < [file] * * [ file (simp_exp)] * [ [ var ] ] * [ Op_K_getline_redir|NULL|redir_type|into_var] * [ [var_assign] ] * */ if (redir == NULL) { int sline = op->source_line; bcfree(op); op = bcalloc(Op_K_getline, 2, sline); (op + 1)->target_endfile = ip_endfile; (op + 1)->target_beginfile = ip_beginfile; } if (var != NULL) { tp = make_assignable(var->lasti); assert(tp != NULL); /* check if we need after_assign bytecode */ if (tp->opcode == Op_push_lhs && tp->memory->type == Node_var && tp->memory->var_assign ) { asgn = instruction(Op_var_assign); asgn->assign_ctxt = op->opcode; asgn->assign_var = tp->memory->var_assign; } else if (tp->opcode == Op_field_spec_lhs) { asgn = instruction(Op_field_assign); asgn->assign_ctxt = op->opcode; asgn->field_assign = (Func_ptr) 0; /* determined at run time */ tp->target_assign = asgn; } else if (tp->opcode == Op_subscript_lhs) { asgn = instruction(Op_subscript_assign); asgn->assign_ctxt = op->opcode; } if (redir != NULL) { ip = 
list_merge(redir, var); (void) list_append(ip, op); } else ip = list_append(var, op); } else if (redir != NULL) ip = list_append(redir, op); else ip = list_create(op); op->into_var = (var != NULL); op->redir_type = (redir != NULL) ? redirtype : redirect_none; return (asgn == NULL ? ip : list_append(ip, asgn)); } /* mk_for_loop --- for loop bytecodes */ static INSTRUCTION * mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond, INSTRUCTION *incr, INSTRUCTION *body) { /* * ------------------------ * init (may be NULL) * ------------------------ * x: * cond (Op_no_op if NULL) * ------------------------ * [ Op_jmp_false tb ] * ------------------------ * body (may be NULL) * ------------------------ * tc: * incr (may be NULL) * [ Op_jmp x ] * ------------------------ * tb:[ Op_no_op ] */ INSTRUCTION *ip, *tbreak, *tcont; INSTRUCTION *jmp; INSTRUCTION *pp_cond; INSTRUCTION *ret; tbreak = instruction(Op_no_op); if (cond != NULL) { add_lint(cond, LINT_assign_in_cond); pp_cond = cond->nexti; ip = cond; (void) list_append(ip, instruction(Op_jmp_false)); ip->lasti->target_jmp = tbreak; } else { pp_cond = instruction(Op_no_op); ip = list_create(pp_cond); } if (init != NULL) ip = list_merge(init, ip); if (do_pretty_print) { (void) list_append(ip, instruction(Op_exec_count)); (forp + 1)->forloop_cond = pp_cond; (forp + 1)->forloop_body = ip->lasti; } if (body != NULL) (void) list_merge(ip, body); jmp = instruction(Op_jmp); jmp->target_jmp = pp_cond; if (incr == NULL) tcont = jmp; else { tcont = incr->nexti; (void) list_merge(ip, incr); } (void) list_append(ip, jmp); ret = list_append(ip, tbreak); fix_break_continue(ret, tbreak, tcont); if (do_pretty_print) { forp->target_break = tbreak; forp->target_continue = tcont; ret = list_prepend(ret, forp); } /* else forp is NULL */ return ret; } /* add_lint --- add lint warning bytecode if needed */ static void add_lint(INSTRUCTION *list, LINTTYPE linttype) { #ifndef NO_LINT INSTRUCTION *ip; switch (linttype) { case 
LINT_assign_in_cond: ip = list->lasti; if (ip->opcode == Op_var_assign || ip->opcode == Op_field_assign) { assert(ip != list->nexti); for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti) ; } if (ip->opcode == Op_assign || ip->opcode == Op_assign_concat) { list_append(list, instruction(Op_lint)); list->lasti->lint_type = linttype; } break; case LINT_no_effect: if (list->lasti->opcode == Op_pop && list->nexti != list->lasti) { int line = 0; // Get down to the last instruction (FIXME: why?) for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti) { // along the way track line numbers, we will use the line // closest to the opcode if that opcode doesn't have one if (ip->source_line != 0) line = ip->source_line; } if (do_lint) { /* compile-time warning */ if (isnoeffect(ip->opcode)) { if (ip->source_line != 0) line = ip->source_line; lintwarn_ln(line, ("statement may have no effect")); } } if (ip->opcode == Op_push) { /* run-time warning */ list_append(list, instruction(Op_lint)); list->lasti->lint_type = linttype; } } break; default: break; } #endif } /* mk_expression_list --- list of bytecode lists */ static INSTRUCTION * mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1) { INSTRUCTION *r; /* we can't just combine all bytecodes, since we need to * process individual expressions for a few builtins in snode() (-: */ /* -- list of lists */ /* [Op_list| ... ]------ * | * [Op_list| ... ] -- | * ... | | * ... <------- | * [Op_list| ... ] -- | * ... | | * ... | | * ... <------- -- */ assert(s1 != NULL && s1->opcode == Op_list); if (list == NULL) { list = instruction(Op_list); list->nexti = s1; list->lasti = s1->lasti; return list; } /* append expression to the end of the list */ r = list->lasti; r->nexti = s1; list->lasti = s1->lasti; return list; } /* count_expressions --- fixup expression_list from mk_expression_list. * returns no of expressions in list. isarg is true * for function arguments. 
*/ static int count_expressions(INSTRUCTION **list, bool isarg) { INSTRUCTION *expr; INSTRUCTION *r = NULL; int count = 0; if (*list == NULL) /* error earlier */ return 0; for (expr = (*list)->nexti; expr; ) { INSTRUCTION *t1, *t2; t1 = expr->nexti; t2 = expr->lasti; if (isarg && t1 == t2 && t1->opcode == Op_push) t1->opcode = Op_push_param; if (++count == 1) r = expr; else (void) list_merge(r, expr); expr = t2->nexti; } assert(count > 0); if (! isarg && count > max_args) max_args = count; bcfree(*list); *list = r; return count; } /* fix_break_continue --- fix up break & continue codes in loop bodies */ static void fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target) { INSTRUCTION *ip; list->lasti->nexti = NULL; /* just to make sure */ for (ip = list->nexti; ip != NULL; ip = ip->nexti) { switch (ip->opcode) { case Op_K_break: if (ip->target_jmp == NULL) ip->target_jmp = b_target; break; case Op_K_continue: if (ip->target_jmp == NULL) ip->target_jmp = c_target; break; default: /* this is to keep the compiler happy. sheesh. 
*/ break; } } } static inline INSTRUCTION * list_create(INSTRUCTION *x) { INSTRUCTION *l; l = instruction(Op_list); l->nexti = x; l->lasti = x; return l; } static inline INSTRUCTION * list_append(INSTRUCTION *l, INSTRUCTION *x) { #ifdef GAWKDEBUG if (l->opcode != Op_list) cant_happen(); #endif l->lasti->nexti = x; l->lasti = x; return l; } static inline INSTRUCTION * list_prepend(INSTRUCTION *l, INSTRUCTION *x) { #ifdef GAWKDEBUG if (l->opcode != Op_list) cant_happen(); #endif x->nexti = l->nexti; l->nexti = x; return l; } static inline INSTRUCTION * list_merge(INSTRUCTION *l1, INSTRUCTION *l2) { #ifdef GAWKDEBUG if (l1->opcode != Op_list) cant_happen(); if (l2->opcode != Op_list) cant_happen(); #endif l1->lasti->nexti = l2->nexti; l1->lasti = l2->lasti; bcfree(l2); return l1; } /* add_pending_comment --- add a pending comment to a statement */ static inline INSTRUCTION * add_pending_comment(INSTRUCTION *stmt) { INSTRUCTION *ret = stmt; if (prior_comment != NULL) { if (function_comment != prior_comment) ret = list_append(stmt, prior_comment); prior_comment = NULL; } else if (comment != NULL && comment->memory->comment_type == EOL_COMMENT) { if (function_comment != comment) ret = list_append(stmt, comment); comment = NULL; } return ret; } /* See if name is a special token. */ int check_special(const char *name) { int low, high, mid; int i; int non_standard_flags = 0; #ifdef USE_EBCDIC static bool did_sort = false; if (! 
did_sort) { qsort((void *) tokentab, sizeof(tokentab) / sizeof(tokentab[0]), sizeof(tokentab[0]), tokcompare); did_sort = true; } #endif if (do_traditional) non_standard_flags |= GAWKX; if (do_posix) non_standard_flags |= NOT_POSIX; low = 0; high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1; while (low <= high) { mid = (low + high) / 2; i = *name - tokentab[mid].operator[0]; if (i == 0) i = strcmp(name, tokentab[mid].operator); if (i < 0) /* token < mid */ high = mid - 1; else if (i > 0) /* token > mid */ low = mid + 1; else { if ((tokentab[mid].flags & non_standard_flags) != 0) return -1; return mid; } } return -1; } /* * This provides a private version of functions that act like VMS's * variable-length record filesystem, where there was a bug on * certain source files. */ static FILE *fp = NULL; /* read_one_line --- return one input line at a time. mainly for debugging. */ static ssize_t read_one_line(int fd, void *buffer, size_t count) { char buf[BUFSIZ]; /* Minor potential memory leak here. Too bad. */ if (fp == NULL) { fp = fdopen(fd, "r"); if (fp == NULL) { fprintf(stderr, "ugh. fdopen: %s\n", strerror(errno)); gawk_exit(EXIT_FAILURE); } } if (fgets(buf, sizeof buf, fp) == NULL) return 0; memcpy(buffer, buf, strlen(buf)); return strlen(buf); } /* one_line_close --- close the open file being read with read_one_line() */ static int one_line_close(int fd) { int ret; if (fp == NULL || fd != fileno(fp)) fatal("debugging read/close screwed up!"); ret = fclose(fp); fp = NULL; return ret; } /* lookup_builtin --- find a builtin function or return NULL */ builtin_func_t lookup_builtin(const char *name) { int mid = check_special(name); if (mid == -1) return NULL; switch (tokentab[mid].class) { case LEX_BUILTIN: case LEX_LENGTH: break; default: return NULL; } /* And another special case... 
*/ if (tokentab[mid].value == Op_sub_builtin) return (builtin_func_t) do_sub; #ifdef HAVE_MPFR if (do_mpfr) return tokentab[mid].ptr2; #endif return tokentab[mid].ptr; } /* install_builtins --- add built-in functions to FUNCTAB */ void install_builtins(void) { int i, j; int flags_that_must_be_clear = DEBUG_USE; if (do_traditional) flags_that_must_be_clear |= GAWKX; if (do_posix) flags_that_must_be_clear |= NOT_POSIX; j = sizeof(tokentab) / sizeof(tokentab[0]); for (i = 0; i < j; i++) { if ( (tokentab[i].class == LEX_BUILTIN || tokentab[i].class == LEX_LENGTH) && (tokentab[i].flags & flags_that_must_be_clear) == 0) { (void) install_symbol(tokentab[i].operator, Node_builtin_func); } } } /* * 9/2014: Gawk cannot use isalpha or isalnum when * parsing the program since that can let through non-English * letters. So, we supply our own. !@#$%^&*()-ing locales! */ /* is_alpha --- return true if c is an English letter */ /* * The scene of the murder was grisly to look upon. When the inspector * arrived, the sergeant turned to him and said, "Another programmer stabbed * in the back. He never knew what happened." * * The inspector replied, "Looks like the MO of isalpha, and his even meaner * big brother, isalnum. The Locale brothers." The sergeant merely * shuddered in horror. */ bool is_alpha(int c) { #ifdef I_DONT_KNOW_WHAT_IM_DOING return isalpha(c); #else /* ! I_DONT_KNOW_WHAT_IM_DOING */ switch (c) { case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': return true; } return false; #endif /* ! 
I_DONT_KNOW_WHAT_IM_DOING */ } /* is_alnum --- return true for alphanumeric, English only letters */ bool is_alnum(int c) { /* digit test is good for EBCDIC too. so there. */ return (is_alpha(c) || ('0' <= c && c <= '9')); } /* * is_letter --- function to check letters * isalpha() isn't good enough since it can look at the locale. * Underscore counts as a letter in awk identifiers */ bool is_letter(int c) { return (is_alpha(c) || c == '_'); } /* is_identchar --- return true if c can be in an identifier */ bool is_identchar(int c) { return (is_alnum(c) || c == '_'); } /* set_profile_text --- make a number that can be printed when profiling */ static NODE * set_profile_text(NODE *n, const char *str, size_t len) { if (do_pretty_print) { // two extra bytes: one for NUL termination, and another in // case we need to add a leading minus sign in add_sign_to_num emalloc(n->stptr, char *, len + 2, "set_profile_text"); memcpy(n->stptr, str, len); n->stptr[len] = '\0'; n->stlen = len; // Set STRCUR and n->stfmt for use when profiling // (i.e., actually running the program) so that // force_string() on this item will work ok. // Thanks and a tip of the hatlo to valgrind. n->flags |= (NUMCONSTSTR|STRCUR); n->stfmt = STFMT_UNUSED; #ifdef HAVE_MPFR n->strndmode = MPFR_round_mode; #endif } return n; }