/* Copyright (C) 1995 Bjoern Beutel. */
/* Description. =============================================================*/
/* This module parses Malaga rule files. */
/* Includes. ================================================================*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <setjmp.h>
#include <glib.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "symbols.h"
#include "patterns.h"
#include "files.h"
#include "scanner.h"
#include "rule_type.h"
#include "rule_code.h"
#include "rule_symbols.h"
#include "hangul.h"
#include "rule_parser.h"
/* Types. ===================================================================*/
typedef struct /* Information about a variable, for list assignments. */
{
list_node_t *next;
int_t var_index, path_index;
} var_node_t;
typedef struct /* A pointer to an instruction to be patched. */
{
list_node_t *next;
instr_t *instr;
} instr_node_t;
typedef struct /* Chains of instructions to patch in a condition. */
{
list_t true_jumps; /* Instrs that jump if condition is TRUE. */
list_t false_jumps; /* Instrs that jump if condition is FALSE. */
instr_t *last_jump; /* The last jump (not included above). */
} condition_t;
typedef struct /* An open scope. */
{
list_node_t *next;
int_t initial_index; /* Index of the stack as the scope was opened. */
list_t var_name_list; /* The names of all variables in the scope. */
} scope_node_t;
typedef struct /* A label for a "foreach" statement. */
{
list_node_t *next;
string_t name; /* Label of this loop or NULL. */
list_t repeat_jumps; /* Pointers to instrs that repeat this loop. */
list_t out_jumps; /* Pointers to instrs that jump out of this loop. */
} loop_node_t;
typedef struct /* A name in a list of names. */
{
list_node_t *next;
string_t name;
} name_node_t;
typedef struct /* A rule index in a list of indexes. */
{
list_node_t *next;
int_t index;
} rule_node_t;
typedef enum {VALUE, CONST_VALUE, CONDITION} expr_type_t;
/* The type of a Malaga expression. */
/* Variables. ===============================================================*/
/* File name and line number at the beginning of the last statement. */
static int_t last_statement_line;
static string_t last_statement_file_name;
static rule_type_t rule_type; /* Type of rule that is to be parsed. */
static list_t scope_list; /* List of variables currently defined. */
static list_t loop_list; /* List of loops currently open. */
static struct /* The pattern variables defined in a "matches" condition. */
{
int_t count; /* Number of pattern variables.
* Will be reset when variables are defined. */
string_t name[ PATTERN_VAR_MAX ]; /* Names of the pattern variables. */
} pattern_vars;
/* Forward declarations. ====================================================*/
static expr_type_t parse_value_local( condition_t *condition );
static expr_type_t parse_expression( condition_t *condition );
static void parse_statements( void );
/* Support for source line associations. ====================================*/
static void
new_source_line( void )
/* Associate the next instructions with the current source line. */
{
src_line_t src_line;
if (current_line_number() != last_statement_line
|| current_file_name() != last_statement_file_name)
{
src_line.file = pool_index( code.string_pool, current_file_name() );
src_line.line = current_line_number();
src_line.instr = code.instr_count;
copy_to_pool( code.src_line_pool, &src_line, 1, NULL );
last_statement_line = current_line_number();
last_statement_file_name = current_file_name();
}
}
/*---------------------------------------------------------------------------*/
static void
no_source_line( void )
/* Associate the next instructions generated with no source. */
{
src_line_t src_line;
if (last_statement_line != -1 || last_statement_file_name != NULL)
{
src_line.file = -1;
src_line.line = -1;
src_line.instr = code.instr_count;
copy_to_pool( code.src_line_pool, &src_line, 1, NULL );
last_statement_line = -1;
last_statement_file_name = NULL;
}
}
/* Support for parsing conditions. ==========================================*/
static void
add_instr_to_list( list_t *instr_list, instr_t *instr )
/* Add instruction INSTR to INSTR_LIST. */
{
instr_node_t *instr_node;
instr_node = new_node( instr_list, sizeof( instr_node_t ), LIST_END );
instr_node->instr = instr;
}
/*---------------------------------------------------------------------------*/
static void
patch_instr( instr_t *instr, int_t info )
/* Patch *INSTR to have its info set to INFO. */
{
if (info >= INSTR_INFO_MAX)
complain( "Internal error." );
*instr = INSTR( OPCODE( *instr ), info );
}
/*---------------------------------------------------------------------------*/
static void
patch_instr_list( list_t *instr_list, int_t info )
/* Patch all instructions in INSTR_LIST to have their info set to INFO.
* INSTR_LIST is freed by this function. */
{
instr_node_t *instr_node;
FOREACH_FREE( instr_node, *instr_list )
patch_instr( instr_node->instr, info );
}
/*---------------------------------------------------------------------------*/
static void
negate_jump( instr_t *jump )
/* If *JUMP is a conditional jump, negate its condition. */
{
int_t opcode;
opcode = OPCODE( *jump );
switch (opcode)
{
case INS_JUMP_IF_EQUAL:
opcode = INS_JUMP_IF_NOT_EQUAL;
break;
case INS_JUMP_IF_NOT_EQUAL:
opcode = INS_JUMP_IF_EQUAL;
break;
case INS_JUMP_IF_CONGR:
opcode = INS_JUMP_IF_NOT_CONGR;
break;
case INS_JUMP_IF_NOT_CONGR:
opcode = INS_JUMP_IF_CONGR;
break;
case INS_JUMP_IF_IN:
opcode = INS_JUMP_IF_NOT_IN;
break;
case INS_JUMP_IF_NOT_IN:
opcode = INS_JUMP_IF_IN;
break;
case INS_JUMP_IF_LESS:
opcode = INS_JUMP_IF_NOT_LESS;
break;
case INS_JUMP_IF_NOT_LESS:
opcode = INS_JUMP_IF_LESS;
break;
case INS_JUMP_IF_GREATER:
opcode = INS_JUMP_IF_NOT_GREATER;
break;
case INS_JUMP_IF_NOT_GREATER:
opcode = INS_JUMP_IF_GREATER;
break;
case INS_JUMP_IF_NULL:
opcode = INS_JUMP_IF_NOT_NULL;
break;
case INS_JUMP_IF_NOT_NULL:
opcode = INS_JUMP_IF_NULL;
break;
case INS_JUMP_IF_YES:
opcode = INS_JUMP_IF_NO;
break;
case INS_JUMP_IF_NO:
opcode = INS_JUMP_IF_YES;
break;
default:
break;
}
*jump = INSTR( opcode, INSTR_INFO( *jump ) );
}
/* Variable scopes. =========================================================*/
static void
open_scope( void )
/* Open a new scope. */
{
scope_node_t *scope_node;
scope_node = new_node( &scope_list, sizeof( scope_node_t ), LIST_START );
scope_node->initial_index = code.stack_index;
clear_list( &scope_node->var_name_list );
}
/*---------------------------------------------------------------------------*/
static void
close_scope( bool_t do_pop )
/* Close the current scope. Emit a pop instruction that resets
* the stack index to the initial stack index if DO_POP is TRUE. */
{
scope_node_t *scope_node;
name_node_t *name_node;
scope_node = (scope_node_t *) scope_list.first;
FOREACH_FREE( name_node, scope_node->var_name_list )
undefine_variable( name_node->name );
/* See if we have to pop to reach the start index again. */
if (do_pop && code.stack_index > scope_node->initial_index)
{
no_source_line();
emit_instr( INS_POP, code.stack_index - scope_node->initial_index );
}
/* Reset stack index. */
code.stack_index = scope_node->initial_index;
free_first_node( &scope_list );
}
/*---------------------------------------------------------------------------*/
static void
define_var_in_scope( string_t name, int_t var_index )
/* Define a variable and add it to the current scope. */
{
name_node_t *name_node;
scope_node_t *scope_node;
scope_node = (scope_node_t *) scope_list.first;
name_node = new_node( &scope_node->var_name_list, sizeof( name_node_t ),
LIST_END );
name_node->name = define_variable( name, var_index );
}
/* Simple Malaga parse functions. ===========================================*/
static void
parse_var_name( string_t *var_name )
/* Parse variable name and allocate memory to save its name in *VAR_NAME. */
{
test_token( TOK_VARIABLE );
*var_name = new_string( token_name, NULL );
read_next_token();
}
/*---------------------------------------------------------------------------*/
static int_t
parse_rule_name( list_t *rule_list )
/* Parse a rule name which must not be part of RULE_LIST. */
{
rule_node_t *rule_node;
rule_t *rule;
int_t rule_number;
test_token( TOK_IDENT );
rule = find_combi_rule( token_name, &rule_number,
current_line_number(), current_file_name() );
/* Check if we already have this rule in this rule set. */
FOREACH( rule_node, *rule_list )
{
if (rule_node->index == rule_number)
complain( "Rule \"%s\" twice in rule set.", token_name );
}
read_next_token();
return rule_number;
}
/*---------------------------------------------------------------------------*/
static void
parse_rule_set( int_t *rule_set_index )
/* Parse a list of rule names and enter it into CODE.RULE_SET_POOL.
* Return the index of the rule set in *RULE_SET_INDEX. */
{
list_t rule_list;
rule_node_t *rule_node;
bool_t has_parentheses;
int_t i, rule_count;
int_t *rules;
rule_count = 0;
clear_list( &rule_list );
parse_token( TOK_RULES );
has_parentheses = (next_token == '(');
if (has_parentheses)
parse_token( '(' );
while (TRUE)
{
while (TRUE)
{
i = parse_rule_name( &rule_list );
rule_node = new_node( &rule_list, sizeof( rule_node_t ), LIST_END );
rule_node->index = i;
rule_count++;
if (next_token != ',')
break;
read_next_token();
}
if (next_token != TOK_ELSE)
break;
read_next_token();
rule_node = new_node( &rule_list, sizeof( rule_node_t ), LIST_END );
rule_node->index = -2;
rule_count++;
}
if (has_parentheses)
parse_token( ')' );
rule_node = new_node( &rule_list, sizeof( rule_node_t ), LIST_END );
rule_node->index = -1;
rule_count++;
rules = get_pool_space( code.rule_set_pool, rule_count, rule_set_index );
i = 0;
FOREACH_FREE( rule_node, rule_list )
rules[ i++ ] = rule_node->index;
}
/* Functions to parse Malaga values. ========================================*/
/* Functions to parse a value do not always emit instructions immediately.
* Instead, they sometimes write instructions in a buffer,
* so they can do constant folding.
* Use the functions "parse_value", "parse_constant_value"
* and "parse_condition" to get values or conditions. */
/*---------------------------------------------------------------------------*/
static void
convert_to_condition( expr_type_t *type, condition_t *condition )
/* Make sure an expression is a condition. */
{
if (*type != CONDITION)
{
clear_list( &condition->true_jumps );
clear_list( &condition->false_jumps );
condition->last_jump = emit_instr( INS_JUMP_IF_YES, 0 );
*type = CONDITION;
}
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_value_no_flush( void )
/* Parse any value. Convert conditions into yes/no symbols. Do *not* flush the
* instruction buffer. */
{
expr_type_t type;
condition_t condition;
int_t pattern_var_count;
pattern_var_count = pattern_vars.count;
type = parse_value_local( &condition );
if (pattern_vars.count > pattern_var_count)
complain( "Variable definition not allowed in values." );
if (type == CONDITION)
{
negate_jump( condition.last_jump );
patch_instr_list( &condition.true_jumps, code.instr_count );
emit_instr( INS_PUSH_SYMBOL, YES_SYMBOL );
emit_instr( INS_JUMP, code.instr_count + 2 );
code.stack_index--;
patch_instr( condition.last_jump, code.instr_count );
patch_instr_list( &condition.false_jumps, code.instr_count );
emit_instr( INS_PUSH_SYMBOL, NO_SYMBOL );
return VALUE;
}
else
return type;
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_value( void )
/* Parse any value. Convert conditions into yes/no symbols. Flush the
* instruction buffer. */
{
expr_type_t type;
type = parse_value_no_flush();
flush_buffer();
return type;
}
/*---------------------------------------------------------------------------*/
static void
parse_constant_value( int_t *value_index )
/* Parse a constant value and save its index in *VALUE_INDEX. */
{
expr_type_t type;
condition_t condition;
type = parse_value_local( &condition );
if (type != CONST_VALUE)
complain( "Constant value expected." );
copy_value_to_pool( code.value_pool, pop_buffer_top_value(), value_index );
}
/*---------------------------------------------------------------------------*/
static void
parse_condition( condition_t *condition )
/* Parse any condition and set CONDITION. */
{
expr_type_t type;
type = parse_expression( condition );
convert_to_condition( &type, condition );
}
/*---------------------------------------------------------------------------*/
static void
parse_subrule_call( string_t ident )
/* Parse a subrule call.
* The subrule name IDENT has already been parsed. */
{
int_t param_count, rule_number;
rule_t *rule;
rule = find_subrule( ident, &rule_number, current_line_number(),
current_file_name());
parse_token( '(' );
param_count = 0;
if (next_token != ')')
{
while (TRUE)
{
parse_value();
param_count++;
if (next_token != ',')
break;
read_next_token();
}
}
parse_token( ')' );
if (rule == NULL)
{
/* Call a standard function. */
switch (rule_number)
{
case FUNC_TO_ATOMS:
case FUNC_IS_CAPITAL:
case FUNC_GET_LENGTH:
case FUNC_TO_MULTI:
case FUNC_TO_SET:
case FUNC_GET_SWITCH:
case FUNC_GET_VALUE_TYPE:
case FUNC_GET_VALUE_STRING:
case FUNC_TRANSMIT:
case FUNC_FLOOR:
if (param_count != 1)
complain( "Function \"%s\" takes one parameter.", ident );
break;
case FUNC_SUBSTRING:
if (param_count < 2 || param_count > 3)
complain( "Function \"%s\" takes 2 or 3 parameters.", ident );
if (param_count == 2)
emit_instr( INS_PUSH_NULL, 1 );
break;
}
emit_instr( INS_STD_FUNCTION, rule_number );
}
else
{
/* Call a real subrule. */
if (rule->param_count == -1)
rule->param_count = param_count;
else if (param_count != rule->param_count)
complain( "\"%s\" takes %d parameters.", ident, rule->param_count );
code.stack_index -= param_count; /* Subrule jump kills all parameters. */
emit_instr( INS_JUMP_SUBRULE, rule_number );
}
}
/*---------------------------------------------------------------------------*/
static void
parse_if_expression( void )
/* Parse an "if" expression. */
{
condition_t condition;
list_t end_jumps;
list_t else_jumps;
instr_t *end_jump;
int_t pattern_var_count;
clear_list( &else_jumps );
clear_list( &end_jumps );
test_token( TOK_IF );
do
{
read_next_token();
/* Jump here if previous conditions were false. */
patch_instr_list( &else_jumps, code.instr_count );
pattern_var_count = pattern_vars.count;
parse_condition( &condition );
if (pattern_vars.count > pattern_var_count)
complain( "No variable definitions allowed in if-expressions." );
parse_token( TOK_THEN );
/* Fall through if condition is true. */
else_jumps = condition.false_jumps;
negate_jump( condition.last_jump );
add_instr_to_list( &else_jumps, condition.last_jump );
patch_instr_list( &condition.true_jumps, code.instr_count );
parse_value();
code.stack_index--;
/* Emit jump to end. */
end_jump = emit_instr( INS_JUMP, 0 );
add_instr_to_list( &end_jumps, end_jump );
} while (next_token == TOK_ELSEIF);
parse_token( TOK_ELSE );
patch_instr_list( &else_jumps, code.instr_count );
parse_value();
parse_token( TOK_END );
if (next_token == TOK_IF)
read_next_token();
patch_instr_list( &end_jumps, code.instr_count );
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_simple_value( condition_t *condition )
/* Parse a simple value or a condition in parentheses.
* If CONSTANT is TRUE, the value must be constant.
* Any jumps are returned in CONDITION. */
{
int_t i; /* Number of values in list or record. */
expr_type_t type, elem_type; /* Result type of expression. */
string_t ident, var_name;
int_t var_index;
switch (next_token)
{
case '<':
/* Parse a list. */
read_next_token();
type = CONST_VALUE;
i = 0;
if (next_token != '>')
{
while (TRUE)
{
elem_type = parse_value_no_flush();
if (elem_type == VALUE)
type = VALUE;
i++;
if (next_token != ',')
break;
read_next_token();
}
}
parse_token( '>' );
buffer_instr( INS_BUILD_LIST, i );
return type;
case '[':
/* Parse a record. */
read_next_token();
type = CONST_VALUE;
i = 0;
if (next_token != ']')
{
while (TRUE)
{
elem_type = parse_value_local( condition );
if (elem_type == CONDITION)
complain( "No conditions allowed as attribute names." );
else if (elem_type == VALUE)
type = VALUE;
parse_token( ':' );
elem_type = parse_value_no_flush();
if (elem_type == VALUE)
type = VALUE;
i++;
if (next_token != ',')
break;
read_next_token();
}
}
parse_token( ']' );
buffer_instr( INS_BUILD_RECORD, i );
return type;
case TOK_IDENT:
ident = new_string( token_name, NULL );
read_next_token();
if (next_token == '(')
{
/* Parse a subrule call. */
parse_subrule_call( ident );
type = VALUE;
}
else
{
buffer_instr( INS_PUSH_SYMBOL, find_symbol( ident ) );
type = CONST_VALUE;
}
free_mem( &ident );
return type;
case TOK_STRING:
encode_hangul( &token_string );
buffer_push_string_instr( token_string, NULL );
read_next_token();
return CONST_VALUE;
case TOK_NUMBER:
buffer_push_number_instr( token_number );
read_next_token();
return CONST_VALUE;
case TOK_CONSTANT:
buffer_instr( INS_PUSH_CONST, find_constant( token_name ) );
read_next_token();
return CONST_VALUE;
case TOK_VARIABLE:
parse_var_name( &var_name );
var_index = find_variable( var_name );
emit_instr( INS_PUSH_VAR, var_index );
free_mem( &var_name );
return VALUE;
case '(':
read_next_token();
type = parse_expression( condition );
parse_token( ')' );
return type;
case TOK_IF:
parse_if_expression();
return VALUE;
default:
complain( "Value expected, not %s.", token_as_text( next_token ) );
}
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_unary_value( condition_t *condition )
/* Parse a value that may be prefixed by a unary "-". */
{
expr_type_t type;
if (next_token == '-')
{
read_next_token();
type = parse_simple_value( condition );
if (type == CONDITION)
complain( "Conditions not allowed after unary \"-\"." );
buffer_instr( INS_UNARY_MINUS_OP, 0 );
}
else
type = parse_simple_value( condition );
return type;
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_dotted_value( condition_t *condition )
/* Parse a value and a sequence of following ".IDENT" or ".NUMBER". */
{
expr_type_t type, elem_type;
type = parse_unary_value( condition );
if (type == CONDITION)
return type;
while (next_token == '.')
{
read_next_token();
elem_type = parse_unary_value( condition );
if (elem_type == CONDITION)
complain( "Conditions not allowed after \".\"." );
else if (elem_type == VALUE)
type = VALUE;
if (elem_type == CONST_VALUE
&& get_value_type( get_buffer_top_value() ) == SYMBOL_SYMBOL)
{
buffer_instr( INS_GET_ATTRIBUTE,
value_to_symbol( pop_buffer_top_value() ) );
}
else
buffer_instr( INS_DOT_OPERATION, 0 );
}
return type;
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_term_value( condition_t *condition )
/* Parse a value that may contain the "*" and the "/" operator. */
{
expr_type_t type, elem_type;
int_t operator_token;
type = parse_dotted_value( condition );
if (type == CONDITION)
return type;
while (next_token == '*' || next_token == '/')
{
operator_token = next_token;
read_next_token();
elem_type = parse_dotted_value( condition );
if (elem_type == CONDITION)
complain( "Conditions not allowed after \"*\" and \"/\"." );
else if (elem_type == VALUE)
type = VALUE;
if (operator_token == '*')
buffer_instr( INS_ASTERISK_OPERATION, 0 );
else
buffer_instr( INS_SLASH_OPERATION, 0 );
}
return type;
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_value_local( condition_t *condition )
/* Parse any value.
* The code is not necessarily emitted, it may still be in the buffer.
* Use "parse_value" to get real code. */
{
expr_type_t type, elem_type;
int_t operator_token;
type = parse_term_value( condition );
if (type == CONDITION)
return type;
while (next_token == '+' || next_token == '-')
{
operator_token = next_token;
read_next_token();
if (operator_token == '-')
{
elem_type = parse_term_value( condition );
if (elem_type == CONDITION)
complain( "Conditions not allowed after \"-\"." );
else if (elem_type == VALUE)
type = VALUE;
if (elem_type == CONST_VALUE
&& get_value_type( get_buffer_top_value() ) == SYMBOL_SYMBOL)
{
buffer_instr( INS_REMOVE_ATTRIBUTE,
value_to_symbol( pop_buffer_top_value() ) );
}
else
buffer_instr( INS_MINUS_OPERATION, 0 );
}
else /* operator_token == '+' */
{
elem_type = parse_term_value( condition );
if (elem_type == CONDITION)
complain( "Conditions not allowed after \"+\"." );
else if (elem_type == VALUE)
type = VALUE;
buffer_instr( INS_PLUS_OPERATION, 0 );
}
}
return type;
}
/*---------------------------------------------------------------------------*/
static void parse_string_or_var( int_t *var_number, string_t *string )
/* Parse a constant string or a variable name and return it in
* *VAR_NUMBER or *STRING. */
{
condition_t condition;
expr_type_t type;
if (next_token == TOK_VARIABLE)
{
if (*var_number != -1)
complain( "Constant string expected." );
if (pattern_vars.count == PATTERN_VAR_MAX)
complain( "Too many variables in pattern." );
parse_var_name( pattern_vars.name + pattern_vars.count );
*var_number = pattern_vars.count++;
}
else
{
if (*string != NULL)
complain( "Variable expected." );
type = parse_value_local( &condition );
if (type != CONST_VALUE
|| get_value_type( get_buffer_top_value() ) != STRING_SYMBOL)
{
complain( "Constant string expected." );
}
*string = value_to_string( pop_buffer_top_value() );
}
}
/*---------------------------------------------------------------------------*/
static void
parse_pattern( string_t *pattern )
/* Parse a pattern and return it in PATTERN. */
{
int_t var_number;
string_t segment, string;
text_t *text;
text = new_text();
while (TRUE)
{
string = NULL;
var_number = -1;
parse_string_or_var( &var_number, &string );
if (var_number != -1 || next_token == ':')
{
parse_token( ':' );
parse_string_or_var( &var_number, &string );
}
segment = compile_pattern( string, var_number );
add_to_text( text, segment );
free_mem( &segment );
if (next_token != ',')
break;
read_next_token();
}
*pattern = text_to_string( &text );
}
/*---------------------------------------------------------------------------*/
static void
define_pattern_vars( void )
/* Generate code to define the pattern variables PATTERN_VARS. */
{
int_t i;
for (i = 0; i < pattern_vars.count; i++)
{
emit_instr( INS_PUSH_PATTERN_VAR, i );
define_var_in_scope( pattern_vars.name[i], code.stack_index - 1 );
free_mem( &pattern_vars.name[i] );
}
pattern_vars.count = 0;
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_comparison( condition_t *condition )
/* Parse a malaga comparison and set CONDITION accordingly. */
{
int_t opcode;
expr_type_t type, type2;
string_t pattern;
int_t pattern_index;
type = parse_value_local( condition );
if (type == CONDITION)
return type;
switch (next_token)
{
case '=':
opcode = INS_JUMP_IF_EQUAL;
break;
case TOK_NOT_EQUAL:
opcode = INS_JUMP_IF_NOT_EQUAL;
break;
case '~':
opcode = INS_JUMP_IF_CONGR;
break;
case TOK_NOT_CONGRUENT:
opcode = INS_JUMP_IF_NOT_CONGR;
break;
case TOK_IN:
opcode = INS_JUMP_IF_IN;
break;
case TOK_LESS:
opcode = INS_JUMP_IF_LESS;
break;
case TOK_LESS_EQUAL:
opcode = INS_JUMP_IF_NOT_GREATER;
break;
case TOK_GREATER:
opcode = INS_JUMP_IF_GREATER;
break;
case TOK_GREATER_EQUAL:
opcode = INS_JUMP_IF_NOT_LESS;
break;
case TOK_MATCHES:
read_next_token();
if (next_token == '(')
{
read_next_token();
parse_pattern( &pattern );
parse_token( ')' );
}
else
parse_pattern( &pattern );
copy_string_to_pool( code.string_pool, pattern, &pattern_index );
free_mem( &pattern );
emit_instr( INS_MATCH, pattern_index );
clear_list( &condition->true_jumps );
clear_list( &condition->false_jumps );
condition->last_jump = emit_instr( INS_JUMP_IF_YES, 0 );
return CONDITION;
default:
return type;
}
read_next_token();
type2 = parse_value_local( condition );
if (type2 == CONDITION)
complain( "Conditions not allowed in comparisons." );
/* Emit code and produce CONDITION. */
clear_list( &condition->true_jumps );
clear_list( &condition->false_jumps );
condition->last_jump = emit_instr( opcode, 0 );
return CONDITION;
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_simple_condition( condition_t *condition )
/* Parse a malaga comparison that may be negated and return CONDITION. */
{
expr_type_t type;
int_t pattern_var_count;
condition_t condition2;
pattern_var_count = pattern_vars.count;
if (next_token == TOK_NOT)
{
read_next_token();
type = parse_comparison( &condition2 );
if (pattern_vars.count > pattern_var_count)
complain( "No variable definition allowed in negations." );
convert_to_condition( &type, &condition2 );
negate_jump( condition2.last_jump );
condition->last_jump = condition2.last_jump;
condition->true_jumps = condition2.false_jumps;
condition->false_jumps = condition2.true_jumps;
}
else
type = parse_comparison( condition );
return type;
}
/*---------------------------------------------------------------------------*/
static expr_type_t
parse_expression( condition_t *condition )
/* Parse any expression, i.e., a value or a regular condition. */
{
condition_t condition2;
expr_type_t type, type2;
int_t pattern_var_count;
pattern_var_count = pattern_vars.count;
type = parse_simple_condition( condition );
if (next_token == TOK_OR)
{
/* There may be a chain of "or"s behind the first simple condition. */
convert_to_condition( &type, condition );
while (next_token == TOK_OR)
{
read_next_token();
/* All jumps in FALSE_JUMP should point to next instruction. */
patch_instr_list( &condition->false_jumps, code.instr_count );
type2 = parse_simple_condition( &condition2 );
convert_to_condition( &type2, &condition2 );
/* Create a new TRUE_JUMP list. */
combine_lists( &condition->true_jumps, &condition2.true_jumps );
combine_lists( &condition->false_jumps, &condition2.false_jumps );
add_instr_to_list( &condition->true_jumps, condition->last_jump );
condition->last_jump = condition2.last_jump;
}
if (pattern_vars.count > pattern_var_count)
complain( "No variable definition allowed in disjunctions." );
}
else if (next_token == TOK_AND)
{
/* There may be a chain of "and"s behind the first simple condition. */
convert_to_condition( &type, condition );
while (next_token == TOK_AND)
{
read_next_token();
/* All jumps in TRUE_JUMP should point to next instruction. */
patch_instr_list( &condition->true_jumps, code.instr_count );
type2 = parse_simple_condition( &condition2 );
convert_to_condition( &type2, &condition2 );
/* Create a new FALSE_JUMP list. */
negate_jump( condition->last_jump );
combine_lists( &condition->true_jumps, &condition2.true_jumps );
combine_lists( &condition->false_jumps, &condition2.false_jumps );
add_instr_to_list( &condition->false_jumps, condition->last_jump );
condition->last_jump = condition2.last_jump;
}
}
return type;
}
/*---------------------------------------------------------------------------*/
static void
parse_attribute_path( void )
/* Parse a sequence of values separated by a ".",
* it will be returned on the stack as a list. */
{
int_t path_length;
condition_t condition;
expr_type_t type;
path_length = 0;
while (TRUE)
{
type = parse_unary_value( &condition );
if (type == CONDITION)
complain( "Conditions not allowed in paths." );
path_length++;
if (next_token != '.')
break;
read_next_token();
}
/* Emit the instruction that pushes the selection path. */
buffer_instr( INS_BUILD_PATH, path_length );
flush_buffer();
}
/* Parse functions for statements. ==========================================*/
static void
parse_assert_statement( void )
/* Parse an assert statement. */
{
condition_t condition;
new_source_line();
if (next_token != TOK_ASSERT && next_token != '!')
{
complain( "\"!\" or \"assert\" expected, not %s.",
token_as_text( next_token ) );
}
read_next_token();
parse_condition( &condition );
patch_instr_list( &condition.false_jumps, code.instr_count );
emit_instr( INS_SYSTEM_ERROR, ASSERTION_ERROR );
patch_instr_list( &condition.true_jumps, code.instr_count );
patch_instr( condition.last_jump, code.instr_count );
define_pattern_vars();
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_assignment( void )
/* Parse an assignment. */
{
string_t var_name; /* The name of the variable. */
int_t index;
int_t assignment; /* Type of assignment token. */
bool_t has_path;
new_source_line();
parse_var_name( &var_name );
index = find_variable( var_name );
free_mem( &var_name );
has_path = (next_token == '.');
if (has_path)
{
read_next_token();
parse_attribute_path();
}
/* Read the assignment token. */
assignment = next_token;
if (assignment != TOK_ASSIGN
&& assignment != TOK_ASSIGN_PLUS && assignment != TOK_ASSIGN_MINUS
&& assignment != TOK_ASSIGN_ASTERISK && assignment != TOK_ASSIGN_SLASH)
{
complain( "\":=\", \":=+\", \":=-\", \":=*\", or \":=/\" expected, "
"not %s.", token_as_text( next_token ) );
}
read_next_token();
parse_value();
if (has_path)
{
switch (assignment)
{
case TOK_ASSIGN:
emit_instr( INS_SET_VAR_PATH, index );
break;
case TOK_ASSIGN_PLUS:
emit_instr( INS_PLUS_VAR_PATH, index );
break;
case TOK_ASSIGN_MINUS:
emit_instr( INS_MINUS_VAR_PATH, index );
break;
case TOK_ASSIGN_ASTERISK:
emit_instr( INS_ASTERISK_VAR_PATH, index );
break;
case TOK_ASSIGN_SLASH:
emit_instr( INS_SLASH_VAR_PATH, index );
break;
}
}
else
{
switch (assignment)
{
case TOK_ASSIGN:
emit_instr( INS_SET_VAR, index );
break;
case TOK_ASSIGN_PLUS:
emit_instr( INS_PLUS_VAR, index );
break;
case TOK_ASSIGN_MINUS:
emit_instr( INS_MINUS_VAR, index );
break;
case TOK_ASSIGN_ASTERISK:
emit_instr( INS_ASTERISK_VAR, index );
break;
case TOK_ASSIGN_SLASH:
emit_instr( INS_SLASH_VAR, index );
break;
}
}
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_break_statement( void )
/* Parse a "break" statement. */
{
loop_node_t *loop;
new_source_line();
parse_token( TOK_BREAK );
if (next_token == TOK_IDENT)
{
FOREACH( loop, loop_list )
{
if (loop->name != NULL && strcmp_no_case( token_name, loop->name ) == 0)
break;
}
read_next_token();
}
else
loop = (loop_node_t *) loop_list.first;
if (loop == NULL)
complain( "\"break\" must be within a loop." );
add_instr_to_list( &loop->out_jumps, emit_instr( INS_JUMP, 0 ) );
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_choose_statement( void )
/* Parse a "choose" statement. */
{
string_t var_name;
instr_t *patch_p;
int_t label;
int_t var_index;
new_source_line();
parse_token( TOK_CHOOSE );
parse_var_name( &var_name );
parse_token( TOK_IN );
/* Reserve place for variables. */
var_index = code.stack_index;
emit_instr( INS_PUSH_NULL, 1 );
parse_value();
emit_instr( INS_PUSH_VAR, var_index + 1 );
emit_instr( INS_GET_1ST_ELEMENT, 0 );
emit_instr( INS_PUSH_VAR, var_index + 2 );
emit_instr( INS_TERMINATE_IF_NULL, 0 );
label = code.instr_count;
emit_instr( INS_PUSH_VAR, var_index + 2 );
emit_instr( INS_SET_VAR, var_index );
emit_instr( INS_ITERATE, var_index + 2 );
emit_instr( INS_PUSH_VAR, var_index + 2 );
patch_p = emit_instr( INS_JUMP_IF_NULL, 0 );
emit_instr( INS_JUMP_LATER, label );
patch_instr( patch_p, code.instr_count );
define_var_in_scope( var_name, var_index );
free_mem( &var_name );
emit_instr( INS_POP, 2 ); /* Pop list and element of this list. */
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_continue_statement( void )
/* Parse a "continue" statement. */
{
loop_node_t *loop;
new_source_line();
parse_token( TOK_CONTINUE );
if (next_token == TOK_IDENT)
{
FOREACH( loop, loop_list )
{
if (loop->name != NULL && strcmp_no_case( token_name, loop->name ) == 0)
break;
}
read_next_token();
}
else
loop = (loop_node_t *) loop_list.first;
if (loop == NULL)
complain( "\"continue\" must be within a loop." );
add_instr_to_list( &loop->repeat_jumps, emit_instr( INS_JUMP, 0 ) );
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_define_statement( void )
/* Parse a "define" statement. */
{
int_t var_count; /* Number of variables to be assigned. */
list_t var_name_list;
name_node_t *name_node;
int_t base_index;
string_t var_name;
new_source_line();
parse_token( TOK_DEFINE );
if (next_token == '<')
{
/* Parse a list assignment. */
base_index = code.stack_index;
var_count = 0;
clear_list( &var_name_list );
parse_token( '<' );
while (TRUE)
{
name_node = new_node( &var_name_list, sizeof( name_node_t ), LIST_END );
parse_var_name( &name_node->name );
var_count++;
if (next_token != ',')
break;
read_next_token();
}
parse_token( '>' );
parse_token( TOK_ASSIGN );
parse_value();
emit_instr( INS_DECOMPOSE_LIST, var_count );
FOREACH_FREE( name_node, var_name_list )
{
define_var_in_scope( name_node->name, base_index );
free_mem( &name_node->name );
base_index++;
}
}
else
{
/* Parse a regular assignment. */
parse_var_name( &var_name );
parse_token( TOK_ASSIGN );
parse_value();
define_var_in_scope( var_name, code.stack_index - 1 );
free_mem( &var_name );
}
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_foreach_statement( void )
/* Parse a "foreach" statement. */
{
string_t loop_name; /* Name of loop. */
string_t var_name; /* Name of iteration variable. */
int_t label; /* PC to jump to when repeating loop. */
int_t list_index;
loop_node_t *loop;
new_source_line();
if (next_token == TOK_IDENT)
{
FOREACH( loop, loop_list )
{
if (loop->name != NULL && strcmp_no_case( token_name, loop->name ) == 0)
complain( "No nested loops of same name allowed." );
}
loop_name = new_string( token_name, NULL );
read_next_token();
parse_token( ':' );
}
else
loop_name = NULL;
/* Create loop list. */
loop = new_node( &loop_list, sizeof( loop_node_t ), LIST_START );
loop->name = loop_name;
clear_list( &loop->repeat_jumps );
clear_list( &loop->out_jumps );
parse_token( TOK_FOREACH );
parse_var_name( &var_name );
parse_token( TOK_IN );
parse_value();
parse_token( ':' );
list_index = code.stack_index - 1;
emit_instr( INS_PUSH_VAR, list_index );
emit_instr( INS_GET_1ST_ELEMENT, 0 );
/* Emit code to test if loop will be repeated. */
label = code.instr_count;
emit_instr( INS_PUSH_VAR, list_index + 1 );
add_instr_to_list( &loop->out_jumps, emit_instr( INS_JUMP_IF_NULL, 0 ) );
/* Open a new scope, create visible copies of iteration variables,
* parse statements and close scope. */
open_scope();
emit_instr( INS_PUSH_VAR, list_index + 1 );
define_var_in_scope( var_name, code.stack_index - 1 );
parse_statements();
close_scope( TRUE );
parse_token( TOK_END );
if (next_token == TOK_FOREACH)
read_next_token();
parse_token( ';' );
no_source_line();
/* Patch the jumps to repeat the loop. */
if (loop->repeat_jumps.first != NULL)
{
patch_instr_list( &loop->repeat_jumps, code.instr_count );
emit_instr( INS_POP_TO, list_index + 2 );
}
/* Iterate variables. */
emit_instr( INS_ITERATE, list_index + 1 );
emit_instr( INS_JUMP, label );
/* Patch the jumps to exit. */
patch_instr_list( &loop->out_jumps, code.instr_count );
free_first_node( &loop_list );
free_mem( &loop_name );
/* Pop iteration variable and list. */
emit_instr( INS_POP_TO, list_index );
free_mem( &var_name );
}
/*---------------------------------------------------------------------------*/
static void
parse_if_statement( void )
/* Parse an "if" statement. */
{
condition_t condition;
list_t end_jumps;
list_t else_jumps;
instr_t *end_jump;
bool_t need_jump_to_end;
clear_list( &else_jumps );
clear_list( &end_jumps );
need_jump_to_end = FALSE;
test_token( TOK_IF );
do
{
if (need_jump_to_end)
{
/* Emit jump if there is already another branch. */
no_source_line();
end_jump = emit_instr( INS_JUMP, 0 );
add_instr_to_list( &end_jumps, end_jump );
}
read_next_token();
new_source_line();
/* Jump here if previous conditions were false. */
patch_instr_list( &else_jumps, code.instr_count );
parse_condition( &condition );
parse_token( TOK_THEN );
/* Fall through if condition is true. */
else_jumps = condition.false_jumps;
negate_jump( condition.last_jump );
add_instr_to_list( &else_jumps, condition.last_jump );
patch_instr_list( &condition.true_jumps, code.instr_count );
open_scope();
define_pattern_vars();
parse_statements();
close_scope( TRUE );
need_jump_to_end = TRUE;
} while (next_token == TOK_ELSEIF);
if (next_token == TOK_ELSE)
{
/* Emit a jump to the end. */
no_source_line();
end_jump = emit_instr( INS_JUMP, 0 );
add_instr_to_list( &end_jumps, end_jump );
read_next_token();
patch_instr_list( &else_jumps, code.instr_count );
open_scope();
parse_statements();
close_scope( TRUE );
}
else
patch_instr_list( &else_jumps, code.instr_count );
parse_token( TOK_END );
if (next_token == TOK_IF)
read_next_token();
patch_instr_list( &end_jumps, code.instr_count );
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_list_assignment( void )
/* Parse a list assignment. */
{
int_t var_count; /* Number of variables to be assigned. */
int_t path_var_count; /* Number of variables that have a path. */
int_t i;
list_t var_list;
var_node_t *var_node;
string_t var_name;
int_t base_index, path_index, var_index;
base_index = code.stack_index;
new_source_line();
var_count = path_var_count = 0;
clear_list( &var_list );
parse_token( '<' );
while (TRUE)
{
parse_var_name( &var_name );
var_index = find_variable( var_name );
free_mem( &var_name );
if (next_token == '.')
{
read_next_token();
path_index = base_index + path_var_count;
parse_attribute_path();
path_var_count++;
}
else
path_index = -1;
var_node = new_node( &var_list, sizeof( var_node_t ), LIST_END );
var_node->var_index = var_index;
var_node->path_index = path_index;
var_count++;
if (next_token != ',')
break;
read_next_token();
}
parse_token( '>' );
parse_token( TOK_ASSIGN );
parse_value();
emit_instr( INS_DECOMPOSE_LIST, var_count );
for (i = 0; i < var_count; i++)
{
var_node = (var_node_t *) var_list.first;
var_index = var_node->var_index;
path_index = var_node->path_index;
if (path_index != -1)
{
emit_instr( INS_PUSH_VAR, path_index );
emit_instr( INS_PUSH_VAR, base_index + path_var_count + i );
emit_instr( INS_SET_VAR_PATH, var_index );
}
else
{
emit_instr( INS_PUSH_VAR, base_index + path_var_count + i );
emit_instr( INS_SET_VAR, var_index );
}
free_first_node( &var_list );
}
emit_instr( INS_POP, path_var_count + var_count );
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_select_statement( void )
/* Parse a "select" or "parallel" statement. */
{
list_t end_jumps;
instr_t *last_jump;
int_t start_token;
last_jump = NULL;
clear_list( &end_jumps );
if (next_token != TOK_SELECT && next_token != TOK_PARALLEL)
complain( "Missing \"select\" or \"parallel\"." );
start_token = next_token;
do
{
read_next_token(); /* Read over "select, "parallel", "or" or "and". */
if (last_jump != NULL)
{
/* Jump to end and patch JUMP_LATER if there is already a subrule. */
no_source_line();
add_instr_to_list( &end_jumps, emit_instr( INS_JUMP, 0 ) );
patch_instr( last_jump, code.instr_count );
}
else
new_source_line();
last_jump = emit_instr( INS_JUMP_LATER, 0 );
open_scope();
parse_statements();
close_scope( TRUE );
} while ((start_token == TOK_PARALLEL && next_token == TOK_AND)
|| (start_token == TOK_SELECT && next_token == TOK_OR));
/* The last patch is invalid, replace it by an INS_NOP. */
*last_jump = INSTR( INS_NOP, 0 );
patch_instr_list( &end_jumps, code.instr_count );
parse_token( TOK_END );
if (next_token == start_token)
read_next_token();
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_repeat_statement( void )
/* Parse a "repeat" statement. */
{
condition_t condition;
int_t label; /* Label where to jump back. */
int_t values_to_pop; /* Number of values to pop when exiting. */
scope_node_t *scope;
/* No need for "new_source_line" since no code is generated here. */
parse_token( TOK_REPEAT );
label = code.instr_count;
open_scope();
parse_statements();
scope = (scope_node_t *) scope_list.first;
values_to_pop = code.stack_index - scope->initial_index;
new_source_line();
parse_token( TOK_WHILE );
parse_condition( &condition );
parse_token( ';' );
patch_instr_list( &condition.true_jumps, code.instr_count );
define_pattern_vars();
parse_statements();
close_scope( TRUE );
parse_token( TOK_END );
if (next_token == TOK_REPEAT)
read_next_token();
parse_token( ';' );
no_source_line();
emit_instr( INS_JUMP, label );
patch_instr_list( &condition.false_jumps, code.instr_count );
negate_jump( condition.last_jump );
patch_instr( condition.last_jump, code.instr_count );
/* We have to pop manually, since we jumped from midst a scope. */
if (values_to_pop > 0)
emit_instr( INS_POP, values_to_pop );
}
/*---------------------------------------------------------------------------*/
static void
parse_require_statement( void )
/* Parse a "require" statement. */
{
condition_t condition;
new_source_line();
if (next_token != TOK_REQUIRE && next_token != '?')
{
complain( "\"require\" or \"?\" expected, not %s.",
token_as_text( next_token ) );
}
read_next_token();
parse_condition( &condition );
patch_instr_list( &condition.false_jumps, code.instr_count );
emit_instr( INS_TERMINATE, 0 );
patch_instr_list( &condition.true_jumps, code.instr_count );
patch_instr( condition.last_jump, code.instr_count );
define_pattern_vars();
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_result_statement( void )
/* Parse a "result" statement. */
{
int_t rule_set;
new_source_line();
parse_token( TOK_RESULT );
switch (rule_type)
{
case COMBI_RULE:
parse_value();
parse_token( ',' );
if (next_token == TOK_ACCEPT)
{
read_next_token();
emit_instr( INS_ADD_END_STATE, 0 );
}
else
{
parse_rule_set( &rule_set );
emit_instr( INS_ADD_STATE, rule_set );
}
break;
case END_RULE:
parse_value();
parse_token( ',' );
parse_token( TOK_ACCEPT );
emit_instr( INS_ADD_END_STATE, 0 );
break;
case ALLO_RULE:
parse_value();
parse_token( ',' );
parse_value();
emit_instr( INS_ADD_ALLO, 0 );
break;
case ROBUST_RULE:
parse_value();
if (next_token == ',')
{
read_next_token();
parse_value();
emit_instr( INS_ADD_ALLO, 0 );
}
else
emit_instr( INS_ADD_END_STATE, 0 );
break;
case FILTER_RULE:
parse_value();
emit_instr( INS_ADD_END_STATE, 0 );
break;
default:
complain( "\"result\" not allowed in this rule." );
}
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_return_statement( void )
/* Parse a "return" statement. */
{
rule_t *rule;
new_source_line();
parse_token( TOK_RETURN );
parse_value();
if (rule_type == SUBRULE)
{
rule = pool_item( code.rule_pool, code.rule_count );
emit_instr( INS_RETURN, rule->param_count );
}
else if (rule_type == PRUNING_RULE)
emit_instr( INS_ACCEPT, 0 );
else
complain( "\"return\" is only allowed in subrules and pruning rules." );
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_statements( void )
/* Parse a statement sequence. */
{
while (TRUE)
{
switch (next_token)
{
case TOK_ASSERT:
case '!':
parse_assert_statement();
break;
case TOK_VARIABLE:
parse_assignment();
break;
case TOK_BREAK:
parse_break_statement();
break;
case TOK_CHOOSE:
parse_choose_statement();
break;
case TOK_CONTINUE:
parse_continue_statement();
break;
case TOK_DEFINE:
parse_define_statement();
break;
case TOK_ERROR:
new_source_line();
read_next_token();
parse_value();
emit_instr( INS_ERROR, 0 );
parse_token( ';' );
break;
case TOK_FOREACH:
case TOK_IDENT:
parse_foreach_statement();
break;
case TOK_IF:
parse_if_statement();
break;
case '<':
parse_list_assignment();
break;
case TOK_SELECT:
case TOK_PARALLEL:
parse_select_statement();
break;
case TOK_REPEAT:
parse_repeat_statement();
break;
case TOK_REQUIRE:
case '?':
parse_require_statement();
break;
case TOK_RESULT:
parse_result_statement();
break;
case TOK_RETURN:
parse_return_statement();
break;
case TOK_STOP:
new_source_line();
read_next_token();
emit_instr( INS_TERMINATE, 0 );
parse_token( ';' );
break;
default:
return;
}
}
}
/* Parse functions for rules. ===============================================*/
static void
parse_rule( void )
/* Parse a rule. */
{
int_t rule_token;
string_t rule_name, rule_file;
int_t param_count, i, rule_line;
list_t params;
name_node_t *param;
rule_line = current_line_number();
rule_file = current_file_name();
rule_token = next_token;
switch (rule_token)
{
case TOK_ALLO_RULE:
if (code.file_type != ALLO_RULE_FILE)
complain( "\"allo_rule\" only allowed in allo rule files." );
rule_type = ALLO_RULE;
break;
case TOK_COMBI_RULE:
if (code.file_type == ALLO_RULE_FILE)
complain( "\"combi_rule\" not allowed in allo rule files." );
rule_type = COMBI_RULE;
break;
case TOK_END_RULE:
if (code.file_type == ALLO_RULE_FILE)
complain( "\"end_rule\" not allowed in allo rule files." );
rule_type = END_RULE;
break;
case TOK_INPUT_FILTER:
if (code.file_type != SYNTAX_RULE_FILE)
complain( "\"input_filter\" only allowed in syntax rule files." );
rule_type = FILTER_RULE;
break;
case TOK_OUTPUT_FILTER:
rule_type = FILTER_RULE;
break;
case TOK_PRUNING_RULE:
if (code.file_type == ALLO_RULE_FILE)
complain( "\"pruning_rule\" not allowed in allo rule files." );
rule_type = PRUNING_RULE;
break;
case TOK_ROBUST_RULE:
if (code.file_type != MORPHO_RULE_FILE)
complain( "\"robust_rule\" only allowed in morphology rule files." );
rule_type = ROBUST_RULE;
break;
case TOK_SUBRULE:
rule_type = SUBRULE;
break;
}
read_next_token();
/* Remember rule name. */
test_token( TOK_IDENT );
rule_name = new_string( token_name, NULL );
read_next_token();
/* Read parameter list. */
clear_list( ¶ms );
param_count = 0;
parse_token( '(' );
while (next_token != ')')
{
test_token( TOK_VARIABLE );
param = new_node( ¶ms, sizeof( name_node_t ), LIST_END );
param->name = new_string( token_name, NULL );
param_count++;
read_next_token();
if (next_token != ',')
break;
read_next_token();
}
parse_token( ')' );
switch (rule_type)
{
case ALLO_RULE:
case FILTER_RULE:
case PRUNING_RULE:
if (param_count != 1)
complain( "%s takes one parameter.", token_as_text( rule_token ) );
break;
case END_RULE:
case ROBUST_RULE:
if (param_count < 1 || param_count > 2)
complain( "%s takes 1 or 2 parameters.", token_as_text( rule_token ) );
break;
case COMBI_RULE:
if (param_count < 2 || param_count > 4)
complain( "\"combi_rule\" takes 2 to 4 parameters." );
break;
case SUBRULE:
break;
}
code.rule_count = enter_rule( rule_name, code.instr_count, rule_type,
param_count, rule_line, rule_file );
/* Save rule number if this is a special rule. */
switch (rule_token)
{
case TOK_PRUNING_RULE:
if (code.pruning_rule != -1)
complain( "\"pruning_rule\" defined twice." );
code.pruning_rule = code.rule_count;
break;
case TOK_ROBUST_RULE:
if (code.robust_rule != -1)
complain( "\"robust_rule\" defined twice." );
code.robust_rule = code.rule_count;
break;
case TOK_ALLO_RULE:
if (code.allo_rule != -1)
complain( "\"allo_rule\" defined twice." );
code.allo_rule = code.rule_count;
break;
case TOK_INPUT_FILTER:
if (code.input_filter != -1)
complain( "\"input_filter\" defined twice." );
code.input_filter = code.rule_count;
break;
case TOK_OUTPUT_FILTER:
if (code.output_filter != -1)
complain( "\"output_filter\" defined twice." );
code.output_filter = code.rule_count;
break;
}
parse_token( ':' );
/* Parse rule body. */
open_scope();
if (rule_type == SUBRULE)
i = - (param_count + 2);
else
{
i = code.stack_index;
code.stack_index += param_count;
}
FOREACH_FREE( param, params )
{
define_var_in_scope( param->name, i );
i++;
free_mem( ¶m->name );
}
parse_statements();
close_scope( FALSE );
/* Parse rule end. */
new_source_line();
if (rule_type == SUBRULE || rule_type == PRUNING_RULE)
emit_instr( INS_SYSTEM_ERROR, NO_RETURN_ERROR );
else
emit_instr( INS_TERMINATE, 0 );
parse_token( TOK_END );
if (next_token == rule_token)
read_next_token();
if (next_token == TOK_IDENT)
{
if (strcmp_no_case( token_name, rule_name ) != 0)
complain( "\"%s\" expected, not \"%s\".", rule_name, token_name );
read_next_token();
}
free_mem( &rule_name );
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_constant_definition( bool_t fixed )
/* Parse a constant definition. If FIXED == FALSE, define a default value. */
{
string_t const_name;
int_t const_index;
read_next_token();
test_token( TOK_CONSTANT );
const_name = new_string( token_name, NULL );
read_next_token();
parse_token( TOK_ASSIGN );
parse_constant_value( &const_index );
define_constant( const_name, const_index, fixed );
free_mem( &const_name );
parse_token( ';' );
}
/*---------------------------------------------------------------------------*/
static void
parse_rules( void )
/* Parse rules, constant definitions and includes until EOF. */
{
string_t file_name, path;
while (next_token != EOF)
{
if (next_token == TOK_INCLUDE)
{
/* Parse file name and expand to an absolute file name. */
read_next_token();
test_token( TOK_STRING );
path = absolute_path( token_string, current_file_name() );
file_name = copy_string_to_pool( code.string_pool, path, NULL );
free_mem( &path );
read_next_token();
begin_include( file_name );
parse_rules();
end_include();
parse_token( ';' );
}
else if (next_token == TOK_DEFINE || next_token == TOK_DEFAULT)
parse_constant_definition( (next_token == TOK_DEFINE) );
else if (next_token == TOK_INITIAL)
{
if (code.file_type != MORPHO_RULE_FILE
&& code.file_type != SYNTAX_RULE_FILE)
{
complain( "\"initial\" only allowed in combi rule files." );
}
if (code.initial_rule_set != -1)
complain( "Initial state is already defined." );
read_next_token();
parse_constant_value( &code.initial_feat );
parse_token( ',' );
parse_rule_set( &code.initial_rule_set );
parse_token( ';' );
}
else
parse_rule();
}
}
/*---------------------------------------------------------------------------*/
void
parse_rule_file( void )
/* Parse a rule file. */
{
last_statement_line = -1;
last_statement_file_name = NULL;
code.initial_rule_set = -1;
code.robust_rule = code.pruning_rule = -1;
code.allo_rule = code.input_filter = code.output_filter = -1;
parse_rules();
if ((code.file_type == MORPHO_RULE_FILE
|| code.file_type == SYNTAX_RULE_FILE)
&& code.initial_rule_set == -1)
{
complain( "Missing initial state." );
}
else if (code.file_type == ALLO_RULE_FILE && code.allo_rule == -1)
complain( "Missing allo rule." );
}
/* End of file. =============================================================*/