// Copyright (c) 2015-2018 Dr. Colin Hirsch and Daniel Frey
// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/

#include <tao/pegtl.hpp>
#include <tao/pegtl/analyze.hpp>
#include <tao/pegtl/contrib/raw_string.hpp>

namespace lua53 {

// PEGTL grammar for the Lua 5.3.0 lexer and parser.
//
// The grammar here is not very similar to the grammar
// in the Lua reference documentation on which it is based,
// which is due to multiple causes.
//
// The main difference is that this grammar includes really
// "everything", not just the structural parts from the
// reference documentation:
// - The PEG-approach combines lexer and parser; this grammar
//   handles comments and tokenisation.
// - The operator precedence and associativity are reflected
//   in the structure of this grammar.
// - All details for all types of literals are included, with
//   escape-sequences for literal strings, and long literals.
//
// The second necessary difference is that all left-recursion
// had to be eliminated.
//
// In some places the grammar was optimised to require as little
// back-tracking as possible, most prominently for expressions.
// The original grammar contains the following production rules:
//
//   prefixexp ::= var | functioncall | ‘(’ exp ‘)’
//   functioncall ::= prefixexp args | prefixexp ‘:’ Name args
//   var ::= Name | prefixexp ‘[’ exp ‘]’ | prefixexp ‘.’ Name
//
// We need to eliminate the left-recursion, and we also want to
// remove the ambiguity between function calls and variables,
// i.e. the fact that we can have expressions like
//
//   ( a * b ).c()[ d ].e:f()
//
// where only the last element decides between function call and
// variable, making it necessary to parse the whole thing again
// if we chose wrong at the beginning.
// First we eliminate prefixexp and obtain:
//
//   functioncall ::= ( var | functioncall | ‘(’ exp ‘)’ ) ( args | ‘:’ Name args )
//   var ::= Name | ( var | functioncall | ‘(’ exp ‘)’ ) ( ‘[’ exp ‘]’ | ‘.’ Name )
//
// Next we split function_call and variable into a first part,
// a "head", or how they can start, and a second part, the "tail",
// which, in a sequence like above, is the final deciding part:
//
//   vartail ::= '[' exp ']' | '.' Name
//   varhead ::= Name | '(' exp ')' vartail
//   functail ::= args | ':' Name args
//   funchead ::= Name | '(' exp ')'
//
// This allows us to rewrite var and function_call as follows.
//
//   var ::= varhead { { functail } vartail }
//   function_call ::= funchead [ { vartail } functail ]
//
// Finally we can define a single expression that takes care
// of var, function_call, and expressions in a bracket:
//
//   chead ::= '(' exp ')' | Name
//   combined ::= chead { functail | vartail }
//
// Such a combined expression starts with a bracketed
// expression or a name, and continues with an arbitrary
// number of functail and/or vartail parts, all in one
// grammar rule without back-tracking.
//
// The rule expr_thirteen below implements "combined".
//
// Another issue of interest when writing a PEG is how to
// manage the separators, the white-space and comments that
// can occur in many places; in the classical two-stage
// lexer-parser approach the lexer would have taken care of
// this, but here we use the PEG approach that combines both.
//
// In the following grammar most rules adopt the convention
// that they take care of "internal padding", i.e. spaces
// and comments that can occur within the rule, but not
// "external padding", i.e. they don't start or end with
// a rule that "eats up" all extra padding (spaces and
// comments). In some places, where it is more efficient,
// right padding is used.
namespace pegtl = tao::TAO_PEGTL_NAMESPACE; // clang-format off struct short_comment : pegtl::until< pegtl::eolf > {}; struct long_string : pegtl::raw_string< '[', '=', ']' > {}; struct comment : pegtl::disable< pegtl::two< '-' >, pegtl::sor< long_string, short_comment > > {}; struct sep : pegtl::sor< pegtl::ascii::space, comment > {}; struct seps : pegtl::star< sep > {}; struct str_and : TAO_PEGTL_STRING( "and" ) {}; struct str_break : TAO_PEGTL_STRING( "break" ) {}; struct str_do : TAO_PEGTL_STRING( "do" ) {}; struct str_else : TAO_PEGTL_STRING( "else" ) {}; struct str_elseif : TAO_PEGTL_STRING( "elseif" ) {}; struct str_end : TAO_PEGTL_STRING( "end" ) {}; struct str_false : TAO_PEGTL_STRING( "false" ) {}; struct str_for : TAO_PEGTL_STRING( "for" ) {}; struct str_function : TAO_PEGTL_STRING( "function" ) {}; struct str_goto : TAO_PEGTL_STRING( "goto" ) {}; struct str_if : TAO_PEGTL_STRING( "if" ) {}; struct str_in : TAO_PEGTL_STRING( "in" ) {}; struct str_local : TAO_PEGTL_STRING( "local" ) {}; struct str_nil : TAO_PEGTL_STRING( "nil" ) {}; struct str_not : TAO_PEGTL_STRING( "not" ) {}; struct str_or : TAO_PEGTL_STRING( "or" ) {}; struct str_repeat : TAO_PEGTL_STRING( "repeat" ) {}; struct str_return : TAO_PEGTL_STRING( "return" ) {}; struct str_then : TAO_PEGTL_STRING( "then" ) {}; struct str_true : TAO_PEGTL_STRING( "true" ) {}; struct str_until : TAO_PEGTL_STRING( "until" ) {}; struct str_while : TAO_PEGTL_STRING( "while" ) {}; // Note that 'elseif' precedes 'else' in order to prevent only matching // the "else" part of an "elseif" and running into an error in the // 'keyword' rule. 
struct str_keyword : pegtl::sor< str_and, str_break, str_do, str_elseif, str_else, str_end, str_false, str_for, str_function, str_goto, str_if, str_in, str_local, str_nil, str_not, str_repeat, str_return, str_then, str_true, str_until, str_while > {}; template< typename Key > struct key : pegtl::seq< Key, pegtl::not_at< pegtl::identifier_other > > {}; struct key_and : key< str_and > {}; struct key_break : key< str_break > {}; struct key_do : key< str_do > {}; struct key_else : key< str_else > {}; struct key_elseif : key< str_elseif > {}; struct key_end : key< str_end > {}; struct key_false : key< str_false > {}; struct key_for : key< str_for > {}; struct key_function : key< str_function > {}; struct key_goto : key< str_goto > {}; struct key_if : key< str_if > {}; struct key_in : key< str_in > {}; struct key_local : key< str_local > {}; struct key_nil : key< str_nil > {}; struct key_not : key< str_not > {}; struct key_or : key< str_or > {}; struct key_repeat : key< str_repeat > {}; struct key_return : key< str_return > {}; struct key_then : key< str_then > {}; struct key_true : key< str_true > {}; struct key_until : key< str_until > {}; struct key_while : key< str_while > {}; struct keyword : key< str_keyword > {}; template< typename R > struct pad : pegtl::pad< R, sep > {}; struct three_dots : pegtl::three< '.' 
> {}; struct name : pegtl::seq< pegtl::not_at< keyword >, pegtl::identifier > {}; struct single : pegtl::one< 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'', '0', '\n' > {}; struct spaces : pegtl::seq< pegtl::one< 'z' >, pegtl::star< pegtl::space > > {}; struct hexbyte : pegtl::if_must< pegtl::one< 'x' >, pegtl::xdigit, pegtl::xdigit > {}; struct decbyte : pegtl::if_must< pegtl::digit, pegtl::rep_opt< 2, pegtl::digit > > {}; struct unichar : pegtl::if_must< pegtl::one< 'u' >, pegtl::one< '{' >, pegtl::plus< pegtl::xdigit >, pegtl::one< '}' > > {}; struct escaped : pegtl::if_must< pegtl::one< '\\' >, pegtl::sor< hexbyte, decbyte, unichar, single, spaces > > {}; struct regular : pegtl::not_one< '\r', '\n' > {}; struct character : pegtl::sor< escaped, regular > {}; template< char Q > struct short_string : pegtl::if_must< pegtl::one< Q >, pegtl::until< pegtl::one< Q >, character > > {}; struct literal_string : pegtl::sor< short_string< '"' >, short_string< '\'' >, long_string > {}; template< typename E > struct exponent : pegtl::opt_must< E, pegtl::opt< pegtl::one< '+', '-' > >, pegtl::plus< pegtl::digit > > {}; template< typename D, typename E > struct numeral_three : pegtl::seq< pegtl::if_must< pegtl::one< '.' >, pegtl::plus< D > >, exponent< E > > {}; template< typename D, typename E > struct numeral_two : pegtl::seq< pegtl::plus< D >, pegtl::opt< pegtl::one< '.' 
>, pegtl::star< D > >, exponent< E > > {}; template< typename D, typename E > struct numeral_one : pegtl::sor< numeral_two< D, E >, numeral_three< D, E > > {}; struct decimal : numeral_one< pegtl::digit, pegtl::one< 'e', 'E' > > {}; struct hexadecimal : pegtl::if_must< pegtl::istring< '0', 'x' >, numeral_one< pegtl::xdigit, pegtl::one< 'p', 'P' > > > {}; struct numeral : pegtl::sor< hexadecimal, decimal > {}; struct label_statement : pegtl::if_must< pegtl::two< ':' >, seps, name, seps, pegtl::two< ':' > > {}; struct goto_statement : pegtl::if_must< key_goto, seps, name > {}; struct statement; struct expression; struct name_list : pegtl::list< name, pegtl::one< ',' >, sep > {}; struct name_list_must : pegtl::list_must< name, pegtl::one< ',' >, sep > {}; struct expr_list_must : pegtl::list_must< expression, pegtl::one< ',' >, sep > {}; struct statement_return : pegtl::seq< pegtl::pad_opt< expr_list_must, sep >, pegtl::opt< pegtl::one< ';' >, seps > > {}; template< typename E > struct statement_list : pegtl::seq< seps, pegtl::until< pegtl::sor< E, pegtl::if_must< key_return, statement_return, E > >, statement, seps > > {}; template< char O, char... N > struct op_one : pegtl::seq< pegtl::one< O >, pegtl::at< pegtl::not_one< N... > > > {}; template< char O, char P, char... N > struct op_two : pegtl::seq< pegtl::string< O, P >, pegtl::at< pegtl::not_one< N... 
> > > {}; struct table_field_one : pegtl::if_must< pegtl::one< '[' >, seps, expression, seps, pegtl::one< ']' >, seps, pegtl::one< '=' >, seps, expression > {}; struct table_field_two : pegtl::if_must< pegtl::seq< name, seps, op_one< '=', '=' > >, seps, expression > {}; struct table_field : pegtl::sor< table_field_one, table_field_two, expression > {}; struct table_field_list : pegtl::list_tail< table_field, pegtl::one< ',', ';' >, sep > {}; struct table_constructor : pegtl::if_must< pegtl::one< '{' >, pegtl::pad_opt< table_field_list, sep >, pegtl::one< '}' > > {}; struct parameter_list_one : pegtl::seq< name_list, pegtl::opt_must< pad< pegtl::one< ',' > >, three_dots > > {}; struct parameter_list : pegtl::sor< three_dots, parameter_list_one > {}; struct function_body : pegtl::seq< pegtl::one< '(' >, pegtl::pad_opt< parameter_list, sep >, pegtl::one< ')' >, seps, statement_list< key_end > > {}; struct function_literal : pegtl::if_must< key_function, seps, function_body > {}; struct bracket_expr : pegtl::if_must< pegtl::one< '(' >, seps, expression, seps, pegtl::one< ')' > > {}; struct function_args_one : pegtl::if_must< pegtl::one< '(' >, pegtl::pad_opt< expr_list_must, sep >, pegtl::one< ')' > > {}; struct function_args : pegtl::sor< function_args_one, table_constructor, literal_string > {}; struct variable_tail_one : pegtl::if_must< pegtl::one< '[' >, seps, expression, seps, pegtl::one< ']' > > {}; struct variable_tail_two : pegtl::if_must< pegtl::seq< pegtl::not_at< pegtl::two< '.' > >, pegtl::one< '.' 
> >, seps, name > {}; struct variable_tail : pegtl::sor< variable_tail_one, variable_tail_two > {}; struct function_call_tail_one : pegtl::if_must< pegtl::seq< pegtl::not_at< pegtl::two< ':' > >, pegtl::one< ':' > >, seps, name, seps, function_args > {}; struct function_call_tail : pegtl::sor< function_args, function_call_tail_one > {}; struct variable_head_one : pegtl::seq< bracket_expr, seps, variable_tail > {}; struct variable_head : pegtl::sor< name, variable_head_one > {}; struct function_call_head : pegtl::sor< name, bracket_expr > {}; struct variable : pegtl::seq< variable_head, pegtl::star< pegtl::star< seps, function_call_tail >, seps, variable_tail > > {}; struct function_call : pegtl::seq< function_call_head, pegtl::plus< pegtl::until< pegtl::seq< seps, function_call_tail >, seps, variable_tail > > > {}; template< typename S, typename O > struct left_assoc : pegtl::seq< S, seps, pegtl::star_must< O, seps, S, seps > > {}; template< typename S, typename O > struct right_assoc : pegtl::seq< S, seps, pegtl::opt_must< O, seps, right_assoc< S, O > > > {}; struct unary_operators : pegtl::sor< pegtl::one< '-' >, pegtl::one< '#' >, op_one< '~', '=' >, key_not > {}; struct expr_ten; struct expr_thirteen : pegtl::seq< pegtl::sor< bracket_expr, name >, pegtl::star< seps, pegtl::sor< function_call_tail, variable_tail > > > {}; struct expr_twelve : pegtl::sor< key_nil, key_true, key_false, three_dots, numeral, literal_string, function_literal, expr_thirteen, table_constructor > {}; struct expr_eleven : pegtl::seq< expr_twelve, seps, pegtl::opt< pegtl::one< '^' >, seps, expr_ten, seps > > {}; struct unary_apply : pegtl::if_must< unary_operators, seps, expr_ten, seps > {}; struct expr_ten : pegtl::sor< unary_apply, expr_eleven > {}; struct operators_nine : pegtl::sor< pegtl::two< '/' >, pegtl::one< '/' >, pegtl::one< '*' >, pegtl::one< '%' > > {}; struct expr_nine : left_assoc< expr_ten, operators_nine > {}; struct operators_eight : pegtl::sor< pegtl::one< '+' >, 
pegtl::one< '-' > > {}; struct expr_eight : left_assoc< expr_nine, operators_eight > {}; struct expr_seven : right_assoc< expr_eight, op_two< '.', '.', '.' > > {}; struct operators_six : pegtl::sor< pegtl::two< '<' >, pegtl::two< '>' > > {}; struct expr_six : left_assoc< expr_seven, operators_six > {}; struct expr_five : left_assoc< expr_six, pegtl::one< '&' > > {}; struct expr_four : left_assoc< expr_five, op_one< '~', '=' > > {}; struct expr_three : left_assoc< expr_four, pegtl::one< '|' > > {}; struct operators_two : pegtl::sor< pegtl::two< '=' >, pegtl::string< '<', '=' >, pegtl::string< '>', '=' >, op_one< '<', '<' >, op_one< '>', '>' >, pegtl::string< '~', '=' > > {}; struct expr_two : left_assoc< expr_three, operators_two > {}; struct expr_one : left_assoc< expr_two, key_and > {}; struct expression : left_assoc< expr_one, key_or > {}; struct do_statement : pegtl::if_must< key_do, statement_list< key_end > > {}; struct while_statement : pegtl::if_must< key_while, seps, expression, seps, key_do, statement_list< key_end > > {}; struct repeat_statement : pegtl::if_must< key_repeat, statement_list< key_until >, seps, expression > {}; struct at_elseif_else_end : pegtl::sor< pegtl::at< key_elseif >, pegtl::at< key_else >, pegtl::at< key_end > > {}; struct elseif_statement : pegtl::if_must< key_elseif, seps, expression, seps, key_then, statement_list< at_elseif_else_end > > {}; struct else_statement : pegtl::if_must< key_else, statement_list< key_end > > {}; struct if_statement : pegtl::if_must< key_if, seps, expression, seps, key_then, statement_list< at_elseif_else_end >, seps, pegtl::until< pegtl::sor< else_statement, key_end >, elseif_statement, seps > > {}; struct for_statement_one : pegtl::seq< pegtl::one< '=' >, seps, expression, seps, pegtl::one< ',' >, seps, expression, pegtl::pad_opt< pegtl::if_must< pegtl::one< ',' >, seps, expression >, sep > > {}; struct for_statement_two : pegtl::seq< pegtl::opt_must< pegtl::one< ',' >, seps, name_list_must, seps >, 
key_in, seps, expr_list_must, seps > {}; struct for_statement : pegtl::if_must< key_for, seps, name, seps, pegtl::sor< for_statement_one, for_statement_two >, key_do, statement_list< key_end > > {}; struct assignment_variable_list : pegtl::list_must< variable, pegtl::one< ',' >, sep > {}; struct assignments_one : pegtl::if_must< pegtl::one< '=' >, seps, expr_list_must > {}; struct assignments : pegtl::seq< assignment_variable_list, seps, assignments_one > {}; struct function_name : pegtl::seq< pegtl::list< name, pegtl::one< '.' >, sep >, seps, pegtl::opt_must< pegtl::one< ':' >, seps, name, seps > > {}; struct function_definition : pegtl::if_must< key_function, seps, function_name, function_body > {}; struct local_function : pegtl::if_must< key_function, seps, name, seps, function_body > {}; struct local_variables : pegtl::if_must< name_list_must, seps, pegtl::opt< assignments_one > > {}; struct local_statement : pegtl::if_must< key_local, seps, pegtl::sor< local_function, local_variables > > {}; struct semicolon : pegtl::one< ';' > {}; struct statement : pegtl::sor< semicolon, assignments, function_call, label_statement, key_break, goto_statement, do_statement, while_statement, repeat_statement, if_statement, for_statement, function_definition, local_statement > {}; struct interpreter : pegtl::seq< pegtl::one< '#' >, pegtl::until< pegtl::eolf > > {}; struct grammar : pegtl::must< pegtl::opt< interpreter >, statement_list< pegtl::eof > > {}; // clang-format on } // namespace lua53 int main( int argc, char** argv ) { tao::TAO_PEGTL_NAMESPACE::analyze< lua53::grammar >(); for( int i = 1; i < argc; ++i ) { tao::TAO_PEGTL_NAMESPACE::file_input<> in( argv[ i ] ); tao::TAO_PEGTL_NAMESPACE::parse< lua53::grammar >( in ); } return 0; }