/*
 -*- mode: c++; c-basic-offset:4 -*-

 This file is part of libdap, A C++ implementation of the OPeNDAP Data
 Access Protocol.

 Copyright (c) 2014 OPeNDAP, Inc.
 Author: James Gallagher

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
*/

%{ /* -*- C++ -*- */

//#include "config.h"

/* std::string is used by YY_FATAL_ERROR and by the semantic values built
   in the WORD and STRING rules below. */
#include <string>

#include "Error.h"
#include "D4FunctionScanner.h"

/* typedef to make the returns for the tokens shorter */

/* NB: It would be best to use the same scanner (and maybe parser) for
   both the D4 CE and Function parameters, but for the initial version
   far less complexity is required by the Function expression scanner
   (since the initial version will just support variables, constants,
   functions and the $<type> array special form) and not function
   arguments that are general expressions (like array slicing and/or
   filters).

   This comment is here because this is the first place where there is
   coupling between the CE parser and its scanner. I'm not sure, however,
   if one string can be parsed by two parsers if they are using two
   scanners, so extending the Function parser to allow function args to
   be any CE clause may mean some more serious work with the parsers.

   jhrg 3/10/14 */
typedef libdap::D4FunctionParser::token token;

/* This was added because of some notes on the net about compiler version
   issues. I don't know if it's needed when using the C++ mode of flex. */
#undef yywrap
#define yywrap() 1

/* define yyterminate as this instead of NULL */
#define yyterminate() return(token::END)

/* Report scanner failures by throwing a libdap::Error (malformed_expr)
   that carries the message text, instead of flex's default fatal-error
   handling. */
#define YY_FATAL_ERROR(msg) {\
    throw(libdap::Error(malformed_expr, std::string("Error scanning function expression text: ") + std::string(msg))); \
}

%}

%option c++
%option yyclass="D4FunctionScanner"

/* Use this if several scanners are needed. This will cause flex to
   #define yyFlexLexer to be <prefix>FlexLexer (the yyFlexLexer is defined
   in lex.<prefix>.cc. jhrg 8/8/13 */
%option prefix="d4_function"

/* These two options turn on line counting - useful for error messages -
   and debugging, respectively. When debugging is on, it's possible to see
   which scanner rules are used at which points in the input. */
%option yylineno
%option debug

/* Do not output the default rule (where any unmatched input is echoed to
   stdout). When set, nodefault will cause the scanner to exit on an error. */
%option nodefault

/* noyywrap makes the scanner assume that EOF/EOS is the end of the input.
   If this is not set, the scanner will assume there are more files to
   scan. */
%option noyywrap
%option nounput

/* When set, warn prints a message when the default rule can be matched
   but nodefault is given (among other warnings). */
%option warn

%option batch

/* Exclusive start condition used while scanning a double-quoted string.
   It is entered when an opening double quote is seen in INITIAL and left
   on the closing (unescaped) double quote or on an error. Because it is
   exclusive, only rules prefixed with <quote> are active while in it. The
   pieces of the string are accumulated in yytext using yymore(). */
%x quote

/* This pattern is slightly different from the one used by the CE scanner
   because it allows a WORD to start with a '#' so that the #<type> array
   constant syntax can be used in functions. Otherwise, a WORD must be able
   to contain this hideous mix of characters because a variable can.
   jhrg 3/10/14 */
WORD    [-+a-zA-Z0-9_%*\\~@!#][-+a-zA-Z0-9_%*\\~@!#]*

/* I added these tokens because floating point values may contain dots and
   adding a '.' to WORD will break the parsing of paths (or make for some
   fairly obscure code - where $Float32() takes tokens that match 'path'.
   Since we have a separate scanner for the function expressions, might as
   well add a FLOAT token... jhg 3/17/14

   NB: The FLOAT definition is currently commented out and unused.

FLOAT   [-+eE.0-9][-+eE.0-9]*
*/

%{
// Code run each time a pattern is matched
#define YY_USER_ACTION loc->columns(yyleng);
%}

%%

%{
// Code run each time yylex is called
loc->step();
%}

","         return token::COMMA;
";"         return token::SEMICOLON;
":"         return token::COLON;

"("         return token::LPAREN;
")"         return token::RPAREN;

"/"         return token::GROUP_SEP;
"."         return token::PATH_SEP;

"$Byte"     return token::DOLLAR_BYTE;
"$UInt8"    return token::DOLLAR_UINT8;
"$Int8"     return token::DOLLAR_INT8;
"$UInt16"   return token::DOLLAR_UINT16;
"$Int16"    return token::DOLLAR_INT16;
"$UInt32"   return token::DOLLAR_UINT32;
"$Int32"    return token::DOLLAR_INT32;
"$UInt64"   return token::DOLLAR_UINT64;
"$Int64"    return token::DOLLAR_INT64;
"$Float32"  return token::DOLLAR_FLOAT32;
"$Float64"  return token::DOLLAR_FLOAT64;

[ \t]+      /* ignore these */

[\r\n]+     /* ignore these */

{WORD}      {
                /* Copy the text into a std::string; yytext points into the
                   scanner's buffer. NOTE(review): assumes the parser declares
                   WORD as std::string - confirm against the grammar. */
                yylval->build<std::string>(yytext);
                return token::WORD;
            }

<<EOF>>     return token::END;

["]         { BEGIN(quote); yymore(); }

<quote>[^"\\]*      yymore(); /* Anything that's not a double quote or a backslash */

<quote>[\\]["]      yymore(); /* This matches the escaped double quote (\") */

<quote>[\\]{2}      yymore(); /* This matches an escaped escape (\\) */

<quote>[\\]{1}      {
                        /* A backslash followed by anything other than a double
                           quote or another backslash is an error. The test of
                           yytext is always true; presumably it keeps the code
                           flex emits after the action from being flagged as
                           unreachable. */
                        BEGIN(INITIAL);
                        if (yytext) {
                            YY_FATAL_ERROR("Inside a string, backslash (\\) can escape a double quote or must itself be escaped (\\\\).");
                        }
                    }

<quote>["]          {
                        /* An unescaped double quote in the 'quote' state indicates the end of the string */
                        BEGIN(INITIAL);
                        /* yytext holds the whole string, including both
                           double quotes, because of the yymore() calls.
                           NOTE(review): assumes the parser declares STRING as
                           std::string - confirm against the grammar. */
                        yylval->build<std::string>(yytext);
                        return token::STRING;
                    }

<quote><<EOF>>      {
                        BEGIN(INITIAL);   /* resetting the state is needed for reentrant parsers */
                        YY_FATAL_ERROR("Unterminated quote");
                    }

.           {
                /* Catch-all for INITIAL: any character not matched by a rule
                   above is an error (required because of %option nodefault). */
                BEGIN(INITIAL);
                if (yytext) {
                    YY_FATAL_ERROR("Characters found in the input were not recognized.");
                }
            }
%%