Blame d4_function/d4_function_scanner.ll

Packit a4aae4
Packit a4aae4
/*
Packit a4aae4
 -*- mode: c++; c-basic-offset:4 -*-
Packit a4aae4
Packit a4aae4
 This file is part of libdap, A C++ implementation of the OPeNDAP Data
Packit a4aae4
 Access Protocol.
Packit a4aae4
Packit a4aae4
 Copyright (c) 2014 OPeNDAP, Inc.
Packit a4aae4
 Author: James Gallagher <jgallagher@opendap.org>
Packit a4aae4
Packit a4aae4
 This library is free software; you can redistribute it and/or
Packit a4aae4
 modify it under the terms of the GNU Lesser General Public
Packit a4aae4
 License as published by the Free Software Foundation; either
Packit a4aae4
 version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
 
Packit a4aae4
 This library is distributed in the hope that it will be useful,
Packit a4aae4
 but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
 Lesser General Public License for more details.
Packit a4aae4
 
Packit a4aae4
 You should have received a copy of the GNU Lesser General Public
Packit a4aae4
 License along with this library; if not, write to the Free Software
Packit a4aae4
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
Packit a4aae4
Packit a4aae4
 You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
Packit a4aae4
*/ 
Packit a4aae4
Packit a4aae4
%{ /* -*- C++ -*- */
Packit a4aae4
Packit a4aae4
//#include "config.h"
Packit a4aae4
Packit a4aae4
#include <string>
Packit a4aae4
#include "Error.h"
Packit a4aae4
Packit a4aae4
#include "D4FunctionScanner.h"
Packit a4aae4
Packit a4aae4
/* typedef to make the returns for the tokens shorter */
Packit a4aae4
Packit a4aae4
/* NB: It would be best to use the same scanner (and maybe parser) for
Packit a4aae4
   both the D4 CE and Function parameters, but for the initial version 
Packit a4aae4
   far less complexity is required by the Function expression scanner
Packit a4aae4
   (since the initial version will just support variables, constants, 
Packit a4aae4
   functions and the $<type> array special form) and not function arguments
Packit a4aae4
   that are general expressions (like array slicing and/or filters).
Packit a4aae4
   
Packit a4aae4
   This comment is here because this is the first place where there is 
Packit a4aae4
   coupling between the CE parser and its scanner. I'm not sure, however,
Packit a4aae4
   if one string can be parsed by two parsers if they are using two scanners,
Packit a4aae4
   so extending the Function parser to allow function args to be any CE 
Packit a4aae4
   clause may mean some more serious work with the parsers.
Packit a4aae4
   
Packit a4aae4
   jhrg 3/10/14 */
Packit a4aae4
/* Alias the parser's nested token type so the rule actions in this file can
   write token::NAME. */
typedef libdap::D4FunctionParser::token token;
Packit a4aae4
Packit a4aae4
/* This was added because of some notes on the net about compiler version
Packit a4aae4
   issues. I don't know if it's needed when using the C++ mode of flex. */
Packit a4aae4
/* Redefine yywrap() so it always yields 1. NOTE(review): '%option noyywrap' is
   also set in this file, so this pair may be redundant - confirm before
   removing either one. */
#undef yywrap
Packit a4aae4
#define yywrap() 1
Packit a4aae4
Packit a4aae4
/* Define yyterminate() to return the parser's END token instead of NULL. */
Packit a4aae4
#define yyterminate() return(token::END)
Packit a4aae4
Packit a4aae4
/* Report scanner failures by throwing a libdap::Error (code malformed_expr)
   whose text is a fixed prefix followed by the message passed in. The rule
   actions in this file also invoke this macro directly. */
#define YY_FATAL_ERROR(msg) {\
Packit a4aae4
    throw(libdap::Error(malformed_expr, std::string("Error scanning function expression text: ") + std::string(msg))); \
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
%}
Packit a4aae4
Packit a4aae4
/* Generate a C++ scanner; per the flex manual, yyclass names the class whose
   yylex() member receives the rule actions. NOTE(review): presumably that
   class is declared in D4FunctionScanner.h - confirm. */
%option c++
Packit a4aae4
%option yyclass="D4FunctionScanner"
Packit a4aae4
Packit a4aae4
/* Use this if several scanners are needed. This will cause flex to
Packit a4aae4
   #define yyFlexLexer to be <prefix>FlexLexer (the yyFlexLexer is defined
Packit a4aae4
   in lex.<prefix>.cc). jhrg 8/8/13 */
Packit a4aae4
%option prefix="d4_function"
Packit a4aae4
Packit a4aae4
/* These two options turn on line counting - useful for error messages - 
Packit a4aae4
   and debugging, respectively. When debugging is on, it's possible to see
Packit a4aae4
   which scanner rules are used at which points in the input. */
Packit a4aae4
%option yylineno
Packit a4aae4
%option debug
Packit a4aae4
Packit a4aae4
/* Do not output the default rule (where any unmatched input is echoed to 
Packit a4aae4
   stdout). When set, nodefault will cause the scanner to exit on an error. */
Packit a4aae4
%option nodefault
Packit a4aae4
/* noyywrap makes the scanner assume that EOF/EOS is the end of the input.
Packit a4aae4
   If this is not set, the scanner will assume there are more files to 
Packit a4aae4
   scan. */ 
Packit a4aae4
%option noyywrap
Packit a4aae4
/* Do not generate yyunput(); none of the rules in this file call unput(). */
%option nounput
Packit a4aae4
/* When set, warn prints a message when the default rule can be matched
Packit a4aae4
   but nodefault is given (among other warnings). */
Packit a4aae4
%option warn
Packit a4aae4
Packit a4aae4
/* Ask flex for a batch (non-interactive) scanner. */
%option batch
Packit a4aae4
Packit a4aae4
/* Exclusive start condition used while scanning a double-quoted string: while
   it is active only the rules tagged <quote> can match. */
%x quote
Packit a4aae4
Packit a4aae4
/* This pattern is slightly different from the one used by the CE scanner
Packit a4aae4
   because it allows a WORD to start with a '#' so that the #<type> 
Packit a4aae4
   array constant syntax can be used in functions. Otherwise, a WORD must
Packit a4aae4
   be able to contain this hideous mix of characters because a variable 
Packit a4aae4
   can. jhrg 3/10/14 */
Packit a4aae4
/* NOTE(review): the keyword rules in this file spell the typed-array form with
   '$' (e.g. "$Byte"), while the comment above WORD talks about '#<type>';
   confirm which spelling is current before removing '#' from this class. */
WORD    [-+a-zA-Z0-9_%*\\~@!#][-+a-zA-Z0-9_%*\\~@!#]* 
Packit a4aae4
Packit a4aae4
/* I added these tokens because floating point values may contain dots and
Packit a4aae4
   adding a '.' to WORD would break the parsing of paths (or make for some 
Packit a4aae4
   fairly obscure code - where $Float32() takes tokens that match 'path').
Packit a4aae4
   Since we have a separate scanner for the function expressions, might as
Packit a4aae4
   well add a FLOAT token... jhg 3/17/14 
Packit a4aae4
FLOAT   [-+eE.0-9][-+eE.0-9]*
Packit a4aae4
*/
Packit a4aae4
Packit a4aae4
%{
Packit a4aae4
// Code run each time a pattern is matched: extend the current location by
// yyleng columns. NOTE(review): after yymore() yytext also holds the earlier
// pieces of a quoted string, so yyleng presumably re-counts them and columns
// may over-advance for strings - confirm.
Packit a4aae4
#define YY_USER_ACTION loc->columns(yyleng);
Packit a4aae4
%}
Packit a4aae4
Packit a4aae4
%%
Packit a4aae4
Packit a4aae4
%{
Packit a4aae4
// Code run each time yylex is called, before any rule is tried: step() the
// location (presumably so the next token starts where the previous one ended -
// confirm against the Bison location class).
Packit a4aae4
loc->step();
Packit a4aae4
%}
Packit a4aae4
Packit a4aae4
","		return token::COMMA; /* Punctuation: each literal in this group maps to a single parser token */
Packit a4aae4
";"		return token::SEMICOLON;
Packit a4aae4
":"     return token::COLON;
Packit a4aae4
Packit a4aae4
"("     return token::LPAREN;
Packit a4aae4
")"     return token::RPAREN;
Packit a4aae4
Packit a4aae4
"/"     return token::GROUP_SEP;
Packit a4aae4
"."     return token::PATH_SEP; /* '.' is not in the WORD class, so outside a quoted string a dot always arrives as PATH_SEP */
Packit a4aae4
Packit a4aae4
"$Byte" return token::DOLLAR_BYTE; /* "$<type>" keywords: matched as exact literals, one token per type name */
Packit a4aae4
"$UInt8" return token::DOLLAR_UINT8;
Packit a4aae4
"$Int8" return token::DOLLAR_INT8;
Packit a4aae4
"$UInt16" return token::DOLLAR_UINT16;
Packit a4aae4
"$Int16" return token::DOLLAR_INT16;
Packit a4aae4
"$UInt32" return token::DOLLAR_UINT32;
Packit a4aae4
"$Int32" return token::DOLLAR_INT32;
Packit a4aae4
"$UInt64" return token::DOLLAR_UINT64;
Packit a4aae4
"$Int64" return token::DOLLAR_INT64;
Packit a4aae4
"$Float32" return token::DOLLAR_FLOAT32;
Packit a4aae4
"$Float64" return token::DOLLAR_FLOAT64; /* '$' is not in the WORD class, so any other text starting with '$' reaches the catch-all rule */
Packit a4aae4
Packit a4aae4
[ \t]+  /* ignore these: blanks and tabs produce no token */
Packit a4aae4
Packit a4aae4
[\r\n]+ /* ignore these; NOTE(review): nothing here advances loc's line count, so reported locations presumably never leave the first line - confirm whether loc->lines() is wanted */
Packit a4aae4
Packit a4aae4
{WORD}  { yylval->build<std::string>(yytext); return token::WORD; } /* the semantic value is a std::string built from the matched text */
Packit a4aae4
Packit a4aae4
<INITIAL><<EOF>> return token::END; /* end of input outside a quoted string */
Packit a4aae4
Packit a4aae4
["]    { BEGIN(quote); yymore(); } /* Opening quote: enter the 'quote' state; yymore() keeps this text so later matches are appended to yytext */
Packit a4aae4
Packit a4aae4
<quote>[^"\\]*  yymore(); /* Anything that's not a double quote or a backslash */
Packit a4aae4
Packit a4aae4
<quote>[\\]["]	yymore(); /* This matches the escaped double quote (\") */
Packit a4aae4
Packit a4aae4
<quote>[\\]{2}  yymore(); /* This matches an escaped escape (\\) */
Packit a4aae4
Packit a4aae4
<quote>[\\]{1}  {
                    /* A single backslash only matches here when neither two-character
                       escape rule above applies; that is reported as an error. */
Packit a4aae4
                    BEGIN(INITIAL);
Packit a4aae4
                    /* NOTE(review): yytext looks to be always non-null here; the test
                       presumably exists only to quiet compiler warnings around the
                       throw - confirm. */
                    if (yytext) {
Packit a4aae4
                        YY_FATAL_ERROR("Inside a string, backslash (\\) can escape a double quote or must itself be escaped (\\\\).");
Packit a4aae4
                    }
Packit a4aae4
                }
Packit a4aae4
Packit a4aae4
<quote>["]  { 
Packit a4aae4
                /* An unescaped double quote in the 'quote' state indicates the end of the string */
Packit a4aae4
                BEGIN(INITIAL); 
Packit a4aae4
                /* yytext is the whole accumulated string; nothing here strips the
                   enclosing quotes or the backslash escapes. */
                yylval->build<std::string>(yytext); 
Packit a4aae4
                return token::STRING;
Packit a4aae4
            }
Packit a4aae4
Packit a4aae4
<quote><<EOF>>	{
                  /* Input ended before the closing double quote: reset the start
                     condition, then throw via YY_FATAL_ERROR. */
Packit a4aae4
                  BEGIN(INITIAL);   /* resetting the state is needed for reentrant parsers */
Packit a4aae4
                  YY_FATAL_ERROR("Unterminated quote");
Packit a4aae4
                }
Packit a4aae4
Packit a4aae4
.   {
        /* Catch-all for a single character that no earlier rule matched; it is
           reported as an error. NOTE(review): 'quote' is an exclusive start
           condition, so this untagged rule presumably only fires in INITIAL and
           the BEGIN(INITIAL) below is redundant - confirm. */
Packit a4aae4
        BEGIN(INITIAL);
Packit a4aae4
        if (yytext) {
Packit a4aae4
            YY_FATAL_ERROR("Characters found in the input were not recognized.");
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
%%