|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/*
|
|
Packit |
a4aae4 |
-*- mode: c++; c-basic-offset:4 -*-
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This file is part of libdap, A C++ implementation of the OPeNDAP Data
|
|
Packit |
a4aae4 |
Access Protocol.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Copyright (c) 2014 OPeNDAP, Inc.
|
|
Packit |
a4aae4 |
Author: James Gallagher <jgallagher@opendap.org>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This library is free software; you can redistribute it and/or
|
|
Packit |
a4aae4 |
modify it under the terms of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
License as published by the Free Software Foundation; either
|
|
Packit |
a4aae4 |
version 2.1 of the License, or (at your option) any later version.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This library is distributed in the hope that it will be useful,
|
|
Packit |
a4aae4 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
a4aae4 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
a4aae4 |
Lesser General Public License for more details.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
You should have received a copy of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
License along with this library; if not, write to the Free Software
|
|
Packit |
a4aae4 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
|
|
Packit |
a4aae4 |
*/
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
%{ /* -*- C++ -*- */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
//#include "config.h"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <string>
|
|
Packit |
a4aae4 |
#include "Error.h"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include "D4FunctionScanner.h"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* typedef to make the returns for the tokens shorter */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* NB: It would be best to use the same scanner (and maybe parser) for
|
|
Packit |
a4aae4 |
both the D4 CE and Function parameters, but for the initial version
|
|
Packit |
a4aae4 |
far less complexity is required by the Function expression scanner
|
|
Packit |
a4aae4 |
(since the initial version will just support variables, constants,
|
|
Packit |
a4aae4 |
functions and the $<type> array special form) and not function arguments
|
|
Packit |
a4aae4 |
that are general expressions (like array slicing and/or filters).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This comment is here because this is the first place where there is
|
|
Packit |
a4aae4 |
coupling between the CE parser and its scanner. I'm not sure, however,
|
|
Packit |
a4aae4 |
if one string can be parsed by two parsers if they are using two scanners,
|
|
Packit |
a4aae4 |
so extending the Function parser to allow function args to be any CE
|
|
Packit |
a4aae4 |
clause may mean some more serious work with the parsers.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
jhrg 3/10/14 */
|
|
Packit |
a4aae4 |
typedef libdap::D4FunctionParser::token token;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* This was added because of some notes on the net about compiler version
|
|
Packit |
a4aae4 |
issues. I don't know if it's needed when using the C++ mode of flex. */
|
|
Packit |
a4aae4 |
#undef yywrap
|
|
Packit |
a4aae4 |
#define yywrap() 1
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* define yyterminate as this instead of NULL */
|
|
Packit |
a4aae4 |
#define yyterminate() return(token::END)
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* Override flex's YY_FATAL_ERROR hook: instead of the generated scanner's
   default handling, throw a libdap::Error with the malformed_expr code.
   The exception text is the fixed prefix below followed by 'msg'. The rule
   actions in this file also invoke this macro directly to report bad input. */
#define YY_FATAL_ERROR(msg) {\
|
|
Packit |
a4aae4 |
throw(libdap::Error(malformed_expr, std::string("Error scanning function expression text: ") + std::string(msg))); \
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
%}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
%option c++
|
|
Packit |
a4aae4 |
%option yyclass="D4FunctionScanner"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* Use this if several scanners are needed. This will cause flex to
|
|
Packit |
a4aae4 |
#define yyFlexLexer to be <prefix>FlexLexer (the yyFlexLexer is defined
|
|
Packit |
a4aae4 |
in lex.<prefix>.cc). jhrg 8/8/13 */
|
|
Packit |
a4aae4 |
%option prefix="d4_function"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* These two options turn on line counting - useful for error messages -
|
|
Packit |
a4aae4 |
and debugging, respectively. When debugging is on, it's possible to see
|
|
Packit |
a4aae4 |
which scanner rules are used at which points in the input. */
|
|
Packit |
a4aae4 |
%option yylineno
|
|
Packit |
a4aae4 |
%option debug
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* Do not output the default rule (where any unmatched input is echoed to
|
|
Packit |
a4aae4 |
stdout). When set, nodefault will cause the scanner to exit on an error. */
|
|
Packit |
a4aae4 |
%option nodefault
|
|
Packit |
a4aae4 |
/* noyywrap makes the scanner assume that EOF/EOS is the end of the input.
|
|
Packit |
a4aae4 |
If this is not set, the scanner will assume there are more files to
|
|
Packit |
a4aae4 |
scan. */
|
|
Packit |
a4aae4 |
%option noyywrap
|
|
Packit |
a4aae4 |
%option nounput
|
|
Packit |
a4aae4 |
/* When set, warn prints a message when the default rule can be matched
|
|
Packit |
a4aae4 |
but nodefault is given (among other warnings). */
|
|
Packit |
a4aae4 |
%option warn
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
%option batch
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
%x quote
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* This pattern is slightly different from the one used by the CE scanner
|
|
Packit |
a4aae4 |
because it allows a WORD to start with a '#' so that the #<type>
|
|
Packit |
a4aae4 |
array constant syntax can be used in functions. Otherwise, a WORD must
|
|
Packit |
a4aae4 |
be able to contain this hideous mix of characters because a variable
|
|
Packit |
a4aae4 |
can. jhrg 3/10/14 */
|
|
Packit |
a4aae4 |
WORD [-+a-zA-Z0-9_%*\\~@!#][-+a-zA-Z0-9_%*\\~@!#]*
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/* I added these tokens because floating point values may contain dots and
|
|
Packit |
a4aae4 |
adding a '.' to WORD will break the parsing of paths (or make for some
|
|
Packit |
a4aae4 |
fairly obscure code - where $Float32() takes tokens that match 'path').
|
|
Packit |
a4aae4 |
Since we have a separate scanner for the function expressions, might as
|
|
Packit |
a4aae4 |
well add a FLOAT token... jhg 3/17/14
|
|
Packit |
a4aae4 |
FLOAT [-+eE.0-9][-+eE.0-9]*
|
|
Packit |
a4aae4 |
*/
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
%{
|
|
Packit |
a4aae4 |
// Code run each time a pattern is matched
|
|
Packit |
a4aae4 |
#define YY_USER_ACTION loc->columns(yyleng);
|
|
Packit |
a4aae4 |
%}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
%%
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
%{
|
|
Packit |
a4aae4 |
// Code run each time yylex is called
|
|
Packit |
a4aae4 |
loc->step();
|
|
Packit |
a4aae4 |
%}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
"," return token::COMMA;
|
|
Packit |
a4aae4 |
";" return token::SEMICOLON;
|
|
Packit |
a4aae4 |
":" return token::COLON;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
"(" return token::LPAREN;
|
|
Packit |
a4aae4 |
")" return token::RPAREN;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
"/" return token::GROUP_SEP;
|
|
Packit |
a4aae4 |
"." return token::PATH_SEP;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
"$Byte" return token::DOLLAR_BYTE;
|
|
Packit |
a4aae4 |
"$UInt8" return token::DOLLAR_UINT8;
|
|
Packit |
a4aae4 |
"$Int8" return token::DOLLAR_INT8;
|
|
Packit |
a4aae4 |
"$UInt16" return token::DOLLAR_UINT16;
|
|
Packit |
a4aae4 |
"$Int16" return token::DOLLAR_INT16;
|
|
Packit |
a4aae4 |
"$UInt32" return token::DOLLAR_UINT32;
|
|
Packit |
a4aae4 |
"$Int32" return token::DOLLAR_INT32;
|
|
Packit |
a4aae4 |
"$UInt64" return token::DOLLAR_UINT64;
|
|
Packit |
a4aae4 |
"$Int64" return token::DOLLAR_INT64;
|
|
Packit |
a4aae4 |
"$Float32" return token::DOLLAR_FLOAT32;
|
|
Packit |
a4aae4 |
"$Float64" return token::DOLLAR_FLOAT64;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
[ \t]+ /* ignore these */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
[\r\n]+ /* ignore these */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
{WORD} { /* store the matched text as the token's std::string semantic value */ yylval->build<std::string>(yytext); return token::WORD; }
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
<INITIAL><<EOF>> return token::END;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
["] { /* opening quote: enter the exclusive 'quote' state; yymore() keeps this character at the front of yytext */ BEGIN(quote); yymore(); }
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
<quote>[^"\\]* yymore(); /* Anything that's not a double quote or a backslash */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
<quote>[\\]["] yymore(); /* This matches the escaped double quote (\") */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
<quote>[\\]{2} yymore(); /* This matches an escaped escape (\\) */
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
<quote>[\\]{1} {
|
|
Packit |
a4aae4 |
    /* A single backslash inside a quoted string. The two-character rules for
       an escaped quote and an escaped backslash are longer matches and win
       when they apply, so this rule is used only when the backslash is not
       followed by a double quote or another backslash. That is an error:
       return the scanner to the INITIAL state and report it. */
BEGIN(INITIAL);
|
|
Packit |
a4aae4 |
    /* NOTE(review): yytext is not expected to be null here, so this test
       should always pass; presumably it exists only to keep compilers from
       flagging the code generated after the throwing macro as unreachable -
       confirm before removing. */
if (yytext) {
|
|
Packit |
a4aae4 |
YY_FATAL_ERROR("Inside a string, backslash (\\) can escape a double quote or must itself be escaped (\\\\).");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
<quote>["] {
|
|
Packit |
a4aae4 |
/* An unescaped double quote in the 'quote' state indicates the end of the string */
|
|
Packit |
a4aae4 |
BEGIN(INITIAL);
|
|
Packit |
a4aae4 |
    /* Every earlier piece of the string was matched by a rule that called
       yymore(), so yytext now holds the complete string: the opening quote,
       the body with its escape sequences left as written, and this closing
       quote. That whole text becomes the STRING token's value. */
yylval->build<std::string>(yytext);
|
|
Packit |
a4aae4 |
return token::STRING;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
<quote><<EOF>> {
|
|
Packit |
a4aae4 |
    /* The input ended while still inside a quoted string, i.e. the closing
       double quote is missing. */
BEGIN(INITIAL); /* resetting the state is needed for reentrant parsers */
|
|
Packit |
a4aae4 |
    /* YY_FATAL_ERROR is redefined in this file's prologue to throw
       libdap::Error, so control does not return here. */
YY_FATAL_ERROR("Unterminated quote");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
. {
|
|
Packit |
a4aae4 |
    /* Catch-all: any single character (other than a newline) that none of
       the rules above matched. Because 'quote' is declared as an exclusive
       start condition (%x), this unqualified rule is not active inside a
       quoted string. Unrecognized input is an error. */
BEGIN(INITIAL);
|
|
Packit |
a4aae4 |
    /* NOTE(review): yytext is not expected to be null here, so this test
       should always pass; presumably it exists only to keep compilers from
       flagging the code generated after the throwing macro as unreachable -
       confirm before removing. */
if (yytext) {
|
|
Packit |
a4aae4 |
YY_FATAL_ERROR("Characters found in the input were not recognized.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
%%
|