Blame d4_ce/d4_ce_scanner.ll

Packit a4aae4
Packit a4aae4
/*
Packit a4aae4
 -*- mode: c++; c-basic-offset:4 -*-
Packit a4aae4
Packit a4aae4
 This file is part of libdap, A C++ implementation of the OPeNDAP Data
Packit a4aae4
 Access Protocol.
Packit a4aae4
Packit a4aae4
 Copyright (c) 2013 OPeNDAP, Inc.
Packit a4aae4
 Author: James Gallagher <jgallagher@opendap.org>
Packit a4aae4
Packit a4aae4
 This library is free software; you can redistribute it and/or
Packit a4aae4
 modify it under the terms of the GNU Lesser General Public
Packit a4aae4
 License as published by the Free Software Foundation; either
Packit a4aae4
 version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
 
Packit a4aae4
 This library is distributed in the hope that it will be useful,
Packit a4aae4
 but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
 Lesser General Public License for more details.
Packit a4aae4
 
Packit a4aae4
 You should have received a copy of the GNU Lesser General Public
Packit a4aae4
 License along with this library; if not, write to the Free Software
Packit a4aae4
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
Packit a4aae4
Packit a4aae4
 You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
Packit a4aae4
*/ 
Packit a4aae4
Packit a4aae4
%{ /* -*- C++ -*- */
Packit a4aae4
//#include "config.h"
Packit a4aae4
Packit a4aae4
#include <string>
Packit a4aae4
#include "Error.h"
Packit a4aae4
Packit a4aae4
#include "D4CEScanner.h"
Packit a4aae4
Packit a4aae4
/* Shorthand so the rule actions can write token::X instead of spelling out
   libdap::D4CEParser::token::X on every return. */
Packit a4aae4
typedef libdap::D4CEParser::token token;
Packit a4aae4
Packit a4aae4
/* This was added because of some notes on the net about compiler version
Packit a4aae4
   issues. I don't know if it's needed when using the C++ mode of flex.
   Any earlier yywrap macro is dropped and every yywrap() call now expands
   to the constant 1. */
Packit a4aae4
#undef yywrap
Packit a4aae4
#define yywrap() 1
Packit a4aae4
Packit a4aae4
/* Make yyterminate() return the END token instead of the default NULL
   value. */
Packit a4aae4
#define yyterminate() return(token::END)
Packit a4aae4
Packit a4aae4
/* Report scanner failures by throwing libdap::Error with the malformed_expr
   code; the message is a fixed prefix followed by msg. Because the macro
   throws, control never continues past the point where it is used. */
#define YY_FATAL_ERROR(msg) {\
Packit a4aae4
    throw(libdap::Error(malformed_expr, std::string("Error scanning constraint expression text: ") + std::string(msg))); \
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
%}
Packit a4aae4
Packit a4aae4
/* Generate a C++ scanner class instead of a C yylex() function. */
%option c++
Packit a4aae4
/* Place the generated yylex() in the D4CEScanner class.
   NOTE(review): that class is presumably declared in D4CEScanner.h - confirm. */
%option yyclass="D4CEScanner"
Packit a4aae4
Packit a4aae4
/* Use this if several scanners are needed. This will cause flex to
Packit a4aae4
   #define yyFlexLexer to be <prefix>FlexLexer (the yyFlexLexer is defined
Packit a4aae4
   in lex.<prefix>.cc). jhrg 8/8/13 */
Packit a4aae4
%option prefix="d4_ce"
Packit a4aae4
Packit a4aae4
/* These two options turn on line counting - useful for error messages - 
Packit a4aae4
   and debugging, respectively. When debugging is on, it's possible to see
Packit a4aae4
   which scanner rules are used at which points in the input. */
Packit a4aae4
%option yylineno
Packit a4aae4
%option debug
Packit a4aae4
Packit a4aae4
/* Do not output the default rule (where any unmatched input is echoed to 
Packit a4aae4
   stdout). When set, nodefault will cause the scanner to exit on an error. */
Packit a4aae4
%option nodefault
Packit a4aae4
/* noyywrap makes the scanner assume that EOF/EOS is the end of the input.
Packit a4aae4
   If this is not set, the scanner calls yywrap() at end of input to ask
Packit a4aae4
   whether there is more input to scan. */
Packit a4aae4
%option noyywrap
Packit a4aae4
/* Do not generate the unput routine; no rule in this file calls it. */
%option nounput
Packit a4aae4
/* When set, warn prints a message when the default rule can be matched
Packit a4aae4
   but nodefault is given (among other warnings). */
Packit a4aae4
%option warn
Packit a4aae4
Packit a4aae4
/* Generate a batch (non-interactive) scanner. */
%option batch
Packit a4aae4
Packit a4aae4
/* Exclusive start condition for the inside of a double-quoted string: while
   it is active only the rules tagged <quote> are considered. */
%x quote
Packit a4aae4
Packit a4aae4
/* This pattern just ensures that a word does not start with '#' which
Packit a4aae4
   is the DAP2 comment character.
Packit a4aae4
Packit a4aae4
   Having the characters !, ~, and @ in the second set of the chars allowed
Packit a4aae4
   in a WORD token meant that 'var!=' parsed as WORD == 'var!' and '=' and not
Packit a4aae4
   'var' and '!='. I see that in DAP2 I did not include these in the definition
Packit a4aae4
   of a WORD. They are still accepted as the first character of a WORD.
Packit a4aae4
   jhrg 4/29/16 */
Packit a4aae4
   
Packit a4aae4
WORD    [-+a-zA-Z0-9_%*\\!~@][-+a-zA-Z0-9_%*\\#]* 
Packit a4aae4
Packit a4aae4
%{
Packit a4aae4
// Code run each time a pattern is matched, before that rule's own action:
// pass the length of the matched text (yyleng) to loc->columns().
// NOTE(review): loc is presumably the parser location object kept by the
// scanner class - confirm in D4CEScanner.h.
Packit a4aae4
#define YY_USER_ACTION loc->columns(yyleng);
Packit a4aae4
%}
Packit a4aae4
Packit a4aae4
%%
Packit a4aae4
Packit a4aae4
%{
Packit a4aae4
// Code run each time yylex is called: call loc->step() before scanning the
// next token. NOTE(review): presumably this moves the start of the tracked
// location up to its current end - confirm against the location class.
Packit a4aae4
loc->step();
Packit a4aae4
%}
Packit a4aae4
Packit a4aae4
    /* Punctuation: each literal below maps directly to one parser token. */
"["     return token::LBRACKET;
Packit a4aae4
"]"     return token::RBRACKET;
Packit a4aae4
":"     return token::COLON;
Packit a4aae4
","		return token::COMMA;
Packit a4aae4
";"		return token::SEMICOLON;
Packit a4aae4
"|"     return token::PIPE;
Packit a4aae4
"{"		return token::LBRACE;
Packit a4aae4
"}"		return token::RBRACE;
Packit a4aae4
"/"     return token::GROUP_SEP;
Packit a4aae4
"."     return token::PATH_SEP;
Packit a4aae4
"="     return token::ASSIGN;
Packit a4aae4
Packit a4aae4
    /* Comparison and filter operators. The scanner prefers the longest match,
       so a two-character operator wins over its one-character prefix. */
"=="    return token::EQUAL;
Packit a4aae4
"!="    return token::NOT_EQUAL;
Packit a4aae4
">"	    return token::GREATER;
Packit a4aae4
">="    return token::GREATER_EQUAL;
Packit a4aae4
"<"     return token::LESS;
Packit a4aae4
"<="    return token::LESS_EQUAL;
Packit a4aae4
"~="    return token::REGEX_MATCH;
Packit a4aae4
"<<"    return token::LESS_BBOX;
Packit a4aae4
">>"    return token::GREATER_BBOX;
Packit a4aae4
"@="    return token::MASK;
Packit a4aae4
Packit a4aae4
[ \t]+  { loc->step(); /* skip blanks and restart the location so the span of the next token does not include them */ }
Packit a4aae4
Packit a4aae4
(\r\n|\r|\n)  {
            /* Skip one line terminator at a time (CRLF is matched as a single
               terminator). YY_USER_ACTION has already counted the characters
               as columns; lines(1) moves the location to the next line, and
               step() restarts the location so the next token begins there. */
            loc->lines(1);
            loc->step();
        }
Packit a4aae4
Packit a4aae4
{WORD}  { /* pass the matched text to the parser as the string value of a WORD token */ yylval->build<std::string>(yytext); return token::WORD; }
Packit a4aae4
Packit a4aae4
<INITIAL><<EOF>> return token::END; /* end of input while outside a quoted string */
Packit a4aae4
Packit a4aae4
    /* Quoted strings. An opening double quote enters the quote start
       condition. Every piece matched there calls yymore(), so yytext keeps
       growing and finally holds the whole string, with its quotes and
       backslashes. NOTE(review): yymore() also makes yyleng cumulative, so the
       loc->columns(yyleng) done by YY_USER_ACTION probably over-advances the
       column inside strings - confirm. */
["]    { BEGIN(quote); yymore(); }
Packit a4aae4
Packit a4aae4
<quote>[^"\\]*  yymore(); /* Anything that's not a double quote or a backslash */
Packit a4aae4
Packit a4aae4
<quote>[\\]["]	yymore(); /* This matches the escaped double quote (\") */
Packit a4aae4
Packit a4aae4
<quote>[\\]{2}  yymore(); /* This matches an escaped escape (\\) */
Packit a4aae4
Packit a4aae4
<quote>[\\]{1}  {
                    /* A backslash that the two escape rules did not consume
                       is invalid. Leave the quote state first, then report
                       the error; the macro throws, so nothing after it runs.
                       NOTE(review): yytext is the match buffer and is never
                       null here, so the test is always true; presumably it
                       only exists to quiet unreachable-code warnings for the
                       code emitted after this action - confirm before
                       removing it. */
Packit a4aae4
                    BEGIN(INITIAL);
Packit a4aae4
                    if (yytext) {
Packit a4aae4
                        YY_FATAL_ERROR("Inside a string, backslash (\\) can escape a double quote or must itself be escaped (\\\\).");
Packit a4aae4
                    }
Packit a4aae4
                }
Packit a4aae4
Packit a4aae4
    /* The value handed to the parser below is the accumulated yytext, so it
       still carries the surrounding quotes and the escape backslashes. */
<quote>["]  { 
Packit a4aae4
                /* An unescaped double quote in the 'quote' state indicates the end of the string */
Packit a4aae4
                BEGIN(INITIAL); 
Packit a4aae4
                yylval->build<std::string>(yytext);
Packit a4aae4
                return token::STRING;
Packit a4aae4
            }
Packit a4aae4
Packit a4aae4
<quote><<EOF>>	{
                  /* The input ended before the closing double quote was seen:
                     restore the start condition, then raise the error. */
Packit a4aae4
                  BEGIN(INITIAL);   /* resetting the state is needed for reentrant parsers */
Packit a4aae4
                  YY_FATAL_ERROR("Unterminated quote");
Packit a4aae4
                }
Packit a4aae4
Packit a4aae4
.   {
        /* Catch-all for any single character, other than a newline, that no
           earlier rule matched. The quote start condition is exclusive, so
           this untagged rule only applies outside strings and the
           BEGIN(INITIAL) is presumably just defensive.
           NOTE(review): yytext is never null here, so the test is always
           true; it presumably only exists to quiet unreachable-code warnings
           for the code emitted after this action - confirm before removing. */
Packit a4aae4
        BEGIN(INITIAL);
Packit a4aae4
        if (yytext) {
Packit a4aae4
            YY_FATAL_ERROR("Characters found in the input were not recognized.");
Packit a4aae4
        }
Packit a4aae4
    }
Packit a4aae4
%%