// -*- mode: c++; c-basic-offset:4 -*-
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
// Access Protocol.
// Copyright (c) 2013 OPeNDAP, Inc.
// Author: James Gallagher <jgallagher@opendap.org>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
// A minor edit
%skeleton "lalr1.cc" /* -*- C++ -*- */
// This grammar uses '%define api.value.type variant' (below), which Bison
// only understands from version 3.0 on. Require that version so an older
// Bison stops right away with a clear message instead of mis-processing
// the file.
%require "3.0"
// Also write a header file for the generated parser class.
%defines
// The d4ce_parser.tab.cc and .hh files define and declare this class
// NOTE(review): newer Bison releases appear to prefer the spelling
// 'api.parser.class' for this variable - confirm before changing it, since
// the old spelling is the one that works across all 3.x releases.
%define parser_class_name {D4CEParser}
// D4CEParser is in this namespace
%define api.namespace {libdap}
// Tracing support, verbose syntax error messages and run-time assertions
// in the generated parser.
%define parse.trace
%define parse.error verbose
%define parse.assert
// Could not get this to work with a C++ scanner built by flex. 8/10/13 jhrg
// %define api.token.constructor
// Semantic values are stored in a C++ variant rather than a union, so
// types like std::string can be used directly in %token and %type.
%define api.value.type variant
// Because the code uses the C++ mode of flex, we don't use this. 8/8/13 jhrg
// %define api.prefix { d4_ce }
// Code in a '%code requires' block is for declarations that the semantic
// value and parameter types depend on: the driver class named in %type,
// %lex-param and %parse-param, and the scanner class named in the latter two.
%code requires {
#include "D4ConstraintEvaluator.h"
#include "escaping.h" // for www2id(), applied to WORD tokens in the 'name' rule
// A forward declaration is enough here; D4CEScanner.h itself is included
// after the grammar, just before the yylex() wrapper is defined.
namespace libdap {
class D4CEScanner;
}
}
// Pass both the scanner and the driver objects to both the automatically
// generated parser (%parse-param) and to the yylex() call that it makes
// (%lex-param). Note that in the actions bound to the rules, 'driver'
// means use the 'D4ConstraintEvaluator' instance and 'scanner' means
// the 'D4CEScanner' instance.
%lex-param { D4CEScanner &scanner }
%parse-param { D4CEScanner &scanner }
%lex-param { D4ConstraintEvaluator &driver }
%parse-param { D4ConstraintEvaluator &driver }
// Track locations so that error() is passed the position of a problem.
%locations
%initial-action
{
// Initialize the initial location. This is printed when the parser builds
// its own error messages - when the parse fails, as opposed to when the
// CE names a missing variable, ...
// NOTE(review): presumably driver.expression() yields the text of the
// constraint being parsed, used here in place of a file name - confirm
// against D4ConstraintEvaluator.
@$.initialize (driver.expression());
};
// Code in an unqualified '%code' block is private to the generated parser
// implementation: headers needed by the rule actions and the declaration of
// the yylex() function the parser calls.
%code {
#include <iostream>
#include <cstdlib>
#include <fstream>
#include "BaseType.h"
#include "DMR.h"
#include "D4Group.h"
/* include for all driver functions */
#include "D4ConstraintEvaluator.h"
/* The generated parser calls a free function named yylex(), passing it the
   semantic value, the location and the two %lex-param arguments. The
   scanner provides yylex() as a member function, so this file-local
   wrapper - defined after the grammar, at the end of this file - forwards
   the call to scanner.yylex(). */
static int yylex(libdap::D4CEParser::semantic_type *yylval,
libdap::location *loc,
libdap::D4CEScanner &scanner,
libdap::D4ConstraintEvaluator &driver);
}
// The strings used in the token definitions are used for error messages
// and, in the rules below, as aliases for the token names. WORD and STRING
// are the only tokens that carry a semantic value.
%token <std::string> WORD "word"
%token <std::string> STRING "string"
// %type is used to set the return type of non-terminals; %token sets the
// return type for terminals.
// The bool-valued non-terminals report whether that part of the constraint
// was applied; the string-valued ones build up names and operators.
%type <bool> filter predicate fields indexes subset clause clauses dimension dimensions
%type <std::string> id path group name op
%type <libdap::D4ConstraintEvaluator::index> index
// %type <libdap::D4FilterClause> predicate
// END is explicitly given token number 0.
%token
END 0 "end of file"
// Punctuation and operators; the rules refer to these by their string aliases.
%token
SEMICOLON ";"
PIPE "|"
LBRACKET "["
RBRACKET "]"
COLON ":"
LBRACE "{"
RBRACE "}"
LESS "<"
GREATER ">"
LESS_EQUAL "<="
GREATER_EQUAL ">="
EQUAL "=="
NOT_EQUAL "!="
REGEX_MATCH "~="
LESS_BBOX "<<"
GREATER_BBOX ">>"
MASK "@="
ND "ND"
COMMA ","
ASSIGN "="
GROUP_SEP "/"
PATH_SEP "."
%%
// Top-level rule. A constraint is either a list of clauses, or a list of
// dimension slices followed by ";" and a list of clauses. In both cases the
// combined boolean value is handed to the driver with set_result().
%start expression;
expression
    : clauses
        {
            driver.set_result($1);
        }
    | dimensions ";" clauses
        {
            const bool all_ok = $1 && $3;
            driver.set_result(all_ok);
        }
    ;
// One or more dimension slices separated by ";". The value is true only
// when every slice in the list has the value true.
dimensions
    : dimension
        {
            $$ = $1;
        }
    | dimensions ";" dimension
        {
            $$ = ($1 && $3);
        }
    ;
// A single dimension slice, written 'id = index'. The id and the index are
// passed to driver.slice_dimension() and its result is the value of the rule.
dimension
    : id "=" index
        {
            const bool sliced = driver.slice_dimension($1, $3);
            $$ = sliced;
        }
    ;
// One or more clauses separated by ";". The value is true only when every
// clause in the list has the value true.
clauses
    : clause
        {
            $$ = $1;
        }
    | clauses ";" clause
        {
            $$ = ($1 && $3);
        }
    ;
// Change: I moved the pop_basetype() call out of the 'fields'
// actions in 'subset' so that I could push the basetype for
// all of the cases. That way I'm sure to have a top_basetype()
// when processing the 'filter' part of the grammar. I need the
// top basetype so that I know which Sequence to use. The matching
// pop_basetype() is done here, for both forms of a clause.
// jhrg 4/23/16
clause
    : subset
        {
            $$ = $1;
            driver.pop_basetype();
        }
    // For the DAP4 at this time (3/18/15) filters apply only to D4Sequences
    | subset "|" filter
        {
            driver.pop_basetype();
            $$ = ($1 && $3);
        }
    ;
// A subset names one variable, optionally followed by array indexes and/or
// a brace-enclosed list of fields. There are four alternatives:
//   id                 - mark the variable
//   id indexes         - mark an Array (the indexes were already pushed)
//   id fields          - a Structure/Sequence (or Array of them) with fields
//   id indexes fields  - an indexed Array of Structures/Sequences with fields
//
// In every alternative the name is looked up in the basetype on top of the
// driver's stack when there is one, and from the root group of the DMR
// otherwise. Every alternative also pushes a basetype; the matching
// pop_basetype() is in the 'clause' rule.
//
// mark_variable returns a BaseType* or throws Error
// Note that this is a fairly long production rule with a number
// of different right hand sides spanning about 110 lines.
// jhrg 4/8/16
subset : id
{
    BaseType *btp = 0;
    if (driver.top_basetype()) {
        btp = driver.top_basetype()->var($1);
    }
    else {
        btp = driver.dmr()->root()->find_var($1);
    }
    if (!btp)
        driver.throw_not_found($1, "id");
    $$ = driver.mark_variable(btp);
    // push the basetype so that it is
    // accessible if/while filters are parsed
    driver.push_basetype(btp);
}
| id indexes
{
    BaseType *btp = 0;
    if (driver.top_basetype()) {
        btp = driver.top_basetype()->var($1);
    }
    else {
        btp = driver.dmr()->root()->find_var($1);
    }
    if (!btp)
        driver.throw_not_found($1, "id indexes");
    // Indexes only make sense for an Array
    if (btp->type() != dods_array_c)
        driver.throw_not_array($1, "id indexes");
    $$ = driver.mark_variable(btp);
    // push the basetype so that it is
    // accessible if/while filters are parsed
    driver.push_basetype(btp);
}
// Note this case is '| id fields'
| id
{
    // Mid-rule action: runs once the id has been read and before the fields
    // are parsed, so that the fields are resolved relative to this variable.
    BaseType *btp = 0;
    if (driver.top_basetype()) {
        btp = driver.top_basetype()->var($1);
    }
    else {
        btp = driver.dmr()->root()->find_var($1);
    }
    if (!btp)
        driver.throw_not_found($1, "id fields");
    if (btp->type() == dods_array_c) {
        if (btp->var() && !btp->var()->is_constructor_type())
            throw Error(no_such_variable, "The variable " + $1 + " must be a Structure or Sequence to be used with {}.");
        // This call also tests the btp to make sure it's an array
        driver.mark_array_variable(btp);
    }
    else {
        // Don't mark the variable here because only some fields are to be sent and those
        // will be marked when the fields are parsed
        if (!btp->is_constructor_type())
            throw Error(no_such_variable, "The variable " + $1 + " must be a Structure or Sequence to be used with {}.");
    }
    // push the basetype so that it is
    // accessible when fields and if/while filters are parsed
    driver.push_basetype(btp);
}
fields
{
    //driver.pop_basetype();
    $$ = true;
}
// Note this case is '| id indexes fields'
| id indexes
{
    // Mid-rule action: runs before the fields are parsed (see '| id fields').
    BaseType *btp = 0;
    if (driver.top_basetype()) {
        btp = driver.top_basetype()->var($1);
    }
    else {
        btp = driver.dmr()->root()->find_var($1);
    }
    if (!btp)
        driver.throw_not_found($1, "id indexes fields");
    if (btp->type() != dods_array_c)
        driver.throw_not_array($1, "id indexes fields");
    // This call also tests the btp to make sure it's an array
    driver.mark_array_variable(btp);
    // The fields are resolved against what btp->var() returns for the array,
    // so it must exist and be a constructor. Test for null first: without
    // that guard a null result would be dereferenced here (the '| id fields'
    // case above already guards against it).
    BaseType *elem = btp->var();
    if (!elem || !elem->is_constructor_type())
        throw Error(no_such_variable, "The variable " + $1 + " must be a Structure or Sequence to be used with {}.");
    driver.push_basetype(elem);
}
fields
{
    //driver.pop_basetype();
    $$ = true;
}
// The following has been removed from the syntax
// | fields indexes { $$ = true; }
;
// One or more indexes. push_index stores the index in the
// D4ConstraintEvaluator; each index is pushed as soon as it has been
// recognized, so the driver receives the indexes in left-to-right order.
indexes
    : index
        {
            driver.push_index($1);
            $$ = true;
        }
    | index
        {
            // mid-rule action: runs before the remaining indexes are parsed
            driver.push_index($1);
        }
      indexes
        {
            $$ = $3;
        }
    ;
// Note that the index values are scanned as WORDs but the web escaping is
// not supported as it is for identifiers. jhrg 10/20/16
//
// Each alternative passes the WORDs it matched, in order, to
// driver.make_index(); the two forms that contain a single ":" pass the
// literal 1 as the second argument.
// NOTE(review): presumably the six forms are [], [i], [start:stop],
// [start:stride:stop], [start:] and [start:stride:] - confirm against
// D4ConstraintEvaluator::make_index().
// An empty index: no WORDs
index : "[" "]" { $$ = driver.make_index(); }
// A single value
| "[" WORD "]" { $$ = driver.make_index($2); }
// Two values; 1 is supplied between them
| "[" WORD ":" WORD "]" { $$ = driver.make_index($2, 1, $4); }
// Three values
| "[" WORD ":" WORD ":" WORD "]" { $$ = driver.make_index($2, $4, $6); }
// One value and a trailing ":"; 1 is supplied as the second argument
| "[" WORD ":" "]" { $$ = driver.make_index($2, 1); }
// Two values and a trailing ":"
| "[" WORD ":" WORD ":" "]" { $$ = driver.make_index($2, $4); }
;
// A brace-enclosed list of clauses; the value of the rule is the value of
// the enclosed clauses.
fields
    : "{" clauses "}"
        {
            $$ = $2;
        }
    ;
// A filter should return a FilterClauseList; a predicate should return a
// single FilterClause. As written, both only carry a boolean value. A
// filter is one or more predicates separated by ",".
filter
    : predicate
        {
            $$ = true;
        }
    | filter "," predicate
        {
            $$ = ($1 && $3);
        }
    ;
// Here we use a grammar that is overly general: id op id is not really
// supported by the CE evaluator. However, id op constant, which captures
// the intent of the evaluator design, introduces a number of reduce/reduce
// conflicts because any sensible definition of 'constant' will be the
// same as the definition of 'name'. This happens because we must make 'name'
// far more general than ideal (it must include tokens that start with digits,
// odd characters that clash with the operators, et cetera).
predicate
    : id op id
        {
            driver.add_filter_clause($2, $1, $3);
            $$ = true;
        }
    | id op id op id
        {
            // 'a op1 b op2 c' is recorded as two clauses: 'a op1 b' and 'b op2 c'
            driver.add_filter_clause($2, $1, $3);
            driver.add_filter_clause($4, $3, $5);
            $$ = true;
        }
    | "ND" "=" id
        {
            throw Error(malformed_expr, "The 'ND' operator is not currently supported.");
        }
    ;
// See http://docs.opendap.org/index.php/DAP4:_Constraint_Expressions,_v2
// for a discussion of filters that's quite a bit longer than the current
// draft spec. << and >> are the 'less than bbox' and '> bbox' operations
// that I'm not so sure about now; @= is the same as *= and is the mapping
// operation. jhrg 3/18/15
//
// The value of 'op' is the text of the operator that was matched.
op
    : "<"
        {
            $$.assign("<");
        }
    | ">"
        {
            $$.assign(">");
        }
    | "<="
        {
            $$.assign("<=");
        }
    | ">="
        {
            $$.assign(">=");
        }
    | "=="
        {
            $$.assign("==");
        }
    | "!="
        {
            $$.assign("!=");
        }
    | "~="
        {
            $$.assign("~=");
        }
    | "<<"
        {
            $$.assign("<<");
        }
    | ">>"
        {
            $$.assign(">>");
        }
    | "@="
        {
            $$.assign("@=");
        }
    ;
// An id is a path, optionally qualified: either with a leading "/" or with
// a group followed by "/". The value is the text of the whole name with the
// separators put back in.
id
    : path
        {
            $$ = $1;
        }
    | "/" path
        {
            $$ = "/" + $2;
        }
    | group "/" path
        {
            $$ = $1 + "/" + $3;
        }
    ;
// A group is one or more names, each preceded by "/". The value is the text
// of the group with the separators put back in.
group
    : "/" name
        {
            $$ = "/" + $2;
        }
    | group "/" name
        {
            $$ = $1 + "/" + $3;
        }
    ;
// A path is one or more names separated by ".". The value is the text of
// the path with the separators put back in.
path
    : name
        {
            $$ = $1;
        }
    | path "." name
        {
            $$ = $1 + "." + $3;
        }
    ;
// Because some formats/datasets allow 'any' name for a variable, it's possible
// that a variable name will be a number, etc. The grammar also allows STRING
// to support "name"."name with spaces and dots (.)".x
//
// I added calls here to remove the double quotes because they were breaking
// the parse for STRINGs and also added www2id() for WORDs (so that %20, etc.
// can be used for escaping stuff). However, the two cannot be mixed - if the
// parser is passed "Point%20Break" the %20 will remain as a literal in the
// STRING. jhrg 10/20/16
name
    : WORD
        {
            // decode web escapes in an unquoted name
            $$ = www2id($1);
        }
    | STRING
        {
            // strip the surrounding double quotes; escapes are left alone
            $$ = driver.remove_quotes($1);
        }
    ;
%%
// Forward the error to the driver for handling. The location parameter
// provides the line number and character position of the error.
void
libdap::D4CEParser::error(const location_type &l, const std::string &m)
{
driver.error(l, m);
}
/* The scanner class must be complete here so that scanner.yylex() and
   scanner.set_debug() can be called. */
#include "D4CEScanner.h"

/* Free-function yylex() called by the generated parser. It switches scanner
   debugging on when the driver asks for scanner tracing and then returns
   whatever the scanner's own yylex() returns for the next token. */
static int yylex(libdap::D4CEParser::semantic_type *yylval,
                 libdap::location *loc,
                 libdap::D4CEScanner &scanner,
                 libdap::D4ConstraintEvaluator &driver)
{
    const bool tracing = driver.trace_scanning();
    if (tracing) {
        scanner.set_debug(true);
    }

    return scanner.yylex(yylval, loc);
}