Blame d4_ce/d4_ce_parser.yy

Packit a4aae4
Packit a4aae4
// -*- mode: c++; c-basic-offset:4 -*-
Packit a4aae4
Packit a4aae4
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
Packit a4aae4
// Access Protocol.
Packit a4aae4
Packit a4aae4
// Copyright (c) 2013 OPeNDAP, Inc.
Packit a4aae4
// Author: James Gallagher <jgallagher@opendap.org>
Packit a4aae4
//
Packit a4aae4
// This library is free software; you can redistribute it and/or
Packit a4aae4
// modify it under the terms of the GNU Lesser General Public
Packit a4aae4
// License as published by the Free Software Foundation; either
Packit a4aae4
// version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
// 
Packit a4aae4
// This library is distributed in the hope that it will be useful,
Packit a4aae4
// but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
// Lesser General Public License for more details.
Packit a4aae4
// 
Packit a4aae4
// You should have received a copy of the GNU Lesser General Public
Packit a4aae4
// License along with this library; if not, write to the Free Software
Packit a4aae4
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
Packit a4aae4
//
Packit a4aae4
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
Packit a4aae4
Packit a4aae4
// A minor edit
Packit a4aae4
Packit a4aae4
%skeleton "lalr1.cc" /* -*- C++ -*- */
Packit a4aae4
%require "2.5"
Packit a4aae4
%defines
Packit a4aae4
Packit a4aae4
// The d4ce_parser.tab.cc and .hh files define and declare this class
Packit a4aae4
%define parser_class_name {D4CEParser}
Packit a4aae4
// D4CEParser is in this namespace
Packit a4aae4
%define api.namespace {libdap}
Packit a4aae4
Packit a4aae4
%define parse.trace
Packit a4aae4
%define parse.error verbose
Packit a4aae4
%define parse.assert
Packit a4aae4
Packit a4aae4
// Could not get this to work with a C++ scanner built by flex. 8/10/13 jhrg
Packit a4aae4
// %define api.token.constructor
Packit a4aae4
%define api.value.type variant
Packit a4aae4
Packit a4aae4
// Because the code uses the C++ mode of flex, we don't use this. 8/8/13 jhrg
Packit a4aae4
// %define api.prefix { d4_ce }
Packit a4aae4
Packit a4aae4
%code requires {
Packit a4aae4
#include "D4ConstraintEvaluator.h"
Packit a4aae4
#include "escaping.h" // for www2id() used with WORD and STRING
Packit a4aae4
namespace libdap {
Packit a4aae4
    class D4CEScanner;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
// Pass both the scanner and parser objects to both the automatically generated
Packit a4aae4
// parser and scanner. Note that in the actions bound to the rules, 'driver' 
Packit a4aae4
// means use the 'D4ConstraintEvaluator' instance.
Packit a4aae4
%lex-param   { D4CEScanner  &scanner  }
Packit a4aae4
%parse-param { D4CEScanner  &scanner  }
Packit a4aae4
Packit a4aae4
%lex-param   { D4ConstraintEvaluator  &driver  }
Packit a4aae4
%parse-param { D4ConstraintEvaluator  &driver  }
Packit a4aae4
Packit a4aae4
%locations
Packit a4aae4
%initial-action
Packit a4aae4
{
Packit a4aae4
    // Initialize the initial location. This is printed when the parser builds
Packit a4aae4
    // its own error messages - when the parse fails as opposed to when the 
Packit a4aae4
    // CE names a missing variable, ...
Packit a4aae4
Packit a4aae4
    @$.initialize (driver.expression());
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
%code {
Packit a4aae4
   #include <iostream>
Packit a4aae4
   #include <cstdlib>
Packit a4aae4
   #include <fstream>
Packit a4aae4
   
Packit a4aae4
   #include "BaseType.h"
Packit a4aae4
   #include "DMR.h"
Packit a4aae4
   #include "D4Group.h"
Packit a4aae4
Packit a4aae4
   /* include for all driver functions */
Packit a4aae4
   #include "D4ConstraintEvaluator.h"
Packit a4aae4
Packit a4aae4
   /* this is silly, but I can't figure out a way around it */
Packit a4aae4
   static int yylex(libdap::D4CEParser::semantic_type *yylval,
Packit a4aae4
                    libdap::location *loc,
Packit a4aae4
                    libdap::D4CEScanner  &scanner,
Packit a4aae4
                    libdap::D4ConstraintEvaluator   &driver);
Packit a4aae4
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
// The strings used in the token definitions are used for error messages
Packit a4aae4
%token <std::string> WORD "word"
Packit a4aae4
%token <std::string> STRING "string"
Packit a4aae4
Packit a4aae4
// %type is used to set the return type of non-terminals; %token sets the
Packit a4aae4
// return type for terminals.
Packit a4aae4
%type <bool> filter predicate fields indexes subset clause clauses dimension dimensions
Packit a4aae4
%type <std::string> id path group name op
Packit a4aae4
Packit a4aae4
%type <libdap::D4ConstraintEvaluator::index> index
Packit a4aae4
// %type <libdap::D4FilterClause> predicate
Packit a4aae4
Packit a4aae4
%token
Packit a4aae4
    END  0  "end of file"
Packit a4aae4
  
Packit a4aae4
%token 
Packit a4aae4
    SEMICOLON ";"
Packit a4aae4
    PIPE "|"
Packit a4aae4
Packit a4aae4
    LBRACKET "["
Packit a4aae4
    RBRACKET "]"
Packit a4aae4
    COLON ":"
Packit a4aae4
Packit a4aae4
    LBRACE "{"
Packit a4aae4
    RBRACE "}"
Packit a4aae4
Packit a4aae4
    LESS "<"
Packit a4aae4
    GREATER ">"
Packit a4aae4
    LESS_EQUAL "<="
Packit a4aae4
    GREATER_EQUAL ">="
Packit a4aae4
    EQUAL "=="
Packit a4aae4
    NOT_EQUAL "!="
Packit a4aae4
    REGEX_MATCH "~="
Packit a4aae4
Packit a4aae4
    LESS_BBOX "<<"
Packit a4aae4
    GREATER_BBOX ">>"
Packit a4aae4
Packit a4aae4
    MASK "@="
Packit a4aae4
    ND "ND"
Packit a4aae4
Packit a4aae4
    COMMA ","
Packit a4aae4
Packit a4aae4
    ASSIGN "="
Packit a4aae4
Packit a4aae4
    GROUP_SEP "/"
Packit a4aae4
    PATH_SEP "."
Packit a4aae4
    
Packit a4aae4
%%
Packit a4aae4
Packit a4aae4
%start expression;
Packit a4aae4
Packit a4aae4
// Start symbol. A constraint is either a list of clauses, or a list of
// dimension slices followed by ";" and a list of clauses. The combined
// boolean value is handed to the driver with set_result().
expression
    : clauses
    {
        driver.set_result($1);
    }
    | dimensions ";" clauses
    {
        const bool dims_ok = $1;
        const bool clauses_ok = $3;
        driver.set_result(dims_ok && clauses_ok);
    }
    ;
Packit a4aae4
Packit a4aae4
// One or more 'dimension' items separated by ";". The value is true only
// when every item in the list yielded true.
dimensions
    : dimension
    {
        $$ = $1;
    }
    | dimensions ";" dimension
    {
        const bool earlier_ok = $1;
        $$ = earlier_ok && $3;
    }
    ;
Packit a4aae4
Packit a4aae4
// A single "id = index" item. The id and the index are passed to
// driver.slice_dimension(); its return value becomes the value of the rule.
dimension
    : id "=" index
    {
        $$ = driver.slice_dimension($1, $3);
    }
    ;
Packit a4aae4
Packit a4aae4
// One or more 'clause' items separated by ";". The value is the logical AND
// of the values of the individual clauses.
clauses
    : clause
    {
        $$ = $1;
    }
    | clauses ";" clause
    {
        const bool earlier_ok = $1;
        $$ = earlier_ok && $3;
    }
    ;
Packit a4aae4
    
Packit a4aae4
// Change: I moved the pop_basetype() call out of the 'fields'
Packit a4aae4
// actions in 'subset' so that I could push the basetype for
Packit a4aae4
// all of the cases. That way I'm sure to have a top_basetype()
Packit a4aae4
// when processing the 'filter' part of the grammar. I need the
Packit a4aae4
// top basetype so that I know which Sequence to use.
Packit a4aae4
// jhrg 4/23/16
Packit a4aae4
               
Packit a4aae4
// A clause is a subset, optionally followed by "|" and a filter. Every
// alternative of 'subset' pushes a basetype onto the driver's stack; it is
// popped here, after any filter has been parsed.
clause
    : subset
    {
        driver.pop_basetype();
        $$ = $1;
    }
    // For DAP4 at this time (3/18/15) filters apply only to D4Sequences.
    | subset "|" filter
    {
        driver.pop_basetype();
        const bool subset_ok = $1;
        $$ = subset_ok && $3;
    }
    ;
Packit a4aae4
Packit a4aae4
// mark_variable returns a BaseType* or throws Error
Packit a4aae4
// Note that this is a fairly long production rule with a number
Packit a4aae4
// of different right hand sides spanning about 110 lines.
Packit a4aae4
// jhrg 4/8/16
Packit a4aae4
subset : id
{
    // Look the name up in the basetype on top of the driver's stack when
    // there is one; otherwise search starting at the DMR's root group.
    BaseType *var = driver.top_basetype()
        ? driver.top_basetype()->var($1)
        : driver.dmr()->root()->find_var($1);

    if (var == 0)
        driver.throw_not_found($1, "id");

    $$ = driver.mark_variable(var);

    // Leave the variable on the stack so that it is available while an
    // optional filter is parsed; the 'clause' rule pops it.
    driver.push_basetype(var);
}
Packit a4aae4
Packit a4aae4
| id indexes
{
    // Look the name up in the basetype on top of the driver's stack when
    // there is one; otherwise search starting at the DMR's root group.
    BaseType *var = driver.top_basetype()
        ? driver.top_basetype()->var($1)
        : driver.dmr()->root()->find_var($1);

    if (var == 0)
        driver.throw_not_found($1, "id indexes");

    // Indexes are only accepted on arrays.
    const bool is_array = (var->type() == dods_array_c);
    if (!is_array)
        driver.throw_not_array($1, "id indexes");

    $$ = driver.mark_variable(var);

    // Leave the variable on the stack so that it is available while an
    // optional filter is parsed; the 'clause' rule pops it.
    driver.push_basetype(var);
}
Packit a4aae4
Packit a4aae4
// This alternative is '| id fields'. The first action is a mid-rule action
// that runs before 'fields' is parsed.
| id
{
    BaseType *var = driver.top_basetype()
        ? driver.top_basetype()->var($1)
        : driver.dmr()->root()->find_var($1);

    if (var == 0)
        driver.throw_not_found($1, "id fields");

    if (var->type() != dods_array_c) {
        // The variable itself is not marked here: only some of its fields
        // are to be sent, and those are marked as the fields are parsed.
        if (!var->is_constructor_type())
            throw Error(no_such_variable, "The variable " + $1 + " must be a Structure or Sequence to be used with {}.");
    }
    else {
        if (var->var() && !var->var()->is_constructor_type())
            throw Error(no_such_variable, "The variable " + $1 + " must be a Structure or Sequence to be used with {}.");

        // This call also checks that the variable is an array
        driver.mark_array_variable(var);
    }

    // Make the variable available while the fields, and any filter that
    // follows, are parsed.
    driver.push_basetype(var);
}
fields
{
    // The basetype pushed above is not popped here; 'clause' does that.
    $$ = true;
}
Packit a4aae4
Packit a4aae4
// This alternative is '| id indexes fields'. The first action is a mid-rule
// action that runs before 'fields' is parsed.
| id indexes
{
    // Look the name up in the basetype on top of the driver's stack when
    // there is one; otherwise search starting at the DMR's root group.
    BaseType *btp = 0;
    if (driver.top_basetype()) {
        btp = driver.top_basetype()->var($1);
    }
    else {
        btp = driver.dmr()->root()->find_var($1);
    }

    if (!btp)
        driver.throw_not_found($1, "id indexes fields");

    if (btp->type() != dods_array_c)
        driver.throw_not_array($1, "id indexes fields");

    // This call also tests the btp to make sure it's an array
    driver.mark_array_variable(btp);

    // The fields are looked up in the array's element variable, so that
    // element must exist and be a constructor type. Check for null before
    // dereferencing it; a null element must not be pushed as the basetype.
    BaseType *element = btp->var();
    if (!element || !element->is_constructor_type())
        throw Error(no_such_variable, "The variable " + $1 + " must be a Structure or Sequence to be used with {}.");

    // Push the element (not the array) so that the names inside {} are
    // resolved against it; the 'clause' rule pops it.
    driver.push_basetype(element);
}
fields
{
    // The basetype pushed above is not popped here; 'clause' does that.
    $$ = true;
}
Packit a4aae4
Packit a4aae4
// The following has been removed from the syntax
Packit a4aae4
// | fields indexes { $$ = true; }
Packit a4aae4
;
Packit a4aae4
Packit a4aae4
// push_index stores the index in the D4ConstraintEvaluator
Packit a4aae4
// One or more bracketed indexes. Each one is given to the driver with
// push_index() as soon as it has been parsed.
indexes
    : index
    {
        driver.push_index($1);
        $$ = true;
    }
    | index
    {
        // Mid-rule action: store this index before the rest are parsed.
        driver.push_index($1);
    }
    indexes
    {
        // The mid-rule action occupies $2, so the nested 'indexes' is $3.
        $$ = $3;
    }
    ;
Packit a4aae4
Packit a4aae4
// Note that the index values are scanned as WORDs but the web escaping is
Packit a4aae4
// not supported as it is for identifiers. jhrg 10/20/16
Packit a4aae4
// The accepted bracket forms. Where a form has only one WORD between the
// first and last positions, the literal 1 is passed to make_index() in place
// of the middle WORD.
index
    : "[" "]"
    {
        $$ = driver.make_index();
    }
    | "[" WORD "]"
    {
        $$ = driver.make_index($2);
    }
    | "[" WORD ":" WORD "]"
    {
        $$ = driver.make_index($2, 1, $4);
    }
    | "[" WORD ":" WORD ":" WORD "]"
    {
        $$ = driver.make_index($2, $4, $6);
    }
    | "[" WORD ":" "]"
    {
        $$ = driver.make_index($2, 1);
    }
    | "[" WORD ":" WORD ":" "]"
    {
        $$ = driver.make_index($2, $4);
    }
    ;
Packit a4aae4
        
Packit a4aae4
// A brace-enclosed list of clauses; the value is that of the enclosed list.
fields
    : "{" clauses "}"
    {
        $$ = $2;
    }
    ;
Packit a4aae4
Packit a4aae4
// A filter should return a FilterClauseList; a predicate should return a single
Packit a4aae4
// FilterClause.
Packit a4aae4
Packit a4aae4
// One or more predicates separated by ",". The value is the logical AND of
// the predicate values. The single-predicate case propagates the predicate's
// own value, matching the recursive case and the other list rules in this
// grammar, instead of unconditionally reporting true.
filter
    : predicate
    {
        $$ = $1;
    }
    | filter "," predicate
    {
        $$ = $1 && $3;
    }
    ;
Packit a4aae4
Packit a4aae4
// Here we use a grammar that is overly general: id op id is not really
Packit a4aae4
// supported by the CE evaluator. However, id op constant, which captures
Packit a4aae4
// the intent of the evaluator design introduces a number of reduce/reduce
Packit a4aae4
// conflicts because any sensible definition of 'constant' will be the
Packit a4aae4
// same as the definition of 'name'. This happens because we must make 'name'
Packit a4aae4
// far more general than ideal (it must include tokens that start with digits,
Packit a4aae4
// odd characters that clash with the operators, et cetera).
Packit a4aae4
Packit a4aae4
// Each comparison is registered with the driver through
// add_filter_clause(operator, left operand, right operand).
predicate
    : id op id
    {
        driver.add_filter_clause($2, $1, $3);
        $$ = true;
    }
    | id op id op id
    {
        // A chained comparison becomes two clauses that share the middle id.
        driver.add_filter_clause($2, $1, $3);
        driver.add_filter_clause($4, $3, $5);
        $$ = true;
    }
    | "ND" "=" id
    {
        throw Error(malformed_expr, "The 'ND' operator is not currently supported.");
    }
    ;
Packit a4aae4
Packit a4aae4
// See http://docs.opendap.org/index.php/DAP4:_Constraint_Expressions,_v2
Packit a4aae4
// for a discussion of filters that's quite a bit longer than the current
Packit a4aae4
// draft spec. << and >> are the 'less than bbox' and '> bbox' operations
Packit a4aae4
// that I'm not so sure about now; @= is the same as *= and is the mapping
Packit a4aae4
// operation. jhrg 3/18/15 
Packit a4aae4
// The value of 'op' is the text of the operator token that was matched.
op
    : "<"   { $$ = "<"; }
    | ">"   { $$ = ">"; }
    | "<="  { $$ = "<="; }
    | ">="  { $$ = ">="; }
    | "=="  { $$ = "=="; }
    | "!="  { $$ = "!="; }
    | "~="  { $$ = "~="; }
    // bounding-box operators
    | "<<"  { $$ = "<<"; }
    | ">>"  { $$ = ">>"; }
    // mapping operator
    | "@="  { $$ = "@="; }
    ;
Packit a4aae4
Packit a4aae4
// An id is a path, optionally preceded by "/" or by a group and "/". The
// value is the pieces joined back together with "/" as written.
id
    : path
    {
        $$ = $1;
    }
    | "/" path
    {
        $$ = "/" + $2;
    }
    | group "/" path
    {
        $$ = $1 + "/" + $3;
    }
    ;
Packit a4aae4
Packit a4aae4
// A group is "/" followed by a name, optionally extended by further
// "/" name parts. The value is the text with the "/" separators kept.
group
    : "/" name
    {
        $$ = "/" + $2;
    }
    | group "/" name
    {
        $$ = $1 + "/" + $3;
    }
    ;
Packit a4aae4
Packit a4aae4
// A path is one or more names separated by "."; the value is the names
// joined with "." between them.
path
    : name
    {
        $$ = $1;
    }
    | path "." name
    {
        $$ = $1 + "." + $3;
    }
    ;
Packit a4aae4
Packit a4aae4
// Because some formats/datasets allow 'any' name for a variable, it's possible
Packit a4aae4
// that a variable name will be a number, etc. The grammar also allows STRING
Packit a4aae4
// to support "name"."name with spaces and dots (.)".x
Packit a4aae4
//
Packit a4aae4
// I added calls here to remove the double quotes because they were breaking
Packit a4aae4
// the parse for STRINGs and also added www2id() for WORDs (so that %20, etc.
Packit a4aae4
// can be used for escaping stuff). However, the two cannot be mixed - if the
Packit a4aae4
// parser is passed "Point%20Break", the %20 will remain as a literal in the STRING.
Packit a4aae4
// jhrg 10/20/16
Packit a4aae4
name
    : WORD
    {
        // WORDs are passed through www2id()
        $$ = www2id($1);
    }
    | STRING
    {
        // STRINGs are passed through the driver's remove_quotes()
        $$ = driver.remove_quotes($1);
    }
    ;
Packit a4aae4
Packit a4aae4
%%
Packit a4aae4
Packit a4aae4
// Forward the error to the driver for handling. The location parameter
Packit a4aae4
// provides the line number and character position of the error.
Packit a4aae4
void
Packit a4aae4
libdap::D4CEParser::error(const location_type &l, const std::string &m)
Packit a4aae4
{
Packit a4aae4
    driver.error(l, m);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* include for access to scanner.yylex */
Packit a4aae4
#include "D4CEScanner.h"
Packit a4aae4
Packit a4aae4
static int yylex(libdap::D4CEParser::semantic_type *yylval,
Packit a4aae4
                 libdap::location *loc,
Packit a4aae4
                 libdap::D4CEScanner &scanner,
Packit a4aae4
                 libdap::D4ConstraintEvaluator &driver)
Packit a4aae4
{
Packit a4aae4
    if (driver.trace_scanning())
Packit a4aae4
        scanner.set_debug(true);
Packit a4aae4
    
Packit a4aae4
    return( scanner.yylex(yylval, loc) );
Packit a4aae4
}