Blame D4ParserSax2.h

Packit a4aae4
Packit a4aae4
// -*- mode: c++; c-basic-offset:4 -*-
Packit a4aae4
Packit a4aae4
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
Packit a4aae4
// Access Protocol.
Packit a4aae4
Packit a4aae4
// Copyright (c) 2012 OPeNDAP, Inc.
Packit a4aae4
// Author: James Gallagher <jgallagher@opendap.org>
Packit a4aae4
//
Packit a4aae4
// This library is free software; you can redistribute it and/or
Packit a4aae4
// modify it under the terms of the GNU Lesser General Public
Packit a4aae4
// License as published by the Free Software Foundation; either
Packit a4aae4
// version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
//
Packit a4aae4
// This library is distributed in the hope that it will be useful,
Packit a4aae4
// but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
// Lesser General Public License for more details.
Packit a4aae4
//
Packit a4aae4
// You should have received a copy of the GNU Lesser General Public
Packit a4aae4
// License along with this library; if not, write to the Free Software
Packit a4aae4
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Packit a4aae4
//
Packit a4aae4
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
Packit a4aae4
Packit a4aae4
#ifndef d4_parser_sax2_h
Packit a4aae4
#define d4_parser_sax2_h
Packit a4aae4
Packit a4aae4
#define ATTR 1
Packit a4aae4
Packit a4aae4
#include <string.h>
Packit a4aae4
Packit a4aae4
#include <string>
Packit a4aae4
#include <iostream>
Packit a4aae4
#include <map>
Packit a4aae4
#include <stack>
Packit a4aae4
Packit a4aae4
#include <libxml/parserInternals.h>
Packit a4aae4
Packit a4aae4
#define CRLF "\r\n"
Packit a4aae4
Packit a4aae4
namespace libdap
Packit a4aae4
{
Packit a4aae4
Packit a4aae4
class DMR;
Packit a4aae4
class BaseType;
Packit a4aae4
class D4BaseTypeFactory;
Packit a4aae4
class D4Group;
Packit a4aae4
class D4Attributes;
Packit a4aae4
class D4EnumDef;
Packit a4aae4
class D4Dimension;
Packit a4aae4
Packit a4aae4
/** Parse the XML text which encodes the network/persistent representation of
Packit a4aae4
    the DMR object. In the current implementation, the DMR is held by an
Packit a4aae4
    instance of the class DDS which in turn holds variables which include
Packit a4aae4
    attributes.
Packit a4aae4
Packit a4aae4
    This parser for the DMR document uses the SAX interface of libxml2.
Packit a4aae4
    Static methods are used as callbacks for the SAX parser. These static
Packit a4aae4
    methods are public because making them private complicates compilation.
Packit a4aae4
    They should not be called by anything other than the intern method.
Packit a4aae4
    They do not throw exceptions because exceptions from within callbacks are
Packit a4aae4
    not reliable or portable. To signal errors, the methods record
Packit a4aae4
    information in the D4ParserSax2 object. Once the error handler is called,
Packit a4aae4
    construction of a DMR object ends even though the SAX parser still
Packit a4aae4
    calls the various callback functions. The parser treats warnings,
Packit a4aae4
    errors and fatal_errors the same way; when any are found parsing
Packit a4aae4
    stops. The intern method throws an Error or InternalErr exception if an
Packit a4aae4
    error was found.
Packit a4aae4
Packit a4aae4
    Note that this class uses the C++-supplied default definitions for the
Packit a4aae4
    copy constructor as well as the destructor and assignment
Packit a4aae4
    operator.
Packit a4aae4
Packit a4aae4
    @see DMR */
Packit a4aae4
class D4ParserSax2
Packit a4aae4
{
Packit a4aae4
private:
Packit a4aae4
    /** States used by DDXParserDAP4State. These are the states of the SAX parser
Packit a4aae4
    state-machine. */
Packit a4aae4
    enum ParseState {
Packit a4aae4
        parser_start,
Packit a4aae4
Packit a4aae4
        inside_dataset,
Packit a4aae4
Packit a4aae4
        // inside_group is the state just after parsing the start of a Group
Packit a4aae4
        // element.
Packit a4aae4
        inside_group,
Packit a4aae4
Packit a4aae4
        inside_attribute_container,
Packit a4aae4
        inside_attribute,
Packit a4aae4
        inside_attribute_value,
Packit a4aae4
        inside_other_xml_attribute,
Packit a4aae4
Packit a4aae4
        inside_enum_def,
Packit a4aae4
        inside_enum_const,
Packit a4aae4
Packit a4aae4
        inside_dim_def,
Packit a4aae4
Packit a4aae4
        // This covers Byte, ..., Url, Opaque
Packit a4aae4
        inside_simple_type,
Packit a4aae4
Packit a4aae4
        // inside_array,
Packit a4aae4
        inside_dim,
Packit a4aae4
        inside_map,
Packit a4aae4
Packit a4aae4
        inside_constructor,
Packit a4aae4
Packit a4aae4
        // inside_sequence, Removed from merged code jhrg 5/2/14
Packit a4aae4
Packit a4aae4
        not_dap4_element,
Packit a4aae4
Packit a4aae4
        parser_unknown,
Packit a4aae4
        parser_error,
Packit a4aae4
        parser_fatal_error,
Packit a4aae4
Packit a4aae4
        parser_end
Packit a4aae4
    };
Packit a4aae4
Packit a4aae4
    xmlSAXHandler d_dmr_sax_parser;
Packit a4aae4
Packit a4aae4
    // The results of the parse operation are stored in these fields.
Packit a4aae4
    // This is passed into the parser using the intern() methods.
Packit a4aae4
    DMR *d_dmr;   // dump DMR here
Packit a4aae4
    DMR *dmr() const { return d_dmr; }
Packit a4aae4
Packit a4aae4
    // These stacks hold the state of the parse as it progresses.
Packit a4aae4
    stack<ParseState> s; // Current parse state
Packit a4aae4
    void push_state(D4ParserSax2::ParseState state) { s.push(state); }
Packit a4aae4
    D4ParserSax2::ParseState get_state() const { return s.top(); }
Packit a4aae4
    void pop_state() { s.pop(); }
Packit a4aae4
    bool empty_state() const { return s.empty(); }
Packit a4aae4
Packit a4aae4
    stack<BaseType*> btp_stack; // current variable(s)
Packit a4aae4
    void push_basetype(BaseType *btp) { btp_stack.push(btp); }
Packit a4aae4
    BaseType *top_basetype() const { return btp_stack.top(); }
Packit a4aae4
    void pop_basetype() { btp_stack.pop(); }
Packit a4aae4
    bool empty_basetype() const { return btp_stack.empty(); }
Packit a4aae4
Packit a4aae4
    stack<D4Group*> grp_stack; // current groups(s)
Packit a4aae4
    void push_group(D4Group *grp) { grp_stack.push(grp); }
Packit a4aae4
    D4Group *top_group() const { return grp_stack.top(); }
Packit a4aae4
    void pop_group() { grp_stack.pop(); }
Packit a4aae4
    bool empty_group() const { return grp_stack.empty(); }
Packit a4aae4
Packit a4aae4
    stack<D4Attributes*> d_attrs_stack; // DAP4 Attributes
Packit a4aae4
    void push_attributes(D4Attributes *attr) { d_attrs_stack.push(attr); }
Packit a4aae4
    D4Attributes *top_attributes() const { return d_attrs_stack.top(); }
Packit a4aae4
    void pop_attributes() { d_attrs_stack.pop(); }
Packit a4aae4
    bool empty_attributes() const { return d_attrs_stack.empty(); }
Packit a4aae4
Packit a4aae4
    D4EnumDef *d_enum_def;
Packit a4aae4
    D4EnumDef *enum_def();
Packit a4aae4
    void clear_enum_def() { d_enum_def = 0; }
Packit a4aae4
Packit a4aae4
    D4Dimension *d_dim_def;
Packit a4aae4
    D4Dimension *dim_def();
Packit a4aae4
    void clear_dim_def() { d_dim_def = 0; }
Packit a4aae4
Packit a4aae4
    // Accumulate stuff inside an 'OtherXML' DAP attribute here
Packit a4aae4
    string other_xml;
Packit a4aae4
Packit a4aae4
    // When we're parsing unknown XML, how deeply is it nested? This is used
Packit a4aae4
    // for the OtherXML DAP attributes.
Packit a4aae4
    unsigned int other_xml_depth;
Packit a4aae4
    unsigned int unknown_depth;
Packit a4aae4
Packit a4aae4
    // These are used for processing errors.
Packit a4aae4
    string d_error_msg;  // Error message(s), if any.
Packit a4aae4
    xmlParserCtxtPtr d_context; // used for error message line numbers
Packit a4aae4
Packit a4aae4
    // These hold temporary values read during the parse.
Packit a4aae4
    string dods_attr_name; // DAP4 attributes, not XML attributes
Packit a4aae4
    string dods_attr_type; // ... not XML ...
Packit a4aae4
    string char_data;  // char data in value elements; null after use
Packit a4aae4
    string root_ns;     // What is the namespace of the root node (Group)
Packit a4aae4
Packit a4aae4
    bool d_debug;
Packit a4aae4
    bool debug() const { return d_debug; }
Packit a4aae4
Packit a4aae4
    bool d_strict;
Packit a4aae4
Packit a4aae4
    class XMLAttribute {
Packit a4aae4
        public:
Packit a4aae4
        string prefix;
Packit a4aae4
        string nsURI;
Packit a4aae4
        string value;
Packit a4aae4
Packit a4aae4
        void clone(const XMLAttribute &src) {
Packit a4aae4
            prefix = src.prefix;
Packit a4aae4
            nsURI = src.nsURI;
Packit a4aae4
            value = src.value;
Packit a4aae4
        }
Packit a4aae4
Packit a4aae4
        XMLAttribute() : prefix(""), nsURI(""), value("") {}
Packit a4aae4
        XMLAttribute(const string &p, const string &ns, const string &v)
Packit a4aae4
            : prefix(p), nsURI(ns), value(v) {}
Packit a4aae4
        // 'attributes' as passed from libxml2 is a five element array but this
Packit a4aae4
        // ctor gets the back four elements.
Packit a4aae4
        XMLAttribute(const xmlChar **attributes/*[4]*/) {
Packit a4aae4
            prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
Packit a4aae4
            nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
Packit a4aae4
            value = string((const char *)attributes[2], (const char *)attributes[3]);
Packit a4aae4
        }
Packit a4aae4
        XMLAttribute(const XMLAttribute &rhs) {
Packit a4aae4
            clone(rhs);
Packit a4aae4
        }
Packit a4aae4
        XMLAttribute &operator=(const XMLAttribute &rhs) {
Packit a4aae4
            if (this == &rhs)
Packit a4aae4
                return *this;
Packit a4aae4
            clone(rhs);
Packit a4aae4
            return *this;
Packit a4aae4
        }
Packit a4aae4
    };
Packit a4aae4
Packit a4aae4
    typedef map<string, XMLAttribute> XMLAttrMap;
Packit a4aae4
    XMLAttrMap xml_attrs; // dump XML attributes here
Packit a4aae4
Packit a4aae4
    XMLAttrMap::iterator xml_attr_begin() { return xml_attrs.begin(); }
Packit a4aae4
Packit a4aae4
    XMLAttrMap::iterator xml_attr_end() {  return xml_attrs.end(); }
Packit a4aae4
Packit a4aae4
    map<string, string> namespace_table;
Packit a4aae4
Packit a4aae4
    void cleanup_parse();
Packit a4aae4
Packit a4aae4
    /** @name Parser Actions
Packit a4aae4
Packit a4aae4
    These methods are the 'actions' carried out by the start_element and
Packit a4aae4
    end_element callbacks. Most of what takes place in those has been
Packit a4aae4
    factored out to this set of functions. */
Packit a4aae4
    //@{
Packit a4aae4
    void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
Packit a4aae4
    void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
Packit a4aae4
    bool check_required_attribute(const string &attr);
Packit a4aae4
    bool check_attribute(const string & attr);
Packit a4aae4
    void process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes);
Packit a4aae4
Packit a4aae4
    void process_enum_const_helper(const xmlChar **attrs, int nb_attributes);
Packit a4aae4
    void process_enum_def_helper(const xmlChar **attrs, int nb_attributes);
Packit a4aae4
Packit a4aae4
    bool process_dimension(const char *name, const xmlChar **attrs, int nb_attrs);
Packit a4aae4
    bool process_dimension_def(const char *name, const xmlChar **attrs, int nb_attrs);
Packit a4aae4
    bool process_map(const char *name, const xmlChar **attrs, int nb_attributes);
Packit a4aae4
    bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes);
Packit a4aae4
    bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes);
Packit a4aae4
    bool process_group(const char *name, const xmlChar **attrs, int nb_attributes);
Packit a4aae4
    bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes);
Packit a4aae4
    bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes);
Packit a4aae4
Packit a4aae4
    void finish_variable(const char *tag, Type t, const char *expected);
Packit a4aae4
    //@}
Packit a4aae4
Packit a4aae4
    friend class D4ParserSax2Test;
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
    D4ParserSax2() :
Packit a4aae4
        d_dmr(0), d_enum_def(0), d_dim_def(0),
Packit a4aae4
        other_xml(""), other_xml_depth(0), unknown_depth(0),
Packit a4aae4
        d_error_msg(""), d_context(0),
Packit a4aae4
        dods_attr_name(""), dods_attr_type(""),
Packit a4aae4
        char_data(""), root_ns(""), d_debug(false), d_strict(true)
Packit a4aae4
    {
Packit a4aae4
        //xmlSAXHandler ddx_sax_parser;
Packit a4aae4
        memset(&d_dmr_sax_parser, 0, sizeof(xmlSAXHandler));
Packit a4aae4
Packit a4aae4
        d_dmr_sax_parser.getEntity = &D4ParserSax2::dmr_get_entity;
Packit a4aae4
        d_dmr_sax_parser.startDocument = &D4ParserSax2::dmr_start_document;
Packit a4aae4
        d_dmr_sax_parser.endDocument = &D4ParserSax2::dmr_end_document;
Packit a4aae4
        d_dmr_sax_parser.characters = &D4ParserSax2::dmr_get_characters;
Packit a4aae4
        d_dmr_sax_parser.ignorableWhitespace = &D4ParserSax2::dmr_ignoreable_whitespace;
Packit a4aae4
        d_dmr_sax_parser.cdataBlock = &D4ParserSax2::dmr_get_cdata;
Packit a4aae4
        d_dmr_sax_parser.warning = &D4ParserSax2::dmr_error;
Packit a4aae4
        d_dmr_sax_parser.error = &D4ParserSax2::dmr_error;
Packit a4aae4
        d_dmr_sax_parser.fatalError = &D4ParserSax2::dmr_fatal_error;
Packit a4aae4
        d_dmr_sax_parser.initialized = XML_SAX2_MAGIC;
Packit a4aae4
        d_dmr_sax_parser.startElementNs = &D4ParserSax2::dmr_start_element;
Packit a4aae4
        d_dmr_sax_parser.endElementNs = &D4ParserSax2::dmr_end_element;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    void intern(istream &f, DMR *dest_dmr, bool debug = false);
Packit a4aae4
    void intern(const string &document, DMR *dest_dmr, bool debug = false);
Packit a4aae4
    void intern(const char *buffer, int size, DMR *dest_dmr, bool debug = false);
Packit a4aae4
Packit a4aae4
    /**
Packit a4aae4
     * @defgroup strict The 'strict' mode
Packit a4aae4
     * @{
Packit a4aae4
     * The strict mode of the parser is the default. In this mode any error
Packit a4aae4
     * will result in an exception and parsing will stop. When strict mode
Packit a4aae4
     * is set to false (forgiving mode?), some errors will be silently ignored.
Packit a4aae4
     * The list of allowed errors is:
Packit a4aae4
     *     The Array named by a Map element is not required to be in the DMR.
Packit a4aae4
     *     There are no other allowed errors at this time (4/13/16)
Packit a4aae4
     */
Packit a4aae4
Packit a4aae4
    /** @brief Set the 'strict' mode to true or false. */
Packit a4aae4
    void set_strict(bool s) { d_strict = s; }
Packit a4aae4
    /** @brief Get the setting of the 'strict' mode.
Packit a4aae4
     * @return True or False.
Packit a4aae4
     */
Packit a4aae4
    bool get_strict() const { return d_strict; }
Packit a4aae4
    /** @} */
Packit a4aae4
Packit a4aae4
    static void dmr_start_document(void *parser);
Packit a4aae4
    static void dmr_end_document(void *parser);
Packit a4aae4
Packit a4aae4
    static void dmr_start_element(void *parser,
Packit a4aae4
            const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
Packit a4aae4
            int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
Packit a4aae4
            int nb_defaulted, const xmlChar **attributes);
Packit a4aae4
    static void dmr_end_element(void *parser, const xmlChar *localname,
Packit a4aae4
            const xmlChar *prefix, const xmlChar *URI);
Packit a4aae4
Packit a4aae4
    static void dmr_get_characters(void *parser, const xmlChar *ch, int len);
Packit a4aae4
    static void dmr_ignoreable_whitespace(void *parser,
Packit a4aae4
            const xmlChar * ch, int len);
Packit a4aae4
    static void dmr_get_cdata(void *parser, const xmlChar *value, int len);
Packit a4aae4
Packit a4aae4
    static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name);
Packit a4aae4
    static void dmr_fatal_error(void *parser, const char *msg, ...);
Packit a4aae4
    static void dmr_error(void *parser, const char *msg, ...);
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
} // namespace libdap
Packit a4aae4
Packit a4aae4
#endif // d4_parser_sax2_h