Blame DDXParserSAX2.h

Packit a4aae4
Packit a4aae4
// -*- mode: c++; c-basic-offset:4 -*-
Packit a4aae4
Packit a4aae4
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
Packit a4aae4
// Access Protocol.
Packit a4aae4
Packit a4aae4
// Copyright (c) 2003 OPeNDAP, Inc.
Packit a4aae4
// Author: James Gallagher <jgallagher@opendap.org>
Packit a4aae4
//
Packit a4aae4
// This library is free software; you can redistribute it and/or
Packit a4aae4
// modify it under the terms of the GNU Lesser General Public
Packit a4aae4
// License as published by the Free Software Foundation; either
Packit a4aae4
// version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
//
Packit a4aae4
// This library is distributed in the hope that it will be useful,
Packit a4aae4
// but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
// Lesser General Public License for more details.
Packit a4aae4
//
Packit a4aae4
// You should have received a copy of the GNU Lesser General Public
Packit a4aae4
// License along with this library; if not, write to the Free Software
Packit a4aae4
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Packit a4aae4
//
Packit a4aae4
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
Packit a4aae4
Packit a4aae4
#ifndef ddx_parser_h
Packit a4aae4
#define ddx_parser_h
Packit a4aae4
Packit a4aae4
#include <string>
Packit a4aae4
#include <map>
Packit a4aae4
#include <stack>
Packit a4aae4
Packit a4aae4
#include <libxml/parserInternals.h>
Packit a4aae4
Packit a4aae4
#ifndef ddx_exceptions_h
Packit a4aae4
#include "DDXExceptions.h"
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
#ifndef _dds_h
Packit a4aae4
#include "DDS.h"
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
#ifndef _basetype_h
Packit a4aae4
#include "BaseType.h"
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
#ifndef base_type_factory_h
Packit a4aae4
#include "BaseTypeFactory.h"
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
namespace libdap
Packit a4aae4
{
Packit a4aae4
Packit a4aae4
/** Parse the XML text which encodes the network/persistent representation of
Packit a4aae4
    the DDX object. In the current implementation, the DDX is held by an
Packit a4aae4
    instance of the class DDS which in turn holds variables which include
Packit a4aae4
    attributes. That is, the binary \e implementation of a DDX uses the old
Packit a4aae4
    DDS, BaseType and AttrTable classes, albeit arranged in a slightly new
Packit a4aae4
    way.
Packit a4aae4
Packit a4aae4
    This parser for the DDX \e document uses the SAX interface of \c libxml2.
Packit a4aae4
    Static methods are used as callbacks for the SAX parser. These static
Packit a4aae4
    methods are public because making them private complicates compilation.
Packit a4aae4
    They should not be called by anything other than the \e intern method.
Packit a4aae4
    They do not throw exceptions because exceptions from within callbacks are
Packit a4aae4
    not reliable or portable. To signal errors, the methods record
Packit a4aae4
    information in the DDXParser object. Once the error handler is called,
Packit a4aae4
    construction of an DDX/DDS object ends even though the SAX parser still
Packit a4aae4
    calls the various callback functions. The parser treats \e warnings, \e
Packit a4aae4
    errors and \e fatal_errors the same way; when any are found parsing
Packit a4aae4
    stops. The \e intern method throws an DDXParseFailed exception if an
Packit a4aae4
    error was found.
Packit a4aae4
Packit a4aae4
    Note that this class uses the C++-supplied default definitions for the
Packit a4aae4
    default and copy constructors as well as the destructor and assignment
Packit a4aae4
    operator.
Packit a4aae4
Packit a4aae4
    @see DDS */
Packit a4aae4
class DDXParser
Packit a4aae4
{
Packit a4aae4
private:
Packit a4aae4
    /** States used by DDXParserState. These are the states of the SAX parser
Packit a4aae4
    state-machine. */
Packit a4aae4
    enum ParseState {
Packit a4aae4
        parser_start,
Packit a4aae4
Packit a4aae4
        inside_dataset,
Packit a4aae4
Packit a4aae4
        inside_attribute_container,
Packit a4aae4
        inside_attribute,
Packit a4aae4
        inside_attribute_value,
Packit a4aae4
        inside_other_xml_attribute,
Packit a4aae4
Packit a4aae4
        inside_alias,
Packit a4aae4
Packit a4aae4
        // This covers Byte, ..., Url.
Packit a4aae4
        inside_simple_type,
Packit a4aae4
Packit a4aae4
        inside_array,
Packit a4aae4
        inside_dimension,
Packit a4aae4
Packit a4aae4
        inside_grid,
Packit a4aae4
        inside_map,
Packit a4aae4
Packit a4aae4
        inside_structure,
Packit a4aae4
        inside_sequence,
Packit a4aae4
Packit a4aae4
        inside_blob_href,
Packit a4aae4
Packit a4aae4
        parser_unknown,
Packit a4aae4
        parser_error
Packit a4aae4
    };
Packit a4aae4
Packit a4aae4
    BaseTypeFactory *d_factory;
Packit a4aae4
Packit a4aae4
    // These stacks hold the state of the parse as it progresses.
Packit a4aae4
    stack<ParseState> s; // Current parse state
Packit a4aae4
    stack<BaseType*> bt_stack; // current variable(s)
Packit a4aae4
    stack<AttrTable*> at_stack; // current attribute table
Packit a4aae4
Packit a4aae4
    // Accumulate stuff inside an 'OtherXML' DAP attribute here
Packit a4aae4
    string other_xml;
Packit a4aae4
Packit a4aae4
    // When we're parsing unknown XML, how deeply is it nested? This is used
Packit a4aae4
    // for the OtherXML DAP attributes.
Packit a4aae4
    unsigned int other_xml_depth;
Packit a4aae4
    unsigned int unknown_depth;
Packit a4aae4
Packit a4aae4
    // These are used for processing errors.
Packit a4aae4
    string error_msg;  // Error message(s), if any.
Packit a4aae4
    xmlParserCtxtPtr ctxt; // used for error message line numbers
Packit a4aae4
Packit a4aae4
    // The results of the parse operation are stored in these fields.
Packit a4aae4
    DDS *dds;   // dump DDX here
Packit a4aae4
    string *blob_href;  // put href to blob here
Packit a4aae4
Packit a4aae4
    // These hold temporary values read during the parse.
Packit a4aae4
    string dods_attr_name; // DAP2 attributes, not XML attributes
Packit a4aae4
    string dods_attr_type; // ... not XML ...
Packit a4aae4
    string char_data;  // char data in value elements; null after use
Packit a4aae4
    string root_ns;     // What is the namespace of the root node (Dataset)
Packit a4aae4
Packit a4aae4
    class XMLAttribute {
Packit a4aae4
        public:
Packit a4aae4
        string prefix;
Packit a4aae4
        string nsURI;
Packit a4aae4
        string value;
Packit a4aae4
Packit a4aae4
        void clone(const XMLAttribute &src) {
Packit a4aae4
            prefix = src.prefix;
Packit a4aae4
            nsURI = src.nsURI;
Packit a4aae4
            value = src.value;
Packit a4aae4
        }
Packit a4aae4
Packit a4aae4
        XMLAttribute() : prefix(""), nsURI(""), value("") {}
Packit a4aae4
        XMLAttribute(const string &p, const string &ns, const string &v)
Packit a4aae4
            : prefix(p), nsURI(ns), value(v) {}
Packit a4aae4
        // 'attributes' as passed from libxml2 is a five element array but this
Packit a4aae4
        // ctor gets the back four elements.
Packit a4aae4
        XMLAttribute(const xmlChar **attributes/*[4]*/) {
Packit a4aae4
            prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
Packit a4aae4
            nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
Packit a4aae4
            value = string((const char *)attributes[2], (const char *)attributes[3]);
Packit a4aae4
        }
Packit a4aae4
        XMLAttribute(const XMLAttribute &rhs) {
Packit a4aae4
            clone(rhs);
Packit a4aae4
        }
Packit a4aae4
        XMLAttribute &operator=(const XMLAttribute &rhs) {
Packit a4aae4
            if (this == &rhs)
Packit a4aae4
                return *this;
Packit a4aae4
            clone(rhs);
Packit a4aae4
            return *this;
Packit a4aae4
        }
Packit a4aae4
    };
Packit a4aae4
Packit a4aae4
    typedef map<string, XMLAttribute> XMLAttrMap;
Packit a4aae4
    XMLAttrMap attribute_table; // dump XML attributes here
Packit a4aae4
Packit a4aae4
    XMLAttrMap::iterator attr_table_begin() {
Packit a4aae4
        return attribute_table.begin();
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    XMLAttrMap::iterator attr_table_end() {
Packit a4aae4
        return attribute_table.end();
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
    map<string, string> namespace_table;
Packit a4aae4
Packit a4aae4
    // These are kind of silly...
Packit a4aae4
    void set_state(DDXParser::ParseState state);
Packit a4aae4
    DDXParser::ParseState get_state() const;
Packit a4aae4
    void pop_state();
Packit a4aae4
Packit a4aae4
    // Glue for the BaseTypeFactory class.
Packit a4aae4
    BaseType *factory(Type t, const string &name);
Packit a4aae4
Packit a4aae4
    // Common cleanup code for intern() and intern_stream()
Packit a4aae4
    void cleanup_parse(xmlParserCtxtPtr &context);
Packit a4aae4
Packit a4aae4
    /** @name Parser Actions
Packit a4aae4
Packit a4aae4
    These methods are the 'actions' carried out by the start_element and
Packit a4aae4
    end_element callbacks. Most of what takes place in those has been
Packit a4aae4
    factored out to this set of functions. */
Packit a4aae4
    //@{
Packit a4aae4
    void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
Packit a4aae4
    void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
Packit a4aae4
    bool check_required_attribute(const string &attr);
Packit a4aae4
    bool check_attribute(const string & attr);
Packit a4aae4
Packit a4aae4
    void process_attribute_element(const xmlChar **attrs, int nb_attrs);
Packit a4aae4
    void process_attribute_alias(const xmlChar **attrs, int nb_attrs);
Packit a4aae4
Packit a4aae4
    void process_variable(Type t, ParseState s, const xmlChar **attrs,
Packit a4aae4
            int nb_attributes);
Packit a4aae4
Packit a4aae4
    void process_dimension(const xmlChar **attrs, int nb_attrs);
Packit a4aae4
    void process_blob(const xmlChar **attrs, int nb_attrs);
Packit a4aae4
Packit a4aae4
    bool is_attribute_or_alias(const char *name, const xmlChar **attrs,
Packit a4aae4
            int nb_attributes);
Packit a4aae4
    bool is_variable(const char *name, const xmlChar **attrs, int nb_attributes);
Packit a4aae4
Packit a4aae4
    void finish_variable(const char *tag, Type t, const char *expected);
Packit a4aae4
    //@}
Packit a4aae4
Packit a4aae4
    /// Declare the default ctor here to prevent its use.
Packit a4aae4
    DDXParser();
Packit a4aae4
Packit a4aae4
    friend class DDXParserTest;
Packit a4aae4
Packit a4aae4
public:
Packit a4aae4
    DDXParser(BaseTypeFactory *factory)
Packit a4aae4
        : d_factory(factory),
Packit a4aae4
        other_xml(""), other_xml_depth(0), unknown_depth(0),
Packit a4aae4
        error_msg(""), ctxt(0), dds(0), blob_href(0),
Packit a4aae4
        dods_attr_name(""), dods_attr_type(""),
Packit a4aae4
        char_data(""), root_ns("")
Packit a4aae4
    {}
Packit a4aae4
Packit a4aae4
    void intern(const string &document, DDS *dest_dds, string &cid;;
Packit a4aae4
    void intern_stream(FILE *in, DDS *dds, string &cid, const string &boundary = "");
Packit a4aae4
    void intern_stream(istream &in, DDS *dds, string &cid, const string &boundary = "");
Packit a4aae4
Packit a4aae4
    static void ddx_start_document(void *parser);
Packit a4aae4
    static void ddx_end_document(void *parser);
Packit a4aae4
Packit a4aae4
    static void ddx_sax2_start_element(void *parser,
Packit a4aae4
            const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
Packit a4aae4
            int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
Packit a4aae4
            int nb_defaulted, const xmlChar **attributes);
Packit a4aae4
    static void ddx_sax2_end_element(void *parser, const xmlChar *localname,
Packit a4aae4
            const xmlChar *prefix, const xmlChar *URI);
Packit a4aae4
Packit a4aae4
    static void ddx_get_characters(void *parser, const xmlChar *ch, int len);
Packit a4aae4
    static void ddx_ignoreable_whitespace(void *parser,
Packit a4aae4
            const xmlChar * ch, int len);
Packit a4aae4
    static void ddx_get_cdata(void *parser, const xmlChar *value, int len);
Packit a4aae4
Packit a4aae4
    static xmlEntityPtr ddx_get_entity(void *parser, const xmlChar *name);
Packit a4aae4
    static void ddx_fatal_error(void *parser, const char *msg, ...);
Packit a4aae4
};
Packit a4aae4
Packit a4aae4
} // namespace libdap
Packit a4aae4
Packit a4aae4
#endif // ddx_parser_h