|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// -*- mode: c++; c-basic-offset:4 -*-
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
|
|
Packit |
a4aae4 |
// Access Protocol.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Copyright (c) 2003 OPeNDAP, Inc.
|
|
Packit |
a4aae4 |
// Author: James Gallagher <jgallagher@opendap.org>
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is free software; you can redistribute it and/or
|
|
Packit |
a4aae4 |
// modify it under the terms of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License as published by the Free Software Foundation; either
|
|
Packit |
a4aae4 |
// version 2.1 of the License, or (at your option) any later version.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is distributed in the hope that it will be useful,
|
|
Packit |
a4aae4 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
a4aae4 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
a4aae4 |
// Lesser General Public License for more details.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You should have received a copy of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License along with this library; if not, write to the Free Software
|
|
Packit |
a4aae4 |
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifndef ddx_parser_h
|
|
Packit |
a4aae4 |
#define ddx_parser_h
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <string>
|
|
Packit |
a4aae4 |
#include <map>
|
|
Packit |
a4aae4 |
#include <stack>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <libxml/parserInternals.h>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifndef ddx_exceptions_h
|
|
Packit |
a4aae4 |
#include "DDXExceptions.h"
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifndef _dds_h
|
|
Packit |
a4aae4 |
#include "DDS.h"
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifndef _basetype_h
|
|
Packit |
a4aae4 |
#include "BaseType.h"
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifndef base_type_factory_h
|
|
Packit |
a4aae4 |
#include "BaseTypeFactory.h"
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
namespace libdap
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Parse the XML text which encodes the network/persistent representation of
|
|
Packit |
a4aae4 |
the DDX object. In the current implementation, the DDX is held by an
|
|
Packit |
a4aae4 |
instance of the class DDS which in turn holds variables which include
|
|
Packit |
a4aae4 |
attributes. That is, the binary \e implementation of a DDX uses the old
|
|
Packit |
a4aae4 |
DDS, BaseType and AttrTable classes, albeit arranged in a slightly new
|
|
Packit |
a4aae4 |
way.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
This parser for the DDX \e document uses the SAX interface of \c libxml2.
|
|
Packit |
a4aae4 |
Static methods are used as callbacks for the SAX parser. These static
|
|
Packit |
a4aae4 |
methods are public because making them private complicates compilation.
|
|
Packit |
a4aae4 |
They should not be called by anything other than the \e intern method.
|
|
Packit |
a4aae4 |
They do not throw exceptions because exceptions from within callbacks are
|
|
Packit |
a4aae4 |
not reliable or portable. To signal errors, the methods record
|
|
Packit |
a4aae4 |
information in the DDXParser object. Once the error handler is called,
|
|
Packit |
a4aae4 |
construction of an DDX/DDS object ends even though the SAX parser still
|
|
Packit |
a4aae4 |
calls the various callback functions. The parser treats \e warnings, \e
|
|
Packit |
a4aae4 |
errors and \e fatal_errors the same way; when any are found parsing
|
|
Packit |
a4aae4 |
stops. The \e intern method throws an DDXParseFailed exception if an
|
|
Packit |
a4aae4 |
error was found.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Note that this class uses the C++-supplied default definitions for the
|
|
Packit |
a4aae4 |
default and copy constructors as well as the destructor and assignment
|
|
Packit |
a4aae4 |
operator.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@see DDS */
|
|
Packit |
a4aae4 |
class DDXParser
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
private:
|
|
Packit |
a4aae4 |
/** States used by DDXParserState. These are the states of the SAX parser
|
|
Packit |
a4aae4 |
state-machine. */
|
|
Packit |
a4aae4 |
enum ParseState {
|
|
Packit |
a4aae4 |
parser_start,
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
inside_dataset,
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
inside_attribute_container,
|
|
Packit |
a4aae4 |
inside_attribute,
|
|
Packit |
a4aae4 |
inside_attribute_value,
|
|
Packit |
a4aae4 |
inside_other_xml_attribute,
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
inside_alias,
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This covers Byte, ..., Url.
|
|
Packit |
a4aae4 |
inside_simple_type,
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
inside_array,
|
|
Packit |
a4aae4 |
inside_dimension,
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
inside_grid,
|
|
Packit |
a4aae4 |
inside_map,
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
inside_structure,
|
|
Packit |
a4aae4 |
inside_sequence,
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
inside_blob_href,
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
parser_unknown,
|
|
Packit |
a4aae4 |
parser_error
|
|
Packit |
a4aae4 |
};
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
BaseTypeFactory *d_factory;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// These stacks hold the state of the parse as it progresses.
|
|
Packit |
a4aae4 |
stack<ParseState> s; // Current parse state
|
|
Packit |
a4aae4 |
stack<BaseType*> bt_stack; // current variable(s)
|
|
Packit |
a4aae4 |
stack<AttrTable*> at_stack; // current attribute table
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Accumulate stuff inside an 'OtherXML' DAP attribute here
|
|
Packit |
a4aae4 |
string other_xml;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// When we're parsing unknown XML, how deeply is it nested? This is used
|
|
Packit |
a4aae4 |
// for the OtherXML DAP attributes.
|
|
Packit |
a4aae4 |
unsigned int other_xml_depth;
|
|
Packit |
a4aae4 |
unsigned int unknown_depth;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// These are used for processing errors.
|
|
Packit |
a4aae4 |
string error_msg; // Error message(s), if any.
|
|
Packit |
a4aae4 |
xmlParserCtxtPtr ctxt; // used for error message line numbers
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// The results of the parse operation are stored in these fields.
|
|
Packit |
a4aae4 |
DDS *dds; // dump DDX here
|
|
Packit |
a4aae4 |
string *blob_href; // put href to blob here
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// These hold temporary values read during the parse.
|
|
Packit |
a4aae4 |
string dods_attr_name; // DAP2 attributes, not XML attributes
|
|
Packit |
a4aae4 |
string dods_attr_type; // ... not XML ...
|
|
Packit |
a4aae4 |
string char_data; // char data in value elements; null after use
|
|
Packit |
a4aae4 |
string root_ns; // What is the namespace of the root node (Dataset)
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
class XMLAttribute {
|
|
Packit |
a4aae4 |
public:
|
|
Packit |
a4aae4 |
string prefix;
|
|
Packit |
a4aae4 |
string nsURI;
|
|
Packit |
a4aae4 |
string value;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void clone(const XMLAttribute &src) {
|
|
Packit |
a4aae4 |
prefix = src.prefix;
|
|
Packit |
a4aae4 |
nsURI = src.nsURI;
|
|
Packit |
a4aae4 |
value = src.value;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
XMLAttribute() : prefix(""), nsURI(""), value("") {}
|
|
Packit |
a4aae4 |
XMLAttribute(const string &p, const string &ns, const string &v)
|
|
Packit |
a4aae4 |
: prefix(p), nsURI(ns), value(v) {}
|
|
Packit |
a4aae4 |
// 'attributes' as passed from libxml2 is a five element array but this
|
|
Packit |
a4aae4 |
// ctor gets the back four elements.
|
|
Packit |
a4aae4 |
XMLAttribute(const xmlChar **attributes/*[4]*/) {
|
|
Packit |
a4aae4 |
prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
|
|
Packit |
a4aae4 |
nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
|
|
Packit |
a4aae4 |
value = string((const char *)attributes[2], (const char *)attributes[3]);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
XMLAttribute(const XMLAttribute &rhs) {
|
|
Packit |
a4aae4 |
clone(rhs);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
XMLAttribute &operator=(const XMLAttribute &rhs) {
|
|
Packit |
a4aae4 |
if (this == &rhs)
|
|
Packit |
a4aae4 |
return *this;
|
|
Packit |
a4aae4 |
clone(rhs);
|
|
Packit |
a4aae4 |
return *this;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
};
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
typedef map<string, XMLAttribute> XMLAttrMap;
|
|
Packit |
a4aae4 |
XMLAttrMap attribute_table; // dump XML attributes here
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
XMLAttrMap::iterator attr_table_begin() {
|
|
Packit |
a4aae4 |
return attribute_table.begin();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
XMLAttrMap::iterator attr_table_end() {
|
|
Packit |
a4aae4 |
return attribute_table.end();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
map<string, string> namespace_table;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// These are kind of silly...
|
|
Packit |
a4aae4 |
void set_state(DDXParser::ParseState state);
|
|
Packit |
a4aae4 |
DDXParser::ParseState get_state() const;
|
|
Packit |
a4aae4 |
void pop_state();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Glue for the BaseTypeFactory class.
|
|
Packit |
a4aae4 |
BaseType *factory(Type t, const string &name);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Common cleanup code for intern() and intern_stream()
|
|
Packit |
a4aae4 |
void cleanup_parse(xmlParserCtxtPtr &context);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** @name Parser Actions
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
These methods are the 'actions' carried out by the start_element and
|
|
Packit |
a4aae4 |
end_element callbacks. Most of what takes place in those has been
|
|
Packit |
a4aae4 |
factored out to this set of functions. */
|
|
Packit |
a4aae4 |
//@{
|
|
Packit |
a4aae4 |
void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
|
|
Packit |
a4aae4 |
void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
|
|
Packit |
a4aae4 |
bool check_required_attribute(const string &attr);
|
|
Packit |
a4aae4 |
bool check_attribute(const string & attr);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void process_attribute_element(const xmlChar **attrs, int nb_attrs);
|
|
Packit |
a4aae4 |
void process_attribute_alias(const xmlChar **attrs, int nb_attrs);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void process_variable(Type t, ParseState s, const xmlChar **attrs,
|
|
Packit |
a4aae4 |
int nb_attributes);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void process_dimension(const xmlChar **attrs, int nb_attrs);
|
|
Packit |
a4aae4 |
void process_blob(const xmlChar **attrs, int nb_attrs);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
bool is_attribute_or_alias(const char *name, const xmlChar **attrs,
|
|
Packit |
a4aae4 |
int nb_attributes);
|
|
Packit |
a4aae4 |
bool is_variable(const char *name, const xmlChar **attrs, int nb_attributes);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void finish_variable(const char *tag, Type t, const char *expected);
|
|
Packit |
a4aae4 |
//@}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/// Declare the default ctor here to prevent its use.
|
|
Packit |
a4aae4 |
DDXParser();
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
friend class DDXParserTest;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
public:
|
|
Packit |
a4aae4 |
DDXParser(BaseTypeFactory *factory)
|
|
Packit |
a4aae4 |
: d_factory(factory),
|
|
Packit |
a4aae4 |
other_xml(""), other_xml_depth(0), unknown_depth(0),
|
|
Packit |
a4aae4 |
error_msg(""), ctxt(0), dds(0), blob_href(0),
|
|
Packit |
a4aae4 |
dods_attr_name(""), dods_attr_type(""),
|
|
Packit |
a4aae4 |
char_data(""), root_ns("")
|
|
Packit |
a4aae4 |
{}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void intern(const string &document, DDS *dest_dds, string &cid;;
|
|
Packit |
a4aae4 |
void intern_stream(FILE *in, DDS *dds, string &cid, const string &boundary = "");
|
|
Packit |
a4aae4 |
void intern_stream(istream &in, DDS *dds, string &cid, const string &boundary = "");
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
static void ddx_start_document(void *parser);
|
|
Packit |
a4aae4 |
static void ddx_end_document(void *parser);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
static void ddx_sax2_start_element(void *parser,
|
|
Packit |
a4aae4 |
const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
|
|
Packit |
a4aae4 |
int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
|
|
Packit |
a4aae4 |
int nb_defaulted, const xmlChar **attributes);
|
|
Packit |
a4aae4 |
static void ddx_sax2_end_element(void *parser, const xmlChar *localname,
|
|
Packit |
a4aae4 |
const xmlChar *prefix, const xmlChar *URI);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
static void ddx_get_characters(void *parser, const xmlChar *ch, int len);
|
|
Packit |
a4aae4 |
static void ddx_ignoreable_whitespace(void *parser,
|
|
Packit |
a4aae4 |
const xmlChar * ch, int len);
|
|
Packit |
a4aae4 |
static void ddx_get_cdata(void *parser, const xmlChar *value, int len);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
static xmlEntityPtr ddx_get_entity(void *parser, const xmlChar *name);
|
|
Packit |
a4aae4 |
static void ddx_fatal_error(void *parser, const char *msg, ...);
|
|
Packit |
a4aae4 |
};
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
} // namespace libdap
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#endif // ddx_parser_h
|