|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// -*- mode: c++; c-basic-offset:4 -*-
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
|
|
Packit |
a4aae4 |
// Access Protocol.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Copyright (c) 2002,2003 OPeNDAP, Inc.
|
|
Packit |
a4aae4 |
// Author: James Gallagher <jgallagher@opendap.org>
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is free software; you can redistribute it and/or
|
|
Packit |
a4aae4 |
// modify it under the terms of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License as published by the Free Software Foundation; either
|
|
Packit |
a4aae4 |
// version 2.1 of the License, or (at your option) any later version.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is distributed in the hope that it will be useful,
|
|
Packit |
a4aae4 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
a4aae4 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
a4aae4 |
// Lesser General Public License for more details.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You should have received a copy of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License along with this library; if not, write to the Free Software
|
|
Packit |
a4aae4 |
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Copyright (c) 1996, California Institute of Technology.
|
|
Packit |
a4aae4 |
// ALL RIGHTS RESERVED. U.S. Government Sponsorship acknowledged.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// Please read the full copyright notice in the file COPYRIGHT_URI
|
|
Packit |
a4aae4 |
// in this directory.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// Author: Todd Karakashian, NASA/Jet Propulsion Laboratory
|
|
Packit |
a4aae4 |
// Todd.K.Karakashian@jpl.nasa.gov
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// $RCSfile: escaping.cc,v $ - Miscellaneous routines for OPeNDAP HDF server
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// These two routines are for escaping/unescaping strings that are identifiers
|
|
Packit |
a4aae4 |
// in DAP2
|
|
Packit |
a4aae4 |
// id2www() -- escape (using WWW hex codes) non-allowable characters in a
|
|
Packit |
a4aae4 |
// DAP2 identifier
|
|
Packit |
a4aae4 |
// www2id() -- given a WWW hex-code escaped identifier, restore it
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// These two routines are for escaping/unescaping strings storing attribute
|
|
Packit |
a4aae4 |
// values. They use traditional octal escapes (\nnn) because they are
|
|
Packit |
a4aae4 |
// intended to be viewed by a user
|
|
Packit |
a4aae4 |
// escattr() -- escape (using traditional octal backslash) non-allowable
|
|
Packit |
a4aae4 |
// characters in the value of a DAP2 attribute
|
|
Packit |
a4aae4 |
// unescattr() -- given an octally escaped string, restore it
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// These are routines used by the above, not intended to be called directly:
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// hexstring()
|
|
Packit |
a4aae4 |
// unhexstring()
|
|
Packit |
a4aae4 |
// octstring()
|
|
Packit |
a4aae4 |
// unoctstring()
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// -Todd
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include "config.h"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <ctype.h>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <iomanip>
|
|
Packit |
a4aae4 |
#include <string>
|
|
Packit |
a4aae4 |
#include <sstream>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include "GNURegex.h"
|
|
Packit |
a4aae4 |
#include "Error.h"
|
|
Packit |
a4aae4 |
#include "InternalErr.h"
|
|
Packit |
a4aae4 |
//#define DODS_DEBUG
|
|
Packit |
a4aae4 |
#include "debug.h"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
using namespace std;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
namespace libdap {
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// The next four functions were originally defined static, but I removed that
|
|
Packit |
a4aae4 |
// to make testing them (see generalUtilTest.cc) easier to write. 5/7/2001
|
|
Packit |
a4aae4 |
// jhrg
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Format one byte as two lowercase hexadecimal digits.

    @param val The byte to format.
    @return A two-character, zero-padded string (for example, 10 yields
    "0a"). */
std::string
hexstring(unsigned char val)
{
    static const char digits[] = "0123456789abcdef";

    std::string text(2, '0');
    text[0] = digits[(val >> 4) & 0x0f];    // high nibble
    text[1] = digits[val & 0x0f];           // low nibble

    return text;
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Convert text holding a hexadecimal number into the single character with
    that value.

    @param s Text parsed as a hexadecimal integer (for example "20").
    @return A one-character string holding the parsed value narrowed to a
    char. The result is empty when that character is NUL, which includes
    the cases where \c s is empty or cannot be parsed. */
std::string
unhexstring(std::string s)
{
    // Start from zero: with an empty input the extraction below gives up
    // before storing anything, and val would otherwise be read while still
    // uninitialized.
    int val = 0;

    std::istringstream ss(s);
    ss >> std::hex >> val;

    // A NUL character produces an empty result, exactly as building the
    // string from a NUL-terminated two-byte buffer always did.
    const char c = static_cast<char>(val);
    return (c == '\0') ? std::string() : std::string(1, c);
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Format one byte as three octal digits.

    @param val The byte to format.
    @return A three-character, zero-padded string (for example, 10 yields
    "012"). */
std::string
octstring(unsigned char val)
{
    std::string text(3, '0');
    text[0] = static_cast<char>('0' + ((val >> 6) & 07));
    text[1] = static_cast<char>('0' + ((val >> 3) & 07));
    text[2] = static_cast<char>('0' + (val & 07));

    return text;
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
string
|
|
Packit |
a4aae4 |
unoctstring(string s)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
int val;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
istringstream ss(s);
|
|
Packit |
a4aae4 |
ss >> oct >> val;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "unoctstring: " << val << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
char tmp_str[2];
|
|
Packit |
a4aae4 |
tmp_str[0] = static_cast<char>(val);
|
|
Packit |
a4aae4 |
tmp_str[1] = '\0';
|
|
Packit |
a4aae4 |
return string(tmp_str);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Replace characters that are not allowed in DAP2 identifiers.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
-In the DAP itself, id2www() is called in:
|
|
Packit |
a4aae4 |
-# Array::print_decl() where dimension names are escaped
|
|
Packit |
a4aae4 |
-# AttrTable::print() (which calls AttrTable::simple_print()) where
|
|
Packit |
a4aae4 |
attribute names are escaped
|
|
Packit |
a4aae4 |
-# BaseType::print_decl() where variable names are escaped.
|
|
Packit |
a4aae4 |
-# Constructor::print_decl() where the name of the constructor type is
|
|
Packit |
a4aae4 |
printed.
|
|
Packit |
a4aae4 |
-# DDS::print() and DDS::print_constrained() where the name of the
|
|
Packit |
a4aae4 |
dataset is printed.
|
|
Packit |
a4aae4 |
-# Grid::print_decl() where the name of the grid is printed.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
-In the client code:
|
|
Packit |
a4aae4 |
-# id2www_ce() is called five times in the five methods that are used to
|
|
Packit |
a4aae4 |
request responses where a CE is appended to a URL
|
|
Packit |
a4aae4 |
(Connect::request_version, request_protocol, request_das, request_dds,
|
|
Packit |
a4aae4 |
request_data).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param in Replace characters in this string.
|
|
Packit |
a4aae4 |
@param allowable The set of characters that are allowed in a URI.
|
|
Packit |
a4aae4 |
default: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+_/.\\*"
|
|
Packit |
a4aae4 |
@see id2www_ce()
|
|
Packit |
a4aae4 |
@return The modified identifier. */
|
|
Packit |
a4aae4 |
string
|
|
Packit |
a4aae4 |
id2www(string in, const string &allowable)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
string::size_type i = 0;
|
|
Packit |
a4aae4 |
DBG(cerr<<"Input string: [" << in << "]" << endl);
|
|
Packit |
a4aae4 |
while ((i = in.find_first_not_of(allowable, i)) != string::npos) {
|
|
Packit |
a4aae4 |
DBG(cerr<<"Found escapee: [" << in[i] << "]");
|
|
Packit |
a4aae4 |
in.replace(i, 1, "%" + hexstring(in[i]));
|
|
Packit |
a4aae4 |
DBGN(cerr<<" now the string is: " << in << endl);
|
|
Packit |
a4aae4 |
i += 3;//i++;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return in;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Replace characters that are not allowed in WWW URLs using rules specific
|
|
Packit |
a4aae4 |
to Constraint Expressions. This has changed over time and now the only
|
|
Packit |
a4aae4 |
difference is that '*' is escaped by this function while it is not
|
|
Packit |
a4aae4 |
escaped by id2www().
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param in The string in which to replace characters.
|
|
Packit |
a4aae4 |
@param allowable The set of characters that are allowed in a URI.
|
|
Packit |
a4aae4 |
default: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+_/.\\"
|
|
Packit |
a4aae4 |
@see id2www()
|
|
Packit |
a4aae4 |
@return The modified identifier. */
|
|
Packit |
a4aae4 |
string
|
|
Packit |
a4aae4 |
id2www_ce(string in, const string &allowable)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
return id2www(in, allowable);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Given a string that contains WWW escape sequences, translate those escape
|
|
Packit |
a4aae4 |
sequences back into the ASCII characters they represent. Return the
|
|
Packit |
a4aae4 |
modified string.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
-Places in the dap code where www2id() is called:
|
|
Packit |
a4aae4 |
-# Array::append_dim() the name is decoded before it is added
|
|
Packit |
a4aae4 |
-# AttrTable::set_name(), AttrTable::append_attr(),
|
|
Packit |
a4aae4 |
AttrTable::append_container(), AttrTable::del_attr(),
|
|
Packit |
a4aae4 |
AttrTable::add_container_alias(), AttrTable::add_value_alias()
|
|
Packit |
a4aae4 |
names are decoded before that are set/used.
|
|
Packit |
a4aae4 |
-# BaseType::set_name() Names are decoded before they are set
|
|
Packit |
a4aae4 |
-# When the constraint expression parser looks for a variable, the name is
|
|
Packit |
a4aae4 |
first decoded.
|
|
Packit |
a4aae4 |
-# DAS::DAS() Named attribute containers are decoded
|
|
Packit |
a4aae4 |
-# DDS::var() When a DDS searches for a variable, the name is first decoded.
|
|
Packit |
a4aae4 |
-# Grid::var(), Sequence::var(), Structure::var() Variable names are decoded.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
-In the server code:
|
|
Packit |
a4aae4 |
-# ResponseBuilder::initialize() The dataset name is decoded except that %20
|
|
Packit |
a4aae4 |
is not removed.
|
|
Packit |
a4aae4 |
-# ResponseBuilder::set_ce() The CE is decoded, except for spaces (%20).
|
|
Packit |
a4aae4 |
-# ResponseBuilder::set_dataset_name() same logic as the first case.
|
|
Packit |
a4aae4 |
-# The ResponseBuilder methods supersede methods with the same names
|
|
Packit |
a4aae4 |
from DODSFilter, which is still in the code although deprecated.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param in The string to modify.
|
|
Packit |
a4aae4 |
@param escape The character used to signal the beginning of an escape
|
|
Packit |
a4aae4 |
sequence. default: "%"
|
|
Packit |
a4aae4 |
@param except If there are some escape codes that should not be removed by
|
|
Packit |
a4aae4 |
this call (e.g., you might not want to remove spaces, %20) use this
|
|
Packit |
a4aae4 |
parameter to specify those codes. The function will then transform all
|
|
Packit |
a4aae4 |
escapes \e except those given. For example, to suppress translation of both
|
|
Packit |
a4aae4 |
spaces and the ampersand, pass "%20%26" for 'except'. default: ""
|
|
Packit |
a4aae4 |
@return The modified string. */
|
|
Packit |
a4aae4 |
string
|
|
Packit |
a4aae4 |
www2id(const string &in, const string &escape, const string &except)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
string::size_type i = 0;
|
|
Packit |
a4aae4 |
string res = in;
|
|
Packit |
a4aae4 |
while ((i = res.find_first_of(escape, i)) != string::npos) {
|
|
Packit |
a4aae4 |
if (except.find(res.substr(i, 3)) != string::npos) {
|
|
Packit |
a4aae4 |
i += 3;
|
|
Packit |
a4aae4 |
continue;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
|
|
Packit |
a4aae4 |
++i;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return res;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
static string
|
|
Packit |
a4aae4 |
entity(char c)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
switch (c) {
|
|
Packit |
a4aae4 |
case '>': return ">";
|
|
Packit |
a4aae4 |
case '<': return "<";
|
|
Packit |
a4aae4 |
case '&': return "&";
|
|
Packit |
a4aae4 |
case '\'': return "'";
|
|
Packit |
a4aae4 |
case '\"': return """;
|
|
Packit |
a4aae4 |
default:
|
|
Packit |
a4aae4 |
throw InternalErr(__FILE__, __LINE__, "Unrecognized character.");
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Re-express a number written in octal digits as hexadecimal digits.

    The result is padded with zeros to at least two digits. The only caller
    visible in this file (the disabled code in id2xml()) passes the three
    digits of an octal escape, which fit in two hex digits for values up to
    377 octal.

    @param octal_digits Text parsed as an octal integer.
    @return The value in lowercase hexadecimal; "00" when \c octal_digits is
    empty or cannot be parsed. */
std::string
octal_to_hex(const std::string &octal_digits)
{
    // Start from zero: with an empty input the extraction below gives up
    // before storing anything, and val would otherwise be read while still
    // uninitialized.
    int val = 0;

    std::istringstream ss(octal_digits);
    ss >> std::oct >> val;

    std::ostringstream ds;
    ds << std::hex << std::setw(2) << std::setfill('0') << val;
    return ds.str();
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Replace characters that are not allowed in XML
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param in The string in which to replace characters.
|
|
Packit |
a4aae4 |
@param not_allowed The set of characters that are not allowed in XML.
|
|
Packit |
a4aae4 |
default: ><&'(single quote)"(double quote)
|
|
Packit |
a4aae4 |
@return The modified identifier. */
|
|
Packit |
a4aae4 |
string
|
|
Packit |
a4aae4 |
id2xml(string in, const string ¬_allowed)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
string::size_type i = 0;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
|
|
Packit |
a4aae4 |
in.replace(i, 1, entity(in[i]));
|
|
Packit |
a4aae4 |
++i;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
#if 0
|
|
Packit |
a4aae4 |
// Removed the encoding of octal escapes. This function is used by
|
|
Packit |
a4aae4 |
// AttrTable to encode the stuff that is the value of the <value>
|
|
Packit |
a4aae4 |
// element in the DDX. The problem is that some of the values are not
|
|
Packit |
a4aae4 |
// valid UTF-8 and that makes a XML parser gag.; ticket 1512.
|
|
Packit |
a4aae4 |
// jhrg 3/19/10
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// OK, now scan for octal escape sequences like \\012 (where the '\'
|
|
Packit |
a4aae4 |
// is itself escaped). This type of attribute value comes from the netCDF
|
|
Packit |
a4aae4 |
// handler and maybe others. Assumption: The '\' will always appear as
|
|
Packit |
a4aae4 |
// in its escaped form: '\\'. NB: Both backslashes must be escaped in the
|
|
Packit |
a4aae4 |
// C++ string.
|
|
Packit |
a4aae4 |
string octal_escape = "\\\\";
|
|
Packit |
a4aae4 |
i = 0;
|
|
Packit |
a4aae4 |
string::size_type length = in.length();
|
|
Packit |
a4aae4 |
while ((i = in.find(octal_escape, i)) != string::npos) {
|
|
Packit |
a4aae4 |
// Get the three octal digits following the '\\0'
|
|
Packit |
a4aae4 |
string::size_type j = i + 2;
|
|
Packit |
a4aae4 |
if (j + 1 >= length) // Check that we're not past the end
|
|
Packit |
a4aae4 |
break;
|
|
Packit |
a4aae4 |
string octal_digits = in.substr(j, 3);
|
|
Packit |
a4aae4 |
// convert to a Ý XML escape
|
|
Packit |
a4aae4 |
string hex_escape = string("&#x");
|
|
Packit |
a4aae4 |
hex_escape.append(octal_to_hex(octal_digits));
|
|
Packit |
a4aae4 |
hex_escape.append(string(";"));
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// replace the octal escape with an XML/hex escape
|
|
Packit |
a4aae4 |
in.replace(i, 5, hex_escape);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// increment i
|
|
Packit |
a4aae4 |
i += 6;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
return in;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Translate the XML entity references in a string back into the ASCII
    characters they stand for and return the result.

    The five predefined references are recognized: "&gt;", "&lt;", "&amp;",
    "&apos;" and "&quot;". The string is decoded in one left-to-right pass,
    so text produced by decoding is never decoded again: "&amp;lt;" becomes
    "&lt;", not "<". An '&' that does not start one of the five references
    is left as it is.

    @param in The string to modify.
    @return The modified string. */
std::string
xml2id(std::string in)
{
    struct Entity {
        const char *text;                   // the reference as written
        std::string::size_type length;      // number of characters in text
        char value;                         // the character it encodes
    };
    static const Entity entities[] = {
        { "&gt;", 4, '>' },
        { "&lt;", 4, '<' },
        { "&amp;", 5, '&' },
        { "&apos;", 6, '\'' },
        { "&quot;", 6, '"' }
    };
    static const unsigned int count = sizeof(entities) / sizeof(entities[0]);

    std::string::size_type i = 0;
    while ((i = in.find('&', i)) != std::string::npos) {
        for (unsigned int k = 0; k < count; ++k) {
            if (in.compare(i, entities[k].length, entities[k].text) == 0) {
                in.replace(i, entities[k].length, 1, entities[k].value);
                break;
            }
        }

        // Move past the decoded character (or past a bare '&') so that it
        // cannot combine with the text that follows it.
        ++i;
    }

    return in;
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Return a string that has all the \c %<hex digit><hex digit>
    sequences replaced with underscores (`_').

    The two characters after each '%' are not checked: the '%' and up to
    two characters that follow it are replaced whatever they are.

    @param s The string to transform
    @return The modified string. */
std::string
esc2underscore(std::string s)
{
    // The replacement never contains '%', so each search can resume just
    // after the underscore instead of rescanning from the beginning.
    std::string::size_type pos = s.find('%');
    while (pos != std::string::npos) {
        s.replace(pos, 3, "_");
        pos = s.find('%', pos + 1);
    }

    return s;
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Escape non-printable characters and quotes from an HDF attribute.
|
|
Packit |
a4aae4 |
@param s The attribute to modify.
|
|
Packit |
a4aae4 |
@return The modified attribute. */
|
|
Packit |
a4aae4 |
string
|
|
Packit |
a4aae4 |
escattr(string s)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
const string printable = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~`!@#$%^&*()_-+={[}]|\\:;<,>.?/'\"";
|
|
Packit |
a4aae4 |
const string ESC = "\\";
|
|
Packit |
a4aae4 |
const string DOUBLE_ESC = ESC + ESC;
|
|
Packit |
a4aae4 |
const string QUOTE = "\"";
|
|
Packit |
a4aae4 |
const string ESCQUOTE = ESC + QUOTE;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// escape \ with a second backslash
|
|
Packit |
a4aae4 |
string::size_type ind = 0;
|
|
Packit |
a4aae4 |
while ((ind = s.find(ESC, ind)) != s.npos) {
|
|
Packit |
a4aae4 |
s.replace(ind, 1, DOUBLE_ESC);
|
|
Packit |
a4aae4 |
ind += DOUBLE_ESC.length();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// escape non-printing characters with octal escape
|
|
Packit |
a4aae4 |
ind = 0;
|
|
Packit |
a4aae4 |
while ((ind = s.find_first_not_of(printable, ind)) != s.npos)
|
|
Packit |
a4aae4 |
s.replace(ind, 1, ESC + octstring(s[ind]));
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// escape " with backslash
|
|
Packit |
a4aae4 |
ind = 0;
|
|
Packit |
a4aae4 |
while ((ind = s.find(QUOTE, ind)) != s.npos) {
|
|
Packit |
a4aae4 |
s.replace(ind, 1, ESCQUOTE);
|
|
Packit |
a4aae4 |
ind += ESCQUOTE.length();
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return s;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Un-escape special characters, quotes and backslashes from an HDF
|
|
Packit |
a4aae4 |
attribute.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
Note: A regex to match one \ must be defined as: Regex foo = "\\\\";
|
|
Packit |
a4aae4 |
because both C++ strings and GNU's Regex also employ \ as an escape
|
|
Packit |
a4aae4 |
character!
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param s The escaped attribute. @return The unescaped attribute. */
|
|
Packit |
a4aae4 |
string
|
|
Packit |
a4aae4 |
unescattr(string s)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
Regex octal("\\\\[0-3][0-7][0-7]"); // matches 4 characters
|
|
Packit |
a4aae4 |
Regex esc_quote("\\\\\""); // matches 3 characters
|
|
Packit |
a4aae4 |
Regex esc_esc("\\\\\\\\"); // matches 2 characters
|
|
Packit |
a4aae4 |
const string ESC = "\\";
|
|
Packit |
a4aae4 |
const string QUOTE = "\"";
|
|
Packit |
a4aae4 |
int matchlen;
|
|
Packit |
a4aae4 |
unsigned int index;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "0XX" << s << "XXX" << endl);
|
|
Packit |
a4aae4 |
// unescape any escaped backslashes
|
|
Packit |
a4aae4 |
index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
|
|
Packit |
a4aae4 |
while (index < s.length()) {
|
|
Packit |
a4aae4 |
DBG(cerr << "1aXX" << s << "XXX index: " << index << endl);
|
|
Packit |
a4aae4 |
s.replace(index, 2, ESC);
|
|
Packit |
a4aae4 |
DBG(cerr << "1bXX" << s << "XXX index: " << index << endl);
|
|
Packit |
a4aae4 |
index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// unescape any escaped double quote characters
|
|
Packit |
a4aae4 |
index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
|
|
Packit |
a4aae4 |
while (index < s.length()) {
|
|
Packit |
a4aae4 |
s.replace(index, 2, QUOTE);
|
|
Packit |
a4aae4 |
DBG(cerr << "2XX" << s << "XXX index: " << index << endl);
|
|
Packit |
a4aae4 |
index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// unescape octal characters
|
|
Packit |
a4aae4 |
index = octal.search(s.c_str(), s.length(), matchlen, 0);
|
|
Packit |
a4aae4 |
while (index < s.length()) {
|
|
Packit |
a4aae4 |
s.replace(index, 4, unoctstring(s.substr(index + 1, 3)));
|
|
Packit |
a4aae4 |
DBG(cerr << "3XX" << s << "XXX index: " << index << endl);
|
|
Packit |
a4aae4 |
index = octal.search(s.c_str(), s.length(), matchlen, 0);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG(cerr << "4XX" << s << "XXX" << endl);
|
|
Packit |
a4aae4 |
return s;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Wrap an error message in double quotes and escape the double quotes
    inside it.

    An opening quote is added unless the message already starts with one,
    and a closing quote is added unless it already ends with one. Every
    double quote between those two that does not already follow a backslash
    gets a backslash in front of it. An empty message yields a pair of
    quotes.

    @param msg The message text.
    @return The quoted message. */
std::string
munge_error_message(std::string msg)
{
    // First, add enclosing quotes if needed. The emptiness and length
    // checks keep an empty message, or one that is a single quote
    // character, from being examined out of range.
    if (msg.empty() || msg[0] != '"')
        msg.insert(msg.begin(), '"');
    if (msg.length() < 2 || msg[msg.length() - 1] != '"')
        msg += "\"";

    // Now escape any internal double quotes that aren't escaped.
    for (std::string::size_type i = 1; i + 1 < msg.length(); ++i) {
        if (msg[i] == '"' && msg[i - 1] != '\\') {
            msg.insert(i, 1, '\\');
            ++i;    // the quote moved one place to the right; step over it
        }
    }

    return msg;
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Put a backslash in front of every double quote in a string.
    @param source The text to escape.
    @return A copy of \c source in which each double quote is replaced by
    the two-character sequence backslash, double quote.
*/
std::string
escape_double_quotes(std::string source)
{
    std::string escaped;

    for (std::string::size_type i = 0; i < source.length(); ++i) {
        if (source[i] == '\"')
            escaped += "\\\""; // a backslash and a double quote
        else
            escaped += source[i];
    }

    return escaped;
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Turn every backslash, double quote pair in a string back into a plain
    double quote.
    @param source The text to unescape.
    @return A copy of \c source with the backslash removed from each such
    pair.
*/
std::string
unescape_double_quotes(std::string source)
{
    const std::string escaped_quote = "\\\""; // a backslash and a double quote

    // Dropping the backslash leaves the quote at 'pos'; the next search
    // starts just after that quote.
    for (std::string::size_type pos = source.find(escaped_quote, 0);
         pos != std::string::npos;
         pos = source.find(escaped_quote, pos + 1))
        source.erase(pos, 1);

    return source;
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
} // namespace libdap
|
|
Packit |
a4aae4 |
|