|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// -*- mode: c++; c-basic-offset:4 -*-
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// This file is part of libdap, A C++ implementation of the OPeNDAP Data
|
|
Packit |
a4aae4 |
// Access Protocol.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Copyright (c) 2005 OPeNDAP, Inc.
|
|
Packit |
a4aae4 |
// Author: James Gallagher <jgallagher@opendap.org>
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is free software; you can redistribute it and/or
|
|
Packit |
a4aae4 |
// modify it under the terms of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License as published by the Free Software Foundation; either
|
|
Packit |
a4aae4 |
// version 2.1 of the License, or (at your option) any later version.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// This library is distributed in the hope that it will be useful,
|
|
Packit |
a4aae4 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
a4aae4 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
a4aae4 |
// Lesser General Public License for more details.
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You should have received a copy of the GNU Lesser General Public
|
|
Packit |
a4aae4 |
// License along with this library; if not, write to the Free Software
|
|
Packit |
a4aae4 |
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
Packit |
a4aae4 |
//
|
|
Packit |
a4aae4 |
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
//#define DODS_DEBUG
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <config.h>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#ifndef WIN32
|
|
Packit |
a4aae4 |
#include <alloca.h>
|
|
Packit |
a4aae4 |
#endif
|
|
Packit |
a4aae4 |
#include <stdlib.h>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <sys/types.h>
|
|
Packit |
a4aae4 |
#include <regex.h>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include <new>
|
|
Packit |
a4aae4 |
#include <string>
|
|
Packit |
a4aae4 |
#include <vector>
|
|
Packit |
a4aae4 |
#include <stdexcept>
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
#include "GNURegex.h"
|
|
Packit |
a4aae4 |
#include "Error.h"
|
|
Packit |
a4aae4 |
#include "util.h"
|
|
Packit |
a4aae4 |
#include "debug.h"
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
using namespace std;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
namespace libdap {
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
void
|
|
Packit |
a4aae4 |
Regex::init(const char *t)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - BEGIN" << endl);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - creating new regex..." << endl);
|
|
Packit |
a4aae4 |
d_preg = static_cast<void*>(new regex_t);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - Calling regcomp()..." << endl);
|
|
Packit |
a4aae4 |
int result = regcomp(static_cast<regex_t*>(d_preg), t, REG_EXTENDED);
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
if (result != 0) {
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - Call to regcomp FAILED" << endl);
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - Calling regerror()..." << endl);
|
|
Packit |
a4aae4 |
size_t msg_len = regerror(result, static_cast<regex_t*>(d_preg),
|
|
Packit |
a4aae4 |
static_cast<char*>(NULL),
|
|
Packit |
a4aae4 |
static_cast<size_t>(0));
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - Creating message" << endl);
|
|
Packit |
a4aae4 |
vector<char> msg(msg_len+1);
|
|
Packit |
a4aae4 |
//char *msg = new char[msg_len+1];
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - Calling regerror() again..." << endl);
|
|
Packit |
a4aae4 |
regerror(result, static_cast<regex_t*>(d_preg), &msg[0], msg_len);
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - Throwing libdap::Error" << endl);
|
|
Packit |
a4aae4 |
throw Error(string("Regex error: ") + string(&msg[0]));
|
|
Packit |
a4aae4 |
//delete[] msg;
|
|
Packit |
a4aae4 |
//throw e;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - Call to regcomp() SUCCEEDED" << endl);
|
|
Packit |
a4aae4 |
DBG( cerr << "Regex::init() - END" << endl);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Release the compiled expression: first the resources regcomp() attached
    to the regex_t, then the regex_t object itself. */
Regex::~Regex()
{
    regex_t *compiled = static_cast<regex_t*>(d_preg);

    regfree(compiled);
    delete compiled;
    d_preg = 0;
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Initialize a POSIX regular expression (using the 'extended' features).
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param t The regular expression pattern. */
|
|
Packit |
a4aae4 |
Regex::Regex(const char* t)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
init(t);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Compatability ctor.
|
|
Packit |
a4aae4 |
@see Regex::Regex(const char* t) */
|
|
Packit |
a4aae4 |
Regex::Regex(const char* t, int)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
init(t);
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Does the regular expression match the string?
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param s The string
|
|
Packit |
a4aae4 |
@param len The length of string to consider
|
|
Packit |
a4aae4 |
@param pos Start looking at this position in the string
|
|
Packit |
a4aae4 |
@return The number of characters that match, -1 if there's no match. */
|
|
Packit |
a4aae4 |
int
|
|
Packit |
a4aae4 |
Regex::match(const char* s, int len, int pos)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
if (len > 32766) // Integer overflow protection
|
|
Packit |
a4aae4 |
return -1;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
regmatch_t *pmatch = new regmatch_t[len+1];
|
|
Packit |
a4aae4 |
string ss = s;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
int result = regexec(static_cast<regex_t*>(d_preg),
|
|
Packit |
a4aae4 |
ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
|
|
Packit |
a4aae4 |
int matchnum;
|
|
Packit |
a4aae4 |
if (result == REG_NOMATCH)
|
|
Packit |
a4aae4 |
matchnum = -1;
|
|
Packit |
a4aae4 |
else
|
|
Packit |
a4aae4 |
matchnum = pmatch[0].rm_eo - pmatch[0].rm_so;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
delete[] pmatch; pmatch = 0;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
return matchnum;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
/** Does the regular expression match the string?
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
@param s The string
|
|
Packit |
a4aae4 |
@param len The length of string to consider
|
|
Packit |
a4aae4 |
@param matchlen Return the length of the matched portion in this
|
|
Packit |
a4aae4 |
value-result parameter.
|
|
Packit |
a4aae4 |
@param pos Start looking at this position in the string
|
|
Packit |
a4aae4 |
@return The start position of the first match. This is different from
|
|
Packit |
a4aae4 |
POSIX regular expressions, whcih return the start position of the
|
|
Packit |
a4aae4 |
longest match. */
|
|
Packit |
a4aae4 |
int
|
|
Packit |
a4aae4 |
Regex::search(const char* s, int len, int& matchlen, int pos)
|
|
Packit |
a4aae4 |
{
|
|
Packit |
a4aae4 |
// sanitize allocation
|
|
Packit |
a4aae4 |
if (!size_ok(sizeof(regmatch_t), len+1))
|
|
Packit |
a4aae4 |
return -1;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// alloc space for len matches, which is theoretical max.
|
|
Packit |
a4aae4 |
// Problem: If somehow 'len' is very large - say the size of a 32-bit int,
|
|
Packit |
a4aae4 |
// then len+1 is a an integer overflow and this might be exploited by
|
|
Packit |
a4aae4 |
// an attacker. It's not likely there will be more than a handful of
|
|
Packit |
a4aae4 |
// matches, so I am going to limit this value to 32766. jhrg 3/4/09
|
|
Packit |
a4aae4 |
if (len > 32766)
|
|
Packit |
a4aae4 |
return -1;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
regmatch_t *pmatch = new regmatch_t[len+1];
|
|
Packit |
a4aae4 |
string ss = s;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
int result = regexec(static_cast<regex_t*>(d_preg),
|
|
Packit |
a4aae4 |
ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
|
|
Packit |
a4aae4 |
if (result == REG_NOMATCH) {
|
|
Packit |
a4aae4 |
delete[] pmatch; pmatch = 0;
|
|
Packit |
a4aae4 |
return -1;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
// Match found, find the first one (pmatch lists the longest first)
|
|
Packit |
a4aae4 |
int m = 0;
|
|
Packit |
a4aae4 |
for (int i = 1; i < len; ++i)
|
|
Packit |
a4aae4 |
if (pmatch[i].rm_so != -1 && pmatch[i].rm_so < pmatch[m].rm_so)
|
|
Packit |
a4aae4 |
m = i;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
matchlen = pmatch[m].rm_eo - pmatch[m].rm_so;
|
|
Packit |
a4aae4 |
int matchpos = pmatch[m].rm_so;
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
delete[] pmatch; pmatch = 0;
|
|
Packit |
a4aae4 |
return matchpos;
|
|
Packit |
a4aae4 |
}
|
|
Packit |
a4aae4 |
|
|
Packit |
a4aae4 |
} // namespace libdap
|
|
Packit |
a4aae4 |
|