Blame pcrecpp.h

Packit 78a954
// Copyright (c) 2005, Google Inc.
Packit 78a954
// All rights reserved.
Packit 78a954
Packit 78a954
// Redistribution and use in source and binary forms, with or without
Packit 78a954
// modification, are permitted provided that the following conditions are
Packit 78a954
// met:
Packit 78a954
Packit 78a954
//     * Redistributions of source code must retain the above copyright
Packit 78a954
// notice, this list of conditions and the following disclaimer.
Packit 78a954
//     * Redistributions in binary form must reproduce the above
Packit 78a954
// copyright notice, this list of conditions and the following disclaimer
Packit 78a954
// in the documentation and/or other materials provided with the
Packit 78a954
// distribution.
Packit 78a954
//     * Neither the name of Google Inc. nor the names of its
Packit 78a954
// contributors may be used to endorse or promote products derived from
Packit 78a954
// this software without specific prior written permission.
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
// Author: Sanjay Ghemawat
Packit 78a954
// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
Packit 78a954
Packit 78a954
#ifndef _PCRECPP_H
Packit 78a954
#define _PCRECPP_H
Packit 78a954
Packit 78a954
// C++ interface to the pcre regular-expression library.  RE supports
Packit 78a954
// Perl-style regular expressions (with extensions like \d, \w, \s,
Packit 78a954
// ...).
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// This module is part of the pcre library and hence supports its syntax
Packit 78a954
// for regular expressions.
Packit 78a954
Packit 78a954
// The syntax is pretty similar to Perl's.  For those not familiar
Packit 78a954
// with Perl's regular expressions, here are some examples of the most
Packit 78a954
// commonly used extensions:
Packit 78a954
Packit 78a954
//   "hello (\\w+) world"  -- \w matches a "word" character
Packit 78a954
//   "version (\\d+)"      -- \d matches a digit
Packit 78a954
//   "hello\\s+world"      -- \s matches any whitespace character
Packit 78a954
//   "\\b(\\w+)\\b"        -- \b matches empty string at a word boundary
Packit 78a954
//   "(?i)hello"           -- (?i) turns on case-insensitive matching
Packit 78a954
//   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// The "FullMatch" operation checks that supplied text matches a
Packit 78a954
// supplied pattern exactly.
Packit 78a954
Packit 78a954
// Example: successful match
Packit 78a954
//    pcrecpp::RE re("h.*o");
Packit 78a954
//    re.FullMatch("hello");
Packit 78a954
Packit 78a954
// Example: unsuccessful match (requires full match):
Packit 78a954
//    pcrecpp::RE re("e");
Packit 78a954
//    !re.FullMatch("hello");
Packit 78a954
Packit 78a954
// Example: creating a temporary RE object:
Packit 78a954
//    pcrecpp::RE("h.*o").FullMatch("hello");
Packit 78a954
Packit 78a954
// You can pass in a "const char*" or a "string" for "text".  The
Packit 78a954
// examples below tend to use a const char*.
Packit 78a954
Packit 78a954
// You can, as in the different examples above, store the RE object
Packit 78a954
// explicitly in a variable or use a temporary RE object.  The
Packit 78a954
// examples below use one mode or the other arbitrarily.  Either
Packit 78a954
// could correctly be used for any of these examples.
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// You can supply extra pointer arguments to extract matched subpieces.
Packit 78a954
Packit 78a954
// Example: extracts "ruby" into "s" and 1234 into "i"
Packit 78a954
//    int i;
Packit 78a954
//    string s;
Packit 78a954
//    pcrecpp::RE re("(\\w+):(\\d+)");
Packit 78a954
//    re.FullMatch("ruby:1234", &s, &i);
Packit 78a954
Packit 78a954
// Example: does not try to extract any extra sub-patterns
Packit 78a954
//    re.FullMatch("ruby:1234", &s);
Packit 78a954
Packit 78a954
// Example: does not try to extract into NULL
Packit 78a954
//    re.FullMatch("ruby:1234", NULL, &i);
Packit 78a954
Packit 78a954
// Example: integer overflow causes failure
Packit 78a954
//    !re.FullMatch("ruby:1234567891234", NULL, &i);
Packit 78a954
Packit 78a954
// Example: fails because there aren't enough sub-patterns:
Packit 78a954
//    !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
Packit 78a954
Packit 78a954
// Example: fails because string cannot be stored in integer
Packit 78a954
//    !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
Packit 78a954
Packit 78a954
// The provided pointer arguments can be pointers to any scalar numeric
Packit 78a954
// type, or one of
Packit 78a954
//    string        (matched piece is copied to string)
Packit 78a954
//    StringPiece   (StringPiece is mutated to point to matched piece)
Packit 78a954
//    T             (where "bool T::ParseFrom(const char*, int)" exists)
Packit 78a954
//    NULL          (the corresponding matched sub-pattern is not copied)
Packit 78a954
Packit 78a954
// CAVEAT: An optional sub-pattern that does not exist in the matched
Packit 78a954
// string is assigned the empty string.  Therefore, the following will
Packit 78a954
// return false (because the empty string is not a valid number):
Packit 78a954
//    int number;
Packit 78a954
//    pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// The matching interface supports at most 16 arguments per call.
Packit 78a954
// If you need more, consider using the more general interface
Packit 78a954
// pcrecpp::RE::DoMatch().  See the declaration of DoMatch further down in
// this file for its signature.
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// You can use the "PartialMatch" operation when you want the pattern
Packit 78a954
// to match any substring of the text.
Packit 78a954
Packit 78a954
// Example: simple search for a string:
Packit 78a954
//    pcrecpp::RE("ell").PartialMatch("hello");
Packit 78a954
Packit 78a954
// Example: find first number in a string:
Packit 78a954
//    int number;
Packit 78a954
//    pcrecpp::RE re("(\\d+)");
Packit 78a954
//    re.PartialMatch("x*100 + 20", &number);
Packit 78a954
//    assert(number == 100);
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// By default, pattern and text are plain text, one byte per character.
Packit 78a954
// The UTF8 flag, passed to the constructor, causes both pattern
Packit 78a954
// and string to be treated as UTF-8 text, still a byte stream but
Packit 78a954
// potentially multiple bytes per character. In practice, the text
Packit 78a954
// is likelier to be UTF-8 than the pattern, but the match returned
Packit 78a954
// may depend on the UTF8 flag, so always use it when matching
Packit 78a954
// UTF8 text.  E.g., "." will match one byte normally but with UTF8
Packit 78a954
// set may match up to three bytes of a multi-byte character.
Packit 78a954
Packit 78a954
// Example:
Packit 78a954
//    pcrecpp::RE_Options options;
Packit 78a954
//    options.set_utf8();
Packit 78a954
//    pcrecpp::RE re(utf8_pattern, options);
Packit 78a954
//    re.FullMatch(utf8_string);
Packit 78a954
Packit 78a954
// Example: using the convenience function UTF8():
Packit 78a954
//    pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
Packit 78a954
//    re.FullMatch(utf8_string);
Packit 78a954
Packit 78a954
// NOTE: The UTF8 option is ignored if pcre was not configured with the
Packit 78a954
//       --enable-utf8 flag.
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// PCRE defines some modifiers to change the behavior of the regular
Packit 78a954
// expression engine.
Packit 78a954
// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
Packit 78a954
// to pass such modifiers to a RE class.
Packit 78a954
Packit 78a954
// Currently, the following modifiers are supported
Packit 78a954
Packit 78a954
//    modifier              description               Perl corresponding
Packit 78a954
Packit 78a954
//    PCRE_CASELESS         case insensitive match    /i
Packit 78a954
//    PCRE_MULTILINE        multiple lines match      /m
Packit 78a954
//    PCRE_DOTALL           dot matches newlines      /s
Packit 78a954
//    PCRE_DOLLAR_ENDONLY   $ matches only at end     N/A
Packit 78a954
//    PCRE_EXTRA            strict escape parsing     N/A
Packit 78a954
//    PCRE_EXTENDED         ignore whitespaces        /x
Packit 78a954
//    PCRE_UTF8             handles UTF8 chars        built-in
Packit 78a954
//    PCRE_UNGREEDY         reverses * and *?         N/A
Packit 78a954
//    PCRE_NO_AUTO_CAPTURE  disables capturing parens N/A (*)
Packit 78a954
Packit 78a954
// (For a full account on how each modifier works, please check the
Packit 78a954
// PCRE API reference manual).
Packit 78a954
Packit 78a954
// (*) Both Perl and PCRE allow non-capturing parentheses by means of the
Packit 78a954
// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
Packit 78a954
// capture, while (ab|cd) does.
Packit 78a954
Packit 78a954
// For each modifier, there are two member functions whose name is made
Packit 78a954
// out of the modifier in lowercase, without the "PCRE_" prefix. For
Packit 78a954
// instance, PCRE_CASELESS is handled by
Packit 78a954
//    bool caseless(),
Packit 78a954
// which returns true if the modifier is set, and
Packit 78a954
//    RE_Options & set_caseless(bool),
Packit 78a954
// which sets or unsets the modifier.
Packit 78a954
Packit 78a954
// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
Packit 78a954
// set_match_limit() and match_limit() member functions.
Packit 78a954
// Setting match_limit to a non-zero value will limit the execution of
Packit 78a954
// pcre to keep it from doing bad things like blowing the stack or taking
Packit 78a954
// an eternity to return a result.  A value of 5000 is good enough to stop
Packit 78a954
// stack blowup in a 2MB thread stack.  Setting match_limit to zero will
Packit 78a954
// disable match limiting.  Alternately, you can set match_limit_recursion()
Packit 78a954
// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
Packit 78a954
// recurses.  match_limit() caps the number of matches pcre does;
Packit 78a954
// match_limit_recursion() caps the depth of recursion.
Packit 78a954
Packit 78a954
// Normally, to pass one or more modifiers to a RE class, you declare
Packit 78a954
// a RE_Options object, set the appropriate options, and pass this
Packit 78a954
// object to a RE constructor. Example:
Packit 78a954
Packit 78a954
//    RE_Options opt;
Packit 78a954
//    opt.set_caseless(true);
Packit 78a954
Packit 78a954
//    if (RE("HELLO", opt).PartialMatch("hello world")) ...
Packit 78a954
Packit 78a954
// RE_Options has two constructors. The default constructor takes no
Packit 78a954
// arguments and creates a set of flags that are off by default.
Packit 78a954
Packit 78a954
// The optional parameter 'option_flags' is to facilitate transfer
Packit 78a954
// of legacy code from C programs.  This lets you do
Packit 78a954
//    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
Packit 78a954
Packit 78a954
// But new code is better off doing
Packit 78a954
//    RE(pattern,
Packit 78a954
//      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
Packit 78a954
// (See below)
Packit 78a954
Packit 78a954
// If you are going to pass one of the most used modifiers, there are some
Packit 78a954
// convenience functions that return a RE_Options class with the
Packit 78a954
// appropriate modifier already set:
//    UTF8(), CASELESS(), MULTILINE(), DOTALL(), EXTENDED()
Packit 78a954
Packit 78a954
Packit 78a954
// If you need to set several options at once, and you don't want to go
Packit 78a954
// through the pains of declaring a RE_Options object and setting several
Packit 78a954
// options, there is a parallel method that gives you such ability on the
Packit 78a954
// fly. You can concatenate several set_xxxxx member functions, since each
Packit 78a954
// of them returns a reference to its class object.  e.g.: to pass
Packit 78a954
Packit 78a954
// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE in a single
// statement, you may write
Packit 78a954
Packit 78a954
//    RE(" ^ xyz \\s+ .* blah$", RE_Options()
Packit 78a954
//                            .set_caseless(true)
Packit 78a954
//                            .set_extended(true)
Packit 78a954
//                            .set_multiline(true)).PartialMatch(sometext);
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// The "Consume" operation may be useful if you want to repeatedly
Packit 78a954
// match regular expressions at the front of a string and skip over
Packit 78a954
// them as they match.  This requires use of the "StringPiece" type,
Packit 78a954
// which represents a sub-range of a real string.  Like RE, StringPiece
Packit 78a954
// is defined in the pcrecpp namespace.
Packit 78a954
Packit 78a954
// Example: read lines of the form "var = value" from a string.
Packit 78a954
//    string contents = ...;                 // Fill string somehow
Packit 78a954
//    pcrecpp::StringPiece input(contents);  // Wrap in a StringPiece
Packit 78a954
Packit 78a954
//    string var;
Packit 78a954
//    int value;
Packit 78a954
//    pcrecpp::RE re("(\\w+) = (\\d+)\n");
Packit 78a954
//    while (re.Consume(&input, &var, &value)) {
Packit 78a954
//      ...;
Packit 78a954
//    }
Packit 78a954
Packit 78a954
// Each successful call to "Consume" will set "var/value", and also
Packit 78a954
// advance "input" so it points past the matched text.
Packit 78a954
Packit 78a954
// The "FindAndConsume" operation is similar to "Consume" but does not
Packit 78a954
// anchor your match at the beginning of the string.  For example, you
Packit 78a954
// could extract all words from a string by repeatedly calling
Packit 78a954
//     pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// By default, if you pass a pointer to a numeric value, the
Packit 78a954
// corresponding text is interpreted as a base-10 number.  You can
Packit 78a954
// instead wrap the pointer with a call to one of the operators Hex(),
Packit 78a954
// Octal(), or CRadix() to interpret the text in another base.  The
Packit 78a954
// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
Packit 78a954
// prefixes, but defaults to base-10.
Packit 78a954
Packit 78a954
// Example:
Packit 78a954
//   int a, b, c, d;
Packit 78a954
//   pcrecpp::RE re("(.*) (.*) (.*) (.*)");
Packit 78a954
//   re.FullMatch("100 40 0100 0x40",
Packit 78a954
//                pcrecpp::Octal(&a), pcrecpp::Hex(&b),
Packit 78a954
//                pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
Packit 78a954
// will leave 64 in a, b, c, and d.
Packit 78a954
Packit 78a954
// -----------------------------------------------------------------------
Packit 78a954
Packit 78a954
Packit 78a954
// You can replace the first match of "pattern" in "str" with
Packit 78a954
// "rewrite".  Within "rewrite", backslash-escaped digits (\1 to \9)
Packit 78a954
// can be used to insert text matching corresponding parenthesized
Packit 78a954
// group from the pattern.  \0 in "rewrite" refers to the entire
Packit 78a954
// matching text.  E.g.,
Packit 78a954
Packit 78a954
//   string s = "yabba dabba doo";
Packit 78a954
//   pcrecpp::RE("b+").Replace("d", &s);
Packit 78a954
Packit 78a954
// will leave "s" containing "yada dabba doo".  The result is true if
Packit 78a954
// the pattern matches and a replacement occurs, or false otherwise.
Packit 78a954
Packit 78a954
// GlobalReplace() is like Replace(), except that it replaces all
Packit 78a954
// occurrences of the pattern in the string with the rewrite.
Packit 78a954
// Replacements are not subject to re-matching.  E.g.,
Packit 78a954
Packit 78a954
//   string s = "yabba dabba doo";
Packit 78a954
//   pcrecpp::RE("b+").GlobalReplace("d", &s);
Packit 78a954
Packit 78a954
// will leave "s" containing "yada dada doo".  It returns the number
Packit 78a954
// of replacements made.
Packit 78a954
Packit 78a954
// Extract() is like Replace(), except that if the pattern matches,
Packit 78a954
// "rewrite" is copied into "out" (an additional argument) with
Packit 78a954
// substitutions.  The non-matching portions of "text" are ignored.
Packit 78a954
// Returns true iff a match occurred and the extraction happened
Packit 78a954
// successfully.  If no match occurs, the string is left unaffected.
Packit 78a954
Packit 78a954
Packit 78a954
#include <string>
Packit 78a954
#include <pcre.h>
Packit 78a954
#include <pcrecpparg.h>   // defines the Arg class
Packit 78a954
// This isn't technically needed here, but we include it
Packit 78a954
// anyway so folks who include pcrecpp.h don't have to.
Packit 78a954
#include <pcre_stringpiece.h>
Packit 78a954
Packit 78a954
namespace pcrecpp {
Packit 78a954
Packit 78a954
// Body of an RE_Options "set_xxx(bool)" mutator: turns the option bit(s) `o`
// on in all_options_ when `b` is true and off otherwise, then returns *this.
// The expansion ends in a return statement, so it has to be the last
// statement of a member function that returns a reference to its own object.
// All three lines must stay joined by the trailing backslashes.
#define PCRE_SET_OR_CLEAR(b, o) \
    all_options_ = (b) ? (all_options_ | (o)) : (all_options_ & ~(o)); \
    return *this
Packit 78a954
Packit 78a954
// Evaluates to true when every bit of `o` is set in all_options_.
// Both the argument and the whole expansion are parenthesized so the macro
// gives the right answer for compound arguments (A|B) and when it is used
// inside a larger expression (for example after '!').  Note that `o` is
// evaluated twice.
#define PCRE_IS_SET(o)  \
        (((all_options_) & (o)) == (o))
Packit 78a954
Packit 78a954
/***** Compiling regular expressions: the RE class *****/
Packit 78a954
Packit 78a954
// RE_Options allow you to set options to be passed along to pcre,
Packit 78a954
// along with other options we put on top of pcre.
Packit 78a954
// Only 9 modifiers, plus match_limit and match_limit_recursion,
Packit 78a954
// are supported now.
Packit 78a954
class PCRECPP_EXP_DEFN RE_Options {
Packit 78a954
Packit 78a954
  // constructor
Packit 78a954
  // Default state: both limits are 0 and no option bit is set.
  RE_Options()
      : match_limit_(0),
        match_limit_recursion_(0),
        all_options_(0) {}
Packit 78a954
Packit 78a954
  // alternative constructor.
Packit 78a954
  // To facilitate transfer of legacy code from C programs
Packit 78a954
Packit 78a954
  // This lets you do
Packit 78a954
  //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
Packit 78a954
  // But new code is better off doing
Packit 78a954
  //    RE(pattern,
Packit 78a954
  //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
Packit 78a954
  // Starts from a raw option bitmask: `option_flags` is stored verbatim in
  // all_options_, and both limits are 0.  Left non-explicit so the existing
  // interface (implicit conversion from int) is preserved.
  RE_Options(int option_flags)
      : match_limit_(0),
        match_limit_recursion_(0),
        all_options_(option_flags) {}
Packit 78a954
  // we're fine with the default destructor, copy constructor, etc.
Packit 78a954
Packit 78a954
  // accessors and mutators
Packit 78a954
  // Returns the stored match limit (match_limit_).
  int match_limit() const { return match_limit_; }
Packit 78a954
  // Stores `limit` in match_limit_ and returns *this so that setter calls
  // can be chained.
  RE_Options &set_match_limit(int limit) {
    match_limit_ = limit;
    return *this;
  }
Packit 78a954
Packit 78a954
Packit 78a954
  // Returns the stored recursion limit (match_limit_recursion_).
  int match_limit_recursion() const { return match_limit_recursion_; }
Packit 78a954
  // Stores `limit` in match_limit_recursion_ and returns *this so that
  // setter calls can be chained.
  RE_Options &set_match_limit_recursion(int limit) {
    match_limit_recursion_ = limit;
    return *this;
  }
Packit 78a954
Packit 78a954
Packit 78a954
  bool caseless() const {
Packit 78a954
Packit 78a954
Packit 78a954
  RE_Options &set_caseless(bool x) {
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  bool multiline() const {
Packit 78a954
Packit 78a954
Packit 78a954
  RE_Options &set_multiline(bool x) {
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  bool dotall() const {
Packit 78a954
Packit 78a954
Packit 78a954
  RE_Options &set_dotall(bool x) {
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  bool extended() const {
Packit 78a954
Packit 78a954
Packit 78a954
  RE_Options &set_extended(bool x) {
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  bool dollar_endonly() const {
Packit 78a954
Packit 78a954
Packit 78a954
  RE_Options &set_dollar_endonly(bool x) {
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  bool extra() const {
Packit 78a954
Packit 78a954
Packit 78a954
  RE_Options &set_extra(bool x) {
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  bool ungreedy() const {
Packit 78a954
Packit 78a954
Packit 78a954
  RE_Options &set_ungreedy(bool x) {
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  bool utf8() const {
Packit 78a954
    return PCRE_IS_SET(PCRE_UTF8);
Packit 78a954
Packit 78a954
  RE_Options &set_utf8(bool x) {
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  bool no_auto_capture() const {
Packit 78a954
Packit 78a954
Packit 78a954
  RE_Options &set_no_auto_capture(bool x) {
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  // Overwrites the whole option bitmask with `opt` (bits that were set
  // before are not kept) and returns *this so that calls can be chained.
  RE_Options &set_all_options(int opt) {
    all_options_ = opt;
    return *this;
  }
Packit 78a954
Packit 78a954
  int all_options() const {
Packit 78a954
    return all_options_ ;
Packit 78a954
Packit 78a954
Packit 78a954
  // TODO: add other pcre flags
Packit 78a954
Packit 78a954
Packit 78a954
  int match_limit_;
Packit 78a954
  int match_limit_recursion_;
Packit 78a954
  int all_options_;
Packit 78a954
Packit 78a954
Packit 78a954
// These functions return some common RE_Options
Packit 78a954
// Returns a default-constructed RE_Options on which set_utf8(true) has
// been called.
static inline RE_Options UTF8() {
  return RE_Options().set_utf8(true);
}
Packit 78a954
Packit 78a954
Packit 78a954
// Returns a default-constructed RE_Options on which set_caseless(true) has
// been called.
static inline RE_Options CASELESS() {
  return RE_Options().set_caseless(true);
}
Packit 78a954
Packit 78a954
// Returns a default-constructed RE_Options on which set_multiline(true) has
// been called.
static inline RE_Options MULTILINE() {
  return RE_Options().set_multiline(true);
}
Packit 78a954
Packit 78a954
Packit 78a954
// Returns a default-constructed RE_Options on which set_dotall(true) has
// been called.
static inline RE_Options DOTALL() {
  return RE_Options().set_dotall(true);
}
Packit 78a954
Packit 78a954
Packit 78a954
// Returns a default-constructed RE_Options on which set_extended(true) has
// been called.
static inline RE_Options EXTENDED() {
  return RE_Options().set_extended(true);
}
Packit 78a954
Packit 78a954
Packit 78a954
// Interface for regular expression matching.  Also corresponds to a
Packit 78a954
// pre-compiled regular expression.  An "RE" object is safe for
Packit 78a954
// concurrent use by multiple threads.
Packit 78a954
Packit 78a954
Packit 78a954
  // We provide implicit conversions from strings so that users can
Packit 78a954
  // pass in a string or a "const char*" wherever an "RE" is expected.
Packit 78a954
  // From a string pattern; Init receives a NULL options pointer.
  RE(const string& pat) {
    Init(pat, NULL);
  }
Packit 78a954
  // From a string pattern plus caller-supplied options; Init receives the
  // address of `option`.
  RE(const string& pat, const RE_Options& option) {
    Init(pat, &option);
  }
Packit 78a954
  // From a C-string pattern; Init receives a NULL options pointer.
  RE(const char* pat) {
    Init(pat, NULL);
  }
Packit 78a954
  // From a C-string pattern plus caller-supplied options; Init receives the
  // address of `option`.
  RE(const char* pat, const RE_Options& option) {
    Init(pat, &option);
  }
Packit 78a954
  RE(const unsigned char* pat) {
Packit 78a954
    Init(reinterpret_cast<const char*>(pat), NULL);
Packit 78a954
Packit 78a954
  RE(const unsigned char* pat, const RE_Options& option) {
Packit 78a954
    Init(reinterpret_cast<const char*>(pat), &option);
Packit 78a954
Packit 78a954
Packit 78a954
  // Copy constructor & assignment - note that these are expensive
Packit 78a954
  // because they recompile the expression.
Packit 78a954
  // Copy construction re-runs Init with the source object's pattern text
  // and options rather than sharing its compiled state.
  RE(const RE& re) {
    Init(re.pattern_, &re.options_);
  }
Packit 78a954
  const RE& operator=(const RE& re) {
Packit 78a954
    if (this != &re) {
Packit 78a954
Packit 78a954
Packit 78a954
      // This is the code that originally came from Google
Packit 78a954
      // Init(re.pattern_.c_str(), &re.options_);
Packit 78a954
Packit 78a954
      // This is the replacement from Ari Pollak
Packit 78a954
      Init(re.pattern_, &re.options_);
Packit 78a954
Packit 78a954
    return *this;
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  // The string specification for this RE.  E.g.
Packit 78a954
  //   RE re("ab*c?d+");
Packit 78a954
  //   re.pattern();    // "ab*c?d+"
Packit 78a954
  // Read-only access to the stored pattern text (pattern_).
  const string& pattern() const {
    return pattern_;
  }
Packit 78a954
Packit 78a954
  // If RE could not be created properly, returns an error string.
Packit 78a954
  // Else returns the empty string.
Packit 78a954
  const string& error() const { return *error_; }
Packit 78a954
Packit 78a954
  /***** The useful part: the matching interface *****/
Packit 78a954
Packit 78a954
  // This is provided so one can do pattern.ReplaceAll() just as
Packit 78a954
  // easily as ReplaceAll(pattern-text, ....)
Packit 78a954
Packit 78a954
  bool FullMatch(const StringPiece& text,
Packit 78a954
                 const Arg& ptr1 = no_arg,
Packit 78a954
                 const Arg& ptr2 = no_arg,
Packit 78a954
                 const Arg& ptr3 = no_arg,
Packit 78a954
                 const Arg& ptr4 = no_arg,
Packit 78a954
                 const Arg& ptr5 = no_arg,
Packit 78a954
                 const Arg& ptr6 = no_arg,
Packit 78a954
                 const Arg& ptr7 = no_arg,
Packit 78a954
                 const Arg& ptr8 = no_arg,
Packit 78a954
                 const Arg& ptr9 = no_arg,
Packit 78a954
                 const Arg& ptr10 = no_arg,
Packit 78a954
                 const Arg& ptr11 = no_arg,
Packit 78a954
                 const Arg& ptr12 = no_arg,
Packit 78a954
                 const Arg& ptr13 = no_arg,
Packit 78a954
                 const Arg& ptr14 = no_arg,
Packit 78a954
                 const Arg& ptr15 = no_arg,
Packit 78a954
                 const Arg& ptr16 = no_arg) const;
Packit 78a954
Packit 78a954
  bool PartialMatch(const StringPiece& text,
Packit 78a954
                    const Arg& ptr1 = no_arg,
Packit 78a954
                    const Arg& ptr2 = no_arg,
Packit 78a954
                    const Arg& ptr3 = no_arg,
Packit 78a954
                    const Arg& ptr4 = no_arg,
Packit 78a954
                    const Arg& ptr5 = no_arg,
Packit 78a954
                    const Arg& ptr6 = no_arg,
Packit 78a954
                    const Arg& ptr7 = no_arg,
Packit 78a954
                    const Arg& ptr8 = no_arg,
Packit 78a954
                    const Arg& ptr9 = no_arg,
Packit 78a954
                    const Arg& ptr10 = no_arg,
Packit 78a954
                    const Arg& ptr11 = no_arg,
Packit 78a954
                    const Arg& ptr12 = no_arg,
Packit 78a954
                    const Arg& ptr13 = no_arg,
Packit 78a954
                    const Arg& ptr14 = no_arg,
Packit 78a954
                    const Arg& ptr15 = no_arg,
Packit 78a954
                    const Arg& ptr16 = no_arg) const;
Packit 78a954
Packit 78a954
  bool Consume(StringPiece* input,
Packit 78a954
               const Arg& ptr1 = no_arg,
Packit 78a954
               const Arg& ptr2 = no_arg,
Packit 78a954
               const Arg& ptr3 = no_arg,
Packit 78a954
               const Arg& ptr4 = no_arg,
Packit 78a954
               const Arg& ptr5 = no_arg,
Packit 78a954
               const Arg& ptr6 = no_arg,
Packit 78a954
               const Arg& ptr7 = no_arg,
Packit 78a954
               const Arg& ptr8 = no_arg,
Packit 78a954
               const Arg& ptr9 = no_arg,
Packit 78a954
               const Arg& ptr10 = no_arg,
Packit 78a954
               const Arg& ptr11 = no_arg,
Packit 78a954
               const Arg& ptr12 = no_arg,
Packit 78a954
               const Arg& ptr13 = no_arg,
Packit 78a954
               const Arg& ptr14 = no_arg,
Packit 78a954
               const Arg& ptr15 = no_arg,
Packit 78a954
               const Arg& ptr16 = no_arg) const;
Packit 78a954
Packit 78a954
  bool FindAndConsume(StringPiece* input,
Packit 78a954
                      const Arg& ptr1 = no_arg,
Packit 78a954
                      const Arg& ptr2 = no_arg,
Packit 78a954
                      const Arg& ptr3 = no_arg,
Packit 78a954
                      const Arg& ptr4 = no_arg,
Packit 78a954
                      const Arg& ptr5 = no_arg,
Packit 78a954
                      const Arg& ptr6 = no_arg,
Packit 78a954
                      const Arg& ptr7 = no_arg,
Packit 78a954
                      const Arg& ptr8 = no_arg,
Packit 78a954
                      const Arg& ptr9 = no_arg,
Packit 78a954
                      const Arg& ptr10 = no_arg,
Packit 78a954
                      const Arg& ptr11 = no_arg,
Packit 78a954
                      const Arg& ptr12 = no_arg,
Packit 78a954
                      const Arg& ptr13 = no_arg,
Packit 78a954
                      const Arg& ptr14 = no_arg,
Packit 78a954
                      const Arg& ptr15 = no_arg,
Packit 78a954
                      const Arg& ptr16 = no_arg) const;
Packit 78a954
Packit 78a954
  bool Replace(const StringPiece& rewrite,
Packit 78a954
               string *str) const;
Packit 78a954
Packit 78a954
  int GlobalReplace(const StringPiece& rewrite,
Packit 78a954
                    string *str) const;
Packit 78a954
Packit 78a954
  bool Extract(const StringPiece &rewrite,
Packit 78a954
               const StringPiece &text,
Packit 78a954
               string *out) const;
Packit 78a954
Packit 78a954
  // Escapes all potentially meaningful regexp characters in
Packit 78a954
  // 'unquoted'.  The returned string, used as a regular expression,
Packit 78a954
  // will exactly match the original string.  For example,
Packit 78a954
  //           1.5-2.0?
Packit 78a954
  // may become:
Packit 78a954
  //           1\.5\-2\.0\?
Packit 78a954
  // Note QuoteMeta behaves the same as perl's QuoteMeta function,
Packit 78a954
  // *except* that it escapes the NUL character (\0) as backslash + 0,
Packit 78a954
  // rather than backslash + NUL.
Packit 78a954
  static string QuoteMeta(const StringPiece& unquoted);
Packit 78a954
Packit 78a954
Packit 78a954
  /***** Generic matching interface *****/
Packit 78a954
Packit 78a954
  // Type of match (TODO: Should be restructured as part of RE_Options)
Packit 78a954
  // How a match attempt is tied to the ends of the text.  Enumerator order
  // (and therefore the implicit values 0, 1, 2) is kept as-is.
  enum Anchor {
    UNANCHORED,         // No anchoring
    ANCHOR_START,       // Anchor at start only
    ANCHOR_BOTH         // Anchor at start and end
  };
Packit 78a954
Packit 78a954
Packit 78a954
  // General matching routine.  Stores the length of the match in
Packit 78a954
  // "*consumed" if successful.
Packit 78a954
  bool DoMatch(const StringPiece& text,
Packit 78a954
               Anchor anchor,
Packit 78a954
               int* consumed,
Packit 78a954
               const Arg* const* args, int n) const;
Packit 78a954
Packit 78a954
  // Return the number of capturing subpatterns, or -1 if the
Packit 78a954
  // regexp wasn't valid on construction.
Packit 78a954
  int NumberOfCapturingGroups() const;
Packit 78a954
Packit 78a954
  // The default value for an argument, to indicate the end of the argument
Packit 78a954
  // list. This must be used only in optional argument defaults. It should NOT
Packit 78a954
  // be passed explicitly. Some people have tried to use it like this:
Packit 78a954
Packit 78a954
  //   FullMatch(x, y, &z, no_arg, &w);
Packit 78a954
Packit 78a954
  // This is a mistake, and will not work.
Packit 78a954
  static Arg no_arg;
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
  void Init(const string& pattern, const RE_Options* options);
Packit 78a954
  void Cleanup();
Packit 78a954
Packit 78a954
  // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
Packit 78a954
  // pairs of integers for the beginning and end positions of matched
Packit 78a954
  // text.  The first pair corresponds to the entire matched text;
Packit 78a954
  // subsequent pairs correspond, in order, to parentheses-captured
Packit 78a954
  // matches.  Returns the number of pairs (one more than the number of
Packit 78a954
  // the last subpattern with a match) if matching was successful
Packit 78a954
  // and zero if the match failed.
Packit 78a954
  // I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
Packit 78a954
  // against "foo", "bar", and "baz" respectively.
Packit 78a954
  // When matching RE("(foo)|hello") against "hello", it will return 1.
Packit 78a954
  // But the values for all subpatterns are still filled into "vec".
Packit 78a954
  int TryMatch(const StringPiece& text,
Packit 78a954
               int startpos,
Packit 78a954
               Anchor anchor,
Packit 78a954
               bool empty_ok,
Packit 78a954
               int *vec,
Packit 78a954
               int vecsize) const;
Packit 78a954
Packit 78a954
  // Append the "rewrite" string, with backslash substitutions from "text"
Packit 78a954
  // and "vec", to string "out".
Packit 78a954
  bool Rewrite(string *out,
Packit 78a954
               const StringPiece& rewrite,
Packit 78a954
               const StringPiece& text,
Packit 78a954
               int *vec,
Packit 78a954
               int veclen) const;
Packit 78a954
Packit 78a954
  // internal implementation for DoMatch
Packit 78a954
  bool DoMatchImpl(const StringPiece& text,
Packit 78a954
                   Anchor anchor,
Packit 78a954
                   int* consumed,
Packit 78a954
                   const Arg* const args[],
Packit 78a954
                   int n,
Packit 78a954
                   int* vec,
Packit 78a954
                   int vecsize) const;
Packit 78a954
Packit 78a954
  // Compile the regexp for the specified anchoring mode
Packit 78a954
  pcre* Compile(Anchor anchor);
Packit 78a954
Packit 78a954
  string        pattern_;
Packit 78a954
  RE_Options    options_;
Packit 78a954
  pcre*         re_full_;       // For full matches
Packit 78a954
  pcre*         re_partial_;    // For partial matches
Packit 78a954
  const string* error_;         // Error indicator (or points to empty string)
Packit 78a954
Packit 78a954
Packit 78a954
}   // namespace pcrecpp
Packit 78a954
Packit 78a954
#endif /* _PCRECPP_H */