|
Packit |
1c1d7e |
/****************************************************************************
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** Implementation of QRegExp class
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** Created : 950126
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** Copyright (C) 1992-2000 Trolltech AS. All rights reserved.
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** This file is part of the tools module of the Qt GUI Toolkit.
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** This file may be distributed under the terms of the Q Public License
|
|
Packit |
1c1d7e |
** as defined by Trolltech AS of Norway and appearing in the file
|
|
Packit |
1c1d7e |
** LICENSE.QPL included in the packaging of this file.
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** This file may be distributed and/or modified under the terms of the
|
|
Packit |
1c1d7e |
** GNU General Public License version 2 as published by the Free Software
|
|
Packit |
1c1d7e |
** Foundation and appearing in the file LICENSE.GPL included in the
|
|
Packit |
1c1d7e |
** packaging of this file.
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
|
|
Packit |
1c1d7e |
** licenses may use this file in accordance with the Qt Commercial License
|
|
Packit |
1c1d7e |
** Agreement provided with the Software.
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
|
|
Packit |
1c1d7e |
** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
|
|
Packit |
1c1d7e |
** information about Qt Commercial License Agreements.
|
|
Packit |
1c1d7e |
** See http://www.trolltech.com/qpl/ for QPL licensing information.
|
|
Packit |
1c1d7e |
** See http://www.trolltech.com/gpl/ for GPL licensing information.
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
** Contact info@trolltech.com if any conditions of this licensing are
|
|
Packit |
1c1d7e |
** not clear to you.
|
|
Packit |
1c1d7e |
**
|
|
Packit |
1c1d7e |
**********************************************************************/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
#include "qregexp.h"
|
|
Packit |
1c1d7e |
#include <ctype.h>
|
|
Packit |
1c1d7e |
#include <stdlib.h>
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
// NOT REVISED
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\class QRegExp qregexp.h
|
|
Packit |
1c1d7e |
\ingroup tools
|
|
Packit |
1c1d7e |
\ingroup misc
|
|
Packit |
1c1d7e |
\brief The QRegExp class provides pattern matching using regular
|
|
Packit |
1c1d7e |
expressions or wildcards.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
QRegExp knows these regexp primitives:
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
<dfn>c</dfn> matches the character 'c'
|
|
Packit |
1c1d7e |
<dfn>.</dfn> matches any character
|
|
Packit |
1c1d7e |
<dfn>^</dfn> matches start of input
|
|
Packit |
1c1d7e |
<dfn>$</dfn> matches end of input
|
|
Packit |
1c1d7e |
<dfn>[]</dfn> matches a defined set of characters - see below.
|
|
Packit |
1c1d7e |
<dfn>a*</dfn> matches a sequence of zero or more a's
|
|
Packit |
1c1d7e |
<dfn>a+</dfn> matches a sequence of one or more a's
|
|
Packit |
1c1d7e |
<dfn>a?</dfn> matches an optional a
|
|
Packit |
1c1d7e |
<dfn>\c</dfn> escape code for matching special characters such
|
|
Packit |
1c1d7e |
as \, [, *, +, . etc.
|
|
Packit |
1c1d7e |
<dfn>\t</dfn> matches the TAB character (9)
|
|
Packit |
1c1d7e |
<dfn>\n</dfn> matches newline (10)
|
|
Packit |
1c1d7e |
<dfn>\r</dfn> matches return (13)
|
|
Packit |
1c1d7e |
<dfn>\s</dfn> matches a white space (defined as any character
|
|
Packit |
1c1d7e |
for which QChar::isSpace() returns TRUE. This includes at least
|
|
Packit |
1c1d7e |
ASCII characters 9 (TAB), 10 (LF), 11 (VT), 12(FF), 13 (CR) and 32
|
|
Packit |
1c1d7e |
(Space)).
|
|
Packit |
1c1d7e |
<dfn>\d</dfn> matches a digit (defined as any character for
|
|
Packit |
1c1d7e |
which QChar::isDigit() returns TRUE. This includes at least ASCII
|
|
Packit |
1c1d7e |
characters '0'-'9').
|
|
Packit |
1c1d7e |
<dfn>\x1f6b</dfn> matches the character with unicode point U1f6b
|
|
Packit |
1c1d7e |
(hexadecimal 1f6b). \x0012 will match the ASCII/Latin1 character
|
|
Packit |
1c1d7e |
0x12 (18 decimal, 12 hexadecimal).
|
|
Packit |
1c1d7e |
<dfn>\022</dfn> matches the ASCII/Latin1 character 022 (18
|
|
Packit |
1c1d7e |
decimal, 22 octal).
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
In wildcard mode, it only knows four primitives:
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
<dfn>c</dfn> matches the character 'c'
|
|
Packit |
1c1d7e |
<dfn>?</dfn> matches any character
|
|
Packit |
1c1d7e |
<dfn>*</dfn> matches any sequence of characters
|
|
Packit |
1c1d7e |
<dfn>[]</dfn> matches a defined set of characters - see below.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
QRegExp supports Unicode both in the pattern strings and in the
|
|
Packit |
1c1d7e |
strings to be matched.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
When writing regular expressions in C++ code, remember that C++
|
|
Packit |
1c1d7e |
processes \ characters. So in order to match e.g. a "." character,
|
|
Packit |
1c1d7e |
you must write "\\." in C++ source, not "\.".
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
A character set matches a defined set of characters. For example,
|
|
Packit |
1c1d7e |
[BSD] matches any of 'B', 'D' and 'S'. Within a character set, the
|
|
Packit |
1c1d7e |
special characters '.', '*', '?', '^', '$', '+' and '[' lose their
|
|
Packit |
1c1d7e |
special meanings. The following special characters apply:
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
<dfn>^</dfn> When placed first in the list, changes the
|
|
Packit |
1c1d7e |
character set to match any character \e not in the list. To include
|
|
Packit |
1c1d7e |
the character '^' itself in the set, escape it or place it anywhere
|
|
Packit |
1c1d7e |
but first.
|
|
Packit |
1c1d7e |
<dfn>-</dfn> Defines a range of characters. To include the
|
|
Packit |
1c1d7e |
character '-' itself in the set, escape it or place it last.
|
|
Packit |
1c1d7e |
<dfn>]</dfn> Ends the character set definition. To include the
|
|
Packit |
1c1d7e |
character ']' itself in the set, escape it or place it first (but
|
|
Packit |
1c1d7e |
after the negation operator '^', if present)
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
Thus, [a-zA-Z0-9.] matches upper and lower case ASCII letters,
|
|
Packit |
1c1d7e |
digits and dot; and [^\s] matches everything except white space.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\bug Case insensitive matching is not supported for non-ASCII/Latin1
|
|
Packit |
1c1d7e |
(non-8bit) characters. Any character with a non-zero QChar.row() is
|
|
Packit |
1c1d7e |
matched case sensitively even if the QRegExp is in case insensitive
|
|
Packit |
1c1d7e |
mode.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\note In Qt 3.0, the language of regular expressions will contain
|
|
Packit |
1c1d7e |
five more special characters, namely '(', ')', '{', '|' and '}'. To
|
|
Packit |
1c1d7e |
ease porting, it's a good idea to escape these characters with a
|
|
Packit |
1c1d7e |
backslash in all the regular expressions you'll write from now on.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
//
|
|
Packit |
1c1d7e |
// The regexp pattern is internally represented as an array of uints,
|
|
Packit |
1c1d7e |
// each element containing a 16-bit character or a 32-bit code
|
|
Packit |
1c1d7e |
// (listed below). User-defined character classes (e.g. [a-zA-Z])
|
|
Packit |
1c1d7e |
// are encoded as this:
|
|
Packit |
1c1d7e |
// uint no: 1 2 3 ...
|
|
Packit |
1c1d7e |
// value: CCL | n from | to from | to
|
|
Packit |
1c1d7e |
//
|
|
Packit |
1c1d7e |
// where n is the (16-bit) number of following range definitions and
|
|
Packit |
1c1d7e |
// from and to define the ranges inclusive. from <= to is always true,
|
|
Packit |
1c1d7e |
// otherwise it is a built-in charclass (Pxx, eg \s - PWS). Single
|
|
Packit |
1c1d7e |
// characters in the class are coded as from==to. Negated classes
|
|
Packit |
1c1d7e |
// (e.g. [^a-z]) use CCN instead of CCL.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
// Masks for taking a pattern element apart.
const uint MCC = 0x20000000;    // bit that marks a character class element
const uint MCD = 0xffff0000;    // selects the code (upper 16 bits)
const uint MVL = 0x0000ffff;    // selects the value (lower 16 bits)

// End-of-pattern marker.
const uint END = 0x00000000;

// Predefined character classes.
const uint PWS = 0x10010000;    // whitespace (\s)
const uint PDG = 0x10020000;    // digit (\d)

// User-defined character classes.
const uint CCL = 0x20010000;    // character class []
const uint CCN = 0x20020000;    // negated character class [^]

// Literal character.
const uint CHR = 0x40000000;    // character

// Anchors, wildcard and closures.
const uint BOL = 0x80010000;    // beginning of line ^
const uint EOL = 0x80020000;    // end of line $
const uint BOW = 0x80030000;    // beginning of word \<
const uint EOW = 0x80040000;    // end of word \>
const uint ANY = 0x80050000;    // any character .
const uint CLO = 0x80070000;    // Kleene closure *
const uint OPT = 0x80080000;    // optional closure ?

//
// Internal values stored in QRegExp::error
//

const int PatOk       = 0;      // pattern ok
const int PatNull     = 1;      // no pattern defined
const int PatSyntax   = 2;      // pattern syntax error
const int PatOverflow = 4;      // pattern too long
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*****************************************************************************
|
|
Packit |
1c1d7e |
QRegExp member functions
|
|
Packit |
1c1d7e |
*****************************************************************************/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
Constructs an empty regular expression.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
QRegExp::QRegExp()
{
    // Defaults: case sensitive matching, wildcard mode off.
    cs = TRUE;
    wc = FALSE;

    // There is no pattern yet, so no compiled data and no error.
    rxdata = 0;
    error = PatOk;
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
Constructs a regular expression.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\arg \e pattern is the regular expression pattern string.
|
|
Packit |
1c1d7e |
\arg \e caseSensitive specifies whether or not to use case sensitive
|
|
Packit |
1c1d7e |
matching.
|
|
Packit |
1c1d7e |
\arg \e wildcard specifies whether the pattern string should be used for
|
|
Packit |
1c1d7e |
wildcard matching (also called globbing expression), normally used for
|
|
Packit |
1c1d7e |
matching file names.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\sa setWildcard()
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
QRegExp::QRegExp( const QCString &pattern, bool caseSensitive, bool wildcard )
{
    // No compiled data exists yet.
    rxdata = 0;

    // Record the caller's pattern and options ...
    cs = caseSensitive;
    wc = wildcard;
    rxstring = pattern;

    // ... and only then compile, once every setting is in place.
    compile();
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
Constructs a regular expression which is a copy of \e r.
|
|
Packit |
1c1d7e |
\sa operator=(const QRegExp&)
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
QRegExp::QRegExp( const QRegExp &r )
{
    // The compiled data of r is not shared; this object starts without any.
    rxdata = 0;

    // Take over pattern and options through r's public accessors.
    wc = r.wildcard();
    cs = r.caseSensitive();
    rxstring = r.pattern();

    // Compile last, once every setting is in place.
    compile();
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
Destructs the regular expression and cleans up its internal data.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
QRegExp::~QRegExp()
{
    // The explicit null test is kept on purpose: it avoids Purify complaints.
    if ( rxdata != 0 ) {
        delete [] rxdata;
    }
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
Copies the regexp \e r and returns a reference to this regexp.
|
|
Packit |
1c1d7e |
The case sensitivity and wildcard options are copied, as well.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
QRegExp &QRegExp::operator=( const QRegExp &r )
{
    // Copy the options and the pattern text from r ...
    wc = r.wc;
    cs = r.cs;
    rxstring = r.rxstring;

    // ... then recompile for this object.
    compile();

    return *this;
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\obsolete
|
|
Packit |
1c1d7e |
Consider using setPattern() instead of this method.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
Sets the pattern string to \e pattern and returns a reference to this regexp.
|
|
Packit |
1c1d7e |
The case sensitivity or wildcard options do not change.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
QRegExp &QRegExp::operator=( const QCString &pattern )
{
    // Only the pattern text changes; cs and wc are left untouched.
    rxstring = pattern;

    // Recompile with the new pattern.
    compile();

    return *this;
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
Returns TRUE if this regexp is equal to \e r.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
Two regexp objects are equal if they have equal pattern strings,
|
|
Packit |
1c1d7e |
case sensitivity options and wildcard options.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
bool QRegExp::operator==( const QRegExp &r ) const
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
return rxstring == r.rxstring && cs == r.cs && wc == r.wc;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\fn bool QRegExp::operator!=( const QRegExp &r ) const
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
Returns TRUE if this regexp is \e not equal to \e r.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\sa operator==()
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\fn bool QRegExp::isEmpty() const
|
|
Packit |
1c1d7e |
Returns TRUE if the regexp is empty.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\fn bool QRegExp::isValid() const
|
|
Packit |
1c1d7e |
Returns TRUE if the regexp is valid, or FALSE if it is invalid.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
The pattern "[a-z" is an example of an invalid pattern, since it lacks a
|
|
Packit |
1c1d7e |
closing bracket.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\fn bool QRegExp::wildcard() const
|
|
Packit |
1c1d7e |
Returns TRUE if wildcard mode is on, otherwise FALSE. \sa setWildcard().
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
Sets the wildcard option for the regular expression. The default
|
|
Packit |
1c1d7e |
is FALSE.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
Setting \e wildcard to TRUE makes it convenient to match filenames
|
|
Packit |
1c1d7e |
instead of plain text.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
For example, "qr*.cpp" matches the string "qregexp.cpp" in wildcard mode,
|
|
Packit |
1c1d7e |
but not "qicpp" (which would be matched in normal mode).
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\sa wildcard()
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
void QRegExp::setWildcard( bool wildcard )
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
if ( wildcard != wc ) {
|
|
Packit |
1c1d7e |
wc = wildcard;
|
|
Packit |
1c1d7e |
compile();
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\fn bool QRegExp::caseSensitive() const
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
Returns TRUE if case sensitivity is enabled, otherwise FALSE. The
|
|
Packit |
1c1d7e |
default is TRUE.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\sa setCaseSensitive()
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
Enables or disables case sensitive matching.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
In case sensitive mode, "a.e" matches "axe" but not "Axe".
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\sa caseSensitive()
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
void QRegExp::setCaseSensitive( bool enable )
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
if ( cs != enable ) {
|
|
Packit |
1c1d7e |
cs = enable;
|
|
Packit |
1c1d7e |
compile();
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\fn QCString QRegExp::pattern() const
|
|
Packit |
1c1d7e |
Returns the pattern string of the regexp.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\fn void QRegExp::setPattern(const QCString & pattern)
|
|
Packit |
1c1d7e |
Sets the pattern string to \a pattern.
|
|
Packit |
1c1d7e |
The case sensitivity or wildcard options do not change.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
//
// Returns true if x is a "word" character: alphanumeric or '_'.
//
// Callers pass plain char values. Where char is signed, bytes >= 0x80
// arrive as negative ints, and handing those to isalnum() is undefined
// behaviour. The value is therefore reduced to its unsigned 8-bit form
// first; only the low 8 bits of x are significant (8-bit support only).
//
static inline bool iswordchar( int x )
{
    const int ch = (unsigned char)x;
    return isalnum( ch ) || ch == '_';
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\internal
|
|
Packit |
1c1d7e |
Match character class
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
static bool matchcharclass( uint *rxd, char c )
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
uint *d = rxd;
|
|
Packit |
1c1d7e |
uint clcode = *d & MCD;
|
|
Packit |
1c1d7e |
bool neg = clcode == CCN;
|
|
Packit |
1c1d7e |
if ( clcode != CCL && clcode != CCN)
|
|
Packit |
1c1d7e |
qWarning("QRegExp: Internal error, please report to qt-bugs@trolltech.com");
|
|
Packit |
1c1d7e |
uint numFields = *d & MVL;
|
|
Packit |
1c1d7e |
uint cval = (unsigned char)c; //(((uint)(c.row())) << 8) | ((uint)c.cell());
|
|
Packit |
1c1d7e |
bool found = FALSE;
|
|
Packit |
1c1d7e |
for ( int i = 0; i < (int)numFields; i++ ) {
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
if ( *d == PWS && isspace(c) ) {
|
|
Packit |
1c1d7e |
found = TRUE;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
if ( *d == PDG && isdigit(c) ) {
|
|
Packit |
1c1d7e |
found = TRUE;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else {
|
|
Packit |
1c1d7e |
uint from = ( *d & MCD ) >> 16;
|
|
Packit |
1c1d7e |
uint to = *d & MVL;
|
|
Packit |
1c1d7e |
if ( (cval >= from) && (cval <= to) ) {
|
|
Packit |
1c1d7e |
found = TRUE;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
return neg ? !found : found;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*
|
|
Packit |
1c1d7e |
Internal: Recursively match string.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
static int matchstring( uint *rxd, const char *str, uint strlength,
|
|
Packit |
1c1d7e |
const char *bol, bool cs )
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
const char *p = str;
|
|
Packit |
1c1d7e |
const char *start = p;
|
|
Packit |
1c1d7e |
uint pl = strlength;
|
|
Packit |
1c1d7e |
uint *d = rxd;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
//### in all cases here: handle pl == 0! (don't read past strlen)
|
|
Packit |
1c1d7e |
while ( *d ) {
|
|
Packit |
1c1d7e |
if ( *d & CHR ) { // match char
|
|
Packit |
1c1d7e |
if ( !pl )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
char c = *d;
|
|
Packit |
1c1d7e |
if ( !cs /*&& !c.row()*/ ) { // case insensitive, #Only 8bit
|
|
Packit |
1c1d7e |
if ( tolower(*p) != c )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
} else { // case insensitive
|
|
Packit |
1c1d7e |
if ( *p != c )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else if ( *d & MCC ) { // match char class
|
|
Packit |
1c1d7e |
if ( !pl )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
if ( !matchcharclass( d, *p ) )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
d += (*d & MVL) + 1;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else switch ( *d++ ) {
|
|
Packit |
1c1d7e |
case PWS: // match whitespace
|
|
Packit |
1c1d7e |
if ( !pl || !isspace(*p) )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case PDG: // match digits
|
|
Packit |
1c1d7e |
if ( !pl || !isdigit(*p) )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case ANY: // match anything
|
|
Packit |
1c1d7e |
if ( !pl )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case BOL: // match beginning of line
|
|
Packit |
1c1d7e |
if ( p != bol )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case EOL: // match end of line
|
|
Packit |
1c1d7e |
if ( pl )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case BOW: // match beginning of word
|
|
Packit |
1c1d7e |
if ( !iswordchar(*p) || (p > bol && iswordchar(*(p-1)) ) )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case EOW: // match end of word
|
|
Packit |
1c1d7e |
if ( iswordchar(*p) || p == bol || !iswordchar(*(p-1)) )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case CLO: // Kleene closure
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
const char *first_p = p;
|
|
Packit |
1c1d7e |
if ( *d & CHR ) { // match char
|
|
Packit |
1c1d7e |
char c = *d;
|
|
Packit |
1c1d7e |
if ( !cs /*&& !c.row()*/ ) { // case insensitive, #only 8bit
|
|
Packit |
1c1d7e |
while ( pl /*&& !p->row()*/ && tolower(*p)==c ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else { // case sensitive
|
|
Packit |
1c1d7e |
while ( pl && *p == c ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else if ( *d & MCC ) { // match char class
|
|
Packit |
1c1d7e |
while( pl && matchcharclass( d, *p ) ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d += (*d & MVL) + 1;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else if ( *d == PWS ) {
|
|
Packit |
1c1d7e |
while ( pl && isspace(*p) ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else if ( *d == PDG ) {
|
|
Packit |
1c1d7e |
while ( pl && isdigit(*p) ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else if ( *d == ANY ) {
|
|
Packit |
1c1d7e |
p += pl;
|
|
Packit |
1c1d7e |
pl = 0;
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else {
|
|
Packit |
1c1d7e |
return -1; // error
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++; // skip CLO's END
|
|
Packit |
1c1d7e |
while ( p >= first_p ) { // go backwards
|
|
Packit |
1c1d7e |
int end = matchstring( d, p, pl, bol, cs );
|
|
Packit |
1c1d7e |
if ( end >= 0 )
|
|
Packit |
1c1d7e |
return ( (int)(p - start) ) + end;
|
|
Packit |
1c1d7e |
if ( !p )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
--p;
|
|
Packit |
1c1d7e |
++pl;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
case OPT: // optional closure
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
const char *first_p = p;
|
|
Packit |
1c1d7e |
if ( *d & CHR ) { // match char
|
|
Packit |
1c1d7e |
char c = *d;
|
|
Packit |
1c1d7e |
if ( !cs /*&& !c.row()*/ ) { // case insensitive, #only 8bit
|
|
Packit |
1c1d7e |
if ( pl && /*!p->row() &&*/ tolower(*p) == c ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else { // case sensitive
|
|
Packit |
1c1d7e |
if ( pl && *p == c ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else if ( *d & MCC ) { // match char class
|
|
Packit |
1c1d7e |
if ( pl && matchcharclass( d, *p ) ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d += (*d & MVL) + 1;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else if ( *d == PWS ) {
|
|
Packit |
1c1d7e |
if ( pl && isspace(*p) ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else if ( *d == PDG ) {
|
|
Packit |
1c1d7e |
if ( pl && isdigit(*p) ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else if ( *d == ANY ) {
|
|
Packit |
1c1d7e |
if ( pl ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else {
|
|
Packit |
1c1d7e |
return -1; // error
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
d++; // skip OPT's END
|
|
Packit |
1c1d7e |
while ( p >= first_p ) { // go backwards
|
|
Packit |
1c1d7e |
int end = matchstring( d, p, pl, bol, cs );
|
|
Packit |
1c1d7e |
if ( end >= 0 )
|
|
Packit |
1c1d7e |
return ( (int)(p - start) ) + end;
|
|
Packit |
1c1d7e |
if ( !p )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
--p;
|
|
Packit |
1c1d7e |
++pl;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
default: // error
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
return (int)(p - start);
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\internal
|
|
Packit |
1c1d7e |
Recursively match string.
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
// This is obsolete now, but since it is protected (not private), it
|
|
Packit |
1c1d7e |
// is still implemented on the off-chance that somebody has made a
|
|
Packit |
1c1d7e |
// class derived from QRegExp and calls this directly.
|
|
Packit |
1c1d7e |
// Qt 3.0: Remove this?
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
#if 0
// NOTE(review): disabled code. The signature still takes QChar pointers
// while matchstring() in this file takes const char pointers, so this
// would need adapting before it could be re-enabled.
const char *QRegExp::matchstr( uint *rxd, const QChar *str, uint strlength,
                               const QChar *bol ) const
{
    const int matched = matchstring( rxd, str, strlength, bol, cs );
    // A null pointer signals "no match"; otherwise point just past the match.
    return matched < 0 ? 0 : str + matched;
}
#endif
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
Attempts to match in \e str, starting from position \e index.
|
|
Packit |
1c1d7e |
Returns the position of the match, or -1 if there was no match.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
If \e len is not a null pointer, the length of the match is stored in
|
|
Packit |
1c1d7e |
\e *len.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
If \e indexIsStart is TRUE (the default), the position \e index in
|
|
Packit |
1c1d7e |
the string will match the start-of-input primitive (^) in the
|
|
Packit |
1c1d7e |
regexp, if present. Otherwise, position 0 in \e str will match.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
Example:
|
|
Packit |
1c1d7e |
\code
|
|
Packit |
1c1d7e |
QRegExp r("[0-9]*\\.[0-9]+"); // matches floating point
|
|
Packit |
1c1d7e |
int len;
|
|
Packit |
1c1d7e |
r.match("pi = 3.1416", 0, &len );    // returns 5, len == 6
|
|
Packit |
1c1d7e |
\endcode
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\note In Qt 3.0, this function will be replaced by find().
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
int QRegExp::match( const QCString &str, int index, int *len,
|
|
Packit |
1c1d7e |
bool indexIsStart ) const
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
if ( !isValid() || isEmpty() )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
if ( str.length() < (uint)index )
|
|
Packit |
1c1d7e |
return -1;
|
|
Packit |
1c1d7e |
const char *start = str.data();
|
|
Packit |
1c1d7e |
const char *p = start + index;
|
|
Packit |
1c1d7e |
uint pl = str.length() - index;
|
|
Packit |
1c1d7e |
uint *d = rxdata;
|
|
Packit |
1c1d7e |
int ep = -1;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
if ( *d == BOL ) { // match from beginning of line
|
|
Packit |
1c1d7e |
ep = matchstring( d, p, pl, indexIsStart ? p : start, cs );
|
|
Packit |
1c1d7e |
} else {
|
|
Packit |
1c1d7e |
if ( *d & CHR ) {
|
|
Packit |
1c1d7e |
char c = *d;
|
|
Packit |
1c1d7e |
if ( !cs /*&& !c.row()*/ ) { // case sensitive, # only 8bit
|
|
Packit |
1c1d7e |
while ( pl && ( /*p->row() ||*/ tolower(*p) != c ) ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
} else { // case insensitive
|
|
Packit |
1c1d7e |
while ( pl && *p != c ) {
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
while( 1 ) { // regular match
|
|
Packit |
1c1d7e |
ep = matchstring( d, p, pl, indexIsStart ? start+index : start, cs );
|
|
Packit |
1c1d7e |
if ( ep >= 0 )
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
if ( !pl )
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
if ( len )
|
|
Packit |
1c1d7e |
*len = ep >= 0 ? ep : 0; // No match -> 0, for historical reasons
|
|
Packit |
1c1d7e |
return ep >= 0 ? (int)(p - start) : -1; // return index;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*! \fn int QRegExp::find( const QCString& str, int index )
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
Attempts to match in \e str, starting from position \e index.
|
|
Packit |
1c1d7e |
Returns the position of the match, or -1 if there was no match.
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
\sa match()
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
//
|
|
Packit |
1c1d7e |
// Translate wildcard pattern to standard regexp pattern.
|
|
Packit |
1c1d7e |
// Ex: *.cpp ==> ^.*\.cpp$
|
|
Packit |
1c1d7e |
//
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
static QCString wc2rx( const QCString &pattern )
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
int patlen = (int)pattern.length();
|
|
Packit |
1c1d7e |
QCString wcpattern("^");
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
char c;
|
|
Packit |
1c1d7e |
for( int i = 0; i < patlen; i++ ) {
|
|
Packit |
1c1d7e |
c = pattern[i];
|
|
Packit |
1c1d7e |
switch ( (char)c ) {
|
|
Packit |
1c1d7e |
case '*': // '*' ==> '.*'
|
|
Packit |
1c1d7e |
wcpattern += '.';
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case '?': // '?' ==> '.'
|
|
Packit |
1c1d7e |
c = '.';
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case '.': // quote special regexp chars
|
|
Packit |
1c1d7e |
case '+':
|
|
Packit |
1c1d7e |
case '\\':
|
|
Packit |
1c1d7e |
case '$':
|
|
Packit |
1c1d7e |
case '^':
|
|
Packit |
1c1d7e |
wcpattern += '\\';
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
case '[':
|
|
Packit |
1c1d7e |
if ( (char)pattern[i+1] == '^' ) { // don't quote '^' after '['
|
|
Packit |
1c1d7e |
wcpattern += '[';
|
|
Packit |
1c1d7e |
c = pattern[i+1];
|
|
Packit |
1c1d7e |
i++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
wcpattern += c;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
wcpattern += '$';
|
|
Packit |
1c1d7e |
return wcpattern; // return new regexp pattern
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
//
|
|
Packit |
1c1d7e |
// Internal: Get char value and increment pointer.
|
|
Packit |
1c1d7e |
//
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
static uint char_val( const char **str, uint *strlength ) // get char value
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
const char *p = *str;
|
|
Packit |
1c1d7e |
uint pl = *strlength;
|
|
Packit |
1c1d7e |
uint len = 1;
|
|
Packit |
1c1d7e |
uint v = 0;
|
|
Packit |
1c1d7e |
if ( (char)*p == '\\' ) { // escaped code
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
if ( !pl ) { // it is just a '\'
|
|
Packit |
1c1d7e |
(*str)++;
|
|
Packit |
1c1d7e |
(*strlength)--;
|
|
Packit |
1c1d7e |
return '\\';
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
len++; // length at least 2
|
|
Packit |
1c1d7e |
int i;
|
|
Packit |
1c1d7e |
char c;
|
|
Packit |
1c1d7e |
char ch = tolower((char)*p);
|
|
Packit |
1c1d7e |
switch ( ch ) {
|
|
Packit |
1c1d7e |
case 'b': v = '\b'; break; // bell
|
|
Packit |
1c1d7e |
case 'f': v = '\f'; break; // form feed
|
|
Packit |
1c1d7e |
case 'n': v = '\n'; break; // newline
|
|
Packit |
1c1d7e |
case 'r': v = '\r'; break; // return
|
|
Packit |
1c1d7e |
case 't': v = '\t'; break; // tab
|
|
Packit |
1c1d7e |
case 's': v = PWS; break; // whitespace charclass
|
|
Packit |
1c1d7e |
case 'd': v = PDG; break; // digit charclass
|
|
Packit |
1c1d7e |
case '<': v = BOW; break; // word beginning matcher
|
|
Packit |
1c1d7e |
case '>': v = EOW; break; // word ending matcher
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
case 'x': { // hex code
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
for ( i = 0; (i < 4) && pl; i++ ) { //up to 4 hex digits
|
|
Packit |
1c1d7e |
c = tolower((char)*p);
|
|
Packit |
1c1d7e |
bool a = ( c >= 'a' && c <= 'f' );
|
|
Packit |
1c1d7e |
if ( (c >= '0' && c <= '9') || a ) {
|
|
Packit |
1c1d7e |
v <<= 4;
|
|
Packit |
1c1d7e |
v += a ? 10 + c - 'a' : c - '0';
|
|
Packit |
1c1d7e |
len++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else {
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
default: {
|
|
Packit |
1c1d7e |
if ( ch >= '0' && ch <= '7' ) { //octal code
|
|
Packit |
1c1d7e |
len--;
|
|
Packit |
1c1d7e |
for ( i = 0; (i < 3) && pl; i++ ) { // up to 3 oct digits
|
|
Packit |
1c1d7e |
c = (char)*p;
|
|
Packit |
1c1d7e |
if ( c >= '0' && c <= '7' ) {
|
|
Packit |
1c1d7e |
v <<= 3;
|
|
Packit |
1c1d7e |
v += c - '0';
|
|
Packit |
1c1d7e |
len++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else {
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else { // not an octal number
|
|
Packit |
1c1d7e |
v = (uint)*p; //(((uint)(p->row())) << 8) | ((uint)p->cell());
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
} else {
|
|
Packit |
1c1d7e |
v = (uint)*p; //(((uint)(p->row())) << 8) | ((uint)p->cell());
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
*str += len;
|
|
Packit |
1c1d7e |
*strlength -= len;
|
|
Packit |
1c1d7e |
return v;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
#if 0 //defined(DEBUG)
//
// Debug helper: prints the compiled regexp program starting at p, one
// qDebug() line per element, until an END element is reached.  The
// operand of a CLO or OPT element is printed by a recursive call.
// Returns a pointer to the element following the END that stopped
// the listing.
//
static uint *dump( uint *p )
{
    while ( *p != END ) {
        const uint op = *p;

        if ( op & CHR ) {                       // single character
            uchar uc = (uchar)op;
            char c = (char)uc;
            uint u = (uint)uc;
            qDebug( "\tCHR\tU%04x (%c)", u, (c ? c : ' '));
            p++;
            continue;
        }

        if ( op & MCC ) {                       // character class + its fields
            const uint clcode = op & MCD;
            const uint numFields = op & MVL;
            if ( clcode == CCL )
                qDebug( "\tCCL\t%i", numFields );
            else if ( clcode == CCN )
                qDebug( "\tCCN\t%i", numFields );
            else
                qDebug("coding error!");

            uint *field = p + 1;                // first field follows the header
            for ( int left = (int)numFields; left > 0; left--, field++ ) {
                if ( *field == PWS ) {
                    qDebug( "\t\tPWS" );
                } else if ( *field == PDG ) {
                    qDebug( "\t\tPDG" );
                } else {                        // from/to pair packed in one word
                    uint from = ( *field & MCD ) >> 16;
                    uint to = *field & MVL;
                    char fc = (char)from;
                    char tc = (char)to;
                    qDebug( "\t\tU%04x (%c) - U%04x (%c)", from,
                            (fc ? fc : ' '), to, (tc ? tc : ' ') );
                }
            }
            p = field;                          // continue after the last field
            continue;
        }

        p++;                                    // step over the opcode itself
        switch ( op ) {
        case PWS:  qDebug( "\tPWS" );  break;
        case PDG:  qDebug( "\tPDG" );  break;
        case BOL:  qDebug( "\tBOL" );  break;
        case EOL:  qDebug( "\tEOL" );  break;
        case BOW:  qDebug( "\tBOW" );  break;
        case EOW:  qDebug( "\tEOW" );  break;
        case ANY:  qDebug( "\tANY" );  break;
        case CLO:
            qDebug( "\tCLO" );
            p = dump( p );                      // operand, up to its own END
            break;
        case OPT:
            qDebug( "\tOPT" );
            p = dump( p );                      // operand, up to its own END
            break;
        }
    }
    qDebug( "\tEND" );
    return p+1;
}
#endif // DEBUG
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
static const int maxlen = 1024; // max length of regexp array
|
|
Packit |
1c1d7e |
static uint rxarray[ maxlen ]; // tmp regexp array
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
/*!
|
|
Packit |
1c1d7e |
\internal
|
|
Packit |
1c1d7e |
Compiles the regular expression and stores the result in rxdata.
|
|
Packit |
1c1d7e |
The 'error' flag is set to non-zero if an error is detected.
|
|
Packit |
1c1d7e |
NOTE! This function is not reentrant!
|
|
Packit |
1c1d7e |
*/
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
void QRegExp::compile()
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
if ( rxdata ) { // delete old data
|
|
Packit |
1c1d7e |
delete [] rxdata;
|
|
Packit |
1c1d7e |
rxdata = 0;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
if ( rxstring.isEmpty() ) { // no regexp pattern set
|
|
Packit |
1c1d7e |
error = PatNull;
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
error = PatOk; // assume pattern is ok
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
QCString pattern;
|
|
Packit |
1c1d7e |
if ( wc )
|
|
Packit |
1c1d7e |
pattern = wc2rx(rxstring);
|
|
Packit |
1c1d7e |
else
|
|
Packit |
1c1d7e |
pattern = rxstring;
|
|
Packit |
1c1d7e |
const char *start = pattern.data(); // pattern pointer
|
|
Packit |
1c1d7e |
const char *p = start; // pattern pointer
|
|
Packit |
1c1d7e |
uint pl = pattern.length();
|
|
Packit |
1c1d7e |
uint *d = rxarray; // data pointer
|
|
Packit |
1c1d7e |
uint *prev_d = 0;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
#define GEN(x) *d++ = (x)
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
while ( pl ) {
|
|
Packit |
1c1d7e |
char ch = (char)*p;
|
|
Packit |
1c1d7e |
switch ( ch ) {
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
case '^': // beginning of line
|
|
Packit |
1c1d7e |
prev_d = d;
|
|
Packit |
1c1d7e |
GEN( p == start ? BOL : (CHR | ch) );
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
case '$': // end of line
|
|
Packit |
1c1d7e |
prev_d = d;
|
|
Packit |
1c1d7e |
GEN( pl == 1 ? EOL : (CHR | ch) );
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
case '.': // any char
|
|
Packit |
1c1d7e |
prev_d = d;
|
|
Packit |
1c1d7e |
GEN( ANY );
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
case '[': // character class
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
prev_d = d;
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
if ( !pl ) {
|
|
Packit |
1c1d7e |
error = PatSyntax;
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
bool firstIsEscaped = ( (char)*p == '\\' );
|
|
Packit |
1c1d7e |
uint cch = char_val( &p, &pl );
|
|
Packit |
1c1d7e |
if ( cch == '^' && !firstIsEscaped ) { // negate!
|
|
Packit |
1c1d7e |
GEN( CCN );
|
|
Packit |
1c1d7e |
if ( !pl ) {
|
|
Packit |
1c1d7e |
error = PatSyntax;
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
cch = char_val( &p, &pl );
|
|
Packit |
1c1d7e |
} else {
|
|
Packit |
1c1d7e |
GEN( CCL );
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
uint numFields = 0;
|
|
Packit |
1c1d7e |
while ( pl ) {
|
|
Packit |
1c1d7e |
if ((pl>2) && ((char)*p == '-') && ((char)*(p+1) != ']')) {
|
|
Packit |
1c1d7e |
// Found a range
|
|
Packit |
1c1d7e |
char_val( &p, &pl ); // Read the '-'
|
|
Packit |
1c1d7e |
uint cch2 = char_val( &p, &pl ); // Read the range end
|
|
Packit |
1c1d7e |
if ( cch > cch2 ) { // swap start and stop
|
|
Packit |
1c1d7e |
int tmp = cch;
|
|
Packit |
1c1d7e |
cch = cch2;
|
|
Packit |
1c1d7e |
cch2 = tmp;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
GEN( (cch << 16) | cch2 ); // from < to
|
|
Packit |
1c1d7e |
numFields++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else {
|
|
Packit |
1c1d7e |
// Found a single character
|
|
Packit |
1c1d7e |
if ( cch & MCD ) // It's a code; will not be mistaken
|
|
Packit |
1c1d7e |
GEN( cch ); // for a range, since from > to
|
|
Packit |
1c1d7e |
else
|
|
Packit |
1c1d7e |
GEN( (cch << 16) | cch ); // from == to range
|
|
Packit |
1c1d7e |
numFields++;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
if ( d >= rxarray + maxlen ) { // pattern too long
|
|
Packit |
1c1d7e |
error = PatOverflow;
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
if ( !pl ) { // At least ']' should be left
|
|
Packit |
1c1d7e |
error = PatSyntax;
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
bool nextIsEscaped = ( (char)*p == '\\' );
|
|
Packit |
1c1d7e |
cch = char_val( &p, &pl );
|
|
Packit |
1c1d7e |
if ( cch == (uint)']' && !nextIsEscaped )
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
if ( !pl ) { // End, should have seen ']'
|
|
Packit |
1c1d7e |
error = PatSyntax;
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
*prev_d |= numFields; // Store number of fields
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
case '*': // Kleene closure, or
|
|
Packit |
1c1d7e |
case '+': // positive closure, or
|
|
Packit |
1c1d7e |
case '?': // optional closure
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
if ( prev_d == 0 ) { // no previous expression
|
|
Packit |
1c1d7e |
error = PatSyntax; // empty closure
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
switch ( *prev_d ) { // test if invalid closure
|
|
Packit |
1c1d7e |
case BOL:
|
|
Packit |
1c1d7e |
case BOW:
|
|
Packit |
1c1d7e |
case EOW:
|
|
Packit |
1c1d7e |
case CLO:
|
|
Packit |
1c1d7e |
case OPT:
|
|
Packit |
1c1d7e |
error = PatSyntax;
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
int ddiff = (int)(d - prev_d);
|
|
Packit |
1c1d7e |
if ( *p == '+' ) { // convert to Kleene closure
|
|
Packit |
1c1d7e |
if ( d + ddiff >= rxarray + maxlen ) {
|
|
Packit |
1c1d7e |
error = PatOverflow; // pattern too long
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
memcpy( d, prev_d, ddiff*sizeof(uint) );
|
|
Packit |
1c1d7e |
d += ddiff;
|
|
Packit |
1c1d7e |
prev_d += ddiff;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
memmove( prev_d+1, prev_d, ddiff*sizeof(uint) );
|
|
Packit |
1c1d7e |
*prev_d = ch == '?' ? OPT : CLO;
|
|
Packit |
1c1d7e |
d++;
|
|
Packit |
1c1d7e |
GEN( END );
|
|
Packit |
1c1d7e |
p++;
|
|
Packit |
1c1d7e |
pl--;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
break;
|
|
Packit |
1c1d7e |
|
|
Packit |
1c1d7e |
default:
|
|
Packit |
1c1d7e |
{
|
|
Packit |
1c1d7e |
prev_d = d;
|
|
Packit |
1c1d7e |
uint cv = char_val( &p, &pl );
|
|
Packit |
1c1d7e |
if ( cv & MCD ) { // It's a code
|
|
Packit |
1c1d7e |
GEN( cv );
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
else {
|
|
Packit |
1c1d7e |
if ( !cs && cv <= 0xff ) // #only 8bit support
|
|
Packit |
1c1d7e |
cv = tolower( cv );
|
|
Packit |
1c1d7e |
GEN( CHR | cv );
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
if ( d >= rxarray + maxlen ) { // oops!
|
|
Packit |
1c1d7e |
error = PatOverflow; // pattern too long
|
|
Packit |
1c1d7e |
return;
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
}
|
|
Packit |
1c1d7e |
GEN( END );
|
|
Packit |
1c1d7e |
int len = (int)(d - rxarray);
|
|
Packit |
1c1d7e |
rxdata = new uint[ len ]; // copy from rxarray to rxdata
|
|
Packit |
1c1d7e |
CHECK_PTR( rxdata );
|
|
Packit |
1c1d7e |
memcpy( rxdata, rxarray, len*sizeof(uint) );
|
|
Packit |
1c1d7e |
#if defined(DEBUG)
|
|
Packit |
1c1d7e |
//dump( rxdata ); // uncomment this line for debugging
|
|
Packit |
1c1d7e |
#endif
|
|
Packit |
1c1d7e |
}
|