/*
tre-match-utils.h - TRE matcher helper definitions
Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define str_source ((tre_str_source*)string)
#ifdef TRE_WCHAR
#ifdef TRE_MULTIBYTE
/* Wide character and multibyte support. */
#define GET_NEXT_WCHAR() \
do { \
prev_c = next_c; \
if (type == STR_BYTE) \
{ \
pos++; \
if (len >= 0 && pos >= len) \
next_c = '\0'; \
else \
next_c = (unsigned char)(*str_byte++); \
} \
else if (type == STR_WIDE) \
{ \
pos++; \
if (len >= 0 && pos >= len) \
next_c = L'\0'; \
else \
next_c = *str_wide++; \
} \
else if (type == STR_MBS) \
{ \
pos += pos_add_next; \
if (str_byte == NULL) \
next_c = L'\0'; \
else \
{ \
size_t w; \
int max; \
if (len >= 0) \
max = len - pos; \
else \
max = 32; \
if (max <= 0) \
{ \
next_c = L'\0'; \
pos_add_next = 1; \
} \
else \
{ \
w = tre_mbrtowc(&next_c, str_byte, max, &mbstate); \
if (w == (size_t)-1 || w == (size_t)-2) \
return REG_NOMATCH; \
if (w == 0 && len >= 0) \
{ \
pos_add_next = 1; \
next_c = 0; \
str_byte++; \
} \
else \
{ \
pos_add_next = w; \
str_byte += w; \
} \
} \
} \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(0)
#else /* !TRE_MULTIBYTE */
/* Wide character support, no multibyte support. */
#define GET_NEXT_WCHAR() \
do { \
prev_c = next_c; \
if (type == STR_BYTE) \
{ \
pos++; \
if (len >= 0 && pos >= len) \
next_c = '\0'; \
else \
next_c = (unsigned char)(*str_byte++); \
} \
else if (type == STR_WIDE) \
{ \
pos++; \
if (len >= 0 && pos >= len) \
next_c = L'\0'; \
else \
next_c = *str_wide++; \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(0)
#endif /* !TRE_MULTIBYTE */
#else /* !TRE_WCHAR */
/* No wide character or multibyte support. */
#define GET_NEXT_WCHAR() \
do { \
prev_c = next_c; \
if (type == STR_BYTE) \
{ \
pos++; \
if (len >= 0 && pos >= len) \
next_c = '\0'; \
else \
next_c = (unsigned char)(*str_byte++); \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(0)
#endif /* !TRE_WCHAR */
#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))
#define CHECK_ASSERTIONS(assertions) \
(((assertions & ASSERT_AT_BOL) \
&& (pos > 0 || reg_notbol) \
&& (prev_c != L'\n' || !reg_newline)) \
|| ((assertions & ASSERT_AT_EOL) \
&& (next_c != L'\0' || reg_noteol) \
&& (next_c != L'\n' || !reg_newline)) \
|| ((assertions & ASSERT_AT_BOW) \
&& (pos > 0 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))) \
|| ((assertions & ASSERT_AT_EOW) \
&& (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
|| ((assertions & ASSERT_AT_WB) \
&& (pos != 0 && next_c != L'\0' \
&& IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
|| ((assertions & ASSERT_AT_WB_NEG) \
&& (pos == 0 || next_c == L'\0' \
|| IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
/* Returns 1 if `t1' wins `t2', 0 otherwise. */
static int
tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
int *t1, int *t2)
{
int i;
for (i = 0; i < num_tags; i++)
{
if (tag_directions[i] == TRE_TAG_MINIMIZE)
{
if (t1[i] < t2[i])
return 1;
if (t1[i] > t2[i])
return 0;
}
else
{
if (t1[i] > t2[i])
return 1;
if (t1[i] < t2[i])
return 0;
}
}
/* assert(0);*/
return 0;
}
static int
tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
{
DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
while (*classes != (tre_ctype_t)0)
if ((!icase && tre_isctype(wc, *classes))
|| (icase && (tre_isctype(tre_toupper(wc), *classes)
|| tre_isctype(tre_tolower(wc), *classes))))
return 1; /* Match. */
else
classes++;
return 0; /* No match. */
}