commit f591ddde8913633972409b9ebb3967738007730e
Author: David Lutterkort <lutter@redhat.com>
Date: Sun Nov 13 19:38:39 2011 -0800
* src/fa.c (totalize): handle case-insensitive FA's properly
The convention for case-insensitive FA's is that they do not contain any
transitions on [A-Z], effectively removing upper case letters from the
alphabet.
totalize used to create transitions into the crash state that did
transition on upper case letters, violating the convention.
diff --git a/src/fa.c b/src/fa.c
index ecfe8f4..40194e3 100644
--- a/src/fa.c
+++ b/src/fa.c
@@ -60,6 +60,10 @@ int fa_minimization_algorithm = FA_MIN_HOPCROFT;
* fa_as_regexp, we store regexps on transitions in the re field of each
* transition. TRANS_RE indicates that we do that, and is used by fa_dot to
* produce proper graphs of an automaton transitioning on regexps.
+ *
+ * For case-insensitive regexps (nocase == 1), the FA never has transitions
+ * on uppercase letters [A-Z], effectively removing these letters from the
+ * alphabet.
*/
struct fa {
struct state *initial;
@@ -2344,6 +2348,34 @@ int fa_contains(struct fa *fa1, struct fa *fa2) {
goto done;
}
+static int add_crash_trans(struct fa *fa, struct state *s, struct state *crash,
+ int min, int max) {
+ int result;
+
+ if (fa->nocase) {
+ /* Never transition on anything in [A-Z] */
+ if (min > 'Z' || max < 'A') {
+ result = add_new_trans(s, crash, min, max);
+ } else if (min >= 'A' && max <= 'Z') {
+ result = 0;
+ } else if (max <= 'Z') {
+ /* min < 'A' */
+ result = add_new_trans(s, crash, min, 'A' - 1);
+ } else if (min >= 'A') {
+ /* max > 'Z' */
+ result = add_new_trans(s, crash, 'Z' + 1, max);
+ } else {
+ /* min < 'A' && max > 'Z' */
+ result = add_new_trans(s, crash, min, 'A' - 1);
+ if (result == 0)
+ result = add_new_trans(s, crash, 'Z' + 1, max);
+ }
+ } else {
+ result = add_new_trans(s, crash, min, max);
+ }
+ return result;
+}
+
static int totalize(struct fa *fa) {
int r;
struct state *crash = add_state(fa, 0);
@@ -2352,42 +2384,25 @@ static int totalize(struct fa *fa) {
F(mark_reachable(fa));
sort_transition_intervals(fa);
- if (fa->nocase) {
- r = add_new_trans(crash, crash, UCHAR_MIN, 'A' - 1);
- if (r < 0)
- return -1;
- r = add_new_trans(crash, crash, 'Z' + 1, UCHAR_MAX);
- if (r < 0)
- return -1;
- } else {
- r = add_new_trans(crash, crash, UCHAR_MIN, UCHAR_MAX);
- if (r < 0)
- return -1;
- }
+ r = add_crash_trans(fa, crash, crash, UCHAR_MIN, UCHAR_MAX);
+ if (r < 0)
+ return -1;
list_for_each(s, fa->initial) {
int next = UCHAR_MIN;
int tused = s->tused;
for (int i=0; i < tused; i++) {
uchar min = s->trans[i].min, max = s->trans[i].max;
- if (fa->nocase) {
- /* Don't add transitions on [A-Z] into crash */
- if (isupper(min)) min = 'A';
- if (isupper(max)) max = 'Z';
- }
if (min > next) {
- r = add_new_trans(s, crash, next, min - 1);
+ r = add_crash_trans(fa, s, crash, next, min - 1);
if (r < 0)
return -1;
}
- if (max + 1 > next) {
+ if (max + 1 > next)
next = max + 1;
- if (fa->nocase && isupper(next))
- next = 'Z' + 1;
- }
}
if (next <= UCHAR_MAX) {
- r = add_new_trans(s, crash, next, UCHAR_MAX);
+ r = add_crash_trans(fa, s, crash, next, UCHAR_MAX);
if (r < 0)
return -1;
}
@@ -3019,6 +3034,10 @@ int fa_nocase(struct fa *fa) {
/* t->min < 'A' */
t->max = 'A' - 1;
F(add_new_trans(s, t->to, lc_min, lc_max));
+ } else if (t->min >= 'A') {
+ /* t->max > 'Z' */
+ t->min = 'Z' + 1;
+ F(add_new_trans(s, t->to, lc_min, lc_max));
} else {
/* t->min < 'A' && t->max > 'Z' */
F(add_new_trans(s, t->to, 'Z' + 1, t->max));
diff --git a/tests/fatest.c b/tests/fatest.c
index be4460b..e3658ab 100644
--- a/tests/fatest.c
+++ b/tests/fatest.c
@@ -581,6 +581,24 @@ static void testExpandNoCase(CuTest *tc) {
free(s);
}
+static void testNoCaseComplement(CuTest *tc) {
+ const char *key_s = "keY";
+ struct fa *key = make_good_fa(tc, key_s);
+ struct fa *isect = NULL;
+
+ fa_nocase(key);
+
+ struct fa *comp = mark(fa_complement(key));
+
+ key = make_good_fa(tc, key_s);
+
+ /* We used to have a bug in totalize that caused the intersection
+ * to contain "keY" */
+ isect = fa_intersect(key, comp);
+
+ CuAssertIntEquals(tc, 1, fa_is_basic(isect, FA_EMPTY));
+}
+
int main(int argc, char **argv) {
if (argc == 1) {
char *output = NULL;
@@ -605,6 +623,7 @@ int main(int argc, char **argv) {
SUITE_ADD_TEST(suite, testExpandCharRanges);
SUITE_ADD_TEST(suite, testNoCase);
SUITE_ADD_TEST(suite, testExpandNoCase);
+ SUITE_ADD_TEST(suite, testNoCaseComplement);
CuSuiteRun(suite);
CuSuiteSummary(suite, &output);
diff --git a/tests/modules/pass_nocase.aug b/tests/modules/pass_nocase.aug
index 6d254a3..ef248f4 100644
--- a/tests/modules/pass_nocase.aug
+++ b/tests/modules/pass_nocase.aug
@@ -10,7 +10,7 @@ test lns1 get "KEY" = { "1" = "KEY" }
test lns1 get "KeY" = { "1" = "KeY" }
let lns2 =
- let re = /[a-z]/i - /Key/i in
+ let re = /[A-Za-z]+/ - /Key/i in
[ label "1" . store re ] | [ label "2" . store /Key/i ]
test lns2 get "Key" = { "2" = "Key" }