Blob Blame History Raw
commit f591ddde8913633972409b9ebb3967738007730e
Author: David Lutterkort <lutter@redhat.com>
Date:   Sun Nov 13 19:38:39 2011 -0800

    * src/fa.c (totalize): handle case-insensitive FA's properly
    
    The convention for case-insensitive FA's is that they do not contain any
    transitions on [A-Z], effectively removing upper case letters from the
    alphabet.
    
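    totalize used to add transitions into the crash state that covered
    upper case letters, violating the convention. Transitions into the
    crash state are now split so that they never include [A-Z] when the
    FA is case-insensitive.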

diff --git a/src/fa.c b/src/fa.c
index ecfe8f4..40194e3 100644
--- a/src/fa.c
+++ b/src/fa.c
@@ -60,6 +60,10 @@ int fa_minimization_algorithm = FA_MIN_HOPCROFT;
  * fa_as_regexp, we store regexps on transitions in the re field of each
  * transition. TRANS_RE indicates that we do that, and is used by fa_dot to
  * produce proper graphs of an automaton transitioning on regexps.
+ *
+ * For case-insensitive regexps (nocase == 1), the FA never has transitions
+ * on uppercase letters [A-Z], effectively removing these letters from the
+ * alphabet.
  */
 struct fa {
     struct state *initial;
@@ -2344,6 +2348,34 @@ int fa_contains(struct fa *fa1, struct fa *fa2) {
     goto done;
 }
 
+static int add_crash_trans(struct fa *fa, struct state *s, struct state *crash,
+                           int min, int max) {
+    int result;
+
+    if (fa->nocase) {
+        /* Never transition on anything in [A-Z] */
+        if (min > 'Z' || max < 'A') {
+            result = add_new_trans(s, crash, min, max);
+        } else if (min >= 'A' && max <= 'Z') {
+            result = 0;
+        } else if (max <= 'Z') {
+            /* min < 'A' */
+            result = add_new_trans(s, crash, min, 'A' - 1);
+        } else if (min >= 'A') {
+            /* max > 'Z' */
+            result = add_new_trans(s, crash, 'Z' + 1, max);
+        } else {
+            /* min < 'A' && max > 'Z' */
+            result = add_new_trans(s, crash, min, 'A' - 1);
+            if (result == 0)
+                result = add_new_trans(s, crash, 'Z' + 1, max);
+        }
+    } else {
+        result = add_new_trans(s, crash, min, max);
+    }
+    return result;
+}
+
 static int totalize(struct fa *fa) {
     int r;
     struct state *crash = add_state(fa, 0);
@@ -2352,42 +2384,25 @@ static int totalize(struct fa *fa) {
     F(mark_reachable(fa));
     sort_transition_intervals(fa);
 
-    if (fa->nocase) {
-        r = add_new_trans(crash, crash, UCHAR_MIN, 'A' - 1);
-        if (r < 0)
-            return -1;
-        r = add_new_trans(crash, crash, 'Z' + 1, UCHAR_MAX);
-        if (r < 0)
-            return -1;
-    } else {
-        r = add_new_trans(crash, crash, UCHAR_MIN, UCHAR_MAX);
-        if (r < 0)
-            return -1;
-    }
+    r = add_crash_trans(fa, crash, crash, UCHAR_MIN, UCHAR_MAX);
+    if (r < 0)
+        return -1;
 
     list_for_each(s, fa->initial) {
         int next = UCHAR_MIN;
         int tused = s->tused;
         for (int i=0; i < tused; i++) {
             uchar min = s->trans[i].min, max = s->trans[i].max;
-            if (fa->nocase) {
-                /* Don't add transitions on [A-Z] into crash */
-                if (isupper(min)) min = 'A';
-                if (isupper(max)) max = 'Z';
-            }
             if (min > next) {
-                r = add_new_trans(s, crash, next, min - 1);
+                r = add_crash_trans(fa, s, crash, next, min - 1);
                 if (r < 0)
                     return -1;
             }
-            if (max + 1 > next) {
+            if (max + 1 > next)
                 next = max + 1;
-                if (fa->nocase && isupper(next))
-                    next = 'Z' + 1;
-            }
         }
         if (next <= UCHAR_MAX) {
-            r = add_new_trans(s, crash, next, UCHAR_MAX);
+            r = add_crash_trans(fa, s, crash, next, UCHAR_MAX);
             if (r < 0)
                 return -1;
         }
@@ -3019,6 +3034,10 @@ int fa_nocase(struct fa *fa) {
                 /* t->min < 'A' */
                 t->max = 'A' - 1;
                 F(add_new_trans(s, t->to, lc_min, lc_max));
+            } else if (t->min >= 'A') {
+                /* t->max > 'Z' */
+                t->min = 'Z' + 1;
+                F(add_new_trans(s, t->to, lc_min, lc_max));
             } else {
                 /* t->min < 'A' && t->max > 'Z' */
                 F(add_new_trans(s, t->to, 'Z' + 1, t->max));
diff --git a/tests/fatest.c b/tests/fatest.c
index be4460b..e3658ab 100644
--- a/tests/fatest.c
+++ b/tests/fatest.c
@@ -581,6 +581,24 @@ static void testExpandNoCase(CuTest *tc) {
     free(s);
 }
 
+static void testNoCaseComplement(CuTest *tc) {
+    const char *key_s = "keY";
+    struct fa *key = make_good_fa(tc, key_s);
+    struct fa *isect = NULL;
+
+    fa_nocase(key);
+
+    struct fa *comp = mark(fa_complement(key));
+
+    key = make_good_fa(tc, key_s);
+
+    /* We used to have a bug in totalize that caused the intersection
+     * to contain "keY" */
+    isect = fa_intersect(key, comp);
+
+    CuAssertIntEquals(tc, 1, fa_is_basic(isect, FA_EMPTY));
+}
+
 int main(int argc, char **argv) {
     if (argc == 1) {
         char *output = NULL;
@@ -605,6 +623,7 @@ int main(int argc, char **argv) {
         SUITE_ADD_TEST(suite, testExpandCharRanges);
         SUITE_ADD_TEST(suite, testNoCase);
         SUITE_ADD_TEST(suite, testExpandNoCase);
+        SUITE_ADD_TEST(suite, testNoCaseComplement);
 
         CuSuiteRun(suite);
         CuSuiteSummary(suite, &output);
diff --git a/tests/modules/pass_nocase.aug b/tests/modules/pass_nocase.aug
index 6d254a3..ef248f4 100644
--- a/tests/modules/pass_nocase.aug
+++ b/tests/modules/pass_nocase.aug
@@ -10,7 +10,7 @@ test lns1 get "KEY" = { "1" = "KEY" }
 test lns1 get "KeY" = { "1" = "KeY" }
 
 let lns2 =
-  let re = /[a-z]/i - /Key/i in
+  let re = /[A-Za-z]+/ - /Key/i in
   [ label "1" . store re ] | [ label "2" . store /Key/i ]
 
 test lns2 get "Key" = { "2" = "Key" }