|
Matthew Booth |
00c1a6 |
commit f591ddde8913633972409b9ebb3967738007730e
|
|
Matthew Booth |
00c1a6 |
Author: David Lutterkort <lutter@redhat.com>
|
|
Matthew Booth |
00c1a6 |
Date: Sun Nov 13 19:38:39 2011 -0800
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
* src/fa.c (totalize): handle case-insensitive FAs properly
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
The convention for case-insensitive FAs is that they do not contain any
|
|
Matthew Booth |
00c1a6 |
transitions on [A-Z], effectively removing upper case letters from the
|
|
Matthew Booth |
00c1a6 |
alphabet.
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
totalize used to create transitions into the crash state that did
|
|
Matthew Booth |
00c1a6 |
transition on upper case letters, violating the convention.
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
diff --git a/src/fa.c b/src/fa.c
|
|
Matthew Booth |
00c1a6 |
index ecfe8f4..40194e3 100644
|
|
Matthew Booth |
00c1a6 |
--- a/src/fa.c
|
|
Matthew Booth |
00c1a6 |
+++ b/src/fa.c
|
|
Matthew Booth |
00c1a6 |
@@ -60,6 +60,10 @@ int fa_minimization_algorithm = FA_MIN_HOPCROFT;
|
|
Matthew Booth |
00c1a6 |
* fa_as_regexp, we store regexps on transitions in the re field of each
|
|
Matthew Booth |
00c1a6 |
* transition. TRANS_RE indicates that we do that, and is used by fa_dot to
|
|
Matthew Booth |
00c1a6 |
* produce proper graphs of an automaton transitioning on regexps.
|
|
Matthew Booth |
00c1a6 |
+ *
|
|
Matthew Booth |
00c1a6 |
+ * For case-insensitive regexps (nocase == 1), the FA never has transitions
|
|
Matthew Booth |
00c1a6 |
+ * on uppercase letters [A-Z], effectively removing these letters from the
|
|
Matthew Booth |
00c1a6 |
+ * alphabet.
|
|
Matthew Booth |
00c1a6 |
*/
|
|
Matthew Booth |
00c1a6 |
struct fa {
|
|
Matthew Booth |
00c1a6 |
struct state *initial;
|
|
Matthew Booth |
00c1a6 |
@@ -2344,6 +2348,34 @@ int fa_contains(struct fa *fa1, struct fa *fa2) {
|
|
Matthew Booth |
00c1a6 |
goto done;
|
|
Matthew Booth |
00c1a6 |
}
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
+static int add_crash_trans(struct fa *fa, struct state *s, struct state *crash,
|
|
Matthew Booth |
00c1a6 |
+ int min, int max) {
|
|
Matthew Booth |
00c1a6 |
+ int result;
|
|
Matthew Booth |
00c1a6 |
+
|
|
Matthew Booth |
00c1a6 |
+ if (fa->nocase) {
|
|
Matthew Booth |
00c1a6 |
+ /* Never transition on anything in [A-Z] */
|
|
Matthew Booth |
00c1a6 |
+ if (min > 'Z' || max < 'A') {
|
|
Matthew Booth |
00c1a6 |
+ result = add_new_trans(s, crash, min, max);
|
|
Matthew Booth |
00c1a6 |
+ } else if (min >= 'A' && max <= 'Z') {
|
|
Matthew Booth |
00c1a6 |
+ result = 0;
|
|
Matthew Booth |
00c1a6 |
+ } else if (max <= 'Z') {
|
|
Matthew Booth |
00c1a6 |
+ /* min < 'A' */
|
|
Matthew Booth |
00c1a6 |
+ result = add_new_trans(s, crash, min, 'A' - 1);
|
|
Matthew Booth |
00c1a6 |
+ } else if (min >= 'A') {
|
|
Matthew Booth |
00c1a6 |
+ /* max > 'Z' */
|
|
Matthew Booth |
00c1a6 |
+ result = add_new_trans(s, crash, 'Z' + 1, max);
|
|
Matthew Booth |
00c1a6 |
+ } else {
|
|
Matthew Booth |
00c1a6 |
+ /* min < 'A' && max > 'Z' */
|
|
Matthew Booth |
00c1a6 |
+ result = add_new_trans(s, crash, min, 'A' - 1);
|
|
Matthew Booth |
00c1a6 |
+ if (result == 0)
|
|
Matthew Booth |
00c1a6 |
+ result = add_new_trans(s, crash, 'Z' + 1, max);
|
|
Matthew Booth |
00c1a6 |
+ }
|
|
Matthew Booth |
00c1a6 |
+ } else {
|
|
Matthew Booth |
00c1a6 |
+ result = add_new_trans(s, crash, min, max);
|
|
Matthew Booth |
00c1a6 |
+ }
|
|
Matthew Booth |
00c1a6 |
+ return result;
|
|
Matthew Booth |
00c1a6 |
+}
|
|
Matthew Booth |
00c1a6 |
+
|
|
Matthew Booth |
00c1a6 |
static int totalize(struct fa *fa) {
|
|
Matthew Booth |
00c1a6 |
int r;
|
|
Matthew Booth |
00c1a6 |
struct state *crash = add_state(fa, 0);
|
|
Matthew Booth |
00c1a6 |
@@ -2352,42 +2384,25 @@ static int totalize(struct fa *fa) {
|
|
Matthew Booth |
00c1a6 |
F(mark_reachable(fa));
|
|
Matthew Booth |
00c1a6 |
sort_transition_intervals(fa);
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
- if (fa->nocase) {
|
|
Matthew Booth |
00c1a6 |
- r = add_new_trans(crash, crash, UCHAR_MIN, 'A' - 1);
|
|
Matthew Booth |
00c1a6 |
- if (r < 0)
|
|
Matthew Booth |
00c1a6 |
- return -1;
|
|
Matthew Booth |
00c1a6 |
- r = add_new_trans(crash, crash, 'Z' + 1, UCHAR_MAX);
|
|
Matthew Booth |
00c1a6 |
- if (r < 0)
|
|
Matthew Booth |
00c1a6 |
- return -1;
|
|
Matthew Booth |
00c1a6 |
- } else {
|
|
Matthew Booth |
00c1a6 |
- r = add_new_trans(crash, crash, UCHAR_MIN, UCHAR_MAX);
|
|
Matthew Booth |
00c1a6 |
- if (r < 0)
|
|
Matthew Booth |
00c1a6 |
- return -1;
|
|
Matthew Booth |
00c1a6 |
- }
|
|
Matthew Booth |
00c1a6 |
+ r = add_crash_trans(fa, crash, crash, UCHAR_MIN, UCHAR_MAX);
|
|
Matthew Booth |
00c1a6 |
+ if (r < 0)
|
|
Matthew Booth |
00c1a6 |
+ return -1;
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
list_for_each(s, fa->initial) {
|
|
Matthew Booth |
00c1a6 |
int next = UCHAR_MIN;
|
|
Matthew Booth |
00c1a6 |
int tused = s->tused;
|
|
Matthew Booth |
00c1a6 |
for (int i=0; i < tused; i++) {
|
|
Matthew Booth |
00c1a6 |
uchar min = s->trans[i].min, max = s->trans[i].max;
|
|
Matthew Booth |
00c1a6 |
- if (fa->nocase) {
|
|
Matthew Booth |
00c1a6 |
- /* Don't add transitions on [A-Z] into crash */
|
|
Matthew Booth |
00c1a6 |
- if (isupper(min)) min = 'A';
|
|
Matthew Booth |
00c1a6 |
- if (isupper(max)) max = 'Z';
|
|
Matthew Booth |
00c1a6 |
- }
|
|
Matthew Booth |
00c1a6 |
if (min > next) {
|
|
Matthew Booth |
00c1a6 |
- r = add_new_trans(s, crash, next, min - 1);
|
|
Matthew Booth |
00c1a6 |
+ r = add_crash_trans(fa, s, crash, next, min - 1);
|
|
Matthew Booth |
00c1a6 |
if (r < 0)
|
|
Matthew Booth |
00c1a6 |
return -1;
|
|
Matthew Booth |
00c1a6 |
}
|
|
Matthew Booth |
00c1a6 |
- if (max + 1 > next) {
|
|
Matthew Booth |
00c1a6 |
+ if (max + 1 > next)
|
|
Matthew Booth |
00c1a6 |
next = max + 1;
|
|
Matthew Booth |
00c1a6 |
- if (fa->nocase && isupper(next))
|
|
Matthew Booth |
00c1a6 |
- next = 'Z' + 1;
|
|
Matthew Booth |
00c1a6 |
- }
|
|
Matthew Booth |
00c1a6 |
}
|
|
Matthew Booth |
00c1a6 |
if (next <= UCHAR_MAX) {
|
|
Matthew Booth |
00c1a6 |
- r = add_new_trans(s, crash, next, UCHAR_MAX);
|
|
Matthew Booth |
00c1a6 |
+ r = add_crash_trans(fa, s, crash, next, UCHAR_MAX);
|
|
Matthew Booth |
00c1a6 |
if (r < 0)
|
|
Matthew Booth |
00c1a6 |
return -1;
|
|
Matthew Booth |
00c1a6 |
}
|
|
Matthew Booth |
00c1a6 |
@@ -3019,6 +3034,10 @@ int fa_nocase(struct fa *fa) {
|
|
Matthew Booth |
00c1a6 |
/* t->min < 'A' */
|
|
Matthew Booth |
00c1a6 |
t->max = 'A' - 1;
|
|
Matthew Booth |
00c1a6 |
F(add_new_trans(s, t->to, lc_min, lc_max));
|
|
Matthew Booth |
00c1a6 |
+ } else if (t->min >= 'A') {
|
|
Matthew Booth |
00c1a6 |
+ /* t->max > 'Z' */
|
|
Matthew Booth |
00c1a6 |
+ t->min = 'Z' + 1;
|
|
Matthew Booth |
00c1a6 |
+ F(add_new_trans(s, t->to, lc_min, lc_max));
|
|
Matthew Booth |
00c1a6 |
} else {
|
|
Matthew Booth |
00c1a6 |
/* t->min < 'A' && t->max > 'Z' */
|
|
Matthew Booth |
00c1a6 |
F(add_new_trans(s, t->to, 'Z' + 1, t->max));
|
|
Matthew Booth |
00c1a6 |
diff --git a/tests/fatest.c b/tests/fatest.c
|
|
Matthew Booth |
00c1a6 |
index be4460b..e3658ab 100644
|
|
Matthew Booth |
00c1a6 |
--- a/tests/fatest.c
|
|
Matthew Booth |
00c1a6 |
+++ b/tests/fatest.c
|
|
Matthew Booth |
00c1a6 |
@@ -581,6 +581,24 @@ static void testExpandNoCase(CuTest *tc) {
|
|
Matthew Booth |
00c1a6 |
free(s);
|
|
Matthew Booth |
00c1a6 |
}
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
+static void testNoCaseComplement(CuTest *tc) {
|
|
Matthew Booth |
00c1a6 |
+ const char *key_s = "keY";
|
|
Matthew Booth |
00c1a6 |
+ struct fa *key = make_good_fa(tc, key_s);
|
|
Matthew Booth |
00c1a6 |
+ struct fa *isect = NULL;
|
|
Matthew Booth |
00c1a6 |
+
|
|
Matthew Booth |
00c1a6 |
+ fa_nocase(key);
|
|
Matthew Booth |
00c1a6 |
+
|
|
Matthew Booth |
00c1a6 |
+ struct fa *comp = mark(fa_complement(key));
|
|
Matthew Booth |
00c1a6 |
+
|
|
Matthew Booth |
00c1a6 |
+ key = make_good_fa(tc, key_s);
|
|
Matthew Booth |
00c1a6 |
+
|
|
Matthew Booth |
00c1a6 |
+ /* We used to have a bug in totalize that caused the intersection
|
|
Matthew Booth |
00c1a6 |
+ * to contain "keY" */
|
|
Matthew Booth |
00c1a6 |
+ isect = fa_intersect(key, comp);
|
|
Matthew Booth |
00c1a6 |
+
|
|
Matthew Booth |
00c1a6 |
+ CuAssertIntEquals(tc, 1, fa_is_basic(isect, FA_EMPTY));
|
|
Matthew Booth |
00c1a6 |
+}
|
|
Matthew Booth |
00c1a6 |
+
|
|
Matthew Booth |
00c1a6 |
int main(int argc, char **argv) {
|
|
Matthew Booth |
00c1a6 |
if (argc == 1) {
|
|
Matthew Booth |
00c1a6 |
char *output = NULL;
|
|
Matthew Booth |
00c1a6 |
@@ -605,6 +623,7 @@ int main(int argc, char **argv) {
|
|
Matthew Booth |
00c1a6 |
SUITE_ADD_TEST(suite, testExpandCharRanges);
|
|
Matthew Booth |
00c1a6 |
SUITE_ADD_TEST(suite, testNoCase);
|
|
Matthew Booth |
00c1a6 |
SUITE_ADD_TEST(suite, testExpandNoCase);
|
|
Matthew Booth |
00c1a6 |
+ SUITE_ADD_TEST(suite, testNoCaseComplement);
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
CuSuiteRun(suite);
|
|
Matthew Booth |
00c1a6 |
CuSuiteSummary(suite, &output);
|
|
Matthew Booth |
00c1a6 |
diff --git a/tests/modules/pass_nocase.aug b/tests/modules/pass_nocase.aug
|
|
Matthew Booth |
00c1a6 |
index 6d254a3..ef248f4 100644
|
|
Matthew Booth |
00c1a6 |
--- a/tests/modules/pass_nocase.aug
|
|
Matthew Booth |
00c1a6 |
+++ b/tests/modules/pass_nocase.aug
|
|
Matthew Booth |
00c1a6 |
@@ -10,7 +10,7 @@ test lns1 get "KEY" = { "1" = "KEY" }
|
|
Matthew Booth |
00c1a6 |
test lns1 get "KeY" = { "1" = "KeY" }
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
let lns2 =
|
|
Matthew Booth |
00c1a6 |
- let re = /[a-z]/i - /Key/i in
|
|
Matthew Booth |
00c1a6 |
+ let re = /[A-Za-z]+/ - /Key/i in
|
|
Matthew Booth |
00c1a6 |
[ label "1" . store re ] | [ label "2" . store /Key/i ]
|
|
Matthew Booth |
00c1a6 |
|
|
Matthew Booth |
00c1a6 |
test lns2 get "Key" = { "2" = "Key" }
|