|
Jaroslav Škarvada |
6ccb10 |
From ebca24d6c9eb12f91eed3993de65945ee97dd467 Mon Sep 17 00:00:00 2001
|
|
Jaroslav Škarvada |
6ccb10 |
From: Paolo Bonzini <bonzini@gnu.org>
|
|
Jaroslav Škarvada |
6ccb10 |
Date: Tue, 4 May 2010 18:07:28 +0200
|
|
Jaroslav Škarvada |
6ccb10 |
Subject: [PATCH] dfa: speed up [[:digit:]] and [[:xdigit:]]
|
|
Jaroslav Škarvada |
6ccb10 |
|
|
Jaroslav Škarvada |
6ccb10 |
There's no "multibyte pain" in these two classes, since POSIX
|
|
Jaroslav Škarvada |
6ccb10 |
and ISO C99 mandate their contents.
|
|
Jaroslav Škarvada |
6ccb10 |
|
|
Jaroslav Škarvada |
6ccb10 |
Time for "./grep -x '[[:digit:]]' /usr/share/dict/linux.words"
|
|
Jaroslav Škarvada |
6ccb10 |
Before: 1.5s, after: 0.07s. (sed manages only 0.5s).
|
|
Jaroslav Škarvada |
6ccb10 |
|
|
Jaroslav Škarvada |
6ccb10 |
* src/dfa.c (prednames): Declare struct dfa_ctype separately
|
|
Jaroslav Škarvada |
6ccb10 |
from definition. Add sb_only.
|
|
Jaroslav Škarvada |
6ccb10 |
(find_pred): Return const struct dfa_ctype *.
|
|
Jaroslav Škarvada |
6ccb10 |
(parse_bracket_exp): Use the struct returned by find_pred. Do
|
|
Jaroslav Škarvada |
6ccb10 |
not fill MBCSET for sb_only character types.
|
|
Jaroslav Škarvada |
6ccb10 |
---
|
|
Jaroslav Škarvada |
6ccb10 |
src/dfa.c | 55 ++++++++++++++++++++++++++++---------------------------
|
|
Jaroslav Škarvada |
6ccb10 |
1 files changed, 28 insertions(+), 27 deletions(-)
|
|
Jaroslav Škarvada |
6ccb10 |
|
|
Jaroslav Škarvada |
6ccb10 |
diff --git a/src/dfa.c b/src/dfa.c
|
|
Jaroslav Škarvada |
6ccb10 |
index 4dd26c9..da5a306 100644
|
|
Jaroslav Škarvada |
6ccb10 |
--- a/src/dfa.c
|
|
Jaroslav Škarvada |
6ccb10 |
+++ b/src/dfa.c
|
|
Jaroslav Škarvada |
6ccb10 |
@@ -429,26 +429,29 @@ typedef int predicate (int);
|
|
Jaroslav Škarvada |
6ccb10 |
/* The following list maps the names of the Posix named character classes
|
|
Jaroslav Škarvada |
6ccb10 |
to predicate functions that determine whether a given character is in
|
|
Jaroslav Škarvada |
6ccb10 |
the class. The leading [ has already been eaten by the lexical analyzer. */
|
|
Jaroslav Škarvada |
6ccb10 |
-static struct {
|
|
Jaroslav Škarvada |
6ccb10 |
+struct dfa_ctype {
|
|
Jaroslav Škarvada |
6ccb10 |
const char *name;
|
|
Jaroslav Škarvada |
6ccb10 |
- predicate *pred;
|
|
Jaroslav Škarvada |
6ccb10 |
-} const prednames[] = {
|
|
Jaroslav Škarvada |
6ccb10 |
- { "alpha", is_alpha },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "upper", is_upper },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "lower", is_lower },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "digit", is_digit },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "xdigit", is_xdigit },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "space", is_space },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "punct", is_punct },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "alnum", is_alnum },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "print", is_print },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "graph", is_graph },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "cntrl", is_cntrl },
|
|
Jaroslav Škarvada |
6ccb10 |
- { "blank", is_blank },
|
|
Jaroslav Škarvada |
6ccb10 |
- { NULL, NULL }
|
|
Jaroslav Škarvada |
6ccb10 |
+ predicate *func;
|
|
Jaroslav Škarvada |
6ccb10 |
+ bool sb_only;
|
|
Jaroslav Škarvada |
6ccb10 |
};
|
|
Jaroslav Škarvada |
6ccb10 |
|
|
Jaroslav Škarvada |
6ccb10 |
-static predicate *
|
|
Jaroslav Škarvada |
6ccb10 |
+static const struct dfa_ctype prednames[] = {
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "alpha", isalpha, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "upper", isupper, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "lower", islower, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "digit", isdigit, true },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "xdigit", isxdigit, true },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "space", isspace, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "punct", ispunct, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "alnum", isalnum, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "print", isprint, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "graph", isgraph, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "cntrl", iscntrl, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { "blank", isblank, false },
|
|
Jaroslav Škarvada |
6ccb10 |
+ { NULL, NULL, false }
|
|
Jaroslav Škarvada |
6ccb10 |
+};
|
|
Jaroslav Škarvada |
6ccb10 |
+
|
|
Jaroslav Škarvada |
6ccb10 |
+static const struct dfa_ctype *
|
|
Jaroslav Škarvada |
6ccb10 |
find_pred (const char *str)
|
|
Jaroslav Škarvada |
6ccb10 |
{
|
|
Jaroslav Škarvada |
6ccb10 |
unsigned int i;
|
|
Jaroslav Škarvada |
6ccb10 |
@@ -456,7 +459,7 @@ find_pred (const char *str)
|
|
Jaroslav Škarvada |
6ccb10 |
if (!strcmp(str, prednames[i].name))
|
|
Jaroslav Škarvada |
6ccb10 |
break;
|
|
Jaroslav Škarvada |
6ccb10 |
|
|
Jaroslav Škarvada |
6ccb10 |
- return prednames[i].pred;
|
|
Jaroslav Škarvada |
6ccb10 |
+ return &prednames[i];
|
|
Jaroslav Škarvada |
6ccb10 |
}
|
|
Jaroslav Škarvada |
6ccb10 |
|
|
Jaroslav Škarvada |
6ccb10 |
/* Multibyte character handling sub-routine for lex.
|
|
Jaroslav Škarvada |
6ccb10 |
@@ -553,8 +556,11 @@ parse_bracket_exp (void)
|
|
Jaroslav Škarvada |
6ccb10 |
|| !strcmp (str, "lower"))
|
|
Jaroslav Škarvada |
6ccb10 |
? "alpha"
|
|
Jaroslav Škarvada |
6ccb10 |
: str);
|
|
Jaroslav Škarvada |
6ccb10 |
+ const struct dfa_ctype *pred = find_pred (class);
|
|
Jaroslav Škarvada |
6ccb10 |
+ if (!pred)
|
|
Jaroslav Škarvada |
6ccb10 |
+ dfaerror(_("invalid character class"));
|
|
Jaroslav Škarvada |
6ccb10 |
#ifdef MBS_SUPPORT
|
|
Jaroslav Škarvada |
6ccb10 |
- if (MB_CUR_MAX > 1)
|
|
Jaroslav Škarvada |
6ccb10 |
+ if (MB_CUR_MAX > 1 && !pred->sb_only)
|
|
Jaroslav Škarvada |
6ccb10 |
{
|
|
Jaroslav Škarvada |
6ccb10 |
/* Store the character class as wctype_t. */
|
|
Jaroslav Škarvada |
6ccb10 |
wctype_t wt = wctype (class);
|
|
Jaroslav Škarvada |
6ccb10 |
@@ -568,14 +574,9 @@ parse_bracket_exp (void)
|
|
Jaroslav Škarvada |
6ccb10 |
}
|
|
Jaroslav Škarvada |
6ccb10 |
#endif
|
|
Jaroslav Škarvada |
6ccb10 |
|
|
Jaroslav Škarvada |
6ccb10 |
- {
|
|
Jaroslav Škarvada |
6ccb10 |
- predicate *pred = find_pred (class);
|
|
Jaroslav Škarvada |
6ccb10 |
- if (!pred)
|
|
Jaroslav Škarvada |
6ccb10 |
- dfaerror(_("invalid character class"));
|
|
Jaroslav Škarvada |
6ccb10 |
- for (c2 = 0; c2 < NOTCHAR; ++c2)
|
|
Jaroslav Škarvada |
6ccb10 |
- if ((*pred)(c2))
|
|
Jaroslav Škarvada |
6ccb10 |
- setbit_case_fold (c2, ccl);
|
|
Jaroslav Škarvada |
6ccb10 |
- }
|
|
Jaroslav Škarvada |
6ccb10 |
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
|
|
Jaroslav Škarvada |
6ccb10 |
+ if (pred->func(c2))
|
|
Jaroslav Škarvada |
6ccb10 |
+ setbit_case_fold (c2, ccl);
|
|
Jaroslav Škarvada |
6ccb10 |
}
|
|
Jaroslav Škarvada |
6ccb10 |
|
|
Jaroslav Škarvada |
6ccb10 |
#ifdef MBS_SUPPORT
|
|
Jaroslav Škarvada |
6ccb10 |
--
|
|
Jaroslav Škarvada |
6ccb10 |
1.6.6.1
|
|
Jaroslav Škarvada |
6ccb10 |
|