Jaroslav Škarvada 6ccb10
From ebca24d6c9eb12f91eed3993de65945ee97dd467 Mon Sep 17 00:00:00 2001
Jaroslav Škarvada 6ccb10
From: Paolo Bonzini <bonzini@gnu.org>
Jaroslav Škarvada 6ccb10
Date: Tue, 4 May 2010 18:07:28 +0200
Jaroslav Škarvada 6ccb10
Subject: [PATCH] dfa: speed up [[:digit:]] and [[:xdigit:]]
Jaroslav Škarvada 6ccb10
Jaroslav Škarvada 6ccb10
There's no "multibyte pain" in these two classes, since POSIX
Jaroslav Škarvada 6ccb10
and ISO C99 mandate their contents.
Jaroslav Škarvada 6ccb10
Jaroslav Škarvada 6ccb10
Time for "./grep -x '[[:digit:]]' /usr/share/dict/linux.words"
Jaroslav Škarvada 6ccb10
Before: 1.5s, after: 0.07s.  (sed manages only 0.5s).
Jaroslav Škarvada 6ccb10
Jaroslav Škarvada 6ccb10
* src/dfa.c (prednames): Declare struct dfa_ctype separately
Jaroslav Škarvada 6ccb10
from definition.  Add sb_only.
Jaroslav Škarvada 6ccb10
(find_pred): Return const struct dfa_ctype *.
Jaroslav Škarvada 6ccb10
(parse_bracket_exp): Use the struct returned by find_pred.  Do
Jaroslav Škarvada 6ccb10
not fill MBCSET for sb_only character types.
Jaroslav Škarvada 6ccb10
---
Jaroslav Škarvada 6ccb10
 src/dfa.c |   55 ++++++++++++++++++++++++++++---------------------------
Jaroslav Škarvada 6ccb10
 1 files changed, 28 insertions(+), 27 deletions(-)
Jaroslav Škarvada 6ccb10
Jaroslav Škarvada 6ccb10
diff --git a/src/dfa.c b/src/dfa.c
Jaroslav Škarvada 6ccb10
index 4dd26c9..da5a306 100644
Jaroslav Škarvada 6ccb10
--- a/src/dfa.c
Jaroslav Škarvada 6ccb10
+++ b/src/dfa.c
Jaroslav Škarvada 6ccb10
@@ -429,26 +429,29 @@ typedef int predicate (int);
Jaroslav Škarvada 6ccb10
 /* The following list maps the names of the Posix named character classes
Jaroslav Škarvada 6ccb10
    to predicate functions that determine whether a given character is in
Jaroslav Škarvada 6ccb10
    the class.  The leading [ has already been eaten by the lexical analyzer. */
Jaroslav Škarvada 6ccb10
-static struct {
Jaroslav Škarvada 6ccb10
+struct dfa_ctype {
Jaroslav Škarvada 6ccb10
   const char *name;
Jaroslav Škarvada 6ccb10
-  predicate *pred;
Jaroslav Škarvada 6ccb10
-} const prednames[] = {
Jaroslav Škarvada 6ccb10
-  { "alpha", is_alpha },
Jaroslav Škarvada 6ccb10
-  { "upper", is_upper },
Jaroslav Škarvada 6ccb10
-  { "lower", is_lower },
Jaroslav Škarvada 6ccb10
-  { "digit", is_digit },
Jaroslav Škarvada 6ccb10
-  { "xdigit", is_xdigit },
Jaroslav Škarvada 6ccb10
-  { "space", is_space },
Jaroslav Škarvada 6ccb10
-  { "punct", is_punct },
Jaroslav Škarvada 6ccb10
-  { "alnum", is_alnum },
Jaroslav Škarvada 6ccb10
-  { "print", is_print },
Jaroslav Škarvada 6ccb10
-  { "graph", is_graph },
Jaroslav Škarvada 6ccb10
-  { "cntrl", is_cntrl },
Jaroslav Škarvada 6ccb10
-  { "blank", is_blank },
Jaroslav Škarvada 6ccb10
-  { NULL, NULL }
Jaroslav Škarvada 6ccb10
+  predicate *func;
Jaroslav Škarvada 6ccb10
+  bool sb_only;
Jaroslav Škarvada 6ccb10
 };
Jaroslav Škarvada 6ccb10
 
Jaroslav Škarvada 6ccb10
-static predicate *
Jaroslav Škarvada 6ccb10
+static const struct dfa_ctype prednames[] = {
Jaroslav Škarvada 6ccb10
+  { "alpha", isalpha, false },
Jaroslav Škarvada 6ccb10
+  { "upper", isupper, false },
Jaroslav Škarvada 6ccb10
+  { "lower", islower, false },
Jaroslav Škarvada 6ccb10
+  { "digit", isdigit, true },
Jaroslav Škarvada 6ccb10
+  { "xdigit", isxdigit, true },
Jaroslav Škarvada 6ccb10
+  { "space", isspace, false },
Jaroslav Škarvada 6ccb10
+  { "punct", ispunct, false },
Jaroslav Škarvada 6ccb10
+  { "alnum", isalnum, false },
Jaroslav Škarvada 6ccb10
+  { "print", isprint, false },
Jaroslav Škarvada 6ccb10
+  { "graph", isgraph, false },
Jaroslav Škarvada 6ccb10
+  { "cntrl", iscntrl, false },
Jaroslav Škarvada 6ccb10
+  { "blank", isblank, false },
Jaroslav Škarvada 6ccb10
+  { NULL, NULL, false }
Jaroslav Škarvada 6ccb10
+};
Jaroslav Škarvada 6ccb10
+
Jaroslav Škarvada 6ccb10
+static const struct dfa_ctype *
Jaroslav Škarvada 6ccb10
 find_pred (const char *str)
Jaroslav Škarvada 6ccb10
 {
Jaroslav Škarvada 6ccb10
   unsigned int i;
Jaroslav Škarvada 6ccb10
@@ -456,7 +459,7 @@ find_pred (const char *str)
Jaroslav Škarvada 6ccb10
     if (!strcmp(str, prednames[i].name))
Jaroslav Škarvada 6ccb10
       break;
Jaroslav Škarvada 6ccb10
 
Jaroslav Škarvada 6ccb10
-  return prednames[i].pred;
Jaroslav Škarvada 6ccb10
+  return &prednames[i];
Jaroslav Škarvada 6ccb10
 }
Jaroslav Škarvada 6ccb10
 
Jaroslav Škarvada 6ccb10
 /* Multibyte character handling sub-routine for lex.
Jaroslav Škarvada 6ccb10
@@ -553,8 +556,11 @@ parse_bracket_exp (void)
Jaroslav Škarvada 6ccb10
 				     || !strcmp (str, "lower"))
Jaroslav Škarvada 6ccb10
 				       ? "alpha"
Jaroslav Škarvada 6ccb10
 				       : str);
Jaroslav Škarvada 6ccb10
+                  const struct dfa_ctype *pred = find_pred (class);
Jaroslav Škarvada 6ccb10
+                  if (!pred)
Jaroslav Škarvada 6ccb10
+                    dfaerror(_("invalid character class"));
Jaroslav Škarvada 6ccb10
 #ifdef MBS_SUPPORT
Jaroslav Škarvada 6ccb10
-                  if (MB_CUR_MAX > 1)
Jaroslav Škarvada 6ccb10
+                  if (MB_CUR_MAX > 1 && !pred->sb_only)
Jaroslav Škarvada 6ccb10
                     {
Jaroslav Škarvada 6ccb10
 		      /* Store the character class as wctype_t.  */
Jaroslav Škarvada 6ccb10
                       wctype_t wt = wctype (class);
Jaroslav Škarvada 6ccb10
@@ -568,14 +574,9 @@ parse_bracket_exp (void)
Jaroslav Škarvada 6ccb10
                     }
Jaroslav Škarvada 6ccb10
 #endif
Jaroslav Škarvada 6ccb10
 
Jaroslav Škarvada 6ccb10
-                  {
Jaroslav Škarvada 6ccb10
-                    predicate *pred = find_pred (class);
Jaroslav Škarvada 6ccb10
-                    if (!pred)
Jaroslav Škarvada 6ccb10
-                      dfaerror(_("invalid character class"));
Jaroslav Škarvada 6ccb10
-                    for (c2 = 0; c2 < NOTCHAR; ++c2)
Jaroslav Škarvada 6ccb10
-                      if ((*pred)(c2))
Jaroslav Škarvada 6ccb10
-                        setbit_case_fold (c2, ccl);
Jaroslav Škarvada 6ccb10
-                  }
Jaroslav Škarvada 6ccb10
+                  for (c2 = 0; c2 < NOTCHAR; ++c2)
Jaroslav Škarvada 6ccb10
+                    if (pred->func(c2))
Jaroslav Škarvada 6ccb10
+                      setbit_case_fold (c2, ccl);
Jaroslav Škarvada 6ccb10
                 }
Jaroslav Škarvada 6ccb10
 
Jaroslav Škarvada 6ccb10
 #ifdef MBS_SUPPORT
Jaroslav Škarvada 6ccb10
-- 
Jaroslav Škarvada 6ccb10
1.6.6.1
Jaroslav Škarvada 6ccb10