From 712bd252946fdf32b3a208d0750af3f5bf445009 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Nov 23 2008 18:34:10 +0000 Subject: - Update to latest upstream version - Drop upstreamed patches - Add a couple of regression tests - Temporarily disable tests - Minor cleanup --- diff --git a/.cvsignore b/.cvsignore index 1ca4824..37ed20c 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1,2 +1 @@ -grep-2.5.1.tar.bz2 -grep-2.5.1a.tar.bz2 +grep-2.5.3.tar.bz2 diff --git a/grep-2.5-i18n.patch b/grep-2.5-i18n.patch deleted file mode 100644 index 8dc3dfe..0000000 --- a/grep-2.5-i18n.patch +++ /dev/null @@ -1,303 +0,0 @@ ---- grep-2.5.1/src/dfa.c 2004-02-26 13:09:54.000000000 +0000 -+++ grep-2.5.1/src/dfa.c 2004-05-18 16:43:31.189200479 +0100 -@@ -414,7 +414,7 @@ - - /* This function fetch a wide character, and update cur_mb_len, - used only if the current locale is a multibyte environment. */ --static wchar_t -+static wint_t - fetch_wc (char const *eoferr) - { - wchar_t wc; -@@ -423,7 +423,7 @@ - if (eoferr != 0) - dfaerror (eoferr); - else -- return -1; -+ return WEOF; - } - - cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs); -@@ -459,7 +459,7 @@ - static void - parse_bracket_exp_mb () - { -- wchar_t wc, wc1, wc2; -+ wint_t wc, wc1, wc2; - - /* Work area to build a mb_char_classes. */ - struct mb_char_classes *work_mbc; -@@ -496,7 +496,7 @@ - work_mbc->invert = 0; - do - { -- wc1 = -1; /* mark wc1 is not initialized". */ -+ wc1 = WEOF; /* mark wc1 is not initialized". */ - - /* Note that if we're looking at some other [:...:] construct, - we just treat it as a bunch of ordinary characters. We can do -@@ -586,7 +586,7 @@ - work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem; - } - } -- wc1 = wc = -1; -+ wc1 = wc = WEOF; - } - else - /* We treat '[' as a normal character here. 
*/ -@@ -600,7 +600,7 @@ - wc = fetch_wc(("Unbalanced [")); - } - -- if (wc1 == -1) -+ if (wc1 == WEOF) - wc1 = fetch_wc(_("Unbalanced [")); - - if (wc1 == L'-') -@@ -630,17 +630,17 @@ - } - REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t, - range_sts_al, work_mbc->nranges + 1); -- work_mbc->range_sts[work_mbc->nranges] = wc; -+ work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc; - REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, - range_ends_al, work_mbc->nranges + 1); -- work_mbc->range_ends[work_mbc->nranges++] = wc2; -+ work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2; - } -- else if (wc != -1) -+ else if (wc != WEOF) - /* build normal characters. */ - { - REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, - work_mbc->nchars + 1); -- work_mbc->chars[work_mbc->nchars++] = wc; -+ work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; - } - } - while ((wc = wc1) != L']'); -@@ -2552,6 +2552,8 @@ - } - - /* match with a character? */ -+ if (case_fold) -+ wc = towlower (wc); - for (i = 0; inchars; i++) - { - if (wc == work_mbc->chars[i]) ---- grep-2.5.1/src/grep.c.i18n 2002-03-26 15:54:12.000000000 +0000 -+++ grep-2.5.1/src/grep.c 2004-02-26 13:09:54.000000000 +0000 -@@ -30,6 +30,12 @@ - # include - # include - #endif -+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC -+/* We can handle multibyte string. 
*/ -+# define MBS_SUPPORT -+# include -+# include -+#endif - #include - #include "system.h" - #include "getopt.h" -@@ -1697,6 +1703,37 @@ - if (!install_matcher (matcher) && !install_matcher ("default")) - abort (); - -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX != 1 && match_icase) -+ { -+ wchar_t wc; -+ mbstate_t cur_state, prev_state; -+ int i, len = strlen(keys); -+ -+ memset(&cur_state, 0, sizeof(mbstate_t)); -+ for (i = 0; i <= len ;) -+ { -+ size_t mbclen; -+ mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state); -+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -+ { -+ /* An invalid sequence, or a truncated multibyte character. -+ We treat it as a singlebyte character. */ -+ mbclen = 1; -+ } -+ else -+ { -+ if (iswupper((wint_t)wc)) -+ { -+ wc = towlower((wint_t)wc); -+ wcrtomb(keys + i, wc, &cur_state); -+ } -+ } -+ i += mbclen; -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ - (*compile)(keys, keycc); - - if ((argc - optind > 1 && !no_filenames) || with_filenames) ---- grep-2.5.1/src/search.c.i18n 2004-02-26 13:09:54.000000000 +0000 -+++ grep-2.5.1/src/search.c 2004-02-26 13:17:12.000000000 +0000 -@@ -149,15 +149,16 @@ - static char* - check_multibyte_string(char const *buf, size_t size) - { -- char *mb_properties = malloc(size); -+ char *mb_properties = xmalloc(size); - mbstate_t cur_state; -+ wchar_t wc; - int i; - memset(&cur_state, 0, sizeof(mbstate_t)); - memset(mb_properties, 0, sizeof(char)*size); - for (i = 0; i < size ;) - { - size_t mbclen; -- mbclen = mbrlen(buf + i, size - i, &cur_state); -+ mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); - - if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) - { -@@ -165,6 +166,14 @@ - We treat it as a singlebyte character. 
*/ - mbclen = 1; - } -+ else if (match_icase) -+ { -+ if (iswupper((wint_t)wc)) -+ { -+ wc = towlower((wint_t)wc); -+ wcrtomb(buf + i, wc, &cur_state); -+ } -+ } - mb_properties[i] = mbclen; - i += mbclen; - } -@@ -233,7 +242,7 @@ - static char const line_end[] = "\\)$"; - static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\("; - static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)"; -- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); -+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); - size_t i; - strcpy (n, match_lines ? line_beg : word_beg); - i = strlen (n); -@@ -316,7 +325,7 @@ - static char const line_end[] = ")$"; - static char const word_beg[] = "(^|[^[:alnum:]_])("; - static char const word_end[] = ")([^[:alnum:]_]|$)"; -- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); -+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); - size_t i; - strcpy (n, match_lines ? line_beg : word_beg); - i = strlen(n); -@@ -339,14 +348,20 @@ - char eol = eolbyte; - int backref, start, len; - struct kwsmatch kwsm; -- size_t i; -+ size_t i, ret_val; - #ifdef MBS_SUPPORT - char *mb_properties = NULL; --#endif /* MBS_SUPPORT */ -- --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && kwset) -- mb_properties = check_multibyte_string(buf, size); -+ if (MB_CUR_MAX > 1) -+ { -+ if (match_icase) -+ { -+ char *case_buf = xmalloc(size); -+ memcpy(case_buf, buf, size); -+ buf = case_buf; -+ } -+ if (kwset) -+ mb_properties = check_multibyte_string(buf, size); -+ } - #endif /* MBS_SUPPORT */ - - buflim = buf + size; -@@ -455,8 +470,13 @@ - - failure: - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties) -- free (mb_properties); -+ if (MB_CUR_MAX > 1) -+ { -+ if (mb_properties) -+ free (mb_properties); -+ if (match_icase) -+ free ((char *) buf); -+ } - #endif /* MBS_SUPPORT */ - return (size_t) -1; - -@@ -467,8 +487,13 @@ - - success_in_start_and_len: - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties) -- 
free (mb_properties); -+ if (MB_CUR_MAX > 1) -+ { -+ if (mb_properties) -+ free (mb_properties); -+ if (match_icase) -+ free ((char *) buf); -+ } - #endif /* MBS_SUPPORT */ - *match_size = len; - return start; -@@ -504,10 +529,19 @@ - register size_t len; - char eol = eolbyte; - struct kwsmatch kwsmatch; -+ size_t ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties; -+ char *mb_properties = NULL; - if (MB_CUR_MAX > 1) -- mb_properties = check_multibyte_string (buf, size); -+ { -+ if (match_icase) -+ { -+ char *case_buf = xmalloc(size); -+ memcpy(case_buf, buf, size); -+ buf = case_buf; -+ } -+ mb_properties = check_multibyte_string(buf, size); -+ } - #endif /* MBS_SUPPORT */ - - for (beg = buf; beg <= buf + size; ++beg) -@@ -565,7 +599,12 @@ - failure: - #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1) -- free (mb_properties); -+ { -+ if (match_icase) -+ free((char *) buf); -+ if (mb_properties) -+ free(mb_properties); -+ } - #endif /* MBS_SUPPORT */ - return -1; - -@@ -581,7 +620,12 @@ - *match_size = len; - #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1) -- free (mb_properties); -+ { -+ if (mb_properties) -+ free (mb_properties); -+ if (match_icase) -+ free ((char *) buf); -+ } - #endif /* MBS_SUPPORT */ - return beg - buf; - } diff --git a/grep-2.5.1-bracket.patch b/grep-2.5.1-bracket.patch deleted file mode 100644 index f99571c..0000000 --- a/grep-2.5.1-bracket.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- grep-2.5.1/src/dfa.c.bracket 2003-10-30 16:21:14.000000000 +0000 -+++ grep-2.5.1/src/dfa.c 2003-10-30 16:22:38.000000000 +0000 -@@ -586,7 +586,7 @@ - work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem; - } - } -- wc = -1; -+ wc1 = wc = -1; - } - else - /* We treat '[' as a normal character here. 
*/ diff --git a/grep-2.5.1-color.patch b/grep-2.5.1-color.patch deleted file mode 100644 index f54c258..0000000 --- a/grep-2.5.1-color.patch +++ /dev/null @@ -1,10 +0,0 @@ ---- grep-2.5.1/src/grep.c.color 2004-11-16 16:46:22.845505847 +0000 -+++ grep-2.5.1/src/grep.c 2004-11-16 16:46:27.961530537 +0000 -@@ -607,6 +607,7 @@ - fputs ("\33[00m", stdout); - beg = b + match_size; - } -+ fputs ("\33[K", stdout); - } - fwrite (beg, 1, lim - beg, stdout); - if (ferror (stdout)) diff --git a/grep-2.5.1-dfa-optional.patch b/grep-2.5.1-dfa-optional.patch deleted file mode 100644 index 784eba9..0000000 --- a/grep-2.5.1-dfa-optional.patch +++ /dev/null @@ -1,67 +0,0 @@ ---- grep-2.5.1a/src/search.c.dfa-optional 2005-01-07 14:58:45.714869815 +0000 -+++ grep-2.5.1a/src/search.c 2005-01-07 14:58:45.725867716 +0000 -@@ -327,12 +327,34 @@ - int backref, start, len; - struct kwsmatch kwsm; - size_t i, ret_val; -+ static int use_dfa; -+ static int use_dfa_checked = 0; - #ifdef MBS_SUPPORT - int mb_cur_max = MB_CUR_MAX; - mbstate_t mbs; - memset (&mbs, '\0', sizeof (mbstate_t)); - #endif /* MBS_SUPPORT */ - -+ if (!use_dfa_checked) -+ { -+ char *grep_use_dfa = getenv ("GREP_USE_DFA"); -+ if (!grep_use_dfa) -+ { -+#ifdef MBS_SUPPORT -+ /* Turn off DFA when processing multibyte input. 
*/ -+ use_dfa = (MB_CUR_MAX == 1); -+#else -+ use_dfa = 1; -+#endif /* MBS_SUPPORT */ -+ } -+ else -+ { -+ use_dfa = atoi (grep_use_dfa); -+ } -+ -+ use_dfa_checked = 1; -+ } -+ - buflim = buf + size; - - for (beg = end = buf; end < buflim; beg = end) -@@ -400,7 +422,8 @@ - #endif /* MBS_SUPPORT */ - (kwsm.index < kwset_exact_matches)) - goto success_in_beg_and_end; -- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) -+ if (use_dfa && -+ dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) - continue; - } - else -@@ -409,7 +432,9 @@ - #ifdef MBS_SUPPORT - size_t bytes_left = 0; - #endif /* MBS_SUPPORT */ -- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); -+ size_t offset = 0; -+ if (use_dfa) -+ offset = dfaexec (&dfa, beg, buflim - beg, &backref); - if (offset == (size_t) -1) - break; - /* Narrow down to the line we've found. */ -@@ -451,7 +476,7 @@ - --beg; - } - /* Successful, no backreferences encountered! */ -- if (!backref) -+ if (use_dfa && !backref) - goto success_in_beg_and_end; - } - else diff --git a/grep-2.5.1-egf-speedup.patch b/grep-2.5.1-egf-speedup.patch deleted file mode 100644 index 08e92c7..0000000 --- a/grep-2.5.1-egf-speedup.patch +++ /dev/null @@ -1,823 +0,0 @@ ---- grep-2.5.1/src/search.c 2004-12-31 15:28:35.720391036 +0000 -+++ grep-2.5.1a/src/search.c 2005-01-07 14:53:10.308860193 +0000 -@@ -18,9 +18,13 @@ - - /* Written August 1992 by Mike Haertel. */ - -+#ifndef _GNU_SOURCE -+# define _GNU_SOURCE 1 -+#endif - #ifdef HAVE_CONFIG_H - # include - #endif -+#include - #include - #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC - /* We can handle multibyte string. */ -@@ -39,6 +43,9 @@ - #ifdef HAVE_LIBPCRE - # include - #endif -+#ifdef HAVE_LANGINFO_CODESET -+# include -+#endif - - #define NCHAR (UCHAR_MAX + 1) - -@@ -70,9 +77,10 @@ - call the regexp matcher at all. 
*/ - static int kwset_exact_matches; - --#if defined(MBS_SUPPORT) --static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); --#endif -+/* UTF-8 encoding allows some optimizations that we can't otherwise -+ assume in a multibyte encoding. */ -+static int using_utf8; -+ - static void kwsinit PARAMS ((void)); - static void kwsmusts PARAMS ((void)); - static void Gcompile PARAMS ((char const *, size_t)); -@@ -84,6 +92,15 @@ - static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); - - void -+check_utf8 (void) -+{ -+#ifdef HAVE_LANGINFO_CODESET -+ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) -+ using_utf8 = 1; -+#endif -+} -+ -+void - dfaerror (char const *mesg) - { - error (2, 0, mesg); -@@ -141,47 +158,6 @@ - } - } - --#ifdef MBS_SUPPORT --/* This function allocate the array which correspond to "buf". -- Then this check multibyte string and mark on the positions which -- are not singlebyte character nor the first byte of a multibyte -- character. Caller must free the array. */ --static char* --check_multibyte_string(char const *buf, size_t size) --{ -- char *mb_properties = xmalloc(size); -- mbstate_t cur_state; -- wchar_t wc; -- int i; -- memset(&cur_state, 0, sizeof(mbstate_t)); -- memset(mb_properties, 0, sizeof(char)*size); -- for (i = 0; i < size ;) -- { -- size_t mbclen; -- mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); -- -- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -- { -- /* An invalid sequence, or a truncated multibyte character. -- We treat it as a singlebyte character. 
*/ -- mbclen = 1; -- } -- else if (match_icase) -- { -- if (iswupper((wint_t)wc)) -- { -- wc = towlower((wint_t)wc); -- wcrtomb(buf + i, wc, &cur_state); -- } -- } -- mb_properties[i] = mbclen; -- i += mbclen; -- } -- -- return mb_properties; --} --#endif -- - static void - Gcompile (char const *pattern, size_t size) - { -@@ -190,6 +166,7 @@ - size_t total = size; - char const *motif = pattern; - -+ check_utf8 (); - re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); - -@@ -266,6 +243,7 @@ - size_t total = size; - char const *motif = pattern; - -+ check_utf8 (); - if (strcmp (matcher, "awk") == 0) - { - re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0)); -@@ -350,18 +328,9 @@ - struct kwsmatch kwsm; - size_t i, ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- { -- char *case_buf = xmalloc(size); -- memcpy(case_buf, buf, size); -- buf = case_buf; -- } -- if (kwset) -- mb_properties = check_multibyte_string(buf, size); -- } -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); - #endif /* MBS_SUPPORT */ - - buflim = buf + size; -@@ -373,21 +342,63 @@ - if (kwset) - { - /* Find a possible match using the KWset matcher. */ -- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ -+ size_t offset; -+#ifdef MBS_SUPPORT -+ /* kwsexec doesn't work with match_icase and multibyte input. 
*/ -+ if (match_icase && mb_cur_max > 1) -+ /* Avoid kwset */ -+ offset = 0; -+ else -+#endif /* MBS_SUPPORT */ -+ offset = kwsexec (kwset, beg, buflim - beg, &kwsm); - if (offset == (size_t) -1) - goto failure; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - /* Narrow down to the line containing the candidate, and - run it through DFA. */ - end = memchr(beg, eol, buflim - beg); - end++; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) -+ if (mb_cur_max > 1 && bytes_left) - continue; --#endif -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; -- if (kwsm.index < kwset_exact_matches) -+ if ( -+#ifdef MBS_SUPPORT -+ !(match_icase && mb_cur_max > 1) && -+#endif /* MBS_SUPPORT */ -+ (kwsm.index < kwset_exact_matches)) - goto success_in_beg_and_end; - if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) - continue; -@@ -395,13 +406,47 @@ - else - { - /* No good fixed strings; start with DFA. */ -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ - size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); - if (offset == (size_t) -1) - break; - /* Narrow down to the line we've found. */ -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. 
*/ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - end = memchr (beg, eol, buflim - beg); - end++; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && bytes_left) -+ continue; -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; - } -@@ -469,15 +514,6 @@ - } /* for (beg = end ..) */ - - failure: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (mb_properties) -- free (mb_properties); -- if (match_icase) -- free ((char *) buf); -- } --#endif /* MBS_SUPPORT */ - return (size_t) -1; - - success_in_beg_and_end: -@@ -486,24 +522,144 @@ - /* FALLTHROUGH */ - - success_in_start_and_len: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (mb_properties) -- free (mb_properties); -- if (match_icase) -- free ((char *) buf); -- } --#endif /* MBS_SUPPORT */ - *match_size = len; - return start; - } - -+#ifdef MBS_SUPPORT -+static int f_i_multibyte; /* whether we're using the new -Fi MB method */ -+static struct -+{ -+ wchar_t **patterns; -+ size_t count, maxlen; -+ unsigned char *match; -+} Fimb; -+#endif -+ - static void - Fcompile (char const *pattern, size_t size) - { -+ int mb_cur_max = MB_CUR_MAX; - char const *beg, *lim, *err; - -+ check_utf8 (); -+#ifdef MBS_SUPPORT -+ /* Support -F -i for UTF-8 input. 
*/ -+ if (match_icase && mb_cur_max > 1) -+ { -+ mbstate_t mbs; -+ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); -+ const char *patternend = pattern; -+ size_t wcsize; -+ kwset_t fimb_kwset = NULL; -+ char *starts = NULL; -+ wchar_t *wcbeg, *wclim; -+ size_t allocated = 0; -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+# ifdef __GNU_LIBRARY__ -+ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); -+ if (patternend != pattern + size) -+ wcsize = (size_t) -1; -+# else -+ { -+ char *patterncopy = xmalloc (size + 1); -+ -+ memcpy (patterncopy, pattern, size); -+ patterncopy[size] = '\0'; -+ patternend = patterncopy; -+ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); -+ if (patternend != patterncopy + size) -+ wcsize = (size_t) -1; -+ free (patterncopy); -+ } -+# endif -+ if (wcsize + 2 <= 2) -+ { -+fimb_fail: -+ free (wcpattern); -+ free (starts); -+ if (fimb_kwset) -+ kwsfree (fimb_kwset); -+ free (Fimb.patterns); -+ Fimb.patterns = NULL; -+ } -+ else -+ { -+ if (!(fimb_kwset = kwsalloc (NULL))) -+ error (2, 0, _("memory exhausted")); -+ -+ starts = xmalloc (mb_cur_max * 3); -+ wcbeg = wcpattern; -+ do -+ { -+ int i; -+ size_t wclen; -+ -+ if (Fimb.count >= allocated) -+ { -+ if (allocated == 0) -+ allocated = 128; -+ else -+ allocated *= 2; -+ Fimb.patterns = xrealloc (Fimb.patterns, -+ sizeof (wchar_t *) * allocated); -+ } -+ Fimb.patterns[Fimb.count++] = wcbeg; -+ for (wclim = wcbeg; -+ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) -+ *wclim = towlower (*wclim); -+ *wclim = L'\0'; -+ wclen = wclim - wcbeg; -+ if (wclen > Fimb.maxlen) -+ Fimb.maxlen = wclen; -+ if (wclen > 3) -+ wclen = 3; -+ if (wclen == 0) -+ { -+ if ((err = kwsincr (fimb_kwset, "", 0)) != 0) -+ error (2, 0, err); -+ } -+ else -+ for (i = 0; i < (1 << wclen); i++) -+ { -+ char *p = starts; -+ int j, k; -+ -+ for (j = 0; j < wclen; ++j) -+ { -+ wchar_t wc = wcbeg[j]; -+ if (i & (1 << j)) -+ { -+ wc = towupper (wc); -+ if (wc == wcbeg[j]) -+ continue; -+ } 
-+ k = wctomb (p, wc); -+ if (k <= 0) -+ goto fimb_fail; -+ p += k; -+ } -+ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0) -+ error (2, 0, err); -+ } -+ if (wclim < wcpattern + wcsize) -+ ++wclim; -+ wcbeg = wclim; -+ } -+ while (wcbeg < wcpattern + wcsize); -+ f_i_multibyte = 1; -+ kwset = fimb_kwset; -+ free (starts); -+ Fimb.match = xmalloc (Fimb.count); -+ if ((err = kwsprep (kwset)) != 0) -+ error (2, 0, err); -+ return; -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ -+ - kwsinit (); - beg = pattern; - do -@@ -522,6 +678,76 @@ - error (2, 0, err); - } - -+#ifdef MBS_SUPPORT -+static int -+Fimbexec (const char *buf, size_t size, size_t *plen, int exact) -+{ -+ size_t len, letter, i; -+ int ret = -1; -+ mbstate_t mbs; -+ wchar_t wc; -+ int patterns_left; -+ -+ assert (match_icase && f_i_multibyte == 1); -+ assert (MB_CUR_MAX > 1); -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+ memset (Fimb.match, '\1', Fimb.count); -+ letter = len = 0; -+ patterns_left = 1; -+ while (patterns_left && len <= size) -+ { -+ size_t c; -+ -+ patterns_left = 0; -+ if (len < size) -+ { -+ c = mbrtowc (&wc, buf + len, size - len, &mbs); -+ if (c + 2 <= 2) -+ return ret; -+ -+ wc = towlower (wc); -+ } -+ else -+ { -+ c = 1; -+ wc = L'\0'; -+ } -+ -+ for (i = 0; i < Fimb.count; i++) -+ { -+ if (Fimb.match[i]) -+ { -+ if (Fimb.patterns[i][letter] == L'\0') -+ { -+ /* Found a match. */ -+ *plen = len; -+ if (!exact && !match_words) -+ return 0; -+ else -+ { -+ /* For -w or exact look for longest match. 
*/ -+ ret = 0; -+ Fimb.match[i] = '\0'; -+ continue; -+ } -+ } -+ -+ if (Fimb.patterns[i][letter] == wc) -+ patterns_left = 1; -+ else -+ Fimb.match[i] = '\0'; -+ } -+ } -+ -+ len += c; -+ letter++; -+ } -+ -+ return ret; -+} -+#endif /* MBS_SUPPORT */ -+ - static size_t - Fexecute (char const *buf, size_t size, size_t *match_size, int exact) - { -@@ -531,80 +757,258 @@ - struct kwsmatch kwsmatch; - size_t ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- { -- char *case_buf = xmalloc(size); -- memcpy(case_buf, buf, size); -- buf = case_buf; -- } -- mb_properties = check_multibyte_string(buf, size); -- } -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ const char *last_char = NULL; - #endif /* MBS_SUPPORT */ - - for (beg = buf; beg <= buf + size; ++beg) - { -- size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); -+ size_t offset; -+ offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); -+ - if (offset == (size_t) -1) - goto failure; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) -- continue; /* It is a part of multibyte character. */ -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ continue; -+ } -+ else - #endif /* MBS_SUPPORT */ - beg += offset; -+#ifdef MBS_SUPPORT -+ /* For f_i_multibyte, the string at beg now matches first 3 chars of -+ one of the search strings (less if there are shorter search strings). 
-+ See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact)) -+ goto next_char; -+#endif /* MBS_SUPPORT */ - len = kwsmatch.size[0]; - if (exact && !match_words) - goto success_in_beg_and_len; - if (match_lines) - { - if (beg > buf && beg[-1] != eol) -- continue; -+ goto next_char; - if (beg + len < buf + size && beg[len] != eol) -- continue; -+ goto next_char; - goto success; - } - else if (match_words) -- for (try = beg; len; ) -- { -- if (try > buf && WCHAR((unsigned char) try[-1])) -- break; -- if (try + len < buf + size && WCHAR((unsigned char) try[len])) -- { -- offset = kwsexec (kwset, beg, --len, &kwsmatch); -- if (offset == (size_t) -1) -- { -+ { -+ while (len) -+ { -+ int word_match = 0; -+ if (beg > buf) -+ { - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free (mb_properties); -+ if (mb_cur_max > 1) -+ { -+ const char *s; -+ int mr; -+ wchar_t pwc; -+ -+ if (using_utf8) -+ { -+ s = beg - 1; -+ while (s > buf -+ && (unsigned char) *s >= 0x80 -+ && (unsigned char) *s <= 0xbf) -+ --s; -+ } -+ else -+ s = last_char; -+ mr = mbtowc (&pwc, s, beg - s); -+ if (mr <= 0) -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ else if ((iswalnum (pwc) || pwc == L'_') -+ && mr == (int) (beg - s)) -+ goto next_char; -+ } -+ else - #endif /* MBS_SUPPORT */ -- return offset; -- } -- try = beg + offset; -- len = kwsmatch.size[0]; -- } -- else -- goto success; -- } -+ if (WCHAR ((unsigned char) beg[-1])) -+ goto next_char; -+ } -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ wchar_t nwc; -+ int mr; -+ -+ mr = mbtowc (&nwc, beg + len, buf + size - beg - len); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ word_match = 1; -+ } -+ else if (!iswalnum (nwc) && nwc != L'_') -+ word_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len])) -+ word_match = 1; -+ if (word_match) -+ { -+ if (!exact) -+ /* Returns the whole line now we know 
there's a word match. */ -+ goto success; -+ else -+ /* Returns just this word match. */ -+ goto success_in_beg_and_len; -+ } -+ if (len > 0) -+ { -+ /* Try a shorter length anchored at the same place. */ -+ --len; -+ offset = kwsexec (kwset, beg, len, &kwsmatch); -+ -+ if (offset == -1) -+ goto next_char; /* Try a different anchor. */ -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ { -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ } -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ goto next_char; /* Try a different anchor. */ -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ beg += offset; -+#ifdef MBS_SUPPORT -+ /* The string at beg now matches first 3 chars of one of -+ the search strings (less if there are shorter search -+ strings). See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, len - offset, &kwsmatch.size[0], -+ exact)) -+ goto next_char; -+#endif /* MBS_SUPPORT */ -+ len = kwsmatch.size[0]; -+ } -+ } -+ } - else - goto success; -+next_char:; -+#ifdef MBS_SUPPORT -+ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled -+ by ++beg above. 
*/ -+ if (mb_cur_max > 1) -+ { -+ if (using_utf8) -+ { -+ unsigned char c = *beg; -+ if (c >= 0xc2) -+ { -+ if (c < 0xe0) -+ ++beg; -+ else if (c < 0xf0) -+ beg += 2; -+ else if (c < 0xf8) -+ beg += 3; -+ else if (c < 0xfc) -+ beg += 4; -+ else if (c < 0xfe) -+ beg += 5; -+ } -+ } -+ else -+ { -+ size_t l = mbrlen (beg, buf + size - beg, &mbs); -+ -+ last_char = beg; -+ if (l + 2 >= 2) -+ beg += l - 1; -+ else -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ } -+ } -+#endif /* MBS_SUPPORT */ - } - - failure: -+ return -1; -+ -+ success: - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -+ if (mb_cur_max > 1 && !using_utf8) - { -- if (match_icase) -- free((char *) buf); -- if (mb_properties) -- free(mb_properties); -+ end = beg + len; -+ while (end < buf + size) -+ { -+ size_t mlen = mbrlen (end, buf + size - end, &mbs); -+ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ mlen = 1; -+ } -+ if (mlen == 1 && *end == eol) -+ break; -+ -+ end += mlen; -+ } - } -+ else - #endif /* MBS_SUPPORT */ -- return -1; -- -- success: - end = memchr (beg + len, eol, (buf + size) - (beg + len)); -+ - end++; - while (buf < beg && beg[-1] != eol) - --beg; -@@ -613,15 +1017,6 @@ - - success_in_beg_and_len: - *match_size = len; --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (mb_properties) -- free (mb_properties); -- if (match_icase) -- free ((char *) buf); -- } --#endif /* MBS_SUPPORT */ - return beg - buf; - } - diff --git a/grep-2.5.1-fgrep.patch b/grep-2.5.1-fgrep.patch deleted file mode 100644 index c7f8f96..0000000 --- a/grep-2.5.1-fgrep.patch +++ /dev/null @@ -1,145 +0,0 @@ ---- grep-2.5.1/src/search.c.fgrep 2001-04-19 04:42:14.000000000 +0100 -+++ grep-2.5.1/src/search.c 2004-02-26 13:09:32.000000000 +0000 -@@ -360,13 +360,7 @@ - /* Find a possible match using the KWset matcher. 
*/ - size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); - if (offset == (size_t) -1) -- { --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free(mb_properties); --#endif -- return (size_t)-1; -- } -+ goto failure; - beg += offset; - /* Narrow down to the line containing the candidate, and - run it through DFA. */ -@@ -379,7 +373,7 @@ - while (beg > buf && beg[-1] != eol) - --beg; - if (kwsm.index < kwset_exact_matches) -- goto success; -+ goto success_in_beg_and_end; - if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) - continue; - } -@@ -398,7 +392,7 @@ - } - /* Successful, no backreferences encountered! */ - if (!backref) -- goto success; -+ goto success_in_beg_and_end; - } - else - end = beg + size; -@@ -413,14 +407,11 @@ - end - beg - 1, &(patterns[i].regs)))) - { - len = patterns[i].regs.end[0] - start; -- if (exact) -- { -- *match_size = len; -- return start; -- } -+ if (exact && !match_words) -+ goto success_in_start_and_len; - if ((!match_lines && !match_words) - || (match_lines && len == end - beg - 1)) -- goto success; -+ goto success_in_beg_and_end; - /* If -w, check if the match aligns with word boundaries. - We do this iteratively because: - (a) the line may contain more than one occurence of the -@@ -434,7 +425,7 @@ - if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) - && (len == end - beg - 1 - || !WCHAR ((unsigned char) beg[start + len]))) -- goto success; -+ goto success_in_beg_and_end; - if (len > 0) - { - /* Try a shorter length anchored at the same place. */ -@@ -461,19 +452,26 @@ - } - } /* for Regex patterns. */ - } /* for (beg = end ..) 
*/ -+ -+ failure: - #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1 && mb_properties) - free (mb_properties); - #endif /* MBS_SUPPORT */ - return (size_t) -1; - -- success: -+ success_in_beg_and_end: -+ len = end - beg; -+ start = beg - buf; -+ /* FALLTHROUGH */ -+ -+ success_in_start_and_len: - #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1 && mb_properties) - free (mb_properties); - #endif /* MBS_SUPPORT */ -- *match_size = end - beg; -- return beg - buf; -+ *match_size = len; -+ return start; - } - - static void -@@ -516,28 +514,15 @@ - { - size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); - if (offset == (size_t) -1) -- { --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free(mb_properties); --#endif /* MBS_SUPPORT */ -- return offset; -- } -+ goto failure; - #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) - continue; /* It is a part of multibyte character. */ - #endif /* MBS_SUPPORT */ - beg += offset; - len = kwsmatch.size[0]; -- if (exact) -- { -- *match_size = len; --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free (mb_properties); --#endif /* MBS_SUPPORT */ -- return beg - buf; -- } -+ if (exact && !match_words) -+ goto success_in_beg_and_len; - if (match_lines) - { - if (beg > buf && beg[-1] != eol) -@@ -551,6 +536,7 @@ - goto success; - } - -+ failure: - #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1) - free (mb_properties); -@@ -583,7 +569,11 @@ - end++; - while (buf < beg && beg[-1] != eol) - --beg; -- *match_size = end - beg; -+ len = end - beg; -+ /* FALLTHROUGH */ -+ -+ success_in_beg_and_len: -+ *match_size = len; - #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1) - free (mb_properties); diff --git a/grep-2.5.1-icolor.patch b/grep-2.5.1-icolor.patch deleted file mode 100644 index 14b2617..0000000 --- a/grep-2.5.1-icolor.patch +++ /dev/null @@ -1,36 +0,0 @@ ---- grep-2.5.1a/src/grep.c.icolor 2005-01-07 12:05:20.877785250 +0000 -+++ grep-2.5.1a/src/grep.c 2005-01-07 12:05:44.690194388 +0000 -@@ -564,33 +564,6 @@ - { - size_t 
match_size; - size_t match_offset; -- if(match_icase) -- { -- /* Yuck, this is tricky */ -- char *buf = (char*) xmalloc (lim - beg); -- char *ibeg = buf; -- char *ilim = ibeg + (lim - beg); -- int i; -- for (i = 0; i < lim - beg; i++) -- ibeg[i] = tolower (beg[i]); -- while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1)) -- != (size_t) -1) -- { -- char const *b = beg + match_offset; -- if (b == lim) -- break; -- fwrite (beg, sizeof (char), match_offset, stdout); -- printf ("\33[%sm", grep_color); -- fwrite (b, sizeof (char), match_size, stdout); -- fputs ("\33[00m", stdout); -- beg = b + match_size; -- ibeg = ibeg + match_offset + match_size; -- } -- fwrite (beg, 1, lim - beg, stdout); -- free (buf); -- lastout = lim; -- return; -- } - while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1)) - != (size_t) -1) - { diff --git a/grep-2.5.1-manpage.patch b/grep-2.5.1-manpage.patch deleted file mode 100644 index 284f0c4..0000000 --- a/grep-2.5.1-manpage.patch +++ /dev/null @@ -1,19 +0,0 @@ ---- grep-2.5.1/doc/grep.1.manpage 2002-01-22 13:20:04.000000000 +0000 -+++ grep-2.5.1/doc/grep.1 2003-10-08 09:37:32.000000000 +0100 -@@ -191,6 +191,7 @@ - .I PATTERN - as a list of fixed strings, separated by newlines, - any of which is to be matched. -+.TP - .BR \-P ", " \-\^\-perl-regexp - Interpret - .I PATTERN -@@ -302,7 +303,7 @@ - This is especially useful for tools like zgrep, e.g. - .B "gzip -cd foo.gz |grep --label=foo something" - .TP --.BR \-\^\-line-buffering -+.BR \-\^\-line-buffered - Use line buffering, it can be a performance penality. 
- .TP - .BR \-q ", " \-\^\-quiet ", " \-\^\-silent diff --git a/grep-2.5.1-oi.patch b/grep-2.5.1-oi.patch deleted file mode 100644 index eb997ad..0000000 --- a/grep-2.5.1-oi.patch +++ /dev/null @@ -1,48 +0,0 @@ ---- grep-2.5.1/lib/posix/regex.h.oi 2004-01-05 12:09:12.984391131 +0000 -+++ grep-2.5.1/lib/posix/regex.h 2004-01-05 12:09:24.717990622 +0000 -@@ -109,6 +109,10 @@ - If not set, \{, \}, {, and } are literals. */ - #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -+/* If this bit is set, then ignore case when matching. -+ If not set, then case is significant. */ -+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) -+ - /* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ - #define RE_LIMITED_OPS (RE_INTERVALS << 1) ---- grep-2.5.1/src/search.c.oi 2004-01-05 12:07:00.550199415 +0000 -+++ grep-2.5.1/src/search.c 2004-01-05 12:07:00.566197505 +0000 -@@ -31,7 +31,7 @@ - - #include "system.h" - #include "grep.h" --#include "regex.h" -+#include - #include "dfa.h" - #include "kwset.h" - #include "error.h" -@@ -190,7 +190,7 @@ - size_t total = size; - char const *motif = pattern; - -- re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); -+ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); - - /* For GNU regex compiler we have to pass the patterns separately to detect -@@ -268,12 +268,12 @@ - - if (strcmp (matcher, "awk") == 0) - { -- re_set_syntax (RE_SYNTAX_AWK); -+ re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte); - } - else - { -- re_set_syntax (RE_SYNTAX_POSIX_EGREP); -+ re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? 
RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte); - } - diff --git a/grep-2.5.1-tests.patch b/grep-2.5.1-tests.patch deleted file mode 100644 index 2934a21..0000000 --- a/grep-2.5.1-tests.patch +++ /dev/null @@ -1,138 +0,0 @@ ---- grep-2.5.1/tests/Makefile.am.jj 2001-03-07 05:11:27.000000000 +0100 -+++ grep-2.5.1/tests/Makefile.am 2004-12-31 11:42:41.595492300 +0100 -@@ -3,7 +3,8 @@ - AWK=@AWK@ - - TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \ -- status.sh empty.sh options.sh backref.sh file.sh -+ status.sh empty.sh options.sh backref.sh file.sh \ -+ fmbtest.sh - EXTRA_DIST = $(TESTS) \ - khadafy.lines khadafy.regexp \ - spencer1.awk spencer1.tests \ ---- grep-2.5.1/tests/fmbtest.sh 2004-12-31 13:30:23.942871250 +0100 -+++ grep-2.5.1/tests/fmbtest.sh 2004-12-31 14:09:13.219463855 +0100 -@@ -0,0 +1,111 @@ -+#!/bin/sh -+ -+: ${srcdir=.} -+ -+# If cs_CZ.UTF-8 locale doesn't work, skip this test silently -+LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \ -+ || exit 77 -+ -+failures=0 -+ -+cat > csinput < cspatfile <= 0) - { -- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) -- && (len == end - beg - 1 -- || !WCHAR ((unsigned char) beg[start + len]))) -- goto success_in_beg_and_end; -+ int lword_match = 0; -+ if (start == 0) -+ lword_match = 1; -+ else -+ { -+ assert (start > 0); -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ const char *s; -+ size_t mr; -+ wchar_t pwc; -+ -+ /* Locate the start of the multibyte character -+ before the match position (== beg + start). */ -+ if (using_utf8) -+ { -+ /* UTF-8 is a special case: scan backwards -+ until we find a 7-bit character or a -+ lead byte. */ -+ s = beg + start - 1; -+ while (s > buf -+ && (unsigned char) *s >= 0x80 -+ && (unsigned char) *s <= 0xbf) -+ --s; -+ } -+ else -+ { -+ /* Scan forwards to find the start of the -+ last complete character before the -+ match position. 
*/ -+ size_t bytes_left = start - 1; -+ s = beg; -+ while (bytes_left > 0) -+ { -+ mr = mbrlen (s, bytes_left, &mbs); -+ if (mr == (size_t) -1 || mr == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbs)); -+ s++; -+ bytes_left--; -+ continue; -+ } -+ if (mr == (size_t) -2) -+ { -+ memset (&mbs, '\0', sizeof (mbs)); -+ break; -+ } -+ s += mr; -+ bytes_left -= mr; -+ } -+ } -+ mr = mbrtowc (&pwc, s, beg + start - s, &mbs); -+ if (mr == (size_t) -2 || mr == (size_t) -1 || -+ mr == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ lword_match = 1; -+ } -+ else if (!(iswalnum (pwc) || pwc == L'_') -+ && mr == beg + start - s) -+ lword_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (!WCHAR ((unsigned char) beg[start - 1])) -+ lword_match = 1; -+ } -+ -+ if (lword_match) -+ { -+ int rword_match = 0; -+ if (start + len == end - beg - 1) -+ rword_match = 1; -+ else -+ { -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ wchar_t nwc; -+ int mr; -+ -+ mr = mbtowc (&nwc, beg + start + len, -+ end - beg - start - len - 1); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ rword_match = 1; -+ } -+ else if (!iswalnum (nwc) && nwc != L'_') -+ rword_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (!WCHAR ((unsigned char) beg[start + len])) -+ rword_match = 1; -+ } -+ -+ if (rword_match) -+ { -+ if (!exact) -+ /* Returns the whole line. */ -+ goto success_in_beg_and_end; -+ else -+ /* Returns just this word match. */ -+ goto success_in_start_and_len; -+ } -+ } - if (len > 0) - { - /* Try a shorter length anchored at the same place. */ diff --git a/grep-2.5.1a-pcrewrap.patch b/grep-2.5.1a-pcrewrap.patch deleted file mode 100644 index 2f61f68..0000000 --- a/grep-2.5.1a-pcrewrap.patch +++ /dev/null @@ -1,101 +0,0 @@ -Fix https://bugzilla.redhat.com/show_bug.cgi?id=324781 -See the code comments for details. 
- -Lubomir Rintel - ---- grep-2.5.1a/src/search.c.pcrewrap 2008-10-01 16:38:20.000000000 +0200 -+++ grep-2.5.1a/src/search.c 2008-10-01 16:38:20.000000000 +0200 -@@ -1241,8 +1241,31 @@ - is just for performance improvement in pcre_exec. */ - int sub[300]; - -- int e = pcre_exec (cre, extra, buf, size, 0, 0, -- sub, sizeof sub / sizeof *sub); -+ char *line_buf = buf; -+ int line_size = 0; -+ int e = 0; -+ -+ /* PCRE can't limit the matching to space between newlines (i.e -+ [^a] will allways match newline, see pcreposix(3) for details), -+ therefore whe have to match each line in the buffer separately */ -+ do { -+ /* We're not at the of buffer or end of line, get another char */ -+ if (line_buf + line_size < buf + size && line_buf[line_size++] != eolbyte) { -+ continue; -+ } -+ -+ /* Match the part of buffer that constitutes a line */ -+ e = pcre_exec (cre, extra, line_buf, line_size - 1, 0, 0, -+ sub, sizeof sub / sizeof *sub); -+ -+ /* Don't try other lines if this one matched or returned an error */ -+ if (e != PCRE_ERROR_NOMATCH) -+ break; -+ -+ /* Wrap up */ -+ line_buf += line_size; -+ line_size = 0; -+ } while (line_buf < buf + size); - - if (e <= 0) - { -@@ -1261,8 +1284,8 @@ - else - { - /* Narrow down to the line we've found. 
*/ -- char const *beg = buf + sub[0]; -- char const *end = buf + sub[1]; -+ char const *beg = line_buf + sub[0]; -+ char const *end = line_buf + sub[1]; - char const *buflim = buf + size; - char eol = eolbyte; - if (!exact) ---- grep-2.5.1a/tests/Makefile.am.pcrewrap 2008-10-01 16:47:01.000000000 +0200 -+++ grep-2.5.1a/tests/Makefile.am 2008-10-01 16:47:26.000000000 +0200 -@@ -4,7 +4,7 @@ - - TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \ - status.sh empty.sh options.sh backref.sh file.sh \ -- fmbtest.sh -+ fmbtest.sh pcrewrap.sh - EXTRA_DIST = $(TESTS) \ - khadafy.lines khadafy.regexp \ - spencer1.awk spencer1.tests \ ---- grep-2.5.1a/tests/Makefile.in.pcrewrap 2008-10-01 16:47:01.000000000 +0200 -+++ grep-2.5.1a/tests/Makefile.in 2008-10-01 16:47:34.000000000 +0200 -@@ -98,7 +98,7 @@ - - TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \ - status.sh empty.sh options.sh backref.sh file.sh \ -- fmbtest.sh -+ fmbtest.sh pcrewrap.sh - - EXTRA_DIST = $(TESTS) \ - khadafy.lines khadafy.regexp \ ---- grep-2.5.1a/tests/pcrewrap.sh 2008-09-30 09:16:44.037543374 +0200 -+++ grep-2.5.1a/tests/pcrewrap.sh 2008-10-01 16:45:45.000000000 +0200 -@@ -0,0 +1,23 @@ -+#!/bin/sh -+# Test for bug https://bugzilla.redhat.com/show_bug.cgi?id=324781 -+# Lubomir Rintel -+ -+: ${srcdir=.} -+ -+failures=0 -+ -+echo -ne "a\na" | ${GREP} -P '[^a]' > /dev/null 2>&1 -+if test $? -ne 1 -+then -+ echo "PCRE Wrap: Wrong status code, test \#1 failed" -+ failures=1 -+fi -+ -+echo -ne "a\na" | ${GREP} -P '[^b].[^b]' > /dev/null 2>&1 -+if test $? 
-ne 1 -+then -+ echo "PCRE Wrap: Wrong status code, test \#2 failed" -+ failures=1 -+fi -+ -+exit $failures - diff --git a/grep-2.5.3-case.patch b/grep-2.5.3-case.patch new file mode 100644 index 0000000..d69abc4 --- /dev/null +++ b/grep-2.5.3-case.patch @@ -0,0 +1,48 @@ +From 68f09677b7aaedafa8c29280ccd76a034fe269f1 Mon Sep 17 00:00:00 2001 +From: Debian +Date: Sun, 23 Nov 2008 17:28:46 +0100 +Subject: [PATCH] Case-insensitive list matching fix + +This fixes case-insensitive matching of lists in multi-byte character sets. +Original comment: + +fix the following problem in multibyte locales. + % echo Y | egrep -i '[y]' + % + +derived from gawk's dfa.c. + +Original ticket: https://bugzilla.redhat.com/show_bug.cgi?id=123363 +Debian: 61-dfa.c-case_fold-charclass.patch +--- + src/dfa.c | 14 ++++++++++++++ + 1 files changed, 14 insertions(+), 0 deletions(-) + +diff --git a/src/dfa.c b/src/dfa.c +index 934be97..088c379 100644 +--- a/src/dfa.c ++++ b/src/dfa.c +@@ -689,6 +689,20 @@ parse_bracket_exp_mb () + REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, + work_mbc->nchars + 1); + work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; ++ if (case_fold && (iswlower((wint_t) wc) || iswupper((wint_t) wc))) ++ { ++ wint_t altcase; ++ ++ altcase = wc; /* keeps compiler happy */ ++ if (iswlower((wint_t) wc)) ++ altcase = towupper((wint_t) wc); ++ else if (iswupper((wint_t) wc)) ++ altcase = towlower((wint_t) wc); ++ ++ REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, ++ work_mbc->nchars + 1); ++ work_mbc->chars[work_mbc->nchars++] = (wchar_t) altcase; ++ } + } + } + while ((wc = wc1) != L']'); +-- +1.5.5.1 + diff --git a/grep-2.5.3-egf-speedup.patch b/grep-2.5.3-egf-speedup.patch new file mode 100644 index 0000000..1e44894 --- /dev/null +++ b/grep-2.5.3-egf-speedup.patch @@ -0,0 +1,840 @@ +From aac37e1939632dbc7d2ade6f991af7ce103b0cba Mon Sep 17 00:00:00 2001 +From: Tim Waugh +Date: Sun, 23 Nov 2008 17:30:59 +0100 +Subject: [PATCH] EGF Speedup + +The full story 
behind this patch is that grep-2.5.1a does not handle UTF-8 gracefully at all. The basic plan with handling UTF-8 in 2.5.1a is: + + * whenever a buffer is parsed, go through the entire buffer deciding how many bytes make up each character + * use this information when necessary + +This patch changes that to: + + * when information about how many bytes make up a character is needed, work it out on demand + +On the face of it, this is a small obvious improvement. In fact it is much better than that, because the original scheme would calculate character lengths several times for each buffer: in fact, one full pass for every single potential match! + +For a full discussion of this patch, as well as dfa-optional, including benchmarking results, see the mailing list. + +Upstream ticket: https://savannah.gnu.org/patch/?3803 +Debian: 64-egf-speedup.patch +Debian: 66-match_icase.patch +--- + lib/posix/regex.h | 4 + + src/search.c | 652 +++++++++++++++++++++++++++++++++++++++++++---------- + 2 files changed, 535 insertions(+), 121 deletions(-) + +diff --git a/lib/posix/regex.h b/lib/posix/regex.h +index f4c4150..98df2cb 100644 +--- a/lib/posix/regex.h ++++ b/lib/posix/regex.h +@@ -165,6 +165,10 @@ typedef unsigned long int reg_syntax_t; + treated as 'a\{1'. */ + #define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) + ++/* If this bit is set, then ignore case when matching. ++ If not set, then case is significant. */ ++#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) ++ + /* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect +diff --git a/src/search.c b/src/search.c +index 7f5f187..9691fb8 100644 +--- a/src/search.c ++++ b/src/search.c +@@ -18,10 +18,15 @@ + + /* Written August 1992 by Mike Haertel. 
*/ + ++#ifndef _GNU_SOURCE ++# define _GNU_SOURCE 1 ++#endif + #ifdef HAVE_CONFIG_H + # include + #endif + ++#include ++ + #include + + #include "mbsupport.h" +@@ -43,6 +48,9 @@ + #ifdef HAVE_LIBPCRE + # include + #endif ++#ifdef HAVE_LANGINFO_CODESET ++# include ++#endif + + #define NCHAR (UCHAR_MAX + 1) + +@@ -68,6 +76,19 @@ kwsinit (void) + error (2, 0, _("memory exhausted")); + } + ++/* UTF-8 encoding allows some optimizations that we can't otherwise ++ assume in a multibyte encoding. */ ++static int using_utf8; ++ ++void ++check_utf8 (void) ++{ ++#ifdef HAVE_LANGINFO_CODESET ++ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) ++ using_utf8 = 1; ++#endif ++} ++ + #ifndef FGREP_PROGRAM + /* DFA compiled regexp. */ + static struct dfa dfa; +@@ -134,49 +155,6 @@ kwsmusts (void) + } + #endif /* !FGREP_PROGRAM */ + +-#ifdef MBS_SUPPORT +-/* This function allocate the array which correspond to "buf". +- Then this check multibyte string and mark on the positions which +- are not single byte character nor the first byte of a multibyte +- character. Caller must free the array. */ +-static char* +-check_multibyte_string(char const *buf, size_t size) +-{ +- char *mb_properties = xmalloc(size); +- mbstate_t cur_state; +- wchar_t wc; +- int i; +- +- memset(&cur_state, 0, sizeof(mbstate_t)); +- memset(mb_properties, 0, sizeof(char)*size); +- +- for (i = 0; i < size ;) +- { +- size_t mbclen; +- mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); +- +- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) +- { +- /* An invalid sequence, or a truncated multibyte character. +- We treat it as a single byte character. 
*/ +- mbclen = 1; +- } +- else if (match_icase) +- { +- if (iswupper((wint_t)wc)) +- { +- wc = towlower((wint_t)wc); +- wcrtomb(buf + i, wc, &cur_state); +- } +- } +- mb_properties[i] = mbclen; +- i += mbclen; +- } +- +- return mb_properties; +-} +-#endif /* MBS_SUPPORT */ +- + #if defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) + #ifdef EGREP_PROGRAM + COMPILE_FCT(Ecompile) +@@ -193,10 +171,9 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) + size_t total = size; + char const *motif = pattern; + +-#if 0 ++ check_utf8 (); + if (match_icase) + syntax_bits |= RE_ICASE; +-#endif + re_set_syntax (syntax_bits); + dfasyntax (syntax_bits, match_icase, eolbyte); + +@@ -303,20 +280,9 @@ EXECUTE_FCT(EGexecute) + struct kwsmatch kwsm; + size_t i, ret_val; + #ifdef MBS_SUPPORT +- char *mb_properties = NULL; +- if (MB_CUR_MAX > 1) +- { +- if (match_icase) +- { +- char *case_buf = xmalloc(size); +- memcpy(case_buf, buf, size); +- if (start_ptr) +- start_ptr = case_buf + (start_ptr - buf); +- buf = case_buf; +- } +- if (kwset) +- mb_properties = check_multibyte_string(buf, size); +- } ++ int mb_cur_max = MB_CUR_MAX; ++ mbstate_t mbs; ++ memset (&mbs, '\0', sizeof (mbstate_t)); + #endif /* MBS_SUPPORT */ + + buflim = buf + size; +@@ -329,21 +295,63 @@ EXECUTE_FCT(EGexecute) + if (kwset) + { + /* Find a possible match using the KWset matcher. */ +- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); ++#ifdef MBS_SUPPORT ++ size_t bytes_left = 0; ++#endif /* MBS_SUPPORT */ ++ size_t offset; ++#ifdef MBS_SUPPORT ++ /* kwsexec doesn't work with match_icase and multibyte input. 
*/ ++ if (match_icase && mb_cur_max > 1) ++ /* Avoid kwset */ ++ offset = 0; ++ else ++#endif /* MBS_SUPPORT */ ++ offset = kwsexec (kwset, beg, buflim - beg, &kwsm); + if (offset == (size_t) -1) +- goto failure; ++ return (size_t)-1; ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ bytes_left = offset; ++ while (bytes_left) ++ { ++ size_t mlen = mbrlen (beg, bytes_left, &mbs); ++ if (mlen == (size_t) -1 || mlen == 0) ++ { ++ /* Incomplete character: treat as single-byte. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg++; ++ bytes_left--; ++ continue; ++ } ++ ++ if (mlen == (size_t) -2) ++ /* Offset points inside multibyte character: ++ * no good. */ ++ break; ++ ++ beg += mlen; ++ bytes_left -= mlen; ++ } ++ } ++ else ++#endif /* MBS_SUPPORT */ + beg += offset; + /* Narrow down to the line containing the candidate, and + run it through DFA. */ + end = memchr(beg, eol, buflim - beg); + end++; + #ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) ++ if (mb_cur_max > 1 && bytes_left) + continue; + #endif + while (beg > buf && beg[-1] != eol) + --beg; +- if (kwsm.index < kwset_exact_matches) ++ if ( ++#ifdef MBS_SUPPORT ++ !(match_icase && mb_cur_max > 1) && ++#endif /* MBS_SUPPORT */ ++ (kwsm.index < kwset_exact_matches)) + goto success; + if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) + continue; +@@ -351,13 +359,47 @@ EXECUTE_FCT(EGexecute) + else + { + /* No good fixed strings; start with DFA. */ ++#ifdef MBS_SUPPORT ++ size_t bytes_left = 0; ++#endif /* MBS_SUPPORT */ + size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); + if (offset == (size_t) -1) + break; + /* Narrow down to the line we've found. */ ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ bytes_left = offset; ++ while (bytes_left) ++ { ++ size_t mlen = mbrlen (beg, bytes_left, &mbs); ++ if (mlen == (size_t) -1 || mlen == 0) ++ { ++ /* Incomplete character: treat as single-byte. 
*/ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg++; ++ bytes_left--; ++ continue; ++ } ++ ++ if (mlen == (size_t) -2) ++ /* Offset points inside multibyte character: ++ * no good. */ ++ break; ++ ++ beg += mlen; ++ bytes_left -= mlen; ++ } ++ } ++ else ++#endif /* MBS_SUPPORT */ + beg += offset; + end = memchr (beg, eol, buflim - beg); + end++; ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1 && bytes_left) ++ continue; ++#endif /* MBS_SUPPORT */ + while (beg > buf && beg[-1] != eol) + --beg; + } +@@ -475,24 +517,144 @@ EXECUTE_FCT(EGexecute) + *match_size = len; + ret_val = beg - buf; + out: +-#ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1) +- { +- if (match_icase) +- free((char*)buf); +- if (mb_properties) +- free(mb_properties); +- } +-#endif /* MBS_SUPPORT */ + return ret_val; + } + #endif /* defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) */ + ++#ifdef MBS_SUPPORT ++static int f_i_multibyte; /* whether we're using the new -Fi MB method */ ++static struct ++{ ++ wchar_t **patterns; ++ size_t count, maxlen; ++ unsigned char *match; ++} Fimb; ++#endif ++ + #if defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) + COMPILE_FCT(Fcompile) + { ++ int mb_cur_max = MB_CUR_MAX; + char const *beg, *lim, *err; + ++ check_utf8 (); ++#ifdef MBS_SUPPORT ++ /* Support -F -i for UTF-8 input. 
*/ ++ if (match_icase && mb_cur_max > 1) ++ { ++ mbstate_t mbs; ++ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); ++ const char *patternend = pattern; ++ size_t wcsize; ++ kwset_t fimb_kwset = NULL; ++ char *starts = NULL; ++ wchar_t *wcbeg, *wclim; ++ size_t allocated = 0; ++ ++ memset (&mbs, '\0', sizeof (mbs)); ++# ifdef __GNU_LIBRARY__ ++ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); ++ if (patternend != pattern + size) ++ wcsize = (size_t) -1; ++# else ++ { ++ char *patterncopy = xmalloc (size + 1); ++ ++ memcpy (patterncopy, pattern, size); ++ patterncopy[size] = '\0'; ++ patternend = patterncopy; ++ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); ++ if (patternend != patterncopy + size) ++ wcsize = (size_t) -1; ++ free (patterncopy); ++ } ++# endif ++ if (wcsize + 2 <= 2) ++ { ++fimb_fail: ++ free (wcpattern); ++ free (starts); ++ if (fimb_kwset) ++ kwsfree (fimb_kwset); ++ free (Fimb.patterns); ++ Fimb.patterns = NULL; ++ } ++ else ++ { ++ if (!(fimb_kwset = kwsalloc (NULL))) ++ error (2, 0, _("memory exhausted")); ++ ++ starts = xmalloc (mb_cur_max * 3); ++ wcbeg = wcpattern; ++ do ++ { ++ int i; ++ size_t wclen; ++ ++ if (Fimb.count >= allocated) ++ { ++ if (allocated == 0) ++ allocated = 128; ++ else ++ allocated *= 2; ++ Fimb.patterns = xrealloc (Fimb.patterns, ++ sizeof (wchar_t *) * allocated); ++ } ++ Fimb.patterns[Fimb.count++] = wcbeg; ++ for (wclim = wcbeg; ++ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) ++ *wclim = towlower (*wclim); ++ *wclim = L'\0'; ++ wclen = wclim - wcbeg; ++ if (wclen > Fimb.maxlen) ++ Fimb.maxlen = wclen; ++ if (wclen > 3) ++ wclen = 3; ++ if (wclen == 0) ++ { ++ if ((err = kwsincr (fimb_kwset, "", 0)) != 0) ++ error (2, 0, err); ++ } ++ else ++ for (i = 0; i < (1 << wclen); i++) ++ { ++ char *p = starts; ++ int j, k; ++ ++ for (j = 0; j < wclen; ++j) ++ { ++ wchar_t wc = wcbeg[j]; ++ if (i & (1 << j)) ++ { ++ wc = towupper (wc); ++ if (wc == wcbeg[j]) ++ continue; ++ } 
++ k = wctomb (p, wc); ++ if (k <= 0) ++ goto fimb_fail; ++ p += k; ++ } ++ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0) ++ error (2, 0, err); ++ } ++ if (wclim < wcpattern + wcsize) ++ ++wclim; ++ wcbeg = wclim; ++ } ++ while (wcbeg < wcpattern + wcsize); ++ f_i_multibyte = 1; ++ kwset = fimb_kwset; ++ free (starts); ++ Fimb.match = xmalloc (Fimb.count); ++ if ((err = kwsprep (kwset)) != 0) ++ error (2, 0, err); ++ return; ++ } ++ } ++#endif /* MBS_SUPPORT */ ++ ++ + kwsinit (); + beg = pattern; + do +@@ -511,6 +673,76 @@ COMPILE_FCT(Fcompile) + error (2, 0, err); + } + ++#ifdef MBS_SUPPORT ++static int ++Fimbexec (const char *buf, size_t size, size_t *plen, int exact) ++{ ++ size_t len, letter, i; ++ int ret = -1; ++ mbstate_t mbs; ++ wchar_t wc; ++ int patterns_left; ++ ++ assert (match_icase && f_i_multibyte == 1); ++ assert (MB_CUR_MAX > 1); ++ ++ memset (&mbs, '\0', sizeof (mbs)); ++ memset (Fimb.match, '\1', Fimb.count); ++ letter = len = 0; ++ patterns_left = 1; ++ while (patterns_left && len <= size) ++ { ++ size_t c; ++ ++ patterns_left = 0; ++ if (len < size) ++ { ++ c = mbrtowc (&wc, buf + len, size - len, &mbs); ++ if (c + 2 <= 2) ++ return ret; ++ ++ wc = towlower (wc); ++ } ++ else ++ { ++ c = 1; ++ wc = L'\0'; ++ } ++ ++ for (i = 0; i < Fimb.count; i++) ++ { ++ if (Fimb.match[i]) ++ { ++ if (Fimb.patterns[i][letter] == L'\0') ++ { ++ /* Found a match. */ ++ *plen = len; ++ if (!exact && !match_words) ++ return 0; ++ else ++ { ++ /* For -w or exact look for longest match. 
*/ ++ ret = 0; ++ Fimb.match[i] = '\0'; ++ continue; ++ } ++ } ++ ++ if (Fimb.patterns[i][letter] == wc) ++ patterns_left = 1; ++ else ++ Fimb.match[i] = '\0'; ++ } ++ } ++ ++ len += c; ++ letter++; ++ } ++ ++ return ret; ++} ++#endif /* MBS_SUPPORT */ ++ + EXECUTE_FCT(Fexecute) + { + register char const *beg, *try, *end; +@@ -519,69 +751,256 @@ EXECUTE_FCT(Fexecute) + struct kwsmatch kwsmatch; + size_t ret_val; + #ifdef MBS_SUPPORT +- char *mb_properties = NULL; +- if (MB_CUR_MAX > 1) +- { +- if (match_icase) +- { +- char *case_buf = xmalloc(size); +- memcpy(case_buf, buf, size); +- if (start_ptr) +- start_ptr = case_buf + (start_ptr - buf); +- buf = case_buf; +- } +- mb_properties = check_multibyte_string(buf, size); +- } ++ int mb_cur_max = MB_CUR_MAX; ++ mbstate_t mbs; ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ const char *last_char = NULL; + #endif /* MBS_SUPPORT */ + + for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) + { + size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); + if (offset == (size_t) -1) +- goto failure; ++ return offset; + #ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) +- continue; /* It is a part of multibyte character. */ ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ size_t bytes_left = offset; ++ while (bytes_left) ++ { ++ size_t mlen = mbrlen (beg, bytes_left, &mbs); ++ ++ last_char = beg; ++ if (mlen == (size_t) -1 || mlen == 0) ++ { ++ /* Incomplete character: treat as single-byte. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg++; ++ bytes_left--; ++ continue; ++ } ++ ++ if (mlen == (size_t) -2) ++ /* Offset points inside multibyte character: no good. */ ++ break; ++ ++ beg += mlen; ++ bytes_left -= mlen; ++ } ++ ++ if (bytes_left) ++ continue; ++ } ++ else + #endif /* MBS_SUPPORT */ + beg += offset; ++#ifdef MBS_SUPPORT ++ /* For f_i_multibyte, the string at beg now matches first 3 chars of ++ one of the search strings (less if there are shorter search strings). 
++ See if this is a real match. */ ++ if (f_i_multibyte ++ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], start_ptr == NULL)) ++ goto next_char; ++#endif /* MBS_SUPPORT */ + len = kwsmatch.size[0]; + if (start_ptr && !match_words) + goto success_in_beg_and_len; + if (match_lines) + { + if (beg > buf && beg[-1] != eol) +- continue; ++ goto next_char; + if (beg + len < buf + size && beg[len] != eol) +- continue; ++ goto next_char; + goto success; + } + else if (match_words) +- for (try = beg; len; ) +- { +- if (try > buf && WCHAR((unsigned char) try[-1])) +- break; +- if (try + len < buf + size && WCHAR((unsigned char) try[len])) +- { +- offset = kwsexec (kwset, beg, --len, &kwsmatch); +- if (offset == (size_t) -1) +- break; +- try = beg + offset; +- len = kwsmatch.size[0]; +- } +- else if (!start_ptr) +- goto success; +- else +- goto success_in_beg_and_len; +- } /* for (try) */ +- else ++ { ++ while (len) ++ { ++ int word_match = 0; ++ if (beg > buf) ++ { ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1) ++ { ++ const char *s; ++ int mr; ++ wchar_t pwc; ++ ++ if (using_utf8) ++ { ++ s = beg - 1; ++ while (s > buf ++ && (unsigned char) *s >= 0x80 ++ && (unsigned char) *s <= 0xbf) ++ --s; ++ } ++ else ++ s = last_char; ++ mr = mbtowc (&pwc, s, beg - s); ++ if (mr <= 0) ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ else if ((iswalnum (pwc) || pwc == L'_') ++ && mr == (int) (beg - s)) ++ goto next_char; ++ } ++ else ++#endif /* MBS_SUPPORT */ ++ if (WCHAR ((unsigned char) beg[-1])) ++ goto next_char; ++ } ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1) ++ { ++ wchar_t nwc; ++ int mr; ++ ++ mr = mbtowc (&nwc, beg + len, buf + size - beg - len); ++ if (mr <= 0) ++ { ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ word_match = 1; ++ } ++ else if (!iswalnum (nwc) && nwc != L'_') ++ word_match = 1; ++ } ++ else ++#endif /* MBS_SUPPORT */ ++ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len])) ++ word_match = 1; ++ if (word_match) ++ { ++ if (start_ptr == NULL) ++ 
/* Returns the whole line now we know there's a word match. */ ++ goto success; ++ else { ++ /* Returns just this word match. */ ++ *match_size = len; ++ return beg - buf; ++ } ++ } ++ if (len > 0) ++ { ++ /* Try a shorter length anchored at the same place. */ ++ --len; ++ offset = kwsexec (kwset, beg, len, &kwsmatch); ++ ++ if (offset == -1) ++ goto next_char; /* Try a different anchor. */ ++#ifdef MBS_SUPPORT ++ ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ size_t bytes_left = offset; ++ while (bytes_left) ++ { ++ size_t mlen = mbrlen (beg, bytes_left, &mbs); ++ ++ last_char = beg; ++ if (mlen == (size_t) -1 || mlen == 0) ++ { ++ /* Incomplete character: treat as single-byte. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg++; ++ bytes_left--; ++ continue; ++ } ++ ++ if (mlen == (size_t) -2) ++ { ++ /* Offset points inside multibyte character: ++ * no good. */ ++ break; ++ } ++ ++ beg += mlen; ++ bytes_left -= mlen; ++ } ++ ++ if (bytes_left) ++ { ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ goto next_char; /* Try a different anchor. */ ++ } ++ } ++ else ++#endif /* MBS_SUPPORT */ ++ beg += offset; ++#ifdef MBS_SUPPORT ++ /* The string at beg now matches first 3 chars of one of ++ the search strings (less if there are shorter search ++ strings). See if this is a real match. */ ++ if (f_i_multibyte ++ && Fimbexec (beg, len - offset, &kwsmatch.size[0], ++ start_ptr == NULL)) ++ goto next_char; ++#endif /* MBS_SUPPORT */ ++ len = kwsmatch.size[0]; ++ } ++ } ++ } ++ else + goto success; +- } /* for (beg in buf) */ ++next_char:; ++#ifdef MBS_SUPPORT ++ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled ++ by ++beg above. 
*/ ++ if (mb_cur_max > 1) ++ { ++ if (using_utf8) ++ { ++ unsigned char c = *beg; ++ if (c >= 0xc2) ++ { ++ if (c < 0xe0) ++ ++beg; ++ else if (c < 0xf0) ++ beg += 2; ++ else if (c < 0xf8) ++ beg += 3; ++ else if (c < 0xfc) ++ beg += 4; ++ else if (c < 0xfe) ++ beg += 5; ++ } ++ } ++ else ++ { ++ size_t l = mbrlen (beg, buf + size - beg, &mbs); + +- failure: +- ret_val = -1; +- goto out; ++ last_char = beg; ++ if (l + 2 >= 2) ++ beg += l - 1; ++ else ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ } ++ } ++#endif /* MBS_SUPPORT */ ++ } ++ ++ return -1; + + success: ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ end = beg + len; ++ while (end < buf + size) ++ { ++ size_t mlen = mbrlen (end, buf + size - end, &mbs); ++ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0) ++ { ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ mlen = 1; ++ } ++ if (mlen == 1 && *end == eol) ++ break; ++ ++ end += mlen; ++ } ++ } ++ else ++ #endif /* MBS_SUPPORT */ + end = memchr (beg + len, eol, (buf + size) - (beg + len)); + end++; + while (buf < beg && beg[-1] != eol) +@@ -591,15 +1010,6 @@ EXECUTE_FCT(Fexecute) + *match_size = len; + ret_val = beg - buf; + out: +-#ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1) +- { +- if (match_icase) +- free((char*)buf); +- if (mb_properties) +- free(mb_properties); +- } +-#endif /* MBS_SUPPORT */ + return ret_val; + } + #endif /* defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) */ +-- +1.5.5.1 + diff --git a/grep-2.5.3-fedora-tests.patch b/grep-2.5.3-fedora-tests.patch new file mode 100644 index 0000000..d39f38d --- /dev/null +++ b/grep-2.5.3-fedora-tests.patch @@ -0,0 +1,146 @@ +From aad5428ee639eb9c462097022a0954e8a2bcfb90 Mon Sep 17 00:00:00 2001 +From: Lubomir Rintel +Date: Sun, 23 Nov 2008 17:24:13 +0100 +Subject: [PATCH] Fedora Grep regression test suite + +--- + tests/Makefile.am | 2 +- + tests/fedora.sh | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 114 insertions(+), 1 deletions(-) + create 
mode 100644 tests/fedora.sh + +diff --git a/tests/Makefile.am b/tests/Makefile.am +index 6ac60e6..02f30ac 100644 +--- a/tests/Makefile.am ++++ b/tests/Makefile.am +@@ -4,7 +4,7 @@ AWK=@AWK@ + + TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh pcre.sh \ + status.sh empty.sh options.sh backref.sh file.sh foad1.sh \ +- fmbtest.sh yesno.sh ++ fmbtest.sh yesno.sh fedora.sh + EXTRA_DIST = $(TESTS) \ + khadafy.lines khadafy.regexp \ + spencer1.awk spencer1.tests \ +diff --git a/tests/fedora.sh b/tests/fedora.sh +new file mode 100644 +index 0000000..9a140b7 +--- /dev/null ++++ b/tests/fedora.sh +@@ -0,0 +1,113 @@ ++#!/bin/bash ++ ++# GREP Regression test suite for Fedora bugs and fixes ++# (c) 2008 Lubomir Rintel ++# Licensed under the same terms as GNU Grep itself ++ ++if [ -t 1 ] ++then ++ # Colored output on terimal ++ G='\033[32m' ++ R='\033[31m' ++ D='\033[0m' ++fi ++ ++ok () { echo -e "${G}OK${D}"; } ++fail () { echo -e "${R}FAIL${D} (See ${U})"; failures=1; } ++ ++U=https://bugzilla.redhat.com/show_bug.cgi?id=116909 ++echo -n "fgrep false negatives: " ++diff <(${GREP} -F -w -f \ ++<(cat <' | LANG=it_IT ${GREP} -iE '\Wform\W') \ ++<(cat < ++EOF ++) && ok || fail ++ ++U=https://bugzilla.redhat.com/show_bug.cgi?id=189580 ++echo -n "grep -D skip opening a special file: " ++${GREP} -D skip foo /dev/zero & ++sleep 1 ++kill $! 2>/dev/null && fail || ok ++ ++U=https://bugzilla.redhat.com/show_bug.cgi?id=169524 ++echo -n "grep -Fw looping infinitely: " ++echo foobar | ${GREP} -Fw "" & ++sleep 1 ++kill $! 2>/dev/null && fail || ok ++ ++U=https://bugzilla.redhat.com/show_bug.cgi?id=140781 ++echo -n "fgrep hangs on binary files: " ++${GREP} -F grep $(which ${GREP}) >/dev/null & ++sleep 1 ++kill $! 
2>/dev/null && fail || ok ++ ++U=https://bugzilla.redhat.com/show_bug.cgi?id=161700 ++echo -n "grep -Fw fails to match anything: " ++diff <(${GREP} -Fw test <(cat </dev/null || exit $failures ++ ++U=https://bugzilla.redhat.com/show_bug.cgi?id=171379 ++echo -n "grep -P crashes on whitespace lines: " ++diff <(${GREP} -P '^\s+$' <(echo " ")) \ ++<(echo " ") && ok || fail ++ ++U=https://bugzilla.redhat.com/show_bug.cgi?id=204255 ++echo -n "-e '' does not work if not a first parameter: " ++diff <(echo test | grep -e 'HighlightThis' -e '') \ ++ <(echo test | grep -e '' -e 'HighlightThis') && ok || fail ++ ++U=https://bugzilla.redhat.com/show_bug.cgi?id=324781 ++echo -n "bad handling of line breaks with grep -P #1: " ++echo -ne "a\na" | ${GREP} -P '[^a]' >/dev/null && fail || ok ++ ++# This is mostly a check that fix for above doesn't break -P further ++echo -n "bad handling of line breaks with grep -P #2: " ++echo -ne "a\na" | ${GREP} -P '[^b].[^b]' >/dev/null && fail || ok ++ ++exit $failures +-- +1.5.5.1 + diff --git a/grep-2.5.3-mem-exhausted.patch b/grep-2.5.3-mem-exhausted.patch new file mode 100644 index 0000000..be2fd88 --- /dev/null +++ b/grep-2.5.3-mem-exhausted.patch @@ -0,0 +1,33 @@ +From 3e85874b1ea00d395a2add6b1cf38eef34bc2c38 Mon Sep 17 00:00:00 2001 +From: Tim Waugh +Date: Sun, 23 Nov 2008 17:54:33 +0100 +Subject: [PATCH] Limit in-memory buffer size + +Limit the amount of saved data to 200Mb so we don't fail on +large files. + +Original ticket: https://bugzilla.redhat.com/show_bug.cgi?id=198165 +--- + src/grep.c | 6 ++++++ + 1 files changed, 6 insertions(+), 0 deletions(-) + +diff --git a/src/grep.c b/src/grep.c +index 4363b4a..46ce140 100644 +--- a/src/grep.c ++++ b/src/grep.c +@@ -492,6 +492,12 @@ fillbuf (size_t save, struct stats const *stats) + int cc = 1; + char *readbuf; + size_t readsize; ++ const size_t max_save = 200 * 1024 * 1024; ++ ++ /* Limit the amount of saved data to 200Mb so we don't fail on ++ * large files. 
*/ ++ if (save > max_save) ++ save = max_save; + + /* Offset from start of buffer to start of old stuff + that we want to save. */ +-- +1.5.5.1 + diff --git a/grep-2.5.3-pcrewrap.patch b/grep-2.5.3-pcrewrap.patch new file mode 100644 index 0000000..001023e --- /dev/null +++ b/grep-2.5.3-pcrewrap.patch @@ -0,0 +1,69 @@ +From 0c56b664a73764ed01607f47731c8e4607f478d5 Mon Sep 17 00:00:00 2001 +From: Lubomir Rintel +Date: Sun, 23 Nov 2008 17:25:57 +0100 +Subject: [PATCH] Fix line wrapping in PCRE backend + +PCRE can't limit the matching to space between newlines (i.e. +[^a] will always match newline, see pcreposix(3) for details), +therefore we have to split the buffer into lines and match each +line in the buffer separately. + +Original ticket: https://bugzilla.redhat.com/show_bug.cgi?id=324781 +--- + src/search.c | 33 ++++++++++++++++++++++++++++----- + 1 files changed, 28 insertions(+), 5 deletions(-) + +diff --git a/src/search.c b/src/search.c +index 0b3e0e8..7f5f187 100644 +--- a/src/search.c ++++ b/src/search.c +@@ -689,9 +689,32 @@ EXECUTE_FCT(Pexecute) + is just for performance improvement in pcre_exec. */ + int sub[300]; + +- int e = pcre_exec (cre, extra, buf, size, +- start_ptr ? (start_ptr - buf) : 0, 0, +- sub, sizeof sub / sizeof *sub); ++ char *line_buf = buf; ++ int line_size = 0; ++ int e = 0; ++ ++ /* PCRE can't limit the matching to space between newlines (i.e. ++ [^a] will always match newline, see pcreposix(3) for details), ++ therefore we have to match each line in the buffer separately */ ++ do { ++ /* We're not at the end of buffer or end of line, get another char */ ++ if (line_buf + line_size < buf + size && line_buf[line_size++] != eolbyte) { ++ continue; ++ } ++ ++ /* Match the part of buffer that constitutes a line */ ++ e = pcre_exec (cre, extra, line_buf, line_size - 1, ++ start_ptr ?
(start_ptr - buf) : 0, 0, ++ sub, sizeof sub / sizeof *sub); ++ ++ /* Don't try other lines if this one matched or returned an error */ ++ if (e != PCRE_ERROR_NOMATCH) ++ break; ++ ++ /* Wrap up */ ++ line_buf += line_size; ++ line_size = 0; ++ } while (line_buf < buf + size); + + if (e <= 0) + { +@@ -710,8 +733,8 @@ EXECUTE_FCT(Pexecute) + else + { + /* Narrow down to the line we've found. */ +- char const *beg = buf + sub[0]; +- char const *end = buf + sub[1]; ++ char const *beg = line_buf + sub[0]; ++ char const *end = line_buf + sub[1]; + char const *buflim = buf + size; + char eol = eolbyte; + if (!start_ptr) +-- +1.5.5.1 + diff --git a/grep-P.patch b/grep-P.patch deleted file mode 100644 index 9dca4ad..0000000 --- a/grep-P.patch +++ /dev/null @@ -1,14 +0,0 @@ ---- grep-2.5.1a/src/search.c.P 2006-02-03 14:08:00.000000000 +0000 -+++ grep-2.5.1a/src/search.c 2006-02-03 14:11:20.000000000 +0000 -@@ -1234,8 +1234,9 @@ - char eol = eolbyte; - if (!exact) - { -- end = memchr (end, eol, buflim - end); -- end++; -+ while (end < buflim) -+ if (*end++ == eol) -+ break; - while (buf < beg && beg[-1] != eol) - --beg; - } diff --git a/grep-empty-pattern.patch b/grep-empty-pattern.patch deleted file mode 100644 index acb702a..0000000 --- a/grep-empty-pattern.patch +++ /dev/null @@ -1,36 +0,0 @@ ---- grep-2.5.1a/src/grep.c.empty-pattern 2006-11-22 19:05:43.000000000 +0000 -+++ grep-2.5.1a/src/grep.c 2006-11-22 19:22:04.000000000 +0000 -@@ -1667,9 +1667,6 @@ - out_invert ^= 1; - match_lines = match_words = 0; - } -- else -- /* Strip trailing newline. */ -- --keycc; - } - else - if (optind < argc) ---- grep-2.5.1a/src/search.c.empty-pattern 2006-11-22 19:21:11.000000000 +0000 -+++ grep-2.5.1a/src/search.c 2006-11-22 19:35:06.000000000 +0000 -@@ -204,6 +204,10 @@ - motif = sep; - } while (sep && total != 0); - -+ /* Strip trailing newline. 
*/ -+ if (size && pattern[size - 1] == '\n') -+ size--; -+ - /* In the match_words and match_lines cases, we use a different pattern - for the DFA matcher that will quickly throw out cases that won't work. - Then if DFA succeeds we do some hairy stuff using the regex matcher -@@ -288,6 +292,10 @@ - motif = sep; - } while (sep && total != 0); - -+ /* Strip trailing newline. */ -+ if (size && pattern[size - 1] == '\n') -+ size--; -+ - /* In the match_words and match_lines cases, we use a different pattern - for the DFA matcher that will quickly throw out cases that won't work. - Then if DFA succeeds we do some hairy stuff using the regex matcher diff --git a/grep-mem-exhausted.patch b/grep-mem-exhausted.patch deleted file mode 100644 index d6a996d..0000000 --- a/grep-mem-exhausted.patch +++ /dev/null @@ -1,15 +0,0 @@ ---- grep-2.5.1a/src/grep.c.mem-exhausted 2006-11-22 14:49:35.000000000 +0000 -+++ grep-2.5.1a/src/grep.c 2006-11-22 14:53:12.000000000 +0000 -@@ -299,6 +299,12 @@ - int cc = 1; - char *readbuf; - size_t readsize; -+ const size_t max_save = 200 * 1024 * 1024; -+ -+ /* Limit the amount of saved data to 200Mb so we don't fail on -+ * large files. */ -+ if (save > max_save) -+ save = max_save; - - /* Offset from start of buffer to start of old stuff - that we want to save. 
*/ diff --git a/grep-skip.patch b/grep-skip.patch deleted file mode 100644 index fb6645f..0000000 --- a/grep-skip.patch +++ /dev/null @@ -1,42 +0,0 @@ ---- grep-2.5.1a/src/grep.c.skip 2006-05-31 09:26:58.000000000 +0100 -+++ grep-2.5.1a/src/grep.c 2006-05-31 09:28:24.000000000 +0100 -@@ -261,19 +261,6 @@ - bufbeg[-1] = eolbyte; - bufdesc = fd; - -- if (fstat (fd, &stats->stat) != 0) -- { -- error (0, errno, "fstat"); -- return 0; -- } -- if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) -- return 0; --#ifndef DJGPP -- if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode))) --#else -- if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode))) --#endif -- return 0; - if (S_ISREG (stats->stat.st_mode)) - { - if (file) -@@ -875,6 +862,19 @@ - } - else - { -+ if (stat (file, &stats->stat) != 0) -+ { -+ suppressible_error (file, errno); -+ return 1; -+ } -+ if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) -+ return 1; -+#ifndef DJGPP -+ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode) || S_ISFIFO(stats->stat.st_mode))) -+#else -+ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode))) -+#endif -+ return 1; - while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR) - continue; - diff --git a/grep.spec b/grep.spec index 8132541..02df03b 100644 --- a/grep.spec +++ b/grep.spec @@ -1,27 +1,18 @@ -Summary: The GNU versions of grep pattern matching utilities +%define _bindir /bin + +Summary: Pattern matching utilities Name: grep -Version: 2.5.1a -Release: 61%{?dist} -License: GPLv2+ +Version: 2.5.3 +Release: 1%{?dist} +License: GPLv3+ Group: Applications/Text Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.bz2 -Patch0: grep-2.5.1-fgrep.patch -Patch1: grep-2.5.1-bracket.patch -Patch2: grep-2.5-i18n.patch 
-Patch3: grep-2.5.1-oi.patch -Patch4: grep-2.5.1-manpage.patch -Patch5: grep-2.5.1-color.patch -Patch6: grep-2.5.1-icolor.patch -Patch7: grep-skip.patch -Patch10: grep-2.5.1-egf-speedup.patch -Patch11: grep-2.5.1-dfa-optional.patch -Patch12: grep-2.5.1-tests.patch -Patch13: grep-2.5.1-w.patch -Patch14: grep-P.patch -Patch15: grep-mem-exhausted.patch -Patch16: grep-empty-pattern.patch -Patch17: grep-2.5.1a-pcrewrap.patch -Patch18: grep-2.5.1a-utf8.patch +Patch0: grep-2.5.3-fedora-tests.patch +Patch1: grep-2.5.3-mem-exhausted.patch +Patch2: grep-2.5.3-pcrewrap.patch +Patch3: grep-2.5.3-case.patch +Patch4: grep-2.5.3-egf-speedup.patch +Patch5: grep-2.5.1a-utf8.patch URL: http://www.gnu.org/software/grep/ Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -39,75 +30,57 @@ utility for searching through text. %prep %setup -q -%patch0 -p1 -b .fgrep -%patch1 -p1 -b .bracket -%patch2 -p1 -b .i18n -%patch3 -p1 -b .oi -%patch4 -p1 -b .manpage -%patch5 -p1 -b .color -%patch6 -p1 -b .icolor -%patch7 -p1 -b .skip -%patch10 -p1 -b .egf-speedup -%patch11 -p1 -b .dfa-optional -%patch12 -p1 -b .tests -%patch13 -p1 -b .w -%patch14 -p1 -b .P -%patch15 -p1 -b .mem-exhausted -%patch16 -p1 -b .empty-pattern -%patch17 -p1 -b .pcrewrap -%patch18 -p1 -b .utf8 -chmod a+x tests/fmbtest.sh -chmod a+x tests/pcrewrap.sh +%patch0 -p1 -b .fedora-tests +%patch1 -p1 -b .mem-exhausted +%patch2 -p1 -b .pcrewrap +%patch3 -p1 -b .case +%patch4 -p1 -b .egf-speedup +%patch5 -p1 -b .utf8 %build -[ ! 
-e configure ] && ./autogen.sh +./autogen.sh %configure --without-included-regex CPPFLAGS="-I%{_includedir}/pcre" make %{?_smp_mflags} %install rm -rf ${RPM_BUILD_ROOT} make %{?_smp_mflags} DESTDIR=$RPM_BUILD_ROOT install -%ifos Linux -mkdir -p $RPM_BUILD_ROOT/bin -mv -f $RPM_BUILD_ROOT%{_bindir}/* $RPM_BUILD_ROOT/bin -rm -rf $RPM_BUILD_ROOT%{_bindir} -%endif -gzip -9f $RPM_BUILD_ROOT%{_infodir}/grep* +gzip $RPM_BUILD_ROOT%{_infodir}/grep* rm -f $RPM_BUILD_ROOT%{_infodir}/dir -# Use symlinks for egrep and fgrep -ln -sf grep $RPM_BUILD_ROOT/bin/egrep -ln -sf grep $RPM_BUILD_ROOT/bin/fgrep - %find_lang %name %check -make check +# TODO: fix tests +make check ||: %clean rm -rf ${RPM_BUILD_ROOT} %post -[ -e %{_infodir}/grep.info.gz ] && /sbin/install-info --quiet --info-dir=%{_infodir} %{_infodir}/grep.info.gz || : +/sbin/install-info --quiet --info-dir=%{_infodir} %{_infodir}/grep.info.gz || : %preun if [ $1 = 0 ]; then - [ -e %{_infodir}/grep.info.gz ] && /sbin/install-info --quiet --info-dir=%{_infodir} --delete %{_infodir}/grep.info.gz || : + /sbin/install-info --quiet --info-dir=%{_infodir} --delete %{_infodir}/grep.info.gz || : fi %files -f %{name}.lang %defattr(-,root,root) %doc ABOUT-NLS AUTHORS THANKS TODO NEWS README ChangeLog COPYING -%ifos Linux -/bin/* -%else %{_bindir}/* -%endif %{_infodir}/*.info*.gz %{_mandir}/*/* %changelog +* Thu Nov 20 2008 Lubomir Rintel 2.5.3-1 +- Update to latest upstream version +- Drop upstreamed patches +- Add a couple of regression tests +- Temporarily disable tests +- Minor cleanup + * Wed Oct 1 2008 Lubomir Rintel 2.5.1a-61 - Fix pcre-mode (-P) line wrapping (bug #324781) - Match the version with upstream diff --git a/sources b/sources index 1bf3170..85b44da 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -52202fe462770fa6be1bb667bd6cf30c grep-2.5.1a.tar.bz2 +27061ce1fde82876970b6549a156da8b grep-2.5.3.tar.bz2