From 0dcc5a0d5eb1554f4b5c0d2f2c8988b6b91e6553 Mon Sep 17 00:00:00 2001 From: Ondrej Oprala Date: Aug 14 2013 17:01:16 +0000 Subject: sort: fix multibyte incompatibilities (#821264) --- diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch index 0e25f49..da46a71 100644 --- a/coreutils-i18n.patch +++ b/coreutils-i18n.patch @@ -2419,8 +2419,8 @@ diff -urNp coreutils-8.21-orig/src/pr.c coreutils-8.21/src/pr.c looking for more options and printing the next batch of files. diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c ---- coreutils-8.21-orig/src/sort.c 2013-01-31 01:46:24.000000000 +0100 -+++ coreutils-8.21/src/sort.c 2013-02-15 14:25:07.828467769 +0100 +--- coreutils-8.21-orig/src/sort.c 2013-08-14 18:14:06.172216606 +0200 ++++ coreutils-8.21/src/sort.c 2013-08-14 18:13:30.295247905 +0200 @@ -29,6 +29,14 @@ #include #include @@ -2440,8 +2440,8 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c /* Nonzero if the corresponding locales are hard. */ static bool hard_LC_COLLATE; --#if HAVE_NL_LANGINFO +#if HAVE_LANGINFO_CODESET +-#if HAVE_NL_LANGINFO static bool hard_LC_TIME; #endif @@ -2476,16 +2476,16 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c they were read if all keys compare equal. */ static bool stable; ++/* Tab character separating fields. If tab_length is 0, then fields are -/* If TAB has this value, blanks separate fields. */ -enum { TAB_DEFAULT = CHAR_MAX + 1 }; - -/* Tab character separating fields. If TAB_DEFAULT, then fields are -+/* Tab character separating fields. If tab_length is 0, then fields are separated by the empty string between a non-blank character and a blank character. */ --static int tab = TAB_DEFAULT; +static char tab[MB_LEN_MAX + 1]; +static size_t tab_length = 0; +-static int tab = TAB_DEFAULT; /* Flag to remove consecutive duplicate lines from the output. Only the last of a sequence of equal lines will be output.
*/ @@ -2540,8 +2540,8 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c free (node); } --#if HAVE_NL_LANGINFO +#if HAVE_LANGINFO_CODESET +-#if HAVE_NL_LANGINFO static int struct_month_cmp (void const *m1, void const *m2) @@ -2549,17 +2549,17 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c /* Initialize the character class tables. */ static void --inittables (void) +inittables_uni (void) +-inittables (void) { size_t i; -@@ -1250,7 +1318,7 @@ inittables (void) +@@ -1250,7 +1318,7 @@ inittables_uni (void) fold_toupper[i] = toupper (i); } --#if HAVE_NL_LANGINFO +#if HAVE_LANGINFO_CODESET +-#if HAVE_NL_LANGINFO /* If we're not in the "C" locale, read different names for months. */ if (hard_LC_TIME) { @@ -2652,25 +2652,25 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c by KEY in LINE. */ static char * --begfield (struct line const *line, struct keyfield const *key) +begfield_uni (const struct line *line, const struct keyfield *key) +-begfield (struct line const *line, struct keyfield const *key) { char *ptr = line->text, *lim = ptr + line->length - 1; size_t sword = key->sword; -@@ -1573,10 +1719,10 @@ begfield (struct line const *line, struc +@@ -1573,10 +1719,10 @@ begfield_uni (const struct line *line, c /* The leading field separator itself is included in a field when -t is absent. */ -- if (tab != TAB_DEFAULT) + if (tab_length) +- if (tab != TAB_DEFAULT) while (ptr < lim && sword--) { -- while (ptr < lim && *ptr != tab) + while (ptr < lim && *ptr != tab[0]) +- while (ptr < lim && *ptr != tab) ++ptr; if (ptr < lim) ++ptr; -@@ -1602,11 +1748,70 @@ begfield (struct line const *line, struc +@@ -1602,11 +1748,70 @@ begfield_uni (const struct line *line, c return ptr; } @@ -2737,38 +2737,38 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c in LINE specified by KEY. 
*/ static char * --limfield (struct line const *line, struct keyfield const *key) +limfield_uni (const struct line *line, const struct keyfield *key) +-limfield (struct line const *line, struct keyfield const *key) { char *ptr = line->text, *lim = ptr + line->length - 1; size_t eword = key->eword, echar = key->echar; -@@ -1621,10 +1826,10 @@ limfield (struct line const *line, struc +@@ -1621,10 +1826,10 @@ limfield_uni (const struct line *line, c 'beginning' is the first character following the delimiting TAB. Otherwise, leave PTR pointing at the first 'blank' character after the preceding field. */ -- if (tab != TAB_DEFAULT) + if (tab_length) +- if (tab != TAB_DEFAULT) while (ptr < lim && eword--) { -- while (ptr < lim && *ptr != tab) + while (ptr < lim && *ptr != tab[0]) +- while (ptr < lim && *ptr != tab) ++ptr; if (ptr < lim && (eword || echar)) ++ptr; -@@ -1670,10 +1875,10 @@ limfield (struct line const *line, struc +@@ -1670,10 +1875,10 @@ limfield_uni (const struct line *line, c */ /* Make LIM point to the end of (one byte past) the current field. */ -- if (tab != TAB_DEFAULT) + if (tab_length) +- if (tab != TAB_DEFAULT) { char *newlim; -- newlim = memchr (ptr, tab, lim - ptr); + newlim = memchr (ptr, tab[0], lim - ptr); +- newlim = memchr (ptr, tab, lim - ptr); if (newlim) lim = newlim; } -@@ -1704,6 +1909,130 @@ limfield (struct line const *line, struc +@@ -1704,6 +1909,130 @@ limfield_uni (const struct line *line, c return ptr; } @@ -2857,7 +2857,7 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c + { + /* If we're skipping leading blanks, don't start counting characters + * until after skipping past any leading blanks. 
*/ -+ if (key->skipsblanks) ++ if (key->skipeblanks) + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) + ptr += mblength; + @@ -2903,8 +2903,6 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c else { if (key->skipsblanks) -- while (blanks[to_uchar (*line_start)]) -- line_start++; + { +#if HAVE_MBRTOWC + if (MB_CUR_MAX > 1) @@ -2921,6 +2919,8 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c + while (blanks[to_uchar (*line_start)]) + line_start++; + } +- while (blanks[to_uchar (*line_start)]) +- line_start++; line->keybeg = line_start; } } @@ -2928,12 +2928,12 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c hideously fast. */ static int --numcompare (char const *a, char const *b) +numcompare_uni (const char *a, const char *b) +-numcompare (char const *a, char const *b) { while (blanks[to_uchar (*a)]) a++; -@@ -1922,6 +2265,25 @@ numcompare (char const *a, char const *b +@@ -1922,6 +2265,25 @@ numcompare_uni (const char *a, const cha return strnumcmp (a, b, decimal_point, thousands_sep); } @@ -2963,8 +2963,8 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c Return 0 if the name in S is not recognized. 
*/ static int --getmonth (char const *month, char **ea) +getmonth_uni (char const *month, size_t len, char **ea) +-getmonth (char const *month, char **ea) { size_t lo = 0; size_t hi = MONTHS_PER_YEAR; @@ -2972,17 +2972,17 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c char saved = *lim; *lim = '\0'; ++ skipblanks (&beg, lim); - while (blanks[to_uchar (*beg)]) - beg++; -+ skipblanks (&beg, lim); char *tighter_lim = beg; if (lim < beg) tighter_lim = lim; else if (key->month) -- getmonth (beg, &tighter_lim); + getmonth (beg, lim-beg, &tighter_lim); +- getmonth (beg, &tighter_lim); else if (key->general_numeric) ignore_value (strtold (beg, &tighter_lim)); else if (key->numeric || key->human_numeric) @@ -2990,8 +2990,8 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key) && !(key->schar || key->echar); bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ -- if (!gkey_only && tab == TAB_DEFAULT && !line_offset + if (!gkey_only && !tab_length && !line_offset +- if (!gkey_only && tab == TAB_DEFAULT && !line_offset && ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned)) || (!key->skipsblanks && key->schar) || (!key->skipeblanks && key->echar))) @@ -3079,21 +3079,21 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c are no more keys or a difference is found. 
*/ static int --keycompare (struct line const *a, struct line const *b) +keycompare_uni (const struct line *a, const struct line *b) +-keycompare (struct line const *a, struct line const *b) { struct keyfield *key = keylist; -@@ -2546,7 +2983,7 @@ keycompare (struct line const *a, struct +@@ -2546,7 +2983,7 @@ keycompare_uni (const struct line *a, co else if (key->human_numeric) diff = human_numcompare (ta, tb); else if (key->month) -- diff = getmonth (ta, NULL) - getmonth (tb, NULL); + diff = getmonth (ta, tlena, NULL) - getmonth (tb, tlenb, NULL); +- diff = getmonth (ta, NULL) - getmonth (tb, NULL); else if (key->random) diff = compare_random (ta, tlena, tb, tlenb); else if (key->version) -@@ -2662,6 +3099,181 @@ keycompare (struct line const *a, struct +@@ -2662,6 +3099,191 @@ keycompare_uni (const struct line *a, co return key->reverse ? -diff : diff; } @@ -3114,45 +3114,14 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c + wchar_t wc_a, wc_b; + mbstate_t state_a, state_b; + -+ int diff; ++ int diff = 0; + + memset (&state_a, '\0', sizeof(mbstate_t)); + memset (&state_b, '\0', sizeof(mbstate_t)); ++ /* Ignore keys with start after end. */ ++ if (a->keybeg - a->keylim > 0) ++ return 0; + -+ for (;;) -+ { -+ char const *translate = key->translate; -+ bool const *ignore = key->ignore; -+ -+ /* Find the lengths. */ -+ size_t lena = lima <= texta ? 0 : lima - texta; -+ size_t lenb = limb <= textb ? 0 : limb - textb; -+ -+ /* Actually compare the fields. */ -+ if (key->random) -+ diff = compare_random (texta, lena, textb, lenb); -+ else if (key->numeric | key->general_numeric | key->human_numeric) -+ { -+ char savea = *lima, saveb = *limb; -+ -+ *lima = *limb = '\0'; -+ diff = (key->numeric ? numcompare (texta, textb) -+ : key->general_numeric ? 
general_numcompare (texta, textb) -+ : human_numcompare (texta, textb)); -+ *lima = savea, *limb = saveb; -+ } -+ else if (key->version) -+ diff = filevercmp (texta, textb); -+ else if (key->month) -+ diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL); -+ else -+ { -+ if (ignore || translate) -+ { -+ char *copy_a = (char *) xmalloc (lena + 1 + lenb + 1); -+ char *copy_b = copy_a + lena + 1; -+ size_t new_len_a, new_len_b; -+ size_t i, j; + + /* Ignore and/or translate chars before comparing. */ +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ @@ -3220,20 +3189,62 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c + COPY[NEW_LEN] = '\0'; \ + } \ + while (0) -+ IGNORE_CHARS (new_len_a, lena, texta, copy_a, -+ wc_a, mblength_a, state_a); -+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b, -+ wc_b, mblength_b, state_b); -+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); -+ free(copy_a); -+ } -+ else if (lena == 0) -+ diff = - NONZERO (lenb); -+ else if (lenb == 0) -+ goto greater; -+ else -+ diff = xmemcoll (texta, lena, textb, lenb); ++ ++ /* Actually compare the fields. */ ++ ++ for (;;) ++ { ++ /* Find the lengths. */ ++ size_t lena = lima <= texta ? 0 : lima - texta; ++ size_t lenb = limb <= textb ? 
0 : limb - textb; ++ ++ char const *translate = key->translate; ++ bool const *ignore = key->ignore; ++ ++ if (ignore || translate) ++ { ++ char *copy_a = (char *) xmalloc (lena + 1 + lenb + 1); ++ char *copy_b = copy_a + lena + 1; ++ size_t new_len_a, new_len_b; ++ size_t i, j; ++ ++ IGNORE_CHARS (new_len_a, lena, texta, copy_a, ++ wc_a, mblength_a, state_a); ++ IGNORE_CHARS (new_len_b, lenb, textb, copy_b, ++ wc_b, mblength_b, state_b); ++ texta = copy_a; textb = copy_b; ++ lena = new_len_a; lenb = new_len_b; ++ } ++ ++ if (key->random) ++ diff = compare_random (texta, lena, textb, lenb); ++ else if (key->numeric | key->general_numeric | key->human_numeric) ++ { ++ char savea = *lima, saveb = *limb; ++ ++ *lima = *limb = '\0'; ++ diff = (key->numeric ? numcompare (texta, textb) ++ : key->general_numeric ? general_numcompare (texta, textb) ++ : human_numcompare (texta, textb)); ++ *lima = savea, *limb = saveb; + } ++ else if (key->version) ++ diff = filevercmp (texta, textb); ++ else if (key->month) ++ diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL); ++ else if (lena == 0) ++ diff = - NONZERO (lenb); ++ else if (lenb == 0) ++ diff = 1; ++ else ++ { ++ diff = memcmp (texta, textb, MIN (lena,lenb)); ++ if (!diff) ++ diff = xmemcoll (texta, lena, textb, lenb); ++ } ++ ++ if (ignore || translate) ++ free (texta); + + if (diff) + goto not_equal; @@ -3263,28 +3274,42 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c + } + } + -+ return 0; -+ -+greater: -+ diff = 1; +not_equal: -+ return key->reverse ? -diff : diff; ++ if (key && key->reverse) ++ return -diff; ++ else ++ return diff; +} +#endif + /* Compare two lines A and B, returning negative, zero, or positive depending on whether A compares less than, equal to, or greater than B. 
*/ -@@ -4157,7 +4769,7 @@ main (int argc, char **argv) +@@ -2689,14 +3311,6 @@ compare (struct line const *a, struct li + diff = - NONZERO (blen); + else if (blen == 0) + diff = 1; +- else if (hard_LC_COLLATE) +- { +- /* Note xmemcoll0 is a performance enhancement as +- it will not unconditionally write '\0' after the +- passed in buffers, which was seen to give around +- a 3% increase in performance for short lines. */ +- diff = xmemcoll0 (a->text, alen + 1, b->text, blen + 1); +- } + else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen)))) + diff = alen < blen ? -1 : alen != blen; + +@@ -4157,7 +4771,7 @@ main (int argc, char **argv) initialize_exit_failure (SORT_FAILURE); hard_LC_COLLATE = hard_locale (LC_COLLATE); --#if HAVE_NL_LANGINFO +#if HAVE_LANGINFO_CODESET +-#if HAVE_NL_LANGINFO hard_LC_TIME = hard_locale (LC_TIME); #endif -@@ -4178,6 +4790,29 @@ main (int argc, char **argv) +@@ -4178,6 +4792,29 @@ main (int argc, char **argv) thousands_sep = -1; } @@ -3314,18 +3339,17 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c have_read_stdin = false; inittables (); -@@ -4452,13 +5087,34 @@ main (int argc, char **argv) +@@ -4452,13 +5089,34 @@ main (int argc, char **argv) case 't': { -- char newtab = optarg[0]; -- if (! newtab) + char newtab[MB_LEN_MAX + 1]; + size_t newtab_length = 1; + strncpy (newtab, optarg, MB_LEN_MAX); + if (! newtab[0]) +- char newtab = optarg[0]; +- if (! newtab) error (SORT_FAILURE, 0, _("empty tab")); -- if (optarg[1]) +#if HAVE_MBRTOWC + if (MB_CUR_MAX > 1) + { @@ -3346,25 +3370,26 @@ diff -urNp coreutils-8.21-orig/src/sort.c coreutils-8.21/src/sort.c + } +#endif + if (newtab_length == 1 && optarg[1]) +- if (optarg[1]) { if (STREQ (optarg, "\\0")) -- newtab = '\0'; + newtab[0] = '\0'; +- newtab = '\0'; else { /* Provoke with 'sort -txx'. 
Complain about -@@ -4469,9 +5125,12 @@ main (int argc, char **argv) +@@ -4469,9 +5127,12 @@ main (int argc, char **argv) quote (optarg)); } } -- if (tab != TAB_DEFAULT && tab != newtab) + if (tab_length + && (tab_length != newtab_length + || memcmp (tab, newtab, tab_length) != 0)) +- if (tab != TAB_DEFAULT && tab != newtab) error (SORT_FAILURE, 0, _("incompatible tabs")); -- tab = newtab; + memcpy (tab, newtab, newtab_length); + tab_length = newtab_length; +- tab = newtab; } break; diff --git a/coreutils.spec b/coreutils.spec index 621cfb6..69ff506 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,7 +1,7 @@ Summary: A set of basic GNU tools commonly used in shell scripts Name: coreutils Version: 8.21 -Release: 16%{?dist} +Release: 17%{?dist} License: GPLv3+ Group: System Environment/Base Url: http://www.gnu.org/software/coreutils/ @@ -375,6 +375,9 @@ fi %{_sbindir}/chroot %changelog +* Wed Aug 14 2013 Ondrej Oprala 8.21-17 +- Fix sort multibyte incompatibilities + * Sat Aug 03 2013 Fedora Release Engineering - 8.21-16 - Rebuilt for https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild