diff -urNp coreutils-6.12-orig/src/sort.c coreutils-6.12/src/sort.c --- coreutils-6.12-orig/src/sort.c 2009-02-26 16:01:04.000000000 +0100 +++ coreutils-6.12/src/sort.c 2009-02-26 16:24:27.000000000 +0100 @@ -1390,7 +1390,6 @@ begfield_uni (const struct line *line, c char *ptr = line->text, *lim = ptr + line->length - 1; size_t sword = key->sword; size_t schar = key->schar; - size_t remaining_bytes; /* The leading field separator itself is included in a field when -t is absent. */ @@ -1416,12 +1415,7 @@ begfield_uni (const struct line *line, c while (ptr < lim && blanks[to_uchar (*ptr)]) ++ptr; - /* Advance PTR by SCHAR (if possible), but no further than LIM. */ - remaining_bytes = lim - ptr; - if (schar < remaining_bytes) - ptr += schar; - else - ptr = lim; + ptr = MIN (lim, ptr + schar); return ptr; } @@ -1493,7 +1487,9 @@ limfield_uni (const struct line *line, c { char *ptr = line->text, *lim = ptr + line->length - 1; size_t eword = key->eword, echar = key->echar; - size_t remaining_bytes; + + if (echar == 0) + eword++; /* Skip all of end field. */ /* Move PTR past EWORD fields or to one past the last byte on LINE, whichever comes first. If there are more than EWORD fields, leave @@ -1566,19 +1566,13 @@ limfield_uni (const struct line *line, c } #endif - /* If we're ignoring leading blanks when computing the End - of the field, don't start counting bytes until after skipping - past any leading blanks. */ - if (key->skipeblanks) - while (ptr < lim && blanks[to_uchar (*ptr)]) - ++ptr; - - /* Advance PTR by ECHAR (if possible), but no further than LIM. */ - remaining_bytes = lim - ptr; - if (echar < remaining_bytes) - ptr += echar; - else - ptr = lim; + if (echar != 0) /* We need to skip over a portion of the end field. */ + { + if (key->skipeblanks) /* blanks not counted in echar. 
*/ + while (ptr < lim && blanks[to_uchar (*ptr)]) + ++ptr; + ptr = MIN (lim, ptr + echar); + } return ptr; } @@ -3582,12 +3579,9 @@ main (int argc, char **argv) badfieldspec (optarg, N_("field number is zero")); } if (*s == '.') - s = parse_field_count (s + 1, &key->echar, - N_("invalid number after `.'")); - else { - /* `-k 2,3' is equivalent to `+1 -3'. */ - key->eword++; + s = parse_field_count (s + 1, &key->echar, + N_("invalid number after `.'")); } s = set_ordering (s, key, bl_end); } diff -urNp coreutils-6.12-orig/tests/misc/sort coreutils-6.12/tests/misc/sort --- coreutils-6.12-orig/tests/misc/sort 2008-05-17 08:41:12.000000000 +0200 +++ coreutils-6.12/tests/misc/sort 2009-02-26 16:25:39.000000000 +0100 @@ -24,6 +24,10 @@ my $prog = 'sort'; # Turn off localization of executable's output. @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; +my $mb_locale = $ENV{LOCALE_FR_UTF8}; +! defined $mb_locale || $mb_locale eq 'none' + and $mb_locale = 'C'; + # Since each test is run with a file name and with redirected stdin, # the name in the diagnostic is either the file name or "-". # Normalize each diagnostic to use '-'. @@ -108,6 +114,8 @@ my @Tests = ["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}], ["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], ["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], +#ensure a character position of 0 includes whole field +["07e", '-k 2,3.0', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}], # # report an error for `.' without following char spec ["08a", '-k 2.,3', {EXIT=>2}, @@ -208,6 +216,15 @@ my @Tests = # key start and key end. ["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}], +# When ignoring leading blanks for end position, ensure blanks from +# next field are not included in the sort. I.E. order should not change here. 
+["18f", '-k1,1b', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}], + +# When ignoring leading blanks for start position, ensure blanks from +# next field are not included in the sort. I.E. order should not change here. +# This was noticed as an issue on Fedora 8 (only in multibyte locales). +["18g", '-k1b,1', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}, + {ENV => "LC_ALL=$mb_locale"}], # This looks odd, but works properly -- 2nd keyspec is never # used because all lines are different. ["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}],