diff --git a/coreutils-i18n-expand-unexpand.patch b/coreutils-i18n-expand-unexpand.patch new file mode 100644 index 0000000..63813f9 --- /dev/null +++ b/coreutils-i18n-expand-unexpand.patch @@ -0,0 +1,1452 @@ +From 332e9adf944e4ea232a855b1bf75ea4ddfd7e794 Mon Sep 17 00:00:00 2001 +From: Ondrej Oprala +Date: Wed, 5 Aug 2015 09:15:09 +0200 +Subject: [PATCH] expand,unexpand: add multibyte support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +* NEWS: Mention the changes. +* bootstrap.conf: Add mbfile to the list of modules. +* configure.ac: Properly initialize mbfile. +* po/POTFILES.in: Add new source file. +* src/expand-core.c: Move functions common to both expand and +unexpand to this file. +* src/expand-core.h: Add function prototypes from expand-core.c. +* src/expand.c (expand): Iterate over multibyte characters properly. +* src/local.mk: Add expand-core.c to the lists of source codes for +expand and unexpand +* src/unexpand.c (unexpand): Iterate over multibyte characters +properly. +* tests/local.mk: Add new tests. +* tests/{expand,unexpand}/mb.sh: New tests. 
+ +Co-authored-by: Pádraig Brady +--- + NEWS | 3 + + bootstrap.conf | 1 + + configure.ac | 2 + + po/POTFILES.in | 1 + + src/expand-core.c | 150 +++++++++++++++++++++++++++++++++++++++ + src/expand-core.h | 44 ++++++++++++ + src/expand.c | 183 ++++++++++------------------------------------- + src/local.mk | 2 + + src/unexpand.c | 197 ++++++++++++--------------------------------------- + tests/expand/mb.sh | 98 +++++++++++++++++++++++++ + tests/local.mk | 2 + + tests/unexpand/mb.sh | 97 +++++++++++++++++++++++++ + 12 files changed, 482 insertions(+), 298 deletions(-) + create mode 100644 src/expand-core.c + create mode 100644 src/expand-core.h + create mode 100755 tests/expand/mb.sh + create mode 100755 tests/unexpand/mb.sh + +diff --git a/bootstrap.conf b/bootstrap.conf +index ef1c078..ea8cebc 100644 +--- a/bootstrap.conf ++++ b/bootstrap.conf +@@ -152,6 +152,7 @@ gnulib_modules=" + maintainer-makefile + malloc-gnu + manywarnings ++ mbfile + mbrlen + mbrtowc + mbsalign +diff --git a/configure.ac b/configure.ac +index 8dc2192..b8b5114 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -422,6 +422,8 @@ gl_WINSIZE_IN_PTEM + # I'm leaving it here for now. This whole thing needs to be modernized... + gl_WINSIZE_IN_PTEM + ++gl_MBFILE ++ + gl_HEADER_TIOCGWINSZ_IN_TERMIOS_H + + if test $gl_cv_sys_tiocgwinsz_needs_termios_h = no && \ +diff --git a/po/POTFILES.in b/po/POTFILES.in +index b3fe668..c594d20 100644 +--- a/po/POTFILES.in ++++ b/po/POTFILES.in +@@ -57,6 +57,7 @@ src/dirname.c + src/du.c + src/echo.c + src/env.c ++src/expand-core.c + src/expand.c + src/expr.c + src/factor.c +diff --git a/src/expand-core.c b/src/expand-core.c +new file mode 100644 +index 0000000..c8445db +--- /dev/null ++++ b/src/expand-core.c +@@ -0,0 +1,150 @@ ++/* expand-core.c - elementary functions for the expand and unexpand utilities ++ Copyright (C) 1989-2015 Free Software Foundation, Inc. 
++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++#include ++ ++#include ++#include ++ ++#include "system.h" ++#include "error.h" ++#include "fadvise.h" ++#include "quote.h" ++#include "xstrndup.h" ++ ++#include "expand-core.h" ++ ++/* Add the comma or blank separated list of tab stops STOPS ++ to the list of tab stops. */ ++ ++extern void ++parse_tab_stops (char const *stops, void (*add_tab_stop)(uintmax_t)) ++{ ++ bool have_tabval = false; ++ uintmax_t tabval IF_LINT ( = 0); ++ char const *num_start IF_LINT ( = NULL); ++ bool ok = true; ++ ++ for (; *stops; stops++) ++ { ++ if (*stops == ',' || isblank (to_uchar (*stops))) ++ { ++ if (have_tabval) ++ add_tab_stop (tabval); ++ have_tabval = false; ++ } ++ else if (ISDIGIT (*stops)) ++ { ++ if (!have_tabval) ++ { ++ tabval = 0; ++ have_tabval = true; ++ num_start = stops; ++ } ++ ++ /* Detect overflow. 
*/ ++ if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) ++ { ++ size_t len = strspn (num_start, "0123456789"); ++ char *bad_num = xstrndup (num_start, len); ++ error (0, 0, _("tab stop is too large %s"), quote (bad_num)); ++ free (bad_num); ++ ok = false; ++ stops = num_start + len - 1; ++ } ++ } ++ else ++ { ++ error (0, 0, _("tab size contains invalid character(s): %s"), ++ quote (stops)); ++ ok = false; ++ break; ++ } ++ } ++ ++ if (!ok) ++ exit (EXIT_FAILURE); ++ ++ if (have_tabval) ++ add_tab_stop (tabval); ++} ++ ++/* Check that the list of tab stops TABS, with ENTRIES entries, ++ contains only nonzero, ascending values. */ ++ ++extern void ++validate_tab_stops (uintmax_t const *tabs, size_t entries) ++{ ++ uintmax_t prev_tab = 0; ++ size_t i; ++ ++ for (i = 0; i < entries; i++) ++ { ++ if (tabs[i] == 0) ++ error (EXIT_FAILURE, 0, _("tab size cannot be 0")); ++ if (tabs[i] <= prev_tab) ++ error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); ++ prev_tab = tabs[i]; ++ } ++} ++ ++/* Close the old stream pointer FP if it is non-NULL, ++ and return a new one opened to read the next input file. ++ Open a filename of '-' as the standard input. ++ Return NULL if there are no more input files. */ ++ ++extern FILE * ++next_file (FILE *fp) ++{ ++ static char *prev_file; ++ char *file; ++ ++ if (fp) ++ { ++ if (ferror (fp)) ++ { ++ error (0, errno, "%s", prev_file); ++ exit_status = EXIT_FAILURE; ++ } ++ if (STREQ (prev_file, "-")) ++ clearerr (fp); /* Also clear EOF. 
*/ ++ else if (fclose (fp) != 0) ++ { ++ error (0, errno, "%s", prev_file); ++ exit_status = EXIT_FAILURE; ++ } ++ } ++ ++ while ((file = *file_list++) != NULL) ++ { ++ if (STREQ (file, "-")) ++ { ++ have_read_stdin = true; ++ fp = stdin; ++ } ++ else ++ fp = fopen (file, "r"); ++ if (fp) ++ { ++ prev_file = file; ++ fadvise (fp, FADVISE_SEQUENTIAL); ++ return fp; ++ } ++ error (0, errno, "%s", file); ++ exit_status = EXIT_FAILURE; ++ } ++ return NULL; ++} +diff --git a/src/expand-core.h b/src/expand-core.h +new file mode 100644 +index 0000000..2419407 +--- /dev/null ++++ b/src/expand-core.h +@@ -0,0 +1,41 @@ ++/* expand-core.h - function prototypes for the expand and unexpand utilities ++ Copyright (C) 1989-2015 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . 
*/ ++ ++#ifndef EXPAND_CORE_H_ ++# define EXPAND_CORE_H_ ++ ++extern size_t first_free_tab; ++ ++extern size_t n_tabs_allocated; ++ ++extern uintmax_t *tab_list; ++ ++extern int exit_status; ++ ++extern char **file_list; ++ ++extern bool have_read_stdin; ++ ++void ++parse_tab_stops (char const *stops, void (*add_tab_stop)(uintmax_t)); ++ ++void ++validate_tab_stops (uintmax_t const *tabs, size_t entries); ++ ++FILE * ++next_file (FILE *fp); ++ ++#endif /* EXPAND_CORE_H_ */ +diff --git a/src/expand.c b/src/expand.c +index 0a40a1a..ed97fd4 100644 +--- a/src/expand.c ++++ b/src/expand.c +@@ -37,12 +37,16 @@ + #include + #include + #include ++ ++#include ++ + #include "system.h" + #include "error.h" + #include "fadvise.h" +-#include "quote.h" + #include "xstrndup.h" + ++#include "expand-core.h" ++ + /* The official name of this program (e.g., no 'g' prefix). */ + #define PROGRAM_NAME "expand" + +@@ -58,17 +62,17 @@ static uintmax_t tab_size; + /* Array of the explicit column numbers of the tab stops; + after 'tab_list' is exhausted, each additional tab is replaced + by a space. The first column is column 0. */ +-static uintmax_t *tab_list; ++uintmax_t *tab_list; + + /* The number of allocated entries in 'tab_list'. */ +-static size_t n_tabs_allocated; ++size_t n_tabs_allocated; + + /* The index of the first invalid element of 'tab_list', + where the next element can be added. */ +-static size_t first_free_tab; ++size_t first_free_tab; + + /* Null-terminated array of input filenames. */ +-static char **file_list; ++char **file_list; + + /* Default for 'file_list' if no files are given on the command line. */ + static char *stdin_argv[] = +@@ -77,10 +81,10 @@ static char *stdin_argv[] = + }; + + /* True if we have ever read standard input. */ +-static bool have_read_stdin; ++bool have_read_stdin; + + /* The desired exit status. 
*/ +-static int exit_status; ++int exit_status; + + static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::"; + +@@ -125,128 +129,6 @@ + if (first_free_tab == n_tabs_allocated) + tab_list = X2NREALLOC (tab_list, &n_tabs_allocated); + tab_list[first_free_tab++] = tabval; +-} +- +-/* Add the comma or blank separated list of tab stops STOPS +- to the list of tab stops. */ +- +-static void +-parse_tab_stops (char const *stops) +-{ +- bool have_tabval = false; +- uintmax_t tabval IF_LINT ( = 0); +- char const *num_start IF_LINT ( = NULL); +- bool ok = true; +- +- for (; *stops; stops++) +- { +- if (*stops == ',' || isblank (to_uchar (*stops))) +- { +- if (have_tabval) +- add_tab_stop (tabval); +- have_tabval = false; +- } +- else if (ISDIGIT (*stops)) +- { +- if (!have_tabval) +- { +- tabval = 0; +- have_tabval = true; +- num_start = stops; +- } +- +- /* Detect overflow. */ +- if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) +- { +- size_t len = strspn (num_start, "0123456789"); +- char *bad_num = xstrndup (num_start, len); +- error (0, 0, _("tab stop is too large %s"), quote (bad_num)); +- free (bad_num); +- ok = false; +- stops = num_start + len - 1; +- } +- } +- else +- { +- error (0, 0, _("tab size contains invalid character(s): %s"), +- quote (stops)); +- ok = false; +- break; +- } +- } +- +- if (!ok) +- exit (EXIT_FAILURE); +- +- if (have_tabval) +- add_tab_stop (tabval); +-} +- +-/* Check that the list of tab stops TABS, with ENTRIES entries, +- contains only nonzero, ascending values. 
*/ +- +-static void +-validate_tab_stops (uintmax_t const *tabs, size_t entries) +-{ +- uintmax_t prev_tab = 0; +- size_t i; +- +- for (i = 0; i < entries; i++) +- { +- if (tabs[i] == 0) +- error (EXIT_FAILURE, 0, _("tab size cannot be 0")); +- if (tabs[i] <= prev_tab) +- error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); +- prev_tab = tabs[i]; +- } +-} +- +-/* Close the old stream pointer FP if it is non-NULL, +- and return a new one opened to read the next input file. +- Open a filename of '-' as the standard input. +- Return NULL if there are no more input files. */ +- +-static FILE * +-next_file (FILE *fp) +-{ +- static char *prev_file; +- char *file; +- +- if (fp) +- { +- if (ferror (fp)) +- { +- error (0, errno, "%s", prev_file); +- exit_status = EXIT_FAILURE; +- } +- if (STREQ (prev_file, "-")) +- clearerr (fp); /* Also clear EOF. */ +- else if (fclose (fp) != 0) +- { +- error (0, errno, "%s", prev_file); +- exit_status = EXIT_FAILURE; +- } +- } +- +- while ((file = *file_list++) != NULL) +- { +- if (STREQ (file, "-")) +- { +- have_read_stdin = true; +- fp = stdin; +- } +- else +- fp = fopen (file, "r"); +- if (fp) +- { +- prev_file = file; +- fadvise (fp, FADVISE_SEQUENTIAL); +- return fp; +- } +- error (0, errno, "%s", file); +- exit_status = EXIT_FAILURE; +- } +- return NULL; + } + + /* Change tabs to spaces, writing to stdout. +@@ -265,19 +146,19 @@ expand (void) + { + /* Input stream. */ + FILE *fp = next_file (NULL); ++ mb_file_t mbf; ++ mbf_char_t c; + + if (!fp) + return; + ++ mbf_init (mbf, fp); ++ + while (true) + { +- /* Input character, or EOF. */ +- int c; +- + /* If true, perform translations. */ + bool convert = true; + +- + /* The following variables have valid values only when CONVERT + is true: */ + +@@ -287,17 +168,23 @@ expand (void) + /* Index in TAB_LIST of next tab stop to examine. */ + size_t tab_index = 0; + +- + /* Convert a line of text. 
*/
+ 
+       do
+         {
+-          while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
+-            continue;
++          /* Read the next character; on EOF advance to the next input
++             file.  C is left as EOF only when no input files remain.  */
++          while (true)
++            {
++              mbf_getc (c, mbf);
++              if (!mb_iseof (c) || !(fp = next_file (fp)))
++                break;
++              mbf_init (mbf, fp);
++            }
+ 
+           if (convert)
+             {
+-              if (c == '\t')
++              if (mb_iseq (c, '\t'))
+                 {
+                   /* Column the next input tab stop is on.  */
+                   uintmax_t next_tab_column;
+@@ -328,32 +215,34 @@ expand (void)
+                     if (putchar (' ') < 0)
+                       error (EXIT_FAILURE, errno, _("write error"));
+ 
+-                  c = ' ';
++                  mb_setascii (&c, ' ');
+                 }
+-              else if (c == '\b')
++              else if (mb_iseq (c, '\b'))
+                 {
+                   /* Go back one column, and force recalculation of the
+                      next tab stop.  */
+                   column -= !!column;
+                   tab_index -= !!tab_index;
+                 }
+-              else
++              /* A leading control character could make us trip over.  */
++              else if (!mb_iscntrl (c))
+                 {
+-                  column++;
++                  column += mb_width (c);
+                   if (!column)
+                     error (EXIT_FAILURE, 0, _("input line is too long"));
+                 }
+ 
+-              convert &= convert_entire_line || !! isblank (c);
++              convert &= convert_entire_line || mb_isblank (c);
+             }
+ 
+-          if (c < 0)
++          if (mb_iseof (c))
+             return;
+ 
+-          if (putchar (c) < 0)
++          mb_putc (c, stdout);
++          if (ferror (stdout))
+             error (EXIT_FAILURE, errno, _("write error"));
+         }
+-      while (c != '\n');
++      while (!mb_iseq (c, '\n'));
+     }
+ }
+ 
+@@ -385,19 +274,19 @@ main (int argc, char **argv)
+           break;
+ 
+         case 't':
+-          parse_tab_stops (optarg);
++          parse_tab_stops (optarg, add_tab_stop);
+           break;
+ 
+         case '0': case '1': case '2': case '3': case '4':
+         case '5': case '6': case '7': case '8': case '9':
+           if (optarg)
+-            parse_tab_stops (optarg - 1);
++            parse_tab_stops (optarg - 1, add_tab_stop);
+           else
+             {
+               char tab_stop[2];
+               tab_stop[0] = c;
+               tab_stop[1] = '\0';
+-              parse_tab_stops (tab_stop);
++              parse_tab_stops (tab_stop, add_tab_stop);
+             }
+           break;
+ 
+diff --git a/src/local.mk b/src/local.mk
+index 536b7cc..bfede88 100644
+--- a/src/local.mk
++++ b/src/local.mk
+@@ -362,6 +362,8 @@ src_coreutils_SOURCES = src/coreutils.c
+ 
+ src_cp_SOURCES = src/cp.c 
$(copy_sources) $(selinux_sources) + src_dir_SOURCES = src/ls.c src/ls-dir.c ++src_expand_SOURCES = src/expand.c src/expand-core.c ++src_unexpand_SOURCES = src/unexpand.c src/expand-core.c + src_vdir_SOURCES = src/ls.c src/ls-vdir.c + src_id_SOURCES = src/id.c src/group-list.c + src_groups_SOURCES = src/groups.c src/group-list.c +diff --git a/src/unexpand.c b/src/unexpand.c +index e0f7c22..48fbb32 100644 +--- a/src/unexpand.c ++++ b/src/unexpand.c +@@ -38,12 +38,16 @@ + #include + #include + #include ++ ++#include ++ + #include "system.h" + #include "error.h" + #include "fadvise.h" +-#include "quote.h" + #include "xstrndup.h" + ++#include "expand-core.h" ++ + /* The official name of this program (e.g., no 'g' prefix). */ + #define PROGRAM_NAME "unexpand" + +@@ -62,17 +66,17 @@ static size_t max_column_width; + /* Array of the explicit column numbers of the tab stops; + after 'tab_list' is exhausted, the rest of the line is printed + unchanged. The first column is column 0. */ +-static uintmax_t *tab_list; ++uintmax_t *tab_list; + + /* The number of allocated entries in 'tab_list'. */ +-static size_t n_tabs_allocated; ++size_t n_tabs_allocated; + + /* The index of the first invalid element of 'tab_list', + where the next element can be added. */ +-static size_t first_free_tab; ++size_t first_free_tab; + + /* Null-terminated array of input filenames. */ +-static char **file_list; ++char **file_list; + + /* Default for 'file_list' if no files are given on the command line. */ + static char *stdin_argv[] = +@@ -81,10 +85,10 @@ static char *stdin_argv[] = + }; + + /* True if we have ever read standard input. */ +-static bool have_read_stdin; ++bool have_read_stdin; + + /* The desired exit status. */ +-static int exit_status; ++int exit_status; + + /* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ +@@ -154,128 +156,6 @@ add_tab_stop (uintmax_t tabval) + } + } + +-/* Add the comma or blank separated list of tab stops STOPS +- to the list of tab stops. */ +- +-static void +-parse_tab_stops (char const *stops) +-{ +- bool have_tabval = false; +- uintmax_t tabval IF_LINT ( = 0); +- char const *num_start IF_LINT ( = NULL); +- bool ok = true; +- +- for (; *stops; stops++) +- { +- if (*stops == ',' || isblank (to_uchar (*stops))) +- { +- if (have_tabval) +- add_tab_stop (tabval); +- have_tabval = false; +- } +- else if (ISDIGIT (*stops)) +- { +- if (!have_tabval) +- { +- tabval = 0; +- have_tabval = true; +- num_start = stops; +- } +- +- /* Detect overflow. */ +- if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) +- { +- size_t len = strspn (num_start, "0123456789"); +- char *bad_num = xstrndup (num_start, len); +- error (0, 0, _("tab stop is too large %s"), quote (bad_num)); +- free (bad_num); +- ok = false; +- stops = num_start + len - 1; +- } +- } +- else +- { +- error (0, 0, _("tab size contains invalid character(s): %s"), +- quote (stops)); +- ok = false; +- break; +- } +- } +- +- if (!ok) +- exit (EXIT_FAILURE); +- +- if (have_tabval) +- add_tab_stop (tabval); +-} +- +-/* Check that the list of tab stops TABS, with ENTRIES entries, +- contains only nonzero, ascending values. */ +- +-static void +-validate_tab_stops (uintmax_t const *tabs, size_t entries) +-{ +- uintmax_t prev_tab = 0; +- size_t i; +- +- for (i = 0; i < entries; i++) +- { +- if (tabs[i] == 0) +- error (EXIT_FAILURE, 0, _("tab size cannot be 0")); +- if (tabs[i] <= prev_tab) +- error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); +- prev_tab = tabs[i]; +- } +-} +- +-/* Close the old stream pointer FP if it is non-NULL, +- and return a new one opened to read the next input file. +- Open a filename of '-' as the standard input. +- Return NULL if there are no more input files. 
*/ +- +-static FILE * +-next_file (FILE *fp) +-{ +- static char *prev_file; +- char *file; +- +- if (fp) +- { +- if (ferror (fp)) +- { +- error (0, errno, "%s", prev_file); +- exit_status = EXIT_FAILURE; +- } +- if (STREQ (prev_file, "-")) +- clearerr (fp); /* Also clear EOF. */ +- else if (fclose (fp) != 0) +- { +- error (0, errno, "%s", prev_file); +- exit_status = EXIT_FAILURE; +- } +- } +- +- while ((file = *file_list++) != NULL) +- { +- if (STREQ (file, "-")) +- { +- have_read_stdin = true; +- fp = stdin; +- } +- else +- fp = fopen (file, "r"); +- if (fp) +- { +- prev_file = file; +- fadvise (fp, FADVISE_SEQUENTIAL); +- return fp; +- } +- error (0, errno, "%s", file); +- exit_status = EXIT_FAILURE; +- } +- return NULL; +-} +- + /* Change blanks to tabs, writing to stdout. + Read each file in 'file_list', in order. */ + +@@ -284,11 +164,12 @@ unexpand (void) + { + /* Input stream. */ + FILE *fp = next_file (NULL); ++ mb_file_t mbf; + + /* The array of pending blanks. In non-POSIX locales, blanks can + include characters other than spaces, so the blanks must be + stored, not merely counted. */ +- char *pending_blank; ++ mbf_char_t *pending_blank; + + if (!fp) + return; +@@ -296,12 +177,14 @@ unexpand (void) + /* The worst case is a non-blank character, then one blank, then a + tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so + allocate MAX_COLUMN_WIDTH bytes to store the blanks. */ +- pending_blank = xmalloc (max_column_width); ++ pending_blank = xmalloc (max_column_width * sizeof (mbf_char_t)); ++ ++ mbf_init (mbf, fp); + + while (true) + { + /* Input character, or EOF. */ +- int c; ++ mbf_char_t c; + + /* If true, perform translations. 
*/
+       bool convert = true;
+@@ -335,12 +218,19 @@ unexpand (void)
+ 
+       do
+         {
+-          while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
+-            continue;
++          /* Read the next character; on EOF advance to the next input
++             file.  C is left as EOF only when no input files remain.  */
++          while (true)
++            {
++              mbf_getc (c, mbf);
++              if (!mb_iseof (c) || !(fp = next_file (fp)))
++                break;
++              mbf_init (mbf, fp);
++            }
+ 
+           if (convert)
+             {
+-              bool blank = !! isblank (c);
++              bool blank = mb_isblank (c);
+ 
+               if (blank)
+                 {
+@@ -372,16 +262,16 @@ unexpand (void)
+                       if (next_tab_column < column)
+                         error (EXIT_FAILURE, 0, _("input line is too long"));
+ 
+-                      if (c == '\t')
++                      if (mb_iseq (c, '\t'))
+                         {
+                           column = next_tab_column;
+ 
+                           if (pending)
+-                            pending_blank[0] = '\t';
++                            mb_setascii (&pending_blank[0], '\t');
+                         }
+                       else
+                         {
+-                          column++;
++                          column += mb_width (c);
+ 
+                           if (! (prev_blank && column == next_tab_column))
+                             {
+@@ -389,13 +279,14 @@ unexpand (void)
+                                  will be replaced by tabs.  */
+                               if (column == next_tab_column)
+                                 one_blank_before_tab_stop = true;
+-                              pending_blank[pending++] = c;
++                              mb_copy (&pending_blank[pending++], &c);
+                               prev_blank = true;
+                               continue;
+                             }
+ 
+                           /* Replace the pending blanks by a tab or two.  */
+-                          pending_blank[0] = c = '\t';
++                          mb_setascii (&c, '\t');
++                          mb_setascii (&pending_blank[0], '\t');
+                         }
+ 
+                       /* Discard pending blanks, unless it was a single
+@@ -403,7 +294,7 @@ unexpand (void)
+                       pending = one_blank_before_tab_stop;
+                     }
+                 }
+-              else if (c == '\b')
++              else if (mb_iseq (c, '\b'))
+                 {
+                   /* Go back one column, and force recalculation of the
+                      next tab stop.  
*/ +@@ -413,7 +304,7 @@ unexpand (void) + } + else + { +- column++; ++ column += mb_width (c); + if (!column) + error (EXIT_FAILURE, 0, _("input line is too long")); + } +@@ -421,9 +312,13 @@ unexpand (void) + if (pending) + { + if (pending > 1 && one_blank_before_tab_stop) +- pending_blank[0] = '\t'; +- if (fwrite (pending_blank, 1, pending, stdout) != pending) ++ mb_setascii (&pending_blank[0], '\t'); ++ ++ for (int n = 0; n < pending; ++n) ++ mb_putc (pending_blank[n], stdout); ++ if (ferror (stdout)) + error (EXIT_FAILURE, errno, _("write error")); ++ + pending = 0; + one_blank_before_tab_stop = false; + } +@@ -432,16 +327,16 @@ unexpand (void) + convert &= convert_entire_line || blank; + } + +- if (c < 0) ++ if (mb_iseof (c)) + { + free (pending_blank); + return; + } +- +- if (putchar (c) < 0) ++ mb_putc (c, stdout); ++ if (ferror (stdout)) + error (EXIT_FAILURE, errno, _("write error")); + } +- while (c != '\n'); ++ while (!mb_iseq (c, '\n')); + } + } + +@@ -482,7 +377,7 @@ main (int argc, char **argv) + break; + case 't': + convert_entire_line = true; +- parse_tab_stops (optarg); ++ parse_tab_stops (optarg, add_tab_stop); + break; + case CONVERT_FIRST_ONLY_OPTION: + convert_first_only = true; +diff --git a/tests/expand/mb.sh b/tests/expand/mb.sh +new file mode 100755 +index 0000000..7971e18 +--- /dev/null ++++ b/tests/expand/mb.sh +@@ -0,0 +1,98 @@ ++#!/bin/sh ++ ++# Copyright (C) 2012-2015 Free Software Foundation, Inc. ++ ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation, either version 3 of the License, or ++# (at your option) any later version. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++ ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src ++print_ver_ expand ++ ++export LC_ALL=en_US.UTF-8 ++ ++#input containing multibyte characters ++cat <<\EOF > in || framework_failure_ ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++EOF ++env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_ ++ ++cat <<\EOF > exp || framework_failure_ ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. ä xx ++EOF ++ ++expand < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++#test characters with display widths != 1 ++env printf '12345678 ++e\t|ascii(1) ++\u00E9\t|composed(1) ++e\u0301\t|decomposed(1) ++\u3000\t|ideo-space(2) ++\uFF0D\t|full-hypen(2) ++' > in || framework_failure_ ++ ++env printf '12345678 ++e |ascii(1) ++\u00E9 |composed(1) ++e\u0301 |decomposed(1) ++\u3000 |ideo-space(2) ++\uFF0D |full-hypen(2) ++' > exp || framework_failure_ ++ ++expand < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++#shouldn't fail with "input line too long" ++#when a line starts with a control character ++env printf '\n' > in || framework_failure_ ++ ++expand < in > out || fail=1 ++compare in out > /dev/null 2>&1 || fail=1 ++ ++#non-Unicode characters interspersed between Unicode ones ++env printf '12345678 ++\t\xFF| ++\xFF\t| ++\t\xFFä| ++ä\xFF\t| ++\tä\xFF| ++\xFF\tä| ++äbcdef\xFF\t| ++' > in || framework_failure_ ++ ++env printf '12345678 ++ \xFF| ++\xFF | ++ \xFFä| ++ä\xFF | ++ ä\xFF| ++\xFF ä| ++äbcdef\xFF | ++' > exp || framework_failure_ ++ ++expand < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++exit $fail +diff --git a/tests/local.mk b/tests/local.mk +index 7df04da..d3462be 100644 +--- a/tests/local.mk ++++ b/tests/local.mk +@@ -532,6 +532,7 @@ all_tests = \ + tests/du/threshold.sh \ + tests/du/trailing-slash.sh \ + 
tests/du/two-args.sh \ ++ tests/expand/mb.sh \ + tests/id/gnu-zero-uids.sh \ + tests/id/no-context.sh \ + tests/id/context.sh \ +@@ -671,6 +672,7 @@ all_tests = \ + tests/touch/read-only.sh \ + tests/touch/relative.sh \ + tests/touch/trailing-slash.sh \ ++ tests/unexpand/mb.sh \ + $(all_root_tests) + + # See tests/factor/create-test.sh. +diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh +new file mode 100755 +index 0000000..60d4c1a +--- /dev/null ++++ b/tests/unexpand/mb.sh +@@ -0,0 +1,97 @@ ++#!/bin/sh ++ ++# Copyright (C) 2012-2015 Free Software Foundation, Inc. ++ ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation, either version 3 of the License, or ++# (at your option) any later version. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++ ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src ++print_ver_ unexpand ++ ++export LC_ALL=en_US.UTF-8 ++ ++#input containing multibyte characters ++cat > in <<\EOF ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. ä xx ++EOF ++ ++cat > exp <<\EOF ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. 
ä xx ++EOF ++ ++unexpand -a < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++#test characters with a display width larger than 1 ++ ++env printf '12345678 ++e |ascii(1) ++\u00E9 |composed(1) ++e\u0301 |decomposed(1) ++\u3000 |ideo-space(2) ++\uFF0D |full-hypen(2) ++' > in || framework_failure_ ++ ++env printf '12345678 ++e\t|ascii(1) ++\u00E9\t|composed(1) ++e\u0301\t|decomposed(1) ++\u3000\t|ideo-space(2) ++\uFF0D\t|full-hypen(2) ++' > exp || framework_failure_ ++ ++unexpand -a < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++#test input where a blank of width > 1 is not being substituted ++in="$(LC_ALL=en_US.UTF-8 printf ' \u3000 ö ü ß')" ++exp='   ö ü ß' ++ ++unexpand -a < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++#non-Unicode characters interspersed between Unicode ones ++env printf '12345678 ++ \xFF| ++\xFF | ++ \xFFä| ++ä\xFF | ++ ä\xFF| ++\xFF ä| ++äbcdef\xFF | ++' > in || framework_failure_ ++ ++env printf '12345678 ++\t\xFF| ++\xFF\t| ++\t\xFFä| ++ä\xFF\t| ++\tä\xFF| ++\xFF\tä| ++äbcdef\xFF\t| ++' > exp || framework_failure_ ++ ++unexpand -a < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 +-- +2.4.3 + +--- /dev/null 2015-11-30 08:40:17.566742513 +0100 ++++ coreutils-8.24/m4/mbfile.m4 2015-12-01 09:30:55.951149907 +0100 +@@ -0,0 +1,14 @@ ++# mbfile.m4 serial 7 ++dnl Copyright (C) 2005, 2008-2015 Free Software Foundation, Inc. ++dnl This file is free software; the Free Software Foundation ++dnl gives unlimited permission to copy and/or distribute it, ++dnl with or without modifications, as long as this notice is preserved. ++ ++dnl autoconf tests required for use of mbfile.h ++dnl From Bruno Haible. 
++ ++AC_DEFUN([gl_MBFILE], ++[ ++ AC_REQUIRE([AC_TYPE_MBSTATE_T]) ++ : ++]) +--- /dev/null 2015-11-30 08:40:17.566742513 +0100 ++++ coreutils-8.24/lib/mbfile.c 2015-12-01 09:28:22.254928468 +0100 +@@ -0,0 +1,3 @@ ++#include ++#define MBFILE_INLINE _GL_EXTERN_INLINE ++#include "mbfile.h" +--- /dev/null 2015-11-30 08:40:17.566742513 +0100 ++++ coreutils-8.24/lib/mbfile.h 2015-12-01 09:28:30.829885570 +0100 +@@ -0,0 +1,255 @@ ++/* Multibyte character I/O: macros for multi-byte encodings. ++ Copyright (C) 2001, 2005, 2009-2015 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* Written by Mitsuru Chinen ++ and Bruno Haible . */ ++ ++/* The macros in this file implement multi-byte character input from a ++ stream. ++ ++ mb_file_t ++ is the type for multibyte character input stream, usable for variable ++ declarations. ++ ++ mbf_char_t ++ is the type for multibyte character or EOF, usable for variable ++ declarations. ++ ++ mbf_init (mbf, stream) ++ initializes the MB_FILE for reading from stream. ++ ++ mbf_getc (mbc, mbf) ++ reads the next multibyte character from mbf and stores it in mbc. ++ ++ mb_iseof (mbc) ++ returns true if mbc represents the EOF value. ++ ++ Here are the function prototypes of the macros. 
++ ++ extern void mbf_init (mb_file_t mbf, FILE *stream); ++ extern void mbf_getc (mbf_char_t mbc, mb_file_t mbf); ++ extern bool mb_iseof (const mbf_char_t mbc); ++ */ ++ ++#ifndef _MBFILE_H ++#define _MBFILE_H 1 ++ ++#include ++#include ++#include ++#include ++ ++/* Tru64 with Desktop Toolkit C has a bug: must be included before ++ . ++ BSD/OS 4.1 has a bug: and must be included before ++ . */ ++#include ++#include ++#include ++ ++#include "mbchar.h" ++ ++#ifndef _GL_INLINE_HEADER_BEGIN ++ #error "Please include config.h first." ++#endif ++_GL_INLINE_HEADER_BEGIN ++#ifndef MBFILE_INLINE ++# define MBFILE_INLINE _GL_INLINE ++#endif ++ ++struct mbfile_multi { ++ FILE *fp; ++ bool eof_seen; ++ bool have_pushback; ++ mbstate_t state; ++ unsigned int bufcount; ++ char buf[MBCHAR_BUF_SIZE]; ++ struct mbchar pushback; ++}; ++ ++MBFILE_INLINE void ++mbfile_multi_getc (struct mbchar *mbc, struct mbfile_multi *mbf) ++{ ++ size_t bytes; ++ ++ /* If EOF has already been seen, don't use getc. This matters if ++ mbf->fp is connected to an interactive tty. */ ++ if (mbf->eof_seen) ++ goto eof; ++ ++ /* Return character pushed back, if there is one. */ ++ if (mbf->have_pushback) ++ { ++ mb_copy (mbc, &mbf->pushback); ++ mbf->have_pushback = false; ++ return; ++ } ++ ++ /* Before using mbrtowc, we need at least one byte. */ ++ if (mbf->bufcount == 0) ++ { ++ int c = getc (mbf->fp); ++ if (c == EOF) ++ { ++ mbf->eof_seen = true; ++ goto eof; ++ } ++ mbf->buf[0] = (unsigned char) c; ++ mbf->bufcount++; ++ } ++ ++ /* Handle most ASCII characters quickly, without calling mbrtowc(). */ ++ if (mbf->bufcount == 1 && mbsinit (&mbf->state) && is_basic (mbf->buf[0])) ++ { ++ /* These characters are part of the basic character set. ISO C 99 ++ guarantees that their wide character code is identical to their ++ char code. 
*/ ++ mbc->wc = mbc->buf[0] = mbf->buf[0]; ++ mbc->wc_valid = true; ++ mbc->ptr = &mbc->buf[0]; ++ mbc->bytes = 1; ++ mbf->bufcount = 0; ++ return; ++ } ++ ++ /* Use mbrtowc on an increasing number of bytes. Read only as many bytes ++ from mbf->fp as needed. This is needed to give reasonable interactive ++ behaviour when mbf->fp is connected to an interactive tty. */ ++ for (;;) ++ { ++ /* We don't know whether the 'mbrtowc' function updates the state when ++ it returns -2, - this is the ISO C 99 and glibc-2.2 behaviour - or ++ not - amended ANSI C, glibc-2.1 and Solaris 2.7 behaviour. We ++ don't have an autoconf test for this, yet. ++ The new behaviour would allow us to feed the bytes one by one into ++ mbrtowc. But the old behaviour forces us to feed all bytes since ++ the end of the last character into mbrtowc. Since we want to retry ++ with more bytes when mbrtowc returns -2, we must backup the state ++ before calling mbrtowc, because implementations with the new ++ behaviour will clobber it. */ ++ mbstate_t backup_state = mbf->state; ++ ++ bytes = mbrtowc (&mbc->wc, &mbf->buf[0], mbf->bufcount, &mbf->state); ++ ++ if (bytes == (size_t) -1) ++ { ++ /* An invalid multibyte sequence was encountered. */ ++ /* Return a single byte. */ ++ bytes = 1; ++ mbc->wc_valid = false; ++ break; ++ } ++ else if (bytes == (size_t) -2) ++ { ++ /* An incomplete multibyte character. */ ++ mbf->state = backup_state; ++ if (mbf->bufcount == MBCHAR_BUF_SIZE) ++ { ++ /* An overlong incomplete multibyte sequence was encountered. */ ++ /* Return a single byte. */ ++ bytes = 1; ++ mbc->wc_valid = false; ++ break; ++ } ++ else ++ { ++ /* Read one more byte and retry mbrtowc. */ ++ int c = getc (mbf->fp); ++ if (c == EOF) ++ { ++ /* An incomplete multibyte character at the end. 
*/ ++ mbf->eof_seen = true; ++ bytes = mbf->bufcount; ++ mbc->wc_valid = false; ++ break; ++ } ++ mbf->buf[mbf->bufcount] = (unsigned char) c; ++ mbf->bufcount++; ++ } ++ } ++ else ++ { ++ if (bytes == 0) ++ { ++ /* A null wide character was encountered. */ ++ bytes = 1; ++ assert (mbf->buf[0] == '\0'); ++ assert (mbc->wc == 0); ++ } ++ mbc->wc_valid = true; ++ break; ++ } ++ } ++ ++ /* Return the multibyte sequence mbf->buf[0..bytes-1]. */ ++ mbc->ptr = &mbc->buf[0]; ++ memcpy (&mbc->buf[0], &mbf->buf[0], bytes); ++ mbc->bytes = bytes; ++ ++ mbf->bufcount -= bytes; ++ if (mbf->bufcount > 0) ++ { ++ /* It's not worth calling memmove() for so few bytes. */ ++ unsigned int count = mbf->bufcount; ++ char *p = &mbf->buf[0]; ++ ++ do ++ { ++ *p = *(p + bytes); ++ p++; ++ } ++ while (--count > 0); ++ } ++ return; ++ ++eof: ++ /* An mbchar_t with bytes == 0 is used to indicate EOF. */ ++ mbc->ptr = NULL; ++ mbc->bytes = 0; ++ mbc->wc_valid = false; ++ return; ++} ++ ++MBFILE_INLINE void ++mbfile_multi_ungetc (const struct mbchar *mbc, struct mbfile_multi *mbf) ++{ ++ mb_copy (&mbf->pushback, mbc); ++ mbf->have_pushback = true; ++} ++ ++typedef struct mbfile_multi mb_file_t; ++ ++typedef mbchar_t mbf_char_t; ++ ++#define mbf_init(mbf, stream) \ ++ ((mbf).fp = (stream), \ ++ (mbf).eof_seen = false, \ ++ (mbf).have_pushback = false, \ ++ memset (&(mbf).state, '\0', sizeof (mbstate_t)), \ ++ (mbf).bufcount = 0) ++ ++#define mbf_getc(mbc, mbf) mbfile_multi_getc (&(mbc), &(mbf)) ++ ++#define mbf_ungetc(mbc, mbf) mbfile_multi_ungetc (&(mbc), &(mbf)) ++ ++#define mb_iseof(mbc) ((mbc).bytes == 0) ++ ++#ifndef _GL_INLINE_HEADER_BEGIN ++ #error "Please include config.h first." 
++#endif ++_GL_INLINE_HEADER_END ++ ++#endif /* _MBFILE_H */ diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch index f823a40..5d3a591 100644 --- a/coreutils-i18n.patch +++ b/coreutils-i18n.patch @@ -596,201 +596,6 @@ diff -urNp coreutils-8.24-orig/src/cut.c coreutils-8.24/src/cut.c } if (optind == argc) -diff -urNp coreutils-8.24-orig/src/expand.c coreutils-8.24/src/expand.c ---- coreutils-8.24-orig/src/expand.c 2015-06-26 19:05:22.000000000 +0200 -+++ coreutils-8.24/src/expand.c 2015-07-05 09:04:33.028546950 +0200 -@@ -37,12 +37,34 @@ - #include <stdio.h> - #include <getopt.h> - #include <sys/types.h> -+ -+/* Get mbstate_t, mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif -+ -+/* Get iswblank(). */ -+#if HAVE_WCTYPE_H -+# include <wctype.h> -+#endif -+ - #include "system.h" - #include "error.h" - #include "fadvise.h" - #include "quote.h" - #include "xstrndup.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no 'g' prefix). */ - #define PROGRAM_NAME "expand" - -@@ -357,6 +379,142 @@ expand (void) - } - } - -+#if HAVE_MBRTOWC -+static void -+expand_multibyte (void) -+{ -+ FILE *fp; /* Input strem. */ -+ mbstate_t i_state; /* Current shift state of the input stream. */ -+ mbstate_t i_state_bak; /* Back up the I_STATE. */ -+ mbstate_t o_state; /* Current shift state of the output stream. */ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos = buf; /* Next read position of BUF. */ -+ size_t buflen = 0; /* The length of the byte sequence in buf. */ -+ wchar_t wc; /* A gotten wide character. 
*/ -+ size_t mblength; /* The byte size of a multibyte character -+ which shows as same character as WC. */ -+ int tab_index = 0; /* Index in `tab_list' of next tabstop. */ -+ int column = 0; /* Column on screen of the next char. */ -+ int next_tab_column; /* Column the next tab stop is on. */ -+ int convert = 1; /* If nonzero, perform translations. */ -+ -+ fp = next_file ((FILE *) NULL); -+ if (fp == NULL) -+ return; -+ -+ memset (&o_state, '\0', sizeof(mbstate_t)); -+ memset (&i_state, '\0', sizeof(mbstate_t)); -+ -+ for (;;) -+ { -+ /* Refill the buffer BUF. */ -+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) -+ { -+ memmove (buf, bufpos, buflen); -+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); -+ bufpos = buf; -+ } -+ -+ /* No character is left in BUF. */ -+ if (buflen < 1) -+ { -+ fp = next_file (fp); -+ -+ if (fp == NULL) -+ break; /* No more files. */ -+ else -+ { -+ memset (&i_state, '\0', sizeof(mbstate_t)); -+ continue; -+ } -+ } -+ -+ /* Get a wide character. */ -+ i_state_bak = i_state; -+ mblength = mbrtowc (&wc, bufpos, buflen, &i_state); -+ -+ switch (mblength) -+ { -+ case (size_t)-1: /* illegal byte sequence. */ -+ case (size_t)-2: -+ mblength = 1; -+ i_state = i_state_bak; -+ if (convert) -+ { -+ ++column; -+ if (convert_entire_line == 0 && !isblank(*bufpos)) -+ convert = 0; -+ } -+ putchar (*bufpos); -+ break; -+ -+ case 0: /* null. */ -+ mblength = 1; -+ if (convert && convert_entire_line == 0) -+ convert = 0; -+ putchar ('\0'); -+ break; -+ -+ default: -+ if (wc == L'\n') /* LF. */ -+ { -+ tab_index = 0; -+ column = 0; -+ convert = 1; -+ putchar ('\n'); -+ } -+ else if (wc == L'\t' && convert) /* Tab. */ -+ { -+ if (tab_size == 0) -+ { -+ /* Do not let tab_index == first_free_tab; -+ stop when it is 1 less. 
*/ -+ while (tab_index < first_free_tab - 1 -+ && column >= tab_list[tab_index]) -+ tab_index++; -+ next_tab_column = tab_list[tab_index]; -+ if (tab_index < first_free_tab - 1) -+ tab_index++; -+ if (column >= next_tab_column) -+ next_tab_column = column + 1; -+ } -+ else -+ next_tab_column = column + tab_size - column % tab_size; -+ -+ while (column < next_tab_column) -+ { -+ putchar (' '); -+ ++column; -+ } -+ } -+ else /* Others. */ -+ { -+ if (convert) -+ { -+ if (wc == L'\b') -+ { -+ if (column > 0) -+ --column; -+ } -+ else -+ { -+ int width; /* The width of WC. */ -+ -+ width = wcwidth (wc); -+ column += (width > 0) ? width : 0; -+ if (convert_entire_line == 0 && !iswblank(wc)) -+ convert = 0; -+ } -+ } -+ fwrite (bufpos, sizeof(char), mblength, stdout); -+ } -+ } -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+} -+#endif -+ - int - main (int argc, char **argv) - { -@@ -421,7 +579,12 @@ main (int argc, char **argv) - - file_list = (optind < argc ? &argv[optind] : stdin_argv); - -- expand (); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ expand_multibyte (); -+ else -+#endif -+ expand (); - - if (have_read_stdin && fclose (stdin) != 0) - error (EXIT_FAILURE, errno, "-"); diff -urNp coreutils-8.24-orig/src/fold.c coreutils-8.24/src/fold.c --- coreutils-8.24-orig/src/fold.c 2015-06-26 19:05:22.000000000 +0200 +++ coreutils-8.24/src/fold.c 2015-07-05 09:04:33.029546958 +0200 @@ -3480,264 +3285,6 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c if (have_read_stdin && fclose (stdin) == EOF) die (_("close failed"), "-"); -diff -urNp coreutils-8.24-orig/src/unexpand.c coreutils-8.24/src/unexpand.c ---- coreutils-8.24-orig/src/unexpand.c 2015-06-26 19:05:22.000000000 +0200 -+++ coreutils-8.24/src/unexpand.c 2015-07-05 09:04:33.032546980 +0200 -@@ -38,12 +38,29 @@ - #include <stdio.h> - #include <getopt.h> - #include <sys/types.h> -+ -+/* Get mbstate_t, mbrtowc(), wcwidth(). 
*/ -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif -+ - #include "system.h" - #include "error.h" - #include "fadvise.h" - #include "quote.h" - #include "xstrndup.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no 'g' prefix). */ - #define PROGRAM_NAME "unexpand" - -@@ -103,6 +120,210 @@ static struct option const longopts[] = - {NULL, 0, NULL, 0} - }; - -+static FILE *next_file (FILE *fp); -+ -+#if HAVE_MBRTOWC -+static void -+unexpand_multibyte (void) -+{ -+ FILE *fp; /* Input stream. */ -+ mbstate_t i_state; /* Current shift state of the input stream. */ -+ mbstate_t i_state_bak; /* Back up the I_STATE. */ -+ mbstate_t o_state; /* Current shift state of the output stream. */ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos = buf; /* Next read position of BUF. */ -+ size_t buflen = 0; /* The length of the byte sequence in buf. */ -+ wint_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character -+ which shows as same character as WC. */ -+ bool prev_tab = false; -+ -+ /* Index in `tab_list' of next tabstop: */ -+ int tab_index = 0; /* For calculating width of pending tabs. */ -+ int print_tab_index = 0; /* For printing as many tabs as possible. */ -+ unsigned int column = 0; /* Column on screen of next char. */ -+ int next_tab_column; /* Column the next tab stop is on. */ -+ int convert = 1; /* If nonzero, perform translations. */ -+ unsigned int pending = 0; /* Pending columns of blanks. 
*/ -+ -+ fp = next_file ((FILE *) NULL); -+ if (fp == NULL) -+ return; -+ -+ memset (&o_state, '\0', sizeof(mbstate_t)); -+ memset (&i_state, '\0', sizeof(mbstate_t)); -+ -+ for (;;) -+ { -+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) -+ { -+ memmove (buf, bufpos, buflen); -+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); -+ bufpos = buf; -+ } -+ -+ /* Get a wide character. */ -+ if (buflen < 1) -+ { -+ mblength = 1; -+ wc = WEOF; -+ } -+ else -+ { -+ i_state_bak = i_state; -+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state); -+ } -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ i_state = i_state_bak; -+ wc = L'\0'; -+ } -+ -+ if (wc == L' ' && convert && column < INT_MAX) -+ { -+ ++pending; -+ ++column; -+ } -+ else if (wc == L'\t' && convert) -+ { -+ if (tab_size == 0) -+ { -+ /* Do not let tab_index == first_free_tab; -+ stop when it is 1 less. */ -+ while (tab_index < first_free_tab - 1 -+ && column >= tab_list[tab_index]) -+ tab_index++; -+ next_tab_column = tab_list[tab_index]; -+ if (tab_index < first_free_tab - 1) -+ tab_index++; -+ if (column >= next_tab_column) -+ { -+ convert = 0; /* Ran out of tab stops. */ -+ goto flush_pend_mb; -+ } -+ } -+ else -+ { -+ next_tab_column = column + tab_size - column % tab_size; -+ } -+ pending += next_tab_column - column; -+ column = next_tab_column; -+ } -+ else -+ { -+flush_pend_mb: -+ /* Flush pending spaces. Print as many tabs as possible, -+ then print the rest as spaces. */ -+ if (pending == 1 && column != 1 && !prev_tab) -+ { -+ putchar (' '); -+ pending = 0; -+ } -+ column -= pending; -+ while (pending > 0) -+ { -+ if (tab_size == 0) -+ { -+ /* Do not let print_tab_index == first_free_tab; -+ stop when it is 1 less. 
*/ -+ while (print_tab_index < first_free_tab - 1 -+ && column >= tab_list[print_tab_index]) -+ print_tab_index++; -+ next_tab_column = tab_list[print_tab_index]; -+ if (print_tab_index < first_free_tab - 1) -+ print_tab_index++; -+ } -+ else -+ { -+ next_tab_column = -+ column + tab_size - column % tab_size; -+ } -+ if (next_tab_column - column <= pending) -+ { -+ putchar ('\t'); -+ pending -= next_tab_column - column; -+ column = next_tab_column; -+ } -+ else -+ { -+ --print_tab_index; -+ column += pending; -+ while (pending != 0) -+ { -+ putchar (' '); -+ pending--; -+ } -+ } -+ } -+ -+ if (wc == WEOF) -+ { -+ fp = next_file (fp); -+ if (fp == NULL) -+ break; /* No more files. */ -+ else -+ { -+ memset (&i_state, '\0', sizeof(mbstate_t)); -+ continue; -+ } -+ } -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ if (convert) -+ { -+ ++column; -+ if (convert_entire_line == 0) -+ convert = 0; -+ } -+ mblength = 1; -+ putchar (buf[0]); -+ } -+ else if (mblength == 0) -+ { -+ if (convert && convert_entire_line == 0) -+ convert = 0; -+ mblength = 1; -+ putchar ('\0'); -+ } -+ else -+ { -+ if (convert) -+ { -+ if (wc == L'\b') -+ { -+ if (column > 0) -+ --column; -+ } -+ else -+ { -+ int width; /* The width of WC. */ -+ -+ width = wcwidth (wc); -+ column += (width > 0) ? width : 0; -+ if (convert_entire_line == 0) -+ convert = 0; -+ } -+ } -+ -+ if (wc == L'\n') -+ { -+ tab_index = print_tab_index = 0; -+ column = pending = 0; -+ convert = 1; -+ } -+ fwrite (bufpos, sizeof(char), mblength, stdout); -+ } -+ } -+ prev_tab = wc == L'\t'; -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+} -+#endif -+ -+ - void - usage (int status) - { -@@ -523,7 +744,12 @@ main (int argc, char **argv) - - file_list = (optind < argc ? 
&argv[optind] : stdin_argv); - -- unexpand (); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ unexpand_multibyte (); -+ else -+#endif -+ unexpand (); - - if (have_read_stdin && fclose (stdin) != 0) - error (EXIT_FAILURE, errno, "-"); diff -urNp coreutils-8.24-orig/src/uniq.c coreutils-8.24/src/uniq.c --- coreutils-8.24-orig/src/uniq.c 2015-06-26 19:04:19.000000000 +0200 +++ coreutils-8.24/src/uniq.c 2015-07-05 09:04:33.032546980 +0200 diff --git a/coreutils.spec b/coreutils.spec index b0c13dc..d2d7c1c 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,7 +1,7 @@ Summary: A set of basic GNU tools commonly used in shell scripts Name: coreutils Version: 8.24 -Release: 101%{?dist} +Release: 102%{?dist} License: GPLv3+ Group: System Environment/Base Url: http://www.gnu.org/software/coreutils/ @@ -41,6 +41,8 @@ Patch713: coreutils-4.5.3-langinfo.patch # (sb) lin18nux/lsb compliance - multibyte functionality patch Patch800: coreutils-i18n.patch +# (sb) lin18nux/lsb compliance - expand/unexpand +Patch801: coreutils-i18n-expand-unexpand.patch #getgrouplist() patch from Ulrich Drepper. Patch908: coreutils-getgrouplist.patch @@ -171,6 +173,7 @@ including documentation and translations. # li18nux/lsb %patch800 -p1 -b .i18n +%patch801 -p1 -b .i18n-expand # Coreutils %patch908 -p1 -b .getgrouplist @@ -339,6 +342,9 @@ fi %{_mandir}/man*/* %changelog +* Tue Dec 01 2015 Ondrej Oprala - 8.24-102 +- Use the new i18n implementation for expand/unexpand + * Mon Nov 30 2015 Ondrej Vasik - 8.24-101 - coreutils-single should provide versioned coreutils (#1286338)