diff --git a/src/diff.c b/src/diff.c
index df3338c..60c6ce7 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -76,6 +76,8 @@ static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
+bool (*lines_differ) (char const *, size_t, char const *, size_t);
+
/* If comparing directories, compare their common subdirectories
recursively. */
static bool recursive;
@@ -298,6 +300,13 @@ main (int argc, char **argv)
excluded = new_exclude ();
presume_output_tty = false;
+#ifdef HANDLE_MULTIBYTE
+ if (MB_CUR_MAX > 1)
+ lines_differ = lines_differ_multibyte;
+ else
+#endif
+ lines_differ = lines_differ_singlebyte;
+
/* Decode the options. */
while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
diff --git a/src/diff.c.i18n b/src/diff.c.i18n
new file mode 100644
index 0000000..df3338c
--- /dev/null
+++ b/src/diff.c.i18n
@@ -0,0 +1,1473 @@
+/* diff - compare files line by line
+
+ Copyright (C) 1988-1989, 1992-1994, 1996, 1998, 2001-2002, 2004, 2006-2007,
+ 2009-2013, 2015-2017 Free Software Foundation, Inc.
+
+ This file is part of GNU DIFF.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see . */
+
+#define GDIFF_MAIN
+#include "diff.h"
+#include "die.h"
+#include
+#include "paths.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* The official name of this program (e.g., no 'g' prefix). */
+#define PROGRAM_NAME "diff"
+
+#define AUTHORS \
+ proper_name ("Paul Eggert"), \
+ proper_name ("Mike Haertel"), \
+ proper_name ("David Hayes"), \
+ proper_name ("Richard Stallman"), \
+ proper_name ("Len Tower")
+
+#ifndef GUTTER_WIDTH_MINIMUM
+# define GUTTER_WIDTH_MINIMUM 3
+#endif
+
+struct regexp_list
+{
+ char *regexps; /* chars representing disjunction of the regexps */
+ size_t len; /* chars used in 'regexps' */
+ size_t size; /* size malloc'ed for 'regexps'; 0 if not malloc'ed */
+ bool multiple_regexps;/* Does 'regexps' represent a disjunction? */
+ struct re_pattern_buffer *buf;
+};
+
+static int compare_files (struct comparison const *, char const *, char const *);
+static void add_regexp (struct regexp_list *, char const *);
+static void summarize_regexp_list (struct regexp_list *);
+static void specify_style (enum output_style);
+static void specify_value (char const **, char const *, char const *);
+static void specify_colors_style (char const *);
+static void try_help (char const *, char const *) __attribute__((noreturn));
+static void check_stdout (void);
+static void usage (void);
+
+/* If comparing directories, compare their common subdirectories
+ recursively. */
+static bool recursive;
+
+/* In context diffs, show previous lines that match these regexps. */
+static struct regexp_list function_regexp_list;
+
+/* Ignore changes affecting only lines that match these regexps. */
+static struct regexp_list ignore_regexp_list;
+
+#if O_BINARY
+/* Use binary I/O when reading and writing data (--binary).
+ On POSIX hosts, this has no effect. */
+static bool binary;
+#else
+enum { binary = true };
+#endif
+
+/* If one file is missing, treat it as present but empty (-N). */
+static bool new_file;
+
+/* If the first file is missing, treat it as present but empty
+ (--unidirectional-new-file). */
+static bool unidirectional_new_file;
+
+/* Report files compared that are the same (-s).
+ Normally nothing is output when that happens. */
+static bool report_identical_files;
+
+static char const shortopts[] =
+"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ";
+
+/* Values for long options that do not have single-letter equivalents. */
+enum
+{
+ BINARY_OPTION = CHAR_MAX + 1,
+ FROM_FILE_OPTION,
+ HELP_OPTION,
+ HORIZON_LINES_OPTION,
+ IGNORE_FILE_NAME_CASE_OPTION,
+ INHIBIT_HUNK_MERGE_OPTION,
+ LEFT_COLUMN_OPTION,
+ LINE_FORMAT_OPTION,
+ NO_DEREFERENCE_OPTION,
+ NO_IGNORE_FILE_NAME_CASE_OPTION,
+ NORMAL_OPTION,
+ SDIFF_MERGE_ASSIST_OPTION,
+ STRIP_TRAILING_CR_OPTION,
+ SUPPRESS_BLANK_EMPTY_OPTION,
+ SUPPRESS_COMMON_LINES_OPTION,
+ TABSIZE_OPTION,
+ TO_FILE_OPTION,
+
+ /* These options must be in sequence. */
+ UNCHANGED_LINE_FORMAT_OPTION,
+ OLD_LINE_FORMAT_OPTION,
+ NEW_LINE_FORMAT_OPTION,
+
+ /* These options must be in sequence. */
+ UNCHANGED_GROUP_FORMAT_OPTION,
+ OLD_GROUP_FORMAT_OPTION,
+ NEW_GROUP_FORMAT_OPTION,
+ CHANGED_GROUP_FORMAT_OPTION,
+
+ COLOR_OPTION,
+ COLOR_PALETTE_OPTION,
+
+ PRESUME_OUTPUT_TTY_OPTION,
+};
+
+static char const group_format_option[][sizeof "--unchanged-group-format"] =
+ {
+ "--unchanged-group-format",
+ "--old-group-format",
+ "--new-group-format",
+ "--changed-group-format"
+ };
+
+static char const line_format_option[][sizeof "--unchanged-line-format"] =
+ {
+ "--unchanged-line-format",
+ "--old-line-format",
+ "--new-line-format"
+ };
+
+static struct option const longopts[] =
+{
+ {"binary", 0, 0, BINARY_OPTION},
+ {"brief", 0, 0, 'q'},
+ {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
+ {"color", 2, 0, COLOR_OPTION},
+ {"context", 2, 0, 'C'},
+ {"ed", 0, 0, 'e'},
+ {"exclude", 1, 0, 'x'},
+ {"exclude-from", 1, 0, 'X'},
+ {"expand-tabs", 0, 0, 't'},
+ {"forward-ed", 0, 0, 'f'},
+ {"from-file", 1, 0, FROM_FILE_OPTION},
+ {"help", 0, 0, HELP_OPTION},
+ {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
+ {"ifdef", 1, 0, 'D'},
+ {"ignore-all-space", 0, 0, 'w'},
+ {"ignore-blank-lines", 0, 0, 'B'},
+ {"ignore-case", 0, 0, 'i'},
+ {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
+ {"ignore-matching-lines", 1, 0, 'I'},
+ {"ignore-space-change", 0, 0, 'b'},
+ {"ignore-tab-expansion", 0, 0, 'E'},
+ {"ignore-trailing-space", 0, 0, 'Z'},
+ {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
+ {"initial-tab", 0, 0, 'T'},
+ {"label", 1, 0, 'L'},
+ {"left-column", 0, 0, LEFT_COLUMN_OPTION},
+ {"line-format", 1, 0, LINE_FORMAT_OPTION},
+ {"minimal", 0, 0, 'd'},
+ {"new-file", 0, 0, 'N'},
+ {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
+ {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
+ {"no-dereference", 0, 0, NO_DEREFERENCE_OPTION},
+ {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
+ {"normal", 0, 0, NORMAL_OPTION},
+ {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
+ {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
+ {"paginate", 0, 0, 'l'},
+ {"palette", 1, 0, COLOR_PALETTE_OPTION},
+ {"rcs", 0, 0, 'n'},
+ {"recursive", 0, 0, 'r'},
+ {"report-identical-files", 0, 0, 's'},
+ {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
+ {"show-c-function", 0, 0, 'p'},
+ {"show-function-line", 1, 0, 'F'},
+ {"side-by-side", 0, 0, 'y'},
+ {"speed-large-files", 0, 0, 'H'},
+ {"starting-file", 1, 0, 'S'},
+ {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
+ {"suppress-blank-empty", 0, 0, SUPPRESS_BLANK_EMPTY_OPTION},
+ {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
+ {"tabsize", 1, 0, TABSIZE_OPTION},
+ {"text", 0, 0, 'a'},
+ {"to-file", 1, 0, TO_FILE_OPTION},
+ {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
+ {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
+ {"unidirectional-new-file", 0, 0, 'P'},
+ {"unified", 2, 0, 'U'},
+ {"version", 0, 0, 'v'},
+ {"width", 1, 0, 'W'},
+
+ /* This is solely for testing. Do not document. */
+ {"-presume-output-tty", no_argument, NULL, PRESUME_OUTPUT_TTY_OPTION},
+ {0, 0, 0, 0}
+};
+
+/* Return a string containing the command options with which diff was invoked.
+ Spaces appear between what were separate ARGV-elements.
+ There is a space at the beginning but none at the end.
+ If there were no options, the result is an empty string.
+
+ Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
+ the length of that vector. */
+
+static char *
+option_list (char **optionvec, int count)
+{
+ int i;
+ size_t size = 1;
+ char *result;
+ char *p;
+
+ for (i = 0; i < count; i++)
+ size += 1 + shell_quote_length (optionvec[i]);
+
+ p = result = xmalloc (size);
+
+ for (i = 0; i < count; i++)
+ {
+ *p++ = ' ';
+ p = shell_quote_copy (p, optionvec[i]);
+ }
+
+ *p = '\0';
+ return result;
+}
+
+
+/* Return an option value suitable for add_exclude. */
+
+static int
+exclude_options (void)
+{
+ return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
+}
+
+int
+main (int argc, char **argv)
+{
+ int exit_status = EXIT_SUCCESS;
+ int c;
+ int i;
+ int prev = -1;
+ lin ocontext = -1;
+ bool explicit_context = false;
+ size_t width = 0;
+ bool show_c_function = false;
+ char const *from_file = NULL;
+ char const *to_file = NULL;
+ uintmax_t numval;
+ char *numend;
+
+ /* Do our initializations. */
+ exit_failure = EXIT_TROUBLE;
+ initialize_main (&argc, &argv);
+ set_program_name (argv[0]);
+ setlocale (LC_ALL, "");
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+ c_stack_action (0);
+ function_regexp_list.buf = &function_regexp;
+ ignore_regexp_list.buf = &ignore_regexp;
+ re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
+ excluded = new_exclude ();
+ presume_output_tty = false;
+
+ /* Decode the options. */
+
+ while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
+ {
+ switch (c)
+ {
+ case 0:
+ break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ ocontext = (! ISDIGIT (prev)
+ ? c - '0'
+ : (ocontext - (c - '0' <= CONTEXT_MAX % 10)
+ < CONTEXT_MAX / 10)
+ ? 10 * ocontext + (c - '0')
+ : CONTEXT_MAX);
+ break;
+
+ case 'a':
+ text = true;
+ break;
+
+ case 'b':
+ if (ignore_white_space < IGNORE_SPACE_CHANGE)
+ ignore_white_space = IGNORE_SPACE_CHANGE;
+ break;
+
+ case 'Z':
+ if (ignore_white_space < IGNORE_SPACE_CHANGE)
+ ignore_white_space |= IGNORE_TRAILING_SPACE;
+ break;
+
+ case 'B':
+ ignore_blank_lines = true;
+ break;
+
+ case 'C':
+ case 'U':
+ {
+ if (optarg)
+ {
+ numval = strtoumax (optarg, &numend, 10);
+ if (*numend)
+ try_help ("invalid context length '%s'", optarg);
+ if (CONTEXT_MAX < numval)
+ numval = CONTEXT_MAX;
+ }
+ else
+ numval = 3;
+
+ specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
+ if (context < numval)
+ context = numval;
+ explicit_context = true;
+ }
+ break;
+
+ case 'c':
+ specify_style (OUTPUT_CONTEXT);
+ if (context < 3)
+ context = 3;
+ break;
+
+ case 'd':
+ minimal = true;
+ break;
+
+ case 'D':
+ specify_style (OUTPUT_IFDEF);
+ {
+ static char const C_ifdef_group_formats[] =
+ "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
+ char *b = xmalloc (sizeof C_ifdef_group_formats
+ + 7 * strlen (optarg) - 14 /* 7*"%s" */
+ - 8 /* 5*"%%" + 3*"%c" */);
+ sprintf (b, C_ifdef_group_formats,
+ 0,
+ optarg, optarg, 0,
+ optarg, optarg, 0,
+ optarg, optarg, optarg);
+ for (i = 0; i < sizeof group_format / sizeof group_format[0]; i++)
+ {
+ specify_value (&group_format[i], b, "-D");
+ b += strlen (b) + 1;
+ }
+ }
+ break;
+
+ case 'e':
+ specify_style (OUTPUT_ED);
+ break;
+
+ case 'E':
+ if (ignore_white_space < IGNORE_SPACE_CHANGE)
+ ignore_white_space |= IGNORE_TAB_EXPANSION;
+ break;
+
+ case 'f':
+ specify_style (OUTPUT_FORWARD_ED);
+ break;
+
+ case 'F':
+ add_regexp (&function_regexp_list, optarg);
+ break;
+
+ case 'h':
+ /* Split the files into chunks for faster processing.
+ Usually does not change the result.
+
+ This currently has no effect. */
+ break;
+
+ case 'H':
+ speed_large_files = true;
+ break;
+
+ case 'i':
+ ignore_case = true;
+ break;
+
+ case 'I':
+ add_regexp (&ignore_regexp_list, optarg);
+ break;
+
+ case 'l':
+ if (!pr_program[0])
+ try_help ("pagination not supported on this host", NULL);
+ paginate = true;
+#ifdef SIGCHLD
+ /* Pagination requires forking and waiting, and
+ System V fork+wait does not work if SIGCHLD is ignored. */
+ signal (SIGCHLD, SIG_DFL);
+#endif
+ break;
+
+ case 'L':
+ if (!file_label[0])
+ file_label[0] = optarg;
+ else if (!file_label[1])
+ file_label[1] = optarg;
+ else
+ fatal ("too many file label options");
+ break;
+
+ case 'n':
+ specify_style (OUTPUT_RCS);
+ break;
+
+ case 'N':
+ new_file = true;
+ break;
+
+ case 'p':
+ show_c_function = true;
+ add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
+ break;
+
+ case 'P':
+ unidirectional_new_file = true;
+ break;
+
+ case 'q':
+ brief = true;
+ break;
+
+ case 'r':
+ recursive = true;
+ break;
+
+ case 's':
+ report_identical_files = true;
+ break;
+
+ case 'S':
+ specify_value (&starting_file, optarg, "-S");
+ break;
+
+ case 't':
+ expand_tabs = true;
+ break;
+
+ case 'T':
+ initial_tab = true;
+ break;
+
+ case 'u':
+ specify_style (OUTPUT_UNIFIED);
+ if (context < 3)
+ context = 3;
+ break;
+
+ case 'v':
+ version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version,
+ AUTHORS, (char *) NULL);
+ check_stdout ();
+ return EXIT_SUCCESS;
+
+ case 'w':
+ ignore_white_space = IGNORE_ALL_SPACE;
+ break;
+
+ case 'x':
+ add_exclude (excluded, optarg, exclude_options ());
+ break;
+
+ case 'X':
+ if (add_exclude_file (add_exclude, excluded, optarg,
+ exclude_options (), '\n'))
+ pfatal_with_name (optarg);
+ break;
+
+ case 'y':
+ specify_style (OUTPUT_SDIFF);
+ break;
+
+ case 'W':
+ numval = strtoumax (optarg, &numend, 10);
+ if (! (0 < numval && numval <= SIZE_MAX) || *numend)
+ try_help ("invalid width '%s'", optarg);
+ if (width != numval)
+ {
+ if (width)
+ fatal ("conflicting width options");
+ width = numval;
+ }
+ break;
+
+ case BINARY_OPTION:
+#if O_BINARY
+ binary = true;
+ if (! isatty (STDOUT_FILENO))
+ set_binary_mode (STDOUT_FILENO, O_BINARY);
+#endif
+ break;
+
+ case FROM_FILE_OPTION:
+ specify_value (&from_file, optarg, "--from-file");
+ break;
+
+ case HELP_OPTION:
+ usage ();
+ check_stdout ();
+ return EXIT_SUCCESS;
+
+ case HORIZON_LINES_OPTION:
+ numval = strtoumax (optarg, &numend, 10);
+ if (*numend)
+ try_help ("invalid horizon length '%s'", optarg);
+ horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
+ break;
+
+ case IGNORE_FILE_NAME_CASE_OPTION:
+ ignore_file_name_case = true;
+ break;
+
+ case INHIBIT_HUNK_MERGE_OPTION:
+ /* This option is obsolete, but accept it for backward
+ compatibility. */
+ break;
+
+ case LEFT_COLUMN_OPTION:
+ left_column = true;
+ break;
+
+ case LINE_FORMAT_OPTION:
+ specify_style (OUTPUT_IFDEF);
+ for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
+ specify_value (&line_format[i], optarg, "--line-format");
+ break;
+
+ case NO_DEREFERENCE_OPTION:
+ no_dereference_symlinks = true;
+ break;
+
+ case NO_IGNORE_FILE_NAME_CASE_OPTION:
+ ignore_file_name_case = false;
+ break;
+
+ case NORMAL_OPTION:
+ specify_style (OUTPUT_NORMAL);
+ break;
+
+ case SDIFF_MERGE_ASSIST_OPTION:
+ specify_style (OUTPUT_SDIFF);
+ sdiff_merge_assist = true;
+ break;
+
+ case STRIP_TRAILING_CR_OPTION:
+ strip_trailing_cr = true;
+ break;
+
+ case SUPPRESS_BLANK_EMPTY_OPTION:
+ suppress_blank_empty = true;
+ break;
+
+ case SUPPRESS_COMMON_LINES_OPTION:
+ suppress_common_lines = true;
+ break;
+
+ case TABSIZE_OPTION:
+ numval = strtoumax (optarg, &numend, 10);
+ if (! (0 < numval && numval <= SIZE_MAX - GUTTER_WIDTH_MINIMUM)
+ || *numend)
+ try_help ("invalid tabsize '%s'", optarg);
+ if (tabsize != numval)
+ {
+ if (tabsize)
+ fatal ("conflicting tabsize options");
+ tabsize = numval;
+ }
+ break;
+
+ case TO_FILE_OPTION:
+ specify_value (&to_file, optarg, "--to-file");
+ break;
+
+ case UNCHANGED_LINE_FORMAT_OPTION:
+ case OLD_LINE_FORMAT_OPTION:
+ case NEW_LINE_FORMAT_OPTION:
+ specify_style (OUTPUT_IFDEF);
+ c -= UNCHANGED_LINE_FORMAT_OPTION;
+ specify_value (&line_format[c], optarg, line_format_option[c]);
+ break;
+
+ case UNCHANGED_GROUP_FORMAT_OPTION:
+ case OLD_GROUP_FORMAT_OPTION:
+ case NEW_GROUP_FORMAT_OPTION:
+ case CHANGED_GROUP_FORMAT_OPTION:
+ specify_style (OUTPUT_IFDEF);
+ c -= UNCHANGED_GROUP_FORMAT_OPTION;
+ specify_value (&group_format[c], optarg, group_format_option[c]);
+ break;
+
+ case COLOR_OPTION:
+ specify_colors_style (optarg);
+ break;
+
+ case COLOR_PALETTE_OPTION:
+ set_color_palette (optarg);
+ break;
+
+ case PRESUME_OUTPUT_TTY_OPTION:
+ presume_output_tty = true;
+ break;
+
+ default:
+ try_help (NULL, NULL);
+ }
+ prev = c;
+ }
+
+ if (colors_style == AUTO)
+ {
+ char const *t = getenv ("TERM");
+ if (t && STREQ (t, "dumb"))
+ colors_style = NEVER;
+ }
+
+ if (output_style == OUTPUT_UNSPECIFIED)
+ {
+ if (show_c_function)
+ {
+ specify_style (OUTPUT_CONTEXT);
+ if (ocontext < 0)
+ context = 3;
+ }
+ else
+ specify_style (OUTPUT_NORMAL);
+ }
+
+ if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
+ {
+#if (defined STAT_TIMESPEC || defined STAT_TIMESPEC_NS \
+ || defined HAVE_STRUCT_STAT_ST_SPARE1)
+ time_format = "%Y-%m-%d %H:%M:%S.%N %z";
+#else
+ time_format = "%Y-%m-%d %H:%M:%S %z";
+#endif
+ }
+ else
+ {
+ /* See POSIX 1003.1-2001 for this format. */
+ time_format = "%a %b %e %T %Y";
+ }
+
+ if (0 <= ocontext
+ && (output_style == OUTPUT_CONTEXT
+ || output_style == OUTPUT_UNIFIED)
+ && (context < ocontext
+ || (ocontext < context && ! explicit_context)))
+ context = ocontext;
+
+ if (! tabsize)
+ tabsize = 8;
+ if (! width)
+ width = 130;
+
+ {
+ /* Maximize first the half line width, and then the gutter width,
+ according to the following constraints:
+
+ 1. Two half lines plus a gutter must fit in a line.
+ 2. If the half line width is nonzero:
+ a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
+ b. If tabs are not expanded to spaces,
+ a half line plus a gutter is an integral number of tabs,
+ so that tabs in the right column line up. */
+
+ size_t t = expand_tabs ? 1 : tabsize;
+ size_t w = width;
+ size_t t_plus_g = t + GUTTER_WIDTH_MINIMUM;
+ size_t unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1);
+ size_t off = unaligned_off - unaligned_off % t;
+ sdiff_half_width = (off <= GUTTER_WIDTH_MINIMUM || w <= off
+ ? 0
+ : MIN (off - GUTTER_WIDTH_MINIMUM, w - off));
+ sdiff_column2_offset = sdiff_half_width ? off : w;
+ }
+
+ /* Make the horizon at least as large as the context, so that
+ shift_boundaries has more freedom to shift the first and last hunks. */
+ if (horizon_lines < context)
+ horizon_lines = context;
+
+ summarize_regexp_list (&function_regexp_list);
+ summarize_regexp_list (&ignore_regexp_list);
+
+ if (output_style == OUTPUT_IFDEF)
+ {
+ for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
+ if (!line_format[i])
+ line_format[i] = "%l\n";
+ if (!group_format[OLD])
+ group_format[OLD]
+ = group_format[CHANGED] ? group_format[CHANGED] : "%<";
+ if (!group_format[NEW])
+ group_format[NEW]
+ = group_format[CHANGED] ? group_format[CHANGED] : "%>";
+ if (!group_format[UNCHANGED])
+ group_format[UNCHANGED] = "%=";
+ if (!group_format[CHANGED])
+ group_format[CHANGED] = concat (group_format[OLD],
+ group_format[NEW], "");
+ }
+
+ no_diff_means_no_output =
+ (output_style == OUTPUT_IFDEF ?
+ (!*group_format[UNCHANGED]
+ || (STREQ (group_format[UNCHANGED], "%=")
+ && !*line_format[UNCHANGED]))
+ : (output_style != OUTPUT_SDIFF) | suppress_common_lines);
+
+ files_can_be_treated_as_binary =
+ (brief & binary
+ & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
+ | (ignore_regexp_list.regexps || ignore_white_space)));
+
+ switch_string = option_list (argv + 1, optind - 1);
+
+ if (from_file)
+ {
+ if (to_file)
+ fatal ("--from-file and --to-file both specified");
+ else
+ for (; optind < argc; optind++)
+ {
+ int status = compare_files (NULL, from_file, argv[optind]);
+ if (exit_status < status)
+ exit_status = status;
+ }
+ }
+ else
+ {
+ if (to_file)
+ for (; optind < argc; optind++)
+ {
+ int status = compare_files (NULL, argv[optind], to_file);
+ if (exit_status < status)
+ exit_status = status;
+ }
+ else
+ {
+ if (argc - optind != 2)
+ {
+ if (argc - optind < 2)
+ try_help ("missing operand after '%s'", argv[argc - 1]);
+ else
+ try_help ("extra operand '%s'", argv[optind + 2]);
+ }
+
+ exit_status = compare_files (NULL, argv[optind], argv[optind + 1]);
+ }
+ }
+
+ /* Print any messages that were saved up for last. */
+ print_message_queue ();
+
+ check_stdout ();
+ exit (exit_status);
+ return exit_status;
+}
+
+/* Append to REGLIST the regexp PATTERN. */
+
+static void
+add_regexp (struct regexp_list *reglist, char const *pattern)
+{
+ size_t patlen = strlen (pattern);
+ char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
+
+ if (m != 0)
+ error (EXIT_TROUBLE, 0, "%s: %s", pattern, m);
+ else
+ {
+ char *regexps = reglist->regexps;
+ size_t len = reglist->len;
+ bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
+ size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
+ size_t size = reglist->size;
+
+ if (size <= newlen)
+ {
+ if (!size)
+ size = 1;
+
+ do size *= 2;
+ while (size <= newlen);
+
+ reglist->size = size;
+ reglist->regexps = regexps = xrealloc (regexps, size);
+ }
+ if (multiple_regexps)
+ {
+ regexps[len++] = '\\';
+ regexps[len++] = '|';
+ }
+ memcpy (regexps + len, pattern, patlen + 1);
+ }
+}
+
+/* Ensure that REGLIST represents the disjunction of its regexps.
+ This is done here, rather than earlier, to avoid O(N^2) behavior. */
+
+static void
+summarize_regexp_list (struct regexp_list *reglist)
+{
+ if (reglist->regexps)
+ {
+ /* At least one regexp was specified. Allocate a fastmap for it. */
+ reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
+ if (reglist->multiple_regexps)
+ {
+ /* Compile the disjunction of the regexps.
+ (If just one regexp was specified, it is already compiled.) */
+ char const *m = re_compile_pattern (reglist->regexps, reglist->len,
+ reglist->buf);
+ if (m)
+ die (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
+ }
+ }
+}
+
+static void
+try_help (char const *reason_msgid, char const *operand)
+{
+ if (reason_msgid)
+ error (0, 0, _(reason_msgid), operand);
+ die (EXIT_TROUBLE, 0, _("Try '%s --help' for more information."),
+ program_name);
+}
+
+static void
+check_stdout (void)
+{
+ if (ferror (stdout))
+ fatal ("write failed");
+ else if (fclose (stdout) != 0)
+ pfatal_with_name (_("standard output"));
+}
+
+static char const * const option_help_msgid[] = {
+ N_(" --normal output a normal diff (the default)"),
+ N_("-q, --brief report only when files differ"),
+ N_("-s, --report-identical-files report when two files are the same"),
+ N_("-c, -C NUM, --context[=NUM] output NUM (default 3) lines of copied context"),
+ N_("-u, -U NUM, --unified[=NUM] output NUM (default 3) lines of unified context"),
+ N_("-e, --ed output an ed script"),
+ N_("-n, --rcs output an RCS format diff"),
+ N_("-y, --side-by-side output in two columns"),
+ N_("-W, --width=NUM output at most NUM (default 130) print columns"),
+ N_(" --left-column output only the left column of common lines"),
+ N_(" --suppress-common-lines do not output common lines"),
+ "",
+ N_("-p, --show-c-function show which C function each change is in"),
+ N_("-F, --show-function-line=RE show the most recent line matching RE"),
+ N_(" --label LABEL use LABEL instead of file name and timestamp\n"
+ " (can be repeated)"),
+ "",
+ N_("-t, --expand-tabs expand tabs to spaces in output"),
+ N_("-T, --initial-tab make tabs line up by prepending a tab"),
+ N_(" --tabsize=NUM tab stops every NUM (default 8) print columns"),
+ N_(" --suppress-blank-empty suppress space or tab before empty output lines"),
+ N_("-l, --paginate pass output through 'pr' to paginate it"),
+ "",
+ N_("-r, --recursive recursively compare any subdirectories found"),
+ N_(" --no-dereference don't follow symbolic links"),
+ N_("-N, --new-file treat absent files as empty"),
+ N_(" --unidirectional-new-file treat absent first files as empty"),
+ N_(" --ignore-file-name-case ignore case when comparing file names"),
+ N_(" --no-ignore-file-name-case consider case when comparing file names"),
+ N_("-x, --exclude=PAT exclude files that match PAT"),
+ N_("-X, --exclude-from=FILE exclude files that match any pattern in FILE"),
+ N_("-S, --starting-file=FILE start with FILE when comparing directories"),
+ N_(" --from-file=FILE1 compare FILE1 to all operands;\n"
+ " FILE1 can be a directory"),
+ N_(" --to-file=FILE2 compare all operands to FILE2;\n"
+ " FILE2 can be a directory"),
+ "",
+ N_("-i, --ignore-case ignore case differences in file contents"),
+ N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"),
+ N_("-Z, --ignore-trailing-space ignore white space at line end"),
+ N_("-b, --ignore-space-change ignore changes in the amount of white space"),
+ N_("-w, --ignore-all-space ignore all white space"),
+ N_("-B, --ignore-blank-lines ignore changes where lines are all blank"),
+ N_("-I, --ignore-matching-lines=RE ignore changes where all lines match RE"),
+ "",
+ N_("-a, --text treat all files as text"),
+ N_(" --strip-trailing-cr strip trailing carriage return on input"),
+#if O_BINARY
+ N_(" --binary read and write data in binary mode"),
+#endif
+ "",
+ N_("-D, --ifdef=NAME output merged file with '#ifdef NAME' diffs"),
+ N_(" --GTYPE-group-format=GFMT format GTYPE input groups with GFMT"),
+ N_(" --line-format=LFMT format all input lines with LFMT"),
+ N_(" --LTYPE-line-format=LFMT format LTYPE input lines with LFMT"),
+ N_(" These format options provide fine-grained control over the output\n"
+ " of diff, generalizing -D/--ifdef."),
+ N_(" LTYPE is 'old', 'new', or 'unchanged'. GTYPE is LTYPE or 'changed'."),
+ N_(" GFMT (only) may contain:\n\
+ %< lines from FILE1\n\
+ %> lines from FILE2\n\
+ %= lines common to FILE1 and FILE2\n\
+ %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\
+ LETTERs are as follows for new group, lower case for old group:\n\
+ F first line number\n\
+ L last line number\n\
+ N number of lines = L-F+1\n\
+ E F-1\n\
+ M L+1\n\
+ %(A=B?T:E) if A equals B then T else E"),
+ N_(" LFMT (only) may contain:\n\
+ %L contents of line\n\
+ %l contents of line, excluding any trailing newline\n\
+ %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"),
+ N_(" Both GFMT and LFMT may contain:\n\
+ %% %\n\
+ %c'C' the single character C\n\
+ %c'\\OOO' the character with octal code OOO\n\
+ C the character C (other characters represent themselves)"),
+ "",
+ N_("-d, --minimal try hard to find a smaller set of changes"),
+ N_(" --horizon-lines=NUM keep NUM lines of the common prefix and suffix"),
+ N_(" --speed-large-files assume large files and many scattered small changes"),
+ N_(" --color[=WHEN] colorize the output; WHEN can be 'never', 'always',\n"
+ " or 'auto' (the default)"),
+ N_(" --palette=PALETTE the colors to use when --color is active; PALETTE is\n"
+ " a colon-separated list of terminfo capabilities"),
+ "",
+ N_(" --help display this help and exit"),
+ N_("-v, --version output version information and exit"),
+ "",
+ N_("FILES are 'FILE1 FILE2' or 'DIR1 DIR2' or 'DIR FILE' or 'FILE DIR'."),
+ N_("If --from-file or --to-file is given, there are no restrictions on FILE(s)."),
+ N_("If a FILE is '-', read standard input."),
+ N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
+ 0
+};
+
+static void
+usage (void)
+{
+ char const * const *p;
+
+ printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
+ printf ("%s\n\n", _("Compare FILES line by line."));
+
+ fputs (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"), stdout);
+
+ for (p = option_help_msgid; *p; p++)
+ {
+ if (!**p)
+ putchar ('\n');
+ else
+ {
+ char const *msg = _(*p);
+ char const *nl;
+ while ((nl = strchr (msg, '\n')))
+ {
+ int msglen = nl + 1 - msg;
+ printf (" %.*s", msglen, msg);
+ msg = nl + 1;
+ }
+
+ printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
+ }
+ }
+ emit_bug_reporting_address ();
+}
+
+/* Set VAR to VALUE, reporting an OPTION error if this is a
+ conflict. */
+static void
+specify_value (char const **var, char const *value, char const *option)
+{
+ if (*var && ! STREQ (*var, value))
+ {
+ error (0, 0, _("conflicting %s option value '%s'"), option, value);
+ try_help (NULL, NULL);
+ }
+ *var = value;
+}
+
+/* Set the output style to STYLE, diagnosing conflicts. */
+static void
+specify_style (enum output_style style)
+{
+ if (output_style != style)
+ {
+ if (output_style != OUTPUT_UNSPECIFIED)
+ try_help ("conflicting output style options", NULL);
+ output_style = style;
+ }
+}
+
+/* Set the color mode. */
+static void
+specify_colors_style (char const *value)
+{
+ if (value == NULL || STREQ (value, "auto"))
+ colors_style = AUTO;
+ else if (STREQ (value, "always"))
+ colors_style = ALWAYS;
+ else if (STREQ (value, "never"))
+ colors_style = NEVER;
+ else
+ try_help ("invalid color '%s'", value);
+}
+
+
+/* Set the last-modified time of *ST to be the current time. */
+
+static void
+set_mtime_to_now (struct stat *st)
+{
+#ifdef STAT_TIMESPEC
+ gettime (&STAT_TIMESPEC (st, st_mtim));
+#else
+ struct timespec t;
+ gettime (&t);
+ st->st_mtime = t.tv_sec;
+# if defined STAT_TIMESPEC_NS
+ STAT_TIMESPEC_NS (st, st_mtim) = t.tv_nsec;
+# elif defined HAVE_STRUCT_STAT_ST_SPARE1
+ st->st_spare1 = t.tv_nsec / 1000;
+# endif
+#endif
+}
+
+/* Compare two files (or dirs) with parent comparison PARENT
+ and names NAME0 and NAME1.
+ (If PARENT is null, then the first name is just NAME0, etc.)
+ This is self-contained; it opens the files and closes them.
+
+ Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
+ different, EXIT_TROUBLE if there is a problem opening them. */
+
+static int
+compare_files (struct comparison const *parent,
+ char const *name0,
+ char const *name1)
+{
+ struct comparison cmp;
+#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
+ register int f;
+ int status = EXIT_SUCCESS;
+ bool same_files;
+ char *free0;
+ char *free1;
+
+ /* If this is directory comparison, perhaps we have a file
+ that exists only in one of the directories.
+ If so, just print a message to that effect. */
+
+ if (! ((name0 && name1)
+ || (unidirectional_new_file && name1)
+ || new_file))
+ {
+ char const *name = name0 ? name0 : name1;
+ char const *dir = parent->file[!name0].name;
+
+ /* See POSIX 1003.1-2001 for this format. */
+ message ("Only in %s: %s\n", dir, name);
+
+ /* Return EXIT_FAILURE so that diff_dirs will return
+ EXIT_FAILURE ("some files differ"). */
+ return EXIT_FAILURE;
+ }
+
+ memset (cmp.file, 0, sizeof cmp.file);
+ cmp.parent = parent;
+
+ /* cmp.file[f].desc markers */
+#define NONEXISTENT (-1) /* nonexistent file */
+#define UNOPENED (-2) /* unopened file (e.g. directory) */
+#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
+
+#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
+
+ cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT;
+ cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT;
+
+ /* Now record the full name of each file, including nonexistent ones. */
+
+ if (!name0)
+ name0 = name1;
+ if (!name1)
+ name1 = name0;
+
+ if (!parent)
+ {
+ free0 = NULL;
+ free1 = NULL;
+ cmp.file[0].name = name0;
+ cmp.file[1].name = name1;
+ }
+ else
+ {
+ cmp.file[0].name = free0
+ = file_name_concat (parent->file[0].name, name0, NULL);
+ cmp.file[1].name = free1
+ = file_name_concat (parent->file[1].name, name1, NULL);
+ }
+
+ /* Stat the files. */
+
+ for (f = 0; f < 2; f++)
+ {
+ if (cmp.file[f].desc != NONEXISTENT)
+ {
+ if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
+ {
+ cmp.file[f].desc = cmp.file[0].desc;
+ cmp.file[f].stat = cmp.file[0].stat;
+ }
+ else if (STREQ (cmp.file[f].name, "-"))
+ {
+ cmp.file[f].desc = STDIN_FILENO;
+ if (binary && ! isatty (STDIN_FILENO))
+ set_binary_mode (STDIN_FILENO, O_BINARY);
+ if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
+ cmp.file[f].desc = ERRNO_ENCODE (errno);
+ else
+ {
+ if (S_ISREG (cmp.file[f].stat.st_mode))
+ {
+ off_t pos = lseek (STDIN_FILENO, 0, SEEK_CUR);
+ if (pos < 0)
+ cmp.file[f].desc = ERRNO_ENCODE (errno);
+ else
+ cmp.file[f].stat.st_size =
+ MAX (0, cmp.file[f].stat.st_size - pos);
+ }
+
+ /* POSIX 1003.1-2001 requires current time for
+ stdin. */
+ set_mtime_to_now (&cmp.file[f].stat);
+ }
+ }
+ else if ((no_dereference_symlinks
+ ? lstat (cmp.file[f].name, &cmp.file[f].stat)
+ : stat (cmp.file[f].name, &cmp.file[f].stat))
+ != 0)
+ cmp.file[f].desc = ERRNO_ENCODE (errno);
+ }
+ }
+
+ /* Mark files as nonexistent as needed for -N and -P, if they are
+ inaccessible empty regular files (the kind of files that 'patch'
+ creates to indicate nonexistent backups), or if they are
+ top-level files that do not exist but their counterparts do
+ exist. */
+ for (f = 0; f < 2; f++)
+ if ((new_file || (f == 0 && unidirectional_new_file))
+ && (cmp.file[f].desc == UNOPENED
+ ? (S_ISREG (cmp.file[f].stat.st_mode)
+ && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
+ && cmp.file[f].stat.st_size == 0)
+ : ((cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
+ || cmp.file[f].desc == ERRNO_ENCODE (EBADF))
+ && ! parent
+ && (cmp.file[1 - f].desc == UNOPENED
+ || cmp.file[1 - f].desc == STDIN_FILENO))))
+ cmp.file[f].desc = NONEXISTENT;
+
+ for (f = 0; f < 2; f++)
+ if (cmp.file[f].desc == NONEXISTENT)
+ {
+ memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
+ cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
+ }
+
+ for (f = 0; f < 2; f++)
+ {
+ int e = ERRNO_DECODE (cmp.file[f].desc);
+ if (0 <= e)
+ {
+ errno = e;
+ perror_with_name (cmp.file[f].name);
+ status = EXIT_TROUBLE;
+ }
+ }
+
+ if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
+ {
+ /* If one is a directory, and it was specified in the command line,
+ use the file in that dir with the other file's basename. */
+
+ int fnm_arg = DIR_P (0);
+ int dir_arg = 1 - fnm_arg;
+ char const *fnm = cmp.file[fnm_arg].name;
+ char const *dir = cmp.file[dir_arg].name;
+ char const *filename = cmp.file[dir_arg].name = free0
+ = find_dir_file_pathname (dir, last_component (fnm));
+
+ if (STREQ (fnm, "-"))
+ fatal ("cannot compare '-' to a directory");
+
+ if ((no_dereference_symlinks
+ ? lstat (filename, &cmp.file[dir_arg].stat)
+ : stat (filename, &cmp.file[dir_arg].stat))
+ != 0)
+ {
+ perror_with_name (filename);
+ status = EXIT_TROUBLE;
+ }
+ }
+
+ if (status != EXIT_SUCCESS)
+ {
+ /* One of the files should exist but does not. */
+ }
+ else if (cmp.file[0].desc == NONEXISTENT
+ && cmp.file[1].desc == NONEXISTENT)
+ {
+ /* Neither file "exists", so there's nothing to compare. */
+ }
+ else if ((same_files
+ = (cmp.file[0].desc != NONEXISTENT
+ && cmp.file[1].desc != NONEXISTENT
+ && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
+ && same_file_attributes (&cmp.file[0].stat,
+ &cmp.file[1].stat)))
+ && no_diff_means_no_output)
+ {
+ /* The two named files are actually the same physical file.
+ We know they are identical without actually reading them. */
+ }
+ else if (DIR_P (0) & DIR_P (1))
+ {
+ if (output_style == OUTPUT_IFDEF)
+ fatal ("-D option not supported with directories");
+
+ /* If both are directories, compare the files in them. */
+
+ if (parent && !recursive)
+ {
+ /* But don't compare dir contents one level down
+ unless -r was specified.
+ See POSIX 1003.1-2001 for this format. */
+ message ("Common subdirectories: %s and %s\n",
+ cmp.file[0].name, cmp.file[1].name);
+ }
+ else
+ status = diff_dirs (&cmp, compare_files);
+ }
+ else if ((DIR_P (0) | DIR_P (1))
+ || (parent
+ && !((S_ISREG (cmp.file[0].stat.st_mode)
+ || S_ISLNK (cmp.file[0].stat.st_mode))
+ && (S_ISREG (cmp.file[1].stat.st_mode)
+ || S_ISLNK (cmp.file[1].stat.st_mode)))))
+ {
+ if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
+ {
+ /* We have a subdirectory that exists only in one directory. */
+
+ if ((DIR_P (0) | DIR_P (1))
+ && recursive
+ && (new_file
+ || (unidirectional_new_file
+ && cmp.file[0].desc == NONEXISTENT)))
+ status = diff_dirs (&cmp, compare_files);
+ else
+ {
+ char const *dir;
+
+ /* PARENT must be non-NULL here. */
+ assert (parent);
+ dir = parent->file[cmp.file[0].desc == NONEXISTENT].name;
+
+ /* See POSIX 1003.1-2001 for this format. */
+ message ("Only in %s: %s\n", dir, name0);
+
+ status = EXIT_FAILURE;
+ }
+ }
+ else
+ {
+ /* We have two files that are not to be compared. */
+
+ /* See POSIX 1003.1-2001 for this format. */
+ message5 ("File %s is a %s while file %s is a %s\n",
+ file_label[0] ? file_label[0] : cmp.file[0].name,
+ file_type (&cmp.file[0].stat),
+ file_label[1] ? file_label[1] : cmp.file[1].name,
+ file_type (&cmp.file[1].stat));
+
+ /* This is a difference. */
+ status = EXIT_FAILURE;
+ }
+ }
+ else if (S_ISLNK (cmp.file[0].stat.st_mode)
+ || S_ISLNK (cmp.file[1].stat.st_mode))
+ {
+ /* We get here only if we use lstat(), not stat(). */
+ assert (no_dereference_symlinks);
+
+ if (S_ISLNK (cmp.file[0].stat.st_mode)
+ && S_ISLNK (cmp.file[1].stat.st_mode))
+ {
+ /* Compare the values of the symbolic links. */
+ char *link_value[2] = { NULL, NULL };
+
+ for (f = 0; f < 2; f++)
+ {
+ link_value[f] = xreadlink (cmp.file[f].name);
+ if (link_value[f] == NULL)
+ {
+ perror_with_name (cmp.file[f].name);
+ status = EXIT_TROUBLE;
+ break;
+ }
+ }
+ if (status == EXIT_SUCCESS)
+ {
+ if ( ! STREQ (link_value[0], link_value[1]))
+ {
+ message ("Symbolic links %s and %s differ\n",
+ cmp.file[0].name, cmp.file[1].name);
+ /* This is a difference. */
+ status = EXIT_FAILURE;
+ }
+ }
+ for (f = 0; f < 2; f++)
+ free (link_value[f]);
+ }
+ else
+ {
+ /* We have two files that are not to be compared, because
+ one of them is a symbolic link and the other one is not. */
+
+ message5 ("File %s is a %s while file %s is a %s\n",
+ file_label[0] ? file_label[0] : cmp.file[0].name,
+ file_type (&cmp.file[0].stat),
+ file_label[1] ? file_label[1] : cmp.file[1].name,
+ file_type (&cmp.file[1].stat));
+
+ /* This is a difference. */
+ status = EXIT_FAILURE;
+ }
+ }
+ else if (files_can_be_treated_as_binary
+ && S_ISREG (cmp.file[0].stat.st_mode)
+ && S_ISREG (cmp.file[1].stat.st_mode)
+ && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size
+ && 0 < cmp.file[0].stat.st_size
+ && 0 < cmp.file[1].stat.st_size)
+ {
+ message ("Files %s and %s differ\n",
+ file_label[0] ? file_label[0] : cmp.file[0].name,
+ file_label[1] ? file_label[1] : cmp.file[1].name);
+ status = EXIT_FAILURE;
+ }
+ else
+ {
+ /* Both exist and neither is a directory. */
+
+ /* Open the files and record their descriptors. */
+
+ int oflags = O_RDONLY | (binary ? O_BINARY : 0);
+
+ if (cmp.file[0].desc == UNOPENED)
+ if ((cmp.file[0].desc = open (cmp.file[0].name, oflags, 0)) < 0)
+ {
+ perror_with_name (cmp.file[0].name);
+ status = EXIT_TROUBLE;
+ }
+ if (cmp.file[1].desc == UNOPENED)
+ {
+ if (same_files)
+ cmp.file[1].desc = cmp.file[0].desc;
+ else if ((cmp.file[1].desc = open (cmp.file[1].name, oflags, 0)) < 0)
+ {
+ perror_with_name (cmp.file[1].name);
+ status = EXIT_TROUBLE;
+ }
+ }
+
+ /* Compare the files, if no error was found. */
+
+ if (status == EXIT_SUCCESS)
+ status = diff_2_files (&cmp);
+
+ /* Close the file descriptors. */
+
+ if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
+ {
+ perror_with_name (cmp.file[0].name);
+ status = EXIT_TROUBLE;
+ }
+ if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
+ && close (cmp.file[1].desc) != 0)
+ {
+ perror_with_name (cmp.file[1].name);
+ status = EXIT_TROUBLE;
+ }
+ }
+
+ /* Now the comparison has been done, if no error prevented it,
+ and STATUS is the value this function will return. */
+
+ if (status == EXIT_SUCCESS)
+ {
+ if (report_identical_files && !DIR_P (0))
+ message ("Files %s and %s are identical\n",
+ file_label[0] ? file_label[0] : cmp.file[0].name,
+ file_label[1] ? file_label[1] : cmp.file[1].name);
+ }
+ else
+ {
+ /* Flush stdout so that the user sees differences immediately.
+ This can hurt performance, unfortunately. */
+ if (fflush (stdout) != 0)
+ pfatal_with_name (_("standard output"));
+ }
+
+ free (free0);
+ free (free1);
+
+ return status;
+}
diff --git a/src/diff.h b/src/diff.h
index c8cf436..a438a8e 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -23,6 +23,17 @@
#include
#include
+/* For platforms which support the ISO C ammendment 1 functionality we
+ support user-defined character classes. */
+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H
+/* Solaris 2.5 has a bug: must be included before . */
+# include
+# include
+# if defined (HAVE_MBRTOWC)
+# define HANDLE_MULTIBYTE 1
+# endif
+#endif
+
/* What kind of changes a hunk contains. */
enum changes
{
@@ -381,7 +392,11 @@ extern void print_sdiff_script (struct change *);
extern char const change_letter[4];
extern char const pr_program[];
extern char *concat (char const *, char const *, char const *);
-extern bool lines_differ (char const *, char const *) _GL_ATTRIBUTE_PURE;
+extern bool (*lines_differ) (char const *, size_t, char const *, size_t) _GL_ATTRIBUTE_PURE;
+extern bool lines_differ_singlebyte (char const *, size_t, char const *, size_t) _GL_ATTRIBUTE_PURE;
+#ifdef HANDLE_MULTIBYTE
+extern bool lines_differ_multibyte (char const *, size_t, char const *, size_t) _GL_ATTRIBUTE_PURE;
+#endif
extern lin translate_line_number (struct file_data const *, lin);
extern struct change *find_change (struct change *);
extern struct change *find_reverse_change (struct change *);
diff --git a/src/diff.h.i18n b/src/diff.h.i18n
new file mode 100644
index 0000000..c8cf436
--- /dev/null
+++ b/src/diff.h.i18n
@@ -0,0 +1,423 @@
+/* Shared definitions for GNU DIFF
+
+ Copyright (C) 1988-1989, 1991-1995, 1998, 2001-2002, 2004, 2009-2013,
+ 2015-2017 Free Software Foundation, Inc.
+
+ This file is part of GNU DIFF.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see . */
+
+#include "system.h"
+#include
+#include
+#include
+
+/* What kind of changes a hunk contains. */
+enum changes
+{
+ /* No changes: lines common to both files. */
+ UNCHANGED,
+
+ /* Deletes only: lines taken from just the first file. */
+ OLD,
+
+ /* Inserts only: lines taken from just the second file. */
+ NEW,
+
+ /* Both deletes and inserts: a hunk containing both old and new lines. */
+ CHANGED
+};
+
+/* When colors should be used in the output. */
+enum colors_style
+{
+ /* Never output colors. */
+ NEVER,
+
+ /* Output colors if the output is a terminal. */
+ AUTO,
+
+ /* Always output colors. */
+ ALWAYS,
+};
+
+/* Variables for command line options */
+
+#ifndef GDIFF_MAIN
+# define XTERN extern
+#else
+# define XTERN
+#endif
+
+enum output_style
+{
+ /* No output style specified. */
+ OUTPUT_UNSPECIFIED,
+
+ /* Default output style. */
+ OUTPUT_NORMAL,
+
+ /* Output the differences with lines of context before and after (-c). */
+ OUTPUT_CONTEXT,
+
+ /* Output the differences in a unified context diff format (-u). */
+ OUTPUT_UNIFIED,
+
+ /* Output the differences as commands suitable for 'ed' (-e). */
+ OUTPUT_ED,
+
+ /* Output the diff as a forward ed script (-f). */
+ OUTPUT_FORWARD_ED,
+
+ /* Like -f, but output a count of changed lines in each "command" (-n). */
+ OUTPUT_RCS,
+
+ /* Output merged #ifdef'd file (-D). */
+ OUTPUT_IFDEF,
+
+ /* Output sdiff style (-y). */
+ OUTPUT_SDIFF
+};
+
+/* True for output styles that are robust,
+ i.e. can handle a file that ends in a non-newline. */
+#define ROBUST_OUTPUT_STYLE(S) ((S) != OUTPUT_ED && (S) != OUTPUT_FORWARD_ED)
+
+XTERN enum output_style output_style;
+
+/* Define the current color context used to print a line. */
+XTERN enum colors_style colors_style;
+
+/* Nonzero if output cannot be generated for identical files. */
+XTERN bool no_diff_means_no_output;
+
+/* Number of lines of context to show in each set of diffs.
+ This is zero when context is not to be shown. */
+XTERN lin context;
+
+/* Consider all files as text files (-a).
+ Don't interpret codes over 0177 as implying a "binary file". */
+XTERN bool text;
+
+/* Number of lines to keep in identical prefix and suffix. */
+XTERN lin horizon_lines;
+
+/* The significance of white space during comparisons. */
+enum DIFF_white_space
+{
+ /* All white space is significant (the default). */
+ IGNORE_NO_WHITE_SPACE,
+
+ /* Ignore changes due to tab expansion (-E). */
+ IGNORE_TAB_EXPANSION,
+
+ /* Ignore changes in trailing horizontal white space (-Z). */
+ IGNORE_TRAILING_SPACE,
+
+ /* IGNORE_TAB_EXPANSION and IGNORE_TRAILING_SPACE are a special case
+ because they are independent and can be ORed together, yielding
+ IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE. */
+ IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE,
+
+ /* Ignore changes in horizontal white space (-b). */
+ IGNORE_SPACE_CHANGE,
+
+ /* Ignore all horizontal white space (-w). */
+ IGNORE_ALL_SPACE
+};
+XTERN enum DIFF_white_space ignore_white_space;
+
+/* Ignore changes that affect only blank lines (-B). */
+XTERN bool ignore_blank_lines;
+
+/* Files can be compared byte-by-byte, as if they were binary.
+ This depends on various options. */
+XTERN bool files_can_be_treated_as_binary;
+
+/* Ignore differences in case of letters (-i). */
+XTERN bool ignore_case;
+
+/* Ignore differences in case of letters in file names. */
+XTERN bool ignore_file_name_case;
+
+/* Act on symbolic links themselves rather than on their target
+ (--no-dereference). */
+XTERN bool no_dereference_symlinks;
+
+/* File labels for '-c' output headers (--label). */
+XTERN char *file_label[2];
+
+/* Regexp to identify function-header lines (-F). */
+XTERN struct re_pattern_buffer function_regexp;
+
+/* Ignore changes that affect only lines matching this regexp (-I). */
+XTERN struct re_pattern_buffer ignore_regexp;
+
+/* Say only whether files differ, not how (-q). */
+XTERN bool brief;
+
+/* Expand tabs in the output so the text lines up properly
+ despite the characters added to the front of each line (-t). */
+XTERN bool expand_tabs;
+
+/* Number of columns between tab stops. */
+XTERN size_t tabsize;
+
+/* Use a tab in the output, rather than a space, before the text of an
+ input line, so as to keep the proper alignment in the input line
+ without changing the characters in it (-T). */
+XTERN bool initial_tab;
+
+/* Do not output an initial space or tab before the text of an empty line. */
+XTERN bool suppress_blank_empty;
+
+/* Remove trailing carriage returns from input. */
+XTERN bool strip_trailing_cr;
+
+/* In directory comparison, specify file to start with (-S).
+ This is used for resuming an aborted comparison.
+ All file names less than this name are ignored. */
+XTERN char const *starting_file;
+
+/* Pipe each file's output through pr (-l). */
+XTERN bool paginate;
+
+/* Line group formats for unchanged, old, new, and changed groups. */
+XTERN char const *group_format[CHANGED + 1];
+
+/* Line formats for unchanged, old, and new lines. */
+XTERN char const *line_format[NEW + 1];
+
+/* If using OUTPUT_SDIFF print extra information to help the sdiff filter. */
+XTERN bool sdiff_merge_assist;
+
+/* Tell OUTPUT_SDIFF to show only the left version of common lines. */
+XTERN bool left_column;
+
+/* Tell OUTPUT_SDIFF to not show common lines. */
+XTERN bool suppress_common_lines;
+
+/* The half line width and column 2 offset for OUTPUT_SDIFF. */
+XTERN size_t sdiff_half_width;
+XTERN size_t sdiff_column2_offset;
+
+/* String containing all the command options diff received,
+ with spaces between and at the beginning but none at the end.
+ If there were no options given, this string is empty. */
+XTERN char *switch_string;
+
+/* Use heuristics for better speed with large files with a small
+ density of changes. */
+XTERN bool speed_large_files;
+
+/* Patterns that match file names to be excluded. */
+XTERN struct exclude *excluded;
+
+/* Don't discard lines. This makes things slower (sometimes much
+ slower) but will find a guaranteed minimal set of changes. */
+XTERN bool minimal;
+
+/* The strftime format to use for time strings. */
+XTERN char const *time_format;
+
+/* The result of comparison is an "edit script": a chain of 'struct change'.
+ Each 'struct change' represents one place where some lines are deleted
+ and some are inserted.
+
+ LINE0 and LINE1 are the first affected lines in the two files (origin 0).
+ DELETED is the number of lines deleted here from file 0.
+ INSERTED is the number of lines inserted here in file 1.
+
+ If DELETED is 0 then LINE0 is the number of the line before
+ which the insertion was done; vice versa for INSERTED and LINE1. */
+
+struct change
+{
+ struct change *link; /* Previous or next edit command */
+ lin inserted; /* # lines of file 1 changed here. */
+ lin deleted; /* # lines of file 0 changed here. */
+ lin line0; /* Line number of 1st deleted line. */
+ lin line1; /* Line number of 1st inserted line. */
+ bool ignore; /* Flag used in context.c. */
+};
+
+/* Structures that describe the input files. */
+
+/* Data on one input file being compared. */
+
+struct file_data {
+ int desc; /* File descriptor */
+ char const *name; /* File name */
+ struct stat stat; /* File status */
+
+ /* Buffer in which text of file is read. */
+ word *buffer;
+
+ /* Allocated size of buffer, in bytes. Always a multiple of
+ sizeof *buffer. */
+ size_t bufsize;
+
+ /* Number of valid bytes now in the buffer. */
+ size_t buffered;
+
+ /* Array of pointers to lines in the file. */
+ char const **linbuf;
+
+ /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines.
+ linebuf[linbuf_base ... buffered_lines - 1] are possibly differing.
+ linebuf[linbuf_base ... valid_lines - 1] contain valid data.
+ linebuf[linbuf_base ... alloc_lines - 1] are allocated. */
+ lin linbuf_base, buffered_lines, valid_lines, alloc_lines;
+
+ /* Pointer to end of prefix of this file to ignore when hashing. */
+ char const *prefix_end;
+
+ /* Count of lines in the prefix.
+ There are this many lines in the file before linbuf[0]. */
+ lin prefix_lines;
+
+ /* Pointer to start of suffix of this file to ignore when hashing. */
+ char const *suffix_begin;
+
+ /* Vector, indexed by line number, containing an equivalence code for
+ each line. It is this vector that is actually compared with that
+ of another file to generate differences. */
+ lin *equivs;
+
+ /* Vector, like the previous one except that
+ the elements for discarded lines have been squeezed out. */
+ lin *undiscarded;
+
+ /* Vector mapping virtual line numbers (not counting discarded lines)
+ to real ones (counting those lines). Both are origin-0. */
+ lin *realindexes;
+
+ /* Total number of nondiscarded lines. */
+ lin nondiscarded_lines;
+
+ /* Vector, indexed by real origin-0 line number,
+ containing 1 for a line that is an insertion or a deletion.
+ The results of comparison are stored here. */
+ char *changed;
+
+ /* 1 if file ends in a line with no final newline. */
+ bool missing_newline;
+
+ /* 1 if at end of file. */
+ bool eof;
+
+ /* 1 more than the maximum equivalence value used for this or its
+ sibling file. */
+ lin equiv_max;
+};
+
+/* The file buffer, considered as an array of bytes rather than
+ as an array of words. */
+#define FILE_BUFFER(f) ((char *) (f)->buffer)
+
+/* Data on two input files being compared. */
+
+struct comparison
+ {
+ struct file_data file[2];
+ struct comparison const *parent; /* parent, if a recursive comparison */
+ };
+
+/* Describe the two files currently being compared. */
+
+XTERN struct file_data files[2];
+
+/* Stdio stream to output diffs to. */
+
+XTERN FILE *outfile;
+
+/* Declare various functions. */
+
+/* analyze.c */
+extern int diff_2_files (struct comparison *);
+
+/* context.c */
+extern void print_context_header (struct file_data[], char const * const *, bool);
+extern void print_context_script (struct change *, bool);
+
+/* dir.c */
+extern int diff_dirs (struct comparison const *,
+ int (*) (struct comparison const *,
+ char const *, char const *));
+extern char *find_dir_file_pathname (char const *, char const *);
+
+/* ed.c */
+extern void print_ed_script (struct change *);
+extern void pr_forward_ed_script (struct change *);
+
+/* ifdef.c */
+extern void print_ifdef_script (struct change *);
+
+/* io.c */
+extern void file_block_read (struct file_data *, size_t);
+extern bool read_files (struct file_data[], bool);
+
+/* normal.c */
+extern void print_normal_script (struct change *);
+
+/* rcs.c */
+extern void print_rcs_script (struct change *);
+
+/* side.c */
+extern void print_sdiff_script (struct change *);
+
+/* util.c */
+extern char const change_letter[4];
+extern char const pr_program[];
+extern char *concat (char const *, char const *, char const *);
+extern bool lines_differ (char const *, char const *) _GL_ATTRIBUTE_PURE;
+extern lin translate_line_number (struct file_data const *, lin);
+extern struct change *find_change (struct change *);
+extern struct change *find_reverse_change (struct change *);
+extern void *zalloc (size_t);
+extern enum changes analyze_hunk (struct change *, lin *, lin *, lin *, lin *);
+extern void begin_output (void);
+extern void debug_script (struct change *);
+extern void fatal (char const *) __attribute__((noreturn));
+extern void finish_output (void);
+extern void message (char const *, char const *, char const *);
+extern void message5 (char const *, char const *, char const *,
+ char const *, char const *);
+extern void output_1_line (char const *, char const *, char const *,
+ char const *);
+extern void perror_with_name (char const *);
+extern void pfatal_with_name (char const *) __attribute__((noreturn));
+extern void print_1_line (char const *, char const * const *);
+extern void print_1_line_nl (char const *, char const * const *, bool);
+extern void print_message_queue (void);
+extern void print_number_range (char, struct file_data *, lin, lin);
+extern void print_script (struct change *, struct change * (*) (struct change *),
+ void (*) (struct change *));
+extern void setup_output (char const *, char const *, bool);
+extern void translate_range (struct file_data const *, lin, lin,
+ printint *, printint *);
+
+enum color_context
+{
+ HEADER_CONTEXT,
+ ADD_CONTEXT,
+ DELETE_CONTEXT,
+ RESET_CONTEXT,
+ LINE_NUMBER_CONTEXT,
+};
+
+XTERN bool presume_output_tty;
+
+extern void set_color_context (enum color_context color_context);
+extern void set_color_palette (char const *palette);
diff --git a/src/io.c b/src/io.c
index b4ef5dc..fce16ab 100644
--- a/src/io.c
+++ b/src/io.c
@@ -23,6 +23,7 @@
#include
#include
#include
+#include
/* Rotate an unsigned value to the left. */
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
@@ -215,6 +216,28 @@ slurp (struct file_data *current)
/* Split the file into lines, simultaneously computing the equivalence
class for each line. */
+#ifdef HANDLE_MULTIBYTE
+# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL) \
+do \
+ { \
+ mbstate_t state_bak = STATE; \
+ \
+ CONVFAIL = 0; \
+ MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE); \
+ \
+ switch (MBLENGTH) \
+ { \
+ case (size_t)-2: \
+ case (size_t)-1: \
+ STATE = state_bak; \
+ ++CONVFAIL; \
+ /* Fall through. */ \
+ case 0: \
+ MBLENGTH = 1; \
+ } \
+ } \
+ while (0)
+#endif
static void
find_and_hash_each_line (struct file_data *current)
@@ -241,12 +264,300 @@ find_and_hash_each_line (struct file_data *current)
bool same_length_diff_contents_compare_anyway =
diff_length_compare_anyway | ig_case;
+#ifdef HANDLE_MULTIBYTE
+ wchar_t wc;
+ size_t mblength;
+ mbstate_t state;
+ int convfail;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+#endif
+
while (p < suffix_begin)
{
char const *ip = p;
hash_value h = 0;
unsigned char c;
+#ifdef HANDLE_MULTIBYTE
+ if (MB_CUR_MAX > 1)
+ {
+ wchar_t lo_wc;
+ char mbc[MB_LEN_MAX];
+ mbstate_t state_wc;
+
+ /* Hash this line until we find a newline. */
+ switch (ig_white_space)
+ {
+ case IGNORE_ALL_SPACE:
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ break;
+ }
+
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+
+ if (convfail)
+ mbc[0] = *p++;
+ else if (!iswspace (wc))
+ {
+ bool flag = 0;
+
+ if (ig_case)
+ {
+ lo_wc = towlower (wc);
+ if (lo_wc != wc)
+ {
+ flag = 1;
+
+ p += mblength;
+ memset (&state_wc, '\0', sizeof(mbstate_t));
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
+
+ assert (mblength != (size_t)-1 &&
+ mblength != (size_t)-2);
+
+ mblength = (mblength < 1) ? 1 : mblength;
+ }
+ }
+
+ if (!flag)
+ {
+ for (i = 0; i < mblength; i++)
+ mbc[i] = *p++;
+ }
+ }
+ else
+ {
+ p += mblength;
+ continue;
+ }
+
+ for (i = 0; i < mblength; i++)
+ h = HASH (h, mbc[i]);
+ }
+ break;
+
+ case IGNORE_SPACE_CHANGE:
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ break;
+ }
+
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+
+ if (!convfail && iswspace (wc))
+ {
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ goto hashing_done;
+ }
+
+ p += mblength;
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+ if (convfail || !iswspace (wc))
+ break;
+ }
+ h = HASH (h, ' ');
+ }
+
+ /* WC is now the first non-space. */
+ if (convfail)
+ mbc[0] = *p++;
+ else
+ {
+ bool flag = 0;
+
+ if (ignore_case)
+ {
+ lo_wc = towlower (wc);
+ if (lo_wc != wc)
+ {
+ flag = 1;
+
+ p += mblength;
+ memset (&state_wc, '\0', sizeof(mbstate_t));
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
+
+ assert (mblength != (size_t)-1 &&
+ mblength != (size_t)-2);
+
+ mblength = (mblength < 1) ? 1 : mblength;
+ }
+ }
+
+ if (!flag)
+ {
+ for (i = 0; i < mblength; i++)
+ mbc[i] = *p++;
+ }
+ }
+
+ for (i = 0; i < mblength; i++)
+ h = HASH (h, mbc[i]);
+ }
+ break;
+
+ case IGNORE_TAB_EXPANSION:
+ case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+ case IGNORE_TRAILING_SPACE:
+ {
+ size_t column = 0;
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ break;
+ }
+
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+
+ if (!convfail
+ && ig_white_space & IGNORE_TRAILING_SPACE
+ && iswspace (wc))
+ {
+ char const *p1 = p;
+ while (1)
+ {
+ if (*p1 == '\n')
+ {
+ p = p1 + 1;
+ goto hashing_done;
+ }
+
+ p1 += mblength;
+ MBC2WC (p1, suffix_begin, mblength, wc, state, convfail);
+ if (convfail || !iswspace (wc))
+ break;
+ }
+ }
+
+ size_t repetitions = 1;
+ bool no_convert = 0;
+
+ if (ig_white_space & IGNORE_TAB_EXPANSION)
+ {
+ if (convfail)
+ column++;
+ else
+ switch (wc)
+ {
+ case L'\b':
+ column -= 0 < column;
+ break;
+
+ case L'\t':
+ mbc[0] = ' ';
+ mblength = 1;
+ no_convert = 1;
+ p++;
+ assert(mblength == 1);
+ repetitions = tabsize - column % tabsize;
+ column = (column + repetitions < column
+ ? 0
+ : column + repetitions);
+ break;
+
+ case L'\r':
+ column = 0;
+ break;
+
+ default:
+ column += wcwidth (wc);
+ break;
+ }
+ }
+
+ if (ig_case)
+ {
+ lo_wc = towlower (wc);
+ if (lo_wc != wc)
+ {
+ no_convert = 1;
+ p += mblength;
+ memset (&state_wc, '\0', sizeof(mbstate_t));
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
+
+ assert (mblength != (size_t)-1 &&
+ mblength != (size_t)-2);
+
+ mblength = (mblength < 1) ? 1 : mblength;
+ }
+ }
+
+ if (!no_convert)
+ for (i = 0; i < mblength; i++)
+ mbc[i] = *p++;
+
+ do
+ {
+ for (i = 0; i < mblength; i++)
+ h = HASH (h, mbc[i]);
+ }
+ while (--repetitions != 0);
+ }
+ }
+ break;
+
+ default:
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ break;
+ }
+
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+
+ if (convfail)
+ mbc[0] = *p++;
+ else
+ {
+ int flag = 0;
+
+ if (ig_case)
+ {
+ lo_wc = towlower (wc);
+ if (lo_wc != wc)
+ {
+ flag = 1;
+ p += mblength;
+ memset (&state_wc, '\0', sizeof(mbstate_t));
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
+
+ assert (mblength != (size_t)-1 &&
+ mblength != (size_t)-2);
+
+ mblength = (mblength < 1) ? 1 : mblength;
+ }
+ }
+
+ if (!flag)
+ {
+ for (i = 0; i < mblength; i++)
+ mbc[i] = *p++;
+ }
+ }
+
+ for (i = 0; i < mblength; i++)
+ h = HASH (h, mbc[i]);
+ }
+ }
+ }
+ else
+#endif
+
/* Hash this line until we find a newline. */
switch (ig_white_space)
{
@@ -397,7 +708,7 @@ find_and_hash_each_line (struct file_data *current)
else if (!diff_length_compare_anyway)
continue;
- if (! lines_differ (eqline, ip))
+ if (! lines_differ (eqline, eqs[i].length + 1, ip, length + 1))
break;
}
diff --git a/src/io.c.i18n b/src/io.c.i18n
new file mode 100644
index 0000000..b4ef5dc
--- /dev/null
+++ b/src/io.c.i18n
@@ -0,0 +1,830 @@
+/* File I/O for GNU DIFF.
+
+ Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2013,
+ 2015-2017 Free Software Foundation, Inc.
+
+ This file is part of GNU DIFF.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see . */
+
+#include "diff.h"
+#include
+#include
+#include
+#include
+
+/* Rotate an unsigned value to the left. */
+#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
+
+/* Given a hash value and a new character, return a new hash value. */
+#define HASH(h, c) ((c) + ROL (h, 7))
+
+/* The type of a hash value. */
+typedef size_t hash_value;
+verify (! TYPE_SIGNED (hash_value));
+
+/* Lines are put into equivalence classes of lines that match in lines_differ.
+ Each equivalence class is represented by one of these structures,
+ but only while the classes are being computed.
+ Afterward, each class is represented by a number. */
+struct equivclass
+{
+ lin next; /* Next item in this bucket. */
+ hash_value hash; /* Hash of lines in this class. */
+ char const *line; /* A line that fits this class. */
+ size_t length; /* That line's length, not counting its newline. */
+};
+
+/* Hash-table: array of buckets, each being a chain of equivalence classes.
+ buckets[-1] is reserved for incomplete lines. */
+static lin *buckets;
+
+/* Number of buckets in the hash table array, not counting buckets[-1]. */
+static size_t nbuckets;
+
+/* Array in which the equivalence classes are allocated.
+ The bucket-chains go through the elements in this array.
+ The number of an equivalence class is its index in this array. */
+static struct equivclass *equivs;
+
+/* Index of first free element in the array 'equivs'. */
+static lin equivs_index;
+
+/* Number of elements allocated in the array 'equivs'. */
+static lin equivs_alloc;
+
+/* Read a block of data into a file buffer, checking for EOF and error. */
+
+void
+file_block_read (struct file_data *current, size_t size)
+{
+ if (size && ! current->eof)
+ {
+ size_t s = block_read (current->desc,
+ FILE_BUFFER (current) + current->buffered, size);
+ if (s == SIZE_MAX)
+ pfatal_with_name (current->name);
+ current->buffered += s;
+ current->eof = s < size;
+ }
+}
+
+/* Check for binary files and compare them for exact identity. */
+
+/* Return 1 if BUF contains a non text character.
+ SIZE is the number of characters in BUF. */
+
+#define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
+
+/* Get ready to read the current file.
+ Return nonzero if SKIP_TEST is zero,
+ and if it appears to be a binary file. */
+
+static bool
+sip (struct file_data *current, bool skip_test)
+{
+ /* If we have a nonexistent file at this stage, treat it as empty. */
+ if (current->desc < 0)
+ {
+ /* Leave room for a sentinel. */
+ current->bufsize = sizeof (word);
+ current->buffer = xmalloc (current->bufsize);
+ }
+ else
+ {
+ current->bufsize = buffer_lcm (sizeof (word),
+ STAT_BLOCKSIZE (current->stat),
+ PTRDIFF_MAX - 2 * sizeof (word));
+ current->buffer = xmalloc (current->bufsize);
+
+#ifdef __KLIBC__
+ /* Skip test if seek is not possible */
+ skip_test = skip_test
+ || (lseek (current->desc, 0, SEEK_CUR) < 0
+ && errno == ESPIPE);
+#endif
+
+ if (! skip_test)
+ {
+ /* Check first part of file to see if it's a binary file. */
+
+ int prev_mode = set_binary_mode (current->desc, O_BINARY);
+ off_t buffered;
+ file_block_read (current, current->bufsize);
+ buffered = current->buffered;
+
+ if (prev_mode != O_BINARY)
+ {
+ /* Revert to text mode and seek back to the start to reread
+ the file. Use relative seek, since file descriptors
+ like stdin might not start at offset zero. */
+ if (lseek (current->desc, - buffered, SEEK_CUR) < 0)
+ pfatal_with_name (current->name);
+ set_binary_mode (current->desc, prev_mode);
+ current->buffered = 0;
+ current->eof = false;
+ }
+
+ return binary_file_p (current->buffer, buffered);
+ }
+ }
+
+ current->buffered = 0;
+ current->eof = false;
+ return false;
+}
+
+/* Slurp the rest of the current file completely into memory. */
+
+static void
+slurp (struct file_data *current)
+{
+ size_t cc;
+
+ if (current->desc < 0)
+ {
+ /* The file is nonexistent. */
+ return;
+ }
+
+ if (S_ISREG (current->stat.st_mode))
+ {
+ /* It's a regular file; slurp in the rest all at once. */
+
+ /* Get the size out of the stat block.
+ Allocate just enough room for appended newline plus word sentinel,
+ plus word-alignment since we want the buffer word-aligned. */
+ size_t file_size = current->stat.st_size;
+ cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
+ if (file_size != current->stat.st_size || cc < file_size
+ || PTRDIFF_MAX <= cc)
+ xalloc_die ();
+
+ if (current->bufsize < cc)
+ {
+ current->bufsize = cc;
+ current->buffer = xrealloc (current->buffer, cc);
+ }
+
+ /* Try to read at least 1 more byte than the size indicates, to
+ detect whether the file is growing. This is a nicety for
+ users who run 'diff' on files while they are changing. */
+
+ if (current->buffered <= file_size)
+ {
+ file_block_read (current, file_size + 1 - current->buffered);
+ if (current->buffered <= file_size)
+ return;
+ }
+ }
+
+ /* It's not a regular file, or it's a growing regular file; read it,
+ growing the buffer as needed. */
+
+ file_block_read (current, current->bufsize - current->buffered);
+
+ if (current->buffered)
+ {
+ while (current->buffered == current->bufsize)
+ {
+ if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
+ xalloc_die ();
+ current->bufsize *= 2;
+ current->buffer = xrealloc (current->buffer, current->bufsize);
+ file_block_read (current, current->bufsize - current->buffered);
+ }
+
+ /* Allocate just enough room for appended newline plus word
+ sentinel, plus word-alignment. */
+ cc = current->buffered + 2 * sizeof (word);
+ current->bufsize = cc - cc % sizeof (word);
+ current->buffer = xrealloc (current->buffer, current->bufsize);
+ }
+}
+
+/* Split the file into lines, simultaneously computing the equivalence
+ class for each line. */
+
+static void
+find_and_hash_each_line (struct file_data *current)
+{
+ char const *p = current->prefix_end;
+ lin i, *bucket;
+ size_t length;
+
+ /* Cache often-used quantities in local variables to help the compiler. */
+ char const **linbuf = current->linbuf;
+ lin alloc_lines = current->alloc_lines;
+ lin line = 0;
+ lin linbuf_base = current->linbuf_base;
+ lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
+ struct equivclass *eqs = equivs;
+ lin eqs_index = equivs_index;
+ lin eqs_alloc = equivs_alloc;
+ char const *suffix_begin = current->suffix_begin;
+ char const *bufend = FILE_BUFFER (current) + current->buffered;
+ bool ig_case = ignore_case;
+ enum DIFF_white_space ig_white_space = ignore_white_space;
+ bool diff_length_compare_anyway =
+ ig_white_space != IGNORE_NO_WHITE_SPACE;
+ bool same_length_diff_contents_compare_anyway =
+ diff_length_compare_anyway | ig_case;
+
+ while (p < suffix_begin)
+ {
+ char const *ip = p;
+ hash_value h = 0;
+ unsigned char c;
+
+ /* Hash this line until we find a newline. */
+ switch (ig_white_space)
+ {
+ case IGNORE_ALL_SPACE:
+ while ((c = *p++) != '\n')
+ if (! isspace (c))
+ h = HASH (h, ig_case ? tolower (c) : c);
+ break;
+
+ case IGNORE_SPACE_CHANGE:
+ while ((c = *p++) != '\n')
+ {
+ if (isspace (c))
+ {
+ do
+ if ((c = *p++) == '\n')
+ goto hashing_done;
+ while (isspace (c));
+
+ h = HASH (h, ' ');
+ }
+
+ /* C is now the first non-space. */
+ h = HASH (h, ig_case ? tolower (c) : c);
+ }
+ break;
+
+ case IGNORE_TAB_EXPANSION:
+ case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+ case IGNORE_TRAILING_SPACE:
+ {
+ size_t column = 0;
+ while ((c = *p++) != '\n')
+ {
+ if (ig_white_space & IGNORE_TRAILING_SPACE
+ && isspace (c))
+ {
+ char const *p1 = p;
+ unsigned char c1;
+ do
+ if ((c1 = *p1++) == '\n')
+ {
+ p = p1;
+ goto hashing_done;
+ }
+ while (isspace (c1));
+ }
+
+ size_t repetitions = 1;
+
+ if (ig_white_space & IGNORE_TAB_EXPANSION)
+ switch (c)
+ {
+ case '\b':
+ column -= 0 < column;
+ break;
+
+ case '\t':
+ c = ' ';
+ repetitions = tabsize - column % tabsize;
+ column = (column + repetitions < column
+ ? 0
+ : column + repetitions);
+ break;
+
+ case '\r':
+ column = 0;
+ break;
+
+ default:
+ column++;
+ break;
+ }
+
+ if (ig_case)
+ c = tolower (c);
+
+ do
+ h = HASH (h, c);
+ while (--repetitions != 0);
+ }
+ }
+ break;
+
+ default:
+ if (ig_case)
+ while ((c = *p++) != '\n')
+ h = HASH (h, tolower (c));
+ else
+ while ((c = *p++) != '\n')
+ h = HASH (h, c);
+ break;
+ }
+
+ hashing_done:;
+
+ bucket = &buckets[h % nbuckets];
+ length = p - ip - 1;
+
+ if (p == bufend
+ && current->missing_newline
+ && ROBUST_OUTPUT_STYLE (output_style))
+ {
+ /* The last line is incomplete and we do not silently
+ complete lines. If the line cannot compare equal to any
+ complete line, put it into buckets[-1] so that it can
+ compare equal only to the other file's incomplete line
+ (if one exists). */
+ if (ig_white_space < IGNORE_TRAILING_SPACE)
+ bucket = &buckets[-1];
+ }
+
+ for (i = *bucket; ; i = eqs[i].next)
+ if (!i)
+ {
+ /* Create a new equivalence class in this bucket. */
+ i = eqs_index++;
+ if (i == eqs_alloc)
+ {
+ if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
+ xalloc_die ();
+ eqs_alloc *= 2;
+ eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
+ }
+ eqs[i].next = *bucket;
+ eqs[i].hash = h;
+ eqs[i].line = ip;
+ eqs[i].length = length;
+ *bucket = i;
+ break;
+ }
+ else if (eqs[i].hash == h)
+ {
+ char const *eqline = eqs[i].line;
+
+ /* Reuse existing class if lines_differ reports the lines
+ equal. */
+ if (eqs[i].length == length)
+ {
+ /* Reuse existing equivalence class if the lines are identical.
+ This detects the common case of exact identity
+ faster than lines_differ would. */
+ if (memcmp (eqline, ip, length) == 0)
+ break;
+ if (!same_length_diff_contents_compare_anyway)
+ continue;
+ }
+ else if (!diff_length_compare_anyway)
+ continue;
+
+ if (! lines_differ (eqline, ip))
+ break;
+ }
+
+ /* Maybe increase the size of the line table. */
+ if (line == alloc_lines)
+ {
+ /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
+ if (PTRDIFF_MAX / 3 <= alloc_lines
+ || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
+ || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
+ xalloc_die ();
+ alloc_lines = 2 * alloc_lines - linbuf_base;
+ cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
+ linbuf += linbuf_base;
+ linbuf = xrealloc (linbuf,
+ (alloc_lines - linbuf_base) * sizeof *linbuf);
+ linbuf -= linbuf_base;
+ }
+ linbuf[line] = ip;
+ cureqs[line] = i;
+ ++line;
+ }
+
+ current->buffered_lines = line;
+
+ for (i = 0; ; i++)
+ {
+ /* Record the line start for lines in the suffix that we care about.
+ Record one more line start than lines,
+ so that we can compute the length of any buffered line. */
+ if (line == alloc_lines)
+ {
+ /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
+ if (PTRDIFF_MAX / 3 <= alloc_lines
+ || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
+ || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
+ xalloc_die ();
+ alloc_lines = 2 * alloc_lines - linbuf_base;
+ linbuf += linbuf_base;
+ linbuf = xrealloc (linbuf,
+ (alloc_lines - linbuf_base) * sizeof *linbuf);
+ linbuf -= linbuf_base;
+ }
+ linbuf[line] = p;
+
+ if (p == bufend)
+ {
+ /* If the last line is incomplete and we do not silently
+ complete lines, don't count its appended newline. */
+ if (current->missing_newline && ROBUST_OUTPUT_STYLE (output_style))
+ linbuf[line]--;
+ break;
+ }
+
+ if (context <= i && no_diff_means_no_output)
+ break;
+
+ line++;
+
+ while (*p++ != '\n')
+ continue;
+ }
+
+ /* Done with cache in local variables. */
+ current->linbuf = linbuf;
+ current->valid_lines = line;
+ current->alloc_lines = alloc_lines;
+ current->equivs = cureqs;
+ equivs = eqs;
+ equivs_alloc = eqs_alloc;
+ equivs_index = eqs_index;
+}
+
+/* Prepare the text. Make sure the text end is initialized.
+ Make sure text ends in a newline,
+ but remember that we had to add one.
+ Strip trailing CRs, if that was requested. */
+
+static void
+prepare_text (struct file_data *current)
+{
+ size_t buffered = current->buffered;
+ char *p = FILE_BUFFER (current);
+
+ if (buffered == 0 || p[buffered - 1] == '\n')
+ current->missing_newline = false;
+ else
+ {
+ p[buffered++] = '\n';
+ current->missing_newline = true;
+ }
+
+ if (!p)
+ return;
+
+ /* Don't use uninitialized storage when planting or using sentinels. */
+ memset (p + buffered, 0, sizeof (word));
+
+ if (strip_trailing_cr)
+ {
+ char *dst;
+ char *srclim = p + buffered;
+ *srclim = '\r';
+ dst = rawmemchr (p, '\r');
+
+ if (dst != srclim)
+ {
+ char const *src = dst;
+ do
+ {
+ *dst = *src++;
+ dst += ! (*dst == '\r' && *src == '\n');
+ }
+ while (src < srclim);
+
+ buffered -= src - dst;
+ }
+ }
+
+ current->buffered = buffered;
+}
+
+/* We have found N lines in a buffer of size S; guess the
+ proportionate number of lines that will be found in a buffer of
+ size T. However, do not guess a number of lines so large that the
+ resulting line table might cause overflow in size calculations. */
+static lin
+guess_lines (lin n, size_t s, size_t t)
+{
+ size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
+ lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
+ return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
+}
+
+/* Given a vector of two file_data objects, find the identical
+ prefixes and suffixes of each object. */
+
+static void
+find_identical_ends (struct file_data filevec[])
+{
+ word *w0, *w1;
+ char *p0, *p1, *buffer0, *buffer1;
+ char const *end0, *beg0;
+ char const **linbuf0, **linbuf1;
+ lin i, lines;
+ size_t n0, n1;
+ lin alloc_lines0, alloc_lines1;
+ bool prefix_needed;
+ lin buffered_prefix, prefix_count, prefix_mask;
+ lin middle_guess, suffix_guess;
+
+ slurp (&filevec[0]);
+ prepare_text (&filevec[0]);
+ if (filevec[0].desc != filevec[1].desc)
+ {
+ slurp (&filevec[1]);
+ prepare_text (&filevec[1]);
+ }
+ else
+ {
+ filevec[1].buffer = filevec[0].buffer;
+ filevec[1].bufsize = filevec[0].bufsize;
+ filevec[1].buffered = filevec[0].buffered;
+ filevec[1].missing_newline = filevec[0].missing_newline;
+ }
+
+ /* Find identical prefix. */
+
+ w0 = filevec[0].buffer;
+ w1 = filevec[1].buffer;
+ p0 = buffer0 = (char *) w0;
+ p1 = buffer1 = (char *) w1;
+ n0 = filevec[0].buffered;
+ n1 = filevec[1].buffered;
+
+ if (p0 == p1)
+ /* The buffers are the same; sentinels won't work. */
+ p0 = p1 += n1;
+ else
+ {
+ /* Insert end sentinels, in this case characters that are guaranteed
+ to make the equality test false, and thus terminate the loop. */
+
+ if (n0 < n1)
+ p0[n0] = ~p1[n0];
+ else
+ p1[n1] = ~p0[n1];
+
+ /* Loop until first mismatch, or to the sentinel characters. */
+
+ /* Compare a word at a time for speed. */
+ while (*w0 == *w1)
+ w0++, w1++;
+
+ /* Do the last few bytes of comparison a byte at a time. */
+ p0 = (char *) w0;
+ p1 = (char *) w1;
+ while (*p0 == *p1)
+ p0++, p1++;
+
+ /* Don't mistakenly count missing newline as part of prefix. */
+ if (ROBUST_OUTPUT_STYLE (output_style)
+ && ((buffer0 + n0 - filevec[0].missing_newline < p0)
+ !=
+ (buffer1 + n1 - filevec[1].missing_newline < p1)))
+ p0--, p1--;
+ }
+
+ /* Now P0 and P1 point at the first nonmatching characters. */
+
+ /* Skip back to last line-beginning in the prefix,
+ and then discard up to HORIZON_LINES lines from the prefix. */
+ i = horizon_lines;
+ while (p0 != buffer0 && (p0[-1] != '\n' || i--))
+ p0--, p1--;
+
+ /* Record the prefix. */
+ filevec[0].prefix_end = p0;
+ filevec[1].prefix_end = p1;
+
+ /* Find identical suffix. */
+
+ /* P0 and P1 point beyond the last chars not yet compared. */
+ p0 = buffer0 + n0;
+ p1 = buffer1 + n1;
+
+ if (! ROBUST_OUTPUT_STYLE (output_style)
+ || filevec[0].missing_newline == filevec[1].missing_newline)
+ {
+ end0 = p0; /* Addr of last char in file 0. */
+
+ /* Get value of P0 at which we should stop scanning backward:
+ this is when either P0 or P1 points just past the last char
+ of the identical prefix. */
+ beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
+
+ /* Scan back until chars don't match or we reach that point. */
+ while (p0 != beg0)
+ if (*--p0 != *--p1)
+ {
+ /* Point at the first char of the matching suffix. */
+ ++p0, ++p1;
+ beg0 = p0;
+ break;
+ }
+
+ /* Are we at a line-beginning in both files? If not, add the rest of
+ this line to the main body. Discard up to HORIZON_LINES lines from
+ the identical suffix. Also, discard one extra line,
+ because shift_boundaries may need it. */
+ i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
+ &&
+ (buffer1 == p1 || p1[-1] == '\n'));
+ while (i-- && p0 != end0)
+ while (*p0++ != '\n')
+ continue;
+
+ p1 += p0 - beg0;
+ }
+
+ /* Record the suffix. */
+ filevec[0].suffix_begin = p0;
+ filevec[1].suffix_begin = p1;
+
+ /* Calculate number of lines of prefix to save.
+
+ prefix_count == 0 means save the whole prefix;
+ we need this for options like -D that output the whole file,
+ or for enormous contexts (to avoid worrying about arithmetic overflow).
+ We also need it for options like -F that output some preceding line;
+ at least we will need to find the last few lines,
+ but since we don't know how many, it's easiest to find them all.
+
+ Otherwise, prefix_count != 0. Save just prefix_count lines at start
+ of the line buffer; they'll be moved to the proper location later.
+ Handle 1 more line than the context says (because we count 1 too many),
+ rounded up to the next power of 2 to speed index computation. */
+
+ if (no_diff_means_no_output && ! function_regexp.fastmap
+ && context < LIN_MAX / 4 && context < n0)
+ {
+ middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
+ suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
+ for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
+ continue;
+ alloc_lines0 = (prefix_count + middle_guess
+ + MIN (context, suffix_guess));
+ }
+ else
+ {
+ prefix_count = 0;
+ alloc_lines0 = guess_lines (0, 0, n0);
+ }
+
+ prefix_mask = prefix_count - 1;
+ lines = 0;
+ linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
+ prefix_needed = ! (no_diff_means_no_output
+ && filevec[0].prefix_end == p0
+ && filevec[1].prefix_end == p1);
+ p0 = buffer0;
+
+ /* If the prefix is needed, find the prefix lines. */
+ if (prefix_needed)
+ {
+ end0 = filevec[0].prefix_end;
+ while (p0 != end0)
+ {
+ lin l = lines++ & prefix_mask;
+ if (l == alloc_lines0)
+ {
+ if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
+ xalloc_die ();
+ alloc_lines0 *= 2;
+ linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
+ }
+ linbuf0[l] = p0;
+ while (*p0++ != '\n')
+ continue;
+ }
+ }
+ buffered_prefix = prefix_count && context < lines ? context : lines;
+
+ /* Allocate line buffer 1. */
+
+ middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
+ suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
+ alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
+ if (alloc_lines1 < buffered_prefix
+ || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
+ xalloc_die ();
+ linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
+
+ if (buffered_prefix != lines)
+ {
+ /* Rotate prefix lines to proper location. */
+ for (i = 0; i < buffered_prefix; i++)
+ linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
+ for (i = 0; i < buffered_prefix; i++)
+ linbuf0[i] = linbuf1[i];
+ }
+
+ /* Initialize line buffer 1 from line buffer 0. */
+ for (i = 0; i < buffered_prefix; i++)
+ linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
+
+ /* Record the line buffer, adjusted so that
+ linbuf[0] points at the first differing line. */
+ filevec[0].linbuf = linbuf0 + buffered_prefix;
+ filevec[1].linbuf = linbuf1 + buffered_prefix;
+ filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
+ filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
+ filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
+ filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
+}
+
+/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
+ than 2**k. This table is derived from Chris K. Caldwell's list
+ . */
+
+static unsigned char const prime_offset[] =
+{
+ 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
+ 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
+ 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
+ 55, 93, 1, 57, 25
+};
+
+/* Verify that this host's size_t is not too wide for the above table. */
+
+verify (sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
+
+/* Given a vector of two file_data objects, read the file associated
+ with each one, and build the table of equivalence classes.
+ Return nonzero if either file appears to be a binary file.
+ If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
+
+bool
+read_files (struct file_data filevec[], bool pretend_binary)
+{
+ int i;
+ bool skip_test = text | pretend_binary;
+ bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
+
+ if (filevec[0].desc != filevec[1].desc)
+ appears_binary |= sip (&filevec[1], skip_test | appears_binary);
+ else
+ {
+ filevec[1].buffer = filevec[0].buffer;
+ filevec[1].bufsize = filevec[0].bufsize;
+ filevec[1].buffered = filevec[0].buffered;
+ }
+ if (appears_binary)
+ {
+ set_binary_mode (filevec[0].desc, O_BINARY);
+ set_binary_mode (filevec[1].desc, O_BINARY);
+ return true;
+ }
+
+ find_identical_ends (filevec);
+
+ equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
+ if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
+ xalloc_die ();
+ equivs = xmalloc (equivs_alloc * sizeof *equivs);
+ /* Equivalence class 0 is permanently safe for lines that were not
+ hashed. Real equivalence classes start at 1. */
+ equivs_index = 1;
+
+ /* Allocate (one plus) a prime number of hash buckets. Use a prime
+ number between 1/3 and 2/3 of the value of equiv_allocs,
+ approximately. */
+ for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
+ continue;
+ nbuckets = ((size_t) 1 << i) - prime_offset[i];
+ if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
+ xalloc_die ();
+ buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
+ buckets++;
+
+ for (i = 0; i < 2; i++)
+ find_and_hash_each_line (&filevec[i]);
+
+ filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
+
+ free (equivs);
+ free (buckets - 1);
+
+ return false;
+}
diff --git a/src/util.c b/src/util.c
index 88955da..3de19f8 100644
--- a/src/util.c
+++ b/src/util.c
@@ -985,7 +985,8 @@ finish_output (void)
Return nonzero if the lines differ. */
bool
-lines_differ (char const *s1, char const *s2)
+lines_differ_singlebyte (char const *s1, size_t s1len,
+ char const *s2, size_t s2len)
{
register char const *t1 = s1;
register char const *t2 = s2;
@@ -1141,6 +1142,354 @@ lines_differ (char const *s1, char const *s2)
return true;
}
+
+#ifdef HANDLE_MULTIBYTE
+# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL) \
+do \
+ { \
+ mbstate_t bak = STATE; \
+ \
+ CONVFAIL = 0; \
+ MBLENGTH = mbrtowc (&WC, T, END - T, &STATE); \
+ \
+ switch (MBLENGTH) \
+ { \
+ case (size_t)-2: \
+ case (size_t)-1: \
+ STATE = bak; \
+ ++CONVFAIL; \
+ /* Fall through. */ \
+ case 0: \
+ MBLENGTH = 1; \
+ } \
+ } \
+ while (0)
+
+bool
+lines_differ_multibyte (char const *s1, size_t s1len,
+ char const *s2, size_t s2len)
+{
+ char const *end1, *end2;
+ char c1, c2;
+ wchar_t wc1, wc2, wc1_bak, wc2_bak;
+ size_t mblen1, mblen2;
+ mbstate_t state1, state2, state1_bak, state2_bak;
+ int convfail1, convfail2, convfail1_bak, convfail2_bak;
+
+ char const *t1 = s1;
+ char const *t2 = s2;
+ char const *t1_bak, *t2_bak;
+ size_t column = 0;
+
+ if (ignore_white_space == IGNORE_NO_WHITE_SPACE && !ignore_case)
+ {
+ while (*t1 != '\n')
+ if (*t1++ != *t2++)
+ return 1;
+ return 0;
+ }
+
+ end1 = t1 + s1len;
+ end2 = t2 + s2len;
+
+ memset (&state1, '\0', sizeof (mbstate_t));
+ memset (&state2, '\0', sizeof (mbstate_t));
+
+ while (1)
+ {
+ c1 = *t1;
+ c2 = *t2;
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
+
+ /* Test for exact char equality first, since it's a common case. */
+ if (convfail1 ^ convfail2)
+ break;
+ else if (convfail1 && convfail2 && c1 != c2)
+ break;
+ else if (!convfail1 && !convfail2 && wc1 != wc2)
+ {
+ switch (ignore_white_space)
+ {
+ case IGNORE_ALL_SPACE:
+ /* For -w, just skip past any white space. */
+ while (1)
+ {
+ if (convfail1)
+ break;
+ else if (wc1 == L'\n' || !iswspace (wc1))
+ break;
+
+ t1 += mblen1;
+ c1 = *t1;
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
+ }
+
+ while (1)
+ {
+ if (convfail2)
+ break;
+ else if (wc2 == L'\n' || !iswspace (wc2))
+ break;
+
+ t2 += mblen2;
+ c2 = *t2;
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
+ }
+ t1 += mblen1;
+ t2 += mblen2;
+ break;
+
+ case IGNORE_SPACE_CHANGE:
+ /* For -b, advance past any sequence of white space in
+ line 1 and consider it just one space, or nothing at
+ all if it is at the end of the line. */
+ if (wc1 != L'\n' && iswspace (wc1))
+ {
+ size_t mblen_bak;
+ mbstate_t state_bak;
+
+ do
+ {
+ t1 += mblen1;
+ mblen_bak = mblen1;
+ state_bak = state1;
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
+ }
+ while (!convfail1 && (wc1 != L'\n' && iswspace (wc1)));
+
+ state1 = state_bak;
+ mblen1 = mblen_bak;
+ t1 -= mblen1;
+ convfail1 = 0;
+ wc1 = L' ';
+ }
+
+ /* Likewise for line 2. */
+ if (wc2 != L'\n' && iswspace (wc2))
+ {
+ size_t mblen_bak;
+ mbstate_t state_bak;
+
+ do
+ {
+ t2 += mblen2;
+ mblen_bak = mblen2;
+ state_bak = state2;
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
+ }
+ while (!convfail2 && (wc2 != L'\n' && iswspace (wc2)));
+
+ state2 = state_bak;
+ mblen2 = mblen_bak;
+ t2 -= mblen2;
+ convfail2 = 0;
+ wc2 = L' ';
+ }
+
+ if (wc1 != wc2)
+ {
+ /* If we went too far when doing the simple test for
+ equality, go back to the first non-whitespace
+ character in both sides and try again. */
+ if (wc2 == L' ' && wc1 != L'\n' &&
+ t1 > s1 &&
+ !convfail1_bak && iswspace (wc1_bak))
+ {
+ t1 = t1_bak;
+ wc1 = wc1_bak;
+ state1 = state1_bak;
+ convfail1 = convfail1_bak;
+ continue;
+ }
+ if (wc1 == L' ' && wc2 != L'\n'
+ && t2 > s2
+ && !convfail2_bak && iswspace (wc2_bak))
+ {
+ t2 = t2_bak;
+ wc2 = wc2_bak;
+ state2 = state2_bak;
+ convfail2 = convfail2_bak;
+ continue;
+ }
+ }
+
+ t1_bak = t1; t2_bak = t2;
+ wc1_bak = wc1; wc2_bak = wc2;
+ state1_bak = state1; state2_bak = state2;
+ convfail1_bak = convfail1; convfail2_bak = convfail2;
+
+ if (wc1 == L'\n')
+ wc1 = L' ';
+ else
+ t1 += mblen1;
+
+ if (wc2 == L'\n')
+ wc2 = L' ';
+ else
+ t2 += mblen2;
+
+ break;
+
+ case IGNORE_TRAILING_SPACE:
+ case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+ if (iswspace (wc1) && iswspace (wc2))
+ {
+ char const *p;
+ wchar_t wc;
+ size_t mblength;
+ int convfail;
+ mbstate_t state;
+ bool just_whitespace_left = 1;
+ if (wc1 != L'\n')
+ {
+ mblength = mblen1;
+ p = t1;
+ memset (&state, '\0', sizeof(mbstate_t));
+ while (p < end1)
+ {
+ if (*p == '\n')
+ break;
+
+ p += mblength;
+ MBC2WC (p, end1, mblength, wc, state, convfail);
+ if (convfail || !iswspace (wc))
+ {
+ just_whitespace_left = 0;
+ break;
+ }
+ }
+ }
+ if (just_whitespace_left && wc2 != L'\n')
+ {
+ mblength = mblen2;
+ p = t2;
+ memset (&state, '\0', sizeof(mbstate_t));
+ while (p < end2)
+ {
+ if (*p == '\n')
+ break;
+
+ p += mblength;
+ MBC2WC (p, end2, mblength, wc, state, convfail);
+ if (convfail || !iswspace (wc))
+ {
+ just_whitespace_left = 0;
+ break;
+ }
+ }
+ }
+
+ if (just_whitespace_left)
+ /* Both lines have nothing but whitespace left. */
+ return false;
+ }
+
+ if (ignore_white_space == IGNORE_TRAILING_SPACE)
+ break;
+ /* Fall through. */
+ case IGNORE_TAB_EXPANSION:
+ if ((wc1 == L' ' && wc2 == L'\t')
+ || (wc1 == L'\t' && wc2 == L' '))
+ {
+ size_t column2 = column;
+
+ while (1)
+ {
+ if (convfail1)
+ {
+ ++t1;
+ break;
+ }
+ else if (wc1 == L' ')
+ column++;
+ else if (wc1 == L'\t')
+ column += tabsize - column % tabsize;
+ else
+ {
+ t1 += mblen1;
+ break;
+ }
+
+ t1 += mblen1;
+ c1 = *t1;
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
+ }
+
+ while (1)
+ {
+ if (convfail2)
+ {
+ ++t2;
+ break;
+ }
+ else if (wc2 == L' ')
+ column2++;
+ else if (wc2 == L'\t')
+ column2 += tabsize - column2 % tabsize;
+ else
+ {
+ t2 += mblen2;
+ break;
+ }
+
+ t2 += mblen2;
+ c2 = *t2;
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
+ }
+
+ if (column != column2)
+ return 1;
+ }
+ else
+ {
+ t1 += mblen1;
+ t2 += mblen2;
+ }
+ break;
+
+ case IGNORE_NO_WHITE_SPACE:
+ t1 += mblen1;
+ t2 += mblen2;
+ break;
+ }
+
+ /* Lowercase all letters if -i is specified. */
+ if (ignore_case)
+ {
+ if (!convfail1)
+ wc1 = towlower (wc1);
+ if (!convfail2)
+ wc2 = towlower (wc2);
+ }
+
+ if (convfail1 ^ convfail2)
+ break;
+ else if (convfail1 && convfail2 && c1 != c2)
+ break;
+ else if (!convfail1 && !convfail2 && wc1 != wc2)
+ break;
+ }
+ else
+ {
+ t1_bak = t1; t2_bak = t2;
+ wc1_bak = wc1; wc2_bak = wc2;
+ state1_bak = state1; state2_bak = state2;
+ convfail1_bak = convfail1; convfail2_bak = convfail2;
+
+ t1 += mblen1; t2 += mblen2;
+ }
+
+ if (!convfail1 && wc1 == L'\n')
+ return 0;
+
+ column += convfail1 ? 1 :
+ (wc1 == L'\t') ? tabsize - column % tabsize : wcwidth (wc1);
+ }
+
+ return 1;
+}
+#endif
/* Find the consecutive changes at the start of the script START.
Return the last link before the first gap. */
diff --git a/src/util.c.i18n b/src/util.c.i18n
new file mode 100644
index 0000000..88955da
--- /dev/null
+++ b/src/util.c.i18n
@@ -0,0 +1,1576 @@
+/* Support routines for GNU DIFF.
+
+ Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2013,
+ 2015-2017 Free Software Foundation, Inc.
+
+ This file is part of GNU DIFF.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see . */
+
+#include "diff.h"
+#include "argmatch.h"
+#include "die.h"
+#include
+#include
+#include
+#include
+#include "xvasprintf.h"
+#include
+
+/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
+ present. */
+#ifndef SA_NOCLDSTOP
+# define SA_NOCLDSTOP 0
+# define sigprocmask(How, Set, Oset) /* empty */
+# define sigset_t int
+# if ! HAVE_SIGINTERRUPT
+# define siginterrupt(sig, flag) /* empty */
+# endif
+#endif
+
+#ifndef SA_RESTART
+# define SA_RESTART 0
+#endif
+
+char const pr_program[] = PR_PROGRAM;
+
+/* Queue up one-line messages to be printed at the end,
+ when -l is specified. Each message is recorded with a 'struct msg'. */
+
+struct msg
+{
+ struct msg *next;
+ char args[1]; /* Format + 4 args, each '\0' terminated, concatenated. */
+};
+
+/* Head of the chain of queues messages. */
+
+static struct msg *msg_chain;
+
+/* Tail of the chain of queues messages. */
+
+static struct msg **msg_chain_end = &msg_chain;
+
+/* Use when a system call returns non-zero status.
+ NAME should normally be the file name. */
+
+void
+perror_with_name (char const *name)
+{
+ error (0, errno, "%s", name);
+}
+
+/* Use when a system call returns non-zero status and that is fatal. */
+
+void
+pfatal_with_name (char const *name)
+{
+ int e = errno;
+ print_message_queue ();
+ die (EXIT_TROUBLE, e, "%s", name);
+}
+
+/* Print an error message containing MSGID, then exit. */
+
+void
+fatal (char const *msgid)
+{
+ print_message_queue ();
+ die (EXIT_TROUBLE, 0, "%s", _(msgid));
+}
+
+/* Like printf, except if -l in effect then save the message and print later.
+ This is used for things like "Only in ...". */
+
+void
+message (char const *format_msgid, char const *arg1, char const *arg2)
+{
+ message5 (format_msgid, arg1, arg2, 0, 0);
+}
+
+void
+message5 (char const *format_msgid, char const *arg1, char const *arg2,
+ char const *arg3, char const *arg4)
+{
+ if (paginate)
+ {
+ char *p;
+ char const *arg[5];
+ int i;
+ size_t size[5];
+ size_t total_size = offsetof (struct msg, args);
+ struct msg *new;
+
+ arg[0] = format_msgid;
+ arg[1] = arg1;
+ arg[2] = arg2;
+ arg[3] = arg3 ? arg3 : "";
+ arg[4] = arg4 ? arg4 : "";
+
+ for (i = 0; i < 5; i++)
+ total_size += size[i] = strlen (arg[i]) + 1;
+
+ new = xmalloc (total_size);
+
+ for (i = 0, p = new->args; i < 5; p += size[i++])
+ memcpy (p, arg[i], size[i]);
+
+ *msg_chain_end = new;
+ new->next = 0;
+ msg_chain_end = &new->next;
+ }
+ else
+ {
+ if (sdiff_merge_assist)
+ putchar (' ');
+ printf (_(format_msgid), arg1, arg2, arg3, arg4);
+ }
+}
+
+/* Output all the messages that were saved up by calls to 'message'. */
+
+void
+print_message_queue (void)
+{
+ char const *arg[5];
+ int i;
+ struct msg *m = msg_chain;
+
+ while (m)
+ {
+ struct msg *next = m->next;
+ arg[0] = m->args;
+ for (i = 0; i < 4; i++)
+ arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
+ printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
+ free (m);
+ m = next;
+ }
+}
+
+/* The set of signals that are caught. */
+
+static sigset_t caught_signals;
+
+/* If nonzero, the value of the pending fatal signal. */
+
+static sig_atomic_t volatile interrupt_signal;
+
+/* A count of the number of pending stop signals that have been received. */
+
+static sig_atomic_t volatile stop_signal_count;
+
+/* An ordinary signal was received; arrange for the program to exit. */
+
+static void
+sighandler (int sig)
+{
+ if (! SA_NOCLDSTOP)
+ signal (sig, SIG_IGN);
+ if (! interrupt_signal)
+ interrupt_signal = sig;
+}
+
+/* A SIGTSTP was received; arrange for the program to suspend itself. */
+
+static void
+stophandler (int sig)
+{
+ if (! SA_NOCLDSTOP)
+ signal (sig, stophandler);
+ if (! interrupt_signal)
+ stop_signal_count++;
+}
+/* Process any pending signals. If signals are caught, this function
+ should be called periodically. Ideally there should never be an
+ unbounded amount of time when signals are not being processed.
+ Signal handling can restore the default colors, so callers must
+ immediately change colors after invoking this function. */
+
+static void
+process_signals (void)
+{
+ while (interrupt_signal || stop_signal_count)
+ {
+ int sig;
+ int stops;
+ sigset_t oldset;
+
+ set_color_context (RESET_CONTEXT);
+ fflush (stdout);
+
+ sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
+
+ /* Reload interrupt_signal and stop_signal_count, in case a new
+ signal was handled before sigprocmask took effect. */
+ sig = interrupt_signal;
+ stops = stop_signal_count;
+
+ /* SIGTSTP is special, since the application can receive that signal
+ more than once. In this case, don't set the signal handler to the
+ default. Instead, just raise the uncatchable SIGSTOP. */
+ if (stops)
+ {
+ stop_signal_count = stops - 1;
+ sig = SIGSTOP;
+ }
+ else
+ signal (sig, SIG_DFL);
+
+ /* Exit or suspend the program. */
+ raise (sig);
+ sigprocmask (SIG_SETMASK, &oldset, NULL);
+
+ /* If execution reaches here, then the program has been
+ continued (after being suspended). */
+ }
+}
+
+static void
+install_signal_handlers (void)
+{
+ /* The signals that are trapped, and the number of such signals. */
+ static int const sig[] =
+ {
+ /* This one is handled specially. */
+ SIGTSTP,
+
+ /* The usual suspects. */
+ SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
+#ifdef SIGPOLL
+ SIGPOLL,
+#endif
+#ifdef SIGPROF
+ SIGPROF,
+#endif
+#ifdef SIGVTALRM
+ SIGVTALRM,
+#endif
+#ifdef SIGXCPU
+ SIGXCPU,
+#endif
+#ifdef SIGXFSZ
+ SIGXFSZ,
+#endif
+ };
+ enum { nsigs = sizeof (sig) / sizeof *(sig) };
+
+#if ! SA_NOCLDSTOP
+ bool caught_sig[nsigs];
+#endif
+ {
+ int j;
+#if SA_NOCLDSTOP
+ struct sigaction act;
+
+ sigemptyset (&caught_signals);
+ for (j = 0; j < nsigs; j++)
+ {
+ sigaction (sig[j], NULL, &act);
+ if (act.sa_handler != SIG_IGN)
+ sigaddset (&caught_signals, sig[j]);
+ }
+
+ act.sa_mask = caught_signals;
+ act.sa_flags = SA_RESTART;
+
+ for (j = 0; j < nsigs; j++)
+ if (sigismember (&caught_signals, sig[j]))
+ {
+ act.sa_handler = sig[j] == SIGTSTP ? stophandler : sighandler;
+ sigaction (sig[j], &act, NULL);
+ }
+#else
+ for (j = 0; j < nsigs; j++)
+ {
+ caught_sig[j] = (signal (sig[j], SIG_IGN) != SIG_IGN);
+ if (caught_sig[j])
+ {
+ signal (sig[j], sig[j] == SIGTSTP ? stophandler : sighandler);
+ siginterrupt (sig[j], 0);
+ }
+ }
+#endif
+ }
+}
+
+static char const *current_name0;
+static char const *current_name1;
+static bool currently_recursive;
+static bool colors_enabled;
+
+static struct color_ext_type *color_ext_list = NULL;
+
+struct bin_str
+ {
+ size_t len; /* Number of bytes */
+ const char *string; /* Pointer to the same */
+ };
+
+struct color_ext_type
+ {
+ struct bin_str ext; /* The extension we're looking for */
+ struct bin_str seq; /* The sequence to output when we do */
+ struct color_ext_type *next; /* Next in list */
+ };
+
+/* Parse a string as part of the --palette argument; this may involve
+ decoding all kinds of escape characters. If equals_end is set an
+ unescaped equal sign ends the string, otherwise only a : or \0
+ does. Set *OUTPUT_COUNT to the number of bytes output. Return
+ true if successful.
+
+ The resulting string is *not* null-terminated, but may contain
+ embedded nulls.
+
+ Note that both dest and src are char **; on return they point to
+ the first free byte after the array and the character that ended
+ the input string, respectively. */
+
+static bool
+get_funky_string (char **dest, const char **src, bool equals_end,
+ size_t *output_count)
+{
+ char num; /* For numerical codes */
+ size_t count; /* Something to count with */
+ enum {
+ ST_GND, ST_BACKSLASH, ST_OCTAL, ST_HEX, ST_CARET, ST_END, ST_ERROR
+ } state;
+ const char *p;
+ char *q;
+
+ p = *src; /* We don't want to double-indirect */
+ q = *dest; /* the whole darn time. */
+
+ count = 0; /* No characters counted in yet. */
+ num = 0;
+
+ state = ST_GND; /* Start in ground state. */
+ while (state < ST_END)
+ {
+ switch (state)
+ {
+ case ST_GND: /* Ground state (no escapes) */
+ switch (*p)
+ {
+ case ':':
+ case '\0':
+ state = ST_END; /* End of string */
+ break;
+ case '\\':
+ state = ST_BACKSLASH; /* Backslash scape sequence */
+ ++p;
+ break;
+ case '^':
+ state = ST_CARET; /* Caret escape */
+ ++p;
+ break;
+ case '=':
+ if (equals_end)
+ {
+ state = ST_END; /* End */
+ break;
+ }
+ FALLTHROUGH;
+ default:
+ *(q++) = *(p++);
+ ++count;
+ break;
+ }
+ break;
+
+ case ST_BACKSLASH: /* Backslash escaped character */
+ switch (*p)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ state = ST_OCTAL; /* Octal sequence */
+ num = *p - '0';
+ break;
+ case 'x':
+ case 'X':
+ state = ST_HEX; /* Hex sequence */
+ num = 0;
+ break;
+ case 'a': /* Bell */
+ num = '\a';
+ break;
+ case 'b': /* Backspace */
+ num = '\b';
+ break;
+ case 'e': /* Escape */
+ num = 27;
+ break;
+ case 'f': /* Form feed */
+ num = '\f';
+ break;
+ case 'n': /* Newline */
+ num = '\n';
+ break;
+ case 'r': /* Carriage return */
+ num = '\r';
+ break;
+ case 't': /* Tab */
+ num = '\t';
+ break;
+ case 'v': /* Vtab */
+ num = '\v';
+ break;
+ case '?': /* Delete */
+ num = 127;
+ break;
+ case '_': /* Space */
+ num = ' ';
+ break;
+ case '\0': /* End of string */
+ state = ST_ERROR; /* Error! */
+ break;
+ default: /* Escaped character like \ ^ : = */
+ num = *p;
+ break;
+ }
+ if (state == ST_BACKSLASH)
+ {
+ *(q++) = num;
+ ++count;
+ state = ST_GND;
+ }
+ ++p;
+ break;
+
+ case ST_OCTAL: /* Octal sequence */
+ if (*p < '0' || *p > '7')
+ {
+ *(q++) = num;
+ ++count;
+ state = ST_GND;
+ }
+ else
+ num = (num << 3) + (*(p++) - '0');
+ break;
+
+ case ST_HEX: /* Hex sequence */
+ switch (*p)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ num = (num << 4) + (*(p++) - '0');
+ break;
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ num = (num << 4) + (*(p++) - 'a') + 10;
+ break;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ num = (num << 4) + (*(p++) - 'A') + 10;
+ break;
+ default:
+ *(q++) = num;
+ ++count;
+ state = ST_GND;
+ break;
+ }
+ break;
+
+ case ST_CARET: /* Caret escape */
+ state = ST_GND; /* Should be the next state... */
+ if (*p >= '@' && *p <= '~')
+ {
+ *(q++) = *(p++) & 037;
+ ++count;
+ }
+ else if (*p == '?')
+ {
+ *(q++) = 127;
+ ++count;
+ }
+ else
+ state = ST_ERROR;
+ break;
+
+ default:
+ abort ();
+ }
+ }
+
+ *dest = q;
+ *src = p;
+ *output_count = count;
+
+ return state != ST_ERROR;
+}
+
+enum parse_state
+ {
+ PS_START = 1,
+ PS_2,
+ PS_3,
+ PS_4,
+ PS_DONE,
+ PS_FAIL
+ };
+
+#define LEN_STR_PAIR(s) sizeof (s) - 1, s
+
+static struct bin_str color_indicator[] =
+ {
+ { LEN_STR_PAIR ("\033[") }, /* lc: Left of color sequence */
+ { LEN_STR_PAIR ("m") }, /* rc: Right of color sequence */
+ { 0, NULL }, /* ec: End color (replaces lc+rs+rc) */
+ { LEN_STR_PAIR ("0") }, /* rs: Reset to ordinary colors */
+ { LEN_STR_PAIR ("1") }, /* hd: Header */
+ { LEN_STR_PAIR ("32") }, /* ad: Add line */
+ { LEN_STR_PAIR ("31") }, /* de: Delete line */
+ { LEN_STR_PAIR ("36") }, /* ln: Line number */
+ };
+
+static const char *const indicator_name[] =
+ {
+ "lc", "rc", "ec", "rs", "hd", "ad", "de", "ln", NULL
+ };
+ARGMATCH_VERIFY (indicator_name, color_indicator);
+
+static char const *color_palette;
+
+void
+set_color_palette (char const *palette)
+{
+ color_palette = palette;
+}
+
+static void
+parse_diff_color (void)
+{
+ char *color_buf;
+ const char *p; /* Pointer to character being parsed */
+ char *buf; /* color_buf buffer pointer */
+ int ind_no; /* Indicator number */
+ char label[3]; /* Indicator label */
+ struct color_ext_type *ext; /* Extension we are working on */
+
+ if ((p = color_palette) == NULL || *p == '\0')
+ return;
+
+ ext = NULL;
+ strcpy (label, "??");
+
+ /* This is an overly conservative estimate, but any possible
+ --palette string will *not* generate a color_buf longer than
+ itself, so it is a safe way of allocating a buffer in
+ advance. */
+ buf = color_buf = xstrdup (p);
+
+ enum parse_state state = PS_START;
+ while (true)
+ {
+ switch (state)
+ {
+ case PS_START: /* First label character */
+ switch (*p)
+ {
+ case ':':
+ ++p;
+ break;
+
+ case '*':
+ /* Allocate new extension block and add to head of
+ linked list (this way a later definition will
+ override an earlier one, which can be useful for
+ having terminal-specific defs override global). */
+
+ ext = xmalloc (sizeof *ext);
+ ext->next = color_ext_list;
+ color_ext_list = ext;
+
+ ++p;
+ ext->ext.string = buf;
+
+ state = (get_funky_string (&buf, &p, true, &ext->ext.len)
+ ? PS_4 : PS_FAIL);
+ break;
+
+ case '\0':
+ state = PS_DONE; /* Done! */
+ goto done;
+
+ default: /* Assume it is file type label */
+ label[0] = *(p++);
+ state = PS_2;
+ break;
+ }
+ break;
+
+ case PS_2: /* Second label character */
+ if (*p)
+ {
+ label[1] = *(p++);
+ state = PS_3;
+ }
+ else
+ state = PS_FAIL; /* Error */
+ break;
+
+ case PS_3: /* Equal sign after indicator label */
+ state = PS_FAIL; /* Assume failure... */
+ if (*(p++) == '=')/* It *should* be... */
+ {
+ for (ind_no = 0; indicator_name[ind_no] != NULL; ++ind_no)
+ {
+ if (STREQ (label, indicator_name[ind_no]))
+ {
+ color_indicator[ind_no].string = buf;
+ state = (get_funky_string (&buf, &p, false,
+ &color_indicator[ind_no].len)
+ ? PS_START : PS_FAIL);
+ break;
+ }
+ }
+ if (state == PS_FAIL)
+ error (0, 0, _("unrecognized prefix: %s"), label);
+ }
+ break;
+
+ case PS_4: /* Equal sign after *.ext */
+ if (*(p++) == '=')
+ {
+ ext->seq.string = buf;
+ state = (get_funky_string (&buf, &p, false, &ext->seq.len)
+ ? PS_START : PS_FAIL);
+ }
+ else
+ state = PS_FAIL;
+ break;
+
+ case PS_FAIL:
+ goto done;
+
+ default:
+ abort ();
+ }
+ }
+ done:
+
+ if (state == PS_FAIL)
+ {
+ struct color_ext_type *e;
+ struct color_ext_type *e2;
+
+ error (0, 0,
+ _("unparsable value for --palette"));
+ free (color_buf);
+ for (e = color_ext_list; e != NULL; /* empty */)
+ {
+ e2 = e;
+ e = e->next;
+ free (e2);
+ }
+ colors_enabled = false;
+ }
+}
+
+static void
+check_color_output (bool is_pipe)
+{
+ bool output_is_tty;
+
+ if (! outfile || colors_style == NEVER)
+ return;
+
+ output_is_tty = presume_output_tty || (!is_pipe && isatty (fileno (outfile)));
+
+ colors_enabled = (colors_style == ALWAYS
+ || (colors_style == AUTO && output_is_tty));
+
+ if (colors_enabled)
+ parse_diff_color ();
+
+ if (output_is_tty)
+ install_signal_handlers ();
+}
+
+/* Call before outputting the results of comparing files NAME0 and NAME1
+ to set up OUTFILE, the stdio stream for the output to go to.
+
+ Usually, OUTFILE is just stdout. But when -l was specified
+ we fork off a 'pr' and make OUTFILE a pipe to it.
+ 'pr' then outputs to our stdout. */
+
+void
+setup_output (char const *name0, char const *name1, bool recursive)
+{
+ current_name0 = name0;
+ current_name1 = name1;
+ currently_recursive = recursive;
+ outfile = 0;
+}
+
+#if HAVE_WORKING_FORK
+static pid_t pr_pid;
+#endif
+
+static char c_escape_char (char c)
+{
+ switch (c) {
+ case '\a': return 'a';
+ case '\b': return 'b';
+ case '\t': return 't';
+ case '\n': return 'n';
+ case '\v': return 'v';
+ case '\f': return 'f';
+ case '\r': return 'r';
+ case '"': return '"';
+ case '\\': return '\\';
+ default:
+ return c < 32;
+ }
+}
+
+static char *
+c_escape (char const *str)
+{
+ char const *s;
+ size_t plus = 0;
+ bool must_quote = false;
+
+ for (s = str; *s; s++)
+ {
+ char c = *s;
+
+ if (c == ' ')
+ {
+ must_quote = true;
+ continue;
+ }
+ switch (c_escape_char (*s))
+ {
+ case 1:
+ plus += 3;
+ /* fall through */
+ case 0:
+ break;
+ default:
+ plus++;
+ break;
+ }
+ }
+
+ if (must_quote || plus)
+ {
+ size_t s_len = s - str;
+ char *buffer = xmalloc (s_len + plus + 3);
+ char *b = buffer;
+
+ *b++ = '"';
+ for (s = str; *s; s++)
+ {
+ char c = *s;
+ char escape = c_escape_char (c);
+
+ switch (escape)
+ {
+ case 0:
+ *b++ = c;
+ break;
+ case 1:
+ *b++ = '\\';
+ *b++ = ((c >> 6) & 03) + '0';
+ *b++ = ((c >> 3) & 07) + '0';
+ *b++ = ((c >> 0) & 07) + '0';
+ break;
+ default:
+ *b++ = '\\';
+ *b++ = escape;
+ break;
+ }
+ }
+ *b++ = '"';
+ *b = 0;
+ return buffer;
+ }
+
+ return (char *) str;
+}
+
+void
+begin_output (void)
+{
+ char *names[2];
+ char *name;
+
+ if (outfile != 0)
+ return;
+
+ names[0] = c_escape (current_name0);
+ names[1] = c_escape (current_name1);
+
+ /* Construct the header of this piece of diff. */
+ /* POSIX 1003.1-2001 specifies this format. But there are some bugs in
+ the standard: it says that we must print only the last component
+ of the pathnames, and it requires two spaces after "diff" if
+ there are no options. These requirements are silly and do not
+ match historical practice. */
+ name = xasprintf ("diff%s %s %s", switch_string, names[0], names[1]);
+
+ if (paginate)
+ {
+ char const *argv[4];
+
+ if (fflush (stdout) != 0)
+ pfatal_with_name (_("write failed"));
+
+ argv[0] = pr_program;
+ argv[1] = "-h";
+ argv[2] = name;
+ argv[3] = 0;
+
+ /* Make OUTFILE a pipe to a subsidiary 'pr'. */
+ {
+#if HAVE_WORKING_FORK
+ int pipes[2];
+
+ if (pipe (pipes) != 0)
+ pfatal_with_name ("pipe");
+
+ pr_pid = fork ();
+ if (pr_pid < 0)
+ pfatal_with_name ("fork");
+
+ if (pr_pid == 0)
+ {
+ close (pipes[1]);
+ if (pipes[0] != STDIN_FILENO)
+ {
+ if (dup2 (pipes[0], STDIN_FILENO) < 0)
+ pfatal_with_name ("dup2");
+ close (pipes[0]);
+ }
+
+ execv (pr_program, (char **) argv);
+ _exit (errno == ENOENT ? 127 : 126);
+ }
+ else
+ {
+ close (pipes[0]);
+ outfile = fdopen (pipes[1], "w");
+ if (!outfile)
+ pfatal_with_name ("fdopen");
+ check_color_output (true);
+ }
+#else
+ char *command = system_quote_argv (SCI_SYSTEM, (char **) argv);
+ errno = 0;
+ outfile = popen (command, "w");
+ if (!outfile)
+ pfatal_with_name (command);
+ check_color_output (true);
+ free (command);
+#endif
+ }
+ }
+ else
+ {
+
+ /* If -l was not specified, output the diff straight to 'stdout'. */
+
+ outfile = stdout;
+ check_color_output (false);
+
+ /* If handling multiple files (because scanning a directory),
+ print which files the following output is about. */
+ if (currently_recursive)
+ printf ("%s\n", name);
+ }
+
+ free (name);
+
+ /* A special header is needed at the beginning of context output. */
+ switch (output_style)
+ {
+ case OUTPUT_CONTEXT:
+ print_context_header (files, (char const *const *)names, false);
+ break;
+
+ case OUTPUT_UNIFIED:
+ print_context_header (files, (char const *const *)names, true);
+ break;
+
+ default:
+ break;
+ }
+
+ if (names[0] != current_name0)
+ free (names[0]);
+ if (names[1] != current_name1)
+ free (names[1]);
+}
+
+/* Call after the end of output of diffs for one file.
+ Close OUTFILE and get rid of the 'pr' subfork. */
+
+void
+finish_output (void)
+{
+ if (outfile != 0 && outfile != stdout)
+ {
+ int status;
+ int wstatus;
+ int werrno = 0;
+ if (ferror (outfile))
+ fatal ("write failed");
+#if ! HAVE_WORKING_FORK
+ wstatus = pclose (outfile);
+ if (wstatus == -1)
+ werrno = errno;
+#else
+ if (fclose (outfile) != 0)
+ pfatal_with_name (_("write failed"));
+ if (waitpid (pr_pid, &wstatus, 0) < 0)
+ pfatal_with_name ("waitpid");
+#endif
+ status = (! werrno && WIFEXITED (wstatus)
+ ? WEXITSTATUS (wstatus)
+ : INT_MAX);
+ if (status)
+ die (EXIT_TROUBLE, werrno,
+ _(status == 126
+ ? "subsidiary program '%s' could not be invoked"
+ : status == 127
+ ? "subsidiary program '%s' not found"
+ : status == INT_MAX
+ ? "subsidiary program '%s' failed"
+ : "subsidiary program '%s' failed (exit status %d)"),
+ pr_program, status);
+ }
+
+ outfile = 0;
+}
+
+/* Compare two lines (typically one from each input file)
+ according to the command line options.
+ For efficiency, this is invoked only when the lines do not match exactly
+ but an option like -i might cause us to ignore the difference.
+ Return nonzero if the lines differ. */
+
+bool
+lines_differ (char const *s1, char const *s2)
+{
+ register char const *t1 = s1;
+ register char const *t2 = s2;
+ size_t column = 0;
+
+ while (1)
+ {
+ register unsigned char c1 = *t1++;
+ register unsigned char c2 = *t2++;
+
+ /* Test for exact char equality first, since it's a common case. */
+ if (c1 != c2)
+ {
+ switch (ignore_white_space)
+ {
+ case IGNORE_ALL_SPACE:
+ /* For -w, just skip past any white space. */
+ while (isspace (c1) && c1 != '\n') c1 = *t1++;
+ while (isspace (c2) && c2 != '\n') c2 = *t2++;
+ break;
+
+ case IGNORE_SPACE_CHANGE:
+ /* For -b, advance past any sequence of white space in
+ line 1 and consider it just one space, or nothing at
+ all if it is at the end of the line. */
+ if (isspace (c1))
+ {
+ while (c1 != '\n')
+ {
+ c1 = *t1++;
+ if (! isspace (c1))
+ {
+ --t1;
+ c1 = ' ';
+ break;
+ }
+ }
+ }
+
+ /* Likewise for line 2. */
+ if (isspace (c2))
+ {
+ while (c2 != '\n')
+ {
+ c2 = *t2++;
+ if (! isspace (c2))
+ {
+ --t2;
+ c2 = ' ';
+ break;
+ }
+ }
+ }
+
+ if (c1 != c2)
+ {
+ /* If we went too far when doing the simple test
+ for equality, go back to the first non-white-space
+ character in both sides and try again. */
+ if (c2 == ' ' && c1 != '\n'
+ && s1 + 1 < t1
+ && isspace ((unsigned char) t1[-2]))
+ {
+ --t1;
+ continue;
+ }
+ if (c1 == ' ' && c2 != '\n'
+ && s2 + 1 < t2
+ && isspace ((unsigned char) t2[-2]))
+ {
+ --t2;
+ continue;
+ }
+ }
+
+ break;
+
+ case IGNORE_TRAILING_SPACE:
+ case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+ if (isspace (c1) && isspace (c2))
+ {
+ unsigned char c;
+ if (c1 != '\n')
+ {
+ char const *p = t1;
+ while ((c = *p) != '\n' && isspace (c))
+ ++p;
+ if (c != '\n')
+ break;
+ }
+ if (c2 != '\n')
+ {
+ char const *p = t2;
+ while ((c = *p) != '\n' && isspace (c))
+ ++p;
+ if (c != '\n')
+ break;
+ }
+ /* Both lines have nothing but whitespace left. */
+ return false;
+ }
+ if (ignore_white_space == IGNORE_TRAILING_SPACE)
+ break;
+ FALLTHROUGH;
+ case IGNORE_TAB_EXPANSION:
+ if ((c1 == ' ' && c2 == '\t')
+ || (c1 == '\t' && c2 == ' '))
+ {
+ size_t column2 = column;
+ for (;; c1 = *t1++)
+ {
+ if (c1 == ' ')
+ column++;
+ else if (c1 == '\t')
+ column += tabsize - column % tabsize;
+ else
+ break;
+ }
+ for (;; c2 = *t2++)
+ {
+ if (c2 == ' ')
+ column2++;
+ else if (c2 == '\t')
+ column2 += tabsize - column2 % tabsize;
+ else
+ break;
+ }
+ if (column != column2)
+ return true;
+ }
+ break;
+
+ case IGNORE_NO_WHITE_SPACE:
+ break;
+ }
+
+ /* Lowercase all letters if -i is specified. */
+
+ if (ignore_case)
+ {
+ c1 = tolower (c1);
+ c2 = tolower (c2);
+ }
+
+ if (c1 != c2)
+ break;
+ }
+ if (c1 == '\n')
+ return false;
+
+ column += c1 == '\t' ? tabsize - column % tabsize : 1;
+ }
+
+ return true;
+}
+
+/* Find the consecutive changes at the start of the script START.
+ Return the last link before the first gap. */
+
+struct change * _GL_ATTRIBUTE_CONST
+find_change (struct change *start)
+{
+ return start;
+}
+
+struct change * _GL_ATTRIBUTE_CONST
+find_reverse_change (struct change *start)
+{
+ return start;
+}
+
+/* Divide SCRIPT into pieces by calling HUNKFUN and
+ print each piece with PRINTFUN.
+ Both functions take one arg, an edit script.
+
+ HUNKFUN is called with the tail of the script
+ and returns the last link that belongs together with the start
+ of the tail.
+
+ PRINTFUN takes a subscript which belongs together (with a null
+ link at the end) and prints it. */
+
+void
+print_script (struct change *script,
+ struct change * (*hunkfun) (struct change *),
+ void (*printfun) (struct change *))
+{
+ struct change *next = script;
+
+ while (next)
+ {
+ struct change *this, *end;
+
+ /* Find a set of changes that belong together. */
+ this = next;
+ end = (*hunkfun) (next);
+
+ /* Disconnect them from the rest of the changes,
+ making them a hunk, and remember the rest for next iteration. */
+ next = end->link;
+ end->link = 0;
+#ifdef DEBUG
+ debug_script (this);
+#endif
+
+ /* Print this hunk. */
+ (*printfun) (this);
+
+ /* Reconnect the script so it will all be freed properly. */
+ end->link = next;
+ }
+}
+
+/* Print the text of a single line LINE,
+ flagging it with the characters in LINE_FLAG (which say whether
+ the line is inserted, deleted, changed, etc.). LINE_FLAG must not
+ end in a blank, unless it is a single blank. */
+
+void
+print_1_line (char const *line_flag, char const *const *line)
+{
+ print_1_line_nl (line_flag, line, false);
+}
+
+/* Print the text of a single line LINE,
+ flagging it with the characters in LINE_FLAG (which say whether
+ the line is inserted, deleted, changed, etc.). LINE_FLAG must not
+ end in a blank, unless it is a single blank. If SKIP_NL is set, then
+ the final '\n' is not printed. */
+
+void
+print_1_line_nl (char const *line_flag, char const *const *line, bool skip_nl)
+{
+ char const *base = line[0], *limit = line[1]; /* Help the compiler. */
+ FILE *out = outfile; /* Help the compiler some more. */
+ char const *flag_format = 0;
+
+ /* If -T was specified, use a Tab between the line-flag and the text.
+ Otherwise use a Space (as Unix diff does).
+ Print neither space nor tab if line-flags are empty.
+ But omit trailing blanks if requested. */
+
+ if (line_flag && *line_flag)
+ {
+ char const *flag_format_1 = flag_format = initial_tab ? "%s\t" : "%s ";
+ char const *line_flag_1 = line_flag;
+
+ if (suppress_blank_empty && **line == '\n')
+ {
+ flag_format_1 = "%s";
+
+ /* This hack to omit trailing blanks takes advantage of the
+ fact that the only way that LINE_FLAG can end in a blank
+ is when LINE_FLAG consists of a single blank. */
+ line_flag_1 += *line_flag_1 == ' ';
+ }
+
+ fprintf (out, flag_format_1, line_flag_1);
+ }
+
+ output_1_line (base, limit - (skip_nl && limit[-1] == '\n'), flag_format, line_flag);
+
+ if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
+ {
+ set_color_context (RESET_CONTEXT);
+ fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
+ }
+}
+
+/* Output a line from BASE up to LIMIT.
+ With -t, expand white space characters to spaces, and if FLAG_FORMAT
+ is nonzero, output it with argument LINE_FLAG after every
+ internal carriage return, so that tab stops continue to line up. */
+
+void
+output_1_line (char const *base, char const *limit, char const *flag_format,
+ char const *line_flag)
+{
+ const size_t MAX_CHUNK = 1024;
+ if (!expand_tabs)
+ {
+ size_t left = limit - base;
+ while (left)
+ {
+ size_t to_write = MIN (left, MAX_CHUNK);
+ size_t written = fwrite (base, sizeof (char), to_write, outfile);
+ if (written < to_write)
+ return;
+ base += written;
+ left -= written;
+ process_signals ();
+ }
+ }
+ else
+ {
+ register FILE *out = outfile;
+ register unsigned char c;
+ register char const *t = base;
+ register size_t column = 0;
+ size_t tab_size = tabsize;
+ size_t counter_proc_signals = 0;
+
+ while (t < limit)
+ {
+ counter_proc_signals++;
+ if (counter_proc_signals == MAX_CHUNK)
+ {
+ process_signals ();
+ counter_proc_signals = 0;
+ }
+
+ switch ((c = *t++))
+ {
+ case '\t':
+ {
+ size_t spaces = tab_size - column % tab_size;
+ column += spaces;
+ do
+ putc (' ', out);
+ while (--spaces);
+ }
+ break;
+
+ case '\r':
+ putc (c, out);
+ if (flag_format && t < limit && *t != '\n')
+ fprintf (out, flag_format, line_flag);
+ column = 0;
+ break;
+
+ case '\b':
+ if (column == 0)
+ continue;
+ column--;
+ putc (c, out);
+ break;
+
+ default:
+ column += isprint (c) != 0;
+ putc (c, out);
+ break;
+ }
+ }
+ }
+}
+
+enum indicator_no
+ {
+ C_LEFT, C_RIGHT, C_END, C_RESET, C_HEADER, C_ADD, C_DELETE, C_LINE
+ };
+
+static void
+put_indicator (const struct bin_str *ind)
+{
+ fwrite (ind->string, ind->len, 1, outfile);
+}
+
+static enum color_context last_context = RESET_CONTEXT;
+
+void
+set_color_context (enum color_context color_context)
+{
+ if (color_context != RESET_CONTEXT)
+ process_signals ();
+ if (colors_enabled && last_context != color_context)
+ {
+ put_indicator (&color_indicator[C_LEFT]);
+ switch (color_context)
+ {
+ case HEADER_CONTEXT:
+ put_indicator (&color_indicator[C_HEADER]);
+ break;
+
+ case LINE_NUMBER_CONTEXT:
+ put_indicator (&color_indicator[C_LINE]);
+ break;
+
+ case ADD_CONTEXT:
+ put_indicator (&color_indicator[C_ADD]);
+ break;
+
+ case DELETE_CONTEXT:
+ put_indicator (&color_indicator[C_DELETE]);
+ break;
+
+ case RESET_CONTEXT:
+ put_indicator (&color_indicator[C_RESET]);
+ break;
+
+ default:
+ abort ();
+ }
+ put_indicator (&color_indicator[C_RIGHT]);
+ last_context = color_context;
+ }
+}
+
+
+char const change_letter[] = { 0, 'd', 'a', 'c' };
+
+/* Translate an internal line number (an index into diff's table of lines)
+ into an actual line number in the input file.
+ The internal line number is I. FILE points to the data on the file.
+
+ Internal line numbers count from 0 starting after the prefix.
+ Actual line numbers count from 1 within the entire file. */
+
+lin _GL_ATTRIBUTE_PURE
+translate_line_number (struct file_data const *file, lin i)
+{
+ return i + file->prefix_lines + 1;
+}
+
+/* Translate a line number range. This is always done for printing,
+ so for convenience translate to printint rather than lin, so that the
+ caller can use printf with "%"pI"d" without casting. */
+
+void
+translate_range (struct file_data const *file,
+ lin a, lin b,
+ printint *aptr, printint *bptr)
+{
+ *aptr = translate_line_number (file, a - 1) + 1;
+ *bptr = translate_line_number (file, b + 1) - 1;
+}
+
+/* Print a pair of line numbers with SEPCHAR, translated for file FILE.
+ If the two numbers are identical, print just one number.
+
+ Args A and B are internal line numbers.
+ We print the translated (real) line numbers. */
+
+void
+print_number_range (char sepchar, struct file_data *file, lin a, lin b)
+{
+ printint trans_a, trans_b;
+ translate_range (file, a, b, &trans_a, &trans_b);
+
+ /* Note: we can have B < A in the case of a range of no lines.
+ In this case, we should print the line number before the range,
+ which is B. */
+ if (trans_b > trans_a)
+ fprintf (outfile, "%"pI"d%c%"pI"d", trans_a, sepchar, trans_b);
+ else
+ fprintf (outfile, "%"pI"d", trans_b);
+}
+
+/* Look at a hunk of edit script and report the range of lines in each file
+ that it applies to. HUNK is the start of the hunk, which is a chain
+ of 'struct change'. The first and last line numbers of file 0 are stored in
+ *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
+ Note that these are internal line numbers that count from 0.
+
+ If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
+
+ Return UNCHANGED if only ignorable lines are inserted or deleted,
+ OLD if lines of file 0 are deleted,
+ NEW if lines of file 1 are inserted,
+ and CHANGED if both kinds of changes are found. */
+
+enum changes
+analyze_hunk (struct change *hunk,
+ lin *first0, lin *last0,
+ lin *first1, lin *last1)
+{
+ struct change *next;
+ lin l0, l1;
+ lin show_from, show_to;
+ lin i;
+ bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
+ size_t trivial_length = ignore_blank_lines - 1;
+ /* If 0, ignore zero-length lines;
+ if SIZE_MAX, do not ignore lines just because of their length. */
+
+ bool skip_white_space =
+ ignore_blank_lines && IGNORE_TRAILING_SPACE <= ignore_white_space;
+ bool skip_leading_white_space =
+ skip_white_space && IGNORE_SPACE_CHANGE <= ignore_white_space;
+
+ char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */
+ char const * const *linbuf1 = files[1].linbuf;
+
+ show_from = show_to = 0;
+
+ *first0 = hunk->line0;
+ *first1 = hunk->line1;
+
+ next = hunk;
+ do
+ {
+ l0 = next->line0 + next->deleted - 1;
+ l1 = next->line1 + next->inserted - 1;
+ show_from += next->deleted;
+ show_to += next->inserted;
+
+ for (i = next->line0; i <= l0 && trivial; i++)
+ {
+ char const *line = linbuf0[i];
+ char const *lastbyte = linbuf0[i + 1] - 1;
+ char const *newline = lastbyte + (*lastbyte != '\n');
+ size_t len = newline - line;
+ char const *p = line;
+ if (skip_white_space)
+ for (; *p != '\n'; p++)
+ if (! isspace ((unsigned char) *p))
+ {
+ if (! skip_leading_white_space)
+ p = line;
+ break;
+ }
+ if (newline - p != trivial_length
+ && (! ignore_regexp.fastmap
+ || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
+ trivial = 0;
+ }
+
+ for (i = next->line1; i <= l1 && trivial; i++)
+ {
+ char const *line = linbuf1[i];
+ char const *lastbyte = linbuf1[i + 1] - 1;
+ char const *newline = lastbyte + (*lastbyte != '\n');
+ size_t len = newline - line;
+ char const *p = line;
+ if (skip_white_space)
+ for (; *p != '\n'; p++)
+ if (! isspace ((unsigned char) *p))
+ {
+ if (! skip_leading_white_space)
+ p = line;
+ break;
+ }
+ if (newline - p != trivial_length
+ && (! ignore_regexp.fastmap
+ || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
+ trivial = 0;
+ }
+ }
+ while ((next = next->link) != 0);
+
+ *last0 = l0;
+ *last1 = l1;
+
+ /* If all inserted or deleted lines are ignorable,
+ tell the caller to ignore this hunk. */
+
+ if (trivial)
+ return UNCHANGED;
+
+ return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
+}
+
+/* Concatenate three strings, returning a newly malloc'd string. */
+
+char *
+concat (char const *s1, char const *s2, char const *s3)
+{
+ char *new = xmalloc (strlen (s1) + strlen (s2) + strlen (s3) + 1);
+ sprintf (new, "%s%s%s", s1, s2, s3);
+ return new;
+}
+
+/* Yield a new block of SIZE bytes, initialized to zero. */
+
+void *
+zalloc (size_t size)
+{
+ void *p = xmalloc (size);
+ memset (p, 0, size);
+ return p;
+}
+
+void
+debug_script (struct change *sp)
+{
+ fflush (stdout);
+
+ for (; sp; sp = sp->link)
+ {
+ printint line0 = sp->line0;
+ printint line1 = sp->line1;
+ printint deleted = sp->deleted;
+ printint inserted = sp->inserted;
+ fprintf (stderr, "%3"pI"d %3"pI"d delete %"pI"d insert %"pI"d\n",
+ line0, line1, deleted, inserted);
+ }
+
+ fflush (stderr);
+}