diff --git a/src/diff.c b/src/diff.c index df3338c..60c6ce7 100644 --- a/src/diff.c +++ b/src/diff.c @@ -76,6 +76,8 @@ static void try_help (char const *, char const *) __attribute__((noreturn)); static void check_stdout (void); static void usage (void); +bool (*lines_differ) (char const *, size_t, char const *, size_t); + /* If comparing directories, compare their common subdirectories recursively. */ static bool recursive; @@ -298,6 +300,13 @@ main (int argc, char **argv) excluded = new_exclude (); presume_output_tty = false; +#ifdef HANDLE_MULTIBYTE + if (MB_CUR_MAX > 1) + lines_differ = lines_differ_multibyte; + else +#endif + lines_differ = lines_differ_singlebyte; + /* Decode the options. */ while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) diff --git a/src/diff.c.i18n b/src/diff.c.i18n new file mode 100644 index 0000000..df3338c --- /dev/null +++ b/src/diff.c.i18n @@ -0,0 +1,1473 @@ +/* diff - compare files line by line + + Copyright (C) 1988-1989, 1992-1994, 1996, 1998, 2001-2002, 2004, 2006-2007, + 2009-2013, 2015-2017 Free Software Foundation, Inc. + + This file is part of GNU DIFF. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#define GDIFF_MAIN +#include "diff.h" +#include "die.h" +#include +#include "paths.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "diff" + +#define AUTHORS \ + proper_name ("Paul Eggert"), \ + proper_name ("Mike Haertel"), \ + proper_name ("David Hayes"), \ + proper_name ("Richard Stallman"), \ + proper_name ("Len Tower") + +#ifndef GUTTER_WIDTH_MINIMUM +# define GUTTER_WIDTH_MINIMUM 3 +#endif + +struct regexp_list +{ + char *regexps; /* chars representing disjunction of the regexps */ + size_t len; /* chars used in 'regexps' */ + size_t size; /* size malloc'ed for 'regexps'; 0 if not malloc'ed */ + bool multiple_regexps;/* Does 'regexps' represent a disjunction? */ + struct re_pattern_buffer *buf; +}; + +static int compare_files (struct comparison const *, char const *, char const *); +static void add_regexp (struct regexp_list *, char const *); +static void summarize_regexp_list (struct regexp_list *); +static void specify_style (enum output_style); +static void specify_value (char const **, char const *, char const *); +static void specify_colors_style (char const *); +static void try_help (char const *, char const *) __attribute__((noreturn)); +static void check_stdout (void); +static void usage (void); + +/* If comparing directories, compare their common subdirectories + recursively. */ +static bool recursive; + +/* In context diffs, show previous lines that match these regexps. */ +static struct regexp_list function_regexp_list; + +/* Ignore changes affecting only lines that match these regexps. */ +static struct regexp_list ignore_regexp_list; + +#if O_BINARY +/* Use binary I/O when reading and writing data (--binary). + On POSIX hosts, this has no effect. */ +static bool binary; +#else +enum { binary = true }; +#endif + +/* If one file is missing, treat it as present but empty (-N). */ +static bool new_file; + +/* If the first file is missing, treat it as present but empty + (--unidirectional-new-file). */ +static bool unidirectional_new_file; + +/* Report files compared that are the same (-s). + Normally nothing is output when that happens. */ +static bool report_identical_files; + +static char const shortopts[] = +"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ"; + +/* Values for long options that do not have single-letter equivalents. */ +enum +{ + BINARY_OPTION = CHAR_MAX + 1, + FROM_FILE_OPTION, + HELP_OPTION, + HORIZON_LINES_OPTION, + IGNORE_FILE_NAME_CASE_OPTION, + INHIBIT_HUNK_MERGE_OPTION, + LEFT_COLUMN_OPTION, + LINE_FORMAT_OPTION, + NO_DEREFERENCE_OPTION, + NO_IGNORE_FILE_NAME_CASE_OPTION, + NORMAL_OPTION, + SDIFF_MERGE_ASSIST_OPTION, + STRIP_TRAILING_CR_OPTION, + SUPPRESS_BLANK_EMPTY_OPTION, + SUPPRESS_COMMON_LINES_OPTION, + TABSIZE_OPTION, + TO_FILE_OPTION, + + /* These options must be in sequence. */ + UNCHANGED_LINE_FORMAT_OPTION, + OLD_LINE_FORMAT_OPTION, + NEW_LINE_FORMAT_OPTION, + + /* These options must be in sequence. */ + UNCHANGED_GROUP_FORMAT_OPTION, + OLD_GROUP_FORMAT_OPTION, + NEW_GROUP_FORMAT_OPTION, + CHANGED_GROUP_FORMAT_OPTION, + + COLOR_OPTION, + COLOR_PALETTE_OPTION, + + PRESUME_OUTPUT_TTY_OPTION, +}; + +static char const group_format_option[][sizeof "--unchanged-group-format"] = + { + "--unchanged-group-format", + "--old-group-format", + "--new-group-format", + "--changed-group-format" + }; + +static char const line_format_option[][sizeof "--unchanged-line-format"] = + { + "--unchanged-line-format", + "--old-line-format", + "--new-line-format" + }; + +static struct option const longopts[] = +{ + {"binary", 0, 0, BINARY_OPTION}, + {"brief", 0, 0, 'q'}, + {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION}, + {"color", 2, 0, COLOR_OPTION}, + {"context", 2, 0, 'C'}, + {"ed", 0, 0, 'e'}, + {"exclude", 1, 0, 'x'}, + {"exclude-from", 1, 0, 'X'}, + {"expand-tabs", 0, 0, 't'}, + {"forward-ed", 0, 0, 'f'}, + {"from-file", 1, 0, FROM_FILE_OPTION}, + {"help", 0, 0, HELP_OPTION}, + {"horizon-lines", 1, 0, HORIZON_LINES_OPTION}, + {"ifdef", 1, 0, 'D'}, + {"ignore-all-space", 0, 0, 'w'}, + {"ignore-blank-lines", 0, 0, 'B'}, + {"ignore-case", 0, 0, 'i'}, + {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION}, + {"ignore-matching-lines", 1, 0, 'I'}, + {"ignore-space-change", 0, 0, 'b'}, + {"ignore-tab-expansion", 0, 0, 'E'}, + {"ignore-trailing-space", 0, 0, 'Z'}, + {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION}, + {"initial-tab", 0, 0, 'T'}, + {"label", 1, 0, 'L'}, + {"left-column", 0, 0, LEFT_COLUMN_OPTION}, + {"line-format", 1, 0, LINE_FORMAT_OPTION}, + {"minimal", 0, 0, 'd'}, + {"new-file", 0, 0, 'N'}, + {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION}, + {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION}, + {"no-dereference", 0, 0, NO_DEREFERENCE_OPTION}, + {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION}, + {"normal", 0, 0, NORMAL_OPTION}, + {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION}, + {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION}, + {"paginate", 0, 0, 'l'}, + {"palette", 1, 0, COLOR_PALETTE_OPTION}, + {"rcs", 0, 0, 'n'}, + {"recursive", 0, 0, 'r'}, + {"report-identical-files", 0, 0, 's'}, + {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION}, + {"show-c-function", 0, 0, 'p'}, + {"show-function-line", 1, 0, 'F'}, + {"side-by-side", 0, 0, 'y'}, + {"speed-large-files", 0, 0, 'H'}, + {"starting-file", 1, 0, 'S'}, + {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION}, + {"suppress-blank-empty", 0, 0, SUPPRESS_BLANK_EMPTY_OPTION}, + {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION}, + {"tabsize", 1, 0, TABSIZE_OPTION}, + {"text", 0, 0, 'a'}, + {"to-file", 1, 0, TO_FILE_OPTION}, + {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION}, + {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION}, + {"unidirectional-new-file", 0, 0, 'P'}, + {"unified", 2, 0, 'U'}, + {"version", 0, 0, 'v'}, + {"width", 1, 0, 'W'}, + + /* This is solely for testing. Do not document. */ + {"-presume-output-tty", no_argument, NULL, PRESUME_OUTPUT_TTY_OPTION}, + {0, 0, 0, 0} +}; + +/* Return a string containing the command options with which diff was invoked. + Spaces appear between what were separate ARGV-elements. + There is a space at the beginning but none at the end. + If there were no options, the result is an empty string. + + Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT, + the length of that vector. */ + +static char * +option_list (char **optionvec, int count) +{ + int i; + size_t size = 1; + char *result; + char *p; + + for (i = 0; i < count; i++) + size += 1 + shell_quote_length (optionvec[i]); + + p = result = xmalloc (size); + + for (i = 0; i < count; i++) + { + *p++ = ' '; + p = shell_quote_copy (p, optionvec[i]); + } + + *p = '\0'; + return result; +} + + +/* Return an option value suitable for add_exclude. */ + +static int +exclude_options (void) +{ + return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0); +} + +int +main (int argc, char **argv) +{ + int exit_status = EXIT_SUCCESS; + int c; + int i; + int prev = -1; + lin ocontext = -1; + bool explicit_context = false; + size_t width = 0; + bool show_c_function = false; + char const *from_file = NULL; + char const *to_file = NULL; + uintmax_t numval; + char *numend; + + /* Do our initializations. */ + exit_failure = EXIT_TROUBLE; + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + c_stack_action (0); + function_regexp_list.buf = &function_regexp; + ignore_regexp_list.buf = &ignore_regexp; + re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); + excluded = new_exclude (); + presume_output_tty = false; + + /* Decode the options. */ + + while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) + { + switch (c) + { + case 0: + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ocontext = (! ISDIGIT (prev) + ? c - '0' + : (ocontext - (c - '0' <= CONTEXT_MAX % 10) + < CONTEXT_MAX / 10) + ? 10 * ocontext + (c - '0') + : CONTEXT_MAX); + break; + + case 'a': + text = true; + break; + + case 'b': + if (ignore_white_space < IGNORE_SPACE_CHANGE) + ignore_white_space = IGNORE_SPACE_CHANGE; + break; + + case 'Z': + if (ignore_white_space < IGNORE_SPACE_CHANGE) + ignore_white_space |= IGNORE_TRAILING_SPACE; + break; + + case 'B': + ignore_blank_lines = true; + break; + + case 'C': + case 'U': + { + if (optarg) + { + numval = strtoumax (optarg, &numend, 10); + if (*numend) + try_help ("invalid context length '%s'", optarg); + if (CONTEXT_MAX < numval) + numval = CONTEXT_MAX; + } + else + numval = 3; + + specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT); + if (context < numval) + context = numval; + explicit_context = true; + } + break; + + case 'c': + specify_style (OUTPUT_CONTEXT); + if (context < 3) + context = 3; + break; + + case 'd': + minimal = true; + break; + + case 'D': + specify_style (OUTPUT_IFDEF); + { + static char const C_ifdef_group_formats[] = + "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n"; + char *b = xmalloc (sizeof C_ifdef_group_formats + + 7 * strlen (optarg) - 14 /* 7*"%s" */ + - 8 /* 5*"%%" + 3*"%c" */); + sprintf (b, C_ifdef_group_formats, + 0, + optarg, optarg, 0, + optarg, optarg, 0, + optarg, optarg, optarg); + for (i = 0; i < sizeof group_format / sizeof group_format[0]; i++) + { + specify_value (&group_format[i], b, "-D"); + b += strlen (b) + 1; + } + } + break; + + case 'e': + specify_style (OUTPUT_ED); + break; + + case 'E': + if (ignore_white_space < IGNORE_SPACE_CHANGE) + ignore_white_space |= IGNORE_TAB_EXPANSION; + break; + + case 'f': + specify_style (OUTPUT_FORWARD_ED); + break; + + case 'F': + add_regexp (&function_regexp_list, optarg); + break; + + case 'h': + /* Split the files into chunks for faster processing. + Usually does not change the result. + + This currently has no effect. */ + break; + + case 'H': + speed_large_files = true; + break; + + case 'i': + ignore_case = true; + break; + + case 'I': + add_regexp (&ignore_regexp_list, optarg); + break; + + case 'l': + if (!pr_program[0]) + try_help ("pagination not supported on this host", NULL); + paginate = true; +#ifdef SIGCHLD + /* Pagination requires forking and waiting, and + System V fork+wait does not work if SIGCHLD is ignored. */ + signal (SIGCHLD, SIG_DFL); +#endif + break; + + case 'L': + if (!file_label[0]) + file_label[0] = optarg; + else if (!file_label[1]) + file_label[1] = optarg; + else + fatal ("too many file label options"); + break; + + case 'n': + specify_style (OUTPUT_RCS); + break; + + case 'N': + new_file = true; + break; + + case 'p': + show_c_function = true; + add_regexp (&function_regexp_list, "^[[:alpha:]$_]"); + break; + + case 'P': + unidirectional_new_file = true; + break; + + case 'q': + brief = true; + break; + + case 'r': + recursive = true; + break; + + case 's': + report_identical_files = true; + break; + + case 'S': + specify_value (&starting_file, optarg, "-S"); + break; + + case 't': + expand_tabs = true; + break; + + case 'T': + initial_tab = true; + break; + + case 'u': + specify_style (OUTPUT_UNIFIED); + if (context < 3) + context = 3; + break; + + case 'v': + version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, + AUTHORS, (char *) NULL); + check_stdout (); + return EXIT_SUCCESS; + + case 'w': + ignore_white_space = IGNORE_ALL_SPACE; + break; + + case 'x': + add_exclude (excluded, optarg, exclude_options ()); + break; + + case 'X': + if (add_exclude_file (add_exclude, excluded, optarg, + exclude_options (), '\n')) + pfatal_with_name (optarg); + break; + + case 'y': + specify_style (OUTPUT_SDIFF); + break; + + case 'W': + numval = strtoumax (optarg, &numend, 10); + if (! (0 < numval && numval <= SIZE_MAX) || *numend) + try_help ("invalid width '%s'", optarg); + if (width != numval) + { + if (width) + fatal ("conflicting width options"); + width = numval; + } + break; + + case BINARY_OPTION: +#if O_BINARY + binary = true; + if (! isatty (STDOUT_FILENO)) + set_binary_mode (STDOUT_FILENO, O_BINARY); +#endif + break; + + case FROM_FILE_OPTION: + specify_value (&from_file, optarg, "--from-file"); + break; + + case HELP_OPTION: + usage (); + check_stdout (); + return EXIT_SUCCESS; + + case HORIZON_LINES_OPTION: + numval = strtoumax (optarg, &numend, 10); + if (*numend) + try_help ("invalid horizon length '%s'", optarg); + horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX)); + break; + + case IGNORE_FILE_NAME_CASE_OPTION: + ignore_file_name_case = true; + break; + + case INHIBIT_HUNK_MERGE_OPTION: + /* This option is obsolete, but accept it for backward + compatibility. */ + break; + + case LEFT_COLUMN_OPTION: + left_column = true; + break; + + case LINE_FORMAT_OPTION: + specify_style (OUTPUT_IFDEF); + for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++) + specify_value (&line_format[i], optarg, "--line-format"); + break; + + case NO_DEREFERENCE_OPTION: + no_dereference_symlinks = true; + break; + + case NO_IGNORE_FILE_NAME_CASE_OPTION: + ignore_file_name_case = false; + break; + + case NORMAL_OPTION: + specify_style (OUTPUT_NORMAL); + break; + + case SDIFF_MERGE_ASSIST_OPTION: + specify_style (OUTPUT_SDIFF); + sdiff_merge_assist = true; + break; + + case STRIP_TRAILING_CR_OPTION: + strip_trailing_cr = true; + break; + + case SUPPRESS_BLANK_EMPTY_OPTION: + suppress_blank_empty = true; + break; + + case SUPPRESS_COMMON_LINES_OPTION: + suppress_common_lines = true; + break; + + case TABSIZE_OPTION: + numval = strtoumax (optarg, &numend, 10); + if (! (0 < numval && numval <= SIZE_MAX - GUTTER_WIDTH_MINIMUM) + || *numend) + try_help ("invalid tabsize '%s'", optarg); + if (tabsize != numval) + { + if (tabsize) + fatal ("conflicting tabsize options"); + tabsize = numval; + } + break; + + case TO_FILE_OPTION: + specify_value (&to_file, optarg, "--to-file"); + break; + + case UNCHANGED_LINE_FORMAT_OPTION: + case OLD_LINE_FORMAT_OPTION: + case NEW_LINE_FORMAT_OPTION: + specify_style (OUTPUT_IFDEF); + c -= UNCHANGED_LINE_FORMAT_OPTION; + specify_value (&line_format[c], optarg, line_format_option[c]); + break; + + case UNCHANGED_GROUP_FORMAT_OPTION: + case OLD_GROUP_FORMAT_OPTION: + case NEW_GROUP_FORMAT_OPTION: + case CHANGED_GROUP_FORMAT_OPTION: + specify_style (OUTPUT_IFDEF); + c -= UNCHANGED_GROUP_FORMAT_OPTION; + specify_value (&group_format[c], optarg, group_format_option[c]); + break; + + case COLOR_OPTION: + specify_colors_style (optarg); + break; + + case COLOR_PALETTE_OPTION: + set_color_palette (optarg); + break; + + case PRESUME_OUTPUT_TTY_OPTION: + presume_output_tty = true; + break; + + default: + try_help (NULL, NULL); + } + prev = c; + } + + if (colors_style == AUTO) + { + char const *t = getenv ("TERM"); + if (t && STREQ (t, "dumb")) + colors_style = NEVER; + } + + if (output_style == OUTPUT_UNSPECIFIED) + { + if (show_c_function) + { + specify_style (OUTPUT_CONTEXT); + if (ocontext < 0) + context = 3; + } + else + specify_style (OUTPUT_NORMAL); + } + + if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME)) + { +#if (defined STAT_TIMESPEC || defined STAT_TIMESPEC_NS \ + || defined HAVE_STRUCT_STAT_ST_SPARE1) + time_format = "%Y-%m-%d %H:%M:%S.%N %z"; +#else + time_format = "%Y-%m-%d %H:%M:%S %z"; +#endif + } + else + { + /* See POSIX 1003.1-2001 for this format. */ + time_format = "%a %b %e %T %Y"; + } + + if (0 <= ocontext + && (output_style == OUTPUT_CONTEXT + || output_style == OUTPUT_UNIFIED) + && (context < ocontext + || (ocontext < context && ! explicit_context))) + context = ocontext; + + if (! tabsize) + tabsize = 8; + if (! width) + width = 130; + + { + /* Maximize first the half line width, and then the gutter width, + according to the following constraints: + + 1. Two half lines plus a gutter must fit in a line. + 2. If the half line width is nonzero: + a. The gutter width is at least GUTTER_WIDTH_MINIMUM. + b. If tabs are not expanded to spaces, + a half line plus a gutter is an integral number of tabs, + so that tabs in the right column line up. */ + + size_t t = expand_tabs ? 1 : tabsize; + size_t w = width; + size_t t_plus_g = t + GUTTER_WIDTH_MINIMUM; + size_t unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1); + size_t off = unaligned_off - unaligned_off % t; + sdiff_half_width = (off <= GUTTER_WIDTH_MINIMUM || w <= off + ? 0 + : MIN (off - GUTTER_WIDTH_MINIMUM, w - off)); + sdiff_column2_offset = sdiff_half_width ? off : w; + } + + /* Make the horizon at least as large as the context, so that + shift_boundaries has more freedom to shift the first and last hunks. */ + if (horizon_lines < context) + horizon_lines = context; + + summarize_regexp_list (&function_regexp_list); + summarize_regexp_list (&ignore_regexp_list); + + if (output_style == OUTPUT_IFDEF) + { + for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++) + if (!line_format[i]) + line_format[i] = "%l\n"; + if (!group_format[OLD]) + group_format[OLD] + = group_format[CHANGED] ? group_format[CHANGED] : "%<"; + if (!group_format[NEW]) + group_format[NEW] + = group_format[CHANGED] ? group_format[CHANGED] : "%>"; + if (!group_format[UNCHANGED]) + group_format[UNCHANGED] = "%="; + if (!group_format[CHANGED]) + group_format[CHANGED] = concat (group_format[OLD], + group_format[NEW], ""); + } + + no_diff_means_no_output = + (output_style == OUTPUT_IFDEF ? + (!*group_format[UNCHANGED] + || (STREQ (group_format[UNCHANGED], "%=") + && !*line_format[UNCHANGED])) + : (output_style != OUTPUT_SDIFF) | suppress_common_lines); + + files_can_be_treated_as_binary = + (brief & binary + & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr + | (ignore_regexp_list.regexps || ignore_white_space))); + + switch_string = option_list (argv + 1, optind - 1); + + if (from_file) + { + if (to_file) + fatal ("--from-file and --to-file both specified"); + else + for (; optind < argc; optind++) + { + int status = compare_files (NULL, from_file, argv[optind]); + if (exit_status < status) + exit_status = status; + } + } + else + { + if (to_file) + for (; optind < argc; optind++) + { + int status = compare_files (NULL, argv[optind], to_file); + if (exit_status < status) + exit_status = status; + } + else + { + if (argc - optind != 2) + { + if (argc - optind < 2) + try_help ("missing operand after '%s'", argv[argc - 1]); + else + try_help ("extra operand '%s'", argv[optind + 2]); + } + + exit_status = compare_files (NULL, argv[optind], argv[optind + 1]); + } + } + + /* Print any messages that were saved up for last. */ + print_message_queue (); + + check_stdout (); + exit (exit_status); + return exit_status; +} + +/* Append to REGLIST the regexp PATTERN. */ + +static void +add_regexp (struct regexp_list *reglist, char const *pattern) +{ + size_t patlen = strlen (pattern); + char const *m = re_compile_pattern (pattern, patlen, reglist->buf); + + if (m != 0) + error (EXIT_TROUBLE, 0, "%s: %s", pattern, m); + else + { + char *regexps = reglist->regexps; + size_t len = reglist->len; + bool multiple_regexps = reglist->multiple_regexps = regexps != 0; + size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen; + size_t size = reglist->size; + + if (size <= newlen) + { + if (!size) + size = 1; + + do size *= 2; + while (size <= newlen); + + reglist->size = size; + reglist->regexps = regexps = xrealloc (regexps, size); + } + if (multiple_regexps) + { + regexps[len++] = '\\'; + regexps[len++] = '|'; + } + memcpy (regexps + len, pattern, patlen + 1); + } +} + +/* Ensure that REGLIST represents the disjunction of its regexps. + This is done here, rather than earlier, to avoid O(N^2) behavior. */ + +static void +summarize_regexp_list (struct regexp_list *reglist) +{ + if (reglist->regexps) + { + /* At least one regexp was specified. Allocate a fastmap for it. */ + reglist->buf->fastmap = xmalloc (1 << CHAR_BIT); + if (reglist->multiple_regexps) + { + /* Compile the disjunction of the regexps. + (If just one regexp was specified, it is already compiled.) */ + char const *m = re_compile_pattern (reglist->regexps, reglist->len, + reglist->buf); + if (m) + die (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m); + } + } +} + +static void +try_help (char const *reason_msgid, char const *operand) +{ + if (reason_msgid) + error (0, 0, _(reason_msgid), operand); + die (EXIT_TROUBLE, 0, _("Try '%s --help' for more information."), + program_name); +} + +static void +check_stdout (void) +{ + if (ferror (stdout)) + fatal ("write failed"); + else if (fclose (stdout) != 0) + pfatal_with_name (_("standard output")); +} + +static char const * const option_help_msgid[] = { + N_(" --normal output a normal diff (the default)"), + N_("-q, --brief report only when files differ"), + N_("-s, --report-identical-files report when two files are the same"), + N_("-c, -C NUM, --context[=NUM] output NUM (default 3) lines of copied context"), + N_("-u, -U NUM, --unified[=NUM] output NUM (default 3) lines of unified context"), + N_("-e, --ed output an ed script"), + N_("-n, --rcs output an RCS format diff"), + N_("-y, --side-by-side output in two columns"), + N_("-W, --width=NUM output at most NUM (default 130) print columns"), + N_(" --left-column output only the left column of common lines"), + N_(" --suppress-common-lines do not output common lines"), + "", + N_("-p, --show-c-function show which C function each change is in"), + N_("-F, --show-function-line=RE show the most recent line matching RE"), + N_(" --label LABEL use LABEL instead of file name and timestamp\n" + " (can be repeated)"), + "", + N_("-t, --expand-tabs expand tabs to spaces in output"), + N_("-T, --initial-tab make tabs line up by prepending a tab"), + N_(" --tabsize=NUM tab stops every NUM (default 8) print columns"), + N_(" --suppress-blank-empty suppress space or tab before empty output lines"), + N_("-l, --paginate pass output through 'pr' to paginate it"), + "", + N_("-r, --recursive recursively compare any subdirectories found"), + N_(" --no-dereference don't follow symbolic links"), + N_("-N, --new-file treat absent files as empty"), + N_(" --unidirectional-new-file treat absent first files as empty"), + N_(" --ignore-file-name-case ignore case when comparing file names"), + N_(" --no-ignore-file-name-case consider case when comparing file names"), + N_("-x, --exclude=PAT exclude files that match PAT"), + N_("-X, --exclude-from=FILE exclude files that match any pattern in FILE"), + N_("-S, --starting-file=FILE start with FILE when comparing directories"), + N_(" --from-file=FILE1 compare FILE1 to all operands;\n" + " FILE1 can be a directory"), + N_(" --to-file=FILE2 compare all operands to FILE2;\n" + " FILE2 can be a directory"), + "", + N_("-i, --ignore-case ignore case differences in file contents"), + N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"), + N_("-Z, --ignore-trailing-space ignore white space at line end"), + N_("-b, --ignore-space-change ignore changes in the amount of white space"), + N_("-w, --ignore-all-space ignore all white space"), + N_("-B, --ignore-blank-lines ignore changes where lines are all blank"), + N_("-I, --ignore-matching-lines=RE ignore changes where all lines match RE"), + "", + N_("-a, --text treat all files as text"), + N_(" --strip-trailing-cr strip trailing carriage return on input"), +#if O_BINARY + N_(" --binary read and write data in binary mode"), +#endif + "", + N_("-D, --ifdef=NAME output merged file with '#ifdef NAME' diffs"), + N_(" --GTYPE-group-format=GFMT format GTYPE input groups with GFMT"), + N_(" --line-format=LFMT format all input lines with LFMT"), + N_(" --LTYPE-line-format=LFMT format LTYPE input lines with LFMT"), + N_(" These format options provide fine-grained control over the output\n" + " of diff, generalizing -D/--ifdef."), + N_(" LTYPE is 'old', 'new', or 'unchanged'. GTYPE is LTYPE or 'changed'."), + N_(" GFMT (only) may contain:\n\ + %< lines from FILE1\n\ + %> lines from FILE2\n\ + %= lines common to FILE1 and FILE2\n\ + %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\ + LETTERs are as follows for new group, lower case for old group:\n\ + F first line number\n\ + L last line number\n\ + N number of lines = L-F+1\n\ + E F-1\n\ + M L+1\n\ + %(A=B?T:E) if A equals B then T else E"), + N_(" LFMT (only) may contain:\n\ + %L contents of line\n\ + %l contents of line, excluding any trailing newline\n\ + %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"), + N_(" Both GFMT and LFMT may contain:\n\ + %% %\n\ + %c'C' the single character C\n\ + %c'\\OOO' the character with octal code OOO\n\ + C the character C (other characters represent themselves)"), + "", + N_("-d, --minimal try hard to find a smaller set of changes"), + N_(" --horizon-lines=NUM keep NUM lines of the common prefix and suffix"), + N_(" --speed-large-files assume large files and many scattered small changes"), + N_(" --color[=WHEN] colorize the output; WHEN can be 'never', 'always',\n" + " or 'auto' (the default)"), + N_(" --palette=PALETTE the colors to use when --color is active; PALETTE is\n" + " a colon-separated list of terminfo capabilities"), + "", + N_(" --help display this help and exit"), + N_("-v, --version output version information and exit"), + "", + N_("FILES are 'FILE1 FILE2' or 'DIR1 DIR2' or 'DIR FILE' or 'FILE DIR'."), + N_("If --from-file or --to-file is given, there are no restrictions on FILE(s)."), + N_("If a FILE is '-', read standard input."), + N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."), + 0 +}; + +static void +usage (void) +{ + char const * const *p; + + printf (_("Usage: %s [OPTION]... FILES\n"), program_name); + printf ("%s\n\n", _("Compare FILES line by line.")); + + fputs (_("\ +Mandatory arguments to long options are mandatory for short options too.\n\ +"), stdout); + + for (p = option_help_msgid; *p; p++) + { + if (!**p) + putchar ('\n'); + else + { + char const *msg = _(*p); + char const *nl; + while ((nl = strchr (msg, '\n'))) + { + int msglen = nl + 1 - msg; + printf (" %.*s", msglen, msg); + msg = nl + 1; + } + + printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg); + } + } + emit_bug_reporting_address (); +} + +/* Set VAR to VALUE, reporting an OPTION error if this is a + conflict. */ +static void +specify_value (char const **var, char const *value, char const *option) +{ + if (*var && ! STREQ (*var, value)) + { + error (0, 0, _("conflicting %s option value '%s'"), option, value); + try_help (NULL, NULL); + } + *var = value; +} + +/* Set the output style to STYLE, diagnosing conflicts. */ +static void +specify_style (enum output_style style) +{ + if (output_style != style) + { + if (output_style != OUTPUT_UNSPECIFIED) + try_help ("conflicting output style options", NULL); + output_style = style; + } +} + +/* Set the color mode. */ +static void +specify_colors_style (char const *value) +{ + if (value == NULL || STREQ (value, "auto")) + colors_style = AUTO; + else if (STREQ (value, "always")) + colors_style = ALWAYS; + else if (STREQ (value, "never")) + colors_style = NEVER; + else + try_help ("invalid color '%s'", value); +} + + +/* Set the last-modified time of *ST to be the current time. */ + +static void +set_mtime_to_now (struct stat *st) +{ +#ifdef STAT_TIMESPEC + gettime (&STAT_TIMESPEC (st, st_mtim)); +#else + struct timespec t; + gettime (&t); + st->st_mtime = t.tv_sec; +# if defined STAT_TIMESPEC_NS + STAT_TIMESPEC_NS (st, st_mtim) = t.tv_nsec; +# elif defined HAVE_STRUCT_STAT_ST_SPARE1 + st->st_spare1 = t.tv_nsec / 1000; +# endif +#endif +} + +/* Compare two files (or dirs) with parent comparison PARENT + and names NAME0 and NAME1. + (If PARENT is null, then the first name is just NAME0, etc.) + This is self-contained; it opens the files and closes them. + + Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if + different, EXIT_TROUBLE if there is a problem opening them. */ + +static int +compare_files (struct comparison const *parent, + char const *name0, + char const *name1) +{ + struct comparison cmp; +#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0) + register int f; + int status = EXIT_SUCCESS; + bool same_files; + char *free0; + char *free1; + + /* If this is directory comparison, perhaps we have a file + that exists only in one of the directories. + If so, just print a message to that effect. */ + + if (! ((name0 && name1) + || (unidirectional_new_file && name1) + || new_file)) + { + char const *name = name0 ? name0 : name1; + char const *dir = parent->file[!name0].name; + + /* See POSIX 1003.1-2001 for this format. */ + message ("Only in %s: %s\n", dir, name); + + /* Return EXIT_FAILURE so that diff_dirs will return + EXIT_FAILURE ("some files differ"). */ + return EXIT_FAILURE; + } + + memset (cmp.file, 0, sizeof cmp.file); + cmp.parent = parent; + + /* cmp.file[f].desc markers */ +#define NONEXISTENT (-1) /* nonexistent file */ +#define UNOPENED (-2) /* unopened file (e.g. directory) */ +#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */ + +#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */ + + cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT; + cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT; + + /* Now record the full name of each file, including nonexistent ones. */ + + if (!name0) + name0 = name1; + if (!name1) + name1 = name0; + + if (!parent) + { + free0 = NULL; + free1 = NULL; + cmp.file[0].name = name0; + cmp.file[1].name = name1; + } + else + { + cmp.file[0].name = free0 + = file_name_concat (parent->file[0].name, name0, NULL); + cmp.file[1].name = free1 + = file_name_concat (parent->file[1].name, name1, NULL); + } + + /* Stat the files. */ + + for (f = 0; f < 2; f++) + { + if (cmp.file[f].desc != NONEXISTENT) + { + if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0) + { + cmp.file[f].desc = cmp.file[0].desc; + cmp.file[f].stat = cmp.file[0].stat; + } + else if (STREQ (cmp.file[f].name, "-")) + { + cmp.file[f].desc = STDIN_FILENO; + if (binary && ! isatty (STDIN_FILENO)) + set_binary_mode (STDIN_FILENO, O_BINARY); + if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0) + cmp.file[f].desc = ERRNO_ENCODE (errno); + else + { + if (S_ISREG (cmp.file[f].stat.st_mode)) + { + off_t pos = lseek (STDIN_FILENO, 0, SEEK_CUR); + if (pos < 0) + cmp.file[f].desc = ERRNO_ENCODE (errno); + else + cmp.file[f].stat.st_size = + MAX (0, cmp.file[f].stat.st_size - pos); + } + + /* POSIX 1003.1-2001 requires current time for + stdin. */ + set_mtime_to_now (&cmp.file[f].stat); + } + } + else if ((no_dereference_symlinks + ? lstat (cmp.file[f].name, &cmp.file[f].stat) + : stat (cmp.file[f].name, &cmp.file[f].stat)) + != 0) + cmp.file[f].desc = ERRNO_ENCODE (errno); + } + } + + /* Mark files as nonexistent as needed for -N and -P, if they are + inaccessible empty regular files (the kind of files that 'patch' + creates to indicate nonexistent backups), or if they are + top-level files that do not exist but their counterparts do + exist. */ + for (f = 0; f < 2; f++) + if ((new_file || (f == 0 && unidirectional_new_file)) + && (cmp.file[f].desc == UNOPENED + ? (S_ISREG (cmp.file[f].stat.st_mode) + && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) + && cmp.file[f].stat.st_size == 0) + : ((cmp.file[f].desc == ERRNO_ENCODE (ENOENT) + || cmp.file[f].desc == ERRNO_ENCODE (EBADF)) + && ! parent + && (cmp.file[1 - f].desc == UNOPENED + || cmp.file[1 - f].desc == STDIN_FILENO)))) + cmp.file[f].desc = NONEXISTENT; + + for (f = 0; f < 2; f++) + if (cmp.file[f].desc == NONEXISTENT) + { + memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat); + cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode; + } + + for (f = 0; f < 2; f++) + { + int e = ERRNO_DECODE (cmp.file[f].desc); + if (0 <= e) + { + errno = e; + perror_with_name (cmp.file[f].name); + status = EXIT_TROUBLE; + } + } + + if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1)) + { + /* If one is a directory, and it was specified in the command line, + use the file in that dir with the other file's basename. */ + + int fnm_arg = DIR_P (0); + int dir_arg = 1 - fnm_arg; + char const *fnm = cmp.file[fnm_arg].name; + char const *dir = cmp.file[dir_arg].name; + char const *filename = cmp.file[dir_arg].name = free0 + = find_dir_file_pathname (dir, last_component (fnm)); + + if (STREQ (fnm, "-")) + fatal ("cannot compare '-' to a directory"); + + if ((no_dereference_symlinks + ? lstat (filename, &cmp.file[dir_arg].stat) + : stat (filename, &cmp.file[dir_arg].stat)) + != 0) + { + perror_with_name (filename); + status = EXIT_TROUBLE; + } + } + + if (status != EXIT_SUCCESS) + { + /* One of the files should exist but does not. */ + } + else if (cmp.file[0].desc == NONEXISTENT + && cmp.file[1].desc == NONEXISTENT) + { + /* Neither file "exists", so there's nothing to compare. */ + } + else if ((same_files + = (cmp.file[0].desc != NONEXISTENT + && cmp.file[1].desc != NONEXISTENT + && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat) + && same_file_attributes (&cmp.file[0].stat, + &cmp.file[1].stat))) + && no_diff_means_no_output) + { + /* The two named files are actually the same physical file. + We know they are identical without actually reading them. */ + } + else if (DIR_P (0) & DIR_P (1)) + { + if (output_style == OUTPUT_IFDEF) + fatal ("-D option not supported with directories"); + + /* If both are directories, compare the files in them. */ + + if (parent && !recursive) + { + /* But don't compare dir contents one level down + unless -r was specified. + See POSIX 1003.1-2001 for this format. */ + message ("Common subdirectories: %s and %s\n", + cmp.file[0].name, cmp.file[1].name); + } + else + status = diff_dirs (&cmp, compare_files); + } + else if ((DIR_P (0) | DIR_P (1)) + || (parent + && !((S_ISREG (cmp.file[0].stat.st_mode) + || S_ISLNK (cmp.file[0].stat.st_mode)) + && (S_ISREG (cmp.file[1].stat.st_mode) + || S_ISLNK (cmp.file[1].stat.st_mode))))) + { + if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) + { + /* We have a subdirectory that exists only in one directory. */ + + if ((DIR_P (0) | DIR_P (1)) + && recursive + && (new_file + || (unidirectional_new_file + && cmp.file[0].desc == NONEXISTENT))) + status = diff_dirs (&cmp, compare_files); + else + { + char const *dir; + + /* PARENT must be non-NULL here. */ + assert (parent); + dir = parent->file[cmp.file[0].desc == NONEXISTENT].name; + + /* See POSIX 1003.1-2001 for this format. */ + message ("Only in %s: %s\n", dir, name0); + + status = EXIT_FAILURE; + } + } + else + { + /* We have two files that are not to be compared. */ + + /* See POSIX 1003.1-2001 for this format. */ + message5 ("File %s is a %s while file %s is a %s\n", + file_label[0] ? file_label[0] : cmp.file[0].name, + file_type (&cmp.file[0].stat), + file_label[1] ? file_label[1] : cmp.file[1].name, + file_type (&cmp.file[1].stat)); + + /* This is a difference. */ + status = EXIT_FAILURE; + } + } + else if (S_ISLNK (cmp.file[0].stat.st_mode) + || S_ISLNK (cmp.file[1].stat.st_mode)) + { + /* We get here only if we use lstat(), not stat(). */ + assert (no_dereference_symlinks); + + if (S_ISLNK (cmp.file[0].stat.st_mode) + && S_ISLNK (cmp.file[1].stat.st_mode)) + { + /* Compare the values of the symbolic links. */ + char *link_value[2] = { NULL, NULL }; + + for (f = 0; f < 2; f++) + { + link_value[f] = xreadlink (cmp.file[f].name); + if (link_value[f] == NULL) + { + perror_with_name (cmp.file[f].name); + status = EXIT_TROUBLE; + break; + } + } + if (status == EXIT_SUCCESS) + { + if ( ! STREQ (link_value[0], link_value[1])) + { + message ("Symbolic links %s and %s differ\n", + cmp.file[0].name, cmp.file[1].name); + /* This is a difference. */ + status = EXIT_FAILURE; + } + } + for (f = 0; f < 2; f++) + free (link_value[f]); + } + else + { + /* We have two files that are not to be compared, because + one of them is a symbolic link and the other one is not. */ + + message5 ("File %s is a %s while file %s is a %s\n", + file_label[0] ? file_label[0] : cmp.file[0].name, + file_type (&cmp.file[0].stat), + file_label[1] ? file_label[1] : cmp.file[1].name, + file_type (&cmp.file[1].stat)); + + /* This is a difference. */ + status = EXIT_FAILURE; + } + } + else if (files_can_be_treated_as_binary + && S_ISREG (cmp.file[0].stat.st_mode) + && S_ISREG (cmp.file[1].stat.st_mode) + && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size + && 0 < cmp.file[0].stat.st_size + && 0 < cmp.file[1].stat.st_size) + { + message ("Files %s and %s differ\n", + file_label[0] ? file_label[0] : cmp.file[0].name, + file_label[1] ? file_label[1] : cmp.file[1].name); + status = EXIT_FAILURE; + } + else + { + /* Both exist and neither is a directory. */ + + /* Open the files and record their descriptors. */ + + int oflags = O_RDONLY | (binary ? O_BINARY : 0); + + if (cmp.file[0].desc == UNOPENED) + if ((cmp.file[0].desc = open (cmp.file[0].name, oflags, 0)) < 0) + { + perror_with_name (cmp.file[0].name); + status = EXIT_TROUBLE; + } + if (cmp.file[1].desc == UNOPENED) + { + if (same_files) + cmp.file[1].desc = cmp.file[0].desc; + else if ((cmp.file[1].desc = open (cmp.file[1].name, oflags, 0)) < 0) + { + perror_with_name (cmp.file[1].name); + status = EXIT_TROUBLE; + } + } + + /* Compare the files, if no error was found. */ + + if (status == EXIT_SUCCESS) + status = diff_2_files (&cmp); + + /* Close the file descriptors. */ + + if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0) + { + perror_with_name (cmp.file[0].name); + status = EXIT_TROUBLE; + } + if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc + && close (cmp.file[1].desc) != 0) + { + perror_with_name (cmp.file[1].name); + status = EXIT_TROUBLE; + } + } + + /* Now the comparison has been done, if no error prevented it, + and STATUS is the value this function will return. */ + + if (status == EXIT_SUCCESS) + { + if (report_identical_files && !DIR_P (0)) + message ("Files %s and %s are identical\n", + file_label[0] ? file_label[0] : cmp.file[0].name, + file_label[1] ? file_label[1] : cmp.file[1].name); + } + else + { + /* Flush stdout so that the user sees differences immediately. + This can hurt performance, unfortunately. */ + if (fflush (stdout) != 0) + pfatal_with_name (_("standard output")); + } + + free (free0); + free (free1); + + return status; +} diff --git a/src/diff.h b/src/diff.h index c8cf436..a438a8e 100644 --- a/src/diff.h +++ b/src/diff.h @@ -23,6 +23,17 @@ #include #include +/* For platforms which support the ISO C ammendment 1 functionality we + support user-defined character classes. */ +#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H +/* Solaris 2.5 has a bug: must be included before . */ +# include +# include +# if defined (HAVE_MBRTOWC) +# define HANDLE_MULTIBYTE 1 +# endif +#endif + /* What kind of changes a hunk contains. */ enum changes { @@ -381,7 +392,11 @@ extern void print_sdiff_script (struct change *); extern char const change_letter[4]; extern char const pr_program[]; extern char *concat (char const *, char const *, char const *); -extern bool lines_differ (char const *, char const *) _GL_ATTRIBUTE_PURE; +extern bool (*lines_differ) (char const *, size_t, char const *, size_t) _GL_ATTRIBUTE_PURE; +extern bool lines_differ_singlebyte (char const *, size_t, char const *, size_t) _GL_ATTRIBUTE_PURE; +#ifdef HANDLE_MULTIBYTE +extern bool lines_differ_multibyte (char const *, size_t, char const *, size_t) _GL_ATTRIBUTE_PURE; +#endif extern lin translate_line_number (struct file_data const *, lin); extern struct change *find_change (struct change *); extern struct change *find_reverse_change (struct change *); diff --git a/src/diff.h.i18n b/src/diff.h.i18n new file mode 100644 index 0000000..c8cf436 --- /dev/null +++ b/src/diff.h.i18n @@ -0,0 +1,423 @@ +/* Shared definitions for GNU DIFF + + Copyright (C) 1988-1989, 1991-1995, 1998, 2001-2002, 2004, 2009-2013, + 2015-2017 Free Software Foundation, Inc. + + This file is part of GNU DIFF. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include "system.h" +#include +#include +#include + +/* What kind of changes a hunk contains. */ +enum changes +{ + /* No changes: lines common to both files. */ + UNCHANGED, + + /* Deletes only: lines taken from just the first file. */ + OLD, + + /* Inserts only: lines taken from just the second file. */ + NEW, + + /* Both deletes and inserts: a hunk containing both old and new lines. */ + CHANGED +}; + +/* When colors should be used in the output. */ +enum colors_style +{ + /* Never output colors. */ + NEVER, + + /* Output colors if the output is a terminal. */ + AUTO, + + /* Always output colors. */ + ALWAYS, +}; + +/* Variables for command line options */ + +#ifndef GDIFF_MAIN +# define XTERN extern +#else +# define XTERN +#endif + +enum output_style +{ + /* No output style specified. */ + OUTPUT_UNSPECIFIED, + + /* Default output style. */ + OUTPUT_NORMAL, + + /* Output the differences with lines of context before and after (-c). */ + OUTPUT_CONTEXT, + + /* Output the differences in a unified context diff format (-u). */ + OUTPUT_UNIFIED, + + /* Output the differences as commands suitable for 'ed' (-e). */ + OUTPUT_ED, + + /* Output the diff as a forward ed script (-f). */ + OUTPUT_FORWARD_ED, + + /* Like -f, but output a count of changed lines in each "command" (-n). */ + OUTPUT_RCS, + + /* Output merged #ifdef'd file (-D). */ + OUTPUT_IFDEF, + + /* Output sdiff style (-y). */ + OUTPUT_SDIFF +}; + +/* True for output styles that are robust, + i.e. can handle a file that ends in a non-newline. */ +#define ROBUST_OUTPUT_STYLE(S) ((S) != OUTPUT_ED && (S) != OUTPUT_FORWARD_ED) + +XTERN enum output_style output_style; + +/* Define the current color context used to print a line. */ +XTERN enum colors_style colors_style; + +/* Nonzero if output cannot be generated for identical files. */ +XTERN bool no_diff_means_no_output; + +/* Number of lines of context to show in each set of diffs. + This is zero when context is not to be shown. */ +XTERN lin context; + +/* Consider all files as text files (-a). + Don't interpret codes over 0177 as implying a "binary file". */ +XTERN bool text; + +/* Number of lines to keep in identical prefix and suffix. */ +XTERN lin horizon_lines; + +/* The significance of white space during comparisons. */ +enum DIFF_white_space +{ + /* All white space is significant (the default). */ + IGNORE_NO_WHITE_SPACE, + + /* Ignore changes due to tab expansion (-E). */ + IGNORE_TAB_EXPANSION, + + /* Ignore changes in trailing horizontal white space (-Z). */ + IGNORE_TRAILING_SPACE, + + /* IGNORE_TAB_EXPANSION and IGNORE_TRAILING_SPACE are a special case + because they are independent and can be ORed together, yielding + IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE. */ + IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE, + + /* Ignore changes in horizontal white space (-b). */ + IGNORE_SPACE_CHANGE, + + /* Ignore all horizontal white space (-w). */ + IGNORE_ALL_SPACE +}; +XTERN enum DIFF_white_space ignore_white_space; + +/* Ignore changes that affect only blank lines (-B). */ +XTERN bool ignore_blank_lines; + +/* Files can be compared byte-by-byte, as if they were binary. + This depends on various options. */ +XTERN bool files_can_be_treated_as_binary; + +/* Ignore differences in case of letters (-i). */ +XTERN bool ignore_case; + +/* Ignore differences in case of letters in file names. */ +XTERN bool ignore_file_name_case; + +/* Act on symbolic links themselves rather than on their target + (--no-dereference). */ +XTERN bool no_dereference_symlinks; + +/* File labels for '-c' output headers (--label). */ +XTERN char *file_label[2]; + +/* Regexp to identify function-header lines (-F). */ +XTERN struct re_pattern_buffer function_regexp; + +/* Ignore changes that affect only lines matching this regexp (-I). */ +XTERN struct re_pattern_buffer ignore_regexp; + +/* Say only whether files differ, not how (-q). */ +XTERN bool brief; + +/* Expand tabs in the output so the text lines up properly + despite the characters added to the front of each line (-t). */ +XTERN bool expand_tabs; + +/* Number of columns between tab stops. */ +XTERN size_t tabsize; + +/* Use a tab in the output, rather than a space, before the text of an + input line, so as to keep the proper alignment in the input line + without changing the characters in it (-T). */ +XTERN bool initial_tab; + +/* Do not output an initial space or tab before the text of an empty line. */ +XTERN bool suppress_blank_empty; + +/* Remove trailing carriage returns from input. */ +XTERN bool strip_trailing_cr; + +/* In directory comparison, specify file to start with (-S). + This is used for resuming an aborted comparison. + All file names less than this name are ignored. */ +XTERN char const *starting_file; + +/* Pipe each file's output through pr (-l). */ +XTERN bool paginate; + +/* Line group formats for unchanged, old, new, and changed groups. */ +XTERN char const *group_format[CHANGED + 1]; + +/* Line formats for unchanged, old, and new lines. */ +XTERN char const *line_format[NEW + 1]; + +/* If using OUTPUT_SDIFF print extra information to help the sdiff filter. */ +XTERN bool sdiff_merge_assist; + +/* Tell OUTPUT_SDIFF to show only the left version of common lines. */ +XTERN bool left_column; + +/* Tell OUTPUT_SDIFF to not show common lines. */ +XTERN bool suppress_common_lines; + +/* The half line width and column 2 offset for OUTPUT_SDIFF. */ +XTERN size_t sdiff_half_width; +XTERN size_t sdiff_column2_offset; + +/* String containing all the command options diff received, + with spaces between and at the beginning but none at the end. + If there were no options given, this string is empty. */ +XTERN char *switch_string; + +/* Use heuristics for better speed with large files with a small + density of changes. */ +XTERN bool speed_large_files; + +/* Patterns that match file names to be excluded. */ +XTERN struct exclude *excluded; + +/* Don't discard lines. This makes things slower (sometimes much + slower) but will find a guaranteed minimal set of changes. */ +XTERN bool minimal; + +/* The strftime format to use for time strings. */ +XTERN char const *time_format; + +/* The result of comparison is an "edit script": a chain of 'struct change'. + Each 'struct change' represents one place where some lines are deleted + and some are inserted. + + LINE0 and LINE1 are the first affected lines in the two files (origin 0). + DELETED is the number of lines deleted here from file 0. + INSERTED is the number of lines inserted here in file 1. + + If DELETED is 0 then LINE0 is the number of the line before + which the insertion was done; vice versa for INSERTED and LINE1. */ + +struct change +{ + struct change *link; /* Previous or next edit command */ + lin inserted; /* # lines of file 1 changed here. */ + lin deleted; /* # lines of file 0 changed here. */ + lin line0; /* Line number of 1st deleted line. */ + lin line1; /* Line number of 1st inserted line. */ + bool ignore; /* Flag used in context.c. */ +}; + +/* Structures that describe the input files. */ + +/* Data on one input file being compared. */ + +struct file_data { + int desc; /* File descriptor */ + char const *name; /* File name */ + struct stat stat; /* File status */ + + /* Buffer in which text of file is read. */ + word *buffer; + + /* Allocated size of buffer, in bytes. Always a multiple of + sizeof *buffer. */ + size_t bufsize; + + /* Number of valid bytes now in the buffer. */ + size_t buffered; + + /* Array of pointers to lines in the file. */ + char const **linbuf; + + /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines. + linebuf[linbuf_base ... buffered_lines - 1] are possibly differing. + linebuf[linbuf_base ... valid_lines - 1] contain valid data. + linebuf[linbuf_base ... alloc_lines - 1] are allocated. */ + lin linbuf_base, buffered_lines, valid_lines, alloc_lines; + + /* Pointer to end of prefix of this file to ignore when hashing. */ + char const *prefix_end; + + /* Count of lines in the prefix. + There are this many lines in the file before linbuf[0]. */ + lin prefix_lines; + + /* Pointer to start of suffix of this file to ignore when hashing. */ + char const *suffix_begin; + + /* Vector, indexed by line number, containing an equivalence code for + each line. It is this vector that is actually compared with that + of another file to generate differences. */ + lin *equivs; + + /* Vector, like the previous one except that + the elements for discarded lines have been squeezed out. */ + lin *undiscarded; + + /* Vector mapping virtual line numbers (not counting discarded lines) + to real ones (counting those lines). Both are origin-0. */ + lin *realindexes; + + /* Total number of nondiscarded lines. */ + lin nondiscarded_lines; + + /* Vector, indexed by real origin-0 line number, + containing 1 for a line that is an insertion or a deletion. + The results of comparison are stored here. */ + char *changed; + + /* 1 if file ends in a line with no final newline. */ + bool missing_newline; + + /* 1 if at end of file. */ + bool eof; + + /* 1 more than the maximum equivalence value used for this or its + sibling file. */ + lin equiv_max; +}; + +/* The file buffer, considered as an array of bytes rather than + as an array of words. */ +#define FILE_BUFFER(f) ((char *) (f)->buffer) + +/* Data on two input files being compared. */ + +struct comparison + { + struct file_data file[2]; + struct comparison const *parent; /* parent, if a recursive comparison */ + }; + +/* Describe the two files currently being compared. */ + +XTERN struct file_data files[2]; + +/* Stdio stream to output diffs to. */ + +XTERN FILE *outfile; + +/* Declare various functions. */ + +/* analyze.c */ +extern int diff_2_files (struct comparison *); + +/* context.c */ +extern void print_context_header (struct file_data[], char const * const *, bool); +extern void print_context_script (struct change *, bool); + +/* dir.c */ +extern int diff_dirs (struct comparison const *, + int (*) (struct comparison const *, + char const *, char const *)); +extern char *find_dir_file_pathname (char const *, char const *); + +/* ed.c */ +extern void print_ed_script (struct change *); +extern void pr_forward_ed_script (struct change *); + +/* ifdef.c */ +extern void print_ifdef_script (struct change *); + +/* io.c */ +extern void file_block_read (struct file_data *, size_t); +extern bool read_files (struct file_data[], bool); + +/* normal.c */ +extern void print_normal_script (struct change *); + +/* rcs.c */ +extern void print_rcs_script (struct change *); + +/* side.c */ +extern void print_sdiff_script (struct change *); + +/* util.c */ +extern char const change_letter[4]; +extern char const pr_program[]; +extern char *concat (char const *, char const *, char const *); +extern bool lines_differ (char const *, char const *) _GL_ATTRIBUTE_PURE; +extern lin translate_line_number (struct file_data const *, lin); +extern struct change *find_change (struct change *); +extern struct change *find_reverse_change (struct change *); +extern void *zalloc (size_t); +extern enum changes analyze_hunk (struct change *, lin *, lin *, lin *, lin *); +extern void begin_output (void); +extern void debug_script (struct change *); +extern void fatal (char const *) __attribute__((noreturn)); +extern void finish_output (void); +extern void message (char const *, char const *, char const *); +extern void message5 (char const *, char const *, char const *, + char const *, char const *); +extern void output_1_line (char const *, char const *, char const *, + char const *); +extern void perror_with_name (char const *); +extern void pfatal_with_name (char const *) __attribute__((noreturn)); +extern void print_1_line (char const *, char const * const *); +extern void print_1_line_nl (char const *, char const * const *, bool); +extern void print_message_queue (void); +extern void print_number_range (char, struct file_data *, lin, lin); +extern void print_script (struct change *, struct change * (*) (struct change *), + void (*) (struct change *)); +extern void setup_output (char const *, char const *, bool); +extern void translate_range (struct file_data const *, lin, lin, + printint *, printint *); + +enum color_context +{ + HEADER_CONTEXT, + ADD_CONTEXT, + DELETE_CONTEXT, + RESET_CONTEXT, + LINE_NUMBER_CONTEXT, +}; + +XTERN bool presume_output_tty; + +extern void set_color_context (enum color_context color_context); +extern void set_color_palette (char const *palette); diff --git a/src/io.c b/src/io.c index b4ef5dc..fce16ab 100644 --- a/src/io.c +++ b/src/io.c @@ -23,6 +23,7 @@ #include #include #include +#include /* Rotate an unsigned value to the left. */ #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n))) @@ -215,6 +216,28 @@ slurp (struct file_data *current) /* Split the file into lines, simultaneously computing the equivalence class for each line. */ +#ifdef HANDLE_MULTIBYTE +# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL) \ +do \ + { \ + mbstate_t state_bak = STATE; \ + \ + CONVFAIL = 0; \ + MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE); \ + \ + switch (MBLENGTH) \ + { \ + case (size_t)-2: \ + case (size_t)-1: \ + STATE = state_bak; \ + ++CONVFAIL; \ + /* Fall through. */ \ + case 0: \ + MBLENGTH = 1; \ + } \ + } \ + while (0) +#endif static void find_and_hash_each_line (struct file_data *current) @@ -241,12 +264,300 @@ find_and_hash_each_line (struct file_data *current) bool same_length_diff_contents_compare_anyway = diff_length_compare_anyway | ig_case; +#ifdef HANDLE_MULTIBYTE + wchar_t wc; + size_t mblength; + mbstate_t state; + int convfail; + + memset (&state, '\0', sizeof (mbstate_t)); +#endif + while (p < suffix_begin) { char const *ip = p; hash_value h = 0; unsigned char c; +#ifdef HANDLE_MULTIBYTE + if (MB_CUR_MAX > 1) + { + wchar_t lo_wc; + char mbc[MB_LEN_MAX]; + mbstate_t state_wc; + + /* Hash this line until we find a newline. */ + switch (ig_white_space) + { + case IGNORE_ALL_SPACE: + while (1) + { + if (*p == '\n') + { + ++p; + break; + } + + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); + + if (convfail) + mbc[0] = *p++; + else if (!iswspace (wc)) + { + bool flag = 0; + + if (ig_case) + { + lo_wc = towlower (wc); + if (lo_wc != wc) + { + flag = 1; + + p += mblength; + memset (&state_wc, '\0', sizeof(mbstate_t)); + mblength = wcrtomb (mbc, lo_wc, &state_wc); + + assert (mblength != (size_t)-1 && + mblength != (size_t)-2); + + mblength = (mblength < 1) ? 1 : mblength; + } + } + + if (!flag) + { + for (i = 0; i < mblength; i++) + mbc[i] = *p++; + } + } + else + { + p += mblength; + continue; + } + + for (i = 0; i < mblength; i++) + h = HASH (h, mbc[i]); + } + break; + + case IGNORE_SPACE_CHANGE: + while (1) + { + if (*p == '\n') + { + ++p; + break; + } + + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); + + if (!convfail && iswspace (wc)) + { + while (1) + { + if (*p == '\n') + { + ++p; + goto hashing_done; + } + + p += mblength; + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); + if (convfail || !iswspace (wc)) + break; + } + h = HASH (h, ' '); + } + + /* WC is now the first non-space. */ + if (convfail) + mbc[0] = *p++; + else + { + bool flag = 0; + + if (ignore_case) + { + lo_wc = towlower (wc); + if (lo_wc != wc) + { + flag = 1; + + p += mblength; + memset (&state_wc, '\0', sizeof(mbstate_t)); + mblength = wcrtomb (mbc, lo_wc, &state_wc); + + assert (mblength != (size_t)-1 && + mblength != (size_t)-2); + + mblength = (mblength < 1) ? 1 : mblength; + } + } + + if (!flag) + { + for (i = 0; i < mblength; i++) + mbc[i] = *p++; + } + } + + for (i = 0; i < mblength; i++) + h = HASH (h, mbc[i]); + } + break; + + case IGNORE_TAB_EXPANSION: + case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE: + case IGNORE_TRAILING_SPACE: + { + size_t column = 0; + while (1) + { + if (*p == '\n') + { + ++p; + break; + } + + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); + + if (!convfail + && ig_white_space & IGNORE_TRAILING_SPACE + && iswspace (wc)) + { + char const *p1 = p; + while (1) + { + if (*p1 == '\n') + { + p = p1 + 1; + goto hashing_done; + } + + p1 += mblength; + MBC2WC (p1, suffix_begin, mblength, wc, state, convfail); + if (convfail || !iswspace (wc)) + break; + } + } + + size_t repetitions = 1; + bool no_convert = 0; + + if (ig_white_space & IGNORE_TAB_EXPANSION) + { + if (convfail) + column++; + else + switch (wc) + { + case L'\b': + column -= 0 < column; + break; + + case L'\t': + mbc[0] = ' '; + mblength = 1; + no_convert = 1; + p++; + assert(mblength == 1); + repetitions = tabsize - column % tabsize; + column = (column + repetitions < column + ? 0 + : column + repetitions); + break; + + case L'\r': + column = 0; + break; + + default: + column += wcwidth (wc); + break; + } + } + + if (ig_case) + { + lo_wc = towlower (wc); + if (lo_wc != wc) + { + no_convert = 1; + p += mblength; + memset (&state_wc, '\0', sizeof(mbstate_t)); + mblength = wcrtomb (mbc, lo_wc, &state_wc); + + assert (mblength != (size_t)-1 && + mblength != (size_t)-2); + + mblength = (mblength < 1) ? 1 : mblength; + } + } + + if (!no_convert) + for (i = 0; i < mblength; i++) + mbc[i] = *p++; + + do + { + for (i = 0; i < mblength; i++) + h = HASH (h, mbc[i]); + } + while (--repetitions != 0); + } + } + break; + + default: + while (1) + { + if (*p == '\n') + { + ++p; + break; + } + + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); + + if (convfail) + mbc[0] = *p++; + else + { + int flag = 0; + + if (ig_case) + { + lo_wc = towlower (wc); + if (lo_wc != wc) + { + flag = 1; + p += mblength; + memset (&state_wc, '\0', sizeof(mbstate_t)); + mblength = wcrtomb (mbc, lo_wc, &state_wc); + + assert (mblength != (size_t)-1 && + mblength != (size_t)-2); + + mblength = (mblength < 1) ? 1 : mblength; + } + } + + if (!flag) + { + for (i = 0; i < mblength; i++) + mbc[i] = *p++; + } + } + + for (i = 0; i < mblength; i++) + h = HASH (h, mbc[i]); + } + } + } + else +#endif + /* Hash this line until we find a newline. */ switch (ig_white_space) { @@ -397,7 +708,7 @@ find_and_hash_each_line (struct file_data *current) else if (!diff_length_compare_anyway) continue; - if (! lines_differ (eqline, ip)) + if (! lines_differ (eqline, eqs[i].length + 1, ip, length + 1)) break; } diff --git a/src/io.c.i18n b/src/io.c.i18n new file mode 100644 index 0000000..b4ef5dc --- /dev/null +++ b/src/io.c.i18n @@ -0,0 +1,830 @@ +/* File I/O for GNU DIFF. + + Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2013, + 2015-2017 Free Software Foundation, Inc. + + This file is part of GNU DIFF. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include "diff.h" +#include +#include +#include +#include + +/* Rotate an unsigned value to the left. */ +#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n))) + +/* Given a hash value and a new character, return a new hash value. */ +#define HASH(h, c) ((c) + ROL (h, 7)) + +/* The type of a hash value. */ +typedef size_t hash_value; +verify (! TYPE_SIGNED (hash_value)); + +/* Lines are put into equivalence classes of lines that match in lines_differ. + Each equivalence class is represented by one of these structures, + but only while the classes are being computed. + Afterward, each class is represented by a number. */ +struct equivclass +{ + lin next; /* Next item in this bucket. */ + hash_value hash; /* Hash of lines in this class. */ + char const *line; /* A line that fits this class. */ + size_t length; /* That line's length, not counting its newline. */ +}; + +/* Hash-table: array of buckets, each being a chain of equivalence classes. + buckets[-1] is reserved for incomplete lines. */ +static lin *buckets; + +/* Number of buckets in the hash table array, not counting buckets[-1]. */ +static size_t nbuckets; + +/* Array in which the equivalence classes are allocated. + The bucket-chains go through the elements in this array. + The number of an equivalence class is its index in this array. */ +static struct equivclass *equivs; + +/* Index of first free element in the array 'equivs'. */ +static lin equivs_index; + +/* Number of elements allocated in the array 'equivs'. */ +static lin equivs_alloc; + +/* Read a block of data into a file buffer, checking for EOF and error. */ + +void +file_block_read (struct file_data *current, size_t size) +{ + if (size && ! current->eof) + { + size_t s = block_read (current->desc, + FILE_BUFFER (current) + current->buffered, size); + if (s == SIZE_MAX) + pfatal_with_name (current->name); + current->buffered += s; + current->eof = s < size; + } +} + +/* Check for binary files and compare them for exact identity. */ + +/* Return 1 if BUF contains a non text character. + SIZE is the number of characters in BUF. */ + +#define binary_file_p(buf, size) (memchr (buf, 0, size) != 0) + +/* Get ready to read the current file. + Return nonzero if SKIP_TEST is zero, + and if it appears to be a binary file. */ + +static bool +sip (struct file_data *current, bool skip_test) +{ + /* If we have a nonexistent file at this stage, treat it as empty. */ + if (current->desc < 0) + { + /* Leave room for a sentinel. */ + current->bufsize = sizeof (word); + current->buffer = xmalloc (current->bufsize); + } + else + { + current->bufsize = buffer_lcm (sizeof (word), + STAT_BLOCKSIZE (current->stat), + PTRDIFF_MAX - 2 * sizeof (word)); + current->buffer = xmalloc (current->bufsize); + +#ifdef __KLIBC__ + /* Skip test if seek is not possible */ + skip_test = skip_test + || (lseek (current->desc, 0, SEEK_CUR) < 0 + && errno == ESPIPE); +#endif + + if (! skip_test) + { + /* Check first part of file to see if it's a binary file. */ + + int prev_mode = set_binary_mode (current->desc, O_BINARY); + off_t buffered; + file_block_read (current, current->bufsize); + buffered = current->buffered; + + if (prev_mode != O_BINARY) + { + /* Revert to text mode and seek back to the start to reread + the file. Use relative seek, since file descriptors + like stdin might not start at offset zero. */ + if (lseek (current->desc, - buffered, SEEK_CUR) < 0) + pfatal_with_name (current->name); + set_binary_mode (current->desc, prev_mode); + current->buffered = 0; + current->eof = false; + } + + return binary_file_p (current->buffer, buffered); + } + } + + current->buffered = 0; + current->eof = false; + return false; +} + +/* Slurp the rest of the current file completely into memory. */ + +static void +slurp (struct file_data *current) +{ + size_t cc; + + if (current->desc < 0) + { + /* The file is nonexistent. */ + return; + } + + if (S_ISREG (current->stat.st_mode)) + { + /* It's a regular file; slurp in the rest all at once. */ + + /* Get the size out of the stat block. + Allocate just enough room for appended newline plus word sentinel, + plus word-alignment since we want the buffer word-aligned. */ + size_t file_size = current->stat.st_size; + cc = file_size + 2 * sizeof (word) - file_size % sizeof (word); + if (file_size != current->stat.st_size || cc < file_size + || PTRDIFF_MAX <= cc) + xalloc_die (); + + if (current->bufsize < cc) + { + current->bufsize = cc; + current->buffer = xrealloc (current->buffer, cc); + } + + /* Try to read at least 1 more byte than the size indicates, to + detect whether the file is growing. This is a nicety for + users who run 'diff' on files while they are changing. */ + + if (current->buffered <= file_size) + { + file_block_read (current, file_size + 1 - current->buffered); + if (current->buffered <= file_size) + return; + } + } + + /* It's not a regular file, or it's a growing regular file; read it, + growing the buffer as needed. */ + + file_block_read (current, current->bufsize - current->buffered); + + if (current->buffered) + { + while (current->buffered == current->bufsize) + { + if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize) + xalloc_die (); + current->bufsize *= 2; + current->buffer = xrealloc (current->buffer, current->bufsize); + file_block_read (current, current->bufsize - current->buffered); + } + + /* Allocate just enough room for appended newline plus word + sentinel, plus word-alignment. */ + cc = current->buffered + 2 * sizeof (word); + current->bufsize = cc - cc % sizeof (word); + current->buffer = xrealloc (current->buffer, current->bufsize); + } +} + +/* Split the file into lines, simultaneously computing the equivalence + class for each line. */ + +static void +find_and_hash_each_line (struct file_data *current) +{ + char const *p = current->prefix_end; + lin i, *bucket; + size_t length; + + /* Cache often-used quantities in local variables to help the compiler. */ + char const **linbuf = current->linbuf; + lin alloc_lines = current->alloc_lines; + lin line = 0; + lin linbuf_base = current->linbuf_base; + lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs); + struct equivclass *eqs = equivs; + lin eqs_index = equivs_index; + lin eqs_alloc = equivs_alloc; + char const *suffix_begin = current->suffix_begin; + char const *bufend = FILE_BUFFER (current) + current->buffered; + bool ig_case = ignore_case; + enum DIFF_white_space ig_white_space = ignore_white_space; + bool diff_length_compare_anyway = + ig_white_space != IGNORE_NO_WHITE_SPACE; + bool same_length_diff_contents_compare_anyway = + diff_length_compare_anyway | ig_case; + + while (p < suffix_begin) + { + char const *ip = p; + hash_value h = 0; + unsigned char c; + + /* Hash this line until we find a newline. */ + switch (ig_white_space) + { + case IGNORE_ALL_SPACE: + while ((c = *p++) != '\n') + if (! isspace (c)) + h = HASH (h, ig_case ? tolower (c) : c); + break; + + case IGNORE_SPACE_CHANGE: + while ((c = *p++) != '\n') + { + if (isspace (c)) + { + do + if ((c = *p++) == '\n') + goto hashing_done; + while (isspace (c)); + + h = HASH (h, ' '); + } + + /* C is now the first non-space. */ + h = HASH (h, ig_case ? tolower (c) : c); + } + break; + + case IGNORE_TAB_EXPANSION: + case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE: + case IGNORE_TRAILING_SPACE: + { + size_t column = 0; + while ((c = *p++) != '\n') + { + if (ig_white_space & IGNORE_TRAILING_SPACE + && isspace (c)) + { + char const *p1 = p; + unsigned char c1; + do + if ((c1 = *p1++) == '\n') + { + p = p1; + goto hashing_done; + } + while (isspace (c1)); + } + + size_t repetitions = 1; + + if (ig_white_space & IGNORE_TAB_EXPANSION) + switch (c) + { + case '\b': + column -= 0 < column; + break; + + case '\t': + c = ' '; + repetitions = tabsize - column % tabsize; + column = (column + repetitions < column + ? 0 + : column + repetitions); + break; + + case '\r': + column = 0; + break; + + default: + column++; + break; + } + + if (ig_case) + c = tolower (c); + + do + h = HASH (h, c); + while (--repetitions != 0); + } + } + break; + + default: + if (ig_case) + while ((c = *p++) != '\n') + h = HASH (h, tolower (c)); + else + while ((c = *p++) != '\n') + h = HASH (h, c); + break; + } + + hashing_done:; + + bucket = &buckets[h % nbuckets]; + length = p - ip - 1; + + if (p == bufend + && current->missing_newline + && ROBUST_OUTPUT_STYLE (output_style)) + { + /* The last line is incomplete and we do not silently + complete lines. If the line cannot compare equal to any + complete line, put it into buckets[-1] so that it can + compare equal only to the other file's incomplete line + (if one exists). */ + if (ig_white_space < IGNORE_TRAILING_SPACE) + bucket = &buckets[-1]; + } + + for (i = *bucket; ; i = eqs[i].next) + if (!i) + { + /* Create a new equivalence class in this bucket. */ + i = eqs_index++; + if (i == eqs_alloc) + { + if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc) + xalloc_die (); + eqs_alloc *= 2; + eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs); + } + eqs[i].next = *bucket; + eqs[i].hash = h; + eqs[i].line = ip; + eqs[i].length = length; + *bucket = i; + break; + } + else if (eqs[i].hash == h) + { + char const *eqline = eqs[i].line; + + /* Reuse existing class if lines_differ reports the lines + equal. */ + if (eqs[i].length == length) + { + /* Reuse existing equivalence class if the lines are identical. + This detects the common case of exact identity + faster than lines_differ would. */ + if (memcmp (eqline, ip, length) == 0) + break; + if (!same_length_diff_contents_compare_anyway) + continue; + } + else if (!diff_length_compare_anyway) + continue; + + if (! lines_differ (eqline, ip)) + break; + } + + /* Maybe increase the size of the line table. */ + if (line == alloc_lines) + { + /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */ + if (PTRDIFF_MAX / 3 <= alloc_lines + || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base + || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base) + xalloc_die (); + alloc_lines = 2 * alloc_lines - linbuf_base; + cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs); + linbuf += linbuf_base; + linbuf = xrealloc (linbuf, + (alloc_lines - linbuf_base) * sizeof *linbuf); + linbuf -= linbuf_base; + } + linbuf[line] = ip; + cureqs[line] = i; + ++line; + } + + current->buffered_lines = line; + + for (i = 0; ; i++) + { + /* Record the line start for lines in the suffix that we care about. + Record one more line start than lines, + so that we can compute the length of any buffered line. */ + if (line == alloc_lines) + { + /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */ + if (PTRDIFF_MAX / 3 <= alloc_lines + || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base + || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base) + xalloc_die (); + alloc_lines = 2 * alloc_lines - linbuf_base; + linbuf += linbuf_base; + linbuf = xrealloc (linbuf, + (alloc_lines - linbuf_base) * sizeof *linbuf); + linbuf -= linbuf_base; + } + linbuf[line] = p; + + if (p == bufend) + { + /* If the last line is incomplete and we do not silently + complete lines, don't count its appended newline. */ + if (current->missing_newline && ROBUST_OUTPUT_STYLE (output_style)) + linbuf[line]--; + break; + } + + if (context <= i && no_diff_means_no_output) + break; + + line++; + + while (*p++ != '\n') + continue; + } + + /* Done with cache in local variables. */ + current->linbuf = linbuf; + current->valid_lines = line; + current->alloc_lines = alloc_lines; + current->equivs = cureqs; + equivs = eqs; + equivs_alloc = eqs_alloc; + equivs_index = eqs_index; +} + +/* Prepare the text. Make sure the text end is initialized. + Make sure text ends in a newline, + but remember that we had to add one. + Strip trailing CRs, if that was requested. */ + +static void +prepare_text (struct file_data *current) +{ + size_t buffered = current->buffered; + char *p = FILE_BUFFER (current); + + if (buffered == 0 || p[buffered - 1] == '\n') + current->missing_newline = false; + else + { + p[buffered++] = '\n'; + current->missing_newline = true; + } + + if (!p) + return; + + /* Don't use uninitialized storage when planting or using sentinels. */ + memset (p + buffered, 0, sizeof (word)); + + if (strip_trailing_cr) + { + char *dst; + char *srclim = p + buffered; + *srclim = '\r'; + dst = rawmemchr (p, '\r'); + + if (dst != srclim) + { + char const *src = dst; + do + { + *dst = *src++; + dst += ! (*dst == '\r' && *src == '\n'); + } + while (src < srclim); + + buffered -= src - dst; + } + } + + current->buffered = buffered; +} + +/* We have found N lines in a buffer of size S; guess the + proportionate number of lines that will be found in a buffer of + size T. However, do not guess a number of lines so large that the + resulting line table might cause overflow in size calculations. */ +static lin +guess_lines (lin n, size_t s, size_t t) +{ + size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1); + lin guessed_lines = MAX (1, t / guessed_bytes_per_line); + return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5; +} + +/* Given a vector of two file_data objects, find the identical + prefixes and suffixes of each object. */ + +static void +find_identical_ends (struct file_data filevec[]) +{ + word *w0, *w1; + char *p0, *p1, *buffer0, *buffer1; + char const *end0, *beg0; + char const **linbuf0, **linbuf1; + lin i, lines; + size_t n0, n1; + lin alloc_lines0, alloc_lines1; + bool prefix_needed; + lin buffered_prefix, prefix_count, prefix_mask; + lin middle_guess, suffix_guess; + + slurp (&filevec[0]); + prepare_text (&filevec[0]); + if (filevec[0].desc != filevec[1].desc) + { + slurp (&filevec[1]); + prepare_text (&filevec[1]); + } + else + { + filevec[1].buffer = filevec[0].buffer; + filevec[1].bufsize = filevec[0].bufsize; + filevec[1].buffered = filevec[0].buffered; + filevec[1].missing_newline = filevec[0].missing_newline; + } + + /* Find identical prefix. */ + + w0 = filevec[0].buffer; + w1 = filevec[1].buffer; + p0 = buffer0 = (char *) w0; + p1 = buffer1 = (char *) w1; + n0 = filevec[0].buffered; + n1 = filevec[1].buffered; + + if (p0 == p1) + /* The buffers are the same; sentinels won't work. */ + p0 = p1 += n1; + else + { + /* Insert end sentinels, in this case characters that are guaranteed + to make the equality test false, and thus terminate the loop. */ + + if (n0 < n1) + p0[n0] = ~p1[n0]; + else + p1[n1] = ~p0[n1]; + + /* Loop until first mismatch, or to the sentinel characters. */ + + /* Compare a word at a time for speed. */ + while (*w0 == *w1) + w0++, w1++; + + /* Do the last few bytes of comparison a byte at a time. */ + p0 = (char *) w0; + p1 = (char *) w1; + while (*p0 == *p1) + p0++, p1++; + + /* Don't mistakenly count missing newline as part of prefix. */ + if (ROBUST_OUTPUT_STYLE (output_style) + && ((buffer0 + n0 - filevec[0].missing_newline < p0) + != + (buffer1 + n1 - filevec[1].missing_newline < p1))) + p0--, p1--; + } + + /* Now P0 and P1 point at the first nonmatching characters. */ + + /* Skip back to last line-beginning in the prefix, + and then discard up to HORIZON_LINES lines from the prefix. */ + i = horizon_lines; + while (p0 != buffer0 && (p0[-1] != '\n' || i--)) + p0--, p1--; + + /* Record the prefix. */ + filevec[0].prefix_end = p0; + filevec[1].prefix_end = p1; + + /* Find identical suffix. */ + + /* P0 and P1 point beyond the last chars not yet compared. */ + p0 = buffer0 + n0; + p1 = buffer1 + n1; + + if (! ROBUST_OUTPUT_STYLE (output_style) + || filevec[0].missing_newline == filevec[1].missing_newline) + { + end0 = p0; /* Addr of last char in file 0. */ + + /* Get value of P0 at which we should stop scanning backward: + this is when either P0 or P1 points just past the last char + of the identical prefix. */ + beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1); + + /* Scan back until chars don't match or we reach that point. */ + while (p0 != beg0) + if (*--p0 != *--p1) + { + /* Point at the first char of the matching suffix. */ + ++p0, ++p1; + beg0 = p0; + break; + } + + /* Are we at a line-beginning in both files? If not, add the rest of + this line to the main body. Discard up to HORIZON_LINES lines from + the identical suffix. Also, discard one extra line, + because shift_boundaries may need it. */ + i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n') + && + (buffer1 == p1 || p1[-1] == '\n')); + while (i-- && p0 != end0) + while (*p0++ != '\n') + continue; + + p1 += p0 - beg0; + } + + /* Record the suffix. */ + filevec[0].suffix_begin = p0; + filevec[1].suffix_begin = p1; + + /* Calculate number of lines of prefix to save. + + prefix_count == 0 means save the whole prefix; + we need this for options like -D that output the whole file, + or for enormous contexts (to avoid worrying about arithmetic overflow). + We also need it for options like -F that output some preceding line; + at least we will need to find the last few lines, + but since we don't know how many, it's easiest to find them all. + + Otherwise, prefix_count != 0. Save just prefix_count lines at start + of the line buffer; they'll be moved to the proper location later. + Handle 1 more line than the context says (because we count 1 too many), + rounded up to the next power of 2 to speed index computation. */ + + if (no_diff_means_no_output && ! function_regexp.fastmap + && context < LIN_MAX / 4 && context < n0) + { + middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end); + suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0); + for (prefix_count = 1; prefix_count <= context; prefix_count *= 2) + continue; + alloc_lines0 = (prefix_count + middle_guess + + MIN (context, suffix_guess)); + } + else + { + prefix_count = 0; + alloc_lines0 = guess_lines (0, 0, n0); + } + + prefix_mask = prefix_count - 1; + lines = 0; + linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0); + prefix_needed = ! (no_diff_means_no_output + && filevec[0].prefix_end == p0 + && filevec[1].prefix_end == p1); + p0 = buffer0; + + /* If the prefix is needed, find the prefix lines. */ + if (prefix_needed) + { + end0 = filevec[0].prefix_end; + while (p0 != end0) + { + lin l = lines++ & prefix_mask; + if (l == alloc_lines0) + { + if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0) + xalloc_die (); + alloc_lines0 *= 2; + linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0); + } + linbuf0[l] = p0; + while (*p0++ != '\n') + continue; + } + } + buffered_prefix = prefix_count && context < lines ? context : lines; + + /* Allocate line buffer 1. */ + + middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end); + suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1); + alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess); + if (alloc_lines1 < buffered_prefix + || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1) + xalloc_die (); + linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1); + + if (buffered_prefix != lines) + { + /* Rotate prefix lines to proper location. */ + for (i = 0; i < buffered_prefix; i++) + linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask]; + for (i = 0; i < buffered_prefix; i++) + linbuf0[i] = linbuf1[i]; + } + + /* Initialize line buffer 1 from line buffer 0. */ + for (i = 0; i < buffered_prefix; i++) + linbuf1[i] = linbuf0[i] - buffer0 + buffer1; + + /* Record the line buffer, adjusted so that + linbuf[0] points at the first differing line. */ + filevec[0].linbuf = linbuf0 + buffered_prefix; + filevec[1].linbuf = linbuf1 + buffered_prefix; + filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix; + filevec[0].alloc_lines = alloc_lines0 - buffered_prefix; + filevec[1].alloc_lines = alloc_lines1 - buffered_prefix; + filevec[0].prefix_lines = filevec[1].prefix_lines = lines; +} + +/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less + than 2**k. This table is derived from Chris K. Caldwell's list + . */ + +static unsigned char const prime_offset[] = +{ + 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3, + 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21, + 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27, + 55, 93, 1, 57, 25 +}; + +/* Verify that this host's size_t is not too wide for the above table. */ + +verify (sizeof (size_t) * CHAR_BIT <= sizeof prime_offset); + +/* Given a vector of two file_data objects, read the file associated + with each one, and build the table of equivalence classes. + Return nonzero if either file appears to be a binary file. + If PRETEND_BINARY is nonzero, pretend they are binary regardless. */ + +bool +read_files (struct file_data filevec[], bool pretend_binary) +{ + int i; + bool skip_test = text | pretend_binary; + bool appears_binary = pretend_binary | sip (&filevec[0], skip_test); + + if (filevec[0].desc != filevec[1].desc) + appears_binary |= sip (&filevec[1], skip_test | appears_binary); + else + { + filevec[1].buffer = filevec[0].buffer; + filevec[1].bufsize = filevec[0].bufsize; + filevec[1].buffered = filevec[0].buffered; + } + if (appears_binary) + { + set_binary_mode (filevec[0].desc, O_BINARY); + set_binary_mode (filevec[1].desc, O_BINARY); + return true; + } + + find_identical_ends (filevec); + + equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1; + if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc) + xalloc_die (); + equivs = xmalloc (equivs_alloc * sizeof *equivs); + /* Equivalence class 0 is permanently safe for lines that were not + hashed. Real equivalence classes start at 1. */ + equivs_index = 1; + + /* Allocate (one plus) a prime number of hash buckets. Use a prime + number between 1/3 and 2/3 of the value of equiv_allocs, + approximately. */ + for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++) + continue; + nbuckets = ((size_t) 1 << i) - prime_offset[i]; + if (PTRDIFF_MAX / sizeof *buckets <= nbuckets) + xalloc_die (); + buckets = zalloc ((nbuckets + 1) * sizeof *buckets); + buckets++; + + for (i = 0; i < 2; i++) + find_and_hash_each_line (&filevec[i]); + + filevec[0].equiv_max = filevec[1].equiv_max = equivs_index; + + free (equivs); + free (buckets - 1); + + return false; +} diff --git a/src/util.c b/src/util.c index 88955da..3de19f8 100644 --- a/src/util.c +++ b/src/util.c @@ -985,7 +985,8 @@ finish_output (void) Return nonzero if the lines differ. */ bool -lines_differ (char const *s1, char const *s2) +lines_differ_singlebyte (char const *s1, size_t s1len, + char const *s2, size_t s2len) { register char const *t1 = s1; register char const *t2 = s2; @@ -1141,6 +1142,354 @@ lines_differ (char const *s1, char const *s2) return true; } + +#ifdef HANDLE_MULTIBYTE +# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL) \ +do \ + { \ + mbstate_t bak = STATE; \ + \ + CONVFAIL = 0; \ + MBLENGTH = mbrtowc (&WC, T, END - T, &STATE); \ + \ + switch (MBLENGTH) \ + { \ + case (size_t)-2: \ + case (size_t)-1: \ + STATE = bak; \ + ++CONVFAIL; \ + /* Fall through. */ \ + case 0: \ + MBLENGTH = 1; \ + } \ + } \ + while (0) + +bool +lines_differ_multibyte (char const *s1, size_t s1len, + char const *s2, size_t s2len) +{ + char const *end1, *end2; + char c1, c2; + wchar_t wc1, wc2, wc1_bak, wc2_bak; + size_t mblen1, mblen2; + mbstate_t state1, state2, state1_bak, state2_bak; + int convfail1, convfail2, convfail1_bak, convfail2_bak; + + char const *t1 = s1; + char const *t2 = s2; + char const *t1_bak, *t2_bak; + size_t column = 0; + + if (ignore_white_space == IGNORE_NO_WHITE_SPACE && !ignore_case) + { + while (*t1 != '\n') + if (*t1++ != *t2++) + return 1; + return 0; + } + + end1 = t1 + s1len; + end2 = t2 + s2len; + + memset (&state1, '\0', sizeof (mbstate_t)); + memset (&state2, '\0', sizeof (mbstate_t)); + + while (1) + { + c1 = *t1; + c2 = *t2; + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); + + /* Test for exact char equality first, since it's a common case. */ + if (convfail1 ^ convfail2) + break; + else if (convfail1 && convfail2 && c1 != c2) + break; + else if (!convfail1 && !convfail2 && wc1 != wc2) + { + switch (ignore_white_space) + { + case IGNORE_ALL_SPACE: + /* For -w, just skip past any white space. */ + while (1) + { + if (convfail1) + break; + else if (wc1 == L'\n' || !iswspace (wc1)) + break; + + t1 += mblen1; + c1 = *t1; + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); + } + + while (1) + { + if (convfail2) + break; + else if (wc2 == L'\n' || !iswspace (wc2)) + break; + + t2 += mblen2; + c2 = *t2; + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); + } + t1 += mblen1; + t2 += mblen2; + break; + + case IGNORE_SPACE_CHANGE: + /* For -b, advance past any sequence of white space in + line 1 and consider it just one space, or nothing at + all if it is at the end of the line. */ + if (wc1 != L'\n' && iswspace (wc1)) + { + size_t mblen_bak; + mbstate_t state_bak; + + do + { + t1 += mblen1; + mblen_bak = mblen1; + state_bak = state1; + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); + } + while (!convfail1 && (wc1 != L'\n' && iswspace (wc1))); + + state1 = state_bak; + mblen1 = mblen_bak; + t1 -= mblen1; + convfail1 = 0; + wc1 = L' '; + } + + /* Likewise for line 2. */ + if (wc2 != L'\n' && iswspace (wc2)) + { + size_t mblen_bak; + mbstate_t state_bak; + + do + { + t2 += mblen2; + mblen_bak = mblen2; + state_bak = state2; + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); + } + while (!convfail2 && (wc2 != L'\n' && iswspace (wc2))); + + state2 = state_bak; + mblen2 = mblen_bak; + t2 -= mblen2; + convfail2 = 0; + wc2 = L' '; + } + + if (wc1 != wc2) + { + /* If we went too far when doing the simple test for + equality, go back to the first non-whitespace + character in both sides and try again. */ + if (wc2 == L' ' && wc1 != L'\n' && + t1 > s1 && + !convfail1_bak && iswspace (wc1_bak)) + { + t1 = t1_bak; + wc1 = wc1_bak; + state1 = state1_bak; + convfail1 = convfail1_bak; + continue; + } + if (wc1 == L' ' && wc2 != L'\n' + && t2 > s2 + && !convfail2_bak && iswspace (wc2_bak)) + { + t2 = t2_bak; + wc2 = wc2_bak; + state2 = state2_bak; + convfail2 = convfail2_bak; + continue; + } + } + + t1_bak = t1; t2_bak = t2; + wc1_bak = wc1; wc2_bak = wc2; + state1_bak = state1; state2_bak = state2; + convfail1_bak = convfail1; convfail2_bak = convfail2; + + if (wc1 == L'\n') + wc1 = L' '; + else + t1 += mblen1; + + if (wc2 == L'\n') + wc2 = L' '; + else + t2 += mblen2; + + break; + + case IGNORE_TRAILING_SPACE: + case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE: + if (iswspace (wc1) && iswspace (wc2)) + { + char const *p; + wchar_t wc; + size_t mblength; + int convfail; + mbstate_t state; + bool just_whitespace_left = 1; + if (wc1 != L'\n') + { + mblength = mblen1; + p = t1; + memset (&state, '\0', sizeof(mbstate_t)); + while (p < end1) + { + if (*p == '\n') + break; + + p += mblength; + MBC2WC (p, end1, mblength, wc, state, convfail); + if (convfail || !iswspace (wc)) + { + just_whitespace_left = 0; + break; + } + } + } + if (just_whitespace_left && wc2 != L'\n') + { + mblength = mblen2; + p = t2; + memset (&state, '\0', sizeof(mbstate_t)); + while (p < end2) + { + if (*p == '\n') + break; + + p += mblength; + MBC2WC (p, end2, mblength, wc, state, convfail); + if (convfail || !iswspace (wc)) + { + just_whitespace_left = 0; + break; + } + } + } + + if (just_whitespace_left) + /* Both lines have nothing but whitespace left. */ + return false; + } + + if (ignore_white_space == IGNORE_TRAILING_SPACE) + break; + /* Fall through. */ + case IGNORE_TAB_EXPANSION: + if ((wc1 == L' ' && wc2 == L'\t') + || (wc1 == L'\t' && wc2 == L' ')) + { + size_t column2 = column; + + while (1) + { + if (convfail1) + { + ++t1; + break; + } + else if (wc1 == L' ') + column++; + else if (wc1 == L'\t') + column += tabsize - column % tabsize; + else + { + t1 += mblen1; + break; + } + + t1 += mblen1; + c1 = *t1; + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); + } + + while (1) + { + if (convfail2) + { + ++t2; + break; + } + else if (wc2 == L' ') + column2++; + else if (wc2 == L'\t') + column2 += tabsize - column2 % tabsize; + else + { + t2 += mblen2; + break; + } + + t2 += mblen2; + c2 = *t2; + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); + } + + if (column != column2) + return 1; + } + else + { + t1 += mblen1; + t2 += mblen2; + } + break; + + case IGNORE_NO_WHITE_SPACE: + t1 += mblen1; + t2 += mblen2; + break; + } + + /* Lowercase all letters if -i is specified. */ + if (ignore_case) + { + if (!convfail1) + wc1 = towlower (wc1); + if (!convfail2) + wc2 = towlower (wc2); + } + + if (convfail1 ^ convfail2) + break; + else if (convfail1 && convfail2 && c1 != c2) + break; + else if (!convfail1 && !convfail2 && wc1 != wc2) + break; + } + else + { + t1_bak = t1; t2_bak = t2; + wc1_bak = wc1; wc2_bak = wc2; + state1_bak = state1; state2_bak = state2; + convfail1_bak = convfail1; convfail2_bak = convfail2; + + t1 += mblen1; t2 += mblen2; + } + + if (!convfail1 && wc1 == L'\n') + return 0; + + column += convfail1 ? 1 : + (wc1 == L'\t') ? tabsize - column % tabsize : wcwidth (wc1); + } + + return 1; +} +#endif /* Find the consecutive changes at the start of the script START. Return the last link before the first gap. */ diff --git a/src/util.c.i18n b/src/util.c.i18n new file mode 100644 index 0000000..88955da --- /dev/null +++ b/src/util.c.i18n @@ -0,0 +1,1576 @@ +/* Support routines for GNU DIFF. + + Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2013, + 2015-2017 Free Software Foundation, Inc. + + This file is part of GNU DIFF. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include "diff.h" +#include "argmatch.h" +#include "die.h" +#include +#include +#include +#include +#include "xvasprintf.h" +#include + +/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is + present. */ +#ifndef SA_NOCLDSTOP +# define SA_NOCLDSTOP 0 +# define sigprocmask(How, Set, Oset) /* empty */ +# define sigset_t int +# if ! HAVE_SIGINTERRUPT +# define siginterrupt(sig, flag) /* empty */ +# endif +#endif + +#ifndef SA_RESTART +# define SA_RESTART 0 +#endif + +char const pr_program[] = PR_PROGRAM; + +/* Queue up one-line messages to be printed at the end, + when -l is specified. Each message is recorded with a 'struct msg'. */ + +struct msg +{ + struct msg *next; + char args[1]; /* Format + 4 args, each '\0' terminated, concatenated. */ +}; + +/* Head of the chain of queues messages. */ + +static struct msg *msg_chain; + +/* Tail of the chain of queues messages. */ + +static struct msg **msg_chain_end = &msg_chain; + +/* Use when a system call returns non-zero status. + NAME should normally be the file name. */ + +void +perror_with_name (char const *name) +{ + error (0, errno, "%s", name); +} + +/* Use when a system call returns non-zero status and that is fatal. */ + +void +pfatal_with_name (char const *name) +{ + int e = errno; + print_message_queue (); + die (EXIT_TROUBLE, e, "%s", name); +} + +/* Print an error message containing MSGID, then exit. */ + +void +fatal (char const *msgid) +{ + print_message_queue (); + die (EXIT_TROUBLE, 0, "%s", _(msgid)); +} + +/* Like printf, except if -l in effect then save the message and print later. + This is used for things like "Only in ...". */ + +void +message (char const *format_msgid, char const *arg1, char const *arg2) +{ + message5 (format_msgid, arg1, arg2, 0, 0); +} + +void +message5 (char const *format_msgid, char const *arg1, char const *arg2, + char const *arg3, char const *arg4) +{ + if (paginate) + { + char *p; + char const *arg[5]; + int i; + size_t size[5]; + size_t total_size = offsetof (struct msg, args); + struct msg *new; + + arg[0] = format_msgid; + arg[1] = arg1; + arg[2] = arg2; + arg[3] = arg3 ? arg3 : ""; + arg[4] = arg4 ? arg4 : ""; + + for (i = 0; i < 5; i++) + total_size += size[i] = strlen (arg[i]) + 1; + + new = xmalloc (total_size); + + for (i = 0, p = new->args; i < 5; p += size[i++]) + memcpy (p, arg[i], size[i]); + + *msg_chain_end = new; + new->next = 0; + msg_chain_end = &new->next; + } + else + { + if (sdiff_merge_assist) + putchar (' '); + printf (_(format_msgid), arg1, arg2, arg3, arg4); + } +} + +/* Output all the messages that were saved up by calls to 'message'. */ + +void +print_message_queue (void) +{ + char const *arg[5]; + int i; + struct msg *m = msg_chain; + + while (m) + { + struct msg *next = m->next; + arg[0] = m->args; + for (i = 0; i < 4; i++) + arg[i + 1] = arg[i] + strlen (arg[i]) + 1; + printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]); + free (m); + m = next; + } +} + +/* The set of signals that are caught. */ + +static sigset_t caught_signals; + +/* If nonzero, the value of the pending fatal signal. */ + +static sig_atomic_t volatile interrupt_signal; + +/* A count of the number of pending stop signals that have been received. */ + +static sig_atomic_t volatile stop_signal_count; + +/* An ordinary signal was received; arrange for the program to exit. */ + +static void +sighandler (int sig) +{ + if (! SA_NOCLDSTOP) + signal (sig, SIG_IGN); + if (! interrupt_signal) + interrupt_signal = sig; +} + +/* A SIGTSTP was received; arrange for the program to suspend itself. */ + +static void +stophandler (int sig) +{ + if (! SA_NOCLDSTOP) + signal (sig, stophandler); + if (! interrupt_signal) + stop_signal_count++; +} +/* Process any pending signals. If signals are caught, this function + should be called periodically. Ideally there should never be an + unbounded amount of time when signals are not being processed. + Signal handling can restore the default colors, so callers must + immediately change colors after invoking this function. */ + +static void +process_signals (void) +{ + while (interrupt_signal || stop_signal_count) + { + int sig; + int stops; + sigset_t oldset; + + set_color_context (RESET_CONTEXT); + fflush (stdout); + + sigprocmask (SIG_BLOCK, &caught_signals, &oldset); + + /* Reload interrupt_signal and stop_signal_count, in case a new + signal was handled before sigprocmask took effect. */ + sig = interrupt_signal; + stops = stop_signal_count; + + /* SIGTSTP is special, since the application can receive that signal + more than once. In this case, don't set the signal handler to the + default. Instead, just raise the uncatchable SIGSTOP. */ + if (stops) + { + stop_signal_count = stops - 1; + sig = SIGSTOP; + } + else + signal (sig, SIG_DFL); + + /* Exit or suspend the program. */ + raise (sig); + sigprocmask (SIG_SETMASK, &oldset, NULL); + + /* If execution reaches here, then the program has been + continued (after being suspended). */ + } +} + +static void +install_signal_handlers (void) +{ + /* The signals that are trapped, and the number of such signals. */ + static int const sig[] = + { + /* This one is handled specially. */ + SIGTSTP, + + /* The usual suspects. */ + SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, +#ifdef SIGPOLL + SIGPOLL, +#endif +#ifdef SIGPROF + SIGPROF, +#endif +#ifdef SIGVTALRM + SIGVTALRM, +#endif +#ifdef SIGXCPU + SIGXCPU, +#endif +#ifdef SIGXFSZ + SIGXFSZ, +#endif + }; + enum { nsigs = sizeof (sig) / sizeof *(sig) }; + +#if ! SA_NOCLDSTOP + bool caught_sig[nsigs]; +#endif + { + int j; +#if SA_NOCLDSTOP + struct sigaction act; + + sigemptyset (&caught_signals); + for (j = 0; j < nsigs; j++) + { + sigaction (sig[j], NULL, &act); + if (act.sa_handler != SIG_IGN) + sigaddset (&caught_signals, sig[j]); + } + + act.sa_mask = caught_signals; + act.sa_flags = SA_RESTART; + + for (j = 0; j < nsigs; j++) + if (sigismember (&caught_signals, sig[j])) + { + act.sa_handler = sig[j] == SIGTSTP ? stophandler : sighandler; + sigaction (sig[j], &act, NULL); + } +#else + for (j = 0; j < nsigs; j++) + { + caught_sig[j] = (signal (sig[j], SIG_IGN) != SIG_IGN); + if (caught_sig[j]) + { + signal (sig[j], sig[j] == SIGTSTP ? stophandler : sighandler); + siginterrupt (sig[j], 0); + } + } +#endif + } +} + +static char const *current_name0; +static char const *current_name1; +static bool currently_recursive; +static bool colors_enabled; + +static struct color_ext_type *color_ext_list = NULL; + +struct bin_str + { + size_t len; /* Number of bytes */ + const char *string; /* Pointer to the same */ + }; + +struct color_ext_type + { + struct bin_str ext; /* The extension we're looking for */ + struct bin_str seq; /* The sequence to output when we do */ + struct color_ext_type *next; /* Next in list */ + }; + +/* Parse a string as part of the --palette argument; this may involve + decoding all kinds of escape characters. If equals_end is set an + unescaped equal sign ends the string, otherwise only a : or \0 + does. Set *OUTPUT_COUNT to the number of bytes output. Return + true if successful. + + The resulting string is *not* null-terminated, but may contain + embedded nulls. + + Note that both dest and src are char **; on return they point to + the first free byte after the array and the character that ended + the input string, respectively. */ + +static bool +get_funky_string (char **dest, const char **src, bool equals_end, + size_t *output_count) +{ + char num; /* For numerical codes */ + size_t count; /* Something to count with */ + enum { + ST_GND, ST_BACKSLASH, ST_OCTAL, ST_HEX, ST_CARET, ST_END, ST_ERROR + } state; + const char *p; + char *q; + + p = *src; /* We don't want to double-indirect */ + q = *dest; /* the whole darn time. */ + + count = 0; /* No characters counted in yet. */ + num = 0; + + state = ST_GND; /* Start in ground state. */ + while (state < ST_END) + { + switch (state) + { + case ST_GND: /* Ground state (no escapes) */ + switch (*p) + { + case ':': + case '\0': + state = ST_END; /* End of string */ + break; + case '\\': + state = ST_BACKSLASH; /* Backslash scape sequence */ + ++p; + break; + case '^': + state = ST_CARET; /* Caret escape */ + ++p; + break; + case '=': + if (equals_end) + { + state = ST_END; /* End */ + break; + } + FALLTHROUGH; + default: + *(q++) = *(p++); + ++count; + break; + } + break; + + case ST_BACKSLASH: /* Backslash escaped character */ + switch (*p) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + state = ST_OCTAL; /* Octal sequence */ + num = *p - '0'; + break; + case 'x': + case 'X': + state = ST_HEX; /* Hex sequence */ + num = 0; + break; + case 'a': /* Bell */ + num = '\a'; + break; + case 'b': /* Backspace */ + num = '\b'; + break; + case 'e': /* Escape */ + num = 27; + break; + case 'f': /* Form feed */ + num = '\f'; + break; + case 'n': /* Newline */ + num = '\n'; + break; + case 'r': /* Carriage return */ + num = '\r'; + break; + case 't': /* Tab */ + num = '\t'; + break; + case 'v': /* Vtab */ + num = '\v'; + break; + case '?': /* Delete */ + num = 127; + break; + case '_': /* Space */ + num = ' '; + break; + case '\0': /* End of string */ + state = ST_ERROR; /* Error! */ + break; + default: /* Escaped character like \ ^ : = */ + num = *p; + break; + } + if (state == ST_BACKSLASH) + { + *(q++) = num; + ++count; + state = ST_GND; + } + ++p; + break; + + case ST_OCTAL: /* Octal sequence */ + if (*p < '0' || *p > '7') + { + *(q++) = num; + ++count; + state = ST_GND; + } + else + num = (num << 3) + (*(p++) - '0'); + break; + + case ST_HEX: /* Hex sequence */ + switch (*p) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + num = (num << 4) + (*(p++) - '0'); + break; + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + num = (num << 4) + (*(p++) - 'a') + 10; + break; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + num = (num << 4) + (*(p++) - 'A') + 10; + break; + default: + *(q++) = num; + ++count; + state = ST_GND; + break; + } + break; + + case ST_CARET: /* Caret escape */ + state = ST_GND; /* Should be the next state... */ + if (*p >= '@' && *p <= '~') + { + *(q++) = *(p++) & 037; + ++count; + } + else if (*p == '?') + { + *(q++) = 127; + ++count; + } + else + state = ST_ERROR; + break; + + default: + abort (); + } + } + + *dest = q; + *src = p; + *output_count = count; + + return state != ST_ERROR; +} + +enum parse_state + { + PS_START = 1, + PS_2, + PS_3, + PS_4, + PS_DONE, + PS_FAIL + }; + +#define LEN_STR_PAIR(s) sizeof (s) - 1, s + +static struct bin_str color_indicator[] = + { + { LEN_STR_PAIR ("\033[") }, /* lc: Left of color sequence */ + { LEN_STR_PAIR ("m") }, /* rc: Right of color sequence */ + { 0, NULL }, /* ec: End color (replaces lc+rs+rc) */ + { LEN_STR_PAIR ("0") }, /* rs: Reset to ordinary colors */ + { LEN_STR_PAIR ("1") }, /* hd: Header */ + { LEN_STR_PAIR ("32") }, /* ad: Add line */ + { LEN_STR_PAIR ("31") }, /* de: Delete line */ + { LEN_STR_PAIR ("36") }, /* ln: Line number */ + }; + +static const char *const indicator_name[] = + { + "lc", "rc", "ec", "rs", "hd", "ad", "de", "ln", NULL + }; +ARGMATCH_VERIFY (indicator_name, color_indicator); + +static char const *color_palette; + +void +set_color_palette (char const *palette) +{ + color_palette = palette; +} + +static void +parse_diff_color (void) +{ + char *color_buf; + const char *p; /* Pointer to character being parsed */ + char *buf; /* color_buf buffer pointer */ + int ind_no; /* Indicator number */ + char label[3]; /* Indicator label */ + struct color_ext_type *ext; /* Extension we are working on */ + + if ((p = color_palette) == NULL || *p == '\0') + return; + + ext = NULL; + strcpy (label, "??"); + + /* This is an overly conservative estimate, but any possible + --palette string will *not* generate a color_buf longer than + itself, so it is a safe way of allocating a buffer in + advance. */ + buf = color_buf = xstrdup (p); + + enum parse_state state = PS_START; + while (true) + { + switch (state) + { + case PS_START: /* First label character */ + switch (*p) + { + case ':': + ++p; + break; + + case '*': + /* Allocate new extension block and add to head of + linked list (this way a later definition will + override an earlier one, which can be useful for + having terminal-specific defs override global). */ + + ext = xmalloc (sizeof *ext); + ext->next = color_ext_list; + color_ext_list = ext; + + ++p; + ext->ext.string = buf; + + state = (get_funky_string (&buf, &p, true, &ext->ext.len) + ? PS_4 : PS_FAIL); + break; + + case '\0': + state = PS_DONE; /* Done! */ + goto done; + + default: /* Assume it is file type label */ + label[0] = *(p++); + state = PS_2; + break; + } + break; + + case PS_2: /* Second label character */ + if (*p) + { + label[1] = *(p++); + state = PS_3; + } + else + state = PS_FAIL; /* Error */ + break; + + case PS_3: /* Equal sign after indicator label */ + state = PS_FAIL; /* Assume failure... */ + if (*(p++) == '=')/* It *should* be... */ + { + for (ind_no = 0; indicator_name[ind_no] != NULL; ++ind_no) + { + if (STREQ (label, indicator_name[ind_no])) + { + color_indicator[ind_no].string = buf; + state = (get_funky_string (&buf, &p, false, + &color_indicator[ind_no].len) + ? PS_START : PS_FAIL); + break; + } + } + if (state == PS_FAIL) + error (0, 0, _("unrecognized prefix: %s"), label); + } + break; + + case PS_4: /* Equal sign after *.ext */ + if (*(p++) == '=') + { + ext->seq.string = buf; + state = (get_funky_string (&buf, &p, false, &ext->seq.len) + ? PS_START : PS_FAIL); + } + else + state = PS_FAIL; + break; + + case PS_FAIL: + goto done; + + default: + abort (); + } + } + done: + + if (state == PS_FAIL) + { + struct color_ext_type *e; + struct color_ext_type *e2; + + error (0, 0, + _("unparsable value for --palette")); + free (color_buf); + for (e = color_ext_list; e != NULL; /* empty */) + { + e2 = e; + e = e->next; + free (e2); + } + colors_enabled = false; + } +} + +static void +check_color_output (bool is_pipe) +{ + bool output_is_tty; + + if (! outfile || colors_style == NEVER) + return; + + output_is_tty = presume_output_tty || (!is_pipe && isatty (fileno (outfile))); + + colors_enabled = (colors_style == ALWAYS + || (colors_style == AUTO && output_is_tty)); + + if (colors_enabled) + parse_diff_color (); + + if (output_is_tty) + install_signal_handlers (); +} + +/* Call before outputting the results of comparing files NAME0 and NAME1 + to set up OUTFILE, the stdio stream for the output to go to. + + Usually, OUTFILE is just stdout. But when -l was specified + we fork off a 'pr' and make OUTFILE a pipe to it. + 'pr' then outputs to our stdout. */ + +void +setup_output (char const *name0, char const *name1, bool recursive) +{ + current_name0 = name0; + current_name1 = name1; + currently_recursive = recursive; + outfile = 0; +} + +#if HAVE_WORKING_FORK +static pid_t pr_pid; +#endif + +static char c_escape_char (char c) +{ + switch (c) { + case '\a': return 'a'; + case '\b': return 'b'; + case '\t': return 't'; + case '\n': return 'n'; + case '\v': return 'v'; + case '\f': return 'f'; + case '\r': return 'r'; + case '"': return '"'; + case '\\': return '\\'; + default: + return c < 32; + } +} + +static char * +c_escape (char const *str) +{ + char const *s; + size_t plus = 0; + bool must_quote = false; + + for (s = str; *s; s++) + { + char c = *s; + + if (c == ' ') + { + must_quote = true; + continue; + } + switch (c_escape_char (*s)) + { + case 1: + plus += 3; + /* fall through */ + case 0: + break; + default: + plus++; + break; + } + } + + if (must_quote || plus) + { + size_t s_len = s - str; + char *buffer = xmalloc (s_len + plus + 3); + char *b = buffer; + + *b++ = '"'; + for (s = str; *s; s++) + { + char c = *s; + char escape = c_escape_char (c); + + switch (escape) + { + case 0: + *b++ = c; + break; + case 1: + *b++ = '\\'; + *b++ = ((c >> 6) & 03) + '0'; + *b++ = ((c >> 3) & 07) + '0'; + *b++ = ((c >> 0) & 07) + '0'; + break; + default: + *b++ = '\\'; + *b++ = escape; + break; + } + } + *b++ = '"'; + *b = 0; + return buffer; + } + + return (char *) str; +} + +void +begin_output (void) +{ + char *names[2]; + char *name; + + if (outfile != 0) + return; + + names[0] = c_escape (current_name0); + names[1] = c_escape (current_name1); + + /* Construct the header of this piece of diff. */ + /* POSIX 1003.1-2001 specifies this format. But there are some bugs in + the standard: it says that we must print only the last component + of the pathnames, and it requires two spaces after "diff" if + there are no options. These requirements are silly and do not + match historical practice. */ + name = xasprintf ("diff%s %s %s", switch_string, names[0], names[1]); + + if (paginate) + { + char const *argv[4]; + + if (fflush (stdout) != 0) + pfatal_with_name (_("write failed")); + + argv[0] = pr_program; + argv[1] = "-h"; + argv[2] = name; + argv[3] = 0; + + /* Make OUTFILE a pipe to a subsidiary 'pr'. */ + { +#if HAVE_WORKING_FORK + int pipes[2]; + + if (pipe (pipes) != 0) + pfatal_with_name ("pipe"); + + pr_pid = fork (); + if (pr_pid < 0) + pfatal_with_name ("fork"); + + if (pr_pid == 0) + { + close (pipes[1]); + if (pipes[0] != STDIN_FILENO) + { + if (dup2 (pipes[0], STDIN_FILENO) < 0) + pfatal_with_name ("dup2"); + close (pipes[0]); + } + + execv (pr_program, (char **) argv); + _exit (errno == ENOENT ? 127 : 126); + } + else + { + close (pipes[0]); + outfile = fdopen (pipes[1], "w"); + if (!outfile) + pfatal_with_name ("fdopen"); + check_color_output (true); + } +#else + char *command = system_quote_argv (SCI_SYSTEM, (char **) argv); + errno = 0; + outfile = popen (command, "w"); + if (!outfile) + pfatal_with_name (command); + check_color_output (true); + free (command); +#endif + } + } + else + { + + /* If -l was not specified, output the diff straight to 'stdout'. */ + + outfile = stdout; + check_color_output (false); + + /* If handling multiple files (because scanning a directory), + print which files the following output is about. */ + if (currently_recursive) + printf ("%s\n", name); + } + + free (name); + + /* A special header is needed at the beginning of context output. */ + switch (output_style) + { + case OUTPUT_CONTEXT: + print_context_header (files, (char const *const *)names, false); + break; + + case OUTPUT_UNIFIED: + print_context_header (files, (char const *const *)names, true); + break; + + default: + break; + } + + if (names[0] != current_name0) + free (names[0]); + if (names[1] != current_name1) + free (names[1]); +} + +/* Call after the end of output of diffs for one file. + Close OUTFILE and get rid of the 'pr' subfork. */ + +void +finish_output (void) +{ + if (outfile != 0 && outfile != stdout) + { + int status; + int wstatus; + int werrno = 0; + if (ferror (outfile)) + fatal ("write failed"); +#if ! HAVE_WORKING_FORK + wstatus = pclose (outfile); + if (wstatus == -1) + werrno = errno; +#else + if (fclose (outfile) != 0) + pfatal_with_name (_("write failed")); + if (waitpid (pr_pid, &wstatus, 0) < 0) + pfatal_with_name ("waitpid"); +#endif + status = (! werrno && WIFEXITED (wstatus) + ? WEXITSTATUS (wstatus) + : INT_MAX); + if (status) + die (EXIT_TROUBLE, werrno, + _(status == 126 + ? "subsidiary program '%s' could not be invoked" + : status == 127 + ? "subsidiary program '%s' not found" + : status == INT_MAX + ? "subsidiary program '%s' failed" + : "subsidiary program '%s' failed (exit status %d)"), + pr_program, status); + } + + outfile = 0; +} + +/* Compare two lines (typically one from each input file) + according to the command line options. + For efficiency, this is invoked only when the lines do not match exactly + but an option like -i might cause us to ignore the difference. + Return nonzero if the lines differ. */ + +bool +lines_differ (char const *s1, char const *s2) +{ + register char const *t1 = s1; + register char const *t2 = s2; + size_t column = 0; + + while (1) + { + register unsigned char c1 = *t1++; + register unsigned char c2 = *t2++; + + /* Test for exact char equality first, since it's a common case. */ + if (c1 != c2) + { + switch (ignore_white_space) + { + case IGNORE_ALL_SPACE: + /* For -w, just skip past any white space. */ + while (isspace (c1) && c1 != '\n') c1 = *t1++; + while (isspace (c2) && c2 != '\n') c2 = *t2++; + break; + + case IGNORE_SPACE_CHANGE: + /* For -b, advance past any sequence of white space in + line 1 and consider it just one space, or nothing at + all if it is at the end of the line. */ + if (isspace (c1)) + { + while (c1 != '\n') + { + c1 = *t1++; + if (! isspace (c1)) + { + --t1; + c1 = ' '; + break; + } + } + } + + /* Likewise for line 2. */ + if (isspace (c2)) + { + while (c2 != '\n') + { + c2 = *t2++; + if (! isspace (c2)) + { + --t2; + c2 = ' '; + break; + } + } + } + + if (c1 != c2) + { + /* If we went too far when doing the simple test + for equality, go back to the first non-white-space + character in both sides and try again. */ + if (c2 == ' ' && c1 != '\n' + && s1 + 1 < t1 + && isspace ((unsigned char) t1[-2])) + { + --t1; + continue; + } + if (c1 == ' ' && c2 != '\n' + && s2 + 1 < t2 + && isspace ((unsigned char) t2[-2])) + { + --t2; + continue; + } + } + + break; + + case IGNORE_TRAILING_SPACE: + case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE: + if (isspace (c1) && isspace (c2)) + { + unsigned char c; + if (c1 != '\n') + { + char const *p = t1; + while ((c = *p) != '\n' && isspace (c)) + ++p; + if (c != '\n') + break; + } + if (c2 != '\n') + { + char const *p = t2; + while ((c = *p) != '\n' && isspace (c)) + ++p; + if (c != '\n') + break; + } + /* Both lines have nothing but whitespace left. */ + return false; + } + if (ignore_white_space == IGNORE_TRAILING_SPACE) + break; + FALLTHROUGH; + case IGNORE_TAB_EXPANSION: + if ((c1 == ' ' && c2 == '\t') + || (c1 == '\t' && c2 == ' ')) + { + size_t column2 = column; + for (;; c1 = *t1++) + { + if (c1 == ' ') + column++; + else if (c1 == '\t') + column += tabsize - column % tabsize; + else + break; + } + for (;; c2 = *t2++) + { + if (c2 == ' ') + column2++; + else if (c2 == '\t') + column2 += tabsize - column2 % tabsize; + else + break; + } + if (column != column2) + return true; + } + break; + + case IGNORE_NO_WHITE_SPACE: + break; + } + + /* Lowercase all letters if -i is specified. */ + + if (ignore_case) + { + c1 = tolower (c1); + c2 = tolower (c2); + } + + if (c1 != c2) + break; + } + if (c1 == '\n') + return false; + + column += c1 == '\t' ? tabsize - column % tabsize : 1; + } + + return true; +} + +/* Find the consecutive changes at the start of the script START. + Return the last link before the first gap. */ + +struct change * _GL_ATTRIBUTE_CONST +find_change (struct change *start) +{ + return start; +} + +struct change * _GL_ATTRIBUTE_CONST +find_reverse_change (struct change *start) +{ + return start; +} + +/* Divide SCRIPT into pieces by calling HUNKFUN and + print each piece with PRINTFUN. + Both functions take one arg, an edit script. + + HUNKFUN is called with the tail of the script + and returns the last link that belongs together with the start + of the tail. + + PRINTFUN takes a subscript which belongs together (with a null + link at the end) and prints it. */ + +void +print_script (struct change *script, + struct change * (*hunkfun) (struct change *), + void (*printfun) (struct change *)) +{ + struct change *next = script; + + while (next) + { + struct change *this, *end; + + /* Find a set of changes that belong together. */ + this = next; + end = (*hunkfun) (next); + + /* Disconnect them from the rest of the changes, + making them a hunk, and remember the rest for next iteration. */ + next = end->link; + end->link = 0; +#ifdef DEBUG + debug_script (this); +#endif + + /* Print this hunk. */ + (*printfun) (this); + + /* Reconnect the script so it will all be freed properly. */ + end->link = next; + } +} + +/* Print the text of a single line LINE, + flagging it with the characters in LINE_FLAG (which say whether + the line is inserted, deleted, changed, etc.). LINE_FLAG must not + end in a blank, unless it is a single blank. */ + +void +print_1_line (char const *line_flag, char const *const *line) +{ + print_1_line_nl (line_flag, line, false); +} + +/* Print the text of a single line LINE, + flagging it with the characters in LINE_FLAG (which say whether + the line is inserted, deleted, changed, etc.). LINE_FLAG must not + end in a blank, unless it is a single blank. If SKIP_NL is set, then + the final '\n' is not printed. */ + +void +print_1_line_nl (char const *line_flag, char const *const *line, bool skip_nl) +{ + char const *base = line[0], *limit = line[1]; /* Help the compiler. */ + FILE *out = outfile; /* Help the compiler some more. */ + char const *flag_format = 0; + + /* If -T was specified, use a Tab between the line-flag and the text. + Otherwise use a Space (as Unix diff does). + Print neither space nor tab if line-flags are empty. + But omit trailing blanks if requested. */ + + if (line_flag && *line_flag) + { + char const *flag_format_1 = flag_format = initial_tab ? "%s\t" : "%s "; + char const *line_flag_1 = line_flag; + + if (suppress_blank_empty && **line == '\n') + { + flag_format_1 = "%s"; + + /* This hack to omit trailing blanks takes advantage of the + fact that the only way that LINE_FLAG can end in a blank + is when LINE_FLAG consists of a single blank. */ + line_flag_1 += *line_flag_1 == ' '; + } + + fprintf (out, flag_format_1, line_flag_1); + } + + output_1_line (base, limit - (skip_nl && limit[-1] == '\n'), flag_format, line_flag); + + if ((!line_flag || line_flag[0]) && limit[-1] != '\n') + { + set_color_context (RESET_CONTEXT); + fprintf (out, "\n\\ %s\n", _("No newline at end of file")); + } +} + +/* Output a line from BASE up to LIMIT. + With -t, expand white space characters to spaces, and if FLAG_FORMAT + is nonzero, output it with argument LINE_FLAG after every + internal carriage return, so that tab stops continue to line up. */ + +void +output_1_line (char const *base, char const *limit, char const *flag_format, + char const *line_flag) +{ + const size_t MAX_CHUNK = 1024; + if (!expand_tabs) + { + size_t left = limit - base; + while (left) + { + size_t to_write = MIN (left, MAX_CHUNK); + size_t written = fwrite (base, sizeof (char), to_write, outfile); + if (written < to_write) + return; + base += written; + left -= written; + process_signals (); + } + } + else + { + register FILE *out = outfile; + register unsigned char c; + register char const *t = base; + register size_t column = 0; + size_t tab_size = tabsize; + size_t counter_proc_signals = 0; + + while (t < limit) + { + counter_proc_signals++; + if (counter_proc_signals == MAX_CHUNK) + { + process_signals (); + counter_proc_signals = 0; + } + + switch ((c = *t++)) + { + case '\t': + { + size_t spaces = tab_size - column % tab_size; + column += spaces; + do + putc (' ', out); + while (--spaces); + } + break; + + case '\r': + putc (c, out); + if (flag_format && t < limit && *t != '\n') + fprintf (out, flag_format, line_flag); + column = 0; + break; + + case '\b': + if (column == 0) + continue; + column--; + putc (c, out); + break; + + default: + column += isprint (c) != 0; + putc (c, out); + break; + } + } + } +} + +enum indicator_no + { + C_LEFT, C_RIGHT, C_END, C_RESET, C_HEADER, C_ADD, C_DELETE, C_LINE + }; + +static void +put_indicator (const struct bin_str *ind) +{ + fwrite (ind->string, ind->len, 1, outfile); +} + +static enum color_context last_context = RESET_CONTEXT; + +void +set_color_context (enum color_context color_context) +{ + if (color_context != RESET_CONTEXT) + process_signals (); + if (colors_enabled && last_context != color_context) + { + put_indicator (&color_indicator[C_LEFT]); + switch (color_context) + { + case HEADER_CONTEXT: + put_indicator (&color_indicator[C_HEADER]); + break; + + case LINE_NUMBER_CONTEXT: + put_indicator (&color_indicator[C_LINE]); + break; + + case ADD_CONTEXT: + put_indicator (&color_indicator[C_ADD]); + break; + + case DELETE_CONTEXT: + put_indicator (&color_indicator[C_DELETE]); + break; + + case RESET_CONTEXT: + put_indicator (&color_indicator[C_RESET]); + break; + + default: + abort (); + } + put_indicator (&color_indicator[C_RIGHT]); + last_context = color_context; + } +} + + +char const change_letter[] = { 0, 'd', 'a', 'c' }; + +/* Translate an internal line number (an index into diff's table of lines) + into an actual line number in the input file. + The internal line number is I. FILE points to the data on the file. + + Internal line numbers count from 0 starting after the prefix. + Actual line numbers count from 1 within the entire file. */ + +lin _GL_ATTRIBUTE_PURE +translate_line_number (struct file_data const *file, lin i) +{ + return i + file->prefix_lines + 1; +} + +/* Translate a line number range. This is always done for printing, + so for convenience translate to printint rather than lin, so that the + caller can use printf with "%"pI"d" without casting. */ + +void +translate_range (struct file_data const *file, + lin a, lin b, + printint *aptr, printint *bptr) +{ + *aptr = translate_line_number (file, a - 1) + 1; + *bptr = translate_line_number (file, b + 1) - 1; +} + +/* Print a pair of line numbers with SEPCHAR, translated for file FILE. + If the two numbers are identical, print just one number. + + Args A and B are internal line numbers. + We print the translated (real) line numbers. */ + +void +print_number_range (char sepchar, struct file_data *file, lin a, lin b) +{ + printint trans_a, trans_b; + translate_range (file, a, b, &trans_a, &trans_b); + + /* Note: we can have B < A in the case of a range of no lines. + In this case, we should print the line number before the range, + which is B. */ + if (trans_b > trans_a) + fprintf (outfile, "%"pI"d%c%"pI"d", trans_a, sepchar, trans_b); + else + fprintf (outfile, "%"pI"d", trans_b); +} + +/* Look at a hunk of edit script and report the range of lines in each file + that it applies to. HUNK is the start of the hunk, which is a chain + of 'struct change'. The first and last line numbers of file 0 are stored in + *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1. + Note that these are internal line numbers that count from 0. + + If no lines from file 0 are deleted, then FIRST0 is LAST0+1. + + Return UNCHANGED if only ignorable lines are inserted or deleted, + OLD if lines of file 0 are deleted, + NEW if lines of file 1 are inserted, + and CHANGED if both kinds of changes are found. */ + +enum changes +analyze_hunk (struct change *hunk, + lin *first0, lin *last0, + lin *first1, lin *last1) +{ + struct change *next; + lin l0, l1; + lin show_from, show_to; + lin i; + bool trivial = ignore_blank_lines || ignore_regexp.fastmap; + size_t trivial_length = ignore_blank_lines - 1; + /* If 0, ignore zero-length lines; + if SIZE_MAX, do not ignore lines just because of their length. */ + + bool skip_white_space = + ignore_blank_lines && IGNORE_TRAILING_SPACE <= ignore_white_space; + bool skip_leading_white_space = + skip_white_space && IGNORE_SPACE_CHANGE <= ignore_white_space; + + char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */ + char const * const *linbuf1 = files[1].linbuf; + + show_from = show_to = 0; + + *first0 = hunk->line0; + *first1 = hunk->line1; + + next = hunk; + do + { + l0 = next->line0 + next->deleted - 1; + l1 = next->line1 + next->inserted - 1; + show_from += next->deleted; + show_to += next->inserted; + + for (i = next->line0; i <= l0 && trivial; i++) + { + char const *line = linbuf0[i]; + char const *lastbyte = linbuf0[i + 1] - 1; + char const *newline = lastbyte + (*lastbyte != '\n'); + size_t len = newline - line; + char const *p = line; + if (skip_white_space) + for (; *p != '\n'; p++) + if (! isspace ((unsigned char) *p)) + { + if (! skip_leading_white_space) + p = line; + break; + } + if (newline - p != trivial_length + && (! ignore_regexp.fastmap + || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) + trivial = 0; + } + + for (i = next->line1; i <= l1 && trivial; i++) + { + char const *line = linbuf1[i]; + char const *lastbyte = linbuf1[i + 1] - 1; + char const *newline = lastbyte + (*lastbyte != '\n'); + size_t len = newline - line; + char const *p = line; + if (skip_white_space) + for (; *p != '\n'; p++) + if (! isspace ((unsigned char) *p)) + { + if (! skip_leading_white_space) + p = line; + break; + } + if (newline - p != trivial_length + && (! ignore_regexp.fastmap + || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) + trivial = 0; + } + } + while ((next = next->link) != 0); + + *last0 = l0; + *last1 = l1; + + /* If all inserted or deleted lines are ignorable, + tell the caller to ignore this hunk. */ + + if (trivial) + return UNCHANGED; + + return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED); +} + +/* Concatenate three strings, returning a newly malloc'd string. */ + +char * +concat (char const *s1, char const *s2, char const *s3) +{ + char *new = xmalloc (strlen (s1) + strlen (s2) + strlen (s3) + 1); + sprintf (new, "%s%s%s", s1, s2, s3); + return new; +} + +/* Yield a new block of SIZE bytes, initialized to zero. */ + +void * +zalloc (size_t size) +{ + void *p = xmalloc (size); + memset (p, 0, size); + return p; +} + +void +debug_script (struct change *sp) +{ + fflush (stdout); + + for (; sp; sp = sp->link) + { + printint line0 = sp->line0; + printint line1 = sp->line1; + printint deleted = sp->deleted; + printint inserted = sp->inserted; + fprintf (stderr, "%3"pI"d %3"pI"d delete %"pI"d insert %"pI"d\n", + line0, line1, deleted, inserted); + } + + fflush (stderr); +}