/* diff - compare files line by line

   Copyright (C) 1988-1989, 1992-1994, 1996, 1998, 2001-2002, 2004, 2006-2007,
   2009-2013, 2015-2017 Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#define GDIFF_MAIN
#include "diff.h"
#include "die.h"
/* NOTE(review): every bare '#include' below has lost its <header> operand
   in this copy of the file.  The header names must be restored from the
   upstream source before this translation unit can be preprocessed.  */
#include
#include "paths.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "diff"

/* Author list handed to version_etc by the -v option.  */
#define AUTHORS \
  proper_name ("Paul Eggert"), \
  proper_name ("Mike Haertel"), \
  proper_name ("David Hayes"), \
  proper_name ("Richard Stallman"), \
  proper_name ("Len Tower")

/* Smallest gutter used when laying out the two side-by-side columns;
   may be overridden at build time.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif

/* An accumulating list of regexps given on the command line.  Patterns are
   appended by add_regexp and the list is finalized by
   summarize_regexp_list.  */
struct regexp_list
{
  char *regexps;        /* chars representing disjunction of the regexps */
  size_t len;           /* chars used in 'regexps' */
  size_t size;          /* size malloc'ed for 'regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does 'regexps' represent a disjunction?
*/
  struct re_pattern_buffer *buf; /* buffer that receives the compiled pattern */
};

/* Forward declarations of the file-local helpers defined further down.  */
static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void specify_colors_style (char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);

/* Line comparison routine.  main points it at lines_differ_multibyte or
   lines_differ_singlebyte before any option is decoded.  */
bool (*lines_differ) (char const *, size_t, char const *, size_t);

/* If comparing directories, compare their common subdirectories
   recursively.  */
static bool recursive;

/* In context diffs, show previous lines that match these regexps.  */
static struct regexp_list function_regexp_list;

/* Ignore changes affecting only lines that match these regexps.  */
static struct regexp_list ignore_regexp_list;

#if O_BINARY
/* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect.  */
static bool binary;
#else
/* Without O_BINARY the flag is a compile-time constant.  */
enum { binary = true };
#endif

/* If one file is missing, treat it as present but empty (-N).  */
static bool new_file;

/* If the first file is missing, treat it as present but empty
   (--unidirectional-new-file).  */
static bool unidirectional_new_file;

/* Report files compared that are the same (-s).
   Normally nothing is output when that happens.  */
static bool report_identical_files;

/* Short option string for getopt_long; a trailing ':' marks an option that
   takes an argument.  */
static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ";

/* Values for long options that do not have single-letter equivalents.
*/
enum
{
  /* Start above CHAR_MAX so these never collide with a short option
     character.  */
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_DEREFERENCE_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_BLANK_EMPTY_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TABSIZE_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence.  (main subtracts the first one to
     index line_format and line_format_option.)  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence.  (main subtracts the first one to
     index group_format and group_format_option.)  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION,

  COLOR_OPTION,
  COLOR_PALETTE_OPTION,

  PRESUME_OUTPUT_TTY_OPTION,
};

/* Option names used in diagnostics, in the same order as the
   *_GROUP_FORMAT_OPTION enumerators.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };

/* Option names used in diagnostics, in the same order as the
   *_LINE_FORMAT_OPTION enumerators.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };

/* Long option table for getopt_long.  Each entry is
   { name, has_arg, flag, val }; has_arg is 0 for no argument, 1 for a
   required argument and 2 for an optional argument.  */
static struct option const longopts[] =
{
  {"binary", 0, 0, BINARY_OPTION},
  {"brief", 0, 0, 'q'},
  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
  {"color", 2, 0, COLOR_OPTION},
  {"context", 2, 0, 'C'},
  {"ed", 0, 0, 'e'},
  {"exclude", 1, 0, 'x'},
  {"exclude-from", 1, 0, 'X'},
  {"expand-tabs", 0, 0, 't'},
  {"forward-ed", 0, 0, 'f'},
  {"from-file", 1, 0, FROM_FILE_OPTION},
  {"help", 0, 0, HELP_OPTION},
  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
  {"ifdef", 1, 0, 'D'},
  {"ignore-all-space", 0, 0, 'w'},
  {"ignore-blank-lines", 0, 0, 'B'},
  {"ignore-case", 0, 0, 'i'},
  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
  {"ignore-trailing-space", 0, 0, 'Z'},
  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
  {"initial-tab", 0, 0, 'T'},
  {"label", 1, 0, 'L'},
  {"left-column", 0, 0,
LEFT_COLUMN_OPTION},
  {"line-format", 1, 0, LINE_FORMAT_OPTION},
  {"minimal", 0, 0, 'd'},
  {"new-file", 0, 0, 'N'},
  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
  {"no-dereference", 0, 0, NO_DEREFERENCE_OPTION},
  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
  {"normal", 0, 0, NORMAL_OPTION},
  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
  {"paginate", 0, 0, 'l'},
  {"palette", 1, 0, COLOR_PALETTE_OPTION},
  {"rcs", 0, 0, 'n'},
  {"recursive", 0, 0, 'r'},
  {"report-identical-files", 0, 0, 's'},
  {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
  {"show-c-function", 0, 0, 'p'},
  {"show-function-line", 1, 0, 'F'},
  {"side-by-side", 0, 0, 'y'},
  {"speed-large-files", 0, 0, 'H'},
  {"starting-file", 1, 0, 'S'},
  {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
  {"suppress-blank-empty", 0, 0, SUPPRESS_BLANK_EMPTY_OPTION},
  {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
  {"tabsize", 1, 0, TABSIZE_OPTION},
  {"text", 0, 0, 'a'},
  {"to-file", 1, 0, TO_FILE_OPTION},
  {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
  {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
  {"unidirectional-new-file", 0, 0, 'P'},
  {"unified", 2, 0, 'U'},
  {"version", 0, 0, 'v'},
  {"width", 1, 0, 'W'},

  /* This is solely for testing.  Do not document.  */
  {"-presume-output-tty", no_argument, NULL, PRESUME_OUTPUT_TTY_OPTION},
  {0, 0, 0, 0}
};

/* Return a string containing the command options with which diff was
   invoked.  Spaces appear between what were separate ARGV-elements.
   There is a space at the beginning but none at the end.
   If there were no options, the result is an empty string.

   Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and
   COUNT, the length of that vector.
*/
/* The result is obtained from xmalloc.  A first pass over OPTIONVEC sums
   the shell-quoted length of every element so that the buffer can be
   allocated once; a second pass fills it in.  */

static char *
option_list (char **optionvec, int count)
{
  int i;
  size_t size = 1;      /* starts at 1 for the terminating NUL */
  char *result;
  char *p;

  for (i = 0; i < count; i++)
    size += 1 + shell_quote_length (optionvec[i]);

  p = result = xmalloc (size);

  for (i = 0; i < count; i++)
    {
      *p++ = ' ';
      p = shell_quote_copy (p, optionvec[i]);
    }

  *p = '\0';
  return result;
}


/* Return an option value suitable for add_exclude.  */

static int
exclude_options (void)
{
  return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
}

/* Program entry point.  Initialize global state, decode the command line
   into the option variables, derive the settings that depend on several
   options at once, and then run compare_files on the operands.  The exit
   status is the largest status returned by any comparison.  */

int
main (int argc, char **argv)
{
  int exit_status = EXIT_SUCCESS;
  int c;
  int i;
  int prev = -1;                 /* previous option, for digit runs */
  lin ocontext = -1;             /* context from digit options; -1 if none */
  bool explicit_context = false; /* was -C or -U seen? */
  size_t width = 0;              /* 0 means "not specified yet" */
  bool show_c_function = false;
  char const *from_file = NULL;
  char const *to_file = NULL;
  uintmax_t numval;
  char *numend;

  /* Do our initializations.  */
  exit_failure = EXIT_TROUBLE;
  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
  c_stack_action (0);
  function_regexp_list.buf = &function_regexp;
  ignore_regexp_list.buf = &ignore_regexp;
  re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
  excluded = new_exclude ();
  presume_output_tty = false;

  /* Pick the line comparison routine; the multibyte one is used only when
     it is compiled in and the locale's MB_CUR_MAX exceeds 1.  */
#ifdef HANDLE_MULTIBYTE
  if (MB_CUR_MAX > 1)
    lines_differ = lines_differ_multibyte;
  else
#endif
    lines_differ = lines_differ_singlebyte;

  /* Decode the options.  */

  while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
    {
      switch (c)
        {
        case 0:
          break;

        /* Consecutive digit options accumulate a decimal context length in
           OCONTEXT, saturating at CONTEXT_MAX.  PREV tells whether the
           previous option was a digit too, i.e. whether this digit
           continues a number or starts a new one.  */
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          ocontext = (! ISDIGIT (prev)
                      ? c - '0'
                      : (ocontext - (c - '0' <= CONTEXT_MAX % 10)
                         < CONTEXT_MAX / 10)
                      ?
10 * ocontext + (c - '0')
                      : CONTEXT_MAX);
          break;

        case 'a':
          text = true;
          break;

        case 'b':
          /* Only ever raise the white space setting, never lower it.  */
          if (ignore_white_space < IGNORE_SPACE_CHANGE)
            ignore_white_space = IGNORE_SPACE_CHANGE;
          break;

        case 'Z':
          if (ignore_white_space < IGNORE_SPACE_CHANGE)
            ignore_white_space |= IGNORE_TRAILING_SPACE;
          break;

        case 'B':
          ignore_blank_lines = true;
          break;

        case 'C':
        case 'U':
          {
            /* The argument is optional for the long forms; default to 3
               lines and clamp anything larger than CONTEXT_MAX.  */
            if (optarg)
              {
                numval = strtoumax (optarg, &numend, 10);
                if (*numend)
                  try_help ("invalid context length '%s'", optarg);
                if (CONTEXT_MAX < numval)
                  numval = CONTEXT_MAX;
              }
            else
              numval = 3;

            specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
            if (context < numval)
              context = numval;
            explicit_context = true;
          }
          break;

        case 'c':
          specify_style (OUTPUT_CONTEXT);
          if (context < 3)
            context = 3;
          break;

        case 'd':
          minimal = true;
          break;

        case 'D':
          specify_style (OUTPUT_IFDEF);
          {
            /* Each %c below is printed with a 0 argument, so the single
               sprintf leaves several NUL-terminated strings back to back
               in B; the loop then hands one string to each group_format
               slot in turn.  */
            static char const C_ifdef_group_formats[] =
              "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
            char *b = xmalloc (sizeof C_ifdef_group_formats
                               + 7 * strlen (optarg) - 14 /* 7*"%s" */
                               - 8 /* 5*"%%" + 3*"%c" */);
            sprintf (b, C_ifdef_group_formats,
                     0,
                     optarg, optarg, 0,
                     optarg, optarg, 0,
                     optarg, optarg, optarg);
            for (i = 0; i < sizeof group_format / sizeof group_format[0]; i++)
              {
                specify_value (&group_format[i], b, "-D");
                b += strlen (b) + 1;
              }
          }
          break;

        case 'e':
          specify_style (OUTPUT_ED);
          break;

        case 'E':
          if (ignore_white_space < IGNORE_SPACE_CHANGE)
            ignore_white_space |= IGNORE_TAB_EXPANSION;
          break;

        case 'f':
          specify_style (OUTPUT_FORWARD_ED);
          break;

        case 'F':
          add_regexp (&function_regexp_list, optarg);
          break;

        case 'h':
          /* Split the files into chunks for faster processing.
             Usually does not change the result.

             This currently has no effect.
*/
          break;

        case 'H':
          speed_large_files = true;
          break;

        case 'i':
          ignore_case = true;
          break;

        case 'I':
          add_regexp (&ignore_regexp_list, optarg);
          break;

        case 'l':
          /* An empty pr_program means no paginator is available.  */
          if (!pr_program[0])
            try_help ("pagination not supported on this host", NULL);
          paginate = true;
#ifdef SIGCHLD
          /* Pagination requires forking and waiting, and
             System V fork+wait does not work if SIGCHLD is ignored.  */
          signal (SIGCHLD, SIG_DFL);
#endif
          break;

        case 'L':
          /* At most two labels are accepted, one per file.  */
          if (!file_label[0])
            file_label[0] = optarg;
          else if (!file_label[1])
            file_label[1] = optarg;
          else
            fatal ("too many file label options");
          break;

        case 'n':
          specify_style (OUTPUT_RCS);
          break;

        case 'N':
          new_file = true;
          break;

        case 'p':
          /* -p is -F with a built-in pattern; the output style is settled
             after the loop if no other style option was given.  */
          show_c_function = true;
          add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
          break;

        case 'P':
          unidirectional_new_file = true;
          break;

        case 'q':
          brief = true;
          break;

        case 'r':
          recursive = true;
          break;

        case 's':
          report_identical_files = true;
          break;

        case 'S':
          specify_value (&starting_file, optarg, "-S");
          break;

        case 't':
          expand_tabs = true;
          break;

        case 'T':
          initial_tab = true;
          break;

        case 'u':
          specify_style (OUTPUT_UNIFIED);
          if (context < 3)
            context = 3;
          break;

        case 'v':
          version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version,
                       AUTHORS, (char *) NULL);
          check_stdout ();
          return EXIT_SUCCESS;

        case 'w':
          ignore_white_space = IGNORE_ALL_SPACE;
          break;

        case 'x':
          add_exclude (excluded, optarg, exclude_options ());
          break;

        case 'X':
          if (add_exclude_file (add_exclude, excluded, optarg,
                                exclude_options (), '\n'))
            pfatal_with_name (optarg);
          break;

        case 'y':
          specify_style (OUTPUT_SDIFF);
          break;

        case 'W':
          /* Repeating -W with the same value is accepted; a different
             value is a conflict.  */
          numval = strtoumax (optarg, &numend, 10);
          if (! (0 < numval && numval <= SIZE_MAX) || *numend)
            try_help ("invalid width '%s'", optarg);
          if (width != numval)
            {
              if (width)
                fatal ("conflicting width options");
              width = numval;
            }
          break;

        case BINARY_OPTION:
#if O_BINARY
          binary = true;
          if (!
isatty (STDOUT_FILENO))
            set_binary_mode (STDOUT_FILENO, O_BINARY);
#endif
          break;

        case FROM_FILE_OPTION:
          specify_value (&from_file, optarg, "--from-file");
          break;

        case HELP_OPTION:
          usage ();
          check_stdout ();
          return EXIT_SUCCESS;

        case HORIZON_LINES_OPTION:
          /* Keep the largest horizon requested, capped at LIN_MAX.  */
          numval = strtoumax (optarg, &numend, 10);
          if (*numend)
            try_help ("invalid horizon length '%s'", optarg);
          horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
          break;

        case IGNORE_FILE_NAME_CASE_OPTION:
          ignore_file_name_case = true;
          break;

        case INHIBIT_HUNK_MERGE_OPTION:
          /* This option is obsolete, but accept it for backward
             compatibility.  */
          break;

        case LEFT_COLUMN_OPTION:
          left_column = true;
          break;

        case LINE_FORMAT_OPTION:
          /* --line-format sets every line format at once.  */
          specify_style (OUTPUT_IFDEF);
          for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
            specify_value (&line_format[i], optarg, "--line-format");
          break;

        case NO_DEREFERENCE_OPTION:
          no_dereference_symlinks = true;
          break;

        case NO_IGNORE_FILE_NAME_CASE_OPTION:
          ignore_file_name_case = false;
          break;

        case NORMAL_OPTION:
          specify_style (OUTPUT_NORMAL);
          break;

        case SDIFF_MERGE_ASSIST_OPTION:
          specify_style (OUTPUT_SDIFF);
          sdiff_merge_assist = true;
          break;

        case STRIP_TRAILING_CR_OPTION:
          strip_trailing_cr = true;
          break;

        case SUPPRESS_BLANK_EMPTY_OPTION:
          suppress_blank_empty = true;
          break;

        case SUPPRESS_COMMON_LINES_OPTION:
          suppress_common_lines = true;
          break;

        case TABSIZE_OPTION:
          /* Same conflict rule as -W; the upper bound leaves room for the
             gutter added in the side-by-side layout below.  */
          numval = strtoumax (optarg, &numend, 10);
          if (!
(0 < numval && numval <= SIZE_MAX - GUTTER_WIDTH_MINIMUM)
              || *numend)
            try_help ("invalid tabsize '%s'", optarg);
          if (tabsize != numval)
            {
              if (tabsize)
                fatal ("conflicting tabsize options");
              tabsize = numval;
            }
          break;

        case TO_FILE_OPTION:
          specify_value (&to_file, optarg, "--to-file");
          break;

        case UNCHANGED_LINE_FORMAT_OPTION:
        case OLD_LINE_FORMAT_OPTION:
        case NEW_LINE_FORMAT_OPTION:
          /* Turn the option code into an index into line_format.  */
          specify_style (OUTPUT_IFDEF);
          c -= UNCHANGED_LINE_FORMAT_OPTION;
          specify_value (&line_format[c], optarg, line_format_option[c]);
          break;

        case UNCHANGED_GROUP_FORMAT_OPTION:
        case OLD_GROUP_FORMAT_OPTION:
        case NEW_GROUP_FORMAT_OPTION:
        case CHANGED_GROUP_FORMAT_OPTION:
          /* Turn the option code into an index into group_format.  */
          specify_style (OUTPUT_IFDEF);
          c -= UNCHANGED_GROUP_FORMAT_OPTION;
          specify_value (&group_format[c], optarg, group_format_option[c]);
          break;

        case COLOR_OPTION:
          specify_colors_style (optarg);
          break;

        case COLOR_PALETTE_OPTION:
          set_color_palette (optarg);
          break;

        case PRESUME_OUTPUT_TTY_OPTION:
          presume_output_tty = true;
          break;

        default:
          try_help (NULL, NULL);
        }
      prev = c;
    }

  /* In auto mode, a "dumb" terminal gets no colors.  */
  if (colors_style == AUTO)
    {
      char const *t = getenv ("TERM");
      if (t && STREQ (t, "dumb"))
        colors_style = NEVER;
    }

  /* No style option was given: -p alone selects context output (with 3
     lines unless digit options supplied a count), otherwise use the
     normal style.  */
  if (output_style == OUTPUT_UNSPECIFIED)
    {
      if (show_c_function)
        {
          specify_style (OUTPUT_CONTEXT);
          if (ocontext < 0)
            context = 3;
        }
      else
        specify_style (OUTPUT_NORMAL);
    }

  /* Choose the timestamp format used in file headers.  */
  if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
    {
#if (defined STAT_TIMESPEC || defined STAT_TIMESPEC_NS \
     || defined HAVE_STRUCT_STAT_ST_SPARE1)
      time_format = "%Y-%m-%d %H:%M:%S.%N %z";
#else
      time_format = "%Y-%m-%d %H:%M:%S %z";
#endif
    }
  else
    {
      /* See POSIX 1003.1-2001 for this format.  */
      time_format = "%a %b %e %T %Y";
    }

  /* Reconcile a digit-option context with the one set by -c/-u/-C/-U: the
     digit value wins when it is larger, or when it is smaller and no
     explicit -C/-U count was given.  */
  if (0 <= ocontext
      && (output_style == OUTPUT_CONTEXT
          || output_style == OUTPUT_UNIFIED)
      && (context < ocontext
          || (ocontext < context && ! explicit_context)))
    context = ocontext;

  /* Defaults for options that were not given.  */
  if (! tabsize)
    tabsize = 8;
  if (! width)
    width = 130;

  {
    /* Maximize first the half line width, and then the gutter width,
       according to the following constraints:

        1.
Two half lines plus a gutter must fit in a line.
        2.  If the half line width is nonzero:
            a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
            b.  If tabs are not expanded to spaces,
                a half line plus a gutter is an integral number of tabs,
                so that tabs in the right column line up.  */

    size_t t = expand_tabs ? 1 : tabsize;
    size_t w = width;
    size_t t_plus_g = t + GUTTER_WIDTH_MINIMUM;
    size_t unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1);
    size_t off = unaligned_off - unaligned_off % t;
    sdiff_half_width = (off <= GUTTER_WIDTH_MINIMUM || w <= off
                        ? 0
                        : MIN (off - GUTTER_WIDTH_MINIMUM, w - off));
    sdiff_column2_offset = sdiff_half_width ? off : w;
  }

  /* Make the horizon at least as large as the context, so that
     shift_boundaries has more freedom to shift the first and last hunks.  */
  if (horizon_lines < context)
    horizon_lines = context;

  summarize_regexp_list (&function_regexp_list);
  summarize_regexp_list (&ignore_regexp_list);

  /* Fill in whichever line and group formats were left unspecified.  */
  if (output_style == OUTPUT_IFDEF)
    {
      for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
        if (!line_format[i])
          line_format[i] = "%l\n";
      if (!group_format[OLD])
        group_format[OLD]
          = group_format[CHANGED] ? group_format[CHANGED] : "%<";
      if (!group_format[NEW])
        group_format[NEW]
          = group_format[CHANGED] ? group_format[CHANGED] : "%>";
      if (!group_format[UNCHANGED])
        group_format[UNCHANGED] = "%=";
      if (!group_format[CHANGED])
        group_format[CHANGED] = concat (group_format[OLD],
                                        group_format[NEW], "");
    }

  /* For the ifdef style this holds when the unchanged-group format is
     empty, or is "%=" with an empty unchanged-line format; for the other
     styles it holds unless side-by-side output is shown with its common
     lines.  */
  no_diff_means_no_output =
    (output_style == OUTPUT_IFDEF ?
(!*group_format[UNCHANGED]
       || (STREQ (group_format[UNCHANGED], "%=")
           && !*line_format[UNCHANGED]))
     : (output_style != OUTPUT_SDIFF) | suppress_common_lines);

  /* Bitwise operators on 0/1 flags: the result is nonzero only when both
     BRIEF and BINARY are set and none of the listed ignore/strip options
     is in effect.  */
  files_can_be_treated_as_binary =
    (brief & binary
     & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
          | (ignore_regexp_list.regexps || ignore_white_space)));

  /* Record the options (everything before the first operand).  */
  switch_string = option_list (argv + 1, optind - 1);

  if (from_file)
    {
      if (to_file)
        fatal ("--from-file and --to-file both specified");
      else
        /* Compare FROM_FILE against every operand, keeping the largest
           status.  */
        for (; optind < argc; optind++)
          {
            int status = compare_files (NULL, from_file, argv[optind]);
            if (exit_status < status)
              exit_status = status;
          }
    }
  else
    {
      if (to_file)
        /* Compare every operand against TO_FILE, keeping the largest
           status.  */
        for (; optind < argc; optind++)
          {
            int status = compare_files (NULL, argv[optind], to_file);
            if (exit_status < status)
              exit_status = status;
          }
      else
        {
          /* Without --from-file or --to-file exactly two operands are
             required.  */
          if (argc - optind != 2)
            {
              if (argc - optind < 2)
                try_help ("missing operand after '%s'", argv[argc - 1]);
              else
                try_help ("extra operand '%s'", argv[optind + 2]);
            }

          exit_status = compare_files (NULL, argv[optind], argv[optind + 1]);
        }
    }

  /* Print any messages that were saved up for last.  */
  print_message_queue ();

  check_stdout ();
  exit (exit_status);
  return exit_status;
}

/* Append to REGLIST the regexp PATTERN.
*/ static void add_regexp (struct regexp_list *reglist, char const *pattern) { size_t patlen = strlen (pattern); char const *m = re_compile_pattern (pattern, patlen, reglist->buf); if (m != 0) error (EXIT_TROUBLE, 0, "%s: %s", pattern, m); else { char *regexps = reglist->regexps; size_t len = reglist->len; bool multiple_regexps = reglist->multiple_regexps = regexps != 0; size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen; size_t size = reglist->size; if (size <= newlen) { if (!size) size = 1; do size *= 2; while (size <= newlen); reglist->size = size; reglist->regexps = regexps = xrealloc (regexps, size); } if (multiple_regexps) { regexps[len++] = '\\'; regexps[len++] = '|'; } memcpy (regexps + len, pattern, patlen + 1); } } /* Ensure that REGLIST represents the disjunction of its regexps. This is done here, rather than earlier, to avoid O(N^2) behavior. */ static void summarize_regexp_list (struct regexp_list *reglist) { if (reglist->regexps) { /* At least one regexp was specified. Allocate a fastmap for it. */ reglist->buf->fastmap = xmalloc (1 << CHAR_BIT); if (reglist->multiple_regexps) { /* Compile the disjunction of the regexps. (If just one regexp was specified, it is already compiled.) 
*/ char const *m = re_compile_pattern (reglist->regexps, reglist->len, reglist->buf); if (m) die (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m); } } } static void try_help (char const *reason_msgid, char const *operand) { if (reason_msgid) error (0, 0, _(reason_msgid), operand); die (EXIT_TROUBLE, 0, _("Try '%s --help' for more information."), program_name); } static void check_stdout (void) { if (ferror (stdout)) fatal ("write failed"); else if (fclose (stdout) != 0) pfatal_with_name (_("standard output")); } static char const * const option_help_msgid[] = { N_(" --normal output a normal diff (the default)"), N_("-q, --brief report only when files differ"), N_("-s, --report-identical-files report when two files are the same"), N_("-c, -C NUM, --context[=NUM] output NUM (default 3) lines of copied context"), N_("-u, -U NUM, --unified[=NUM] output NUM (default 3) lines of unified context"), N_("-e, --ed output an ed script"), N_("-n, --rcs output an RCS format diff"), N_("-y, --side-by-side output in two columns"), N_("-W, --width=NUM output at most NUM (default 130) print columns"), N_(" --left-column output only the left column of common lines"), N_(" --suppress-common-lines do not output common lines"), "", N_("-p, --show-c-function show which C function each change is in"), N_("-F, --show-function-line=RE show the most recent line matching RE"), N_(" --label LABEL use LABEL instead of file name and timestamp\n" " (can be repeated)"), "", N_("-t, --expand-tabs expand tabs to spaces in output"), N_("-T, --initial-tab make tabs line up by prepending a tab"), N_(" --tabsize=NUM tab stops every NUM (default 8) print columns"), N_(" --suppress-blank-empty suppress space or tab before empty output lines"), N_("-l, --paginate pass output through 'pr' to paginate it"), "", N_("-r, --recursive recursively compare any subdirectories found"), N_(" --no-dereference don't follow symbolic links"), N_("-N, --new-file treat absent files as empty"), N_(" 
--unidirectional-new-file treat absent first files as empty"), N_(" --ignore-file-name-case ignore case when comparing file names"), N_(" --no-ignore-file-name-case consider case when comparing file names"), N_("-x, --exclude=PAT exclude files that match PAT"), N_("-X, --exclude-from=FILE exclude files that match any pattern in FILE"), N_("-S, --starting-file=FILE start with FILE when comparing directories"), N_(" --from-file=FILE1 compare FILE1 to all operands;\n" " FILE1 can be a directory"), N_(" --to-file=FILE2 compare all operands to FILE2;\n" " FILE2 can be a directory"), "", N_("-i, --ignore-case ignore case differences in file contents"), N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"), N_("-Z, --ignore-trailing-space ignore white space at line end"), N_("-b, --ignore-space-change ignore changes in the amount of white space"), N_("-w, --ignore-all-space ignore all white space"), N_("-B, --ignore-blank-lines ignore changes where lines are all blank"), N_("-I, --ignore-matching-lines=RE ignore changes where all lines match RE"), "", N_("-a, --text treat all files as text"), N_(" --strip-trailing-cr strip trailing carriage return on input"), #if O_BINARY N_(" --binary read and write data in binary mode"), #endif "", N_("-D, --ifdef=NAME output merged file with '#ifdef NAME' diffs"), N_(" --GTYPE-group-format=GFMT format GTYPE input groups with GFMT"), N_(" --line-format=LFMT format all input lines with LFMT"), N_(" --LTYPE-line-format=LFMT format LTYPE input lines with LFMT"), N_(" These format options provide fine-grained control over the output\n" " of diff, generalizing -D/--ifdef."), N_(" LTYPE is 'old', 'new', or 'unchanged'. 
GTYPE is LTYPE or 'changed'."), N_(" GFMT (only) may contain:\n\ %< lines from FILE1\n\ %> lines from FILE2\n\ %= lines common to FILE1 and FILE2\n\ %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\ LETTERs are as follows for new group, lower case for old group:\n\ F first line number\n\ L last line number\n\ N number of lines = L-F+1\n\ E F-1\n\ M L+1\n\ %(A=B?T:E) if A equals B then T else E"), N_(" LFMT (only) may contain:\n\ %L contents of line\n\ %l contents of line, excluding any trailing newline\n\ %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"), N_(" Both GFMT and LFMT may contain:\n\ %% %\n\ %c'C' the single character C\n\ %c'\\OOO' the character with octal code OOO\n\ C the character C (other characters represent themselves)"), "", N_("-d, --minimal try hard to find a smaller set of changes"), N_(" --horizon-lines=NUM keep NUM lines of the common prefix and suffix"), N_(" --speed-large-files assume large files and many scattered small changes"), N_(" --color[=WHEN] colorize the output; WHEN can be 'never', 'always',\n" " or 'auto' (the default)"), N_(" --palette=PALETTE the colors to use when --color is active; PALETTE is\n" " a colon-separated list of terminfo capabilities"), "", N_(" --help display this help and exit"), N_("-v, --version output version information and exit"), "", N_("FILES are 'FILE1 FILE2' or 'DIR1 DIR2' or 'DIR FILE' or 'FILE DIR'."), N_("If --from-file or --to-file is given, there are no restrictions on FILE(s)."), N_("If a FILE is '-', read standard input."), N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."), 0 }; static void usage (void) { char const * const *p; printf (_("Usage: %s [OPTION]... 
FILES\n"), program_name); printf ("%s\n\n", _("Compare FILES line by line.")); fputs (_("\ Mandatory arguments to long options are mandatory for short options too.\n\ "), stdout); for (p = option_help_msgid; *p; p++) { if (!**p) putchar ('\n'); else { char const *msg = _(*p); char const *nl; while ((nl = strchr (msg, '\n'))) { int msglen = nl + 1 - msg; printf (" %.*s", msglen, msg); msg = nl + 1; } printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg); } } emit_bug_reporting_address (); } /* Set VAR to VALUE, reporting an OPTION error if this is a conflict. */ static void specify_value (char const **var, char const *value, char const *option) { if (*var && ! STREQ (*var, value)) { error (0, 0, _("conflicting %s option value '%s'"), option, value); try_help (NULL, NULL); } *var = value; } /* Set the output style to STYLE, diagnosing conflicts. */ static void specify_style (enum output_style style) { if (output_style != style) { if (output_style != OUTPUT_UNSPECIFIED) try_help ("conflicting output style options", NULL); output_style = style; } } /* Set the color mode. */ static void specify_colors_style (char const *value) { if (value == NULL || STREQ (value, "auto")) colors_style = AUTO; else if (STREQ (value, "always")) colors_style = ALWAYS; else if (STREQ (value, "never")) colors_style = NEVER; else try_help ("invalid color '%s'", value); } /* Set the last-modified time of *ST to be the current time. */ static void set_mtime_to_now (struct stat *st) { #ifdef STAT_TIMESPEC gettime (&STAT_TIMESPEC (st, st_mtim)); #else struct timespec t; gettime (&t); st->st_mtime = t.tv_sec; # if defined STAT_TIMESPEC_NS STAT_TIMESPEC_NS (st, st_mtim) = t.tv_nsec; # elif defined HAVE_STRUCT_STAT_ST_SPARE1 st->st_spare1 = t.tv_nsec / 1000; # endif #endif } /* Compare two files (or dirs) with parent comparison PARENT and names NAME0 and NAME1. (If PARENT is null, then the first name is just NAME0, etc.) This is self-contained; it opens the files and closes them. 
Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if different, EXIT_TROUBLE if there is a problem opening them. */ static int compare_files (struct comparison const *parent, char const *name0, char const *name1) { struct comparison cmp; #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0) register int f; int status = EXIT_SUCCESS; bool same_files; char *free0; char *free1; /* If this is directory comparison, perhaps we have a file that exists only in one of the directories. If so, just print a message to that effect. */ if (! ((name0 && name1) || (unidirectional_new_file && name1) || new_file)) { char const *name = name0 ? name0 : name1; char const *dir = parent->file[!name0].name; /* See POSIX 1003.1-2001 for this format. */ message ("Only in %s: %s\n", dir, name); /* Return EXIT_FAILURE so that diff_dirs will return EXIT_FAILURE ("some files differ"). */ return EXIT_FAILURE; } memset (cmp.file, 0, sizeof cmp.file); cmp.parent = parent; /* cmp.file[f].desc markers */ #define NONEXISTENT (-1) /* nonexistent file */ #define UNOPENED (-2) /* unopened file (e.g. directory) */ #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */ #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */ cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT; cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT; /* Now record the full name of each file, including nonexistent ones. */ if (!name0) name0 = name1; if (!name1) name1 = name0; if (!parent) { free0 = NULL; free1 = NULL; cmp.file[0].name = name0; cmp.file[1].name = name1; } else { cmp.file[0].name = free0 = file_name_concat (parent->file[0].name, name0, NULL); cmp.file[1].name = free1 = file_name_concat (parent->file[1].name, name1, NULL); } /* Stat the files. 
*/ for (f = 0; f < 2; f++) { if (cmp.file[f].desc != NONEXISTENT) { if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0) { cmp.file[f].desc = cmp.file[0].desc; cmp.file[f].stat = cmp.file[0].stat; } else if (STREQ (cmp.file[f].name, "-")) { cmp.file[f].desc = STDIN_FILENO; if (binary && ! isatty (STDIN_FILENO)) set_binary_mode (STDIN_FILENO, O_BINARY); if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0) cmp.file[f].desc = ERRNO_ENCODE (errno); else { if (S_ISREG (cmp.file[f].stat.st_mode)) { off_t pos = lseek (STDIN_FILENO, 0, SEEK_CUR); if (pos < 0) cmp.file[f].desc = ERRNO_ENCODE (errno); else cmp.file[f].stat.st_size = MAX (0, cmp.file[f].stat.st_size - pos); } /* POSIX 1003.1-2001 requires current time for stdin. */ set_mtime_to_now (&cmp.file[f].stat); } } else if ((no_dereference_symlinks ? lstat (cmp.file[f].name, &cmp.file[f].stat) : stat (cmp.file[f].name, &cmp.file[f].stat)) != 0) cmp.file[f].desc = ERRNO_ENCODE (errno); } } /* Mark files as nonexistent as needed for -N and -P, if they are inaccessible empty regular files (the kind of files that 'patch' creates to indicate nonexistent backups), or if they are top-level files that do not exist but their counterparts do exist. */ for (f = 0; f < 2; f++) if ((new_file || (f == 0 && unidirectional_new_file)) && (cmp.file[f].desc == UNOPENED ? (S_ISREG (cmp.file[f].stat.st_mode) && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) && cmp.file[f].stat.st_size == 0) : ((cmp.file[f].desc == ERRNO_ENCODE (ENOENT) || cmp.file[f].desc == ERRNO_ENCODE (EBADF)) && ! 
parent && (cmp.file[1 - f].desc == UNOPENED || cmp.file[1 - f].desc == STDIN_FILENO)))) cmp.file[f].desc = NONEXISTENT; for (f = 0; f < 2; f++) if (cmp.file[f].desc == NONEXISTENT) { memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat); cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode; } for (f = 0; f < 2; f++) { int e = ERRNO_DECODE (cmp.file[f].desc); if (0 <= e) { errno = e; perror_with_name (cmp.file[f].name); status = EXIT_TROUBLE; } } if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1)) { /* If one is a directory, and it was specified in the command line, use the file in that dir with the other file's basename. */ int fnm_arg = DIR_P (0); int dir_arg = 1 - fnm_arg; char const *fnm = cmp.file[fnm_arg].name; char const *dir = cmp.file[dir_arg].name; char const *filename = cmp.file[dir_arg].name = free0 = find_dir_file_pathname (dir, last_component (fnm)); if (STREQ (fnm, "-")) fatal ("cannot compare '-' to a directory"); if ((no_dereference_symlinks ? lstat (filename, &cmp.file[dir_arg].stat) : stat (filename, &cmp.file[dir_arg].stat)) != 0) { perror_with_name (filename); status = EXIT_TROUBLE; } } if (status != EXIT_SUCCESS) { /* One of the files should exist but does not. */ } else if (cmp.file[0].desc == NONEXISTENT && cmp.file[1].desc == NONEXISTENT) { /* Neither file "exists", so there's nothing to compare. */ } else if ((same_files = (cmp.file[0].desc != NONEXISTENT && cmp.file[1].desc != NONEXISTENT && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat) && same_file_attributes (&cmp.file[0].stat, &cmp.file[1].stat))) && no_diff_means_no_output) { /* The two named files are actually the same physical file. We know they are identical without actually reading them. */ } else if (DIR_P (0) & DIR_P (1)) { if (output_style == OUTPUT_IFDEF) fatal ("-D option not supported with directories"); /* If both are directories, compare the files in them. 
*/ if (parent && !recursive) { /* But don't compare dir contents one level down unless -r was specified. See POSIX 1003.1-2001 for this format. */ message ("Common subdirectories: %s and %s\n", cmp.file[0].name, cmp.file[1].name); } else status = diff_dirs (&cmp, compare_files); } else if ((DIR_P (0) | DIR_P (1)) || (parent && !((S_ISREG (cmp.file[0].stat.st_mode) || S_ISLNK (cmp.file[0].stat.st_mode)) && (S_ISREG (cmp.file[1].stat.st_mode) || S_ISLNK (cmp.file[1].stat.st_mode))))) { if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) { /* We have a subdirectory that exists only in one directory. */ if ((DIR_P (0) | DIR_P (1)) && recursive && (new_file || (unidirectional_new_file && cmp.file[0].desc == NONEXISTENT))) status = diff_dirs (&cmp, compare_files); else { char const *dir; /* PARENT must be non-NULL here. */ assert (parent); dir = parent->file[cmp.file[0].desc == NONEXISTENT].name; /* See POSIX 1003.1-2001 for this format. */ message ("Only in %s: %s\n", dir, name0); status = EXIT_FAILURE; } } else { /* We have two files that are not to be compared. */ /* See POSIX 1003.1-2001 for this format. */ message5 ("File %s is a %s while file %s is a %s\n", file_label[0] ? file_label[0] : cmp.file[0].name, file_type (&cmp.file[0].stat), file_label[1] ? file_label[1] : cmp.file[1].name, file_type (&cmp.file[1].stat)); /* This is a difference. */ status = EXIT_FAILURE; } } else if (S_ISLNK (cmp.file[0].stat.st_mode) || S_ISLNK (cmp.file[1].stat.st_mode)) { /* We get here only if we use lstat(), not stat(). */ assert (no_dereference_symlinks); if (S_ISLNK (cmp.file[0].stat.st_mode) && S_ISLNK (cmp.file[1].stat.st_mode)) { /* Compare the values of the symbolic links. */ char *link_value[2] = { NULL, NULL }; for (f = 0; f < 2; f++) { link_value[f] = xreadlink (cmp.file[f].name); if (link_value[f] == NULL) { perror_with_name (cmp.file[f].name); status = EXIT_TROUBLE; break; } } if (status == EXIT_SUCCESS) { if ( ! 
STREQ (link_value[0], link_value[1])) { message ("Symbolic links %s and %s differ\n", cmp.file[0].name, cmp.file[1].name); /* This is a difference. */ status = EXIT_FAILURE; } } for (f = 0; f < 2; f++) free (link_value[f]); } else { /* We have two files that are not to be compared, because one of them is a symbolic link and the other one is not. */ message5 ("File %s is a %s while file %s is a %s\n", file_label[0] ? file_label[0] : cmp.file[0].name, file_type (&cmp.file[0].stat), file_label[1] ? file_label[1] : cmp.file[1].name, file_type (&cmp.file[1].stat)); /* This is a difference. */ status = EXIT_FAILURE; } } else if (files_can_be_treated_as_binary && S_ISREG (cmp.file[0].stat.st_mode) && S_ISREG (cmp.file[1].stat.st_mode) && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size && 0 < cmp.file[0].stat.st_size && 0 < cmp.file[1].stat.st_size) { message ("Files %s and %s differ\n", file_label[0] ? file_label[0] : cmp.file[0].name, file_label[1] ? file_label[1] : cmp.file[1].name); status = EXIT_FAILURE; } else { /* Both exist and neither is a directory. */ /* Open the files and record their descriptors. */ int oflags = O_RDONLY | (binary ? O_BINARY : 0); if (cmp.file[0].desc == UNOPENED) if ((cmp.file[0].desc = open (cmp.file[0].name, oflags, 0)) < 0) { perror_with_name (cmp.file[0].name); status = EXIT_TROUBLE; } if (cmp.file[1].desc == UNOPENED) { if (same_files) cmp.file[1].desc = cmp.file[0].desc; else if ((cmp.file[1].desc = open (cmp.file[1].name, oflags, 0)) < 0) { perror_with_name (cmp.file[1].name); status = EXIT_TROUBLE; } } /* Compare the files, if no error was found. */ if (status == EXIT_SUCCESS) status = diff_2_files (&cmp); /* Close the file descriptors. 
*/ if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0) { perror_with_name (cmp.file[0].name); status = EXIT_TROUBLE; } if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc && close (cmp.file[1].desc) != 0) { perror_with_name (cmp.file[1].name); status = EXIT_TROUBLE; } } /* Now the comparison has been done, if no error prevented it, and STATUS is the value this function will return. */ if (status == EXIT_SUCCESS) { if (report_identical_files && !DIR_P (0)) message ("Files %s and %s are identical\n", file_label[0] ? file_label[0] : cmp.file[0].name, file_label[1] ? file_label[1] : cmp.file[1].name); } else { /* Flush stdout so that the user sees differences immediately. This can hurt performance, unfortunately. */ if (fflush (stdout) != 0) pfatal_with_name (_("standard output")); } free (free0); free (free1); return status; }