Blame src/io.c.i18n

Packit d91b79
/* File I/O for GNU DIFF.
Packit d91b79
Packit d91b79
   Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2013,
Packit d91b79
   2015-2017 Free Software Foundation, Inc.
Packit d91b79
Packit d91b79
   This file is part of GNU DIFF.
Packit d91b79
Packit d91b79
   This program is free software: you can redistribute it and/or modify
Packit d91b79
   it under the terms of the GNU General Public License as published by
Packit d91b79
   the Free Software Foundation, either version 3 of the License, or
Packit d91b79
   (at your option) any later version.
Packit d91b79
Packit d91b79
   This program is distributed in the hope that it will be useful,
Packit d91b79
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit d91b79
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit d91b79
   GNU General Public License for more details.
Packit d91b79
Packit d91b79
   You should have received a copy of the GNU General Public License
Packit d91b79
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
Packit d91b79
Packit d91b79
#include "diff.h"
Packit d91b79
#include <binary-io.h>
Packit d91b79
#include <cmpbuf.h>
Packit d91b79
#include <file-type.h>
Packit d91b79
#include <xalloc.h>
Packit d91b79
Packit d91b79
/* Rotate an unsigned value to the left.  */
Packit d91b79
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
Packit d91b79
Packit d91b79
/* Given a hash value and a new character, return a new hash value.  */
Packit d91b79
#define HASH(h, c) ((c) + ROL (h, 7))
Packit d91b79

Packit d91b79
/* The type of a hash value.  */
Packit d91b79
typedef size_t hash_value;
Packit d91b79
verify (! TYPE_SIGNED (hash_value));
Packit d91b79
Packit d91b79
/* Lines are put into equivalence classes of lines that match in lines_differ.
Packit d91b79
   Each equivalence class is represented by one of these structures,
Packit d91b79
   but only while the classes are being computed.
Packit d91b79
   Afterward, each class is represented by a number.  */
Packit d91b79
struct equivclass
Packit d91b79
{
Packit d91b79
  lin next;		/* Next item in this bucket.  */
Packit d91b79
  hash_value hash;	/* Hash of lines in this class.  */
Packit d91b79
  char const *line;	/* A line that fits this class.  */
Packit d91b79
  size_t length;	/* That line's length, not counting its newline.  */
Packit d91b79
};
Packit d91b79
Packit d91b79
/* Hash-table: array of buckets, each being a chain of equivalence classes.
Packit d91b79
   buckets[-1] is reserved for incomplete lines.  */
Packit d91b79
static lin *buckets;
Packit d91b79
Packit d91b79
/* Number of buckets in the hash table array, not counting buckets[-1].  */
Packit d91b79
static size_t nbuckets;
Packit d91b79
Packit d91b79
/* Array in which the equivalence classes are allocated.
Packit d91b79
   The bucket-chains go through the elements in this array.
Packit d91b79
   The number of an equivalence class is its index in this array.  */
Packit d91b79
static struct equivclass *equivs;
Packit d91b79
Packit d91b79
/* Index of first free element in the array 'equivs'.  */
Packit d91b79
static lin equivs_index;
Packit d91b79
Packit d91b79
/* Number of elements allocated in the array 'equivs'.  */
Packit d91b79
static lin equivs_alloc;
Packit d91b79

Packit d91b79
/* Read a block of data into a file buffer, checking for EOF and error.  */
Packit d91b79
Packit d91b79
void
Packit d91b79
file_block_read (struct file_data *current, size_t size)
Packit d91b79
{
Packit d91b79
  if (size && ! current->eof)
Packit d91b79
    {
Packit d91b79
      size_t s = block_read (current->desc,
Packit d91b79
			     FILE_BUFFER (current) + current->buffered, size);
Packit d91b79
      if (s == SIZE_MAX)
Packit d91b79
	pfatal_with_name (current->name);
Packit d91b79
      current->buffered += s;
Packit d91b79
      current->eof = s < size;
Packit d91b79
    }
Packit d91b79
}
Packit d91b79

Packit d91b79
/* Check for binary files and compare them for exact identity.  */
Packit d91b79
Packit d91b79
/* Return 1 if BUF contains a non text character.
Packit d91b79
   SIZE is the number of characters in BUF.  */
Packit d91b79
Packit d91b79
#define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
Packit d91b79
Packit d91b79
/* Get ready to read the current file.
Packit d91b79
   Return nonzero if SKIP_TEST is zero,
Packit d91b79
   and if it appears to be a binary file.  */
Packit d91b79
Packit d91b79
static bool
Packit d91b79
sip (struct file_data *current, bool skip_test)
Packit d91b79
{
Packit d91b79
  /* If we have a nonexistent file at this stage, treat it as empty.  */
Packit d91b79
  if (current->desc < 0)
Packit d91b79
    {
Packit d91b79
      /* Leave room for a sentinel.  */
Packit d91b79
      current->bufsize = sizeof (word);
Packit d91b79
      current->buffer = xmalloc (current->bufsize);
Packit d91b79
    }
Packit d91b79
  else
Packit d91b79
    {
Packit d91b79
      current->bufsize = buffer_lcm (sizeof (word),
Packit d91b79
				     STAT_BLOCKSIZE (current->stat),
Packit d91b79
				     PTRDIFF_MAX - 2 * sizeof (word));
Packit d91b79
      current->buffer = xmalloc (current->bufsize);
Packit d91b79
Packit d91b79
#ifdef __KLIBC__
Packit d91b79
      /* Skip test if seek is not possible */
Packit d91b79
      skip_test = skip_test
Packit d91b79
		  || (lseek (current->desc, 0, SEEK_CUR) < 0
Packit d91b79
		      && errno == ESPIPE);
Packit d91b79
#endif
Packit d91b79
Packit d91b79
      if (! skip_test)
Packit d91b79
	{
Packit d91b79
	  /* Check first part of file to see if it's a binary file.  */
Packit d91b79
Packit d91b79
	  int prev_mode = set_binary_mode (current->desc, O_BINARY);
Packit d91b79
	  off_t buffered;
Packit d91b79
	  file_block_read (current, current->bufsize);
Packit d91b79
	  buffered = current->buffered;
Packit d91b79
Packit d91b79
	  if (prev_mode != O_BINARY)
Packit d91b79
	    {
Packit d91b79
	      /* Revert to text mode and seek back to the start to reread
Packit d91b79
		 the file.  Use relative seek, since file descriptors
Packit d91b79
		 like stdin might not start at offset zero.  */
Packit d91b79
	      if (lseek (current->desc, - buffered, SEEK_CUR) < 0)
Packit d91b79
		pfatal_with_name (current->name);
Packit d91b79
	      set_binary_mode (current->desc, prev_mode);
Packit d91b79
	      current->buffered = 0;
Packit d91b79
	      current->eof = false;
Packit d91b79
	    }
Packit d91b79
Packit d91b79
	  return binary_file_p (current->buffer, buffered);
Packit d91b79
	}
Packit d91b79
    }
Packit d91b79
Packit d91b79
  current->buffered = 0;
Packit d91b79
  current->eof = false;
Packit d91b79
  return false;
Packit d91b79
}
Packit d91b79
Packit d91b79
/* Slurp the rest of the current file completely into memory.  */
Packit d91b79
Packit d91b79
static void
Packit d91b79
slurp (struct file_data *current)
Packit d91b79
{
Packit d91b79
  size_t cc;
Packit d91b79
Packit d91b79
  if (current->desc < 0)
Packit d91b79
    {
Packit d91b79
      /* The file is nonexistent.  */
Packit d91b79
      return;
Packit d91b79
    }
Packit d91b79
Packit d91b79
  if (S_ISREG (current->stat.st_mode))
Packit d91b79
    {
Packit d91b79
      /* It's a regular file; slurp in the rest all at once.  */
Packit d91b79
Packit d91b79
      /* Get the size out of the stat block.
Packit d91b79
	 Allocate just enough room for appended newline plus word sentinel,
Packit d91b79
	 plus word-alignment since we want the buffer word-aligned.  */
Packit d91b79
      size_t file_size = current->stat.st_size;
Packit d91b79
      cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
Packit d91b79
      if (file_size != current->stat.st_size || cc < file_size
Packit d91b79
	  || PTRDIFF_MAX <= cc)
Packit d91b79
	xalloc_die ();
Packit d91b79
Packit d91b79
      if (current->bufsize < cc)
Packit d91b79
	{
Packit d91b79
	  current->bufsize = cc;
Packit d91b79
	  current->buffer = xrealloc (current->buffer, cc);
Packit d91b79
	}
Packit d91b79
Packit d91b79
      /* Try to read at least 1 more byte than the size indicates, to
Packit d91b79
	 detect whether the file is growing.  This is a nicety for
Packit d91b79
	 users who run 'diff' on files while they are changing.  */
Packit d91b79
Packit d91b79
      if (current->buffered <= file_size)
Packit d91b79
	{
Packit d91b79
	  file_block_read (current, file_size + 1 - current->buffered);
Packit d91b79
	  if (current->buffered <= file_size)
Packit d91b79
	    return;
Packit d91b79
	}
Packit d91b79
    }
Packit d91b79
Packit d91b79
  /* It's not a regular file, or it's a growing regular file; read it,
Packit d91b79
     growing the buffer as needed.  */
Packit d91b79
Packit d91b79
  file_block_read (current, current->bufsize - current->buffered);
Packit d91b79
Packit d91b79
  if (current->buffered)
Packit d91b79
    {
Packit d91b79
      while (current->buffered == current->bufsize)
Packit d91b79
	{
Packit d91b79
	  if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
Packit d91b79
	    xalloc_die ();
Packit d91b79
	  current->bufsize *= 2;
Packit d91b79
	  current->buffer = xrealloc (current->buffer, current->bufsize);
Packit d91b79
	  file_block_read (current, current->bufsize - current->buffered);
Packit d91b79
	}
Packit d91b79
Packit d91b79
      /* Allocate just enough room for appended newline plus word
Packit d91b79
	 sentinel, plus word-alignment.  */
Packit d91b79
      cc = current->buffered + 2 * sizeof (word);
Packit d91b79
      current->bufsize = cc - cc % sizeof (word);
Packit d91b79
      current->buffer = xrealloc (current->buffer, current->bufsize);
Packit d91b79
    }
Packit d91b79
}
Packit d91b79

Packit d91b79
/* Split the file into lines, simultaneously computing the equivalence
Packit d91b79
   class for each line.  */
Packit d91b79
Packit d91b79
static void
Packit d91b79
find_and_hash_each_line (struct file_data *current)
Packit d91b79
{
Packit d91b79
  char const *p = current->prefix_end;
Packit d91b79
  lin i, *bucket;
Packit d91b79
  size_t length;
Packit d91b79
Packit d91b79
  /* Cache often-used quantities in local variables to help the compiler.  */
Packit d91b79
  char const **linbuf = current->linbuf;
Packit d91b79
  lin alloc_lines = current->alloc_lines;
Packit d91b79
  lin line = 0;
Packit d91b79
  lin linbuf_base = current->linbuf_base;
Packit d91b79
  lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
Packit d91b79
  struct equivclass *eqs = equivs;
Packit d91b79
  lin eqs_index = equivs_index;
Packit d91b79
  lin eqs_alloc = equivs_alloc;
Packit d91b79
  char const *suffix_begin = current->suffix_begin;
Packit d91b79
  char const *bufend = FILE_BUFFER (current) + current->buffered;
Packit d91b79
  bool ig_case = ignore_case;
Packit d91b79
  enum DIFF_white_space ig_white_space = ignore_white_space;
Packit d91b79
  bool diff_length_compare_anyway =
Packit d91b79
    ig_white_space != IGNORE_NO_WHITE_SPACE;
Packit d91b79
  bool same_length_diff_contents_compare_anyway =
Packit d91b79
    diff_length_compare_anyway | ig_case;
Packit d91b79
Packit d91b79
  while (p < suffix_begin)
Packit d91b79
    {
Packit d91b79
      char const *ip = p;
Packit d91b79
      hash_value h = 0;
Packit d91b79
      unsigned char c;
Packit d91b79
Packit d91b79
      /* Hash this line until we find a newline.  */
Packit d91b79
      switch (ig_white_space)
Packit d91b79
	{
Packit d91b79
	case IGNORE_ALL_SPACE:
Packit d91b79
	  while ((c = *p++) != '\n')
Packit d91b79
	    if (! isspace (c))
Packit d91b79
	      h = HASH (h, ig_case ? tolower (c) : c);
Packit d91b79
	  break;
Packit d91b79
Packit d91b79
	case IGNORE_SPACE_CHANGE:
Packit d91b79
	  while ((c = *p++) != '\n')
Packit d91b79
	    {
Packit d91b79
	      if (isspace (c))
Packit d91b79
		{
Packit d91b79
		  do
Packit d91b79
		    if ((c = *p++) == '\n')
Packit d91b79
		      goto hashing_done;
Packit d91b79
		  while (isspace (c));
Packit d91b79
Packit d91b79
		  h = HASH (h, ' ');
Packit d91b79
		}
Packit d91b79
Packit d91b79
	      /* C is now the first non-space.  */
Packit d91b79
	      h = HASH (h, ig_case ? tolower (c) : c);
Packit d91b79
	    }
Packit d91b79
	  break;
Packit d91b79
Packit d91b79
	case IGNORE_TAB_EXPANSION:
Packit d91b79
	case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
Packit d91b79
	case IGNORE_TRAILING_SPACE:
Packit d91b79
	  {
Packit d91b79
	    size_t column = 0;
Packit d91b79
	    while ((c = *p++) != '\n')
Packit d91b79
	      {
Packit d91b79
		if (ig_white_space & IGNORE_TRAILING_SPACE
Packit d91b79
		    && isspace (c))
Packit d91b79
		  {
Packit d91b79
		    char const *p1 = p;
Packit d91b79
		    unsigned char c1;
Packit d91b79
		    do
Packit d91b79
		      if ((c1 = *p1++) == '\n')
Packit d91b79
			{
Packit d91b79
			  p = p1;
Packit d91b79
			  goto hashing_done;
Packit d91b79
			}
Packit d91b79
		    while (isspace (c1));
Packit d91b79
		  }
Packit d91b79
Packit d91b79
		size_t repetitions = 1;
Packit d91b79
Packit d91b79
		if (ig_white_space & IGNORE_TAB_EXPANSION)
Packit d91b79
		  switch (c)
Packit d91b79
		    {
Packit d91b79
		    case '\b':
Packit d91b79
		      column -= 0 < column;
Packit d91b79
		      break;
Packit d91b79
Packit d91b79
		    case '\t':
Packit d91b79
		      c = ' ';
Packit d91b79
		      repetitions = tabsize - column % tabsize;
Packit d91b79
		      column = (column + repetitions < column
Packit d91b79
				? 0
Packit d91b79
				: column + repetitions);
Packit d91b79
		      break;
Packit d91b79
Packit d91b79
		    case '\r':
Packit d91b79
		      column = 0;
Packit d91b79
		      break;
Packit d91b79
Packit d91b79
		    default:
Packit d91b79
		      column++;
Packit d91b79
		      break;
Packit d91b79
		    }
Packit d91b79
Packit d91b79
		if (ig_case)
Packit d91b79
		  c = tolower (c);
Packit d91b79
Packit d91b79
		do
Packit d91b79
		  h = HASH (h, c);
Packit d91b79
		while (--repetitions != 0);
Packit d91b79
	      }
Packit d91b79
	  }
Packit d91b79
	  break;
Packit d91b79
Packit d91b79
	default:
Packit d91b79
	  if (ig_case)
Packit d91b79
	    while ((c = *p++) != '\n')
Packit d91b79
	      h = HASH (h, tolower (c));
Packit d91b79
	  else
Packit d91b79
	    while ((c = *p++) != '\n')
Packit d91b79
	      h = HASH (h, c);
Packit d91b79
	  break;
Packit d91b79
	}
Packit d91b79
Packit d91b79
   hashing_done:;
Packit d91b79
Packit d91b79
      bucket = &buckets[h % nbuckets];
Packit d91b79
      length = p - ip - 1;
Packit d91b79
Packit d91b79
      if (p == bufend
Packit d91b79
	  && current->missing_newline
Packit d91b79
	  && ROBUST_OUTPUT_STYLE (output_style))
Packit d91b79
	{
Packit d91b79
	  /* The last line is incomplete and we do not silently
Packit d91b79
	     complete lines.  If the line cannot compare equal to any
Packit d91b79
	     complete line, put it into buckets[-1] so that it can
Packit d91b79
	     compare equal only to the other file's incomplete line
Packit d91b79
	     (if one exists).  */
Packit d91b79
	  if (ig_white_space < IGNORE_TRAILING_SPACE)
Packit d91b79
	    bucket = &buckets[-1];
Packit d91b79
	}
Packit d91b79
Packit d91b79
      for (i = *bucket;  ;  i = eqs[i].next)
Packit d91b79
	if (!i)
Packit d91b79
	  {
Packit d91b79
	    /* Create a new equivalence class in this bucket.  */
Packit d91b79
	    i = eqs_index++;
Packit d91b79
	    if (i == eqs_alloc)
Packit d91b79
	      {
Packit d91b79
		if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
Packit d91b79
		  xalloc_die ();
Packit d91b79
		eqs_alloc *= 2;
Packit d91b79
		eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
Packit d91b79
	      }
Packit d91b79
	    eqs[i].next = *bucket;
Packit d91b79
	    eqs[i].hash = h;
Packit d91b79
	    eqs[i].line = ip;
Packit d91b79
	    eqs[i].length = length;
Packit d91b79
	    *bucket = i;
Packit d91b79
	    break;
Packit d91b79
	  }
Packit d91b79
	else if (eqs[i].hash == h)
Packit d91b79
	  {
Packit d91b79
	    char const *eqline = eqs[i].line;
Packit d91b79
Packit d91b79
	    /* Reuse existing class if lines_differ reports the lines
Packit d91b79
               equal.  */
Packit d91b79
	    if (eqs[i].length == length)
Packit d91b79
	      {
Packit d91b79
		/* Reuse existing equivalence class if the lines are identical.
Packit d91b79
		   This detects the common case of exact identity
Packit d91b79
		   faster than lines_differ would.  */
Packit d91b79
		if (memcmp (eqline, ip, length) == 0)
Packit d91b79
		  break;
Packit d91b79
		if (!same_length_diff_contents_compare_anyway)
Packit d91b79
		  continue;
Packit d91b79
	      }
Packit d91b79
	    else if (!diff_length_compare_anyway)
Packit d91b79
	      continue;
Packit d91b79
Packit d91b79
	    if (! lines_differ (eqline, ip))
Packit d91b79
	      break;
Packit d91b79
	  }
Packit d91b79
Packit d91b79
      /* Maybe increase the size of the line table.  */
Packit d91b79
      if (line == alloc_lines)
Packit d91b79
	{
Packit d91b79
	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
Packit d91b79
	  if (PTRDIFF_MAX / 3 <= alloc_lines
Packit d91b79
	      || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
Packit d91b79
	      || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
Packit d91b79
	    xalloc_die ();
Packit d91b79
	  alloc_lines = 2 * alloc_lines - linbuf_base;
Packit d91b79
	  cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
Packit d91b79
	  linbuf += linbuf_base;
Packit d91b79
	  linbuf = xrealloc (linbuf,
Packit d91b79
			     (alloc_lines - linbuf_base) * sizeof *linbuf);
Packit d91b79
	  linbuf -= linbuf_base;
Packit d91b79
	}
Packit d91b79
      linbuf[line] = ip;
Packit d91b79
      cureqs[line] = i;
Packit d91b79
      ++line;
Packit d91b79
    }
Packit d91b79
Packit d91b79
  current->buffered_lines = line;
Packit d91b79
Packit d91b79
  for (i = 0;  ;  i++)
Packit d91b79
    {
Packit d91b79
      /* Record the line start for lines in the suffix that we care about.
Packit d91b79
	 Record one more line start than lines,
Packit d91b79
	 so that we can compute the length of any buffered line.  */
Packit d91b79
      if (line == alloc_lines)
Packit d91b79
	{
Packit d91b79
	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
Packit d91b79
	  if (PTRDIFF_MAX / 3 <= alloc_lines
Packit d91b79
	      || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
Packit d91b79
	      || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
Packit d91b79
	    xalloc_die ();
Packit d91b79
	  alloc_lines = 2 * alloc_lines - linbuf_base;
Packit d91b79
	  linbuf += linbuf_base;
Packit d91b79
	  linbuf = xrealloc (linbuf,
Packit d91b79
			     (alloc_lines - linbuf_base) * sizeof *linbuf);
Packit d91b79
	  linbuf -= linbuf_base;
Packit d91b79
	}
Packit d91b79
      linbuf[line] = p;
Packit d91b79
Packit d91b79
      if (p == bufend)
Packit d91b79
	{
Packit d91b79
	  /* If the last line is incomplete and we do not silently
Packit d91b79
	     complete lines, don't count its appended newline.  */
Packit d91b79
	  if (current->missing_newline && ROBUST_OUTPUT_STYLE (output_style))
Packit d91b79
	    linbuf[line]--;
Packit d91b79
	  break;
Packit d91b79
	}
Packit d91b79
Packit d91b79
      if (context <= i && no_diff_means_no_output)
Packit d91b79
	break;
Packit d91b79
Packit d91b79
      line++;
Packit d91b79
Packit d91b79
      while (*p++ != '\n')
Packit d91b79
	continue;
Packit d91b79
    }
Packit d91b79
Packit d91b79
  /* Done with cache in local variables.  */
Packit d91b79
  current->linbuf = linbuf;
Packit d91b79
  current->valid_lines = line;
Packit d91b79
  current->alloc_lines = alloc_lines;
Packit d91b79
  current->equivs = cureqs;
Packit d91b79
  equivs = eqs;
Packit d91b79
  equivs_alloc = eqs_alloc;
Packit d91b79
  equivs_index = eqs_index;
Packit d91b79
}
Packit d91b79

Packit d91b79
/* Prepare the text.  Make sure the text end is initialized.
Packit d91b79
   Make sure text ends in a newline,
Packit d91b79
   but remember that we had to add one.
Packit d91b79
   Strip trailing CRs, if that was requested.  */
Packit d91b79
Packit d91b79
static void
Packit d91b79
prepare_text (struct file_data *current)
Packit d91b79
{
Packit d91b79
  size_t buffered = current->buffered;
Packit d91b79
  char *p = FILE_BUFFER (current);
Packit d91b79
Packit d91b79
  if (buffered == 0 || p[buffered - 1] == '\n')
Packit d91b79
    current->missing_newline = false;
Packit d91b79
  else
Packit d91b79
    {
Packit d91b79
      p[buffered++] = '\n';
Packit d91b79
      current->missing_newline = true;
Packit d91b79
    }
Packit d91b79
Packit d91b79
  if (!p)
Packit d91b79
    return;
Packit d91b79
Packit d91b79
  /* Don't use uninitialized storage when planting or using sentinels.  */
Packit d91b79
  memset (p + buffered, 0, sizeof (word));
Packit d91b79
Packit d91b79
  if (strip_trailing_cr)
Packit d91b79
    {
Packit d91b79
      char *dst;
Packit d91b79
      char *srclim = p + buffered;
Packit d91b79
      *srclim = '\r';
Packit d91b79
      dst = rawmemchr (p, '\r');
Packit d91b79
Packit d91b79
      if (dst != srclim)
Packit d91b79
	{
Packit d91b79
	  char const *src = dst;
Packit d91b79
	  do
Packit d91b79
	    {
Packit d91b79
	      *dst = *src++;
Packit d91b79
	      dst += ! (*dst == '\r' && *src == '\n');
Packit d91b79
	    }
Packit d91b79
	  while (src < srclim);
Packit d91b79
Packit d91b79
	  buffered -= src - dst;
Packit d91b79
	}
Packit d91b79
    }
Packit d91b79
Packit d91b79
  current->buffered = buffered;
Packit d91b79
}
Packit d91b79

Packit d91b79
/* We have found N lines in a buffer of size S; guess the
Packit d91b79
   proportionate number of lines that will be found in a buffer of
Packit d91b79
   size T.  However, do not guess a number of lines so large that the
Packit d91b79
   resulting line table might cause overflow in size calculations.  */
Packit d91b79
static lin
Packit d91b79
guess_lines (lin n, size_t s, size_t t)
Packit d91b79
{
Packit d91b79
  size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
Packit d91b79
  lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
Packit d91b79
  return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
Packit d91b79
}
Packit d91b79
Packit d91b79
/* Given a vector of two file_data objects, find the identical
Packit d91b79
   prefixes and suffixes of each object.  */
Packit d91b79
Packit d91b79
static void
Packit d91b79
find_identical_ends (struct file_data filevec[])
Packit d91b79
{
Packit d91b79
  word *w0, *w1;
Packit d91b79
  char *p0, *p1, *buffer0, *buffer1;
Packit d91b79
  char const *end0, *beg0;
Packit d91b79
  char const **linbuf0, **linbuf1;
Packit d91b79
  lin i, lines;
Packit d91b79
  size_t n0, n1;
Packit d91b79
  lin alloc_lines0, alloc_lines1;
Packit d91b79
  bool prefix_needed;
Packit d91b79
  lin buffered_prefix, prefix_count, prefix_mask;
Packit d91b79
  lin middle_guess, suffix_guess;
Packit d91b79
Packit d91b79
  slurp (&filevec[0]);
Packit d91b79
  prepare_text (&filevec[0]);
Packit d91b79
  if (filevec[0].desc != filevec[1].desc)
Packit d91b79
    {
Packit d91b79
      slurp (&filevec[1]);
Packit d91b79
      prepare_text (&filevec[1]);
Packit d91b79
    }
Packit d91b79
  else
Packit d91b79
    {
Packit d91b79
      filevec[1].buffer = filevec[0].buffer;
Packit d91b79
      filevec[1].bufsize = filevec[0].bufsize;
Packit d91b79
      filevec[1].buffered = filevec[0].buffered;
Packit d91b79
      filevec[1].missing_newline = filevec[0].missing_newline;
Packit d91b79
    }
Packit d91b79
Packit d91b79
  /* Find identical prefix.  */
Packit d91b79
Packit d91b79
  w0 = filevec[0].buffer;
Packit d91b79
  w1 = filevec[1].buffer;
Packit d91b79
  p0 = buffer0 = (char *) w0;
Packit d91b79
  p1 = buffer1 = (char *) w1;
Packit d91b79
  n0 = filevec[0].buffered;
Packit d91b79
  n1 = filevec[1].buffered;
Packit d91b79
Packit d91b79
  if (p0 == p1)
Packit d91b79
    /* The buffers are the same; sentinels won't work.  */
Packit d91b79
    p0 = p1 += n1;
Packit d91b79
  else
Packit d91b79
    {
Packit d91b79
      /* Insert end sentinels, in this case characters that are guaranteed
Packit d91b79
	 to make the equality test false, and thus terminate the loop.  */
Packit d91b79
Packit d91b79
      if (n0 < n1)
Packit d91b79
	p0[n0] = ~p1[n0];
Packit d91b79
      else
Packit d91b79
	p1[n1] = ~p0[n1];
Packit d91b79
Packit d91b79
      /* Loop until first mismatch, or to the sentinel characters.  */
Packit d91b79
Packit d91b79
      /* Compare a word at a time for speed.  */
Packit d91b79
      while (*w0 == *w1)
Packit d91b79
	w0++, w1++;
Packit d91b79
Packit d91b79
      /* Do the last few bytes of comparison a byte at a time.  */
Packit d91b79
      p0 = (char *) w0;
Packit d91b79
      p1 = (char *) w1;
Packit d91b79
      while (*p0 == *p1)
Packit d91b79
	p0++, p1++;
Packit d91b79
Packit d91b79
      /* Don't mistakenly count missing newline as part of prefix.  */
Packit d91b79
      if (ROBUST_OUTPUT_STYLE (output_style)
Packit d91b79
	  && ((buffer0 + n0 - filevec[0].missing_newline < p0)
Packit d91b79
	      !=
Packit d91b79
	      (buffer1 + n1 - filevec[1].missing_newline < p1)))
Packit d91b79
	p0--, p1--;
Packit d91b79
    }
Packit d91b79
Packit d91b79
  /* Now P0 and P1 point at the first nonmatching characters.  */
Packit d91b79
Packit d91b79
  /* Skip back to last line-beginning in the prefix,
Packit d91b79
     and then discard up to HORIZON_LINES lines from the prefix.  */
Packit d91b79
  i = horizon_lines;
Packit d91b79
  while (p0 != buffer0 && (p0[-1] != '\n' || i--))
Packit d91b79
    p0--, p1--;
Packit d91b79
Packit d91b79
  /* Record the prefix.  */
Packit d91b79
  filevec[0].prefix_end = p0;
Packit d91b79
  filevec[1].prefix_end = p1;
Packit d91b79
Packit d91b79
  /* Find identical suffix.  */
Packit d91b79
Packit d91b79
  /* P0 and P1 point beyond the last chars not yet compared.  */
Packit d91b79
  p0 = buffer0 + n0;
Packit d91b79
  p1 = buffer1 + n1;
Packit d91b79
Packit d91b79
  if (! ROBUST_OUTPUT_STYLE (output_style)
Packit d91b79
      || filevec[0].missing_newline == filevec[1].missing_newline)
Packit d91b79
    {
Packit d91b79
      end0 = p0;	/* Addr of last char in file 0.  */
Packit d91b79
Packit d91b79
      /* Get value of P0 at which we should stop scanning backward:
Packit d91b79
	 this is when either P0 or P1 points just past the last char
Packit d91b79
	 of the identical prefix.  */
Packit d91b79
      beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
Packit d91b79
Packit d91b79
      /* Scan back until chars don't match or we reach that point.  */
Packit d91b79
      while (p0 != beg0)
Packit d91b79
	if (*--p0 != *--p1)
Packit d91b79
	  {
Packit d91b79
	    /* Point at the first char of the matching suffix.  */
Packit d91b79
	    ++p0, ++p1;
Packit d91b79
	    beg0 = p0;
Packit d91b79
	    break;
Packit d91b79
	  }
Packit d91b79
Packit d91b79
      /* Are we at a line-beginning in both files?  If not, add the rest of
Packit d91b79
	 this line to the main body.  Discard up to HORIZON_LINES lines from
Packit d91b79
	 the identical suffix.  Also, discard one extra line,
Packit d91b79
	 because shift_boundaries may need it.  */
Packit d91b79
      i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
Packit d91b79
			    &&
Packit d91b79
			    (buffer1 == p1 || p1[-1] == '\n'));
Packit d91b79
      while (i-- && p0 != end0)
Packit d91b79
	while (*p0++ != '\n')
Packit d91b79
	  continue;
Packit d91b79
Packit d91b79
      p1 += p0 - beg0;
Packit d91b79
    }
Packit d91b79
Packit d91b79
  /* Record the suffix.  */
Packit d91b79
  filevec[0].suffix_begin = p0;
Packit d91b79
  filevec[1].suffix_begin = p1;
Packit d91b79
Packit d91b79
  /* Calculate number of lines of prefix to save.
Packit d91b79
Packit d91b79
     prefix_count == 0 means save the whole prefix;
Packit d91b79
     we need this for options like -D that output the whole file,
Packit d91b79
     or for enormous contexts (to avoid worrying about arithmetic overflow).
Packit d91b79
     We also need it for options like -F that output some preceding line;
Packit d91b79
     at least we will need to find the last few lines,
Packit d91b79
     but since we don't know how many, it's easiest to find them all.
Packit d91b79
Packit d91b79
     Otherwise, prefix_count != 0.  Save just prefix_count lines at start
Packit d91b79
     of the line buffer; they'll be moved to the proper location later.
Packit d91b79
     Handle 1 more line than the context says (because we count 1 too many),
Packit d91b79
     rounded up to the next power of 2 to speed index computation.  */
Packit d91b79
Packit d91b79
  if (no_diff_means_no_output && ! function_regexp.fastmap
Packit d91b79
      && context < LIN_MAX / 4 && context < n0)
Packit d91b79
    {
Packit d91b79
      middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
Packit d91b79
      suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
Packit d91b79
      for (prefix_count = 1;  prefix_count <= context;  prefix_count *= 2)
Packit d91b79
	continue;
Packit d91b79
      alloc_lines0 = (prefix_count + middle_guess
Packit d91b79
		      + MIN (context, suffix_guess));
Packit d91b79
    }
Packit d91b79
  else
Packit d91b79
    {
Packit d91b79
      prefix_count = 0;
Packit d91b79
      alloc_lines0 = guess_lines (0, 0, n0);
Packit d91b79
    }
Packit d91b79
Packit d91b79
  prefix_mask = prefix_count - 1;
Packit d91b79
  lines = 0;
Packit d91b79
  linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
Packit d91b79
  prefix_needed = ! (no_diff_means_no_output
Packit d91b79
		     && filevec[0].prefix_end == p0
Packit d91b79
		     && filevec[1].prefix_end == p1);
Packit d91b79
  p0 = buffer0;
Packit d91b79
Packit d91b79
  /* If the prefix is needed, find the prefix lines.  */
Packit d91b79
  if (prefix_needed)
Packit d91b79
    {
Packit d91b79
      end0 = filevec[0].prefix_end;
Packit d91b79
      while (p0 != end0)
Packit d91b79
	{
Packit d91b79
	  lin l = lines++ & prefix_mask;
Packit d91b79
	  if (l == alloc_lines0)
Packit d91b79
	    {
Packit d91b79
	      if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
Packit d91b79
		xalloc_die ();
Packit d91b79
	      alloc_lines0 *= 2;
Packit d91b79
	      linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
Packit d91b79
	    }
Packit d91b79
	  linbuf0[l] = p0;
Packit d91b79
	  while (*p0++ != '\n')
Packit d91b79
	    continue;
Packit d91b79
	}
Packit d91b79
    }
Packit d91b79
  buffered_prefix = prefix_count && context < lines ? context : lines;
Packit d91b79
Packit d91b79
  /* Allocate line buffer 1.  */
Packit d91b79
Packit d91b79
  middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
Packit d91b79
  suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
Packit d91b79
  alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
Packit d91b79
  if (alloc_lines1 < buffered_prefix
Packit d91b79
      || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
Packit d91b79
    xalloc_die ();
Packit d91b79
  linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
Packit d91b79
Packit d91b79
  if (buffered_prefix != lines)
Packit d91b79
    {
Packit d91b79
      /* Rotate prefix lines to proper location.  */
Packit d91b79
      for (i = 0;  i < buffered_prefix;  i++)
Packit d91b79
	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
Packit d91b79
      for (i = 0;  i < buffered_prefix;  i++)
Packit d91b79
	linbuf0[i] = linbuf1[i];
Packit d91b79
    }
Packit d91b79
Packit d91b79
  /* Initialize line buffer 1 from line buffer 0.  */
Packit d91b79
  for (i = 0; i < buffered_prefix; i++)
Packit d91b79
    linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
Packit d91b79
Packit d91b79
  /* Record the line buffer, adjusted so that
Packit d91b79
     linbuf[0] points at the first differing line.  */
Packit d91b79
  filevec[0].linbuf = linbuf0 + buffered_prefix;
Packit d91b79
  filevec[1].linbuf = linbuf1 + buffered_prefix;
Packit d91b79
  filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
Packit d91b79
  filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
Packit d91b79
  filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
Packit d91b79
  filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
Packit d91b79
}
Packit d91b79

Packit d91b79
/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
Packit d91b79
   than 2**k.  This table is derived from Chris K. Caldwell's list
Packit d91b79
   <http://www.utm.edu/research/primes/lists/2small/>.  */
Packit d91b79
Packit d91b79
static unsigned char const prime_offset[] =
Packit d91b79
{
Packit d91b79
  0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
Packit d91b79
  15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
Packit d91b79
  11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
Packit d91b79
  55, 93, 1, 57, 25
Packit d91b79
};
Packit d91b79
Packit d91b79
/* Verify that this host's size_t is not too wide for the above table.  */
Packit d91b79
Packit d91b79
verify (sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
Packit d91b79
Packit d91b79
/* Given a vector of two file_data objects, read the file associated
Packit d91b79
   with each one, and build the table of equivalence classes.
Packit d91b79
   Return nonzero if either file appears to be a binary file.
Packit d91b79
   If PRETEND_BINARY is nonzero, pretend they are binary regardless.  */
Packit d91b79
Packit d91b79
bool
Packit d91b79
read_files (struct file_data filevec[], bool pretend_binary)
Packit d91b79
{
Packit d91b79
  int i;
Packit d91b79
  bool skip_test = text | pretend_binary;
Packit d91b79
  bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
Packit d91b79
Packit d91b79
  if (filevec[0].desc != filevec[1].desc)
Packit d91b79
    appears_binary |= sip (&filevec[1], skip_test | appears_binary);
Packit d91b79
  else
Packit d91b79
    {
Packit d91b79
      filevec[1].buffer = filevec[0].buffer;
Packit d91b79
      filevec[1].bufsize = filevec[0].bufsize;
Packit d91b79
      filevec[1].buffered = filevec[0].buffered;
Packit d91b79
    }
Packit d91b79
  if (appears_binary)
Packit d91b79
    {
Packit d91b79
      set_binary_mode (filevec[0].desc, O_BINARY);
Packit d91b79
      set_binary_mode (filevec[1].desc, O_BINARY);
Packit d91b79
      return true;
Packit d91b79
    }
Packit d91b79
Packit d91b79
  find_identical_ends (filevec);
Packit d91b79
Packit d91b79
  equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
Packit d91b79
  if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
Packit d91b79
    xalloc_die ();
Packit d91b79
  equivs = xmalloc (equivs_alloc * sizeof *equivs);
Packit d91b79
  /* Equivalence class 0 is permanently safe for lines that were not
Packit d91b79
     hashed.  Real equivalence classes start at 1.  */
Packit d91b79
  equivs_index = 1;
Packit d91b79
Packit d91b79
  /* Allocate (one plus) a prime number of hash buckets.  Use a prime
Packit d91b79
     number between 1/3 and 2/3 of the value of equiv_allocs,
Packit d91b79
     approximately.  */
Packit d91b79
  for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
Packit d91b79
    continue;
Packit d91b79
  nbuckets = ((size_t) 1 << i) - prime_offset[i];
Packit d91b79
  if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
Packit d91b79
    xalloc_die ();
Packit d91b79
  buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
Packit d91b79
  buckets++;
Packit d91b79
Packit d91b79
  for (i = 0; i < 2; i++)
Packit d91b79
    find_and_hash_each_line (&filevec[i]);
Packit d91b79
Packit d91b79
  filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
Packit d91b79
Packit d91b79
  free (equivs);
Packit d91b79
  free (buckets - 1);
Packit d91b79
Packit d91b79
  return false;
Packit d91b79
}