Blob Blame History Raw
/* fold -- wrap each input line to fit in specified width.
   Copyright (C) 1991-2018 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by David MacKenzie, djm@gnu.ai.mit.edu. */

#include <config.h>

#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>

/* Get mbstate_t, mbrtowc(), wcwidth().  */
#if HAVE_WCHAR_H
# include <wchar.h>
#endif

/* Get iswprint(), iswblank(), wcwidth().  */
#if HAVE_WCTYPE_H
# include <wctype.h>
#endif

#include "system.h"
#include "die.h"
#include "error.h"
#include "fadvise.h"
#include "xdectoint.h"

/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
      installation; work around this configuration error.  */
#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
# undef MB_LEN_MAX
# define MB_LEN_MAX 16
#endif

/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
#if HAVE_MBRTOWC && defined mbstate_t
# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
#endif

#define TAB_WIDTH 8

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "fold"

#define AUTHORS proper_name ("David MacKenzie")

#define FATAL_ERROR(Message)                                            \
  do                                                                    \
    {                                                                   \
      error (0, 0, (Message));                                          \
      usage (2);                                                        \
    }                                                                   \
  while (0)

enum operating_mode
{
  /* Fold texts by columns that are at the given positions. */
  column_mode,

  /* Fold texts by bytes that are at the given positions. */
  byte_mode,

  /* Fold texts by characters that are at the given positions. */
  character_mode,
};

/* The argument shows current mode. (Default: column_mode) */
static enum operating_mode operating_mode;

/* If nonzero, try to break on whitespace. */
static bool break_spaces;

/* If nonzero, at least one of the files we read was standard input. */
static bool have_read_stdin;

static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";

static struct option const longopts[] =
{
  {"bytes", no_argument, NULL, 'b'},
  {"characters", no_argument, NULL, 'c'},
  {"spaces", no_argument, NULL, 's'},
  {"width", required_argument, NULL, 'w'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0}
};

void
usage (int status)
{
  if (status != EXIT_SUCCESS)
    emit_try_help ();
  else
    {
      printf (_("\
Usage: %s [OPTION]... [FILE]...\n\
"),
              program_name);
      fputs (_("\
Wrap input lines in each FILE, writing to standard output.\n\
"), stdout);

      emit_stdin_note ();
      emit_mandatory_arg_note ();

      fputs (_("\
  -b, --bytes         count bytes rather than columns\n\
  -c, --characters    count characters rather than columns\n\
  -s, --spaces        break at spaces\n\
  -w, --width=WIDTH   use WIDTH columns instead of 80\n\
"), stdout);
      fputs (HELP_OPTION_DESCRIPTION, stdout);
      fputs (VERSION_OPTION_DESCRIPTION, stdout);
      emit_ancillary_info (PROGRAM_NAME);
    }
  exit (status);
}

/* Assuming the current column is COLUMN, return the column that
   printing C will move the cursor to.
   The first column is 0. */

static size_t
adjust_column (size_t column, char c)
{
  if (operating_mode != byte_mode)
    {
      if (c == '\b')
        {
          if (column > 0)
            column--;
        }
      else if (c == '\r')
        column = 0;
      else if (c == '\t')
        column += TAB_WIDTH - column % TAB_WIDTH;
      else /* if (isprint (c)) */
        column++;
    }
  else
    column++;
  return column;
}

/* Fold file FILENAME, or standard input if FILENAME is "-",
   to stdout, with maximum line length WIDTH.
   Return true if successful.  */

static void
fold_text (FILE *istream, size_t width, int *saved_errno)
{
  int c;
  size_t column = 0;		/* Screen column where next char will go. */
  size_t offset_out = 0;	/* Index in 'line_out' for next char. */
  static char *line_out = NULL;
  static size_t allocated_out = 0;

  fadvise (istream, FADVISE_SEQUENTIAL);

  while ((c = getc (istream)) != EOF)
    {
      if (offset_out + 1 >= allocated_out)
        line_out = X2REALLOC (line_out, &allocated_out);

      if (c == '\n')
        {
          line_out[offset_out++] = c;
          fwrite (line_out, sizeof (char), offset_out, stdout);
          column = offset_out = 0;
          continue;
        }

    rescan:
      column = adjust_column (column, c);

      if (column > width)
        {
          /* This character would make the line too long.
             Print the line plus a newline, and make this character
             start the next line. */
          if (break_spaces)
            {
              bool found_blank = false;
              size_t logical_end = offset_out;

              /* If LINE_OUT has no wide character,
                 put a new wide character in LINE_OUT
                 if column is bigger than width. */
              if (offset_out == 0)
                {
                  line_out[offset_out++] = c;
                  continue;
                }

              /* Look for the last blank. */
              while (logical_end)
                {
                  --logical_end;
                  if (isblank (to_uchar (line_out[logical_end])))
                    {
                      found_blank = true;
                      break;
                    }
                }

              if (found_blank)
                {
                  size_t i;

                  /* Found a blank.  Don't output the part after it. */
                  logical_end++;
                  fwrite (line_out, sizeof (char), (size_t) logical_end,
                          stdout);
                  putchar ('\n');
                  /* Move the remainder to the beginning of the next line.
                     The areas being copied here might overlap. */
                  memmove (line_out, line_out + logical_end,
                           offset_out - logical_end);
                  offset_out -= logical_end;
                  for (column = i = 0; i < offset_out; i++)
                    column = adjust_column (column, line_out[i]);
                  goto rescan;
                }
            }

          if (offset_out == 0)
            {
              line_out[offset_out++] = c;
              continue;
            }

          line_out[offset_out++] = '\n';
          fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
          column = offset_out = 0;
          goto rescan;
        }

      line_out[offset_out++] = c;
    }

  *saved_errno = errno;

  if (offset_out)
    fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);

}

#if HAVE_MBRTOWC
static void
fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
{
  char buf[MB_LEN_MAX + BUFSIZ];  /* For spooling a read byte sequence. */
  size_t buflen = 0;        /* The length of the byte sequence in buf. */
  char *bufpos = buf;         /* Next read position of BUF. */
  wint_t wc;                /* A gotten wide character. */
  size_t mblength;        /* The byte size of a multibyte character which shows
                           as same character as WC. */
  mbstate_t state, state_bak;        /* State of the stream. */
  int convfail = 0;                /* 1, when conversion is failed. Otherwise 0. */

  static char *line_out = NULL;
  size_t offset_out = 0;        /* Index in `line_out' for next char. */
  static size_t allocated_out = 0;

  int increment;
  size_t column = 0;

  size_t last_blank_pos;
  size_t last_blank_column;
  int is_blank_seen;
  int last_blank_increment = 0;
  int is_bs_following_last_blank;
  size_t bs_following_last_blank_num;
  int is_cr_after_last_blank;

#define CLEAR_FLAGS                                \
   do                                                \
     {                                                \
        last_blank_pos = 0;                        \
        last_blank_column = 0;                        \
        is_blank_seen = 0;                        \
        is_bs_following_last_blank = 0;                \
        bs_following_last_blank_num = 0;        \
        is_cr_after_last_blank = 0;                \
     }                                                \
   while (0)

#define START_NEW_LINE                        \
   do                                        \
     {                                        \
      putchar ('\n');                        \
      column = 0;                        \
      offset_out = 0;                        \
      CLEAR_FLAGS;                        \
    }                                        \
   while (0)

  CLEAR_FLAGS;
  memset (&state, '\0', sizeof(mbstate_t));

  for (;; bufpos += mblength, buflen -= mblength)
    {
      if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
        {
          memmove (buf, bufpos, buflen);
          buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
          bufpos = buf;
        }

      if (buflen < 1)
        break;

      /* Get a wide character. */
      state_bak = state;
      mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);

      switch (mblength)
        {
        case (size_t)-1:
        case (size_t)-2:
          convfail++;
          state = state_bak;
          /* Fall through. */

        case 0:
          mblength = 1;
          break;
        }

rescan:
      if (convfail)
        increment = 1;
      else if (wc == L'\n')
        {
          /* preserve newline */
          fwrite (line_out, sizeof(char), offset_out, stdout);
          START_NEW_LINE;
          continue;
        }
      else if (operating_mode == byte_mode)                  /* byte mode */
        increment = mblength;
      else if (operating_mode == character_mode)        /* character mode */
        increment = 1;
      else                                                 /* column mode */
        {
          switch (wc)
            {
            case L'\b':
              increment = (column > 0) ? -1 : 0;
              break;

            case L'\r':
              increment = -1 * column;
              break;

            case L'\t':
              increment = 8 - column % 8;
              break;

            default:
              increment = wcwidth (wc);
              increment = (increment < 0) ? 0 : increment;
            }
        }

      if (column + increment > width && break_spaces && last_blank_pos)
        {
          fwrite (line_out, sizeof(char), last_blank_pos, stdout);
          putchar ('\n');

          offset_out = offset_out - last_blank_pos;
          column = column - last_blank_column + ((is_cr_after_last_blank)
              ? last_blank_increment : bs_following_last_blank_num);
          memmove (line_out, line_out + last_blank_pos, offset_out);
          CLEAR_FLAGS;
          goto rescan;
        }

      if (column + increment > width && column != 0)
        {
          fwrite (line_out, sizeof(char), offset_out, stdout);
          START_NEW_LINE;
          goto rescan;
        }

      if (allocated_out < offset_out + mblength)
        {
          line_out = X2REALLOC (line_out, &allocated_out);
        }

      memcpy (line_out + offset_out, bufpos, mblength);
      offset_out += mblength;
      column += increment;

      if (is_blank_seen && !convfail && wc == L'\r')
        is_cr_after_last_blank = 1;

      if (is_bs_following_last_blank && !convfail && wc == L'\b')
        ++bs_following_last_blank_num;
      else
        is_bs_following_last_blank = 0;

      if (break_spaces && !convfail && iswblank (wc))
        {
          last_blank_pos = offset_out;
          last_blank_column = column;
          is_blank_seen = 1;
          last_blank_increment = increment;
          is_bs_following_last_blank = 1;
          bs_following_last_blank_num = 0;
          is_cr_after_last_blank = 0;
        }
    }

  *saved_errno = errno;

  if (offset_out)
    fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);

}
#endif

/* Fold file FILENAME, or standard input if FILENAME is "-",
   to stdout, with maximum line length WIDTH.
   Return 0 if successful, 1 if an error occurs. */

static bool
fold_file (char const *filename, size_t width)
{
  FILE *istream;
  int saved_errno;

  if (STREQ (filename, "-"))
    {
      istream = stdin;
      have_read_stdin = 1;
    }
  else
    istream = fopen (filename, "r");

  if (istream == NULL)
    {
      error (0, errno, "%s", filename);
      return 1;
    }

  /* Define how ISTREAM is being folded. */
#if HAVE_MBRTOWC
  if (MB_CUR_MAX > 1)
    fold_multibyte_text (istream, width, &saved_errno);
  else
#endif
    fold_text (istream, width, &saved_errno);

  if (ferror (istream))
    {
      error (0, saved_errno, "%s", quotef (filename));
      if (!STREQ (filename, "-"))
        fclose (istream);
      return false;
    }
  if (!STREQ (filename, "-") && fclose (istream) == EOF)
    {
      error (0, errno, "%s", quotef (filename));
      return false;
    }

  return true;
}

int
main (int argc, char **argv)
{
  size_t width = 80;
  int i;
  int optc;
  bool ok;

  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  operating_mode = column_mode;
  break_spaces = have_read_stdin = false;

  while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
    {
      char optargbuf[2];

      switch (optc)
        {
        case 'b':		/* Count bytes rather than columns. */
          if (operating_mode != column_mode)
            FATAL_ERROR (_("only one way of folding may be specified"));
          operating_mode = byte_mode;
          break;

        case 'c':
          if (operating_mode != column_mode)
            FATAL_ERROR (_("only one way of folding may be specified"));
          operating_mode = character_mode;
          break;

        case 's':		/* Break at word boundaries. */
          break_spaces = true;
          break;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          if (optarg)
            optarg--;
          else
            {
              optargbuf[0] = optc;
              optargbuf[1] = '\0';
              optarg = optargbuf;
            }
          FALLTHROUGH;
        case 'w':		/* Line width. */
          width = xdectoumax (optarg, 1, SIZE_MAX - TAB_WIDTH - 1, "",
                              _("invalid number of columns"), 0);
          break;

        case_GETOPT_HELP_CHAR;

        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

        default:
          usage (EXIT_FAILURE);
        }
    }

  if (argc == optind)
    ok = fold_file ("-", width);
  else
    {
      ok = true;
      for (i = optind; i < argc; i++)
        ok &= fold_file (argv[i], width);
    }

  if (have_read_stdin && fclose (stdin) == EOF)
    die (EXIT_FAILURE, errno, "-");

  return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}