/* expand - convert tabs to spaces
Copyright (C) 1989-2018 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* By default, convert all tabs to spaces.
Preserves backspace characters in the output; they decrement the
column count for tab calculations.
The default action is equivalent to -8.
Options:
--tabs=tab1[,tab2[,...]]
-t tab1[,tab2[,...]]
-tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
columns apart instead of the default 8. Otherwise,
set the tabs at columns tab1, tab2, etc. (numbered from
0); replace any tabs beyond the tab stops given with
single spaces.
--initial
-i Only convert initial tabs on each line to spaces.
David MacKenzie <djm@gnu.ai.mit.edu> */
#include <config.h>
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include <mbfile.h>
#include "system.h"
#include "die.h"
#include "xstrndup.h"
#include "expand-common.h"
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "expand"
#define AUTHORS proper_name ("David MacKenzie")
static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::";
static struct option const longopts[] =
{
{"tabs", required_argument, NULL, 't'},
{"initial", no_argument, NULL, 'i'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
};
void
usage (int status)
{
if (status != EXIT_SUCCESS)
emit_try_help ();
else
{
printf (_("\
Usage: %s [OPTION]... [FILE]...\n\
"),
program_name);
fputs (_("\
Convert tabs in each FILE to spaces, writing to standard output.\n\
"), stdout);
emit_stdin_note ();
emit_mandatory_arg_note ();
fputs (_("\
-i, --initial do not convert tabs after non blanks\n\
-t, --tabs=N have tabs N characters apart, not 8\n\
"), stdout);
emit_tab_list_info ();
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
emit_ancillary_info (PROGRAM_NAME);
}
exit (status);
}
/* Change tabs to spaces, writing to stdout.
Read each file in 'file_list', in order. */
static void
expand (void)
{
/* Input stream. */
FILE *fp = next_file (NULL);
mb_file_t mbf;
mbf_char_t c;
/* True if the starting locale is utf8. */
bool using_utf_locale;
/* True if the first file contains BOM header. */
bool found_bom;
using_utf_locale=check_utf_locale();
if (!fp)
return;
mbf_init (mbf, fp);
found_bom=check_bom(fp,&mbf);
if (using_utf_locale == false && found_bom == true)
{
/*try using some predefined locale */
if (set_utf_locale () != 0)
{
error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale"));
}
}
if (found_bom == true)
{
print_bom();
}
while (true)
{
/* If true, perform translations. */
bool convert = true;
/* The following variables have valid values only when CONVERT
is true: */
/* Column of next input character. */
uintmax_t column = 0;
/* Index in TAB_LIST of next tab stop to examine. */
size_t tab_index = 0;
/* Convert a line of text. */
do
{
while (true) {
mbf_getc (c, mbf);
if ((mb_iseof (c)) && (fp = next_file (fp)))
{
mbf_init (mbf, fp);
if (fp!=NULL)
{
if (check_bom(fp,&mbf)==true)
{
/*Not the first file - check BOM header*/
if (using_utf_locale==false && found_bom==false)
{
/*BOM header in subsequent file but not in the first one. */
error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
}
}
else
{
if(using_utf_locale==false && found_bom==true)
{
/*First file conatined BOM header - locale was switched to UTF
*all subsequent files should contain BOM. */
error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
}
}
}
continue;
}
else
{
break;
}
}
if (convert)
{
if (mb_iseq (c, '\t'))
{
/* Column the next input tab stop is on. */
uintmax_t next_tab_column;
bool last_tab IF_LINT (=0);
next_tab_column = get_next_tab_column (column, &tab_index,
&last_tab);
if (last_tab)
next_tab_column = column + 1;
if (next_tab_column < column)
die (EXIT_FAILURE, 0, _("input line is too long"));
while (++column < next_tab_column)
if (putchar (' ') < 0)
die (EXIT_FAILURE, errno, _("write error"));
mb_setascii (&c, ' ');
}
else if (mb_iseq (c, '\b'))
{
/* Go back one column, and force recalculation of the
next tab stop. */
column -= !!column;
tab_index -= !!tab_index;
}
/* A leading control character could make us trip over. */
else if (!mb_iscntrl (c))
{
column += mb_width (c);
if (!column)
die (EXIT_FAILURE, 0, _("input line is too long"));
}
convert &= convert_entire_line || mb_isblank (c);
}
if (mb_iseof (c))
return;
mb_putc (c, stdout);
if (ferror (stdout))
die (EXIT_FAILURE, errno, _("write error"));
}
while (!mb_iseq (c, '\n'));
}
}
int
main (int argc, char **argv)
{
int c;
initialize_main (&argc, &argv);
set_program_name (argv[0]);
setlocale (LC_ALL, "");
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
atexit (close_stdout);
convert_entire_line = true;
while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
{
switch (c)
{
case 'i':
convert_entire_line = false;
break;
case 't':
parse_tab_stops (optarg);
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
if (optarg)
parse_tab_stops (optarg - 1);
else
{
char tab_stop[2];
tab_stop[0] = c;
tab_stop[1] = '\0';
parse_tab_stops (tab_stop);
}
break;
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
default:
usage (EXIT_FAILURE);
}
}
finalize_tab_stops ();
set_file_list ( (optind < argc) ? &argv[optind] : NULL);
expand ();
cleanup_file_list_stdin ();
return exit_status;
}