Blob Blame History Raw
/*
 *  Name: unix2dos
 *  Documentation:
 *    Convert lf ('\x0a') characters in a file to cr lf ('\x0d' '\x0a')
 *    combinations.
 *
 *  The dos2unix package is distributed under FreeBSD style license.
 *  See also http://www.freebsd.org/copyright/freebsd-license.html
 *  --------
 *
 *  Copyright (C) 2009-2016 Erwin Waterlander
 *  Copyright (C) 1994-1995 Benjamin Lin.
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice in the documentation and/or other materials provided with
 *     the distribution.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
 *  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 *  OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 *  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 *  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 *  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *  == 1.0 == 1989.10.04 == John Birchfield (jb@koko.csustan.edu)
 *  == 1.1 == 1994.12.20 == Benjamin Lin (blin@socs.uts.edu.au)
 *     Cleaned up for Borland C/C++ 4.02
 *  == 1.2 == 1995.03.09 == Benjamin Lin (blin@socs.uts.edu.au)
 *     Fixed minor typo error
 *  == 1.3 == 1995.03.16 == Benjamin Lin (blin@socs.uts.edu.au)
 *     Modified to more conform to UNIX style.
 *  == 2.0 == 1995.03.19 == Benjamin Lin (blin@socs.uts.edu.au)
 *     Rewritten from scratch.
 *  == 2.2 == 1995.03.30 == Benjamin Lin (blin@socs.uts.edu.au)
 *     Conversion from SunOS charset implemented.
 *
 *  See ChangeLog.txt for complete version history.
 *
 */


/* #define DEBUG 1 */
#define __UNIX2DOS_C

#include "common.h"
#include "unix2dos.h"
# if (defined(_WIN32) && !defined(__CYGWIN__))
#include <windows.h>
#endif
#ifdef D2U_UNICODE
#if !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__)  /* Unix, Cygwin */
# include <langinfo.h>
#endif
#endif

void PrintLicense(void)
{
  D2U_ANSI_FPRINTF(stdout,_("\
Copyright (C) 2009-%d Erwin Waterlander\n\
Copyright (C) 1994-1995 Benjamin Lin\n\
All rights reserved.\n\n"),2016);
  PrintBSDLicense();
}

#ifdef D2U_UNICODE
wint_t AddDOSNewLineW(FILE* ipOutF, CFlag *ipFlag, wint_t CurChar, wint_t PrevChar, const char *progname)
{
  if (ipFlag->NewLine) {  /* add additional CR-LF? */
    /* Don't add line ending if it is a DOS line ending. Only in case of Unix line ending. */
    if ((CurChar == 0x0a) && (PrevChar != 0x0d)) {
      if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) {
          d2u_putwc_error(ipFlag,progname);
          return WEOF;
      }
      if (d2u_putwc(0x0a, ipOutF, ipFlag, progname) == WEOF) {
          d2u_putwc_error(ipFlag,progname);
          return WEOF;
      }
    }
  }
  return CurChar;
}
#endif

int AddDOSNewLine(FILE* ipOutF, CFlag *ipFlag, int CurChar, int PrevChar, const char *progname)
{
  if (ipFlag->NewLine) {  /* add additional CR-LF? */
    /* Don't add line ending if it is a DOS line ending. Only in case of Unix line ending. */
    if ((CurChar == '\x0a') && (PrevChar != '\x0d')) {
      if (fputc('\x0d', ipOutF) == EOF) {
          d2u_putc_error(ipFlag,progname);
          return EOF;
      }
      if (fputc('\x0a', ipOutF) == EOF) {
          d2u_putc_error(ipFlag,progname);
          return EOF;
      }
    }
  }
  return CurChar;
}

/* converts stream ipInF to DOS format text and write to stream ipOutF
 * RetVal: 0  if success
 *         -1  otherwise
 */
#ifdef D2U_UNICODE
int ConvertUnixToDosW(FILE* ipInF, FILE* ipOutF, CFlag *ipFlag, const char *progname)
{
    int RetVal = 0;
    wint_t TempChar;
    wint_t PreviousChar = 0;
    unsigned int line_nr = 1;
    unsigned int converted = 0;

    ipFlag->status = 0;

    /* LF    -> CR-LF */
    /* CR-LF -> CR-LF, in case the input file is a DOS text file */
    /* \x0a = Newline/Line Feed (LF) */
    /* \x0d = Carriage Return (CR) */

    switch (ipFlag->FromToMode)
    {
      case FROMTO_UNIX2DOS: /* unix2dos */
        while ((TempChar = d2u_getwc(ipInF, ipFlag->bomtype)) != WEOF) {  /* get character */
          if ((ipFlag->Force == 0) &&
              (TempChar < 32) &&
              (TempChar != 0x0a) &&  /* Not an LF */
              (TempChar != 0x0d) &&  /* Not a CR */
              (TempChar != 0x09) &&  /* Not a TAB */
              (TempChar != 0x0c)) {  /* Not a form feed */
            RetVal = -1;
            ipFlag->status |= BINARY_FILE ;
            if (ipFlag->verbose) {
              if ((ipFlag->stdio_mode) && (!ipFlag->error)) ipFlag->error = 1;
              D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
              D2U_UTF8_FPRINTF(stderr, _("Binary symbol 0x00%02X found at line %u\n"), TempChar, line_nr);
            }
            break;
          }
          if (TempChar == 0x0a) {
            if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) { /* got LF, put extra CR */
              RetVal = -1;
              d2u_putwc_error(ipFlag,progname);
              break;
            }
            converted++;
          } else {
             if (TempChar == 0x0d) { /* got CR */
               if ((TempChar = d2u_getwc(ipInF, ipFlag->bomtype)) == WEOF) { /* get next char (possibly LF) */
                 if (ferror(ipInF))  /* Read error */
                   break;
                 TempChar = 0x0d;  /* end of file. */
               } else {
                 if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) { /* put CR */
                   RetVal = -1;
                   d2u_putwc_error(ipFlag,progname);
                   break;
                 }
                 PreviousChar = 0x0d;
               }
             }
          }
          if (TempChar == 0x0a) /* Count all DOS and Unix line breaks */
            ++line_nr;
          if (d2u_putwc(TempChar, ipOutF, ipFlag, progname) == WEOF)
          {
              RetVal = -1;
              d2u_putwc_error(ipFlag,progname);
              break;
          } else {
            if (AddDOSNewLineW( ipOutF, ipFlag, TempChar, PreviousChar, progname) == WEOF) {
              RetVal = -1;
              break;
            }
          }
          PreviousChar = TempChar;
        }
        if ((TempChar == WEOF) && ferror(ipInF)) {
          RetVal = -1;
          d2u_getc_error(ipFlag,progname);
        }
        break;
      case FROMTO_UNIX2MAC: /* unix2mac */
        while ((TempChar = d2u_getwc(ipInF, ipFlag->bomtype)) != WEOF) {
          if ((ipFlag->Force == 0) &&
              (TempChar < 32) &&
              (TempChar != 0x0a) &&  /* Not an LF */
              (TempChar != 0x0d) &&  /* Not a CR */
              (TempChar != 0x09) &&  /* Not a TAB */
              (TempChar != 0x0c)) {  /* Not a form feed */
            RetVal = -1;
            ipFlag->status |= BINARY_FILE ;
            if (ipFlag->verbose) {
              if ((ipFlag->stdio_mode) && (!ipFlag->error)) ipFlag->error = 1;
              D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
              D2U_UTF8_FPRINTF(stderr, _("Binary symbol 0x00%02X found at line %u\n"), TempChar, line_nr);
            }
            break;
          }
          if (TempChar != 0x0a) { /* Not an LF */
            if(d2u_putwc(TempChar, ipOutF, ipFlag, progname) == WEOF) {
              RetVal = -1;
              d2u_putwc_error(ipFlag,progname);
              break;
            }
            PreviousChar = TempChar;
            if (TempChar == 0x0d) /* CR */
              ++line_nr;
          } else{
            /* TempChar is an LF */
            if (PreviousChar != 0x0d) /* CR already counted */
              ++line_nr;
            /* Don't touch this delimiter if it's a CR,LF pair. */
            if ( PreviousChar == 0x0d ) {
              if (d2u_putwc(0x0a, ipOutF, ipFlag, progname) == WEOF) { /* CR,LF pair. Put LF */
                  RetVal = -1;
                  d2u_putwc_error(ipFlag,progname);
                  break;
                }
              PreviousChar = TempChar;
              continue;
            }
            PreviousChar = TempChar;
            if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) { /* Unix line end (LF). Put CR */
                RetVal = -1;
                d2u_putwc_error(ipFlag,progname);
                break;
              }
            converted++;
            if (ipFlag->NewLine) {  /* add additional CR? */
              if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) {
                RetVal = -1;
                d2u_putwc_error(ipFlag,progname);
                break;
              }
            }
          }
        }
        if ((TempChar == WEOF) && ferror(ipInF)) {
          RetVal = -1;
          d2u_getc_error(ipFlag,progname);
        }
        break;
      default: /* unknown FromToMode */
      ;
#if DEBUG
      D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
      D2U_UTF8_FPRINTF(stderr, _("program error, invalid conversion mode %d\n"),ipFlag->FromToMode);
      exit(1);
#endif
    }
    if (ipFlag->status & UNICODE_CONVERSION_ERROR)
        ipFlag->line_nr = line_nr;
    if ((RetVal == 0) && (ipFlag->verbose > 1)) {
      D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
      D2U_UTF8_FPRINTF(stderr, _("Converted %u out of %u line breaks.\n"), converted, line_nr -1);
    }
    return RetVal;
}
#endif

/* converts stream ipInF to DOS format text and write to stream ipOutF
 * RetVal: 0  if success
 *         -1  otherwise
 */
int ConvertUnixToDos(FILE* ipInF, FILE* ipOutF, CFlag *ipFlag, const char *progname)
{
    int RetVal = 0;
    int TempChar;
    int PreviousChar = 0;
    int *ConvTable;
    unsigned int line_nr = 1;
    unsigned int converted = 0;

    ipFlag->status = 0;

    switch (ipFlag->ConvMode) {
      case CONVMODE_ASCII: /* ascii */
      case CONVMODE_UTF16LE: /* Assume UTF-16LE, bomtype = FILE_UTF8 or GB18030 */
      case CONVMODE_UTF16BE: /* Assume UTF-16BE, bomtype = FILE_UTF8 or GB18030 */
        ConvTable = U2DAsciiTable;
        break;
      case CONVMODE_7BIT: /* 7bit */
        ConvTable = U2D7BitTable;
        break;
      case CONVMODE_437: /* iso */
        ConvTable = U2DIso437Table;
        break;
      case CONVMODE_850: /* iso */
        ConvTable = U2DIso850Table;
        break;
      case CONVMODE_860: /* iso */
        ConvTable = U2DIso860Table;
        break;
      case CONVMODE_863: /* iso */
        ConvTable = U2DIso863Table;
        break;
      case CONVMODE_865: /* iso */
        ConvTable = U2DIso865Table;
        break;
      case CONVMODE_1252: /* iso */
        ConvTable = U2DIso1252Table;
        break;
      default: /* unknown convmode */
        ipFlag->status |= WRONG_CODEPAGE ;
        return(-1);
    }
    /* Turn off ISO and 7-bit conversion for Unicode text files */
    if (ipFlag->bomtype > 0)
      ConvTable = U2DAsciiTable;

    if ((ipFlag->ConvMode > CONVMODE_7BIT) && (ipFlag->verbose)) { /* not ascii or 7bit */
       D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
       D2U_UTF8_FPRINTF(stderr, _("using code page %d.\n"), ipFlag->ConvMode);
    }

    /* LF    -> CR-LF */
    /* CR-LF -> CR-LF, in case the input file is a DOS text file */
    /* \x0a = Newline/Line Feed (LF) */
    /* \x0d = Carriage Return (CR) */

    switch (ipFlag->FromToMode) {
      case FROMTO_UNIX2DOS: /* unix2dos */
        while ((TempChar = fgetc(ipInF)) != EOF) {  /* get character */
          if ((ipFlag->Force == 0) &&
              (TempChar < 32) &&
              (TempChar != '\x0a') &&  /* Not an LF */
              (TempChar != '\x0d') &&  /* Not a CR */
              (TempChar != '\x09') &&  /* Not a TAB */
              (TempChar != '\x0c')) {  /* Not a form feed */
            RetVal = -1;
            ipFlag->status |= BINARY_FILE ;
            if (ipFlag->verbose) {
              if ((ipFlag->stdio_mode) && (!ipFlag->error)) ipFlag->error = 1;
              D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
              D2U_UTF8_FPRINTF(stderr, _("Binary symbol 0x%02X found at line %u\n"), TempChar, line_nr);
            }
            break;
          }
          if (TempChar == '\x0a')
          {
            if (fputc('\x0d', ipOutF) == EOF) { /* got LF, put extra CR */
              RetVal = -1;
              d2u_putc_error(ipFlag,progname);
              break;
            }
            converted++;
          } else {
             if (TempChar == '\x0d') { /* got CR */
               if ((TempChar = fgetc(ipInF)) == EOF) { /* get next char (possibly LF) */
                 if (ferror(ipInF))  /* Read error */
                   break;
                 TempChar = '\x0d';  /* end of file. */
               } else {
                 if (fputc('\x0d', ipOutF) == EOF) { /* put CR */
                   RetVal = -1;
                   d2u_putc_error(ipFlag,progname);
                   break;
                 }
                 PreviousChar = '\x0d';
               }
             }
          }
          if (TempChar == '\x0a') /* Count all DOS and Unix line breaks */
            ++line_nr;
          if (fputc(ConvTable[TempChar], ipOutF) == EOF) { /* put LF or other char */
              RetVal = -1;
              d2u_putc_error(ipFlag,progname);
              break;
          } else {
            if (AddDOSNewLine( ipOutF, ipFlag, TempChar, PreviousChar, progname) == EOF) {
              RetVal = -1;
              break;
            }
          }
          PreviousChar = TempChar;
        }
        if ((TempChar == EOF) && ferror(ipInF)) {
          RetVal = -1;
          d2u_getc_error(ipFlag,progname);
        }
        break;
      case FROMTO_UNIX2MAC: /* unix2mac */
        while ((TempChar = fgetc(ipInF)) != EOF) {
          if ((ipFlag->Force == 0) &&
              (TempChar < 32) &&
              (TempChar != '\x0a') &&  /* Not an LF */
              (TempChar != '\x0d') &&  /* Not a CR */
              (TempChar != '\x09') &&  /* Not a TAB */
              (TempChar != '\x0c')) {  /* Not a form feed */
            RetVal = -1;
            ipFlag->status |= BINARY_FILE ;
            if (ipFlag->verbose) {
              if ((ipFlag->stdio_mode) && (!ipFlag->error)) ipFlag->error = 1;
              D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
              D2U_UTF8_FPRINTF(stderr, _("Binary symbol 0x%02X found at line %u\n"), TempChar, line_nr);
            }
            break;
          }
          if (TempChar != '\x0a') { /* Not an LF */
            if(fputc(ConvTable[TempChar], ipOutF) == EOF) {
              RetVal = -1;
              d2u_putc_error(ipFlag,progname);
              break;
            }
            PreviousChar = TempChar;
            if (TempChar == '\x0d') /* CR */
              ++line_nr;
          } else {
            /* TempChar is an LF */
            if (PreviousChar != '\x0d') /* CR already counted */
              ++line_nr;
            /* Don't touch this delimiter if it's a CR,LF pair. */
            if ( PreviousChar == '\x0d' ) {
              if (fputc('\x0a', ipOutF) == EOF) { /* CR,LF pair. Put LF */
                  RetVal = -1;
                  d2u_putc_error(ipFlag,progname);
                  break;
                }
              PreviousChar = TempChar;
              continue;
            }
            PreviousChar = TempChar;
            if (fputc('\x0d', ipOutF) == EOF) { /* Unix line end (LF). Put CR */
                RetVal = -1;
                d2u_putc_error(ipFlag,progname);
                break;
              }
            converted++;
            if (ipFlag->NewLine) {  /* add additional CR? */
              if (fputc('\x0d', ipOutF) == EOF) {
                RetVal = -1;
                d2u_putc_error(ipFlag,progname);
                break;
              }
            }
          }
        }
        if ((TempChar == EOF) && ferror(ipInF)) {
          RetVal = -1;
          d2u_getc_error(ipFlag,progname);
        }
        break;
      default: /* unknown FromToMode */
      ;
#if DEBUG
      D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
      D2U_UTF8_FPRINTF(stderr, _("program error, invalid conversion mode %d\n"),ipFlag->FromToMode);
      exit(1);
#endif
    }
    if ((RetVal == 0) && (ipFlag->verbose > 1)) {
      D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
      D2U_UTF8_FPRINTF(stderr, _("Converted %u out of %u line breaks.\n"), converted, line_nr -1);
    }
    return RetVal;
}


int main (int argc, char *argv[])
{
  /* variable declarations */
  char progname[9];
  CFlag *pFlag;
  char *ptr;
  char localedir[1024];
# ifdef __MINGW64__
  int _dowildcard = -1; /* enable wildcard expansion for Win64 */
# endif
  int  argc_new;
  char **argv_new;
#ifdef D2U_UNIFILE
  wchar_t **wargv;
  char ***argv_glob;
#endif

  progname[8] = '\0';
  strcpy(progname,"unix2dos");

#ifdef ENABLE_NLS
   ptr = getenv("DOS2UNIX_LOCALEDIR");
   if (ptr == NULL)
      d2u_strncpy(localedir,LOCALEDIR,sizeof(localedir));
   else {
      if (strlen(ptr) < sizeof(localedir))
         d2u_strncpy(localedir,ptr,sizeof(localedir));
      else {
         D2U_UTF8_FPRINTF(stderr,"%s: ",progname);
         D2U_ANSI_FPRINTF(stderr, "%s", _("error: Value of environment variable DOS2UNIX_LOCALEDIR is too long.\n"));
         d2u_strncpy(localedir,LOCALEDIR,sizeof(localedir));
      }
   }
#endif

#if defined(ENABLE_NLS) || (defined(D2U_UNICODE) && !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__))
/* setlocale() is also needed for nl_langinfo() */
#if (defined(_WIN32) && !defined(__CYGWIN__))
/* When the locale is set to "" on Windows all East-Asian multi-byte ANSI encoded text is printed
   wrongly when you use standard printf(). Also UTF-8 code is printed wrongly. See also test/setlocale.c.
   When we set the locale to "C" gettext still translates the messages on Windows. On Unix this would disable
   gettext. */
   setlocale (LC_ALL, "C");
#else
   setlocale (LC_ALL, "");
#endif
#endif

#ifdef ENABLE_NLS
   bindtextdomain (PACKAGE, localedir);
   textdomain (PACKAGE);
#endif


  /* variable initialisations */
  pFlag = (CFlag*)malloc(sizeof(CFlag));
  if (pFlag == NULL) {
    D2U_UTF8_FPRINTF(stderr, "unix2dos:");
    D2U_ANSI_FPRINTF(stderr, " %s\n", strerror(errno));
    return errno;
  }
  pFlag->FromToMode = FROMTO_UNIX2DOS;  /* default unix2dos */
  pFlag->keep_bom = 1;

  if ( ((ptr=strrchr(argv[0],'/')) == NULL) && ((ptr=strrchr(argv[0],'\\')) == NULL) )
    ptr = argv[0];
  else
    ptr++;

  if ((strcmpi("unix2mac", ptr) == 0) || (strcmpi("unix2mac.exe", ptr) == 0)) {
    pFlag->FromToMode = FROMTO_UNIX2MAC;
    strcpy(progname,"unix2mac");
  }

#ifdef D2U_UNIFILE
  /* Get arguments in wide Unicode format in the Windows Command Prompt */

  /* This does not support wildcard expansion (globbing) */
  wargv = CommandLineToArgvW(GetCommandLineW(), &argc);

  argv_glob = (char ***)malloc(sizeof(char***));
  if (argv_glob == NULL) {
    D2U_UTF8_FPRINTF(stderr, "%s:", progname);
    D2U_ANSI_FPRINTF(stderr, " %s\n", strerror(errno));
    free(pFlag);
    return errno;
  }
  /* Glob the arguments and convert them to UTF-8 */
  argc_new = glob_warg(argc, wargv, argv_glob, pFlag, progname);
  argv_new = *argv_glob;
#else  
  argc_new = argc;
  argv_new = argv;
#endif

#ifdef D2U_UNICODE
  return parse_options(argc_new, argv_new, pFlag, localedir, progname, PrintLicense, ConvertUnixToDos, ConvertUnixToDosW);
#else
  return parse_options(argc_new, argv_new, pFlag, localedir, progname, PrintLicense, ConvertUnixToDos);
#endif
}