/* Character set conversion.
   Copyright (C) 1999-2001, 2007, 2009-2017 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, see <http://www.gnu.org/licenses/>.  */
Packit 709fb3
Packit 709fb3
#include <config.h>
Packit 709fb3
Packit 709fb3
/* Specification.  */
Packit 709fb3
#include <iconv.h>
Packit 709fb3
Packit 709fb3
#include <stddef.h>
Packit 709fb3
Packit 709fb3
#if REPLACE_ICONV_UTF
Packit 709fb3
# include <errno.h>
Packit 709fb3
# include <stdint.h>
Packit 709fb3
# include <stdlib.h>
Packit 709fb3
# include "unistr.h"
Packit 709fb3
# ifndef uintptr_t
Packit 709fb3
#  define uintptr_t unsigned long
Packit 709fb3
# endif
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
#if REPLACE_ICONV_UTF
Packit 709fb3
Packit 709fb3
/* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11.  */
Packit 709fb3
Packit 709fb3
/* Return code if invalid. (xxx_mbtowc) */
Packit 709fb3
# define RET_ILSEQ      -1
Packit 709fb3
/* Return code if no bytes were read. (xxx_mbtowc) */
Packit 709fb3
# define RET_TOOFEW     -2
Packit 709fb3
Packit 709fb3
/* Return code if invalid. (xxx_wctomb) */
Packit 709fb3
# define RET_ILUNI      -1
Packit 709fb3
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
Packit 709fb3
# define RET_TOOSMALL   -2
Packit 709fb3
Packit 709fb3
/*
 * UTF-16BE
 */

/* Specification: RFC 2781 */
Packit 709fb3
static int
Packit 709fb3
utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 709fb3
{
Packit 709fb3
  if (n >= 2)
Packit 709fb3
    {
Packit 709fb3
      ucs4_t wc = (s[0] << 8) + s[1];
Packit 709fb3
      if (wc >= 0xd800 && wc < 0xdc00)
Packit 709fb3
        {
Packit 709fb3
          if (n >= 4)
Packit 709fb3
            {
Packit 709fb3
              ucs4_t wc2 = (s[2] << 8) + s[3];
Packit 709fb3
              if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
Packit 709fb3
                return RET_ILSEQ;
Packit 709fb3
              *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
Packit 709fb3
              return 4;
Packit 709fb3
            }
Packit 709fb3
        }
Packit 709fb3
      else if (wc >= 0xdc00 && wc < 0xe000)
Packit 709fb3
        {
Packit 709fb3
          return RET_ILSEQ;
Packit 709fb3
        }
Packit 709fb3
      else
Packit 709fb3
        {
Packit 709fb3
          *pwc = wc;
Packit 709fb3
          return 2;
Packit 709fb3
        }
Packit 709fb3
    }
Packit 709fb3
  return RET_TOOFEW;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static int
Packit 709fb3
utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 709fb3
{
Packit 709fb3
  if (!(wc >= 0xd800 && wc < 0xe000))
Packit 709fb3
    {
Packit 709fb3
      if (wc < 0x10000)
Packit 709fb3
        {
Packit 709fb3
          if (n >= 2)
Packit 709fb3
            {
Packit 709fb3
              r[0] = (unsigned char) (wc >> 8);
Packit 709fb3
              r[1] = (unsigned char) wc;
Packit 709fb3
              return 2;
Packit 709fb3
            }
Packit 709fb3
          else
Packit 709fb3
            return RET_TOOSMALL;
Packit 709fb3
        }
Packit 709fb3
      else if (wc < 0x110000)
Packit 709fb3
        {
Packit 709fb3
          if (n >= 4)
Packit 709fb3
            {
Packit 709fb3
              ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
Packit 709fb3
              ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
Packit 709fb3
              r[0] = (unsigned char) (wc1 >> 8);
Packit 709fb3
              r[1] = (unsigned char) wc1;
Packit 709fb3
              r[2] = (unsigned char) (wc2 >> 8);
Packit 709fb3
              r[3] = (unsigned char) wc2;
Packit 709fb3
              return 4;
Packit 709fb3
            }
Packit 709fb3
          else
Packit 709fb3
            return RET_TOOSMALL;
Packit 709fb3
        }
Packit 709fb3
    }
Packit 709fb3
  return RET_ILUNI;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/*
 * UTF-16LE
 */

/* Specification: RFC 2781 */
Packit 709fb3
static int
Packit 709fb3
utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 709fb3
{
Packit 709fb3
  if (n >= 2)
Packit 709fb3
    {
Packit 709fb3
      ucs4_t wc = s[0] + (s[1] << 8);
Packit 709fb3
      if (wc >= 0xd800 && wc < 0xdc00)
Packit 709fb3
        {
Packit 709fb3
          if (n >= 4)
Packit 709fb3
            {
Packit 709fb3
              ucs4_t wc2 = s[2] + (s[3] << 8);
Packit 709fb3
              if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
Packit 709fb3
                return RET_ILSEQ;
Packit 709fb3
              *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
Packit 709fb3
              return 4;
Packit 709fb3
            }
Packit 709fb3
        }
Packit 709fb3
      else if (wc >= 0xdc00 && wc < 0xe000)
Packit 709fb3
        {
Packit 709fb3
          return RET_ILSEQ;
Packit 709fb3
        }
Packit 709fb3
      else
Packit 709fb3
        {
Packit 709fb3
          *pwc = wc;
Packit 709fb3
          return 2;
Packit 709fb3
        }
Packit 709fb3
    }
Packit 709fb3
  return RET_TOOFEW;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static int
Packit 709fb3
utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 709fb3
{
Packit 709fb3
  if (!(wc >= 0xd800 && wc < 0xe000))
Packit 709fb3
    {
Packit 709fb3
      if (wc < 0x10000)
Packit 709fb3
        {
Packit 709fb3
          if (n >= 2)
Packit 709fb3
            {
Packit 709fb3
              r[0] = (unsigned char) wc;
Packit 709fb3
              r[1] = (unsigned char) (wc >> 8);
Packit 709fb3
              return 2;
Packit 709fb3
            }
Packit 709fb3
          else
Packit 709fb3
            return RET_TOOSMALL;
Packit 709fb3
        }
Packit 709fb3
      else if (wc < 0x110000)
Packit 709fb3
        {
Packit 709fb3
          if (n >= 4)
Packit 709fb3
            {
Packit 709fb3
              ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
Packit 709fb3
              ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
Packit 709fb3
              r[0] = (unsigned char) wc1;
Packit 709fb3
              r[1] = (unsigned char) (wc1 >> 8);
Packit 709fb3
              r[2] = (unsigned char) wc2;
Packit 709fb3
              r[3] = (unsigned char) (wc2 >> 8);
Packit 709fb3
              return 4;
Packit 709fb3
            }
Packit 709fb3
          else
Packit 709fb3
            return RET_TOOSMALL;
Packit 709fb3
        }
Packit 709fb3
    }
Packit 709fb3
  return RET_ILUNI;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/*
 * UTF-32BE
 */

/* Specification: Unicode 3.1 Standard Annex #19 */
Packit 709fb3
static int
Packit 709fb3
utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 709fb3
{
Packit 709fb3
  if (n >= 4)
Packit 709fb3
    {
Packit 709fb3
      ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
Packit 709fb3
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 709fb3
        {
Packit 709fb3
          *pwc = wc;
Packit 709fb3
          return 4;
Packit 709fb3
        }
Packit 709fb3
      else
Packit 709fb3
        return RET_ILSEQ;
Packit 709fb3
    }
Packit 709fb3
  return RET_TOOFEW;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static int
Packit 709fb3
utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 709fb3
{
Packit 709fb3
  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 709fb3
    {
Packit 709fb3
      if (n >= 4)
Packit 709fb3
        {
Packit 709fb3
          r[0] = 0;
Packit 709fb3
          r[1] = (unsigned char) (wc >> 16);
Packit 709fb3
          r[2] = (unsigned char) (wc >> 8);
Packit 709fb3
          r[3] = (unsigned char) wc;
Packit 709fb3
          return 4;
Packit 709fb3
        }
Packit 709fb3
      else
Packit 709fb3
        return RET_TOOSMALL;
Packit 709fb3
    }
Packit 709fb3
  return RET_ILUNI;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/*
 * UTF-32LE
 */

/* Specification: Unicode 3.1 Standard Annex #19 */
Packit 709fb3
static int
Packit 709fb3
utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 709fb3
{
Packit 709fb3
  if (n >= 4)
Packit 709fb3
    {
Packit 709fb3
      ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24);
Packit 709fb3
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 709fb3
        {
Packit 709fb3
          *pwc = wc;
Packit 709fb3
          return 4;
Packit 709fb3
        }
Packit 709fb3
      else
Packit 709fb3
        return RET_ILSEQ;
Packit 709fb3
    }
Packit 709fb3
  return RET_TOOFEW;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static int
Packit 709fb3
utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 709fb3
{
Packit 709fb3
  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 709fb3
    {
Packit 709fb3
      if (n >= 4)
Packit 709fb3
        {
Packit 709fb3
          r[0] = (unsigned char) wc;
Packit 709fb3
          r[1] = (unsigned char) (wc >> 8);
Packit 709fb3
          r[2] = (unsigned char) (wc >> 16);
Packit 709fb3
          r[3] = 0;
Packit 709fb3
          return 4;
Packit 709fb3
        }
Packit 709fb3
      else
Packit 709fb3
        return RET_TOOSMALL;
Packit 709fb3
    }
Packit 709fb3
  return RET_ILUNI;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
size_t
Packit 709fb3
rpl_iconv (iconv_t cd,
Packit 709fb3
           ICONV_CONST char **inbuf, size_t *inbytesleft,
Packit 709fb3
           char **outbuf, size_t *outbytesleft)
Packit 709fb3
#undef iconv
Packit 709fb3
{
Packit 709fb3
#if REPLACE_ICONV_UTF
Packit 709fb3
  switch ((uintptr_t) cd)
Packit 709fb3
    {
Packit 709fb3
      {
Packit 709fb3
        int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
Packit 709fb3
Packit 709fb3
        case (uintptr_t) _ICONV_UTF8_UTF16BE:
Packit 709fb3
          xxx_wctomb = utf16be_wctomb;
Packit 709fb3
          goto loop_from_utf8;
Packit 709fb3
        case (uintptr_t) _ICONV_UTF8_UTF16LE:
Packit 709fb3
          xxx_wctomb = utf16le_wctomb;
Packit 709fb3
          goto loop_from_utf8;
Packit 709fb3
        case (uintptr_t) _ICONV_UTF8_UTF32BE:
Packit 709fb3
          xxx_wctomb = utf32be_wctomb;
Packit 709fb3
          goto loop_from_utf8;
Packit 709fb3
        case (uintptr_t) _ICONV_UTF8_UTF32LE:
Packit 709fb3
          xxx_wctomb = utf32le_wctomb;
Packit 709fb3
          goto loop_from_utf8;
Packit 709fb3
Packit 709fb3
       loop_from_utf8:
Packit 709fb3
        if (inbuf == NULL || *inbuf == NULL)
Packit 709fb3
          return 0;
Packit 709fb3
        {
Packit 709fb3
          ICONV_CONST char *inptr = *inbuf;
Packit 709fb3
          size_t inleft = *inbytesleft;
Packit 709fb3
          char *outptr = *outbuf;
Packit 709fb3
          size_t outleft = *outbytesleft;
Packit 709fb3
          size_t res = 0;
Packit 709fb3
          while (inleft > 0)
Packit 709fb3
            {
Packit 709fb3
              ucs4_t uc;
Packit 709fb3
              int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
Packit 709fb3
              if (m <= 0)
Packit 709fb3
                {
Packit 709fb3
                  if (m == -1)
Packit 709fb3
                    {
Packit 709fb3
                      errno = EILSEQ;
Packit 709fb3
                      res = (size_t)(-1);
Packit 709fb3
                      break;
Packit 709fb3
                    }
Packit 709fb3
                  if (m == -2)
Packit 709fb3
                    {
Packit 709fb3
                      errno = EINVAL;
Packit 709fb3
                      res = (size_t)(-1);
Packit 709fb3
                      break;
Packit 709fb3
                    }
Packit 709fb3
                  abort ();
Packit 709fb3
                }
Packit 709fb3
              else
Packit 709fb3
                {
Packit 709fb3
                  int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
Packit 709fb3
                  if (n < 0)
Packit 709fb3
                    {
Packit 709fb3
                      if (n == RET_ILUNI)
Packit 709fb3
                        {
Packit 709fb3
                          errno = EILSEQ;
Packit 709fb3
                          res = (size_t)(-1);
Packit 709fb3
                          break;
Packit 709fb3
                        }
Packit 709fb3
                      if (n == RET_TOOSMALL)
Packit 709fb3
                        {
Packit 709fb3
                          errno = E2BIG;
Packit 709fb3
                          res = (size_t)(-1);
Packit 709fb3
                          break;
Packit 709fb3
                        }
Packit 709fb3
                      abort ();
Packit 709fb3
                    }
Packit 709fb3
                  else
Packit 709fb3
                    {
Packit 709fb3
                      inptr += m;
Packit 709fb3
                      inleft -= m;
Packit 709fb3
                      outptr += n;
Packit 709fb3
                      outleft -= n;
Packit 709fb3
                    }
Packit 709fb3
                }
Packit 709fb3
            }
Packit 709fb3
          *inbuf = inptr;
Packit 709fb3
          *inbytesleft = inleft;
Packit 709fb3
          *outbuf = outptr;
Packit 709fb3
          *outbytesleft = outleft;
Packit 709fb3
          return res;
Packit 709fb3
        }
Packit 709fb3
      }
Packit 709fb3
Packit 709fb3
      {
Packit 709fb3
        int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
Packit 709fb3
Packit 709fb3
        case (uintptr_t) _ICONV_UTF16BE_UTF8:
Packit 709fb3
          xxx_mbtowc = utf16be_mbtowc;
Packit 709fb3
          goto loop_to_utf8;
Packit 709fb3
        case (uintptr_t) _ICONV_UTF16LE_UTF8:
Packit 709fb3
          xxx_mbtowc = utf16le_mbtowc;
Packit 709fb3
          goto loop_to_utf8;
Packit 709fb3
        case (uintptr_t) _ICONV_UTF32BE_UTF8:
Packit 709fb3
          xxx_mbtowc = utf32be_mbtowc;
Packit 709fb3
          goto loop_to_utf8;
Packit 709fb3
        case (uintptr_t) _ICONV_UTF32LE_UTF8:
Packit 709fb3
          xxx_mbtowc = utf32le_mbtowc;
Packit 709fb3
          goto loop_to_utf8;
Packit 709fb3
Packit 709fb3
       loop_to_utf8:
Packit 709fb3
        if (inbuf == NULL || *inbuf == NULL)
Packit 709fb3
          return 0;
Packit 709fb3
        {
Packit 709fb3
          ICONV_CONST char *inptr = *inbuf;
Packit 709fb3
          size_t inleft = *inbytesleft;
Packit 709fb3
          char *outptr = *outbuf;
Packit 709fb3
          size_t outleft = *outbytesleft;
Packit 709fb3
          size_t res = 0;
Packit 709fb3
          while (inleft > 0)
Packit 709fb3
            {
Packit 709fb3
              ucs4_t uc;
Packit 709fb3
              int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
Packit 709fb3
              if (m <= 0)
Packit 709fb3
                {
Packit 709fb3
                  if (m == RET_ILSEQ)
Packit 709fb3
                    {
Packit 709fb3
                      errno = EILSEQ;
Packit 709fb3
                      res = (size_t)(-1);
Packit 709fb3
                      break;
Packit 709fb3
                    }
Packit 709fb3
                  if (m == RET_TOOFEW)
Packit 709fb3
                    {
Packit 709fb3
                      errno = EINVAL;
Packit 709fb3
                      res = (size_t)(-1);
Packit 709fb3
                      break;
Packit 709fb3
                    }
Packit 709fb3
                  abort ();
Packit 709fb3
                }
Packit 709fb3
              else
Packit 709fb3
                {
Packit 709fb3
                  int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
Packit 709fb3
                  if (n < 0)
Packit 709fb3
                    {
Packit 709fb3
                      if (n == -1)
Packit 709fb3
                        {
Packit 709fb3
                          errno = EILSEQ;
Packit 709fb3
                          res = (size_t)(-1);
Packit 709fb3
                          break;
Packit 709fb3
                        }
Packit 709fb3
                      if (n == -2)
Packit 709fb3
                        {
Packit 709fb3
                          errno = E2BIG;
Packit 709fb3
                          res = (size_t)(-1);
Packit 709fb3
                          break;
Packit 709fb3
                        }
Packit 709fb3
                      abort ();
Packit 709fb3
                    }
Packit 709fb3
                  else
Packit 709fb3
                    {
Packit 709fb3
                      inptr += m;
Packit 709fb3
                      inleft -= m;
Packit 709fb3
                      outptr += n;
Packit 709fb3
                      outleft -= n;
Packit 709fb3
                    }
Packit 709fb3
                }
Packit 709fb3
            }
Packit 709fb3
          *inbuf = inptr;
Packit 709fb3
          *inbytesleft = inleft;
Packit 709fb3
          *outbuf = outptr;
Packit 709fb3
          *outbytesleft = outleft;
Packit 709fb3
          return res;
Packit 709fb3
        }
Packit 709fb3
      }
Packit 709fb3
    }
Packit 709fb3
#endif
Packit 709fb3
  return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
Packit 709fb3
}