Blame lib/iconv.c

Packit 8f70b4
/* Character set conversion.
Packit 8f70b4
   Copyright (C) 1999-2001, 2007, 2009-2018 Free Software Foundation, Inc.
Packit 8f70b4
Packit 8f70b4
   This program is free software; you can redistribute it and/or modify
Packit 8f70b4
   it under the terms of the GNU General Public License as published by
Packit 8f70b4
   the Free Software Foundation; either version 3, or (at your option)
Packit 8f70b4
   any later version.
Packit 8f70b4
Packit 8f70b4
   This program is distributed in the hope that it will be useful,
Packit 8f70b4
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 8f70b4
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 8f70b4
   GNU General Public License for more details.
Packit 8f70b4
Packit 8f70b4
   You should have received a copy of the GNU General Public License along
Packit 8f70b4
   with this program; if not, see <https://www.gnu.org/licenses/>.  */
Packit 8f70b4
Packit 8f70b4
#include <config.h>
Packit 8f70b4
Packit 8f70b4
/* Specification.  */
Packit 8f70b4
#include <iconv.h>
Packit 8f70b4
Packit 8f70b4
#include <stddef.h>
Packit 8f70b4
Packit 8f70b4
#if REPLACE_ICONV_UTF
Packit 8f70b4
# include <errno.h>
Packit 8f70b4
# include <stdint.h>
Packit 8f70b4
# include <stdlib.h>
Packit 8f70b4
# include "unistr.h"
Packit 8f70b4
# ifndef uintptr_t
Packit 8f70b4
#  define uintptr_t unsigned long
Packit 8f70b4
# endif
Packit 8f70b4
#endif
Packit 8f70b4
Packit 8f70b4
#if REPLACE_ICONV_UTF
Packit 8f70b4
Packit 8f70b4
/* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11.  */
Packit 8f70b4
Packit 8f70b4
/* Return code if invalid. (xxx_mbtowc) */
Packit 8f70b4
# define RET_ILSEQ      -1
Packit 8f70b4
/* Return code if the input is too short to hold a complete character;
   no bytes are consumed. (xxx_mbtowc) */
Packit 8f70b4
# define RET_TOOFEW     -2
Packit 8f70b4
Packit 8f70b4
/* Return code if invalid. (xxx_wctomb) */
Packit 8f70b4
# define RET_ILUNI      -1
Packit 8f70b4
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
Packit 8f70b4
# define RET_TOOSMALL   -2
Packit 8f70b4
Packit 8f70b4
/*
Packit 8f70b4
 * UTF-16BE
Packit 8f70b4
 */
Packit 8f70b4
Packit 8f70b4
/* Specification: RFC 2781 */
Packit 8f70b4
Packit 8f70b4
static int
Packit 8f70b4
utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 8f70b4
{
Packit 8f70b4
  if (n >= 2)
Packit 8f70b4
    {
Packit 8f70b4
      ucs4_t wc = (s[0] << 8) + s[1];
Packit 8f70b4
      if (wc >= 0xd800 && wc < 0xdc00)
Packit 8f70b4
        {
Packit 8f70b4
          if (n >= 4)
Packit 8f70b4
            {
Packit 8f70b4
              ucs4_t wc2 = (s[2] << 8) + s[3];
Packit 8f70b4
              if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
Packit 8f70b4
                return RET_ILSEQ;
Packit 8f70b4
              *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
Packit 8f70b4
              return 4;
Packit 8f70b4
            }
Packit 8f70b4
        }
Packit 8f70b4
      else if (wc >= 0xdc00 && wc < 0xe000)
Packit 8f70b4
        {
Packit 8f70b4
          return RET_ILSEQ;
Packit 8f70b4
        }
Packit 8f70b4
      else
Packit 8f70b4
        {
Packit 8f70b4
          *pwc = wc;
Packit 8f70b4
          return 2;
Packit 8f70b4
        }
Packit 8f70b4
    }
Packit 8f70b4
  return RET_TOOFEW;
Packit 8f70b4
}
Packit 8f70b4
Packit 8f70b4
static int
Packit 8f70b4
utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 8f70b4
{
Packit 8f70b4
  if (!(wc >= 0xd800 && wc < 0xe000))
Packit 8f70b4
    {
Packit 8f70b4
      if (wc < 0x10000)
Packit 8f70b4
        {
Packit 8f70b4
          if (n >= 2)
Packit 8f70b4
            {
Packit 8f70b4
              r[0] = (unsigned char) (wc >> 8);
Packit 8f70b4
              r[1] = (unsigned char) wc;
Packit 8f70b4
              return 2;
Packit 8f70b4
            }
Packit 8f70b4
          else
Packit 8f70b4
            return RET_TOOSMALL;
Packit 8f70b4
        }
Packit 8f70b4
      else if (wc < 0x110000)
Packit 8f70b4
        {
Packit 8f70b4
          if (n >= 4)
Packit 8f70b4
            {
Packit 8f70b4
              ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
Packit 8f70b4
              ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
Packit 8f70b4
              r[0] = (unsigned char) (wc1 >> 8);
Packit 8f70b4
              r[1] = (unsigned char) wc1;
Packit 8f70b4
              r[2] = (unsigned char) (wc2 >> 8);
Packit 8f70b4
              r[3] = (unsigned char) wc2;
Packit 8f70b4
              return 4;
Packit 8f70b4
            }
Packit 8f70b4
          else
Packit 8f70b4
            return RET_TOOSMALL;
Packit 8f70b4
        }
Packit 8f70b4
    }
Packit 8f70b4
  return RET_ILUNI;
Packit 8f70b4
}
Packit 8f70b4
Packit 8f70b4
/*
Packit 8f70b4
 * UTF-16LE
Packit 8f70b4
 */
Packit 8f70b4
Packit 8f70b4
/* Specification: RFC 2781 */
Packit 8f70b4
Packit 8f70b4
static int
Packit 8f70b4
utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 8f70b4
{
Packit 8f70b4
  if (n >= 2)
Packit 8f70b4
    {
Packit 8f70b4
      ucs4_t wc = s[0] + (s[1] << 8);
Packit 8f70b4
      if (wc >= 0xd800 && wc < 0xdc00)
Packit 8f70b4
        {
Packit 8f70b4
          if (n >= 4)
Packit 8f70b4
            {
Packit 8f70b4
              ucs4_t wc2 = s[2] + (s[3] << 8);
Packit 8f70b4
              if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
Packit 8f70b4
                return RET_ILSEQ;
Packit 8f70b4
              *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
Packit 8f70b4
              return 4;
Packit 8f70b4
            }
Packit 8f70b4
        }
Packit 8f70b4
      else if (wc >= 0xdc00 && wc < 0xe000)
Packit 8f70b4
        {
Packit 8f70b4
          return RET_ILSEQ;
Packit 8f70b4
        }
Packit 8f70b4
      else
Packit 8f70b4
        {
Packit 8f70b4
          *pwc = wc;
Packit 8f70b4
          return 2;
Packit 8f70b4
        }
Packit 8f70b4
    }
Packit 8f70b4
  return RET_TOOFEW;
Packit 8f70b4
}
Packit 8f70b4
Packit 8f70b4
static int
Packit 8f70b4
utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 8f70b4
{
Packit 8f70b4
  if (!(wc >= 0xd800 && wc < 0xe000))
Packit 8f70b4
    {
Packit 8f70b4
      if (wc < 0x10000)
Packit 8f70b4
        {
Packit 8f70b4
          if (n >= 2)
Packit 8f70b4
            {
Packit 8f70b4
              r[0] = (unsigned char) wc;
Packit 8f70b4
              r[1] = (unsigned char) (wc >> 8);
Packit 8f70b4
              return 2;
Packit 8f70b4
            }
Packit 8f70b4
          else
Packit 8f70b4
            return RET_TOOSMALL;
Packit 8f70b4
        }
Packit 8f70b4
      else if (wc < 0x110000)
Packit 8f70b4
        {
Packit 8f70b4
          if (n >= 4)
Packit 8f70b4
            {
Packit 8f70b4
              ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
Packit 8f70b4
              ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
Packit 8f70b4
              r[0] = (unsigned char) wc1;
Packit 8f70b4
              r[1] = (unsigned char) (wc1 >> 8);
Packit 8f70b4
              r[2] = (unsigned char) wc2;
Packit 8f70b4
              r[3] = (unsigned char) (wc2 >> 8);
Packit 8f70b4
              return 4;
Packit 8f70b4
            }
Packit 8f70b4
          else
Packit 8f70b4
            return RET_TOOSMALL;
Packit 8f70b4
        }
Packit 8f70b4
    }
Packit 8f70b4
  return RET_ILUNI;
Packit 8f70b4
}
Packit 8f70b4
Packit 8f70b4
/*
Packit 8f70b4
 * UTF-32BE
Packit 8f70b4
 */
Packit 8f70b4
Packit 8f70b4
/* Specification: Unicode 3.1 Standard Annex #19 */
Packit 8f70b4
Packit 8f70b4
static int
Packit 8f70b4
utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 8f70b4
{
Packit 8f70b4
  if (n >= 4)
Packit 8f70b4
    {
Packit 8f70b4
      ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
Packit 8f70b4
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 8f70b4
        {
Packit 8f70b4
          *pwc = wc;
Packit 8f70b4
          return 4;
Packit 8f70b4
        }
Packit 8f70b4
      else
Packit 8f70b4
        return RET_ILSEQ;
Packit 8f70b4
    }
Packit 8f70b4
  return RET_TOOFEW;
Packit 8f70b4
}
Packit 8f70b4
Packit 8f70b4
static int
Packit 8f70b4
utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 8f70b4
{
Packit 8f70b4
  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 8f70b4
    {
Packit 8f70b4
      if (n >= 4)
Packit 8f70b4
        {
Packit 8f70b4
          r[0] = 0;
Packit 8f70b4
          r[1] = (unsigned char) (wc >> 16);
Packit 8f70b4
          r[2] = (unsigned char) (wc >> 8);
Packit 8f70b4
          r[3] = (unsigned char) wc;
Packit 8f70b4
          return 4;
Packit 8f70b4
        }
Packit 8f70b4
      else
Packit 8f70b4
        return RET_TOOSMALL;
Packit 8f70b4
    }
Packit 8f70b4
  return RET_ILUNI;
Packit 8f70b4
}
Packit 8f70b4
Packit 8f70b4
/*
Packit 8f70b4
 * UTF-32LE
Packit 8f70b4
 */
Packit 8f70b4
Packit 8f70b4
/* Specification: Unicode 3.1 Standard Annex #19 */
Packit 8f70b4
Packit 8f70b4
static int
Packit 8f70b4
utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 8f70b4
{
Packit 8f70b4
  if (n >= 4)
Packit 8f70b4
    {
Packit 8f70b4
      ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24);
Packit 8f70b4
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 8f70b4
        {
Packit 8f70b4
          *pwc = wc;
Packit 8f70b4
          return 4;
Packit 8f70b4
        }
Packit 8f70b4
      else
Packit 8f70b4
        return RET_ILSEQ;
Packit 8f70b4
    }
Packit 8f70b4
  return RET_TOOFEW;
Packit 8f70b4
}
Packit 8f70b4
Packit 8f70b4
static int
Packit 8f70b4
utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 8f70b4
{
Packit 8f70b4
  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 8f70b4
    {
Packit 8f70b4
      if (n >= 4)
Packit 8f70b4
        {
Packit 8f70b4
          r[0] = (unsigned char) wc;
Packit 8f70b4
          r[1] = (unsigned char) (wc >> 8);
Packit 8f70b4
          r[2] = (unsigned char) (wc >> 16);
Packit 8f70b4
          r[3] = 0;
Packit 8f70b4
          return 4;
Packit 8f70b4
        }
Packit 8f70b4
      else
Packit 8f70b4
        return RET_TOOSMALL;
Packit 8f70b4
    }
Packit 8f70b4
  return RET_ILUNI;
Packit 8f70b4
}
Packit 8f70b4
Packit 8f70b4
#endif
Packit 8f70b4
Packit 8f70b4
size_t
Packit 8f70b4
rpl_iconv (iconv_t cd,
Packit 8f70b4
           ICONV_CONST char **inbuf, size_t *inbytesleft,
Packit 8f70b4
           char **outbuf, size_t *outbytesleft)
Packit 8f70b4
#undef iconv
Packit 8f70b4
{
Packit 8f70b4
#if REPLACE_ICONV_UTF
Packit 8f70b4
  switch ((uintptr_t) cd)
Packit 8f70b4
    {
Packit 8f70b4
      {
Packit 8f70b4
        int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
Packit 8f70b4
Packit 8f70b4
        case (uintptr_t) _ICONV_UTF8_UTF16BE:
Packit 8f70b4
          xxx_wctomb = utf16be_wctomb;
Packit 8f70b4
          goto loop_from_utf8;
Packit 8f70b4
        case (uintptr_t) _ICONV_UTF8_UTF16LE:
Packit 8f70b4
          xxx_wctomb = utf16le_wctomb;
Packit 8f70b4
          goto loop_from_utf8;
Packit 8f70b4
        case (uintptr_t) _ICONV_UTF8_UTF32BE:
Packit 8f70b4
          xxx_wctomb = utf32be_wctomb;
Packit 8f70b4
          goto loop_from_utf8;
Packit 8f70b4
        case (uintptr_t) _ICONV_UTF8_UTF32LE:
Packit 8f70b4
          xxx_wctomb = utf32le_wctomb;
Packit 8f70b4
          goto loop_from_utf8;
Packit 8f70b4
Packit 8f70b4
       loop_from_utf8:
Packit 8f70b4
        if (inbuf == NULL || *inbuf == NULL)
Packit 8f70b4
          return 0;
Packit 8f70b4
        {
Packit 8f70b4
          ICONV_CONST char *inptr = *inbuf;
Packit 8f70b4
          size_t inleft = *inbytesleft;
Packit 8f70b4
          char *outptr = *outbuf;
Packit 8f70b4
          size_t outleft = *outbytesleft;
Packit 8f70b4
          size_t res = 0;
Packit 8f70b4
          while (inleft > 0)
Packit 8f70b4
            {
Packit 8f70b4
              ucs4_t uc;
Packit 8f70b4
              int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
Packit 8f70b4
              if (m <= 0)
Packit 8f70b4
                {
Packit 8f70b4
                  if (m == -1)
Packit 8f70b4
                    {
Packit 8f70b4
                      errno = EILSEQ;
Packit 8f70b4
                      res = (size_t)(-1);
Packit 8f70b4
                      break;
Packit 8f70b4
                    }
Packit 8f70b4
                  if (m == -2)
Packit 8f70b4
                    {
Packit 8f70b4
                      errno = EINVAL;
Packit 8f70b4
                      res = (size_t)(-1);
Packit 8f70b4
                      break;
Packit 8f70b4
                    }
Packit 8f70b4
                  abort ();
Packit 8f70b4
                }
Packit 8f70b4
              else
Packit 8f70b4
                {
Packit 8f70b4
                  int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
Packit 8f70b4
                  if (n < 0)
Packit 8f70b4
                    {
Packit 8f70b4
                      if (n == RET_ILUNI)
Packit 8f70b4
                        {
Packit 8f70b4
                          errno = EILSEQ;
Packit 8f70b4
                          res = (size_t)(-1);
Packit 8f70b4
                          break;
Packit 8f70b4
                        }
Packit 8f70b4
                      if (n == RET_TOOSMALL)
Packit 8f70b4
                        {
Packit 8f70b4
                          errno = E2BIG;
Packit 8f70b4
                          res = (size_t)(-1);
Packit 8f70b4
                          break;
Packit 8f70b4
                        }
Packit 8f70b4
                      abort ();
Packit 8f70b4
                    }
Packit 8f70b4
                  else
Packit 8f70b4
                    {
Packit 8f70b4
                      inptr += m;
Packit 8f70b4
                      inleft -= m;
Packit 8f70b4
                      outptr += n;
Packit 8f70b4
                      outleft -= n;
Packit 8f70b4
                    }
Packit 8f70b4
                }
Packit 8f70b4
            }
Packit 8f70b4
          *inbuf = inptr;
Packit 8f70b4
          *inbytesleft = inleft;
Packit 8f70b4
          *outbuf = outptr;
Packit 8f70b4
          *outbytesleft = outleft;
Packit 8f70b4
          return res;
Packit 8f70b4
        }
Packit 8f70b4
      }
Packit 8f70b4
Packit 8f70b4
      {
Packit 8f70b4
        int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
Packit 8f70b4
Packit 8f70b4
        case (uintptr_t) _ICONV_UTF16BE_UTF8:
Packit 8f70b4
          xxx_mbtowc = utf16be_mbtowc;
Packit 8f70b4
          goto loop_to_utf8;
Packit 8f70b4
        case (uintptr_t) _ICONV_UTF16LE_UTF8:
Packit 8f70b4
          xxx_mbtowc = utf16le_mbtowc;
Packit 8f70b4
          goto loop_to_utf8;
Packit 8f70b4
        case (uintptr_t) _ICONV_UTF32BE_UTF8:
Packit 8f70b4
          xxx_mbtowc = utf32be_mbtowc;
Packit 8f70b4
          goto loop_to_utf8;
Packit 8f70b4
        case (uintptr_t) _ICONV_UTF32LE_UTF8:
Packit 8f70b4
          xxx_mbtowc = utf32le_mbtowc;
Packit 8f70b4
          goto loop_to_utf8;
Packit 8f70b4
Packit 8f70b4
       loop_to_utf8:
Packit 8f70b4
        if (inbuf == NULL || *inbuf == NULL)
Packit 8f70b4
          return 0;
Packit 8f70b4
        {
Packit 8f70b4
          ICONV_CONST char *inptr = *inbuf;
Packit 8f70b4
          size_t inleft = *inbytesleft;
Packit 8f70b4
          char *outptr = *outbuf;
Packit 8f70b4
          size_t outleft = *outbytesleft;
Packit 8f70b4
          size_t res = 0;
Packit 8f70b4
          while (inleft > 0)
Packit 8f70b4
            {
Packit 8f70b4
              ucs4_t uc;
Packit 8f70b4
              int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
Packit 8f70b4
              if (m <= 0)
Packit 8f70b4
                {
Packit 8f70b4
                  if (m == RET_ILSEQ)
Packit 8f70b4
                    {
Packit 8f70b4
                      errno = EILSEQ;
Packit 8f70b4
                      res = (size_t)(-1);
Packit 8f70b4
                      break;
Packit 8f70b4
                    }
Packit 8f70b4
                  if (m == RET_TOOFEW)
Packit 8f70b4
                    {
Packit 8f70b4
                      errno = EINVAL;
Packit 8f70b4
                      res = (size_t)(-1);
Packit 8f70b4
                      break;
Packit 8f70b4
                    }
Packit 8f70b4
                  abort ();
Packit 8f70b4
                }
Packit 8f70b4
              else
Packit 8f70b4
                {
Packit 8f70b4
                  int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
Packit 8f70b4
                  if (n < 0)
Packit 8f70b4
                    {
Packit 8f70b4
                      if (n == -1)
Packit 8f70b4
                        {
Packit 8f70b4
                          errno = EILSEQ;
Packit 8f70b4
                          res = (size_t)(-1);
Packit 8f70b4
                          break;
Packit 8f70b4
                        }
Packit 8f70b4
                      if (n == -2)
Packit 8f70b4
                        {
Packit 8f70b4
                          errno = E2BIG;
Packit 8f70b4
                          res = (size_t)(-1);
Packit 8f70b4
                          break;
Packit 8f70b4
                        }
Packit 8f70b4
                      abort ();
Packit 8f70b4
                    }
Packit 8f70b4
                  else
Packit 8f70b4
                    {
Packit 8f70b4
                      inptr += m;
Packit 8f70b4
                      inleft -= m;
Packit 8f70b4
                      outptr += n;
Packit 8f70b4
                      outleft -= n;
Packit 8f70b4
                    }
Packit 8f70b4
                }
Packit 8f70b4
            }
Packit 8f70b4
          *inbuf = inptr;
Packit 8f70b4
          *inbytesleft = inleft;
Packit 8f70b4
          *outbuf = outptr;
Packit 8f70b4
          *outbytesleft = outleft;
Packit 8f70b4
          return res;
Packit 8f70b4
        }
Packit 8f70b4
      }
Packit 8f70b4
    }
Packit 8f70b4
#endif
Packit 8f70b4
  return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
Packit 8f70b4
}