Blame lib/iconv.c

Packit Service a2489d
/* Character set conversion.
Packit Service a2489d
   Copyright (C) 1999-2001, 2007, 2009-2018 Free Software Foundation, Inc.
Packit Service a2489d
Packit Service a2489d
   This program is free software; you can redistribute it and/or modify
Packit Service a2489d
   it under the terms of the GNU General Public License as published by
Packit Service a2489d
   the Free Software Foundation; either version 3, or (at your option)
Packit Service a2489d
   any later version.
Packit Service a2489d
Packit Service a2489d
   This program is distributed in the hope that it will be useful,
Packit Service a2489d
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service a2489d
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit Service a2489d
   GNU General Public License for more details.
Packit Service a2489d
Packit Service a2489d
   You should have received a copy of the GNU General Public License along
Packit Service a2489d
   with this program; if not, see <https://www.gnu.org/licenses/>.  */
Packit Service a2489d
Packit Service a2489d
#include <config.h>
Packit Service a2489d
Packit Service a2489d
/* Specification.  */
Packit Service a2489d
#include <iconv.h>
Packit Service a2489d
Packit Service a2489d
#include <stddef.h>
Packit Service a2489d
Packit Service a2489d
#if REPLACE_ICONV_UTF
Packit Service a2489d
# include <errno.h>
Packit Service a2489d
# include <stdint.h>
Packit Service a2489d
# include <stdlib.h>
Packit Service a2489d
# include "unistr.h"
Packit Service a2489d
# ifndef uintptr_t
Packit Service a2489d
#  define uintptr_t unsigned long
Packit Service a2489d
# endif
Packit Service a2489d
#endif
Packit Service a2489d
Packit Service a2489d
#if REPLACE_ICONV_UTF
Packit Service a2489d
Packit Service a2489d
/* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11.  */
Packit Service a2489d
Packit Service a2489d
/* Return code if invalid. (xxx_mbtowc) */
Packit Service a2489d
# define RET_ILSEQ      -1
Packit Service a2489d
/* Return code if no bytes were read. (xxx_mbtowc) */
Packit Service a2489d
# define RET_TOOFEW     -2
Packit Service a2489d
Packit Service a2489d
/* Return code if invalid. (xxx_wctomb) */
Packit Service a2489d
# define RET_ILUNI      -1
Packit Service a2489d
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
Packit Service a2489d
# define RET_TOOSMALL   -2
Packit Service a2489d
Packit Service a2489d
/*
Packit Service a2489d
 * UTF-16BE
Packit Service a2489d
 */
Packit Service a2489d
Packit Service a2489d
/* Specification: RFC 2781 */
Packit Service a2489d
Packit Service a2489d
static int
Packit Service a2489d
utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit Service a2489d
{
Packit Service a2489d
  if (n >= 2)
Packit Service a2489d
    {
Packit Service a2489d
      ucs4_t wc = (s[0] << 8) + s[1];
Packit Service a2489d
      if (wc >= 0xd800 && wc < 0xdc00)
Packit Service a2489d
        {
Packit Service a2489d
          if (n >= 4)
Packit Service a2489d
            {
Packit Service a2489d
              ucs4_t wc2 = (s[2] << 8) + s[3];
Packit Service a2489d
              if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
Packit Service a2489d
                return RET_ILSEQ;
Packit Service a2489d
              *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
Packit Service a2489d
              return 4;
Packit Service a2489d
            }
Packit Service a2489d
        }
Packit Service a2489d
      else if (wc >= 0xdc00 && wc < 0xe000)
Packit Service a2489d
        {
Packit Service a2489d
          return RET_ILSEQ;
Packit Service a2489d
        }
Packit Service a2489d
      else
Packit Service a2489d
        {
Packit Service a2489d
          *pwc = wc;
Packit Service a2489d
          return 2;
Packit Service a2489d
        }
Packit Service a2489d
    }
Packit Service a2489d
  return RET_TOOFEW;
Packit Service a2489d
}
Packit Service a2489d
Packit Service a2489d
static int
Packit Service a2489d
utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit Service a2489d
{
Packit Service a2489d
  if (!(wc >= 0xd800 && wc < 0xe000))
Packit Service a2489d
    {
Packit Service a2489d
      if (wc < 0x10000)
Packit Service a2489d
        {
Packit Service a2489d
          if (n >= 2)
Packit Service a2489d
            {
Packit Service a2489d
              r[0] = (unsigned char) (wc >> 8);
Packit Service a2489d
              r[1] = (unsigned char) wc;
Packit Service a2489d
              return 2;
Packit Service a2489d
            }
Packit Service a2489d
          else
Packit Service a2489d
            return RET_TOOSMALL;
Packit Service a2489d
        }
Packit Service a2489d
      else if (wc < 0x110000)
Packit Service a2489d
        {
Packit Service a2489d
          if (n >= 4)
Packit Service a2489d
            {
Packit Service a2489d
              ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
Packit Service a2489d
              ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
Packit Service a2489d
              r[0] = (unsigned char) (wc1 >> 8);
Packit Service a2489d
              r[1] = (unsigned char) wc1;
Packit Service a2489d
              r[2] = (unsigned char) (wc2 >> 8);
Packit Service a2489d
              r[3] = (unsigned char) wc2;
Packit Service a2489d
              return 4;
Packit Service a2489d
            }
Packit Service a2489d
          else
Packit Service a2489d
            return RET_TOOSMALL;
Packit Service a2489d
        }
Packit Service a2489d
    }
Packit Service a2489d
  return RET_ILUNI;
Packit Service a2489d
}
Packit Service a2489d
Packit Service a2489d
/*
Packit Service a2489d
 * UTF-16LE
Packit Service a2489d
 */
Packit Service a2489d
Packit Service a2489d
/* Specification: RFC 2781 */
Packit Service a2489d
Packit Service a2489d
static int
Packit Service a2489d
utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit Service a2489d
{
Packit Service a2489d
  if (n >= 2)
Packit Service a2489d
    {
Packit Service a2489d
      ucs4_t wc = s[0] + (s[1] << 8);
Packit Service a2489d
      if (wc >= 0xd800 && wc < 0xdc00)
Packit Service a2489d
        {
Packit Service a2489d
          if (n >= 4)
Packit Service a2489d
            {
Packit Service a2489d
              ucs4_t wc2 = s[2] + (s[3] << 8);
Packit Service a2489d
              if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
Packit Service a2489d
                return RET_ILSEQ;
Packit Service a2489d
              *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
Packit Service a2489d
              return 4;
Packit Service a2489d
            }
Packit Service a2489d
        }
Packit Service a2489d
      else if (wc >= 0xdc00 && wc < 0xe000)
Packit Service a2489d
        {
Packit Service a2489d
          return RET_ILSEQ;
Packit Service a2489d
        }
Packit Service a2489d
      else
Packit Service a2489d
        {
Packit Service a2489d
          *pwc = wc;
Packit Service a2489d
          return 2;
Packit Service a2489d
        }
Packit Service a2489d
    }
Packit Service a2489d
  return RET_TOOFEW;
Packit Service a2489d
}
Packit Service a2489d
Packit Service a2489d
static int
Packit Service a2489d
utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit Service a2489d
{
Packit Service a2489d
  if (!(wc >= 0xd800 && wc < 0xe000))
Packit Service a2489d
    {
Packit Service a2489d
      if (wc < 0x10000)
Packit Service a2489d
        {
Packit Service a2489d
          if (n >= 2)
Packit Service a2489d
            {
Packit Service a2489d
              r[0] = (unsigned char) wc;
Packit Service a2489d
              r[1] = (unsigned char) (wc >> 8);
Packit Service a2489d
              return 2;
Packit Service a2489d
            }
Packit Service a2489d
          else
Packit Service a2489d
            return RET_TOOSMALL;
Packit Service a2489d
        }
Packit Service a2489d
      else if (wc < 0x110000)
Packit Service a2489d
        {
Packit Service a2489d
          if (n >= 4)
Packit Service a2489d
            {
Packit Service a2489d
              ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
Packit Service a2489d
              ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
Packit Service a2489d
              r[0] = (unsigned char) wc1;
Packit Service a2489d
              r[1] = (unsigned char) (wc1 >> 8);
Packit Service a2489d
              r[2] = (unsigned char) wc2;
Packit Service a2489d
              r[3] = (unsigned char) (wc2 >> 8);
Packit Service a2489d
              return 4;
Packit Service a2489d
            }
Packit Service a2489d
          else
Packit Service a2489d
            return RET_TOOSMALL;
Packit Service a2489d
        }
Packit Service a2489d
    }
Packit Service a2489d
  return RET_ILUNI;
Packit Service a2489d
}
Packit Service a2489d
Packit Service a2489d
/*
Packit Service a2489d
 * UTF-32BE
Packit Service a2489d
 */
Packit Service a2489d
Packit Service a2489d
/* Specification: Unicode 3.1 Standard Annex #19 */
Packit Service a2489d
Packit Service a2489d
static int
Packit Service a2489d
utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit Service a2489d
{
Packit Service a2489d
  if (n >= 4)
Packit Service a2489d
    {
Packit Service a2489d
      ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
Packit Service a2489d
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit Service a2489d
        {
Packit Service a2489d
          *pwc = wc;
Packit Service a2489d
          return 4;
Packit Service a2489d
        }
Packit Service a2489d
      else
Packit Service a2489d
        return RET_ILSEQ;
Packit Service a2489d
    }
Packit Service a2489d
  return RET_TOOFEW;
Packit Service a2489d
}
Packit Service a2489d
Packit Service a2489d
static int
Packit Service a2489d
utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit Service a2489d
{
Packit Service a2489d
  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit Service a2489d
    {
Packit Service a2489d
      if (n >= 4)
Packit Service a2489d
        {
Packit Service a2489d
          r[0] = 0;
Packit Service a2489d
          r[1] = (unsigned char) (wc >> 16);
Packit Service a2489d
          r[2] = (unsigned char) (wc >> 8);
Packit Service a2489d
          r[3] = (unsigned char) wc;
Packit Service a2489d
          return 4;
Packit Service a2489d
        }
Packit Service a2489d
      else
Packit Service a2489d
        return RET_TOOSMALL;
Packit Service a2489d
    }
Packit Service a2489d
  return RET_ILUNI;
Packit Service a2489d
}
Packit Service a2489d
Packit Service a2489d
/*
Packit Service a2489d
 * UTF-32LE
Packit Service a2489d
 */
Packit Service a2489d
Packit Service a2489d
/* Specification: Unicode 3.1 Standard Annex #19 */
Packit Service a2489d
Packit Service a2489d
static int
Packit Service a2489d
utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit Service a2489d
{
Packit Service a2489d
  if (n >= 4)
Packit Service a2489d
    {
Packit Service a2489d
      ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24);
Packit Service a2489d
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit Service a2489d
        {
Packit Service a2489d
          *pwc = wc;
Packit Service a2489d
          return 4;
Packit Service a2489d
        }
Packit Service a2489d
      else
Packit Service a2489d
        return RET_ILSEQ;
Packit Service a2489d
    }
Packit Service a2489d
  return RET_TOOFEW;
Packit Service a2489d
}
Packit Service a2489d
Packit Service a2489d
static int
Packit Service a2489d
utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit Service a2489d
{
Packit Service a2489d
  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit Service a2489d
    {
Packit Service a2489d
      if (n >= 4)
Packit Service a2489d
        {
Packit Service a2489d
          r[0] = (unsigned char) wc;
Packit Service a2489d
          r[1] = (unsigned char) (wc >> 8);
Packit Service a2489d
          r[2] = (unsigned char) (wc >> 16);
Packit Service a2489d
          r[3] = 0;
Packit Service a2489d
          return 4;
Packit Service a2489d
        }
Packit Service a2489d
      else
Packit Service a2489d
        return RET_TOOSMALL;
Packit Service a2489d
    }
Packit Service a2489d
  return RET_ILUNI;
Packit Service a2489d
}
Packit Service a2489d
Packit Service a2489d
#endif
Packit Service a2489d
Packit Service a2489d
size_t
Packit Service a2489d
rpl_iconv (iconv_t cd,
Packit Service a2489d
           ICONV_CONST char **inbuf, size_t *inbytesleft,
Packit Service a2489d
           char **outbuf, size_t *outbytesleft)
Packit Service a2489d
#undef iconv
Packit Service a2489d
{
Packit Service a2489d
#if REPLACE_ICONV_UTF
Packit Service a2489d
  switch ((uintptr_t) cd)
Packit Service a2489d
    {
Packit Service a2489d
      {
Packit Service a2489d
        int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
Packit Service a2489d
Packit Service a2489d
        case (uintptr_t) _ICONV_UTF8_UTF16BE:
Packit Service a2489d
          xxx_wctomb = utf16be_wctomb;
Packit Service a2489d
          goto loop_from_utf8;
Packit Service a2489d
        case (uintptr_t) _ICONV_UTF8_UTF16LE:
Packit Service a2489d
          xxx_wctomb = utf16le_wctomb;
Packit Service a2489d
          goto loop_from_utf8;
Packit Service a2489d
        case (uintptr_t) _ICONV_UTF8_UTF32BE:
Packit Service a2489d
          xxx_wctomb = utf32be_wctomb;
Packit Service a2489d
          goto loop_from_utf8;
Packit Service a2489d
        case (uintptr_t) _ICONV_UTF8_UTF32LE:
Packit Service a2489d
          xxx_wctomb = utf32le_wctomb;
Packit Service a2489d
          goto loop_from_utf8;
Packit Service a2489d
Packit Service a2489d
       loop_from_utf8:
Packit Service a2489d
        if (inbuf == NULL || *inbuf == NULL)
Packit Service a2489d
          return 0;
Packit Service a2489d
        {
Packit Service a2489d
          ICONV_CONST char *inptr = *inbuf;
Packit Service a2489d
          size_t inleft = *inbytesleft;
Packit Service a2489d
          char *outptr = *outbuf;
Packit Service a2489d
          size_t outleft = *outbytesleft;
Packit Service a2489d
          size_t res = 0;
Packit Service a2489d
          while (inleft > 0)
Packit Service a2489d
            {
Packit Service a2489d
              ucs4_t uc;
Packit Service a2489d
              int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
Packit Service a2489d
              if (m <= 0)
Packit Service a2489d
                {
Packit Service a2489d
                  if (m == -1)
Packit Service a2489d
                    {
Packit Service a2489d
                      errno = EILSEQ;
Packit Service a2489d
                      res = (size_t)(-1);
Packit Service a2489d
                      break;
Packit Service a2489d
                    }
Packit Service a2489d
                  if (m == -2)
Packit Service a2489d
                    {
Packit Service a2489d
                      errno = EINVAL;
Packit Service a2489d
                      res = (size_t)(-1);
Packit Service a2489d
                      break;
Packit Service a2489d
                    }
Packit Service a2489d
                  abort ();
Packit Service a2489d
                }
Packit Service a2489d
              else
Packit Service a2489d
                {
Packit Service a2489d
                  int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
Packit Service a2489d
                  if (n < 0)
Packit Service a2489d
                    {
Packit Service a2489d
                      if (n == RET_ILUNI)
Packit Service a2489d
                        {
Packit Service a2489d
                          errno = EILSEQ;
Packit Service a2489d
                          res = (size_t)(-1);
Packit Service a2489d
                          break;
Packit Service a2489d
                        }
Packit Service a2489d
                      if (n == RET_TOOSMALL)
Packit Service a2489d
                        {
Packit Service a2489d
                          errno = E2BIG;
Packit Service a2489d
                          res = (size_t)(-1);
Packit Service a2489d
                          break;
Packit Service a2489d
                        }
Packit Service a2489d
                      abort ();
Packit Service a2489d
                    }
Packit Service a2489d
                  else
Packit Service a2489d
                    {
Packit Service a2489d
                      inptr += m;
Packit Service a2489d
                      inleft -= m;
Packit Service a2489d
                      outptr += n;
Packit Service a2489d
                      outleft -= n;
Packit Service a2489d
                    }
Packit Service a2489d
                }
Packit Service a2489d
            }
Packit Service a2489d
          *inbuf = inptr;
Packit Service a2489d
          *inbytesleft = inleft;
Packit Service a2489d
          *outbuf = outptr;
Packit Service a2489d
          *outbytesleft = outleft;
Packit Service a2489d
          return res;
Packit Service a2489d
        }
Packit Service a2489d
      }
Packit Service a2489d
Packit Service a2489d
      {
Packit Service a2489d
        int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
Packit Service a2489d
Packit Service a2489d
        case (uintptr_t) _ICONV_UTF16BE_UTF8:
Packit Service a2489d
          xxx_mbtowc = utf16be_mbtowc;
Packit Service a2489d
          goto loop_to_utf8;
Packit Service a2489d
        case (uintptr_t) _ICONV_UTF16LE_UTF8:
Packit Service a2489d
          xxx_mbtowc = utf16le_mbtowc;
Packit Service a2489d
          goto loop_to_utf8;
Packit Service a2489d
        case (uintptr_t) _ICONV_UTF32BE_UTF8:
Packit Service a2489d
          xxx_mbtowc = utf32be_mbtowc;
Packit Service a2489d
          goto loop_to_utf8;
Packit Service a2489d
        case (uintptr_t) _ICONV_UTF32LE_UTF8:
Packit Service a2489d
          xxx_mbtowc = utf32le_mbtowc;
Packit Service a2489d
          goto loop_to_utf8;
Packit Service a2489d
Packit Service a2489d
       loop_to_utf8:
Packit Service a2489d
        if (inbuf == NULL || *inbuf == NULL)
Packit Service a2489d
          return 0;
Packit Service a2489d
        {
Packit Service a2489d
          ICONV_CONST char *inptr = *inbuf;
Packit Service a2489d
          size_t inleft = *inbytesleft;
Packit Service a2489d
          char *outptr = *outbuf;
Packit Service a2489d
          size_t outleft = *outbytesleft;
Packit Service a2489d
          size_t res = 0;
Packit Service a2489d
          while (inleft > 0)
Packit Service a2489d
            {
Packit Service a2489d
              ucs4_t uc;
Packit Service a2489d
              int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
Packit Service a2489d
              if (m <= 0)
Packit Service a2489d
                {
Packit Service a2489d
                  if (m == RET_ILSEQ)
Packit Service a2489d
                    {
Packit Service a2489d
                      errno = EILSEQ;
Packit Service a2489d
                      res = (size_t)(-1);
Packit Service a2489d
                      break;
Packit Service a2489d
                    }
Packit Service a2489d
                  if (m == RET_TOOFEW)
Packit Service a2489d
                    {
Packit Service a2489d
                      errno = EINVAL;
Packit Service a2489d
                      res = (size_t)(-1);
Packit Service a2489d
                      break;
Packit Service a2489d
                    }
Packit Service a2489d
                  abort ();
Packit Service a2489d
                }
Packit Service a2489d
              else
Packit Service a2489d
                {
Packit Service a2489d
                  int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
Packit Service a2489d
                  if (n < 0)
Packit Service a2489d
                    {
Packit Service a2489d
                      if (n == -1)
Packit Service a2489d
                        {
Packit Service a2489d
                          errno = EILSEQ;
Packit Service a2489d
                          res = (size_t)(-1);
Packit Service a2489d
                          break;
Packit Service a2489d
                        }
Packit Service a2489d
                      if (n == -2)
Packit Service a2489d
                        {
Packit Service a2489d
                          errno = E2BIG;
Packit Service a2489d
                          res = (size_t)(-1);
Packit Service a2489d
                          break;
Packit Service a2489d
                        }
Packit Service a2489d
                      abort ();
Packit Service a2489d
                    }
Packit Service a2489d
                  else
Packit Service a2489d
                    {
Packit Service a2489d
                      inptr += m;
Packit Service a2489d
                      inleft -= m;
Packit Service a2489d
                      outptr += n;
Packit Service a2489d
                      outleft -= n;
Packit Service a2489d
                    }
Packit Service a2489d
                }
Packit Service a2489d
            }
Packit Service a2489d
          *inbuf = inptr;
Packit Service a2489d
          *inbytesleft = inleft;
Packit Service a2489d
          *outbuf = outptr;
Packit Service a2489d
          *outbytesleft = outleft;
Packit Service a2489d
          return res;
Packit Service a2489d
        }
Packit Service a2489d
      }
Packit Service a2489d
    }
Packit Service a2489d
#endif
Packit Service a2489d
  return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
Packit Service a2489d
}