Blame gettext-runtime/gnulib-lib/iconv.c

Packit 5b56b6
/* Character set conversion.
Packit 5b56b6
   Copyright (C) 1999-2001, 2007, 2009-2015 Free Software Foundation, Inc.
Packit 5b56b6
Packit 5b56b6
   This program is free software; you can redistribute it and/or modify
Packit 5b56b6
   it under the terms of the GNU General Public License as published by
Packit 5b56b6
   the Free Software Foundation; either version 3, or (at your option)
Packit 5b56b6
   any later version.
Packit 5b56b6
Packit 5b56b6
   This program is distributed in the hope that it will be useful,
Packit 5b56b6
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 5b56b6
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 5b56b6
   GNU General Public License for more details.
Packit 5b56b6
Packit 5b56b6
   You should have received a copy of the GNU General Public License along
Packit 5b56b6
   with this program; if not, see <http://www.gnu.org/licenses/>.  */
Packit 5b56b6
Packit 5b56b6
#include <config.h>
Packit 5b56b6
Packit 5b56b6
/* Specification.  */
Packit 5b56b6
#include <iconv.h>
Packit 5b56b6
Packit 5b56b6
#include <stddef.h>
Packit 5b56b6
Packit 5b56b6
#if REPLACE_ICONV_UTF
Packit 5b56b6
# include <errno.h>
Packit 5b56b6
# include <stdint.h>
Packit 5b56b6
# include <stdlib.h>
Packit 5b56b6
# include "unistr.h"
Packit 5b56b6
# ifndef uintptr_t
Packit 5b56b6
#  define uintptr_t unsigned long
Packit 5b56b6
# endif
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
#if REPLACE_ICONV_UTF
Packit 5b56b6
Packit 5b56b6
/* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11.  */
Packit 5b56b6
Packit 5b56b6
/* Return code if invalid. (xxx_mbtowc) */
Packit 5b56b6
# define RET_ILSEQ      -1
Packit 5b56b6
/* Return code if the input is too short to hold a complete character. (xxx_mbtowc) */
Packit 5b56b6
# define RET_TOOFEW     -2
Packit 5b56b6
Packit 5b56b6
/* Return code if invalid. (xxx_wctomb) */
Packit 5b56b6
# define RET_ILUNI      -1
Packit 5b56b6
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
Packit 5b56b6
# define RET_TOOSMALL   -2
Packit 5b56b6
Packit 5b56b6
/*
Packit 5b56b6
 * UTF-16BE
Packit 5b56b6
 */
Packit 5b56b6
Packit 5b56b6
/* Specification: RFC 2781 */
Packit 5b56b6
Packit 5b56b6
static int
Packit 5b56b6
utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 5b56b6
{
Packit 5b56b6
  if (n >= 2)
Packit 5b56b6
    {
Packit 5b56b6
      ucs4_t wc = (s[0] << 8) + s[1];
Packit 5b56b6
      if (wc >= 0xd800 && wc < 0xdc00)
Packit 5b56b6
        {
Packit 5b56b6
          if (n >= 4)
Packit 5b56b6
            {
Packit 5b56b6
              ucs4_t wc2 = (s[2] << 8) + s[3];
Packit 5b56b6
              if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
Packit 5b56b6
                return RET_ILSEQ;
Packit 5b56b6
              *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
Packit 5b56b6
              return 4;
Packit 5b56b6
            }
Packit 5b56b6
        }
Packit 5b56b6
      else if (wc >= 0xdc00 && wc < 0xe000)
Packit 5b56b6
        {
Packit 5b56b6
          return RET_ILSEQ;
Packit 5b56b6
        }
Packit 5b56b6
      else
Packit 5b56b6
        {
Packit 5b56b6
          *pwc = wc;
Packit 5b56b6
          return 2;
Packit 5b56b6
        }
Packit 5b56b6
    }
Packit 5b56b6
  return RET_TOOFEW;
Packit 5b56b6
}
Packit 5b56b6
Packit 5b56b6
static int
Packit 5b56b6
utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 5b56b6
{
Packit 5b56b6
  if (!(wc >= 0xd800 && wc < 0xe000))
Packit 5b56b6
    {
Packit 5b56b6
      if (wc < 0x10000)
Packit 5b56b6
        {
Packit 5b56b6
          if (n >= 2)
Packit 5b56b6
            {
Packit 5b56b6
              r[0] = (unsigned char) (wc >> 8);
Packit 5b56b6
              r[1] = (unsigned char) wc;
Packit 5b56b6
              return 2;
Packit 5b56b6
            }
Packit 5b56b6
          else
Packit 5b56b6
            return RET_TOOSMALL;
Packit 5b56b6
        }
Packit 5b56b6
      else if (wc < 0x110000)
Packit 5b56b6
        {
Packit 5b56b6
          if (n >= 4)
Packit 5b56b6
            {
Packit 5b56b6
              ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
Packit 5b56b6
              ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
Packit 5b56b6
              r[0] = (unsigned char) (wc1 >> 8);
Packit 5b56b6
              r[1] = (unsigned char) wc1;
Packit 5b56b6
              r[2] = (unsigned char) (wc2 >> 8);
Packit 5b56b6
              r[3] = (unsigned char) wc2;
Packit 5b56b6
              return 4;
Packit 5b56b6
            }
Packit 5b56b6
          else
Packit 5b56b6
            return RET_TOOSMALL;
Packit 5b56b6
        }
Packit 5b56b6
    }
Packit 5b56b6
  return RET_ILUNI;
Packit 5b56b6
}
Packit 5b56b6
Packit 5b56b6
/*
Packit 5b56b6
 * UTF-16LE
Packit 5b56b6
 */
Packit 5b56b6
Packit 5b56b6
/* Specification: RFC 2781 */
Packit 5b56b6
Packit 5b56b6
static int
Packit 5b56b6
utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 5b56b6
{
Packit 5b56b6
  if (n >= 2)
Packit 5b56b6
    {
Packit 5b56b6
      ucs4_t wc = s[0] + (s[1] << 8);
Packit 5b56b6
      if (wc >= 0xd800 && wc < 0xdc00)
Packit 5b56b6
        {
Packit 5b56b6
          if (n >= 4)
Packit 5b56b6
            {
Packit 5b56b6
              ucs4_t wc2 = s[2] + (s[3] << 8);
Packit 5b56b6
              if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
Packit 5b56b6
                return RET_ILSEQ;
Packit 5b56b6
              *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
Packit 5b56b6
              return 4;
Packit 5b56b6
            }
Packit 5b56b6
        }
Packit 5b56b6
      else if (wc >= 0xdc00 && wc < 0xe000)
Packit 5b56b6
        {
Packit 5b56b6
          return RET_ILSEQ;
Packit 5b56b6
        }
Packit 5b56b6
      else
Packit 5b56b6
        {
Packit 5b56b6
          *pwc = wc;
Packit 5b56b6
          return 2;
Packit 5b56b6
        }
Packit 5b56b6
    }
Packit 5b56b6
  return RET_TOOFEW;
Packit 5b56b6
}
Packit 5b56b6
Packit 5b56b6
static int
Packit 5b56b6
utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 5b56b6
{
Packit 5b56b6
  if (!(wc >= 0xd800 && wc < 0xe000))
Packit 5b56b6
    {
Packit 5b56b6
      if (wc < 0x10000)
Packit 5b56b6
        {
Packit 5b56b6
          if (n >= 2)
Packit 5b56b6
            {
Packit 5b56b6
              r[0] = (unsigned char) wc;
Packit 5b56b6
              r[1] = (unsigned char) (wc >> 8);
Packit 5b56b6
              return 2;
Packit 5b56b6
            }
Packit 5b56b6
          else
Packit 5b56b6
            return RET_TOOSMALL;
Packit 5b56b6
        }
Packit 5b56b6
      else if (wc < 0x110000)
Packit 5b56b6
        {
Packit 5b56b6
          if (n >= 4)
Packit 5b56b6
            {
Packit 5b56b6
              ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
Packit 5b56b6
              ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
Packit 5b56b6
              r[0] = (unsigned char) wc1;
Packit 5b56b6
              r[1] = (unsigned char) (wc1 >> 8);
Packit 5b56b6
              r[2] = (unsigned char) wc2;
Packit 5b56b6
              r[3] = (unsigned char) (wc2 >> 8);
Packit 5b56b6
              return 4;
Packit 5b56b6
            }
Packit 5b56b6
          else
Packit 5b56b6
            return RET_TOOSMALL;
Packit 5b56b6
        }
Packit 5b56b6
    }
Packit 5b56b6
  return RET_ILUNI;
Packit 5b56b6
}
Packit 5b56b6
Packit 5b56b6
/*
Packit 5b56b6
 * UTF-32BE
Packit 5b56b6
 */
Packit 5b56b6
Packit 5b56b6
/* Specification: Unicode 3.1 Standard Annex #19 */
Packit 5b56b6
Packit 5b56b6
static int
Packit 5b56b6
utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 5b56b6
{
Packit 5b56b6
  if (n >= 4)
Packit 5b56b6
    {
Packit 5b56b6
      ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
Packit 5b56b6
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 5b56b6
        {
Packit 5b56b6
          *pwc = wc;
Packit 5b56b6
          return 4;
Packit 5b56b6
        }
Packit 5b56b6
      else
Packit 5b56b6
        return RET_ILSEQ;
Packit 5b56b6
    }
Packit 5b56b6
  return RET_TOOFEW;
Packit 5b56b6
}
Packit 5b56b6
Packit 5b56b6
static int
Packit 5b56b6
utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 5b56b6
{
Packit 5b56b6
  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 5b56b6
    {
Packit 5b56b6
      if (n >= 4)
Packit 5b56b6
        {
Packit 5b56b6
          r[0] = 0;
Packit 5b56b6
          r[1] = (unsigned char) (wc >> 16);
Packit 5b56b6
          r[2] = (unsigned char) (wc >> 8);
Packit 5b56b6
          r[3] = (unsigned char) wc;
Packit 5b56b6
          return 4;
Packit 5b56b6
        }
Packit 5b56b6
      else
Packit 5b56b6
        return RET_TOOSMALL;
Packit 5b56b6
    }
Packit 5b56b6
  return RET_ILUNI;
Packit 5b56b6
}
Packit 5b56b6
Packit 5b56b6
/*
Packit 5b56b6
 * UTF-32LE
Packit 5b56b6
 */
Packit 5b56b6
Packit 5b56b6
/* Specification: Unicode 3.1 Standard Annex #19 */
Packit 5b56b6
Packit 5b56b6
static int
Packit 5b56b6
utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
Packit 5b56b6
{
Packit 5b56b6
  if (n >= 4)
Packit 5b56b6
    {
Packit 5b56b6
      ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24);
Packit 5b56b6
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 5b56b6
        {
Packit 5b56b6
          *pwc = wc;
Packit 5b56b6
          return 4;
Packit 5b56b6
        }
Packit 5b56b6
      else
Packit 5b56b6
        return RET_ILSEQ;
Packit 5b56b6
    }
Packit 5b56b6
  return RET_TOOFEW;
Packit 5b56b6
}
Packit 5b56b6
Packit 5b56b6
static int
Packit 5b56b6
utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
Packit 5b56b6
{
Packit 5b56b6
  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
Packit 5b56b6
    {
Packit 5b56b6
      if (n >= 4)
Packit 5b56b6
        {
Packit 5b56b6
          r[0] = (unsigned char) wc;
Packit 5b56b6
          r[1] = (unsigned char) (wc >> 8);
Packit 5b56b6
          r[2] = (unsigned char) (wc >> 16);
Packit 5b56b6
          r[3] = 0;
Packit 5b56b6
          return 4;
Packit 5b56b6
        }
Packit 5b56b6
      else
Packit 5b56b6
        return RET_TOOSMALL;
Packit 5b56b6
    }
Packit 5b56b6
  return RET_ILUNI;
Packit 5b56b6
}
Packit 5b56b6
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
size_t
Packit 5b56b6
rpl_iconv (iconv_t cd,
Packit 5b56b6
           ICONV_CONST char **inbuf, size_t *inbytesleft,
Packit 5b56b6
           char **outbuf, size_t *outbytesleft)
Packit 5b56b6
#undef iconv
Packit 5b56b6
{
Packit 5b56b6
#if REPLACE_ICONV_UTF
Packit 5b56b6
  switch ((uintptr_t) cd)
Packit 5b56b6
    {
Packit 5b56b6
      {
Packit 5b56b6
        int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
Packit 5b56b6
Packit 5b56b6
        case (uintptr_t) _ICONV_UTF8_UTF16BE:
Packit 5b56b6
          xxx_wctomb = utf16be_wctomb;
Packit 5b56b6
          goto loop_from_utf8;
Packit 5b56b6
        case (uintptr_t) _ICONV_UTF8_UTF16LE:
Packit 5b56b6
          xxx_wctomb = utf16le_wctomb;
Packit 5b56b6
          goto loop_from_utf8;
Packit 5b56b6
        case (uintptr_t) _ICONV_UTF8_UTF32BE:
Packit 5b56b6
          xxx_wctomb = utf32be_wctomb;
Packit 5b56b6
          goto loop_from_utf8;
Packit 5b56b6
        case (uintptr_t) _ICONV_UTF8_UTF32LE:
Packit 5b56b6
          xxx_wctomb = utf32le_wctomb;
Packit 5b56b6
          goto loop_from_utf8;
Packit 5b56b6
Packit 5b56b6
       loop_from_utf8:
Packit 5b56b6
        if (inbuf == NULL || *inbuf == NULL)
Packit 5b56b6
          return 0;
Packit 5b56b6
        {
Packit 5b56b6
          ICONV_CONST char *inptr = *inbuf;
Packit 5b56b6
          size_t inleft = *inbytesleft;
Packit 5b56b6
          char *outptr = *outbuf;
Packit 5b56b6
          size_t outleft = *outbytesleft;
Packit 5b56b6
          size_t res = 0;
Packit 5b56b6
          while (inleft > 0)
Packit 5b56b6
            {
Packit 5b56b6
              ucs4_t uc;
Packit 5b56b6
              int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
Packit 5b56b6
              if (m <= 0)
Packit 5b56b6
                {
Packit 5b56b6
                  if (m == -1)
Packit 5b56b6
                    {
Packit 5b56b6
                      errno = EILSEQ;
Packit 5b56b6
                      res = (size_t)(-1);
Packit 5b56b6
                      break;
Packit 5b56b6
                    }
Packit 5b56b6
                  if (m == -2)
Packit 5b56b6
                    {
Packit 5b56b6
                      errno = EINVAL;
Packit 5b56b6
                      res = (size_t)(-1);
Packit 5b56b6
                      break;
Packit 5b56b6
                    }
Packit 5b56b6
                  abort ();
Packit 5b56b6
                }
Packit 5b56b6
              else
Packit 5b56b6
                {
Packit 5b56b6
                  int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
Packit 5b56b6
                  if (n < 0)
Packit 5b56b6
                    {
Packit 5b56b6
                      if (n == RET_ILUNI)
Packit 5b56b6
                        {
Packit 5b56b6
                          errno = EILSEQ;
Packit 5b56b6
                          res = (size_t)(-1);
Packit 5b56b6
                          break;
Packit 5b56b6
                        }
Packit 5b56b6
                      if (n == RET_TOOSMALL)
Packit 5b56b6
                        {
Packit 5b56b6
                          errno = E2BIG;
Packit 5b56b6
                          res = (size_t)(-1);
Packit 5b56b6
                          break;
Packit 5b56b6
                        }
Packit 5b56b6
                      abort ();
Packit 5b56b6
                    }
Packit 5b56b6
                  else
Packit 5b56b6
                    {
Packit 5b56b6
                      inptr += m;
Packit 5b56b6
                      inleft -= m;
Packit 5b56b6
                      outptr += n;
Packit 5b56b6
                      outleft -= n;
Packit 5b56b6
                    }
Packit 5b56b6
                }
Packit 5b56b6
            }
Packit 5b56b6
          *inbuf = inptr;
Packit 5b56b6
          *inbytesleft = inleft;
Packit 5b56b6
          *outbuf = outptr;
Packit 5b56b6
          *outbytesleft = outleft;
Packit 5b56b6
          return res;
Packit 5b56b6
        }
Packit 5b56b6
      }
Packit 5b56b6
Packit 5b56b6
      {
Packit 5b56b6
        int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
Packit 5b56b6
Packit 5b56b6
        case (uintptr_t) _ICONV_UTF16BE_UTF8:
Packit 5b56b6
          xxx_mbtowc = utf16be_mbtowc;
Packit 5b56b6
          goto loop_to_utf8;
Packit 5b56b6
        case (uintptr_t) _ICONV_UTF16LE_UTF8:
Packit 5b56b6
          xxx_mbtowc = utf16le_mbtowc;
Packit 5b56b6
          goto loop_to_utf8;
Packit 5b56b6
        case (uintptr_t) _ICONV_UTF32BE_UTF8:
Packit 5b56b6
          xxx_mbtowc = utf32be_mbtowc;
Packit 5b56b6
          goto loop_to_utf8;
Packit 5b56b6
        case (uintptr_t) _ICONV_UTF32LE_UTF8:
Packit 5b56b6
          xxx_mbtowc = utf32le_mbtowc;
Packit 5b56b6
          goto loop_to_utf8;
Packit 5b56b6
Packit 5b56b6
       loop_to_utf8:
Packit 5b56b6
        if (inbuf == NULL || *inbuf == NULL)
Packit 5b56b6
          return 0;
Packit 5b56b6
        {
Packit 5b56b6
          ICONV_CONST char *inptr = *inbuf;
Packit 5b56b6
          size_t inleft = *inbytesleft;
Packit 5b56b6
          char *outptr = *outbuf;
Packit 5b56b6
          size_t outleft = *outbytesleft;
Packit 5b56b6
          size_t res = 0;
Packit 5b56b6
          while (inleft > 0)
Packit 5b56b6
            {
Packit 5b56b6
              ucs4_t uc;
Packit 5b56b6
              int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
Packit 5b56b6
              if (m <= 0)
Packit 5b56b6
                {
Packit 5b56b6
                  if (m == RET_ILSEQ)
Packit 5b56b6
                    {
Packit 5b56b6
                      errno = EILSEQ;
Packit 5b56b6
                      res = (size_t)(-1);
Packit 5b56b6
                      break;
Packit 5b56b6
                    }
Packit 5b56b6
                  if (m == RET_TOOFEW)
Packit 5b56b6
                    {
Packit 5b56b6
                      errno = EINVAL;
Packit 5b56b6
                      res = (size_t)(-1);
Packit 5b56b6
                      break;
Packit 5b56b6
                    }
Packit 5b56b6
                  abort ();
Packit 5b56b6
                }
Packit 5b56b6
              else
Packit 5b56b6
                {
Packit 5b56b6
                  int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
Packit 5b56b6
                  if (n < 0)
Packit 5b56b6
                    {
Packit 5b56b6
                      if (n == -1)
Packit 5b56b6
                        {
Packit 5b56b6
                          errno = EILSEQ;
Packit 5b56b6
                          res = (size_t)(-1);
Packit 5b56b6
                          break;
Packit 5b56b6
                        }
Packit 5b56b6
                      if (n == -2)
Packit 5b56b6
                        {
Packit 5b56b6
                          errno = E2BIG;
Packit 5b56b6
                          res = (size_t)(-1);
Packit 5b56b6
                          break;
Packit 5b56b6
                        }
Packit 5b56b6
                      abort ();
Packit 5b56b6
                    }
Packit 5b56b6
                  else
Packit 5b56b6
                    {
Packit 5b56b6
                      inptr += m;
Packit 5b56b6
                      inleft -= m;
Packit 5b56b6
                      outptr += n;
Packit 5b56b6
                      outleft -= n;
Packit 5b56b6
                    }
Packit 5b56b6
                }
Packit 5b56b6
            }
Packit 5b56b6
          *inbuf = inptr;
Packit 5b56b6
          *inbytesleft = inleft;
Packit 5b56b6
          *outbuf = outptr;
Packit 5b56b6
          *outbytesleft = outleft;
Packit 5b56b6
          return res;
Packit 5b56b6
        }
Packit 5b56b6
      }
Packit 5b56b6
    }
Packit 5b56b6
#endif
Packit 5b56b6
  return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
Packit 5b56b6
}