Blame lib/striconv.c

Packit 33f14e
/* Charset conversion.
   Copyright (C) 2001-2007, 2010-2017 Free Software Foundation, Inc.
   Written by Bruno Haible and Simon Josefsson.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
Packit 33f14e
Packit 33f14e
#include <config.h>
Packit 33f14e
Packit 33f14e
/* Specification.  */
Packit 33f14e
#include "striconv.h"
Packit 33f14e
Packit 33f14e
#include <errno.h>
Packit 33f14e
#include <stdlib.h>
Packit 33f14e
#include <string.h>
Packit 33f14e
Packit 33f14e
#if HAVE_ICONV
Packit 33f14e
# include <iconv.h>
Packit 33f14e
/* Get MB_LEN_MAX, CHAR_BIT.  */
Packit 33f14e
# include <limits.h>
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
#include "c-strcase.h"
Packit 33f14e
Packit 33f14e
#ifndef SIZE_MAX
Packit 33f14e
# define SIZE_MAX ((size_t) -1)
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
Packit 33f14e
#if HAVE_ICONV
Packit 33f14e
Packit 33f14e
/* Convert the SRCLEN bytes starting at SRC with the conversion descriptor CD.

   The input is walked twice: a first pass converts into a scratch buffer
   purely to measure how many output bytes are produced, and a second pass
   converts into the final buffer.

   If the conversion produces no output, *LENGTHP is set to 0, *RESULTP is
   left untouched, and 0 is returned.  Otherwise the output is written to the
   buffer passed in through *RESULTP when that pointer is non-NULL and
   *LENGTHP is at least the required size, and to a freshly malloc()ed buffer
   when it is not; *RESULTP and *LENGTHP are then set to the output buffer
   and its length, and 0 is returned.

   On failure -1 is returned with errno set, a buffer allocated here is
   freed, and the values of *RESULTP and *LENGTHP are not changed.

   An EINVAL result from iconv() ends a conversion loop without being
   reported as an error.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
              char **resultp, size_t *lengthp)
{
  /* Calls to iconv() with a NULL input pointer are skipped on glibc <= 2.1
     (when not uClibc) and on Solaris, to avoid the glibc-2.1 bug and the
     Solaris 2.7-2.9 bug, unless GNU libiconv is in use.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
#  define iconv_null_input_ok 1
# else
#  define iconv_null_input_ok 0
# endif
  enum { scratch_size = 4096 };
  size_t needed = 0;
  char *out;

# if iconv_null_input_ok
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Pass 1: measure the size of the output.  */
  {
    /* The union provides the alignment that is needed when converting e.g.
       to glibc's WCHAR_T or libiconv's UCS-4-INTERNAL encoding.  */
    union { unsigned int align; char bytes[scratch_size]; } scratch;
    const char *in_pos = src;
    size_t in_left = srclen;

    while (in_left > 0)
      {
        char *out_pos = scratch.bytes;
        size_t out_left = scratch_size;
        size_t rc = iconv (cd,
                           (ICONV_CONST char **) &in_pos, &in_left,
                           &out_pos, &out_left);

        if (rc == (size_t)(-1))
          {
            if (errno == EINVAL)
              break;
            /* E2BIG only means the scratch buffer filled up: count what was
               produced and go around again.  Anything else is fatal.  */
            if (errno != E2BIG)
              return -1;
          }
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (rc > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
        needed += out_pos - scratch.bytes;
      }
# if iconv_null_input_ok
    /* Count the bytes emitted by the final iconv() call that has no
       input.  */
    {
      char *out_pos = scratch.bytes;
      size_t out_left = scratch_size;

      if (iconv (cd, NULL, NULL, &out_pos, &out_left) == (size_t)(-1))
        return -1;
      needed += out_pos - scratch.bytes;
    }
# endif
  }

  if (needed == 0)
    {
      *lengthp = 0;
      return 0;
    }

  /* Use the caller's buffer when it is present and large enough, otherwise
     allocate one of exactly the measured size.  */
  out = (*resultp != NULL && *lengthp >= needed ? *resultp : NULL);
  if (out == NULL)
    {
      out = (char *) malloc (needed);
      if (out == NULL)
        {
          errno = ENOMEM;
          return -1;
        }
    }

# if iconv_null_input_ok
  /* Return to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Pass 2: do the conversion for real.  */
  {
    const char *in_pos = src;
    size_t in_left = srclen;
    char *out_pos = out;
    size_t out_left = needed;

    while (in_left > 0)
      {
        size_t rc = iconv (cd,
                           (ICONV_CONST char **) &in_pos, &in_left,
                           &out_pos, &out_left);

        if (rc == (size_t)(-1))
          {
            if (errno != EINVAL)
              goto fail;
            break;
          }
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
        /* Same lossy-conversion check as in the measuring pass.  */
        else if (rc > 0)
          {
            errno = EILSEQ;
            goto fail;
          }
# endif
      }
# if iconv_null_input_ok
    if (iconv (cd, NULL, NULL, &out_pos, &out_left) == (size_t)(-1))
      goto fail;
# endif
    /* The second pass must fill exactly the size measured by the first.  */
    if (out_left != 0)
      abort ();
  }

  *resultp = out;
  *lengthp = needed;
  return 0;

 fail:
  /* Free only a buffer allocated here, never the caller's, and keep the
     errno of the failed conversion.  */
  if (out != *resultp)
    {
      int saved_errno = errno;
      free (out);
      errno = saved_errno;
    }
  return -1;
# undef iconv_null_input_ok
}
Packit 33f14e
Packit 33f14e
/* Convert the NUL-terminated string SRC with the conversion descriptor CD
   and return the result as a freshly allocated, NUL-terminated string.
   Return NULL with errno set on failure.

   For most encodings, a trailing NUL byte in the input will be converted
   to a trailing NUL byte in the output.  But not for UTF-7.  So that this
   function is usable for UTF-7, the NUL byte is excluded from the
   conversion and added by hand afterwards.  */
char *
str_cd_iconv (const char *src, iconv_t cd)
{
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
  /* Irix iconv() inserts a NUL byte if it cannot convert.
     NetBSD iconv() inserts a question mark if it cannot convert.
     Only GNU libiconv and GNU libc are known to prefer to fail rather
     than doing a lossy conversion.  For other iconv() implementations,
     we have to look at the number of irreversible conversions returned;
     but this information is lost when iconv() returns for an E2BIG reason.
     Therefore the single-pass algorithm in the other branch cannot be used
     here, and the work is delegated to mem_cd_iconv.  */
  char *converted = NULL;
  size_t converted_len = 0;
  char *terminated;

  if (mem_cd_iconv (src, strlen (src), cd, &converted, &converted_len) < 0)
    {
      /* No buffer was passed in, so none may come back on failure.  */
      if (converted != NULL)
        abort ();
      return NULL;
    }

  /* Make room for the terminating NUL byte.  CONVERTED is still NULL when
     the conversion produced no output.  */
  if (converted != NULL)
    terminated = realloc (converted, converted_len + 1);
  else
    terminated = malloc (converted_len + 1);
  if (terminated == NULL)
    {
      free (converted);
      errno = ENOMEM;
      return NULL;
    }
  terminated[converted_len] = '\0';

  return terminated;

# else
  /* This algorithm is likely faster than the one above.  But it may produce
     iconv() returns for an E2BIG reason, when the output size guess is too
     small.  Therefore it can only be used when we don't need the number of
     irreversible conversions performed.  */

  /* Calls to iconv() with a NULL input pointer are skipped on glibc <= 2.1
     (when not uClibc) and on Solaris, to avoid the glibc-2.1 bug and the
     Solaris 2.7-2.9 bug, unless GNU libiconv is in use.  */
#  if defined _LIBICONV_VERSION \
      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
           || defined __sun)
#   define iconv_null_input_ok 1
#  else
#   define iconv_null_input_ok 0
#  endif
  const char *in_pos = src;
  size_t in_left = strlen (src);
  const size_t approx_sqrt_SIZE_MAX =
    SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
  /* Guess the worst-case output size, in order to avoid a realloc.  It's OK
     if the guess is wrong as long as it is not zero and doesn't lead to an
     integer overflow.  The extra byte is for the terminating NUL.  */
  size_t capacity =
    (in_left <= approx_sqrt_SIZE_MAX / MB_LEN_MAX
     ? in_left * MB_LEN_MAX
     : in_left)
    + 1;
  char *buf = (char *) malloc (capacity);
  char *out_pos;
  size_t out_left;
  size_t total;

  if (buf == NULL)
    {
      errno = ENOMEM;
      return NULL;
    }

#  if iconv_null_input_ok
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
#  endif

  /* One byte of the buffer is always held back for the terminating NUL.  */
  out_pos = buf;
  out_left = capacity - 1;

  /* Do the conversion.  */
  for (;;)
    {
      /* Here in_pos + in_left = src + strlen (src),
              out_pos + out_left = buf + capacity - 1.  */
      size_t rc = iconv (cd,
                         (ICONV_CONST char **) &in_pos, &in_left,
                         &out_pos, &out_left);

      if (rc != (size_t)(-1) || errno == EINVAL)
        break;
      if (errno != E2BIG)
        goto failed;

      /* The output buffer is full: double it and carry on.  */
      {
        size_t used = out_pos - buf;
        size_t doubled = capacity * 2;
        char *grown;

        if (doubled <= capacity)
          {
            /* The doubled size wrapped around.  */
            errno = ENOMEM;
            goto failed;
          }
        grown = (char *) realloc (buf, doubled);
        if (grown == NULL)
          {
            errno = ENOMEM;
            goto failed;
          }
        buf = grown;
        capacity = doubled;
        out_pos = buf + used;
        out_left = capacity - 1 - used;
      }
    }

#  if iconv_null_input_ok
  /* Final iconv() call without input, growing the buffer the same way when
     it does not fit.  */
  for (;;)
    {
      /* Here out_pos + out_left = buf + capacity - 1.  */
      size_t rc = iconv (cd, NULL, NULL, &out_pos, &out_left);

      if (rc != (size_t)(-1))
        break;
      if (errno != E2BIG)
        goto failed;

      {
        size_t used = out_pos - buf;
        size_t doubled = capacity * 2;
        char *grown;

        if (doubled <= capacity)
          {
            errno = ENOMEM;
            goto failed;
          }
        grown = (char *) realloc (buf, doubled);
        if (grown == NULL)
          {
            errno = ENOMEM;
            goto failed;
          }
        buf = grown;
        capacity = doubled;
        out_pos = buf + used;
        out_left = capacity - 1 - used;
      }
    }
#  endif

  /* Add the terminating NUL byte; TOTAL includes it.  */
  *out_pos = '\0';
  total = (size_t) (out_pos - buf) + 1;

  /* Give away unused memory.  A failed shrink is harmless: the larger
     buffer is still valid.  */
  if (total < capacity)
    {
      char *shrunk = (char *) realloc (buf, total);

      if (shrunk != NULL)
        buf = shrunk;
    }

  return buf;

 failed:
  {
    /* Release the buffer while keeping the errno of the failure.  */
    int saved_errno = errno;
    free (buf);
    errno = saved_errno;
    return NULL;
  }
#  undef iconv_null_input_ok

# endif
}
Packit 33f14e
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET and return the result as a freshly allocated
   string.  Return NULL with errno set on failure.

   When SRC is empty, or when the two codeset names are equal according to
   c_strcasecmp, no conversion is attempted and a copy of SRC is returned.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *copy = strdup (src);

      if (copy == NULL)
        errno = ENOMEM;
      return copy;
    }

#if HAVE_ICONV
  {
    iconv_t cd;
    char *converted;
    int saved_errno;

    /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
    if (c_strcasecmp (from_codeset, "EUC-KR") == 0
        || c_strcasecmp (to_codeset, "EUC-KR") == 0)
      {
        errno = EINVAL;
        return NULL;
      }
# endif

    cd = iconv_open (to_codeset, from_codeset);
    if (cd == (iconv_t) -1)
      return NULL;

    converted = str_cd_iconv (src, cd);
    if (converted == NULL)
      {
        /* Close cd, but preserve the errno from str_cd_iconv.  */
        saved_errno = errno;
        iconv_close (cd);
        errno = saved_errno;
        return NULL;
      }

    if (iconv_close (cd) < 0)
      {
        /* Return NULL, but free the allocated memory, and while doing
           that, preserve the errno from iconv_close.  */
        saved_errno = errno;
        free (converted);
        errno = saved_errno;
        return NULL;
      }

    return converted;
  }
#else
  /* This is a different error code than if iconv_open existed but didn't
     support from_codeset and to_codeset, so that the caller can emit
     an error message such as
       "iconv() is not supported. Installing GNU libiconv and
        then reinstalling this package would fix this."  */
  errno = ENOSYS;
  return NULL;
#endif
}