Blame lib/striconv.c

Packit 709fb3
/* Charset conversion.
Packit 709fb3
   Copyright (C) 2001-2007, 2010-2017 Free Software Foundation, Inc.
Packit 709fb3
   Written by Bruno Haible and Simon Josefsson.
Packit 709fb3
Packit 709fb3
   This program is free software; you can redistribute it and/or modify
Packit 709fb3
   it under the terms of the GNU General Public License as published by
Packit 709fb3
   the Free Software Foundation; either version 3, or (at your option)
Packit 709fb3
   any later version.
Packit 709fb3
Packit 709fb3
   This program is distributed in the hope that it will be useful,
Packit 709fb3
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 709fb3
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 709fb3
   GNU General Public License for more details.
Packit 709fb3
Packit 709fb3
   You should have received a copy of the GNU General Public License
Packit 709fb3
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
Packit 709fb3
Packit 709fb3
#include <config.h>
Packit 709fb3
Packit 709fb3
/* Specification.  */
Packit 709fb3
#include "striconv.h"
Packit 709fb3
Packit 709fb3
#include <errno.h>
Packit 709fb3
#include <stdlib.h>
Packit 709fb3
#include <string.h>
Packit 709fb3
Packit 709fb3
#if HAVE_ICONV
Packit 709fb3
# include <iconv.h>
Packit 709fb3
/* Get MB_LEN_MAX, CHAR_BIT.  */
Packit 709fb3
# include <limits.h>
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
#include "c-strcase.h"
Packit 709fb3
Packit 709fb3
#ifndef SIZE_MAX
Packit 709fb3
# define SIZE_MAX ((size_t) -1)
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
Packit 709fb3
#if HAVE_ICONV
Packit 709fb3
Packit 709fb3
/* Convert the SRCLEN bytes starting at SRC through the conversion
   descriptor CD.  The work is done in two passes: the first pass converts
   into a scratch buffer only to measure the output length, the second pass
   converts into the final buffer.
   On entry, *RESULTP and *LENGTHP may describe a caller-provided buffer and
   its size; that buffer is reused when *RESULTP is non-NULL and *LENGTHP is
   at least the measured length, otherwise a block of exactly the measured
   length is obtained from malloc.
   Returns 0 on success, with the output buffer stored in *RESULTP and its
   length in *LENGTHP.  When the output is empty, only *LENGTHP is set (to 0)
   and *RESULTP is left untouched.
   Returns -1 with errno set on failure.  In that case *RESULTP and *LENGTHP
   are not modified and a block allocated here is freed, but the contents of
   a reused caller buffer may already have been overwritten.  */
int
Packit 709fb3
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
Packit 709fb3
              char **resultp, size_t *lengthp)
Packit 709fb3
{
Packit 709fb3
  /* Size in bytes of the scratch buffer used by the measuring pass.  */
# define tmpbufsize 4096
Packit 709fb3
  size_t length;
Packit 709fb3
  char *result;
Packit 709fb3
Packit 709fb3
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
Packit 709fb3
# if defined _LIBICONV_VERSION \
Packit 709fb3
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
Packit 709fb3
          || defined __sun)
Packit 709fb3
  /* Set to the initial state.  */
Packit 709fb3
  iconv (cd, NULL, NULL, NULL, NULL);
Packit 709fb3
# endif
Packit 709fb3
Packit 709fb3
  /* Determine the length we need.  */
Packit 709fb3
  {
Packit 709fb3
    size_t count = 0;
Packit 709fb3
    /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
Packit 709fb3
       libiconv's UCS-4-INTERNAL encoding.  */
Packit 709fb3
    union { unsigned int align; char buf[tmpbufsize]; } tmp;
Packit 709fb3
# define tmpbuf tmp.buf
Packit 709fb3
    const char *inptr = src;
Packit 709fb3
    size_t insize = srclen;
Packit 709fb3
Packit 709fb3
    /* Each iteration converts as much as fits into the scratch buffer; the
       converted bytes are discarded, only their number is accumulated.  */
    while (insize > 0)
Packit 709fb3
      {
Packit 709fb3
        char *outptr = tmpbuf;
Packit 709fb3
        size_t outsize = tmpbufsize;
Packit 709fb3
        size_t res = iconv (cd,
Packit 709fb3
                            (ICONV_CONST char **) &inptr, &insize,
Packit 709fb3
                            &outptr, &outsize);
Packit 709fb3
Packit 709fb3
        if (res == (size_t)(-1))
Packit 709fb3
          {
Packit 709fb3
            if (errno == E2BIG)
Packit 709fb3
              /* The scratch buffer filled up: not an error here, just count
                 what was produced and convert the next piece.  */
              ;
Packit 709fb3
            else if (errno == EINVAL)
Packit 709fb3
              /* EINVAL (POSIX: incomplete multibyte sequence at the end of
                 the input) ends the loop without reporting an error.  Note
                 that the bytes produced by this last call are not added to
                 COUNT, because the break skips the statement below.  */
              break;
Packit 709fb3
            else
Packit 709fb3
              return -1;
Packit 709fb3
          }
Packit 709fb3
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
Packit 709fb3
        /* Irix iconv() inserts a NUL byte if it cannot convert.
Packit 709fb3
           NetBSD iconv() inserts a question mark if it cannot convert.
Packit 709fb3
           Only GNU libiconv and GNU libc are known to prefer to fail rather
Packit 709fb3
           than doing a lossy conversion.  */
Packit 709fb3
        else if (res > 0)
Packit 709fb3
          {
Packit 709fb3
            errno = EILSEQ;
Packit 709fb3
            return -1;
Packit 709fb3
          }
Packit 709fb3
# endif
Packit 709fb3
        count += outptr - tmpbuf;
Packit 709fb3
      }
Packit 709fb3
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
Packit 709fb3
# if defined _LIBICONV_VERSION \
Packit 709fb3
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
Packit 709fb3
          || defined __sun)
Packit 709fb3
    {
Packit 709fb3
      char *outptr = tmpbuf;
Packit 709fb3
      size_t outsize = tmpbufsize;
Packit 709fb3
      /* A call without input lets iconv emit whatever it still has to
         output at the end of the conversion; those bytes are counted too.  */
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
Packit 709fb3
Packit 709fb3
      if (res == (size_t)(-1))
Packit 709fb3
        return -1;
Packit 709fb3
      count += outptr - tmpbuf;
Packit 709fb3
    }
Packit 709fb3
# endif
Packit 709fb3
    length = count;
Packit 709fb3
# undef tmpbuf
Packit 709fb3
  }
Packit 709fb3
Packit 709fb3
  /* Empty output: report length 0 and leave *RESULTP as it was.  */
  if (length == 0)
Packit 709fb3
    {
Packit 709fb3
      *lengthp = 0;
Packit 709fb3
      return 0;
Packit 709fb3
    }
Packit 709fb3
  /* Reuse the caller's buffer when it is large enough, else allocate a block
     of exactly LENGTH bytes.  */
  if (*resultp != NULL && *lengthp >= length)
Packit 709fb3
    result = *resultp;
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      result = (char *) malloc (length);
Packit 709fb3
      if (result == NULL)
Packit 709fb3
        {
Packit 709fb3
          errno = ENOMEM;
Packit 709fb3
          return -1;
Packit 709fb3
        }
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
Packit 709fb3
# if defined _LIBICONV_VERSION \
Packit 709fb3
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
Packit 709fb3
          || defined __sun)
Packit 709fb3
  /* Return to the initial state.  */
Packit 709fb3
  iconv (cd, NULL, NULL, NULL, NULL);
Packit 709fb3
# endif
Packit 709fb3
Packit 709fb3
  /* Do the conversion for real.  */
Packit 709fb3
  {
Packit 709fb3
    const char *inptr = src;
Packit 709fb3
    size_t insize = srclen;
Packit 709fb3
    char *outptr = result;
Packit 709fb3
    size_t outsize = length;
Packit 709fb3
Packit 709fb3
    while (insize > 0)
Packit 709fb3
      {
Packit 709fb3
        size_t res = iconv (cd,
Packit 709fb3
                            (ICONV_CONST char **) &inptr, &insize,
Packit 709fb3
                            &outptr, &outsize);
Packit 709fb3
Packit 709fb3
        if (res == (size_t)(-1))
Packit 709fb3
          {
Packit 709fb3
            /* Unlike in the measuring pass, E2BIG is treated as a failure
               here: the output buffer was sized from the measured length.  */
            if (errno == EINVAL)
Packit 709fb3
              break;
Packit 709fb3
            else
Packit 709fb3
              goto fail;
Packit 709fb3
          }
Packit 709fb3
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
Packit 709fb3
        /* Irix iconv() inserts a NUL byte if it cannot convert.
Packit 709fb3
           NetBSD iconv() inserts a question mark if it cannot convert.
Packit 709fb3
           Only GNU libiconv and GNU libc are known to prefer to fail rather
Packit 709fb3
           than doing a lossy conversion.  */
Packit 709fb3
        else if (res > 0)
Packit 709fb3
          {
Packit 709fb3
            errno = EILSEQ;
Packit 709fb3
            goto fail;
Packit 709fb3
          }
Packit 709fb3
# endif
Packit 709fb3
      }
Packit 709fb3
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
Packit 709fb3
# if defined _LIBICONV_VERSION \
Packit 709fb3
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
Packit 709fb3
          || defined __sun)
Packit 709fb3
    {
Packit 709fb3
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
Packit 709fb3
Packit 709fb3
      if (res == (size_t)(-1))
Packit 709fb3
        goto fail;
Packit 709fb3
    }
Packit 709fb3
# endif
Packit 709fb3
    /* Consistency check: the second pass must have filled exactly the number
       of bytes that the first pass measured.  */
    if (outsize != 0)
Packit 709fb3
      abort ();
Packit 709fb3
  }
Packit 709fb3
Packit 709fb3
  *resultp = result;
Packit 709fb3
  *lengthp = length;
Packit 709fb3
Packit 709fb3
  return 0;
Packit 709fb3
Packit 709fb3
 fail:
Packit 709fb3
  {
Packit 709fb3
    /* Free only a block allocated by this function, never the caller's
       buffer, and keep the errno value that describes the failure.  */
    if (result != *resultp)
Packit 709fb3
      {
Packit 709fb3
        int saved_errno = errno;
Packit 709fb3
        free (result);
Packit 709fb3
        errno = saved_errno;
Packit 709fb3
      }
Packit 709fb3
    return -1;
Packit 709fb3
  }
Packit 709fb3
# undef tmpbufsize
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Convert the NUL-terminated string SRC through the conversion descriptor
   CD and return the result as a NUL-terminated string in memory obtained
   from malloc/realloc.  Returns NULL with errno set on failure.
   One of two implementations is selected at compile time: on platforms
   other than GNU libiconv and GNU libc the conversion is delegated to
   mem_cd_iconv; otherwise a single-pass algorithm with a growing output
   buffer is used.  */
char *
Packit 709fb3
str_cd_iconv (const char *src, iconv_t cd)
Packit 709fb3
{
Packit 709fb3
  /* For most encodings, a trailing NUL byte in the input will be converted
Packit 709fb3
     to a trailing NUL byte in the output.  But not for UTF-7.  So that this
Packit 709fb3
     function is usable for UTF-7, we have to exclude the NUL byte from the
Packit 709fb3
     conversion and add it by hand afterwards.  */
Packit 709fb3
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
Packit 709fb3
  /* Irix iconv() inserts a NUL byte if it cannot convert.
Packit 709fb3
     NetBSD iconv() inserts a question mark if it cannot convert.
Packit 709fb3
     Only GNU libiconv and GNU libc are known to prefer to fail rather
Packit 709fb3
     than doing a lossy conversion.  For other iconv() implementations,
Packit 709fb3
     we have to look at the number of irreversible conversions returned;
Packit 709fb3
     but this information is lost when iconv() returns for an E2BIG reason.
Packit 709fb3
     Therefore we cannot use the second, faster algorithm.  */
Packit 709fb3
Packit 709fb3
  /* No scratch buffer is offered, so mem_cd_iconv allocates the output
     itself, or leaves RESULT as NULL when the output is empty.  */
  char *result = NULL;
Packit 709fb3
  size_t length = 0;
Packit 709fb3
  int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
Packit 709fb3
  char *final_result;
Packit 709fb3
Packit 709fb3
  if (retval < 0)
Packit 709fb3
    {
Packit 709fb3
      /* mem_cd_iconv stores a result only on success, so RESULT must still
         be NULL here.  */
      if (result != NULL)
Packit 709fb3
        abort ();
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Add the terminating NUL byte.  */
Packit 709fb3
  final_result =
Packit 709fb3
    (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
Packit 709fb3
  if (final_result == NULL)
Packit 709fb3
    {
Packit 709fb3
      /* After a failed realloc the old block is still allocated; release
         it.  After a failed malloc RESULT is NULL and free is a no-op.  */
      free (result);
Packit 709fb3
      errno = ENOMEM;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
  final_result[length] = '\0';
Packit 709fb3
Packit 709fb3
  return final_result;
Packit 709fb3
Packit 709fb3
# else
Packit 709fb3
  /* This algorithm is likely faster than the one above.  But it may produce
Packit 709fb3
     iconv() returns for an E2BIG reason, when the output size guess is too
Packit 709fb3
     small.  Therefore it can only be used when we don't need the number of
Packit 709fb3
     irreversible conversions performed.  */
Packit 709fb3
  char *result;
Packit 709fb3
  size_t result_size;
Packit 709fb3
  size_t length;
Packit 709fb3
  const char *inptr = src;
Packit 709fb3
  size_t inbytes_remaining = strlen (src);
Packit 709fb3
Packit 709fb3
  /* Make a guess for the worst-case output size, in order to avoid a
Packit 709fb3
     realloc.  It's OK if the guess is wrong as long as it is not zero and
Packit 709fb3
     doesn't lead to an integer overflow.  */
Packit 709fb3
  result_size = inbytes_remaining;
Packit 709fb3
  {
Packit 709fb3
    /* Scale the guess by MB_LEN_MAX only when the input length is small
       enough that the multiplication cannot overflow; otherwise start with
       the input length itself.  */
    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
Packit 709fb3
    if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
Packit 709fb3
      result_size *= MB_LEN_MAX;
Packit 709fb3
  }
Packit 709fb3
  result_size += 1; /* for the terminating NUL */
Packit 709fb3
Packit 709fb3
  result = (char *) malloc (result_size);
Packit 709fb3
  if (result == NULL)
Packit 709fb3
    {
Packit 709fb3
      errno = ENOMEM;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
Packit 709fb3
# if defined _LIBICONV_VERSION \
Packit 709fb3
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
Packit 709fb3
          || defined __sun)
Packit 709fb3
  /* Set to the initial state.  */
Packit 709fb3
  iconv (cd, NULL, NULL, NULL, NULL);
Packit 709fb3
# endif
Packit 709fb3
Packit 709fb3
  /* Do the conversion.  */
Packit 709fb3
  {
Packit 709fb3
    char *outptr = result;
Packit 709fb3
    /* One byte of the buffer is always held back for the terminating NUL.  */
    size_t outbytes_remaining = result_size - 1;
Packit 709fb3
Packit 709fb3
    for (;;)
Packit 709fb3
      {
Packit 709fb3
        /* Here inptr + inbytes_remaining = src + strlen (src),
Packit 709fb3
                outptr + outbytes_remaining = result + result_size - 1.  */
Packit 709fb3
        size_t res = iconv (cd,
Packit 709fb3
                            (ICONV_CONST char **) &inptr, &inbytes_remaining,
Packit 709fb3
                            &outptr, &outbytes_remaining);
Packit 709fb3
Packit 709fb3
        if (res == (size_t)(-1))
Packit 709fb3
          {
Packit 709fb3
            /* EINVAL ends the conversion without an error; the output
               produced so far is kept.  */
            if (errno == EINVAL)
Packit 709fb3
              break;
Packit 709fb3
            else if (errno == E2BIG)
Packit 709fb3
              {
Packit 709fb3
                /* Output buffer exhausted: double it and retry from where
                   iconv stopped.  */
                size_t used = outptr - result;
Packit 709fb3
                size_t newsize = result_size * 2;
Packit 709fb3
                char *newresult;
Packit 709fb3
Packit 709fb3
                /* The doubling wrapped around size_t.  */
                if (!(newsize > result_size))
Packit 709fb3
                  {
Packit 709fb3
                    errno = ENOMEM;
Packit 709fb3
                    goto failed;
Packit 709fb3
                  }
Packit 709fb3
                newresult = (char *) realloc (result, newsize);
Packit 709fb3
                if (newresult == NULL)
Packit 709fb3
                  {
Packit 709fb3
                    errno = ENOMEM;
Packit 709fb3
                    goto failed;
Packit 709fb3
                  }
Packit 709fb3
                /* realloc may have moved the block: recompute the output
                   position from the saved offset.  */
                result = newresult;
Packit 709fb3
                result_size = newsize;
Packit 709fb3
                outptr = result + used;
Packit 709fb3
                outbytes_remaining = result_size - 1 - used;
Packit 709fb3
              }
Packit 709fb3
            else
Packit 709fb3
              goto failed;
Packit 709fb3
          }
Packit 709fb3
        else
Packit 709fb3
          /* iconv reported no error: converting the input is done.  */
          break;
Packit 709fb3
      }
Packit 709fb3
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
Packit 709fb3
# if defined _LIBICONV_VERSION \
Packit 709fb3
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
Packit 709fb3
          || defined __sun)
Packit 709fb3
    /* Flush: call iconv without input so that it can emit any remaining
       output, growing the buffer in the same way as above on E2BIG.  */
    for (;;)
Packit 709fb3
      {
Packit 709fb3
        /* Here outptr + outbytes_remaining = result + result_size - 1.  */
Packit 709fb3
        size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
Packit 709fb3
Packit 709fb3
        if (res == (size_t)(-1))
Packit 709fb3
          {
Packit 709fb3
            if (errno == E2BIG)
Packit 709fb3
              {
Packit 709fb3
                size_t used = outptr - result;
Packit 709fb3
                size_t newsize = result_size * 2;
Packit 709fb3
                char *newresult;
Packit 709fb3
Packit 709fb3
                if (!(newsize > result_size))
Packit 709fb3
                  {
Packit 709fb3
                    errno = ENOMEM;
Packit 709fb3
                    goto failed;
Packit 709fb3
                  }
Packit 709fb3
                newresult = (char *) realloc (result, newsize);
Packit 709fb3
                if (newresult == NULL)
Packit 709fb3
                  {
Packit 709fb3
                    errno = ENOMEM;
Packit 709fb3
                    goto failed;
Packit 709fb3
                  }
Packit 709fb3
                result = newresult;
Packit 709fb3
                result_size = newsize;
Packit 709fb3
                outptr = result + used;
Packit 709fb3
                outbytes_remaining = result_size - 1 - used;
Packit 709fb3
              }
Packit 709fb3
            else
Packit 709fb3
              goto failed;
Packit 709fb3
          }
Packit 709fb3
        else
Packit 709fb3
          break;
Packit 709fb3
      }
Packit 709fb3
# endif
Packit 709fb3
Packit 709fb3
    /* Add the terminating NUL byte.  */
Packit 709fb3
    *outptr++ = '\0';
Packit 709fb3
Packit 709fb3
    /* LENGTH includes the terminating NUL byte.  */
    length = outptr - result;
Packit 709fb3
  }
Packit 709fb3
Packit 709fb3
  /* Give away unused memory.  */
Packit 709fb3
  if (length < result_size)
Packit 709fb3
    {
Packit 709fb3
      char *smaller_result = (char *) realloc (result, length);
Packit 709fb3
Packit 709fb3
      /* If shrinking fails, the larger block is still valid and is
         returned as is.  */
      if (smaller_result != NULL)
Packit 709fb3
        result = smaller_result;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return result;
Packit 709fb3
Packit 709fb3
 failed:
Packit 709fb3
  {
Packit 709fb3
    /* Release the output buffer while keeping the errno value that
       describes the failure.  */
    int saved_errno = errno;
Packit 709fb3
    free (result);
Packit 709fb3
    errno = saved_errno;
Packit 709fb3
    return NULL;
Packit 709fb3
  }
Packit 709fb3
Packit 709fb3
# endif
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET.
   Returns a freshly allocated NUL-terminated string on success, or NULL with
   errno set on failure.  Without iconv support, any real conversion fails
   with errno = ENOSYS.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  /* Trivial cases: an empty string, or codeset names that are equal when
     compared case-insensitively.  A plain copy is the answer.  */
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *copy = strdup (src);

      if (copy == NULL)
        errno = ENOMEM;
      return copy;
    }

#if HAVE_ICONV
  {
    iconv_t cd;
    char *converted;
    int saved_errno;

    /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
    if (c_strcasecmp (from_codeset, "EUC-KR") == 0
        || c_strcasecmp (to_codeset, "EUC-KR") == 0)
      {
        errno = EINVAL;
        return NULL;
      }
# endif

    cd = iconv_open (to_codeset, from_codeset);
    if (cd == (iconv_t) -1)
      return NULL;

    converted = str_cd_iconv (src, cd);
    if (converted == NULL)
      {
        /* The conversion failed.  Release the descriptor, but report the
           errno value that str_cd_iconv left behind.  */
        saved_errno = errno;
        iconv_close (cd);
        errno = saved_errno;
        return NULL;
      }

    if (iconv_close (cd) < 0)
      {
        /* Closing the descriptor failed.  Discard the converted string and
           report the errno value from iconv_close.  */
        saved_errno = errno;
        free (converted);
        errno = saved_errno;
        return NULL;
      }

    return converted;
  }
#else
  /* This is a different error code than if iconv_open existed but didn't
     support from_codeset and to_codeset, so that the caller can emit
     an error message such as
       "iconv() is not supported. Installing GNU libiconv and
        then reinstalling this package would fix this."  */
  errno = ENOSYS;
  return NULL;
#endif
}