/* Charset conversion.
   Copyright (C) 2001-2007, 2010-2017 Free Software Foundation, Inc.
   Written by Bruno Haible and Simon Josefsson.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>

/* Specification.  */
#include "striconv.h"

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#if HAVE_ICONV
# include <iconv.h>
/* Get MB_LEN_MAX, CHAR_BIT.  */
# include <limits.h>
#endif

#include "c-strcase.h"

#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif


#if HAVE_ICONV

/* Convert the SRCLEN bytes starting at SRC using the conversion
   descriptor CD.

   On entry, *RESULTP and *LENGTHP may describe a caller-provided scratch
   buffer and its size, or *RESULTP may be NULL.

   Return 0 on success.  Then *LENGTHP is the number of bytes produced.
   If that number is zero, *RESULTP is left untouched.  Otherwise *RESULTP
   is the caller's buffer if it was non-NULL and large enough, or else a
   freshly malloc()ed block.

   Return -1 with errno set on failure; in that case *RESULTP and *LENGTHP
   are not modified and any block allocated here has been freed.

   The input is converted twice: a first pass through a fixed-size scratch
   buffer measures the exact output length, then a second pass writes into
   the final buffer.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
              char **resultp, size_t *lengthp)
{
# define tmpbufsize 4096
  size_t length;
  char *result;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Determine the length we need.  */
  {
    size_t count = 0;
    /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
       libiconv's UCS-4-INTERNAL encoding.  */
    union { unsigned int align; char buf[tmpbufsize]; } tmp;
# define tmpbuf tmp.buf
    const char *inptr = src;
    size_t insize = srclen;

    while (insize > 0)
      {
        char *outptr = tmpbuf;
        size_t outsize = tmpbufsize;
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            /* E2BIG: the scratch buffer is full; count what was produced
               and continue with the rest of the input.
               EINVAL: incomplete sequence at the end of the input; stop
               here and keep what was converted so far.  */
            if (errno == E2BIG)
              ;
            else if (errno == EINVAL)
              break;
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
        count += outptr - tmpbuf;
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    {
      /* Flush the descriptor so that any bytes needed to return to the
         initial shift state are counted too.  */
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
      count += outptr - tmpbuf;
    }
# endif
    length = count;
# undef tmpbuf
  }

  if (length == 0)
    {
      *lengthp = 0;
      return 0;
    }
  /* Reuse the caller's buffer when it is big enough, else allocate.  */
  if (*resultp != NULL && *lengthp >= length)
    result = *resultp;
  else
    {
      result = (char *) malloc (length);
      if (result == NULL)
        {
          errno = ENOMEM;
          return -1;
        }
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
  /* Return to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion for real.  */
  {
    const char *inptr = src;
    size_t insize = srclen;
    char *outptr = result;
    size_t outsize = length;

    while (insize > 0)
      {
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == EINVAL)
              break;
            else
              goto fail;
          }
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            goto fail;
          }
# endif
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    {
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        goto fail;
    }
# endif
    /* The second pass must fill exactly the length measured by the first
       pass; anything else is an internal inconsistency.  */
    if (outsize != 0)
      abort ();
  }

  *resultp = result;
  *lengthp = length;

  return 0;

 fail:
  {
    /* Free only what was allocated here, never the caller's buffer, and
       keep the errno of the failing iconv call.  */
    if (result != *resultp)
      {
        int saved_errno = errno;
        free (result);
        errno = saved_errno;
      }
    return -1;
  }
# undef tmpbufsize
}

char *
Packit Service fdd496
str_cd_iconv (const char *src, iconv_t cd)
Packit Service fdd496
{
Packit Service fdd496
  /* For most encodings, a trailing NUL byte in the input will be converted
Packit Service fdd496
     to a trailing NUL byte in the output.  But not for UTF-7.  So that this
Packit Service fdd496
     function is usable for UTF-7, we have to exclude the NUL byte from the
Packit Service fdd496
     conversion and add it by hand afterwards.  */
Packit Service fdd496
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
Packit Service fdd496
  /* Irix iconv() inserts a NUL byte if it cannot convert.
Packit Service fdd496
     NetBSD iconv() inserts a question mark if it cannot convert.
Packit Service fdd496
     Only GNU libiconv and GNU libc are known to prefer to fail rather
Packit Service fdd496
     than doing a lossy conversion.  For other iconv() implementations,
Packit Service fdd496
     we have to look at the number of irreversible conversions returned;
Packit Service fdd496
     but this information is lost when iconv() returns for an E2BIG reason.
Packit Service fdd496
     Therefore we cannot use the second, faster algorithm.  */
Packit Service fdd496
Packit Service fdd496
  char *result = NULL;
Packit Service fdd496
  size_t length = 0;
Packit Service fdd496
  int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
Packit Service fdd496
  char *final_result;
Packit Service fdd496
Packit Service fdd496
  if (retval < 0)
Packit Service fdd496
    {
Packit Service fdd496
      if (result != NULL)
Packit Service fdd496
        abort ();
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* Add the terminating NUL byte.  */
Packit Service fdd496
  final_result =
Packit Service fdd496
    (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
Packit Service fdd496
  if (final_result == NULL)
Packit Service fdd496
    {
Packit Service fdd496
      free (result);
Packit Service fdd496
      errno = ENOMEM;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
  final_result[length] = '\0';
Packit Service fdd496
Packit Service fdd496
  return final_result;
Packit Service fdd496
Packit Service fdd496
# else
Packit Service fdd496
  /* This algorithm is likely faster than the one above.  But it may produce
Packit Service fdd496
     iconv() returns for an E2BIG reason, when the output size guess is too
Packit Service fdd496
     small.  Therefore it can only be used when we don't need the number of
Packit Service fdd496
     irreversible conversions performed.  */
Packit Service fdd496
  char *result;
Packit Service fdd496
  size_t result_size;
Packit Service fdd496
  size_t length;
Packit Service fdd496
  const char *inptr = src;
Packit Service fdd496
  size_t inbytes_remaining = strlen (src);
Packit Service fdd496
Packit Service fdd496
  /* Make a guess for the worst-case output size, in order to avoid a
Packit Service fdd496
     realloc.  It's OK if the guess is wrong as long as it is not zero and
Packit Service fdd496
     doesn't lead to an integer overflow.  */
Packit Service fdd496
  result_size = inbytes_remaining;
Packit Service fdd496
  {
Packit Service fdd496
    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
Packit Service fdd496
    if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
Packit Service fdd496
      result_size *= MB_LEN_MAX;
Packit Service fdd496
  }
Packit Service fdd496
  result_size += 1; /* for the terminating NUL */
Packit Service fdd496
Packit Service fdd496
  result = (char *) malloc (result_size);
Packit Service fdd496
  if (result == NULL)
Packit Service fdd496
    {
Packit Service fdd496
      errno = ENOMEM;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
Packit Service fdd496
# if defined _LIBICONV_VERSION \
Packit Service fdd496
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
Packit Service fdd496
          || defined __sun)
Packit Service fdd496
  /* Set to the initial state.  */
Packit Service fdd496
  iconv (cd, NULL, NULL, NULL, NULL);
Packit Service fdd496
# endif
Packit Service fdd496
Packit Service fdd496
  /* Do the conversion.  */
Packit Service fdd496
  {
Packit Service fdd496
    char *outptr = result;
Packit Service fdd496
    size_t outbytes_remaining = result_size - 1;
Packit Service fdd496
Packit Service fdd496
    for (;;)
Packit Service fdd496
      {
Packit Service fdd496
        /* Here inptr + inbytes_remaining = src + strlen (src),
Packit Service fdd496
                outptr + outbytes_remaining = result + result_size - 1.  */
Packit Service fdd496
        size_t res = iconv (cd,
Packit Service fdd496
                            (ICONV_CONST char **) &inptr, &inbytes_remaining,
Packit Service fdd496
                            &outptr, &outbytes_remaining);
Packit Service fdd496
Packit Service fdd496
        if (res == (size_t)(-1))
Packit Service fdd496
          {
Packit Service fdd496
            if (errno == EINVAL)
Packit Service fdd496
              break;
Packit Service fdd496
            else if (errno == E2BIG)
Packit Service fdd496
              {
Packit Service fdd496
                size_t used = outptr - result;
Packit Service fdd496
                size_t newsize = result_size * 2;
Packit Service fdd496
                char *newresult;
Packit Service fdd496
Packit Service fdd496
                if (!(newsize > result_size))
Packit Service fdd496
                  {
Packit Service fdd496
                    errno = ENOMEM;
Packit Service fdd496
                    goto failed;
Packit Service fdd496
                  }
Packit Service fdd496
                newresult = (char *) realloc (result, newsize);
Packit Service fdd496
                if (newresult == NULL)
Packit Service fdd496
                  {
Packit Service fdd496
                    errno = ENOMEM;
Packit Service fdd496
                    goto failed;
Packit Service fdd496
                  }
Packit Service fdd496
                result = newresult;
Packit Service fdd496
                result_size = newsize;
Packit Service fdd496
                outptr = result + used;
Packit Service fdd496
                outbytes_remaining = result_size - 1 - used;
Packit Service fdd496
              }
Packit Service fdd496
            else
Packit Service fdd496
              goto failed;
Packit Service fdd496
          }
Packit Service fdd496
        else
Packit Service fdd496
          break;
Packit Service fdd496
      }
Packit Service fdd496
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
Packit Service fdd496
# if defined _LIBICONV_VERSION \
Packit Service fdd496
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
Packit Service fdd496
          || defined __sun)
Packit Service fdd496
    for (;;)
Packit Service fdd496
      {
Packit Service fdd496
        /* Here outptr + outbytes_remaining = result + result_size - 1.  */
Packit Service fdd496
        size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
Packit Service fdd496
Packit Service fdd496
        if (res == (size_t)(-1))
Packit Service fdd496
          {
Packit Service fdd496
            if (errno == E2BIG)
Packit Service fdd496
              {
Packit Service fdd496
                size_t used = outptr - result;
Packit Service fdd496
                size_t newsize = result_size * 2;
Packit Service fdd496
                char *newresult;
Packit Service fdd496
Packit Service fdd496
                if (!(newsize > result_size))
Packit Service fdd496
                  {
Packit Service fdd496
                    errno = ENOMEM;
Packit Service fdd496
                    goto failed;
Packit Service fdd496
                  }
Packit Service fdd496
                newresult = (char *) realloc (result, newsize);
Packit Service fdd496
                if (newresult == NULL)
Packit Service fdd496
                  {
Packit Service fdd496
                    errno = ENOMEM;
Packit Service fdd496
                    goto failed;
Packit Service fdd496
                  }
Packit Service fdd496
                result = newresult;
Packit Service fdd496
                result_size = newsize;
Packit Service fdd496
                outptr = result + used;
Packit Service fdd496
                outbytes_remaining = result_size - 1 - used;
Packit Service fdd496
              }
Packit Service fdd496
            else
Packit Service fdd496
              goto failed;
Packit Service fdd496
          }
Packit Service fdd496
        else
Packit Service fdd496
          break;
Packit Service fdd496
      }
Packit Service fdd496
# endif
Packit Service fdd496
Packit Service fdd496
    /* Add the terminating NUL byte.  */
Packit Service fdd496
    *outptr++ = '\0';
Packit Service fdd496
Packit Service fdd496
    length = outptr - result;
Packit Service fdd496
  }
Packit Service fdd496
Packit Service fdd496
  /* Give away unused memory.  */
Packit Service fdd496
  if (length < result_size)
Packit Service fdd496
    {
Packit Service fdd496
      char *smaller_result = (char *) realloc (result, length);
Packit Service fdd496
Packit Service fdd496
      if (smaller_result != NULL)
Packit Service fdd496
        result = smaller_result;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return result;
Packit Service fdd496
Packit Service fdd496
 failed:
Packit Service fdd496
  {
Packit Service fdd496
    int saved_errno = errno;
Packit Service fdd496
    free (result);
Packit Service fdd496
    errno = saved_errno;
Packit Service fdd496
    return NULL;
Packit Service fdd496
  }
Packit Service fdd496
Packit Service fdd496
# endif
Packit Service fdd496
}

#endif

/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET.

   Return a freshly allocated, NUL-terminated string, or NULL with errno
   set on failure.

   If SRC is empty, or if the two codeset names compare equal under
   c_strcasecmp, no conversion is attempted and a copy of SRC is returned.
   When the package is built without iconv support, any other request
   fails with errno set to ENOSYS.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *result = strdup (src);

      if (result == NULL)
        errno = ENOMEM;
      return result;
    }
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
          || c_strcasecmp (to_codeset, "EUC-KR") == 0)
        {
          errno = EINVAL;
          return NULL;
        }
# endif
      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
        return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
        {
          /* Close cd, but preserve the errno from str_cd_iconv.  */
          int saved_errno = errno;
          iconv_close (cd);
          errno = saved_errno;
        }
      else
        {
          if (iconv_close (cd) < 0)
            {
              /* Return NULL, but free the allocated memory, and while doing
                 that, preserve the errno from iconv_close.  */
              int saved_errno = errno;
              free (result);
              errno = saved_errno;
              return NULL;
            }
        }
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
         support from_codeset and to_codeset, so that the caller can emit
         an error message such as
           "iconv() is not supported. Installing GNU libiconv and
            then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}