Blob Blame History Raw
/* Message list charset and locale charset handling.
   Copyright (C) 2001-2003, 2005-2009, 2015 Free Software Foundation,
   Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */


#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <alloca.h>

/* Specification.  */
#include "msgl-iconv.h"

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#if HAVE_ICONV
# include <iconv.h>
#endif

#include "progname.h"
#include "basename.h"
#include "message.h"
#include "po-charset.h"
#include "xstriconv.h"
#include "xstriconveh.h"
#include "msgl-ascii.h"
#include "xalloc.h"
#include "xmalloca.h"
#include "c-strstr.h"
#include "xvasprintf.h"
#include "po-xerror.h"
#include "gettext.h"

#define _(str) gettext (str)


#if HAVE_ICONV

static void conversion_error (const struct conversion_context* context)
#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
     __attribute__ ((noreturn))
#endif
;
static void
conversion_error (const struct conversion_context* context)
{
  if (context->to_code == po_charset_utf8)
    /* If a conversion to UTF-8 fails, the problem lies in the input.  */
    po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
               xasprintf (_("%s: input is not valid in \"%s\" encoding"),
                          context->from_filename, context->from_code));
  else
    po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
               xasprintf (_("\
%s: error while converting from \"%s\" encoding to \"%s\" encoding"),
                          context->from_filename, context->from_code,
                          context->to_code));
  /* NOTREACHED */
  abort ();
}

char *
convert_string_directly (iconv_t cd, const char *string,
                         const struct conversion_context* context)
{
  size_t len = strlen (string) + 1;
  char *result = NULL;
  size_t resultlen = 0;

  if (xmem_cd_iconv (string, len, cd, &result, &resultlen) == 0)
    /* Verify the result has exactly one NUL byte, at the end.  */
    if (resultlen > 0 && result[resultlen - 1] == '\0'
        && strlen (result) == resultlen - 1)
      return result;

  conversion_error (context);
  /* NOTREACHED */
  return NULL;
}

static char *
convert_string (const iconveh_t *cd, const char *string,
                const struct conversion_context* context)
{
  size_t len = strlen (string) + 1;
  char *result = NULL;
  size_t resultlen = 0;

  if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
                       &result, &resultlen) == 0)
    /* Verify the result has exactly one NUL byte, at the end.  */
    if (resultlen > 0 && result[resultlen - 1] == '\0'
        && strlen (result) == resultlen - 1)
      return result;

  conversion_error (context);
  /* NOTREACHED */
  return NULL;
}

static void
convert_string_list (const iconveh_t *cd, string_list_ty *slp,
                     const struct conversion_context* context)
{
  size_t i;

  if (slp != NULL)
    for (i = 0; i < slp->nitems; i++)
      slp->item[i] = convert_string (cd, slp->item[i], context);
}

static void
convert_prev_msgid (const iconveh_t *cd, message_ty *mp,
                    const struct conversion_context* context)
{
  if (mp->prev_msgctxt != NULL)
    mp->prev_msgctxt = convert_string (cd, mp->prev_msgctxt, context);
  if (mp->prev_msgid != NULL)
    mp->prev_msgid = convert_string (cd, mp->prev_msgid, context);
  if (mp->prev_msgid_plural != NULL)
    mp->prev_msgid_plural = convert_string (cd, mp->prev_msgid_plural, context);
}

static void
convert_msgid (const iconveh_t *cd, message_ty *mp,
               const struct conversion_context* context)
{
  if (mp->msgctxt != NULL)
    mp->msgctxt = convert_string (cd, mp->msgctxt, context);
  mp->msgid = convert_string (cd, mp->msgid, context);
  if (mp->msgid_plural != NULL)
    mp->msgid_plural = convert_string (cd, mp->msgid_plural, context);
}

static void
convert_msgstr (const iconveh_t *cd, message_ty *mp,
                const struct conversion_context* context)
{
  char *result = NULL;
  size_t resultlen = 0;

  if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
    abort ();

  if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
                       &result, &resultlen) == 0)
    /* Verify the result has a NUL byte at the end.  */
    if (resultlen > 0 && result[resultlen - 1] == '\0')
      /* Verify the result has the same number of NUL bytes.  */
      {
        const char *p;
        const char *pend;
        int nulcount1;
        int nulcount2;

        for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
             p < pend;
             p += strlen (p) + 1, nulcount1++);
        for (p = result, pend = p + resultlen, nulcount2 = 0;
             p < pend;
             p += strlen (p) + 1, nulcount2++);

        if (nulcount1 == nulcount2)
          {
            mp->msgstr = result;
            mp->msgstr_len = resultlen;
            return;
          }
      }

  conversion_error (context);
}

#endif


static bool
iconv_message_list_internal (message_list_ty *mlp,
                             const char *canon_from_code,
                             const char *canon_to_code,
                             bool update_header,
                             const char *from_filename)
{
  bool canon_from_code_overridden = (canon_from_code != NULL);
  bool msgids_changed;
  size_t j;

  /* If the list is empty, nothing to do.  */
  if (mlp->nitems == 0)
    return false;

  /* Search the header entry, and extract and replace the charset name.  */
  for (j = 0; j < mlp->nitems; j++)
    if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
      {
        const char *header = mlp->item[j]->msgstr;

        if (header != NULL)
          {
            const char *charsetstr = c_strstr (header, "charset=");

            if (charsetstr != NULL)
              {
                size_t len;
                char *charset;
                const char *canon_charset;

                charsetstr += strlen ("charset=");
                len = strcspn (charsetstr, " \t\n");
                charset = (char *) xmalloca (len + 1);
                memcpy (charset, charsetstr, len);
                charset[len] = '\0';

                canon_charset = po_charset_canonicalize (charset);
                if (canon_charset == NULL)
                  {
                    if (!canon_from_code_overridden)
                      {
                        /* Don't give an error for POT files, because POT
                           files usually contain only ASCII msgids.  */
                        const char *filename = from_filename;
                        size_t filenamelen;

                        if (filename != NULL
                            && (filenamelen = strlen (filename)) >= 4
                            && memcmp (filename + filenamelen - 4, ".pot", 4)
                               == 0
                            && strcmp (charset, "CHARSET") == 0)
                          canon_charset = po_charset_ascii;
                        else
                          po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
                                     false, xasprintf (_("\
present charset \"%s\" is not a portable encoding name"),
                                                charset));
                      }
                  }
                else
                  {
                    if (canon_from_code == NULL)
                      canon_from_code = canon_charset;
                    else if (canon_from_code != canon_charset)
                      po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0,  0,
                                 false,
                                 xasprintf (_("\
two different charsets \"%s\" and \"%s\" in input file"),
                                            canon_from_code, canon_charset));
                  }
                freea (charset);

                if (update_header)
                  {
                    size_t len1, len2, len3;
                    char *new_header;

                    len1 = charsetstr - header;
                    len2 = strlen (canon_to_code);
                    len3 = (header + strlen (header)) - (charsetstr + len);
                    new_header = XNMALLOC (len1 + len2 + len3 + 1, char);
                    memcpy (new_header, header, len1);
                    memcpy (new_header + len1, canon_to_code, len2);
                    memcpy (new_header + len1 + len2, charsetstr + len,
                            len3 + 1);
                    mlp->item[j]->msgstr = new_header;
                    mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
                  }
              }
          }
      }
  if (canon_from_code == NULL)
    {
      if (is_ascii_message_list (mlp))
        canon_from_code = po_charset_ascii;
      else
        po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
                   _("\
input file doesn't contain a header entry with a charset specification"));
    }

  msgids_changed = false;

  /* If the two encodings are the same, nothing to do.  */
  if (canon_from_code != canon_to_code)
    {
#if HAVE_ICONV
      iconveh_t cd;
      struct conversion_context context;

      if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
        po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
                   xasprintf (_("\
Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
and iconv() does not support this conversion."),
                              canon_from_code, canon_to_code,
                              basename (program_name)));

      context.from_code = canon_from_code;
      context.to_code = canon_to_code;
      context.from_filename = from_filename;

      for (j = 0; j < mlp->nitems; j++)
        {
          message_ty *mp = mlp->item[j];

          if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt))
              || !is_ascii_string (mp->msgid))
            msgids_changed = true;
          context.message = mp;
          convert_string_list (&cd, mp->comment, &context);
          convert_string_list (&cd, mp->comment_dot, &context);
          convert_prev_msgid (&cd, mp, &context);
          convert_msgid (&cd, mp, &context);
          convert_msgstr (&cd, mp, &context);
        }

      iconveh_close (&cd);

      if (msgids_changed)
        if (message_list_msgids_changed (mlp))
          po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
                     xasprintf (_("\
Conversion from \"%s\" to \"%s\" introduces duplicates: \
some different msgids become equal."),
                                canon_from_code, canon_to_code));
#else
          po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
                     xasprintf (_("\
Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
This version was built without iconv()."),
                                canon_from_code, canon_to_code,
                                basename (program_name)));
#endif
    }

  return msgids_changed;
}

bool
iconv_message_list (message_list_ty *mlp,
                    const char *canon_from_code, const char *canon_to_code,
                    const char *from_filename)
{
  return iconv_message_list_internal (mlp,
                                      canon_from_code, canon_to_code, true,
                                      from_filename);
}

msgdomain_list_ty *
iconv_msgdomain_list (msgdomain_list_ty *mdlp,
                      const char *to_code,
                      bool update_header,
                      const char *from_filename)
{
  const char *canon_to_code;
  size_t k;

  /* Canonicalize target encoding.  */
  canon_to_code = po_charset_canonicalize (to_code);
  if (canon_to_code == NULL)
    po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
               xasprintf (_("\
target charset \"%s\" is not a portable encoding name."),
                          to_code));

  for (k = 0; k < mdlp->nitems; k++)
    iconv_message_list_internal (mdlp->item[k]->messages,
                                 mdlp->encoding, canon_to_code, update_header,
                                 from_filename);

  mdlp->encoding = canon_to_code;
  return mdlp;
}

#if HAVE_ICONV

static bool
iconvable_string (const iconveh_t *cd, const char *string)
{
  size_t len = strlen (string) + 1;
  char *result = NULL;
  size_t resultlen = 0;

  if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
                       &result, &resultlen) == 0)
    {
      /* Test if the result has exactly one NUL byte, at the end.  */
      bool ok = (resultlen > 0 && result[resultlen - 1] == '\0'
                 && strlen (result) == resultlen - 1);
      free (result);
      return ok;
    }
  return false;
}

static bool
iconvable_string_list (const iconveh_t *cd, string_list_ty *slp)
{
  size_t i;

  if (slp != NULL)
    for (i = 0; i < slp->nitems; i++)
      if (!iconvable_string (cd, slp->item[i]))
        return false;
  return true;
}

static bool
iconvable_prev_msgid (const iconveh_t *cd, message_ty *mp)
{
  if (mp->prev_msgctxt != NULL)
    if (!iconvable_string (cd, mp->prev_msgctxt))
      return false;
  if (mp->prev_msgid != NULL)
    if (!iconvable_string (cd, mp->prev_msgid))
      return false;
  if (mp->prev_msgid_plural != NULL)
    if (!iconvable_string (cd, mp->prev_msgid_plural))
      return false;
  return true;
}

static bool
iconvable_msgid (const iconveh_t *cd, message_ty *mp)
{
  if (mp->msgctxt != NULL)
    if (!iconvable_string (cd, mp->msgctxt))
      return false;
  if (!iconvable_string (cd, mp->msgid))
    return false;
  if (mp->msgid_plural != NULL)
    if (!iconvable_string (cd, mp->msgid_plural))
      return false;
  return true;
}

static bool
iconvable_msgstr (const iconveh_t *cd, message_ty *mp)
{
  char *result = NULL;
  size_t resultlen = 0;

  if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
    abort ();

  if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
                       &result, &resultlen) == 0)
    {
      bool ok = false;

      /* Test if the result has a NUL byte at the end.  */
      if (resultlen > 0 && result[resultlen - 1] == '\0')
        /* Test if the result has the same number of NUL bytes.  */
        {
          const char *p;
          const char *pend;
          int nulcount1;
          int nulcount2;

          for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
               p < pend;
               p += strlen (p) + 1, nulcount1++);
          for (p = result, pend = p + resultlen, nulcount2 = 0;
               p < pend;
               p += strlen (p) + 1, nulcount2++);

          if (nulcount1 == nulcount2)
            ok = true;
        }

      free (result);
      return ok;
    }
  return false;
}

#endif

bool
is_message_list_iconvable (message_list_ty *mlp,
                           const char *canon_from_code,
                           const char *canon_to_code)
{
  bool canon_from_code_overridden = (canon_from_code != NULL);
  size_t j;

  /* If the list is empty, nothing to check.  */
  if (mlp->nitems == 0)
    return true;

  /* Search the header entry, and extract the charset name.  */
  for (j = 0; j < mlp->nitems; j++)
    if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
      {
        const char *header = mlp->item[j]->msgstr;

        if (header != NULL)
          {
            const char *charsetstr = c_strstr (header, "charset=");

            if (charsetstr != NULL)
              {
                size_t len;
                char *charset;
                const char *canon_charset;

                charsetstr += strlen ("charset=");
                len = strcspn (charsetstr, " \t\n");
                charset = (char *) xmalloca (len + 1);
                memcpy (charset, charsetstr, len);
                charset[len] = '\0';

                canon_charset = po_charset_canonicalize (charset);
                if (canon_charset == NULL)
                  {
                    if (!canon_from_code_overridden)
                      {
                        /* Don't give an error for POT files, because POT
                           files usually contain only ASCII msgids.  */
                        if (strcmp (charset, "CHARSET") == 0)
                          canon_charset = po_charset_ascii;
                        else
                          {
                            /* charset is not a portable encoding name.  */
                            freea (charset);
                            return false;
                          }
                      }
                  }
                else
                  {
                    if (canon_from_code == NULL)
                      canon_from_code = canon_charset;
                    else if (canon_from_code != canon_charset)
                      {
                        /* Two different charsets in input file.  */
                        freea (charset);
                        return false;
                      }
                  }
                freea (charset);
              }
          }
      }
  if (canon_from_code == NULL)
    {
      if (is_ascii_message_list (mlp))
        canon_from_code = po_charset_ascii;
      else
        /* Input file lacks a header entry with a charset specification.  */
        return false;
    }

  /* If the two encodings are the same, nothing to check.  */
  if (canon_from_code != canon_to_code)
    {
#if HAVE_ICONV
      iconveh_t cd;

      if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
        /* iconv() doesn't support this conversion.  */
        return false;

      for (j = 0; j < mlp->nitems; j++)
        {
          message_ty *mp = mlp->item[j];

          if (!(iconvable_string_list (&cd, mp->comment)
                && iconvable_string_list (&cd, mp->comment_dot)
                && iconvable_prev_msgid (&cd, mp)
                && iconvable_msgid (&cd, mp)
                && iconvable_msgstr (&cd, mp)))
            return false;
        }

      iconveh_close (&cd);
#else
      /* This version was built without iconv().  */
      return false;
#endif
    }

  return true;
}