Blame lib/propername.c

/* Localization of proper names.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2006.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that
   the proper_name function might be a candidate for attribute 'const'.  */
#if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__
# pragma GCC diagnostic ignored "-Wsuggest-attribute=const"
#endif

#include <config.h>

/* Specification.  */
#include "propername.h"

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if HAVE_ICONV
# include <iconv.h>
#endif

#include "trim.h"
#include "mbchar.h"
#include "mbuiter.h"
#include "localcharset.h"
#include "c-strcase.h"
#include "xstriconv.h"
#include "xalloc.h"
#include "gettext.h"


Packit Service fdd496
/* Tests whether STRING contains trim (SUB), starting and ending at word
Packit Service fdd496
   boundaries.
Packit Service fdd496
   Here, instead of implementing Unicode Standard Annex #29 for determining
Packit Service fdd496
   word boundaries, we assume that trim (SUB) starts and ends with words and
Packit Service fdd496
   only test whether the part before it ends with a non-word and the part
Packit Service fdd496
   after it starts with a non-word.  */
Packit Service fdd496
static bool
Packit Service fdd496
mbsstr_trimmed_wordbounded (const char *string, const char *sub)
Packit Service fdd496
{
Packit Service fdd496
  char *tsub = trim (sub);
Packit Service fdd496
  bool found = false;
Packit Service fdd496
Packit Service fdd496
  for (; *string != '\0';)
Packit Service fdd496
    {
Packit Service fdd496
      const char *tsub_in_string = mbsstr (string, tsub);
Packit Service fdd496
      if (tsub_in_string == NULL)
Packit Service fdd496
        break;
Packit Service fdd496
      else
Packit Service fdd496
        {
Packit Service fdd496
          if (MB_CUR_MAX > 1)
Packit Service fdd496
            {
Packit Service fdd496
              mbui_iterator_t string_iter;
Packit Service fdd496
              bool word_boundary_before;
Packit Service fdd496
              bool word_boundary_after;
Packit Service fdd496
Packit Service fdd496
              mbui_init (string_iter, string);
Packit Service fdd496
              word_boundary_before = true;
Packit Service fdd496
              if (mbui_cur_ptr (string_iter) < tsub_in_string)
Packit Service fdd496
                {
Packit Service fdd496
                  mbchar_t last_char_before_tsub;
Packit Service fdd496
                  do
Packit Service fdd496
                    {
Packit Service fdd496
                      if (!mbui_avail (string_iter))
Packit Service fdd496
                        abort ();
Packit Service fdd496
                      last_char_before_tsub = mbui_cur (string_iter);
Packit Service fdd496
                      mbui_advance (string_iter);
Packit Service fdd496
                    }
Packit Service fdd496
                  while (mbui_cur_ptr (string_iter) < tsub_in_string);
Packit Service fdd496
                  if (mb_isalnum (last_char_before_tsub))
Packit Service fdd496
                    word_boundary_before = false;
Packit Service fdd496
                }
Packit Service fdd496
Packit Service fdd496
              mbui_init (string_iter, tsub_in_string);
Packit Service fdd496
              {
Packit Service fdd496
                mbui_iterator_t tsub_iter;
Packit Service fdd496
Packit Service fdd496
                for (mbui_init (tsub_iter, tsub);
Packit Service fdd496
                     mbui_avail (tsub_iter);
Packit Service fdd496
                     mbui_advance (tsub_iter))
Packit Service fdd496
                  {
Packit Service fdd496
                    if (!mbui_avail (string_iter))
Packit Service fdd496
                      abort ();
Packit Service fdd496
                    mbui_advance (string_iter);
Packit Service fdd496
                  }
Packit Service fdd496
              }
Packit Service fdd496
              word_boundary_after = true;
Packit Service fdd496
              if (mbui_avail (string_iter))
Packit Service fdd496
                {
Packit Service fdd496
                  mbchar_t first_char_after_tsub = mbui_cur (string_iter);
Packit Service fdd496
                  if (mb_isalnum (first_char_after_tsub))
Packit Service fdd496
                    word_boundary_after = false;
Packit Service fdd496
                }
Packit Service fdd496
Packit Service fdd496
              if (word_boundary_before && word_boundary_after)
Packit Service fdd496
                {
Packit Service fdd496
                  found = true;
Packit Service fdd496
                  break;
Packit Service fdd496
                }
Packit Service fdd496
Packit Service fdd496
              mbui_init (string_iter, tsub_in_string);
Packit Service fdd496
              if (!mbui_avail (string_iter))
Packit Service fdd496
                break;
Packit Service fdd496
              string = tsub_in_string + mb_len (mbui_cur (string_iter));
Packit Service fdd496
            }
Packit Service fdd496
          else
Packit Service fdd496
            {
Packit Service fdd496
              bool word_boundary_before;
Packit Service fdd496
              const char *p;
Packit Service fdd496
              bool word_boundary_after;
Packit Service fdd496
Packit Service fdd496
              word_boundary_before = true;
Packit Service fdd496
              if (string < tsub_in_string)
Packit Service fdd496
                if (isalnum ((unsigned char) tsub_in_string[-1]))
Packit Service fdd496
                  word_boundary_before = false;
Packit Service fdd496
Packit Service fdd496
              p = tsub_in_string + strlen (tsub);
Packit Service fdd496
              word_boundary_after = true;
Packit Service fdd496
              if (*p != '\0')
Packit Service fdd496
                if (isalnum ((unsigned char) *p))
Packit Service fdd496
                  word_boundary_after = false;
Packit Service fdd496
Packit Service fdd496
              if (word_boundary_before && word_boundary_after)
Packit Service fdd496
                {
Packit Service fdd496
                  found = true;
Packit Service fdd496
                  break;
Packit Service fdd496
                }
Packit Service fdd496
Packit Service fdd496
              if (*tsub_in_string == '\0')
Packit Service fdd496
                break;
Packit Service fdd496
              string = tsub_in_string + 1;
Packit Service fdd496
            }
Packit Service fdd496
        }
Packit Service fdd496
    }
Packit Service fdd496
  free (tsub);
Packit Service fdd496
  return found;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Return the localization of NAME.  NAME is written in ASCII.  */
Packit Service fdd496
Packit Service fdd496
const char *
Packit Service fdd496
proper_name (const char *name)
Packit Service fdd496
{
Packit Service fdd496
  /* See whether there is a translation.   */
Packit Service fdd496
  const char *translation = gettext (name);
Packit Service fdd496
Packit Service fdd496
  if (translation != name)
Packit Service fdd496
    {
Packit Service fdd496
      /* See whether the translation contains the original name.  */
Packit Service fdd496
      if (mbsstr_trimmed_wordbounded (translation, name))
Packit Service fdd496
        return translation;
Packit Service fdd496
      else
Packit Service fdd496
        {
Packit Service fdd496
          /* Return "TRANSLATION (NAME)".  */
Packit Service fdd496
          char *result =
Packit Service fdd496
            XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
Packit Service fdd496
Packit Service fdd496
          sprintf (result, "%s (%s)", translation, name);
Packit Service fdd496
          return result;
Packit Service fdd496
        }
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
    return name;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Return the localization of a name whose original writing is not ASCII.
Packit Service fdd496
   NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
Packit Service fdd496
   escape sequences.  NAME_ASCII is a fallback written only with ASCII
Packit Service fdd496
   characters.  */
Packit Service fdd496
Packit Service fdd496
const char *
Packit Service fdd496
proper_name_utf8 (const char *name_ascii, const char *name_utf8)
Packit Service fdd496
{
Packit Service fdd496
  /* See whether there is a translation.   */
Packit Service fdd496
  const char *translation = gettext (name_ascii);
Packit Service fdd496
Packit Service fdd496
  /* Try to convert NAME_UTF8 to the locale encoding.  */
Packit Service fdd496
  const char *locale_code = locale_charset ();
Packit Service fdd496
  char *alloc_name_converted = NULL;
Packit Service fdd496
  char *alloc_name_converted_translit = NULL;
Packit Service fdd496
  const char *name_converted = NULL;
Packit Service fdd496
  const char *name_converted_translit = NULL;
Packit Service fdd496
  const char *name;
Packit Service fdd496
Packit Service fdd496
  if (c_strcasecmp (locale_code, "UTF-8") != 0)
Packit Service fdd496
    {
Packit Service fdd496
#if HAVE_ICONV
Packit Service fdd496
      name_converted = alloc_name_converted =
Packit Service fdd496
        xstr_iconv (name_utf8, "UTF-8", locale_code);
Packit Service fdd496
Packit Service fdd496
# if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
Packit Service fdd496
      && !defined __UCLIBC__) \
Packit Service fdd496
     || _LIBICONV_VERSION >= 0x0105
Packit Service fdd496
      {
Packit Service fdd496
        char *converted_translit;
Packit Service fdd496
Packit Service fdd496
        size_t len = strlen (locale_code);
Packit Service fdd496
        char *locale_code_translit = XNMALLOC (len + 10 + 1, char);
Packit Service fdd496
        memcpy (locale_code_translit, locale_code, len);
Packit Service fdd496
        memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1);
Packit Service fdd496
Packit Service fdd496
        converted_translit =
Packit Service fdd496
          xstr_iconv (name_utf8, "UTF-8", locale_code_translit);
Packit Service fdd496
Packit Service fdd496
        free (locale_code_translit);
Packit Service fdd496
Packit Service fdd496
        if (converted_translit != NULL)
Packit Service fdd496
          {
Packit Service fdd496
#  if !_LIBICONV_VERSION
Packit Service fdd496
            /* Don't use the transliteration if it added question marks.
Packit Service fdd496
               glibc's transliteration falls back to question marks; libiconv's
Packit Service fdd496
               transliteration does not.
Packit Service fdd496
               mbschr is equivalent to strchr in this case.  */
Packit Service fdd496
            if (strchr (converted_translit, '?') != NULL)
Packit Service fdd496
              free (converted_translit);
Packit Service fdd496
            else
Packit Service fdd496
#  endif
Packit Service fdd496
              name_converted_translit = alloc_name_converted_translit =
Packit Service fdd496
                converted_translit;
Packit Service fdd496
          }
Packit Service fdd496
      }
Packit Service fdd496
# endif
Packit Service fdd496
#endif
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
    {
Packit Service fdd496
      name_converted = name_utf8;
Packit Service fdd496
      name_converted_translit = name_utf8;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* The name in locale encoding.  */
Packit Service fdd496
  name = (name_converted != NULL ? name_converted :
Packit Service fdd496
          name_converted_translit != NULL ? name_converted_translit :
Packit Service fdd496
          name_ascii);
Packit Service fdd496
Packit Service fdd496
  /* See whether we have a translation.  Some translators have not understood
Packit Service fdd496
     that they should use the UTF-8 form of the name, if possible.  So if the
Packit Service fdd496
     translator provided a no-op translation, we ignore it.  */
Packit Service fdd496
  if (strcmp (translation, name_ascii) != 0)
Packit Service fdd496
    {
Packit Service fdd496
      /* See whether the translation contains the original name.  */
Packit Service fdd496
      if (mbsstr_trimmed_wordbounded (translation, name_ascii)
Packit Service fdd496
          || (name_converted != NULL
Packit Service fdd496
              && mbsstr_trimmed_wordbounded (translation, name_converted))
Packit Service fdd496
          || (name_converted_translit != NULL
Packit Service fdd496
              && mbsstr_trimmed_wordbounded (translation, name_converted_translit)))
Packit Service fdd496
        {
Packit Service fdd496
          if (alloc_name_converted != NULL)
Packit Service fdd496
            free (alloc_name_converted);
Packit Service fdd496
          if (alloc_name_converted_translit != NULL)
Packit Service fdd496
            free (alloc_name_converted_translit);
Packit Service fdd496
          return translation;
Packit Service fdd496
        }
Packit Service fdd496
      else
Packit Service fdd496
        {
Packit Service fdd496
          /* Return "TRANSLATION (NAME)".  */
Packit Service fdd496
          char *result =
Packit Service fdd496
            XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
Packit Service fdd496
Packit Service fdd496
          sprintf (result, "%s (%s)", translation, name);
Packit Service fdd496
Packit Service fdd496
          if (alloc_name_converted != NULL)
Packit Service fdd496
            free (alloc_name_converted);
Packit Service fdd496
          if (alloc_name_converted_translit != NULL)
Packit Service fdd496
            free (alloc_name_converted_translit);
Packit Service fdd496
          return result;
Packit Service fdd496
        }
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
    {
Packit Service fdd496
      if (alloc_name_converted != NULL && alloc_name_converted != name)
Packit Service fdd496
        free (alloc_name_converted);
Packit Service fdd496
      if (alloc_name_converted_translit != NULL
Packit Service fdd496
          && alloc_name_converted_translit != name)
Packit Service fdd496
        free (alloc_name_converted_translit);
Packit Service fdd496
      return name;
Packit Service fdd496
    }
Packit Service fdd496
}
Packit Service fdd496
#ifdef TEST1
# include <locale.h>
/* Test driver for mbsstr_trimmed_wordbounded: prints "found" when ARGV[1]
   contains trim (ARGV[2]) at word boundaries, using the locale from the
   environment.  Exits with status 2 when arguments are missing.  */
int
main (int argc, char *argv[])
{
  if (argc < 3)
    {
      /* Without this check, NULL would be passed on as a string.  */
      fprintf (stderr, "Usage: %s STRING SUB\n",
               argc > 0 && argv[0] != NULL ? argv[0] : "propername");
      return 2;
    }

  setlocale (LC_ALL, "");
  if (mbsstr_trimmed_wordbounded (argv[1], argv[2]))
    printf("found\n");
  return 0;
}
#endif

#ifdef TEST2
# include <locale.h>
# include <stdio.h>
/* Test driver for proper_name_utf8: prints the localized form of a fixed
   sample name, using the locale from the environment.  */
int
main (int argc, char *argv[])
{
  /* The command line is not used.  */
  (void) argc;
  (void) argv;

  setlocale (LC_ALL, "");
  printf ("%s\n", proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard"));
  return 0;
}
#endif