Blame glib/gtranslit.c

Packit Service d3d246
/*
Packit Service d3d246
 * Copyright © 2014 Canonical Limited
Packit Service d3d246
 *
Packit Service d3d246
 * This library is free software; you can redistribute it and/or
Packit Service d3d246
 * modify it under the terms of the GNU Lesser General Public
Packit Service d3d246
 * License as published by the Free Software Foundation; either
Packit Service d3d246
 * version 2.1 of the License, or (at your option) any later version.
Packit Service d3d246
 *
Packit Service d3d246
 * This library is distributed in the hope that it will be useful,
Packit Service d3d246
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service d3d246
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service d3d246
 * Lesser General Public License for more details.
Packit Service d3d246
 *
Packit Service d3d246
 * You should have received a copy of the GNU Lesser General Public
Packit Service d3d246
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
Packit Service d3d246
 *
Packit Service d3d246
 * Author: Ryan Lortie <desrt@desrt.ca>
Packit Service d3d246
 */
Packit Service d3d246
Packit Service d3d246
#include <config.h>
Packit Service d3d246
Packit Service d3d246
#include "gstrfuncs.h"
Packit Service d3d246
Packit Service d3d246
#include <glib.h>
Packit Service d3d246
#include <locale.h>
Packit Service d3d246
#include <stdlib.h>
Packit Service d3d246
#include <string.h>
Packit Service d3d246
Packit Service d3d246
struct mapping_entry
Packit Service d3d246
{
Packit Service d3d246
  guint16 src;
Packit Service d3d246
  guint16 ascii;
Packit Service d3d246
};
Packit Service d3d246
Packit Service d3d246
struct mapping_range
Packit Service d3d246
{
Packit Service d3d246
  guint16 start;
Packit Service d3d246
  guint16 length;
Packit Service d3d246
};
Packit Service d3d246
Packit Service d3d246
struct locale_entry
Packit Service d3d246
{
Packit Service d3d246
  guint8 name_offset;
Packit Service d3d246
  guint8 item_id;
Packit Service d3d246
};
Packit Service d3d246
Packit Service d3d246
#include "gtranslit-data.h"
Packit Service d3d246
Packit Service d3d246
/* Decoders for the 16-bit values stored in struct mapping_entry.
 *
 * Bit 15 clear: the value is the character itself and the length is 1.
 * Bit 15 set:   bits 12-14 hold the length and the low 12 bits are an
 *               offset into the companion table passed as @array.
 *
 * Every macro argument is parenthesised so that passing an expression
 * (for example `a | b`) cannot be re-associated with the `&` operators
 * used inside the expansion.
 */
#define get_src_char(array, encoded, index) (((encoded) & 0x8000) ? (array)[((encoded) & 0xfff) + (index)] : (encoded))
#define get_length(encoded)                 (((encoded) & 0x8000) ? (((encoded) & 0x7000) >> 12) : 1)

/* get_ascii_item() yields a pointer to the replacement bytes.  For an
 * inline (bit 15 clear) value it points into @encoded itself, so
 * @encoded must be an lvalue that outlives the returned pointer; the
 * byte-order switch selects the low-order byte of the 16-bit field.
 */
#if G_BYTE_ORDER == G_BIG_ENDIAN
#define get_ascii_item(array, encoded)      (((encoded) & 0x8000) ? &(array)[(encoded) & 0xfff] : (gpointer) (((char *) &(encoded)) + 1))
#else
#define get_ascii_item(array, encoded)      (((encoded) & 0x8000) ? &(array)[(encoded) & 0xfff] : (gpointer) &(encoded))
#endif
Packit Service d3d246
Packit Service d3d246
static const gchar * lookup_in_item (guint           item_id,
Packit Service d3d246
                                     const gunichar *key,
Packit Service d3d246
                                     gint           *result_len,
Packit Service d3d246
                                     gint           *key_consumed);
Packit Service d3d246
Packit Service d3d246
static gint
Packit Service d3d246
compare_mapping_entry (gconstpointer user_data,
Packit Service d3d246
                       gconstpointer data)
Packit Service d3d246
{
Packit Service d3d246
  const struct mapping_entry *entry = data;
Packit Service d3d246
  const gunichar *key = user_data;
Packit Service d3d246
  gunichar src_0;
Packit Service d3d246
Packit Service d3d246
  G_STATIC_ASSERT(MAX_KEY_SIZE == 2);
Packit Service d3d246
Packit Service d3d246
  src_0 = get_src_char (src_table, entry->src, 0);
Packit Service d3d246
Packit Service d3d246
  if (key[0] > src_0)
Packit Service d3d246
    return 1;
Packit Service d3d246
  else if (key[0] < src_0)
Packit Service d3d246
    return -1;
Packit Service d3d246
Packit Service d3d246
  if (get_length (entry->src) > 1)
Packit Service d3d246
    {
Packit Service d3d246
      gunichar src_1;
Packit Service d3d246
Packit Service d3d246
      src_1 = get_src_char (src_table, entry->src, 1);
Packit Service d3d246
Packit Service d3d246
      if (key[1] > src_1)
Packit Service d3d246
        return 1;
Packit Service d3d246
      else if (key[1] < src_1)
Packit Service d3d246
        return -1;
Packit Service d3d246
    }
Packit Service d3d246
  else if (key[1])
Packit Service d3d246
    return 1;
Packit Service d3d246
Packit Service d3d246
  return 0;
Packit Service d3d246
}
Packit Service d3d246
Packit Service d3d246
static const gchar *
Packit Service d3d246
lookup_in_mapping (const struct mapping_entry *mapping,
Packit Service d3d246
                   gint                        mapping_size,
Packit Service d3d246
                   const gunichar             *key,
Packit Service d3d246
                   gint                       *result_len,
Packit Service d3d246
                   gint                       *key_consumed)
Packit Service d3d246
{
Packit Service d3d246
  const struct mapping_entry *hit;
Packit Service d3d246
Packit Service d3d246
  hit = bsearch (key, mapping, mapping_size, sizeof (struct mapping_entry), compare_mapping_entry);
Packit Service d3d246
Packit Service d3d246
  if (hit == NULL)
Packit Service d3d246
    return NULL;
Packit Service d3d246
Packit Service d3d246
  *key_consumed = get_length (hit->src);
Packit Service d3d246
  *result_len = get_length (hit->ascii);
Packit Service d3d246
Packit Service d3d246
  return get_ascii_item(ascii_table, hit->ascii);
Packit Service d3d246
}
Packit Service d3d246
Packit Service d3d246
static const gchar *
Packit Service d3d246
lookup_in_chain (const guint8   *chain,
Packit Service d3d246
                 const gunichar *key,
Packit Service d3d246
                 gint           *result_len,
Packit Service d3d246
                 gint           *key_consumed)
Packit Service d3d246
{
Packit Service d3d246
  const gchar *result;
Packit Service d3d246
Packit Service d3d246
  while (*chain != 0xff)
Packit Service d3d246
    {
Packit Service d3d246
      result = lookup_in_item (*chain, key, result_len, key_consumed);
Packit Service d3d246
Packit Service d3d246
      if (result)
Packit Service d3d246
        return result;
Packit Service d3d246
Packit Service d3d246
      chain++;
Packit Service d3d246
    }
Packit Service d3d246
Packit Service d3d246
  return NULL;
Packit Service d3d246
}
Packit Service d3d246
Packit Service d3d246
static const gchar *
Packit Service d3d246
lookup_in_item (guint           item_id,
Packit Service d3d246
                const gunichar *key,
Packit Service d3d246
                gint           *result_len,
Packit Service d3d246
                gint           *key_consumed)
Packit Service d3d246
{
Packit Service d3d246
  if (item_id & 0x80)
Packit Service d3d246
    {
Packit Service d3d246
      const guint8 *chain = chains_table + chain_starts[item_id & 0x7f];
Packit Service d3d246
Packit Service d3d246
      return lookup_in_chain (chain, key, result_len, key_consumed);
Packit Service d3d246
    }
Packit Service d3d246
  else
Packit Service d3d246
    {
Packit Service d3d246
      const struct mapping_range *range = &mapping_ranges[item_id];
Packit Service d3d246
Packit Service d3d246
      return lookup_in_mapping (mappings_table + range->start, range->length, key, result_len, key_consumed);
Packit Service d3d246
    }
Packit Service d3d246
}
Packit Service d3d246
Packit Service d3d246
static gint
Packit Service d3d246
compare_locale_entry (gconstpointer user_data,
Packit Service d3d246
                      gconstpointer data)
Packit Service d3d246
{
Packit Service d3d246
  const struct locale_entry *entry = data;
Packit Service d3d246
  const gchar *key = user_data;
Packit Service d3d246
Packit Service d3d246
  return strcmp (key, &locale_names[entry->name_offset]);
Packit Service d3d246
}
Packit Service d3d246
Packit Service d3d246
static gboolean
Packit Service d3d246
lookup_item_id_for_one_locale (const gchar *key,
Packit Service d3d246
                               guint       *item_id)
Packit Service d3d246
{
Packit Service d3d246
  const struct locale_entry *hit;
Packit Service d3d246
Packit Service d3d246
  hit = bsearch (key, locale_index, G_N_ELEMENTS (locale_index), sizeof (struct locale_entry), compare_locale_entry);
Packit Service d3d246
Packit Service d3d246
  if (hit == NULL)
Packit Service d3d246
    return FALSE;
Packit Service d3d246
Packit Service d3d246
  *item_id = hit->item_id;
Packit Service d3d246
  return TRUE;
Packit Service d3d246
}
Packit Service d3d246
Packit Service d3d246
static guint
Packit Service d3d246
lookup_item_id_for_locale (const gchar *locale)
Packit Service d3d246
{
Packit Service d3d246
  gchar key[MAX_LOCALE_NAME + 1];
Packit Service d3d246
  const gchar *language;
Packit Service d3d246
  guint language_len;
Packit Service d3d246
  const gchar *territory = NULL;
Packit Service d3d246
  guint territory_len = 0;
Packit Service d3d246
  const gchar *modifier = NULL;
Packit Service d3d246
  guint modifier_len = 0;
Packit Service d3d246
  const gchar *next_char;
Packit Service d3d246
  guint id;
Packit Service d3d246
Packit Service d3d246
  /* As per POSIX, a valid locale looks like:
Packit Service d3d246
   *
Packit Service d3d246
   *   language[_territory][.codeset][@modifier]
Packit Service d3d246
   */
Packit Service d3d246
  language = locale;
Packit Service d3d246
  language_len = strcspn (language, "_.@");
Packit Service d3d246
  next_char = language + language_len;
Packit Service d3d246
Packit Service d3d246
  if (*next_char == '_')
Packit Service d3d246
    {
Packit Service d3d246
      territory = next_char;
Packit Service d3d246
      territory_len = strcspn (territory + 1, "_.@") + 1;
Packit Service d3d246
      next_char = territory + territory_len;
Packit Service d3d246
    }
Packit Service d3d246
Packit Service d3d246
  if (*next_char == '.')
Packit Service d3d246
    {
Packit Service d3d246
      const gchar *codeset;
Packit Service d3d246
      guint codeset_len;
Packit Service d3d246
Packit Service d3d246
      codeset = next_char;
Packit Service d3d246
      codeset_len = strcspn (codeset + 1, "_.@") + 1;
Packit Service d3d246
      next_char = codeset + codeset_len;
Packit Service d3d246
    }
Packit Service d3d246
Packit Service d3d246
  if (*next_char == '@')
Packit Service d3d246
    {
Packit Service d3d246
      modifier = next_char;
Packit Service d3d246
      modifier_len = strcspn (modifier + 1, "_.@") + 1;
Packit Service d3d246
      next_char = modifier + modifier_len;
Packit Service d3d246
    }
Packit Service d3d246
Packit Service d3d246
  /* What madness is this? */
Packit Service d3d246
  if (language_len == 0 || *next_char)
Packit Service d3d246
    return default_item_id;
Packit Service d3d246
Packit Service d3d246
  /* We are not interested in codeset.
Packit Service d3d246
   *
Packit Service d3d246
   * For this locale:
Packit Service d3d246
   *
Packit Service d3d246
   *  aa_BB@cc
Packit Service d3d246
   *
Packit Service d3d246
   * try in this order:
Packit Service d3d246
   *
Packit Service d3d246
   * Note: we have no locales of the form aa_BB@cc in the database.
Packit Service d3d246
   *
Packit Service d3d246
   *  1. aa@cc
Packit Service d3d246
   *  2. aa_BB
Packit Service d3d246
   *  3. aa
Packit Service d3d246
   */
Packit Service d3d246
Packit Service d3d246
  /* 1. */
Packit Service d3d246
  if (modifier_len && language_len + modifier_len <= MAX_LOCALE_NAME)
Packit Service d3d246
    {
Packit Service d3d246
      memcpy (key, language, language_len);
Packit Service d3d246
      memcpy (key + language_len, modifier, modifier_len);
Packit Service d3d246
      key[language_len + modifier_len] = '\0';
Packit Service d3d246
Packit Service d3d246
      if (lookup_item_id_for_one_locale (key, &id))
Packit Service d3d246
        return id;
Packit Service d3d246
    }
Packit Service d3d246
Packit Service d3d246
  /* 2. */
Packit Service d3d246
  if (territory_len && language_len + territory_len <= MAX_LOCALE_NAME)
Packit Service d3d246
    {
Packit Service d3d246
      memcpy (key, language, language_len);
Packit Service d3d246
      memcpy (key + language_len, territory, territory_len);
Packit Service d3d246
      key[language_len + territory_len] = '\0';
Packit Service d3d246
Packit Service d3d246
      if (lookup_item_id_for_one_locale (key, &id))
Packit Service d3d246
        return id;
Packit Service d3d246
    }
Packit Service d3d246
Packit Service d3d246
  /* 3. */
Packit Service d3d246
  if (language_len <= MAX_LOCALE_NAME)
Packit Service d3d246
    {
Packit Service d3d246
      memcpy (key, language, language_len);
Packit Service d3d246
      key[language_len] = '\0';
Packit Service d3d246
Packit Service d3d246
      if (lookup_item_id_for_one_locale (key, &id))
Packit Service d3d246
        return id;
Packit Service d3d246
    }
Packit Service d3d246
Packit Service d3d246
  return default_item_id;
Packit Service d3d246
}
Packit Service d3d246
Packit Service d3d246
static guint
Packit Service d3d246
get_default_item_id (void)
Packit Service d3d246
{
Packit Service d3d246
  static guint item_id;
Packit Service d3d246
  static gboolean done;
Packit Service d3d246
Packit Service d3d246
  /* Doesn't need to be locked -- no harm in doing it twice. */
Packit Service d3d246
  if (!done)
Packit Service d3d246
    {
Packit Service d3d246
      const gchar *locale;
Packit Service d3d246
Packit Service d3d246
      locale = setlocale (LC_CTYPE, NULL);
Packit Service d3d246
      item_id = lookup_item_id_for_locale (locale);
Packit Service d3d246
      done = TRUE;
Packit Service d3d246
    }
Packit Service d3d246
Packit Service d3d246
  return item_id;
Packit Service d3d246
}
Packit Service d3d246
Packit Service d3d246
/**
Packit Service d3d246
 * g_str_to_ascii:
Packit Service d3d246
 * @str: a string, in UTF-8
Packit Service d3d246
 * @from_locale: (nullable): the source locale, if known
Packit Service d3d246
 *
Packit Service d3d246
 * Transliterate @str to plain ASCII.
Packit Service d3d246
 *
Packit Service d3d246
 * For best results, @str should be in composed normalised form.
Packit Service d3d246
 *
Packit Service d3d246
 * This function performs a reasonably good set of character
Packit Service d3d246
 * replacements.  The particular set of replacements that is done may
Packit Service d3d246
 * change by version or even by runtime environment.
Packit Service d3d246
 *
Packit Service d3d246
 * If the source language of @str is known, it can used to improve the
Packit Service d3d246
 * accuracy of the translation by passing it as @from_locale.  It should
Packit Service d3d246
 * be a valid POSIX locale string (of the form
Packit Service d3d246
 * "language[_territory][.codeset][@modifier]").
Packit Service d3d246
 *
Packit Service d3d246
 * If @from_locale is %NULL then the current locale is used.
Packit Service d3d246
 *
Packit Service d3d246
 * If you want to do translation for no specific locale, and you want it
Packit Service d3d246
 * to be done independently of the currently locale, specify "C" for
Packit Service d3d246
 * @from_locale.
Packit Service d3d246
 *
Packit Service d3d246
 * Returns: a string in plain ASCII
Packit Service d3d246
 *
Packit Service d3d246
 * Since: 2.40
Packit Service d3d246
 **/
Packit Service d3d246
gchar *
Packit Service d3d246
g_str_to_ascii (const gchar *str,
Packit Service d3d246
                const gchar *from_locale)
Packit Service d3d246
{
Packit Service d3d246
  GString *result;
Packit Service d3d246
  guint item_id;
Packit Service d3d246
Packit Service d3d246
  g_return_val_if_fail (str != NULL, NULL);
Packit Service d3d246
Packit Service d3d246
  if (g_str_is_ascii (str))
Packit Service d3d246
    return g_strdup (str);
Packit Service d3d246
Packit Service d3d246
  if (from_locale)
Packit Service d3d246
    item_id = lookup_item_id_for_locale (from_locale);
Packit Service d3d246
  else
Packit Service d3d246
    item_id = get_default_item_id ();
Packit Service d3d246
Packit Service d3d246
  result = g_string_sized_new (strlen (str));
Packit Service d3d246
Packit Service d3d246
  while (*str)
Packit Service d3d246
    {
Packit Service d3d246
      /* We only need to transliterate non-ASCII values... */
Packit Service d3d246
      if (*str & 0x80)
Packit Service d3d246
        {
Packit Service d3d246
          gunichar key[MAX_KEY_SIZE];
Packit Service d3d246
          const gchar *r;
Packit Service d3d246
          gint consumed;
Packit Service d3d246
          gint r_len;
Packit Service d3d246
          gunichar c;
Packit Service d3d246
Packit Service d3d246
          G_STATIC_ASSERT(MAX_KEY_SIZE == 2);
Packit Service d3d246
Packit Service d3d246
          c = g_utf8_get_char (str);
Packit Service d3d246
Packit Service d3d246
          /* This is where it gets evil...
Packit Service d3d246
           *
Packit Service d3d246
           * We know that MAX_KEY_SIZE is 2.  We also know that we
Packit Service d3d246
           * only want to try another character if it's non-ascii.
Packit Service d3d246
           */
Packit Service d3d246
          str = g_utf8_next_char (str);
Packit Service d3d246
Packit Service d3d246
          key[0] = c;
Packit Service d3d246
          if (*str & 0x80)
Packit Service d3d246
            key[1] = g_utf8_get_char (str);
Packit Service d3d246
          else
Packit Service d3d246
            key[1] = 0;
Packit Service d3d246
Packit Service d3d246
          r = lookup_in_item (item_id, key, &r_len, &consumed);
Packit Service d3d246
Packit Service d3d246
          /* If we failed to map two characters, try again with one.
Packit Service d3d246
           *
Packit Service d3d246
           * gconv behaviour is a bit weird here -- it seems to
Packit Service d3d246
           * depend in the randomness of the binary search and the
Packit Service d3d246
           * size of the input buffer as to what result we get here.
Packit Service d3d246
           *
Packit Service d3d246
           * Doing it this way is more work, but should be
Packit Service d3d246
           * more-correct.
Packit Service d3d246
           */
Packit Service d3d246
          if (r == NULL && key[1])
Packit Service d3d246
            {
Packit Service d3d246
              key[1] = 0;
Packit Service d3d246
              r = lookup_in_item (item_id, key, &r_len, &consumed);
Packit Service d3d246
            }
Packit Service d3d246
Packit Service d3d246
          if (r != NULL)
Packit Service d3d246
            {
Packit Service d3d246
              g_string_append_len (result, r, r_len);
Packit Service d3d246
              if (consumed == 2)
Packit Service d3d246
                /* If it took both then skip again */
Packit Service d3d246
                str = g_utf8_next_char (str);
Packit Service d3d246
            }
Packit Service d3d246
          else /* no match found */
Packit Service d3d246
            g_string_append_c (result, '?');
Packit Service d3d246
        }
Packit Service d3d246
      else if (*str & 0x80) /* Out-of-range non-ASCII case */
Packit Service d3d246
        {
Packit Service d3d246
          g_string_append_c (result, '?');
Packit Service d3d246
          str = g_utf8_next_char (str);
Packit Service d3d246
        }
Packit Service d3d246
      else /* ASCII case */
Packit Service d3d246
        g_string_append_c (result, *str++);
Packit Service d3d246
    }
Packit Service d3d246
Packit Service d3d246
  return g_string_free (result, FALSE);
Packit Service d3d246
}