Blame pango/pango-bidi-type.c

Packit 0ec9dd
/* Pango
Packit 0ec9dd
 * pango-bidi-type.c: Bidirectional Character Types
Packit 0ec9dd
 *
Packit 0ec9dd
 * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
Packit 0ec9dd
 *
Packit 0ec9dd
 * This library is free software; you can redistribute it and/or
Packit 0ec9dd
 * modify it under the terms of the GNU Library General Public
Packit 0ec9dd
 * License as published by the Free Software Foundation; either
Packit 0ec9dd
 * version 2 of the License, or (at your option) any later version.
Packit 0ec9dd
 *
Packit 0ec9dd
 * This library is distributed in the hope that it will be useful,
Packit 0ec9dd
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 0ec9dd
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
Packit 0ec9dd
 * Library General Public License for more details.
Packit 0ec9dd
 *
Packit 0ec9dd
 * You should have received a copy of the GNU Library General Public
Packit 0ec9dd
 * License along with this library; if not, write to the
Packit 0ec9dd
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Packit 0ec9dd
 * Boston, MA 02111-1307, USA.
Packit 0ec9dd
 */
Packit 0ec9dd
Packit 0ec9dd
/**
Packit 0ec9dd
 * SECTION:bidi
Packit 0ec9dd
 * @short_description:Types and functions to help with handling bidirectional text
Packit 0ec9dd
 * @title:Bidirectional Text
Packit 0ec9dd
 * @see_also:
Packit 0ec9dd
 * pango_context_get_base_dir(),
Packit 0ec9dd
 * pango_context_set_base_dir(),
Packit 0ec9dd
 * pango_itemize_with_base_dir()
Packit 0ec9dd
 *
Packit 0ec9dd
 * Pango supports bidirectional text (like Arabic and Hebrew) automatically.
Packit 0ec9dd
 * Some applications, however, need some help to correctly handle bidirectional text.
Packit 0ec9dd
 *
Packit 0ec9dd
 * The #PangoDirection type can be used with pango_context_set_base_dir() to
Packit 0ec9dd
 * instruct Pango about the direction of text, though in most cases Pango detects
Packit 0ec9dd
 * that correctly and automatically.  The rest of the facilities in this section
Packit 0ec9dd
 * are used internally by Pango already, and are provided to help applications
Packit 0ec9dd
 * that need more direct control over bidirectional setting of text.
Packit 0ec9dd
 */
Packit 0ec9dd
#include "config.h"
Packit 0ec9dd
Packit 0ec9dd
#include <string.h>
Packit 0ec9dd
Packit 0ec9dd
#include <fribidi.h>
Packit 0ec9dd
Packit 0ec9dd
#include "pango-bidi-type.h"
Packit 0ec9dd
#include "pango-utils.h"
Packit 0ec9dd
Packit 0ec9dd
#if FRIBIDI_MAJOR_VERSION >= 1
Packit 0ec9dd
#define USE_FRIBIDI_EX_API
Packit 0ec9dd
#endif
Packit 0ec9dd
Packit 0ec9dd
/**
Packit 0ec9dd
 * pango_bidi_type_for_unichar:
Packit 0ec9dd
 * @ch: a Unicode character
Packit 0ec9dd
 *
Packit 0ec9dd
 * Determines the normative bidirectional character type of a
Packit 0ec9dd
 * character, as specified in the Unicode Character Database.
Packit 0ec9dd
 *
Packit 0ec9dd
 * A simplified version of this function is available as
Packit 0ec9dd
 * pango_unichar_direction().
Packit 0ec9dd
 *
Packit 0ec9dd
 * Return value: the bidirectional character type, as used in the
Packit 0ec9dd
 * Unicode bidirectional algorithm.
Packit 0ec9dd
 *
Packit 0ec9dd
 * Since: 1.22
Packit 0ec9dd
 */
Packit 0ec9dd
PangoBidiType
Packit 0ec9dd
pango_bidi_type_for_unichar (gunichar ch)
Packit 0ec9dd
{
Packit 0ec9dd
  FriBidiCharType fribidi_ch_type;
Packit 0ec9dd
Packit 0ec9dd
  G_STATIC_ASSERT (sizeof (FriBidiChar) == sizeof (gunichar));
Packit 0ec9dd
Packit 0ec9dd
  fribidi_ch_type = fribidi_get_bidi_type (ch);
Packit 0ec9dd
Packit 0ec9dd
  switch (fribidi_ch_type)
Packit 0ec9dd
    {
Packit 0ec9dd
    case FRIBIDI_TYPE_LTR:  return PANGO_BIDI_TYPE_L;
Packit 0ec9dd
    case FRIBIDI_TYPE_LRE:  return PANGO_BIDI_TYPE_LRE;
Packit 0ec9dd
    case FRIBIDI_TYPE_LRO:  return PANGO_BIDI_TYPE_LRO;
Packit 0ec9dd
    case FRIBIDI_TYPE_RTL:  return PANGO_BIDI_TYPE_R;
Packit 0ec9dd
    case FRIBIDI_TYPE_AL:   return PANGO_BIDI_TYPE_AL;
Packit 0ec9dd
    case FRIBIDI_TYPE_RLE:  return PANGO_BIDI_TYPE_RLE;
Packit 0ec9dd
    case FRIBIDI_TYPE_RLO:  return PANGO_BIDI_TYPE_RLO;
Packit 0ec9dd
    case FRIBIDI_TYPE_PDF:  return PANGO_BIDI_TYPE_PDF;
Packit 0ec9dd
    case FRIBIDI_TYPE_EN:   return PANGO_BIDI_TYPE_EN;
Packit 0ec9dd
    case FRIBIDI_TYPE_ES:   return PANGO_BIDI_TYPE_ES;
Packit 0ec9dd
    case FRIBIDI_TYPE_ET:   return PANGO_BIDI_TYPE_ET;
Packit 0ec9dd
    case FRIBIDI_TYPE_AN:   return PANGO_BIDI_TYPE_AN;
Packit 0ec9dd
    case FRIBIDI_TYPE_CS:   return PANGO_BIDI_TYPE_CS;
Packit 0ec9dd
    case FRIBIDI_TYPE_NSM:  return PANGO_BIDI_TYPE_NSM;
Packit 0ec9dd
    case FRIBIDI_TYPE_BN:   return PANGO_BIDI_TYPE_BN;
Packit 0ec9dd
    case FRIBIDI_TYPE_BS:   return PANGO_BIDI_TYPE_B;
Packit 0ec9dd
    case FRIBIDI_TYPE_SS:   return PANGO_BIDI_TYPE_S;
Packit 0ec9dd
    case FRIBIDI_TYPE_WS:   return PANGO_BIDI_TYPE_WS;
Packit 0ec9dd
    case FRIBIDI_TYPE_ON:   return PANGO_BIDI_TYPE_ON;
Packit 0ec9dd
    default:
Packit 0ec9dd
      /* TODO
Packit 0ec9dd
       * This function has not been updated for latest FriBidi.
Packit 0ec9dd
       * Should add new types and / or deprecate this function. */
Packit 0ec9dd
      return PANGO_BIDI_TYPE_ON;
Packit 0ec9dd
    }
Packit 0ec9dd
}
Packit 0ec9dd
Packit 0ec9dd
/* Some bidi-related functions */
Packit 0ec9dd
Packit 0ec9dd
/**
Packit 0ec9dd
 * pango_log2vis_get_embedding_levels:
Packit 0ec9dd
 * @text:      the text to itemize.
Packit 0ec9dd
 * @length:    the number of bytes (not characters) to process, or -1
Packit 0ec9dd
 *             if @text is nul-terminated and the length should be calculated.
Packit 0ec9dd
 * @pbase_dir: input base direction, and output resolved direction.
Packit 0ec9dd
 *
Packit 0ec9dd
 * This will return the bidirectional embedding levels of the input paragraph
Packit 0ec9dd
 * as defined by the Unicode Bidirectional Algorithm available at:
Packit 0ec9dd
 *
Packit 0ec9dd
 *   http://www.unicode.org/reports/tr9/
Packit 0ec9dd
 *
Packit 0ec9dd
 * If the input base direction is a weak direction, the direction of the
Packit 0ec9dd
 * characters in the text will determine the final resolved direction.
Packit 0ec9dd
 *
Packit 0ec9dd
 * Return value: a newly allocated array of embedding levels, one item per
Packit 0ec9dd
 *               character (not byte), that should be freed using g_free.
Packit 0ec9dd
 *
Packit 0ec9dd
 * Since: 1.4
Packit 0ec9dd
 */
Packit 0ec9dd
guint8 *
Packit 0ec9dd
pango_log2vis_get_embedding_levels (const gchar    *text,
Packit 0ec9dd
				    int             length,
Packit 0ec9dd
				    PangoDirection *pbase_dir)
Packit 0ec9dd
{
Packit 0ec9dd
  glong n_chars, i;
Packit 0ec9dd
  guint8 *embedding_levels_list;
Packit 0ec9dd
  const gchar *p;
Packit 0ec9dd
  FriBidiParType fribidi_base_dir;
Packit 0ec9dd
  FriBidiCharType *bidi_types;
Packit 0ec9dd
#ifdef USE_FRIBIDI_EX_API
Packit 0ec9dd
  FriBidiBracketType *bracket_types;
Packit 0ec9dd
#endif
Packit 0ec9dd
  FriBidiLevel max_level;
Packit 0ec9dd
  FriBidiCharType ored_types = 0;
Packit 0ec9dd
  FriBidiCharType anded_strongs = FRIBIDI_TYPE_RLE;
Packit 0ec9dd
Packit 0ec9dd
  G_STATIC_ASSERT (sizeof (FriBidiLevel) == sizeof (guint8));
Packit 0ec9dd
  G_STATIC_ASSERT (sizeof (FriBidiChar) == sizeof (gunichar));
Packit 0ec9dd
Packit 0ec9dd
  switch (*pbase_dir)
Packit 0ec9dd
    {
Packit 0ec9dd
    case PANGO_DIRECTION_LTR:
Packit 0ec9dd
    case PANGO_DIRECTION_TTB_RTL:
Packit 0ec9dd
      fribidi_base_dir = FRIBIDI_PAR_LTR;
Packit 0ec9dd
      break;
Packit 0ec9dd
    case PANGO_DIRECTION_RTL:
Packit 0ec9dd
    case PANGO_DIRECTION_TTB_LTR:
Packit 0ec9dd
      fribidi_base_dir = FRIBIDI_PAR_RTL;
Packit 0ec9dd
      break;
Packit 0ec9dd
    case PANGO_DIRECTION_WEAK_RTL:
Packit 0ec9dd
      fribidi_base_dir = FRIBIDI_PAR_WRTL;
Packit 0ec9dd
      break;
Packit 0ec9dd
    case PANGO_DIRECTION_WEAK_LTR:
Packit 0ec9dd
    case PANGO_DIRECTION_NEUTRAL:
Packit 0ec9dd
    default:
Packit 0ec9dd
      fribidi_base_dir = FRIBIDI_PAR_WLTR;
Packit 0ec9dd
      break;
Packit 0ec9dd
    }
Packit 0ec9dd
Packit 0ec9dd
  if (length < 0)
Packit 0ec9dd
    length = strlen (text);
Packit 0ec9dd
Packit 0ec9dd
  n_chars = g_utf8_strlen (text, length);
Packit 0ec9dd
Packit 0ec9dd
  bidi_types = g_new (FriBidiCharType, n_chars);
Packit 0ec9dd
#ifdef USE_FRIBIDI_EX_API
Packit 0ec9dd
  bracket_types = g_new (FriBidiBracketType, n_chars);
Packit 0ec9dd
#endif
Packit 0ec9dd
  embedding_levels_list = g_new (guint8, n_chars);
Packit 0ec9dd
Packit 0ec9dd
  for (i = 0, p = text; p < text + length; p = g_utf8_next_char(p), i++)
Packit 0ec9dd
    {
Packit 0ec9dd
      gunichar ch = g_utf8_get_char (p);
Packit Service e60a5a
      FriBidiCharType char_type = fribidi_get_bidi_type (ch);
Packit Service e60a5a
Packit Service e60a5a
      if (i == n_chars)
Packit Service e60a5a
        break;
Packit Service e60a5a
Packit 0ec9dd
      bidi_types[i] = char_type;
Packit 0ec9dd
      ored_types |= char_type;
Packit 0ec9dd
      if (FRIBIDI_IS_STRONG (char_type))
Packit 0ec9dd
        anded_strongs &= char_type;
Packit 0ec9dd
#ifdef USE_FRIBIDI_EX_API
Packit 0ec9dd
      if (G_UNLIKELY(bidi_types[i] == FRIBIDI_TYPE_ON))
Packit 0ec9dd
        bracket_types[i] = fribidi_get_bracket (ch);
Packit 0ec9dd
      else
Packit 0ec9dd
        bracket_types[i] = FRIBIDI_NO_BRACKET;
Packit 0ec9dd
#endif
Packit 0ec9dd
    }
Packit 0ec9dd
Packit 0ec9dd
    /* Short-circuit (malloc-expensive) FriBidi call for unidirectional
Packit 0ec9dd
     * text.
Packit 0ec9dd
     *
Packit 0ec9dd
     * For details see:
Packit 0ec9dd
     * https://bugzilla.gnome.org/show_bug.cgi?id=590183
Packit 0ec9dd
     */
Packit 0ec9dd
Packit 0ec9dd
#ifndef FRIBIDI_IS_ISOLATE
Packit 0ec9dd
#define FRIBIDI_IS_ISOLATE(x) 0
Packit 0ec9dd
#endif
Packit 0ec9dd
    /* The case that all resolved levels will be ltr.
Packit 0ec9dd
     * No isolates, all strongs be LTR, there should be no Arabic numbers
Packit 0ec9dd
     * (or letters for that matter), and one of the following:
Packit 0ec9dd
     *
Packit 0ec9dd
     * o base_dir doesn't have an RTL taste.
Packit 0ec9dd
     * o there are letters, and base_dir is weak.
Packit 0ec9dd
     */
Packit 0ec9dd
    if (!FRIBIDI_IS_ISOLATE (ored_types) &&
Packit 0ec9dd
	!FRIBIDI_IS_RTL (ored_types) &&
Packit 0ec9dd
	!FRIBIDI_IS_ARABIC (ored_types) &&
Packit 0ec9dd
	(!FRIBIDI_IS_RTL (fribidi_base_dir) ||
Packit 0ec9dd
	  (FRIBIDI_IS_WEAK (fribidi_base_dir) &&
Packit 0ec9dd
	   FRIBIDI_IS_LETTER (ored_types))
Packit 0ec9dd
	))
Packit 0ec9dd
      {
Packit 0ec9dd
        /* all LTR */
Packit 0ec9dd
	fribidi_base_dir = FRIBIDI_PAR_LTR;
Packit 0ec9dd
	memset (embedding_levels_list, 0, n_chars);
Packit 0ec9dd
	goto resolved;
Packit 0ec9dd
      }
Packit 0ec9dd
    /* The case that all resolved levels will be RTL is much more complex.
Packit 0ec9dd
     * No isolates, no numbers, all strongs are RTL, and one of
Packit 0ec9dd
     * the following:
Packit 0ec9dd
     *
Packit 0ec9dd
     * o base_dir has an RTL taste (may be weak).
Packit 0ec9dd
     * o there are letters, and base_dir is weak.
Packit 0ec9dd
     */
Packit 0ec9dd
    else if (!FRIBIDI_IS_ISOLATE (ored_types) &&
Packit 0ec9dd
	     !FRIBIDI_IS_NUMBER (ored_types) &&
Packit 0ec9dd
	     FRIBIDI_IS_RTL (anded_strongs) &&
Packit 0ec9dd
	     (FRIBIDI_IS_RTL (fribidi_base_dir) ||
Packit 0ec9dd
	       (FRIBIDI_IS_WEAK (fribidi_base_dir) &&
Packit 0ec9dd
		FRIBIDI_IS_LETTER (ored_types))
Packit 0ec9dd
	     ))
Packit 0ec9dd
      {
Packit 0ec9dd
        /* all RTL */
Packit 0ec9dd
	fribidi_base_dir = FRIBIDI_PAR_RTL;
Packit 0ec9dd
	memset (embedding_levels_list, 1, n_chars);
Packit 0ec9dd
	goto resolved;
Packit 0ec9dd
      }
Packit 0ec9dd
Packit 0ec9dd
Packit 0ec9dd
#ifdef USE_FRIBIDI_EX_API
Packit 0ec9dd
  max_level = fribidi_get_par_embedding_levels_ex (bidi_types, bracket_types, n_chars,
Packit 0ec9dd
						   &fribidi_base_dir,
Packit 0ec9dd
						   (FriBidiLevel*)embedding_levels_list);
Packit 0ec9dd
#else
Packit 0ec9dd
  max_level = fribidi_get_par_embedding_levels (bidi_types, n_chars,
Packit 0ec9dd
						&fribidi_base_dir,
Packit 0ec9dd
						(FriBidiLevel*)embedding_levels_list);
Packit 0ec9dd
#endif
Packit 0ec9dd
Packit 0ec9dd
  if (G_UNLIKELY(max_level == 0))
Packit 0ec9dd
    {
Packit 0ec9dd
      /* fribidi_get_par_embedding_levels() failed. */
Packit 0ec9dd
      memset (embedding_levels_list, 0, length);
Packit 0ec9dd
    }
Packit 0ec9dd
Packit 0ec9dd
resolved:
Packit 0ec9dd
  g_free (bidi_types);
Packit 0ec9dd
Packit 0ec9dd
#ifdef USE_FRIBIDI_EX_API
Packit 0ec9dd
  g_free (bracket_types);
Packit 0ec9dd
#endif
Packit 0ec9dd
Packit 0ec9dd
  *pbase_dir = (fribidi_base_dir == FRIBIDI_PAR_LTR) ?  PANGO_DIRECTION_LTR : PANGO_DIRECTION_RTL;
Packit 0ec9dd
Packit 0ec9dd
  return embedding_levels_list;
Packit 0ec9dd
}
Packit 0ec9dd
Packit 0ec9dd
/**
Packit 0ec9dd
 * pango_unichar_direction:
Packit 0ec9dd
 * @ch: a Unicode character
Packit 0ec9dd
 *
Packit 0ec9dd
 * Determines the inherent direction of a character; either
Packit 0ec9dd
 * %PANGO_DIRECTION_LTR, %PANGO_DIRECTION_RTL, or
Packit 0ec9dd
 * %PANGO_DIRECTION_NEUTRAL.
Packit 0ec9dd
 *
Packit 0ec9dd
 * This function is useful to categorize characters into left-to-right
Packit 0ec9dd
 * letters, right-to-left letters, and everything else.  If full
Packit 0ec9dd
 * Unicode bidirectional type of a character is needed,
Packit 0ec9dd
 * pango_bidi_type_for_unichar() can be used instead.
Packit 0ec9dd
 *
Packit 0ec9dd
 * Return value: the direction of the character.
Packit 0ec9dd
 */
Packit 0ec9dd
PangoDirection
Packit 0ec9dd
pango_unichar_direction (gunichar ch)
Packit 0ec9dd
{
Packit 0ec9dd
  FriBidiCharType fribidi_ch_type;
Packit 0ec9dd
Packit 0ec9dd
  G_STATIC_ASSERT (sizeof (FriBidiChar) == sizeof (gunichar));
Packit 0ec9dd
Packit 0ec9dd
  fribidi_ch_type = fribidi_get_bidi_type (ch);
Packit 0ec9dd
Packit 0ec9dd
  if (!FRIBIDI_IS_STRONG (fribidi_ch_type))
Packit 0ec9dd
    return PANGO_DIRECTION_NEUTRAL;
Packit 0ec9dd
  else if (FRIBIDI_IS_RTL (fribidi_ch_type))
Packit 0ec9dd
    return PANGO_DIRECTION_RTL;
Packit 0ec9dd
  else
Packit 0ec9dd
    return PANGO_DIRECTION_LTR;
Packit 0ec9dd
}
Packit 0ec9dd
Packit 0ec9dd
/**
Packit 0ec9dd
 * pango_get_mirror_char:
Packit 0ec9dd
 * @ch: a Unicode character
Packit 0ec9dd
 * @mirrored_ch: location to store the mirrored character
Packit 0ec9dd
 *
Packit 0ec9dd
 * If @ch has the Unicode mirrored property and there is another Unicode
Packit 0ec9dd
 * character that typically has a glyph that is the mirror image of @ch's
Packit 0ec9dd
 * glyph, puts that character in the address pointed to by @mirrored_ch.
Packit 0ec9dd
 *
Packit 0ec9dd
 * Use g_unichar_get_mirror_char() instead; the docs for that function
Packit 0ec9dd
 * provide full details.
Packit 0ec9dd
 *
Packit 0ec9dd
 * Return value: %TRUE if @ch has a mirrored character and @mirrored_ch is
Packit 0ec9dd
 * filled in, %FALSE otherwise
Packit 0ec9dd
 **/
Packit 0ec9dd
gboolean
Packit 0ec9dd
pango_get_mirror_char (gunichar        ch,
Packit 0ec9dd
		       gunichar       *mirrored_ch)
Packit 0ec9dd
{
Packit 0ec9dd
  return g_unichar_get_mirror_char (ch, mirrored_ch);
Packit 0ec9dd
}
Packit 0ec9dd