Blob Blame History Raw
/* Pango
 * pangocoretext-shape.c
 *
 * Copyright (C) 2005 Imendio AB
 * Copyright (C) 2010  Kristian Rietveld  <kris@gtk.org>
 * Copyright (C) 2012  Kristian Rietveld  <kris@lanedo.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "config.h"
#include <glib.h>
#include <string.h>
#include <Carbon/Carbon.h>
#include "pango-utils.h"
#include "pangocoretext-private.h"
#include "pango-impl-utils.h"

#if defined(MAC_OS_X_VERSION_MAX_ALLOWED) && MAC_OS_X_VERSION_MAX_ALLOWED < 1060
/* Replacement used when the SDK does not provide this helper: tells whether
 * a UTF-16 code unit lies in the high (leading) surrogate range
 * 0xD800..0xDBFF.
 */
CF_INLINE Boolean CFStringIsSurrogateHighCharacter(UniChar character) {
    if (character < 0xD800UL || character > 0xDBFFUL)
        return false;

    return true;
}

/* Replacement used when the SDK does not provide this helper: tells whether
 * a UTF-16 code unit lies in the low (trailing) surrogate range
 * 0xDC00..0xDFFF.
 */
CF_INLINE Boolean CFStringIsSurrogateLowCharacter(UniChar character) {
    if (character < 0xDC00UL || character > 0xDFFFUL)
        return false;

    return true;
}

/* Replacement used when the SDK does not provide this helper: combines a
 * high/low surrogate pair into the code point it encodes. The high unit
 * supplies the upper ten bits, the low unit the lower ten, and the result
 * is offset by 0x10000.
 */
CF_INLINE UTF32Char CFStringGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) {
    const unsigned long upper_bits = (surrogateHigh - 0xD800UL) << 10;
    const unsigned long lower_bits = surrogateLow - 0xDC00UL;

    return upper_bits + lower_bits + 0x0010000UL;
}
#endif

/* Fills slot i of the glyph string: stores the glyph, zeroes both offsets,
 * records "offset" as the slot's cluster and sets the width to the logical
 * width that the font reports for the glyph.
 */
static void
set_glyph (PangoFont        *font,
	   PangoGlyphString *glyphs,
	   int               i,
	   int               offset,
	   PangoGlyph        glyph)
{
  PangoGlyphInfo *slot = &glyphs->glyphs[i];
  PangoRectangle logical;

  slot->glyph = glyph;
  slot->geometry.x_offset = 0;
  slot->geometry.y_offset = 0;
  glyphs->log_clusters[i] = offset;

  /* Only the logical extents are needed, hence NULL for the ink rectangle. */
  pango_font_get_glyph_extents (font, slot->glyph, NULL, &logical);
  slot->geometry.width = logical.width;
}


/* The "RunIterator" helps us to iterate over the array of runs that is obtained
 * from the CoreText typesetter. Even though Pango considers the string that is
 * passed to the shape function a single run, CoreText might consider it to
 * consist of multiple runs. Because of this, we have an interface around the
 * CoreText array of runs that works like iterating a single array, which makes
 * our job in the shape function easier.
 */

struct RunIterator
{
  /* Line created from the attributed string; released in run_iterator_free(). */
  CTLineRef line;
  /* CFString built from the input text; released in run_iterator_free(). */
  CFStringRef cstr;
  /* Glyph runs obtained from "line" via CTLineGetGlyphRuns(). */
  CFArrayRef runs;
  /* Sum of the glyph counts of all runs. */
  CFIndex glyph_count;

  /* Position in the overall glyph sequence; -1 marks the end of iteration. */
  CFIndex total_ct_i;
  /* Position inside the current run; reset to 0 whenever a run is loaded. */
  CFIndex ct_i;

  /* Maps an index into "cstr" to a character index, see
   * run_iterator_get_chr_idx_lut(). Allocated with malloc().
   */
  CFIndex *chr_idx_lut;

  /* Index of the loaded run in "runs", or -1 when no run is loaded. */
  int current_run_number;
  CTRunRef current_run;
  /* String indices of the current run's glyphs; allocated with malloc(). */
  CFIndex *current_indices;
  /* Glyphs of the current run. Points either at the array returned by
   * CTRunGetGlyphsPtr() or at current_cgglyphs_buffer.
   */
  const CGGlyph *current_cgglyphs;
  /* Copy of the glyphs, allocated only when CTRunGetGlyphsPtr() returned
   * NULL; NULL otherwise.
   */
  CGGlyph *current_cgglyphs_buffer;
  /* Value of CTRunGetStatus() for the current run. */
  CTRunStatus current_run_status;
};

/* Drops all per-run state: frees the index array and the glyph copy (if
 * any) and resets the run fields so that no run is loaded.
 */
static void
run_iterator_free_current_run (struct RunIterator *iter)
{
  /* free() accepts NULL, so the buffers need no guard. */
  free (iter->current_cgglyphs_buffer);
  free (iter->current_indices);

  iter->current_cgglyphs_buffer = NULL;
  iter->current_indices = NULL;
  iter->current_cgglyphs = NULL;
  iter->current_run = NULL;
  iter->current_run_number = -1;
}

static void
run_iterator_set_current_run (struct RunIterator *iter,
                              const int           run_number)
{
  CFIndex ct_glyph_count;

  run_iterator_free_current_run (iter);

  iter->current_run_number = run_number;
  iter->current_run = CFArrayGetValueAtIndex (iter->runs, run_number);
  ct_glyph_count = CTRunGetGlyphCount (iter->current_run);

  iter->current_run_status = CTRunGetStatus (iter->current_run);
  iter->current_cgglyphs = CTRunGetGlyphsPtr (iter->current_run);
  if (!iter->current_cgglyphs)
    {
      iter->current_cgglyphs_buffer = (CGGlyph *)malloc (sizeof (CGGlyph) * ct_glyph_count);
      CTRunGetGlyphs (iter->current_run, CFRangeMake (0, ct_glyph_count),
                      iter->current_cgglyphs_buffer);
      iter->current_cgglyphs = iter->current_cgglyphs_buffer;
    }

  iter->current_indices = malloc (sizeof (CFIndex) * ct_glyph_count);
  CTRunGetStringIndices (iter->current_run, CFRangeMake (0, ct_glyph_count),
                         iter->current_indices);

  iter->ct_i = 0;
}

static CFIndex
run_iterator_get_glyph_count (struct RunIterator *iter)
{
  CFIndex accumulator = 0;
  CFIndex i;

  for (i = 0; i < CFArrayGetCount (iter->runs); i++)
    accumulator += CTRunGetGlyphCount (CFArrayGetValueAtIndex (iter->runs, i));

  return accumulator;
}

/* Builds a lookup table that translates an index into the CFString (counted
 * in UTF-16 code units) to the index of the unicode character that the code
 * unit belongs to. Both halves of a surrogate pair map to the same character
 * index. The character index is what we use to match up with the characters
 * in the UTF8 string provided by Pango.
 *
 * The table has one entry per code unit of "cstr", is allocated with
 * malloc() and is owned by the caller.
 */
static CFIndex *
run_iterator_get_chr_idx_lut (CFStringRef cstr)
{
  const CFIndex n_units = CFStringGetLength (cstr);
  CFIndex *lut = malloc (sizeof (CFIndex) * n_units);
  CFIndex next_chr = 0;
  CFIndex pos;

  for (pos = 0; pos < n_units; pos++)
    {
      gboolean starts_pair;

      lut[pos] = next_chr;

      /* A high surrogate directly followed by a low surrogate does not
       * finish a character yet; the low surrogate does.
       */
      starts_pair = CFStringIsSurrogateHighCharacter (CFStringGetCharacterAtIndex (cstr, pos)) &&
                    pos + 1 < n_units &&
                    CFStringIsSurrogateLowCharacter (CFStringGetCharacterAtIndex (cstr, pos + 1));

      if (!starts_pair)
        next_chr++;
    }

  return lut;
}

/* These functions are compiled out (#if 0) to silence the compiler, but
 * kept around because they might be of use when fixing the more
 * intricate issues noted in the comment in the function
 * _pango_core_text_shape() below.
 */
#if 0
/* Reports whether the right-to-left flag is set in the status of the
 * first run.
 */
static gboolean
run_iterator_is_rtl (struct RunIterator *iter)
{
  /* Assume run status is equal for all runs? */
  CTRunStatus run_status = CTRunGetStatus (CFArrayGetValueAtIndex (iter->runs, 0));

  return run_status & kCTRunStatusRightToLeft;
}

/* Reports whether the non-monotonic flag is set in the status of the
 * current run.
 */
static gboolean
run_iterator_run_is_non_monotonic (struct RunIterator *iter)
{
  CTRunStatus run_status = CTRunGetStatus (iter->current_run);

  return run_status & kCTRunStatusNonMonotonic;
}
#endif

static gunichar
run_iterator_get_character (struct RunIterator *iter)
{
  UniChar ch = CFStringGetCharacterAtIndex (iter->cstr, iter->current_indices[iter->ct_i]);

  if (CFStringIsSurrogateHighCharacter (ch) &&
      iter->current_indices[iter->ct_i] + 1 < CFStringGetLength (iter->cstr))
    {
      UniChar ch2 = CFStringGetCharacterAtIndex (iter->cstr, iter->current_indices[iter->ct_i]+1);

      if (CFStringIsSurrogateLowCharacter (ch2))
        return CFStringGetLongCharacterForSurrogatePair (ch, ch2);
    }

  return ch;
}

/* Returns the glyph at the iterator's position within the current run. */
static CGGlyph
run_iterator_get_cgglyph (struct RunIterator *iter)
{
  const CGGlyph *run_glyphs = iter->current_cgglyphs;

  return run_glyphs[iter->ct_i];
}

/* Returns the character index of the current glyph: its string index in
 * the CFString, translated through the lookup table.
 */
static CFIndex
run_iterator_get_index (struct RunIterator *iter)
{
  const CFIndex string_pos = iter->current_indices[iter->ct_i];

  return iter->chr_idx_lut[string_pos];
}

/* Sets up "iter" for walking the glyphs that CoreText produces for the
 * first "length" bytes of "text" when typeset with "ctfont".
 *
 * Returns TRUE on success; the iterator must then be released with
 * run_iterator_free(). If CoreText produced no glyphs at all the iterator
 * starts out at its end. Returns FALSE when no CFString could be created
 * from the text; in that case nothing is left allocated and
 * run_iterator_free() must not be called, since iter->line was never
 * created.
 */
static gboolean
run_iterator_create (struct RunIterator *iter,
                     const char         *text,
                     const gint          length,
                     CTFontRef           ctfont)
{
  char *copy;
  CFDictionaryRef attributes;
  CFAttributedStringRef attstr;

  CFTypeRef keys[] = {
      (CFTypeRef) kCTFontAttributeName
  };

  CFTypeRef values[] = {
      ctfont
  };

  /* Initialize RunIterator structure */
  iter->line = NULL;
  iter->runs = NULL;
  iter->current_run_number = -1;
  iter->current_run = NULL;
  iter->current_indices = NULL;
  iter->chr_idx_lut = NULL;
  iter->current_cgglyphs = NULL;
  iter->current_cgglyphs_buffer = NULL;

  /* CFStringCreateWithCString() wants a NUL-terminated string, but "text"
   * is only guaranteed to span "length" bytes. g_strndup() copies at most
   * "length" bytes and always terminates the copy, so nothing past the
   * end of the input is read.
   */
  copy = g_strndup (text, length);

  iter->cstr = CFStringCreateWithCString (kCFAllocatorDefault, copy,
                                          kCFStringEncodingUTF8);
  g_free (copy);

  if (!iter->cstr)
    /* Creating a CFString can fail if the input string does not
     * adhere to the specified encoding (i.e. it contains invalid UTF8).
     * Nothing else has been allocated yet, so there is nothing to
     * release here.
     */
    return FALSE;

  /* Create CTLine */
  attributes = CFDictionaryCreate (kCFAllocatorDefault,
                                   (const void **)keys,
                                   (const void **)values,
                                   1,
                                   &kCFCopyStringDictionaryKeyCallBacks,
                                   &kCFTypeDictionaryValueCallBacks);

  attstr = CFAttributedStringCreate (kCFAllocatorDefault,
                                     iter->cstr,
                                     attributes);

  iter->line = CTLineCreateWithAttributedString (attstr);
  iter->runs = CTLineGetGlyphRuns (iter->line);

  CFRelease (attstr);
  CFRelease (attributes);

  iter->chr_idx_lut = run_iterator_get_chr_idx_lut (iter->cstr);

  iter->total_ct_i = 0;
  iter->glyph_count = run_iterator_get_glyph_count (iter);

  /* If CoreText did not render any glyphs for this string (can happen,
   * e.g. a run solely consisting of a BOM), glyph_count will be zero and
   * we immediately set the iterator variable to indicate end of glyph list.
   */
  if (iter->glyph_count > 0)
    run_iterator_set_current_run (iter, 0);
  else
    iter->total_ct_i = -1;

  return TRUE;
}

/* Releases everything a successfully created iterator owns: the state of
 * the current run, the CoreText line, the CFString and the lookup table.
 */
static void
run_iterator_free (struct RunIterator *iter)
{
  run_iterator_free_current_run (iter);

  CFRelease (iter->line);
  CFRelease (iter->cstr);

  free (iter->chr_idx_lut);
}

/* Returns TRUE once the iterator has moved past the last glyph, which is
 * signalled by total_ct_i being -1.
 */
static gboolean
run_iterator_at_end (struct RunIterator *iter)
{
  return iter->total_ct_i == -1;
}

static void
run_iterator_advance (struct RunIterator *iter)
{
  if (iter->total_ct_i >= iter->glyph_count - 1)
    {
      run_iterator_free_current_run (iter);
      iter->ct_i = iter->total_ct_i = -1;
    }
  else
    {
      iter->total_ct_i++;
      iter->ct_i++;

      if (iter->total_ct_i < iter->glyph_count &&
          iter->ct_i >= CTRunGetGlyphCount (iter->current_run))
        {
          iter->current_run_number++;
          run_iterator_set_current_run (iter, iter->current_run_number);
        }
    }
}



/* One glyph taken from the CoreText glyph sequence, as collected by
 * create_core_text_glyph_list().
 */
struct GlyphInfo
{
  /* Character index of the glyph (from run_iterator_get_index()); also the
   * sort key used by glyph_info_compare_func().
   */
  CFIndex index;
  /* The glyph as reported by CoreText. */
  CGGlyph cgglyph;
  /* The unicode character found at the glyph's string index. */
  gunichar wc;
};

/* Comparison callback that orders GlyphInfo entries by ascending index.
 * Returns -1, 0 or 1.
 */
static gint
glyph_info_compare_func (gconstpointer a, gconstpointer b)
{
  const CFIndex lhs = ((const struct GlyphInfo *) a)->index;
  const CFIndex rhs = ((const struct GlyphInfo *) b)->index;

  /* Each comparison yields 0 or 1, so the difference is the sign. */
  return (lhs > rhs) - (lhs < rhs);
}

/* List callback that returns one GlyphInfo to the slice allocator;
 * "user_data" is not used.
 */
static void
glyph_info_free (gpointer data, gpointer user_data)
{
  struct GlyphInfo *info = data;

  g_slice_free (struct GlyphInfo, info);
}

/* Lets CoreText typeset the first "length" bytes of "text" with "ctfont"
 * and returns the resulting glyphs as a list of slice-allocated GlyphInfo
 * structures, sorted by ascending character index. The caller owns the
 * list and its elements.
 *
 * Returns NULL when the iterator could not be created; the list is also
 * NULL (empty) when CoreText produced no glyphs.
 */
static GSList *
create_core_text_glyph_list (const char *text,
                             gint        length,
                             CTFontRef   ctfont)
{
  struct RunIterator riter;
  GSList *collected = NULL;

  if (!run_iterator_create (&riter, text, length, ctfont))
    return NULL;

  for (; !run_iterator_at_end (&riter); run_iterator_advance (&riter))
    {
      struct GlyphInfo *gi = g_slice_new (struct GlyphInfo);

      gi->index = run_iterator_get_index (&riter);
      gi->cgglyph = run_iterator_get_cgglyph (&riter);
      gi->wc = run_iterator_get_character (&riter);

      /* Prepending is cheap; the order is established by the sort below. */
      collected = g_slist_prepend (collected, gi);
    }

  run_iterator_free (&riter);

  return g_slist_sort (collected, glyph_info_compare_func);
}


/* Shapes the first "length" bytes of "text" with the CoreText font behind
 * "font" and stores the result in "glyphs".
 *
 * On success "glyphs" is resized to one entry per character of "text";
 * characters for which CoreText produced no glyph get PANGO_GLYPH_EMPTY.
 * For odd embedding levels the finished glyph string is reversed.
 *
 * If create_core_text_glyph_list() returns NULL (the text could not be
 * converted, or CoreText produced no glyphs) the function returns without
 * touching "glyphs". "paragraph_text" and "paragraph_length" are unused.
 */
void
_pango_core_text_shape (PangoFont           *font,
			const char          *text,
			gint                 length,
			const PangoAnalysis *analysis,
			PangoGlyphString    *glyphs,
			const char          *paragraph_text G_GNUC_UNUSED,
			unsigned int         paragraph_length G_GNUC_UNUSED)
{
  const char *p;
  gulong n_chars, gs_i, gs_prev_i;
  PangoCoreTextFont *cfont = PANGO_CORE_TEXT_FONT (font);
  PangoCoverage *coverage;
  GSList *glyph_list;
  GSList *glyph_iter;

  /* We first fully iterate over the glyph sequence generated by CoreText and
   * store this into a list, which is sorted after the iteration. We make a pass
   * over the sorted linked list to build up the PangoGlyphString.
   *
   * We have to do this in order to properly handle a bunch of characteristics of the
   * glyph sequence generated by the CoreText typesetter:
   *   # E.g. zero-width spaces do not end up in the CoreText glyph sequence. We have
   *     to manually account for the gap in the character indices.
   *   # Sometimes, CoreText generates two glyphs for the same character index. We
   *     currently handle this "properly" as in we do not crash or corrupt memory,
   *     but that's about it.
   *   # Due to mismatches in size, the CoreText glyph sequence can either be longer or
   *     shorter than the PangoGlyphString. Note that the size of the PangoGlyphString
   *     should match the number of characters in "text".
   *
   * If performance becomes a problem, it is certainly possible to use a faster code
   * that only does a single iteration over the string for "simple cases". Simple cases
   * could include these that only consist of one run (simple Latin text), which
   * don't have gaps in the glyph sequence and which are monotonically
   * increasing/decreasing.
   *
   * FIXME items for future fixing:
   *   # We currently don't bother about LTR, Pango core appears to fix this up for us.
   *     (Even when we cared warnings were generated that strings were in the wrong
   *     order, this should be investigated).
   *   # When CoreText generates two glyphs for one character, only one is stored.
   *     This breaks the example strings for e.g. Georgian and Gothic.
   */

  glyph_list = create_core_text_glyph_list (text, length,
                                            pango_core_text_font_get_ctfont (cfont));
  if (!glyph_list)
    return;

  /* Set up for translation of the glyph list to a PangoGlyphString. */
  n_chars = pango_utf8_strlen (text, length);
  pango_glyph_string_set_size (glyphs, n_chars);

  glyph_iter = glyph_list;

  coverage = pango_font_get_coverage (PANGO_FONT (cfont),
                                      analysis->language);

  /* gs_i is the index into the Pango glyph string. gi is the iterator into
   * the (CoreText) glyph list, gi->index is the index into the CFString.
   * In matching, we want gs_i and gi->index to match up.
   *
   * gs_prev_i trails gs_i by one. It is unsigned, so the initial -1 wraps
   * around; it is only used as an index below when gi->index (== gs_i) is
   * greater than zero, by which time it holds gs_i - 1. p walks the UTF8
   * text in step with gs_i, so p - text is the byte offset of character gs_i.
   */
  for (gs_prev_i = -1, gs_i = 0, p = text; gs_i < n_chars;
       gs_prev_i = gs_i, gs_i++, p = g_utf8_next_char (p))
    {
      struct GlyphInfo *gi = glyph_iter != NULL ? glyph_iter->data : NULL;

      if (gi == NULL || gi->index > gs_i)
        {
          /* The glyph string is behind, insert an empty glyph to catch
           * up with the CoreText glyph list. This occurs for instance when
           * CoreText inserts a ligature that covers two characters.
           */
          set_glyph (font, glyphs, gs_i, p - text, PANGO_GLYPH_EMPTY);
          continue;
        }
      else if (gi->index < gs_i)
        {
          /* The CoreText glyph list is behind, fast forward the iterator
           * to catch up. This can happen when CoreText emits two glyphs
           * for one character, which is (as noted in the FIXME above)
           * not handled by us yet.
           */
          while (gi && gi->index < gs_i)
            {
              glyph_iter = g_slist_next (glyph_iter);
              if (glyph_iter)
                gi = glyph_iter->data;
              else
                gi = NULL;
            }
        }

      /* After fast forwarding, gi may be NULL or ahead of gs_i; in that case
       * nothing is stored for this character in this iteration.
       */
      if (gi != NULL && gi->index == gs_i)
        {
          gunichar mirrored_ch;
          PangoCoverageLevel result;

          /* For odd embedding levels, look up coverage and character type
           * for the mirrored character if there is one.
           */
          if (analysis->level % 2)
            if (g_unichar_get_mirror_char (gi->wc, &mirrored_ch))
              gi->wc = mirrored_ch;

          if (gi->wc == 0xa0)	/* non-break-space */
            gi->wc = 0x20;

          result = pango_coverage_get (coverage, gi->wc);

          if (result != PANGO_COVERAGE_NONE)
            {
              set_glyph (font, glyphs, gs_i, p - text, gi->cgglyph);

              if (g_unichar_type (gi->wc) == G_UNICODE_NON_SPACING_MARK)
                {
                  if (gi->index > 0)
                    {
                      PangoRectangle logical_rect, ink_rect;

                      /* Attach the mark to the preceding glyph: the mark takes
                       * over the larger of the two widths and the cluster of
                       * the previous glyph, whose own width becomes zero.
                       */
                      glyphs->glyphs[gs_i].geometry.width = MAX (glyphs->glyphs[gs_prev_i].geometry.width,
                                                                 glyphs->glyphs[gs_i].geometry.width);
                      glyphs->glyphs[gs_prev_i].geometry.width = 0;
                      glyphs->log_clusters[gs_i] = glyphs->log_clusters[gs_prev_i];

                      /* Some heuristics to try to guess how overstrike glyphs are
                       * done and compensate
                       */
                      pango_font_get_glyph_extents (font, glyphs->glyphs[gs_i].glyph, &ink_rect, &logical_rect);
                      if (logical_rect.width == 0 && ink_rect.x == 0)
                        glyphs->glyphs[gs_i].geometry.x_offset = (glyphs->glyphs[gs_i].geometry.width - ink_rect.width) / 2;
                    }
                }
            }
          else
            /* The font's coverage does not include this character. */
            set_glyph (font, glyphs, gs_i, p - text, PANGO_GET_UNKNOWN_GLYPH (gi->wc));

          glyph_iter = g_slist_next (glyph_iter);
        }
    }

  pango_coverage_unref (coverage);
  g_slist_foreach (glyph_list, glyph_info_free, NULL);
  g_slist_free (glyph_list);

  /* Odd embedding level: reverse the whole glyph string. */
  if (analysis->level & 1)
    pango_glyph_string_reverse_range (glyphs, 0, glyphs->num_glyphs);
}