Blame gllib/uninorm/canonical-decomposition.c

rpm-build 858c0f
/* Canonical decomposition of Unicode characters.
   Copyright (C) 2009-2017 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2009.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
rpm-build 858c0f
rpm-build 858c0f
#include <config.h>
rpm-build 858c0f
rpm-build 858c0f
/* Specification.  */
rpm-build 858c0f
#include "uninorm.h"
rpm-build 858c0f
rpm-build 858c0f
#include <stdlib.h>
rpm-build 858c0f
rpm-build 858c0f
#include "decomposition-table.h"
rpm-build 858c0f
rpm-build 858c0f
int
rpm-build 858c0f
uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition)
rpm-build 858c0f
{
rpm-build 858c0f
  if (uc >= 0xAC00 && uc < 0xD7A4)
rpm-build 858c0f
    {
rpm-build 858c0f
      /* Hangul syllable.  See Unicode standard, chapter 3, section
rpm-build 858c0f
         "Hangul Syllable Decomposition",  See also the clarification at
rpm-build 858c0f
         <http://www.unicode.org/versions/Unicode5.1.0/>, section
rpm-build 858c0f
         "Clarification of Hangul Jamo Handling".  */
rpm-build 858c0f
      unsigned int t;
rpm-build 858c0f
rpm-build 858c0f
      uc -= 0xAC00;
rpm-build 858c0f
      t = uc % 28;
rpm-build 858c0f
rpm-build 858c0f
      if (t == 0)
rpm-build 858c0f
        {
rpm-build 858c0f
          unsigned int v, l;
rpm-build 858c0f
rpm-build 858c0f
          uc = uc / 28;
rpm-build 858c0f
          v = uc % 21;
rpm-build 858c0f
          l = uc / 21;
rpm-build 858c0f
rpm-build 858c0f
          decomposition[0] = 0x1100 + l;
rpm-build 858c0f
          decomposition[1] = 0x1161 + v;
rpm-build 858c0f
          return 2;
rpm-build 858c0f
        }
rpm-build 858c0f
      else
rpm-build 858c0f
        {
rpm-build 858c0f
#if 1 /* Return the pairwise decomposition, not the full decomposition.  */
rpm-build 858c0f
          decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
rpm-build 858c0f
          decomposition[1] = 0x11A7 + t;
rpm-build 858c0f
          return 2;
rpm-build 858c0f
#else
rpm-build 858c0f
          unsigned int v, l;
rpm-build 858c0f
rpm-build 858c0f
          uc = uc / 28;
rpm-build 858c0f
          v = uc % 21;
rpm-build 858c0f
          l = uc / 21;
rpm-build 858c0f
rpm-build 858c0f
          decomposition[0] = 0x1100 + l;
rpm-build 858c0f
          decomposition[1] = 0x1161 + v;
rpm-build 858c0f
          decomposition[2] = 0x11A7 + t;
rpm-build 858c0f
          return 3;
rpm-build 858c0f
#endif
rpm-build 858c0f
        }
rpm-build 858c0f
    }
rpm-build 858c0f
  else if (uc < 0x110000)
rpm-build 858c0f
    {
rpm-build 858c0f
      unsigned short entry = decomp_index (uc);
rpm-build 858c0f
      /* An entry of (unsigned short)(-1) denotes an absent entry.
rpm-build 858c0f
         Otherwise, bit 15 of the entry tells whether the decomposition
rpm-build 858c0f
         is a canonical one.  */
rpm-build 858c0f
      if (entry < 0x8000)
rpm-build 858c0f
        {
rpm-build 858c0f
          const unsigned char *p;
rpm-build 858c0f
          unsigned int element;
rpm-build 858c0f
          unsigned int length;
rpm-build 858c0f
rpm-build 858c0f
          p = &gl_uninorm_decomp_chars_table[3 * entry];
rpm-build 858c0f
          element = (p[0] << 16) | (p[1] << 8) | p[2];
rpm-build 858c0f
          /* The first element has 5 bits for the decomposition type.  */
rpm-build 858c0f
          if (((element >> 18) & 0x1f) != UC_DECOMP_CANONICAL)
rpm-build 858c0f
            abort ();
rpm-build 858c0f
          length = 1;
rpm-build 858c0f
          for (;;)
rpm-build 858c0f
            {
rpm-build 858c0f
              /* Every element has an 18 bits wide Unicode code point.  */
rpm-build 858c0f
              *decomposition = element & 0x3ffff;
rpm-build 858c0f
              /* Bit 23 tells whether there are more elements,  */
rpm-build 858c0f
              if ((element & (1 << 23)) == 0)
rpm-build 858c0f
                break;
rpm-build 858c0f
              p += 3;
rpm-build 858c0f
              element = (p[0] << 16) | (p[1] << 8) | p[2];
rpm-build 858c0f
              decomposition++;
rpm-build 858c0f
              length++;
rpm-build 858c0f
            }
rpm-build 858c0f
          return length;
rpm-build 858c0f
        }
rpm-build 858c0f
    }
rpm-build 858c0f
  return -1;
rpm-build 858c0f
}