Blame lib/unistring/uninorm/decomposition.c

Packit aea12f
/* Decomposition of Unicode characters.
Packit Service 991b93
   Copyright (C) 2009-2020 Free Software Foundation, Inc.
Packit aea12f
   Written by Bruno Haible <bruno@clisp.org>, 2009.
Packit aea12f
Packit aea12f
   This program is free software: you can redistribute it and/or
Packit aea12f
   modify it under the terms of either:
Packit aea12f
Packit aea12f
     * the GNU Lesser General Public License as published by the Free
Packit aea12f
       Software Foundation; either version 3 of the License, or (at your
Packit aea12f
       option) any later version.
Packit aea12f
Packit aea12f
   or
Packit aea12f
Packit aea12f
     * the GNU General Public License as published by the Free
Packit aea12f
       Software Foundation; either version 2 of the License, or (at your
Packit aea12f
       option) any later version.
Packit aea12f
Packit aea12f
   or both in parallel, as here.
Packit aea12f
   This program is distributed in the hope that it will be useful,
Packit aea12f
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit aea12f
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit aea12f
   Lesser General Public License for more details.
Packit aea12f
Packit aea12f
   You should have received a copy of the GNU Lesser General Public License
Packit aea12f
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
Packit aea12f
Packit aea12f
#include <config.h>
Packit aea12f
Packit aea12f
/* Specification.  */
Packit aea12f
#include "uninorm.h"
Packit aea12f
Packit aea12f
#include "uninorm/decomposition-table.h"
Packit aea12f
Packit aea12f
int
Packit aea12f
uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
Packit aea12f
{
Packit aea12f
  if (uc >= 0xAC00 && uc < 0xD7A4)
Packit aea12f
    {
Packit aea12f
      /* Hangul syllable.  See Unicode standard, chapter 3, section
Packit aea12f
         "Hangul Syllable Decomposition",  See also the clarification at
Packit Service 991b93
         <https://www.unicode.org/versions/Unicode5.1.0/>, section
Packit aea12f
         "Clarification of Hangul Jamo Handling".  */
Packit aea12f
      unsigned int t;
Packit aea12f
Packit aea12f
      uc -= 0xAC00;
Packit aea12f
      t = uc % 28;
Packit aea12f
Packit aea12f
      *decomp_tag = UC_DECOMP_CANONICAL;
Packit aea12f
      if (t == 0)
Packit aea12f
        {
Packit aea12f
          unsigned int v, l;
Packit aea12f
Packit aea12f
          uc = uc / 28;
Packit aea12f
          v = uc % 21;
Packit aea12f
          l = uc / 21;
Packit aea12f
Packit aea12f
          decomposition[0] = 0x1100 + l;
Packit aea12f
          decomposition[1] = 0x1161 + v;
Packit aea12f
          return 2;
Packit aea12f
        }
Packit aea12f
      else
Packit aea12f
        {
Packit aea12f
#if 1 /* Return the pairwise decomposition, not the full decomposition.  */
Packit aea12f
          decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
Packit aea12f
          decomposition[1] = 0x11A7 + t;
Packit aea12f
          return 2;
Packit aea12f
#else
Packit aea12f
          unsigned int v, l;
Packit aea12f
Packit aea12f
          uc = uc / 28;
Packit aea12f
          v = uc % 21;
Packit aea12f
          l = uc / 21;
Packit aea12f
Packit aea12f
          decomposition[0] = 0x1100 + l;
Packit aea12f
          decomposition[1] = 0x1161 + v;
Packit aea12f
          decomposition[2] = 0x11A7 + t;
Packit aea12f
          return 3;
Packit aea12f
#endif
Packit aea12f
        }
Packit aea12f
    }
Packit aea12f
  else if (uc < 0x110000)
Packit aea12f
    {
Packit aea12f
      unsigned short entry = decomp_index (uc);
Packit aea12f
      if (entry != (unsigned short)(-1))
Packit aea12f
        {
Packit aea12f
          const unsigned char *p;
Packit aea12f
          unsigned int element;
Packit aea12f
          unsigned int length;
Packit aea12f
Packit aea12f
          p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];
Packit aea12f
          element = (p[0] << 16) | (p[1] << 8) | p[2];
Packit aea12f
          /* The first element has 5 bits for the decomposition type.  */
Packit aea12f
          *decomp_tag = (element >> 18) & 0x1f;
Packit aea12f
          length = 1;
Packit aea12f
          for (;;)
Packit aea12f
            {
Packit aea12f
              /* Every element has an 18 bits wide Unicode code point.  */
Packit aea12f
              *decomposition = element & 0x3ffff;
Packit aea12f
              /* Bit 23 tells whether there are more elements,  */
Packit aea12f
              if ((element & (1 << 23)) == 0)
Packit aea12f
                break;
Packit aea12f
              p += 3;
Packit aea12f
              element = (p[0] << 16) | (p[1] << 8) | p[2];
Packit aea12f
              decomposition++;
Packit aea12f
              length++;
Packit aea12f
            }
Packit aea12f
          return length;
Packit aea12f
        }
Packit aea12f
    }
Packit aea12f
  return -1;
Packit aea12f
}