Tree - source-git/mingw-glib2 - CentOS Git server

source-git / mingw-glib2

Blame glib/gunidecomp.c

Blob History Raw

Packit	ae235b	`/* decomp.c - Character decomposition.`
Packit	ae235b	`*`
Packit	ae235b	`* Copyright (C) 1999, 2000 Tom Tromey`
Packit	ae235b	`* Copyright 2000 Red Hat, Inc.`
Packit	ae235b	`*`
Packit	ae235b	`* This library is free software; you can redistribute it and/or`
Packit	ae235b	`* modify it under the terms of the GNU Lesser General Public`
Packit	ae235b	`* License as published by the Free Software Foundation; either`
Packit	ae235b	`* version 2.1 of the License, or (at your option) any later version.`
Packit	ae235b	`*`
Packit	ae235b	`* This library is distributed in the hope that it will be useful,`
Packit	ae235b	`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
Packit	ae235b	`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
Packit	ae235b	`* Lesser General Public License for more details.`
Packit	ae235b	`*`
Packit	ae235b	`* You should have received a copy of the GNU Lesser General Public License`
Packit	ae235b	`* along with this library; if not, see <http://www.gnu.org/licenses/>.`
Packit	ae235b	`*/`
Packit	ae235b
Packit	ae235b	`/**`
Packit	ae235b	`* SECTION:unicode`
Packit	ae235b	`* @Title: Unicode Manipulation`
Packit	ae235b	`* @Short_description: functions operating on Unicode characters and`
Packit	ae235b	`* UTF-8 strings`
Packit	ae235b	`* @See_also: g_locale_to_utf8(), g_locale_from_utf8()`
Packit	ae235b	`*`
Packit	ae235b	`* This section describes a number of functions for dealing with`
Packit	ae235b	`* Unicode characters and strings. There are analogues of the`
Packit	ae235b	* traditional `ctype.h` character classification and case conversion
Packit	ae235b	`* functions, UTF-8 analogues of some string utility functions,`
Packit	ae235b	`* functions to perform normalization, case conversion and collation`
Packit	ae235b	`* on UTF-8 strings and finally functions to convert between the UTF-8,`
Packit	ae235b	`* UTF-16 and UCS-4 encodings of Unicode.`
Packit	ae235b	`*`
Packit	ae235b	`* The implementations of the Unicode functions in GLib are based`
Packit	ae235b	`* on the Unicode Character Data tables, which are available from`
Packit	ae235b	`* [www.unicode.org](http://www.unicode.org/).`
Packit	ae235b	`* GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,`
Packit	ae235b	`* GLib 2.12 supports Unicode 5.0, GLib 2.16.3 supports Unicode 5.1,`
Packit	ae235b	`* GLib 2.30 supports Unicode 6.0.`
Packit	ae235b	`*/`
Packit	ae235b
Packit	ae235b	`#include "config.h"`
Packit	ae235b
Packit	ae235b	`#include <stdlib.h>`
Packit	ae235b
Packit	ae235b	`#include "gunicode.h"`
Packit	ae235b	`#include "gunidecomp.h"`
Packit	ae235b	`#include "gmem.h"`
Packit	ae235b	`#include "gunicomp.h"`
Packit	ae235b	`#include "gunicodeprivate.h"`
Packit	ae235b
Packit	ae235b
/* Canonical combining class lookup.
 *
 * A page-table entry below G_UNICODE_MAX_TABLE_INDEX selects a row of
 * cclass_data, which is then indexed by the low byte of the code point.
 * Any other entry stands for a single class shared by the whole page,
 * stored biased by G_UNICODE_MAX_TABLE_INDEX. */
#define CC_PART1(page, low) \
  ((combining_class_table_part1[page] < G_UNICODE_MAX_TABLE_INDEX) \
   ? (cclass_data[combining_class_table_part1[page]][low]) \
   : (combining_class_table_part1[page] - G_UNICODE_MAX_TABLE_INDEX))

#define CC_PART2(page, low) \
  ((combining_class_table_part2[page] < G_UNICODE_MAX_TABLE_INDEX) \
   ? (cclass_data[combining_class_table_part2[page]][low]) \
   : (combining_class_table_part2[page] - G_UNICODE_MAX_TABLE_INDEX))

/* Code points up to G_UNICODE_LAST_CHAR_PART1 use the first page table,
 * those in 0xe0000..G_UNICODE_LAST_CHAR use the second one, and anything
 * else evaluates to 0.  The argument is evaluated more than once. */
#define COMBINING_CLASS(c) \
  (((c) > G_UNICODE_LAST_CHAR_PART1) \
   ? ((((c) < 0xe0000) || ((c) > G_UNICODE_LAST_CHAR)) \
      ? 0 \
      : CC_PART2 (((c) - 0xe0000) >> 8, (c) & 0xff)) \
   : CC_PART1 ((c) >> 8, (c) & 0xff))
Packit	ae235b
Packit	ae235b	`/**`
Packit	ae235b	`* g_unichar_combining_class:`
Packit	ae235b	`* @uc: a Unicode character`
Packit	ae235b	`*`
Packit	ae235b	`* Determines the canonical combining class of a Unicode character.`
Packit	ae235b	`*`
Packit	ae235b	`* Returns: the combining class of the character`
Packit	ae235b	`*`
Packit	ae235b	`* Since: 2.14`
Packit	ae235b	`**/`
Packit	ae235b	`gint`
Packit	ae235b	`g_unichar_combining_class (gunichar uc)`
Packit	ae235b	`{`
Packit	ae235b	`return COMBINING_CLASS (uc);`
Packit	ae235b	`}`
Packit	ae235b
/* Constants for algorithmic Hangul syllable composition and decomposition. */

/* Number of distinct values in the L, V and T positions.  A T index of 0
 * means "no T part", so only indices 1..TCount-1 name a real character. */
#define LCount 19
#define VCount 21
#define TCount 28

/* Derived sizes: syllables per L value, and syllables in total. */
#define NCount (VCount * TCount)
#define SCount (LCount * NCount)

/* Start of each code point range used by the arithmetic.  TBase sits one
 * below the first T character actually produced, because T index 0 is
 * never emitted. */
#define SBase 0xAC00
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7
Packit	ae235b
Packit	ae235b	`/**`
Packit	ae235b	`* g_unicode_canonical_ordering:`
Packit	ae235b	`* @string: a UCS-4 encoded string.`
Packit	ae235b	`* @len: the maximum length of @string to use.`
Packit	ae235b	`*`
Packit	ae235b	`* Computes the canonical ordering of a string in-place.`
Packit	ae235b	`* This rearranges decomposed characters in the string`
Packit	ae235b	`* according to their combining classes. See the Unicode`
Packit	ae235b	`* manual for more information.`
Packit	ae235b	`**/`
Packit	ae235b	`void`
Packit	ae235b	`g_unicode_canonical_ordering (gunichar *string,`
Packit	ae235b	`gsize len)`
Packit	ae235b	`{`
Packit	ae235b	`gsize i;`
Packit	ae235b	`int swap = 1;`
Packit	ae235b
Packit	ae235b	`while (swap)`
Packit	ae235b	`{`
Packit	ae235b	`int last;`
Packit	ae235b	`swap = 0;`
Packit	ae235b	`last = COMBINING_CLASS (string[0]);`
Packit	ae235b	`for (i = 0; i < len - 1; ++i)`
Packit	ae235b	`{`
Packit	ae235b	`int next = COMBINING_CLASS (string[i + 1]);`
Packit	ae235b	`if (next != 0 && last > next)`
Packit	ae235b	`{`
Packit	ae235b	`gsize j;`
Packit	ae235b	`/* Percolate item leftward through string. */`
Packit	ae235b	`for (j = i + 1; j > 0; --j)`
Packit	ae235b	`{`
Packit	ae235b	`gunichar t;`
Packit	ae235b	`if (COMBINING_CLASS (string[j - 1]) <= next)`
Packit	ae235b	`break;`
Packit	ae235b	`t = string[j];`
Packit	ae235b	`string[j] = string[j - 1];`
Packit	ae235b	`string[j - 1] = t;`
Packit	ae235b	`swap = 1;`
Packit	ae235b	`}`
Packit	ae235b	`/* We're re-entering the loop looking at the old`
Packit	ae235b	`character again. */`
Packit	ae235b	`next = last;`
Packit	ae235b	`}`
Packit	ae235b	`last = next;`
Packit	ae235b	`}`
Packit	ae235b	`}`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`/* http://www.unicode.org/unicode/reports/tr15/#Hangul`
Packit	ae235b	`* r should be null or have sufficient space. Calling with r == NULL will`
Packit	ae235b	`* only calculate the result_len; however, a buffer with space for three`
Packit	ae235b	`* characters will always be big enough. */`
Packit	ae235b	`static void`
Packit	ae235b	`decompose_hangul (gunichar s,`
Packit	ae235b	`gunichar *r,`
Packit	ae235b	`gsize *result_len)`
Packit	ae235b	`{`
Packit	ae235b	`gint SIndex = s - SBase;`
Packit	ae235b	`gint TIndex = SIndex % TCount;`
Packit	ae235b
Packit	ae235b	`if (r)`
Packit	ae235b	`{`
Packit	ae235b	`r[0] = LBase + SIndex / NCount;`
Packit	ae235b	`r[1] = VBase + (SIndex % NCount) / TCount;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`if (TIndex)`
Packit	ae235b	`{`
Packit	ae235b	`if (r)`
Packit	ae235b	`r[2] = TBase + TIndex;`
Packit	ae235b	`*result_len = 3;`
Packit	ae235b	`}`
Packit	ae235b	`else`
Packit	ae235b	`*result_len = 2;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`/* returns a pointer to a null-terminated UTF-8 string */`
Packit	ae235b	`static const gchar *`
Packit	ae235b	`find_decomposition (gunichar ch,`
Packit	ae235b	`gboolean compat)`
Packit	ae235b	`{`
Packit	ae235b	`int start = 0;`
Packit	ae235b	`int end = G_N_ELEMENTS (decomp_table);`
Packit	ae235b
Packit	ae235b	`if (ch >= decomp_table[start].ch &&`
Packit	ae235b	`ch <= decomp_table[end - 1].ch)`
Packit	ae235b	`{`
Packit	ae235b	`while (TRUE)`
Packit	ae235b	`{`
Packit	ae235b	`int half = (start + end) / 2;`
Packit	ae235b	`if (ch == decomp_table[half].ch)`
Packit	ae235b	`{`
Packit	ae235b	`int offset;`
Packit	ae235b
Packit	ae235b	`if (compat)`
Packit	ae235b	`{`
Packit	ae235b	`offset = decomp_table[half].compat_offset;`
Packit	ae235b	`if (offset == G_UNICODE_NOT_PRESENT_OFFSET)`
Packit	ae235b	`offset = decomp_table[half].canon_offset;`
Packit	ae235b	`}`
Packit	ae235b	`else`
Packit	ae235b	`{`
Packit	ae235b	`offset = decomp_table[half].canon_offset;`
Packit	ae235b	`if (offset == G_UNICODE_NOT_PRESENT_OFFSET)`
Packit	ae235b	`return NULL;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`return &(decomp_expansion_string[offset]);`
Packit	ae235b	`}`
Packit	ae235b	`else if (half == start)`
Packit	ae235b	`break;`
Packit	ae235b	`else if (ch > decomp_table[half].ch)`
Packit	ae235b	`start = half;`
Packit	ae235b	`else`
Packit	ae235b	`end = half;`
Packit	ae235b	`}`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`return NULL;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`/**`
Packit	ae235b	`* g_unicode_canonical_decomposition:`
Packit	ae235b	`* @ch: a Unicode character.`
Packit	ae235b	`* @result_len: location to store the length of the return value.`
Packit	ae235b	`*`
Packit	ae235b	`* Computes the canonical decomposition of a Unicode character.`
Packit	ae235b	`*`
Packit	ae235b	`* Returns: a newly allocated string of Unicode characters.`
Packit	ae235b	`* @result_len is set to the resulting length of the string.`
Packit	ae235b	`*`
Packit	ae235b	`* Deprecated: 2.30: Use the more flexible g_unichar_fully_decompose()`
Packit	ae235b	`* instead.`
Packit	ae235b	`**/`
Packit	ae235b	`gunichar *`
Packit	ae235b	`g_unicode_canonical_decomposition (gunichar ch,`
Packit	ae235b	`gsize *result_len)`
Packit	ae235b	`{`
Packit	ae235b	`const gchar *decomp;`
Packit	ae235b	`const gchar *p;`
Packit	ae235b	`gunichar *r;`
Packit	ae235b
Packit	ae235b	`/* Hangul syllable */`
Packit	ae235b	`if (ch >= SBase && ch < SBase + SCount)`
Packit	ae235b	`{`
Packit	ae235b	`decompose_hangul (ch, NULL, result_len);`
Packit	ae235b	`r = g_malloc (result_len sizeof (gunichar));`
Packit	ae235b	`decompose_hangul (ch, r, result_len);`
Packit	ae235b	`}`
Packit	ae235b	`else if ((decomp = find_decomposition (ch, FALSE)) != NULL)`
Packit	ae235b	`{`
Packit	ae235b	`/* Found it. */`
Packit	ae235b	`int i;`
Packit	ae235b
Packit	ae235b	`*result_len = g_utf8_strlen (decomp, -1);`
Packit	ae235b	`r = g_malloc (result_len sizeof (gunichar));`
Packit	ae235b
Packit	ae235b	`for (p = decomp, i = 0; *p != '\0'; p = g_utf8_next_char (p), i++)`
Packit	ae235b	`r[i] = g_utf8_get_char (p);`
Packit	ae235b	`}`
Packit	ae235b	`else`
Packit	ae235b	`{`
Packit	ae235b	`/* Not in our table. */`
Packit	ae235b	`r = g_malloc (sizeof (gunichar));`
Packit	ae235b	`*r = ch;`
Packit	ae235b	`*result_len = 1;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`return r;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`/* L,V => LV and LV,T => LVT */`
Packit	ae235b	`static gboolean`
Packit	ae235b	`combine_hangul (gunichar a,`
Packit	ae235b	`gunichar b,`
Packit	ae235b	`gunichar *result)`
Packit	ae235b	`{`
Packit	ae235b	`gint LIndex = a - LBase;`
Packit	ae235b	`gint SIndex = a - SBase;`
Packit	ae235b
Packit	ae235b	`gint VIndex = b - VBase;`
Packit	ae235b	`gint TIndex = b - TBase;`
Packit	ae235b
Packit	ae235b	`if (0 <= LIndex && LIndex < LCount`
Packit	ae235b	`&& 0 <= VIndex && VIndex < VCount)`
Packit	ae235b	`{`
Packit	ae235b	`result = SBase + (LIndex VCount + VIndex) * TCount;`
Packit	ae235b	`return TRUE;`
Packit	ae235b	`}`
Packit	ae235b	`else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0`
Packit	ae235b	`&& 0 < TIndex && TIndex < TCount)`
Packit	ae235b	`{`
Packit	ae235b	`*result = a + TIndex;`
Packit	ae235b	`return TRUE;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`return FALSE;`
Packit	ae235b	`}`
Packit	ae235b
/* Composition index lookup.  A compose_table entry at or above
 * G_UNICODE_MAX_TABLE_INDEX is a value shared by the whole page (stored
 * biased by G_UNICODE_MAX_TABLE_INDEX); any other entry selects a row of
 * compose_data that is indexed by Char. */
#define CI(Page, Char) \
  ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (compose_data[compose_table[Page]][Char]))

/* Evaluates to 0 for characters whose page lies beyond COMPOSE_TABLE_LAST.
 * Every use of Char is parenthesized so that an argument containing
 * lower-precedence operators still groups as intended.  The argument is
 * evaluated more than once. */
#define COMPOSE_INDEX(Char) \
  ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))
Packit	ae235b
Packit	ae235b	`static gboolean`
Packit	ae235b	`combine (gunichar a,`
Packit	ae235b	`gunichar b,`
Packit	ae235b	`gunichar *result)`
Packit	ae235b	`{`
Packit	ae235b	`gushort index_a, index_b;`
Packit	ae235b
Packit	ae235b	`if (combine_hangul (a, b, result))`
Packit	ae235b	`return TRUE;`
Packit	ae235b
Packit	ae235b	`index_a = COMPOSE_INDEX(a);`
Packit	ae235b
Packit	ae235b	`if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)`
Packit	ae235b	`{`
Packit	ae235b	`if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])`
Packit	ae235b	`{`
Packit	ae235b	`*result = compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1];`
Packit	ae235b	`return TRUE;`
Packit	ae235b	`}`
Packit	ae235b	`else`
Packit	ae235b	`return FALSE;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`index_b = COMPOSE_INDEX(b);`
Packit	ae235b
Packit	ae235b	`if (index_b >= COMPOSE_SECOND_SINGLE_START)`
Packit	ae235b	`{`
Packit	ae235b	`if (a == compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])`
Packit	ae235b	`{`
Packit	ae235b	`*result = compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1];`
Packit	ae235b	`return TRUE;`
Packit	ae235b	`}`
Packit	ae235b	`else`
Packit	ae235b	`return FALSE;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START &&`
Packit	ae235b	`index_b >= COMPOSE_SECOND_START && index_b < COMPOSE_SECOND_SINGLE_START)`
Packit	ae235b	`{`
Packit	ae235b	`gunichar res = compose_array[index_a - COMPOSE_FIRST_START][index_b - COMPOSE_SECOND_START];`
Packit	ae235b
Packit	ae235b	`if (res)`
Packit	ae235b	`{`
Packit	ae235b	`*result = res;`
Packit	ae235b	`return TRUE;`
Packit	ae235b	`}`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`return FALSE;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`gunichar *`
Packit	ae235b	`_g_utf8_normalize_wc (const gchar *str,`
Packit	ae235b	`gssize max_len,`
Packit	ae235b	`GNormalizeMode mode)`
Packit	ae235b	`{`
Packit	ae235b	`gsize n_wc;`
Packit	ae235b	`gunichar *wc_buffer;`
Packit	ae235b	`const char *p;`
Packit	ae235b	`gsize last_start;`
Packit	ae235b	`gboolean do_compat = (mode == G_NORMALIZE_NFKC \|\|`
Packit	ae235b	`mode == G_NORMALIZE_NFKD);`
Packit	ae235b	`gboolean do_compose = (mode == G_NORMALIZE_NFC \|\|`
Packit	ae235b	`mode == G_NORMALIZE_NFKC);`
Packit	ae235b
Packit	ae235b	`n_wc = 0;`
Packit	ae235b	`p = str;`
Packit	ae235b	`while ((max_len < 0 \|\| p < str + max_len) && *p)`
Packit	ae235b	`{`
Packit	ae235b	`const gchar *decomp;`
Packit	ae235b	`gunichar wc = g_utf8_get_char (p);`
Packit	ae235b
Packit	ae235b	`if (wc >= SBase && wc < SBase + SCount)`
Packit	ae235b	`{`
Packit	ae235b	`gsize result_len;`
Packit	ae235b	`decompose_hangul (wc, NULL, &result_len);`
Packit	ae235b	`n_wc += result_len;`
Packit	ae235b	`}`
Packit	ae235b	`else`
Packit	ae235b	`{`
Packit	ae235b	`decomp = find_decomposition (wc, do_compat);`
Packit	ae235b
Packit	ae235b	`if (decomp)`
Packit	ae235b	`n_wc += g_utf8_strlen (decomp, -1);`
Packit	ae235b	`else`
Packit	ae235b	`n_wc++;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`p = g_utf8_next_char (p);`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`wc_buffer = g_new (gunichar, n_wc + 1);`
Packit	ae235b
Packit	ae235b	`last_start = 0;`
Packit	ae235b	`n_wc = 0;`
Packit	ae235b	`p = str;`
Packit	ae235b	`while ((max_len < 0 \|\| p < str + max_len) && *p)`
Packit	ae235b	`{`
Packit	ae235b	`gunichar wc = g_utf8_get_char (p);`
Packit	ae235b	`const gchar *decomp;`
Packit	ae235b	`int cc;`
Packit	ae235b	`gsize old_n_wc = n_wc;`
Packit	ae235b
Packit	ae235b	`if (wc >= SBase && wc < SBase + SCount)`
Packit	ae235b	`{`
Packit	ae235b	`gsize result_len;`
Packit	ae235b	`decompose_hangul (wc, wc_buffer + n_wc, &result_len);`
Packit	ae235b	`n_wc += result_len;`
Packit	ae235b	`}`
Packit	ae235b	`else`
Packit	ae235b	`{`
Packit	ae235b	`decomp = find_decomposition (wc, do_compat);`
Packit	ae235b
Packit	ae235b	`if (decomp)`
Packit	ae235b	`{`
Packit	ae235b	`const char *pd;`
Packit	ae235b	`for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))`
Packit	ae235b	`wc_buffer[n_wc++] = g_utf8_get_char (pd);`
Packit	ae235b	`}`
Packit	ae235b	`else`
Packit	ae235b	`wc_buffer[n_wc++] = wc;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`if (n_wc > 0)`
Packit	ae235b	`{`
Packit	ae235b	`cc = COMBINING_CLASS (wc_buffer[old_n_wc]);`
Packit	ae235b
Packit	ae235b	`if (cc == 0)`
Packit	ae235b	`{`
Packit	ae235b	`g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);`
Packit	ae235b	`last_start = old_n_wc;`
Packit	ae235b	`}`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`p = g_utf8_next_char (p);`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`if (n_wc > 0)`
Packit	ae235b	`{`
Packit	ae235b	`g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);`
Packit	ae235b	`last_start = n_wc;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`wc_buffer[n_wc] = 0;`
Packit	ae235b
Packit	ae235b	`/* All decomposed and reordered */`
Packit	ae235b
Packit	ae235b	`if (do_compose && n_wc > 0)`
Packit	ae235b	`{`
Packit	ae235b	`gsize i, j;`
Packit	ae235b	`int last_cc = 0;`
Packit	ae235b	`last_start = 0;`
Packit	ae235b
Packit	ae235b	`for (i = 0; i < n_wc; i++)`
Packit	ae235b	`{`
Packit	ae235b	`int cc = COMBINING_CLASS (wc_buffer[i]);`
Packit	ae235b
Packit	ae235b	`if (i > 0 &&`
Packit	ae235b	`(last_cc == 0 \|\| last_cc < cc) &&`
Packit	ae235b	`combine (wc_buffer[last_start], wc_buffer[i],`
Packit	ae235b	`&wc_buffer[last_start]))`
Packit	ae235b	`{`
Packit	ae235b	`for (j = i + 1; j < n_wc; j++)`
Packit	ae235b	`wc_buffer[j-1] = wc_buffer[j];`
Packit	ae235b	`n_wc--;`
Packit	ae235b	`i--;`
Packit	ae235b
Packit	ae235b	`if (i == last_start)`
Packit	ae235b	`last_cc = 0;`
Packit	ae235b	`else`
Packit	ae235b	`last_cc = COMBINING_CLASS (wc_buffer[i-1]);`
Packit	ae235b
Packit	ae235b	`continue;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`if (cc == 0)`
Packit	ae235b	`last_start = i;`
Packit	ae235b
Packit	ae235b	`last_cc = cc;`
Packit	ae235b	`}`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`wc_buffer[n_wc] = 0;`
Packit	ae235b
Packit	ae235b	`return wc_buffer;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`/**`
Packit	ae235b	`* g_utf8_normalize:`
Packit	ae235b	`* @str: a UTF-8 encoded string.`
Packit	ae235b	`* @len: length of @str, in bytes, or -1 if @str is nul-terminated.`
Packit	ae235b	`* @mode: the type of normalization to perform.`
Packit	ae235b	`*`
Packit	ae235b	`* Converts a string into canonical form, standardizing`
Packit	ae235b	`* such issues as whether a character with an accent`
Packit	ae235b	`* is represented as a base character and combining`
Packit	ae235b	`* accent or as a single precomposed character. The`
Packit	ae235b	`* string has to be valid UTF-8, otherwise %NULL is`
Packit	ae235b	`* returned. You should generally call g_utf8_normalize()`
Packit	ae235b	`* before comparing two Unicode strings.`
Packit	ae235b	`*`
Packit	ae235b	`* The normalization mode %G_NORMALIZE_DEFAULT only`
Packit	ae235b	`* standardizes differences that do not affect the`
Packit	ae235b	`* text content, such as the above-mentioned accent`
Packit	ae235b	`* representation. %G_NORMALIZE_ALL also standardizes`
Packit	ae235b	`* the "compatibility" characters in Unicode, such`
Packit	ae235b	`* as SUPERSCRIPT THREE to the standard forms`
Packit	ae235b	`* (in this case DIGIT THREE). Formatting information`
Packit	ae235b	`* may be lost but for most text operations such`
Packit	ae235b	`* characters should be considered the same.`
Packit	ae235b	`*`
Packit	ae235b	`* %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE`
Packit	ae235b	`* are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,`
Packit	ae235b	`* but return a result with composed forms rather`
Packit	ae235b	`* than a maximally decomposed form. This is often`
Packit	ae235b	`* useful if you intend to convert the string to`
Packit	ae235b	`* a legacy encoding or pass it to a system with`
Packit	ae235b	`* less capable Unicode handling.`
Packit	ae235b	`*`
Packit	ae235b	`* Returns: a newly allocated string, that is the`
Packit	ae235b	`* normalized form of @str, or %NULL if @str is not`
Packit	ae235b	`* valid UTF-8.`
Packit	ae235b	`**/`
Packit	ae235b	`gchar *`
Packit	ae235b	`g_utf8_normalize (const gchar *str,`
Packit	ae235b	`gssize len,`
Packit	ae235b	`GNormalizeMode mode)`
Packit	ae235b	`{`
Packit	ae235b	`gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);`
Packit	ae235b	`gchar *result;`
Packit	ae235b
Packit	ae235b	`result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);`
Packit	ae235b	`g_free (result_wc);`
Packit	ae235b
Packit	ae235b	`return result;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`static gboolean`
Packit	ae235b	`decompose_hangul_step (gunichar ch,`
Packit	ae235b	`gunichar *a,`
Packit	ae235b	`gunichar *b)`
Packit	ae235b	`{`
Packit	ae235b	`gint SIndex, TIndex;`
Packit	ae235b
Packit	ae235b	`if (ch < SBase \|\| ch >= SBase + SCount)`
Packit	ae235b	`return FALSE; /* not a hangul syllable */`
Packit	ae235b
Packit	ae235b	`SIndex = ch - SBase;`
Packit	ae235b	`TIndex = SIndex % TCount;`
Packit	ae235b
Packit	ae235b	`if (TIndex)`
Packit	ae235b	`{`
Packit	ae235b	`/* split LVT -> LV,T */`
Packit	ae235b	`*a = ch - TIndex;`
Packit	ae235b	`*b = TBase + TIndex;`
Packit	ae235b	`}`
Packit	ae235b	`else`
Packit	ae235b	`{`
Packit	ae235b	`/* split LV -> L,V */`
Packit	ae235b	`*a = LBase + SIndex / NCount;`
Packit	ae235b	`*b = VBase + (SIndex % NCount) / TCount;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`return TRUE;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`/**`
Packit	ae235b	`* g_unichar_decompose:`
Packit	ae235b	`* @ch: a Unicode character`
Packit	ae235b	`* @a: return location for the first component of @ch`
Packit	ae235b	`* @b: return location for the second component of @ch`
Packit	ae235b	`*`
Packit	ae235b	`* Performs a single decomposition step of the`
Packit	ae235b	`* Unicode canonical decomposition algorithm.`
Packit	ae235b	`*`
Packit	ae235b	`* This function does not include compatibility`
Packit	ae235b	`* decompositions. It does, however, include algorithmic`
Packit	ae235b	`* Hangul Jamo decomposition, as well as 'singleton'`
Packit	ae235b	`* decompositions which replace a character by a single`
Packit	ae235b	`* other character. In the case of singletons *@b will`
Packit	ae235b	`* be set to zero.`
Packit	ae235b	`*`
Packit	ae235b	`* If @ch is not decomposable, @a is set to @ch and @b`
Packit	ae235b	`* is set to zero.`
Packit	ae235b	`*`
Packit	ae235b	`* Note that the way Unicode decomposition pairs are`
Packit	ae235b	`* defined, it is guaranteed that @b would not decompose`
Packit	ae235b	`* further, but @a may itself decompose. To get the full`
Packit	ae235b	`* canonical decomposition for @ch, one would need to`
Packit	ae235b	`* recursively call this function on @a. Or use`
Packit	ae235b	`* g_unichar_fully_decompose().`
Packit	ae235b	`*`
Packit	ae235b	`* See`
Packit	ae235b	`* [UAX#15](http://unicode.org/reports/tr15/)`
Packit	ae235b	`* for details.`
Packit	ae235b	`*`
Packit	ae235b	`* Returns: %TRUE if the character could be decomposed`
Packit	ae235b	`*`
Packit	ae235b	`* Since: 2.30`
Packit	ae235b	`*/`
Packit	ae235b	`gboolean`
Packit	ae235b	`g_unichar_decompose (gunichar ch,`
Packit	ae235b	`gunichar *a,`
Packit	ae235b	`gunichar *b)`
Packit	ae235b	`{`
Packit	ae235b	`gint start = 0;`
Packit	ae235b	`gint end = G_N_ELEMENTS (decomp_step_table);`
Packit	ae235b
Packit	ae235b	`if (decompose_hangul_step (ch, a, b))`
Packit	ae235b	`return TRUE;`
Packit	ae235b
Packit	ae235b	`/* TODO use bsearch() */`
Packit	ae235b	`if (ch >= decomp_step_table[start].ch &&`
Packit	ae235b	`ch <= decomp_step_table[end - 1].ch)`
Packit	ae235b	`{`
Packit	ae235b	`while (TRUE)`
Packit	ae235b	`{`
Packit	ae235b	`gint half = (start + end) / 2;`
Packit	ae235b	`const decomposition_step *p = &(decomp_step_table[half]);`
Packit	ae235b	`if (ch == p->ch)`
Packit	ae235b	`{`
Packit	ae235b	`*a = p->a;`
Packit	ae235b	`*b = p->b;`
Packit	ae235b	`return TRUE;`
Packit	ae235b	`}`
Packit	ae235b	`else if (half == start)`
Packit	ae235b	`break;`
Packit	ae235b	`else if (ch > p->ch)`
Packit	ae235b	`start = half;`
Packit	ae235b	`else`
Packit	ae235b	`end = half;`
Packit	ae235b	`}`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`*a = ch;`
Packit	ae235b	`*b = 0;`
Packit	ae235b
Packit	ae235b	`return FALSE;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`/**`
Packit	ae235b	`* g_unichar_compose:`
Packit	ae235b	`* @a: a Unicode character`
Packit	ae235b	`* @b: a Unicode character`
Packit	ae235b	`* @ch: return location for the composed character`
Packit	ae235b	`*`
Packit	ae235b	`* Performs a single composition step of the`
Packit	ae235b	`* Unicode canonical composition algorithm.`
Packit	ae235b	`*`
Packit	ae235b	`* This function includes algorithmic Hangul Jamo composition,`
Packit	ae235b	`* but it is not exactly the inverse of g_unichar_decompose().`
Packit	ae235b	`* No composition can have either of @a or @b equal to zero.`
Packit	ae235b	`* To be precise, this function composes if and only if`
Packit	ae235b	`* there exists a Primary Composite P which is canonically`
Packit	ae235b	`* equivalent to the sequence <@a,@b>. See the Unicode`
Packit	ae235b	`* Standard for the definition of Primary Composite.`
Packit	ae235b	`*`
Packit	ae235b	`* If @a and @b do not compose a new character, @ch is set to zero.`
Packit	ae235b	`*`
Packit	ae235b	`* See`
Packit	ae235b	`* [UAX#15](http://unicode.org/reports/tr15/)`
Packit	ae235b	`* for details.`
Packit	ae235b	`*`
Packit	ae235b	`* Returns: %TRUE if the characters could be composed`
Packit	ae235b	`*`
Packit	ae235b	`* Since: 2.30`
Packit	ae235b	`*/`
Packit	ae235b	`gboolean`
Packit	ae235b	`g_unichar_compose (gunichar a,`
Packit	ae235b	`gunichar b,`
Packit	ae235b	`gunichar *ch)`
Packit	ae235b	`{`
Packit	ae235b	`if (combine (a, b, ch))`
Packit	ae235b	`return TRUE;`
Packit	ae235b
Packit	ae235b	`*ch = 0;`
Packit	ae235b	`return FALSE;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`/**`
Packit	ae235b	`* g_unichar_fully_decompose:`
Packit	ae235b	`* @ch: a Unicode character.`
Packit	ae235b	`* @compat: whether to perform canonical or compatibility decomposition`
Packit	ae235b	`* @result: (nullable): location to store decomposed result, or %NULL`
Packit	ae235b	`* @result_len: length of @result`
Packit	ae235b	`*`
Packit	ae235b	`* Computes the canonical or compatibility decomposition of a`
Packit	ae235b	`* Unicode character. For compatibility decomposition,`
Packit	ae235b	`* pass %TRUE for @compat; for canonical decomposition`
Packit	ae235b	`* pass %FALSE for @compat.`
Packit	ae235b	`*`
Packit	ae235b	`* The decomposed sequence is placed in @result. Only up to`
Packit	ae235b	`* @result_len characters are written into @result. The length`
Packit	ae235b	`* of the full decomposition (irrespective of @result_len) is`
Packit	ae235b	`* returned by the function. For canonical decomposition,`
Packit	ae235b	`* currently all decompositions are of length at most 4, but`
Packit	ae235b	`* this may change in the future (very unlikely though).`
Packit	ae235b	`* At any rate, Unicode does guarantee that a buffer of length`
Packit	ae235b	`* 18 is always enough for both compatibility and canonical`
Packit	ae235b	`* decompositions, so that is the size recommended. This is provided`
Packit	ae235b	`* as %G_UNICHAR_MAX_DECOMPOSITION_LENGTH.`
Packit	ae235b	`*`
Packit	ae235b	`* See`
Packit	ae235b	`* [UAX#15](http://unicode.org/reports/tr15/)`
Packit	ae235b	`* for details.`
Packit	ae235b	`*`
Packit	ae235b	`* Returns: the length of the full decomposition.`
Packit	ae235b	`*`
Packit	ae235b	`* Since: 2.30`
Packit	ae235b	`**/`
Packit	ae235b	`gsize`
Packit	ae235b	`g_unichar_fully_decompose (gunichar ch,`
Packit	ae235b	`gboolean compat,`
Packit	ae235b	`gunichar *result,`
Packit	ae235b	`gsize result_len)`
Packit	ae235b	`{`
Packit	ae235b	`const gchar *decomp;`
Packit	ae235b	`const gchar *p;`
Packit	ae235b
Packit	ae235b	`/* Hangul syllable */`
Packit	ae235b	`if (ch >= SBase && ch < SBase + SCount)`
Packit	ae235b	`{`
Packit	ae235b	`gsize len, i;`
Packit	ae235b	`gunichar buffer[3];`
Packit	ae235b	`decompose_hangul (ch, result ? buffer : NULL, &len;;`
Packit	ae235b	`if (result)`
Packit	ae235b	`for (i = 0; i < len && i < result_len; i++)`
Packit	ae235b	`result[i] = buffer[i];`
Packit	ae235b	`return len;`
Packit	ae235b	`}`
Packit	ae235b	`else if ((decomp = find_decomposition (ch, compat)) != NULL)`
Packit	ae235b	`{`
Packit	ae235b	`/* Found it. */`
Packit	ae235b	`gsize len, i;`
Packit	ae235b
Packit	ae235b	`len = g_utf8_strlen (decomp, -1);`
Packit	ae235b
Packit	ae235b	`for (p = decomp, i = 0; i < len && i < result_len; p = g_utf8_next_char (p), i++)`
Packit	ae235b	`result[i] = g_utf8_get_char (p);`
Packit	ae235b
Packit	ae235b	`return len;`
Packit	ae235b	`}`
Packit	ae235b
Packit	ae235b	`/* Does not decompose */`
Packit	ae235b	`if (result && result_len >= 1)`
Packit	ae235b	`*result = ch;`
Packit	ae235b	`return 1;`
Packit	ae235b	`}`

source-git / mingw-glib2

Source Code

Blame glib/gunidecomp.c