/* Categories of Unicode characters.
   Copyright (C) 2002, 2006-2007, 2011-2020 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2011.

   This program is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
|
Packit |
aea12f |
|
|
Packit |
aea12f |
#include <config.h>
|
|
Packit |
aea12f |
|
|
Packit |
aea12f |
/* Specification. */
|
|
Packit |
aea12f |
#include "unictype.h"
|
|
Packit |
aea12f |
|
|
Packit |
aea12f |
/* Long names of the single-bit general categories.
   uc_general_category_long_name indexes this table with the position of
   the one bit that is set in a category's bitmask, so the order of the
   rows matters.  Each row is 22 bytes wide: the longest name,
   "Connector Punctuation", has 21 characters plus the terminating NUL.  */
static const char u_category_long_name[30][22] =
  {
    /* Letters: indices 0..4.  */
    "Uppercase Letter",
    "Lowercase Letter",
    "Titlecase Letter",
    "Modifier Letter",
    "Other Letter",
    /* Marks: indices 5..7.  */
    "Nonspacing Mark",
    "Spacing Mark",
    "Enclosing Mark",
    /* Numbers: indices 8..10.  */
    "Decimal Number",
    "Letter Number",
    "Other Number",
    /* Punctuation: indices 11..17.  */
    "Connector Punctuation",
    "Dash Punctuation",
    "Open Punctuation",
    "Close Punctuation",
    "Initial Punctuation",
    "Final Punctuation",
    "Other Punctuation",
    /* Symbols: indices 18..21.  */
    "Math Symbol",
    "Currency Symbol",
    "Modifier Symbol",
    "Other Symbol",
    /* Separators: indices 22..24.  */
    "Space Separator",
    "Line Separator",
    "Paragraph Separator",
    /* Everything else: indices 25..29.  */
    "Control",
    "Format",
    "Surrogate",
    "Private Use",
    "Unassigned"
  };
|
|
Packit |
aea12f |
|
|
Packit |
aea12f |
const char *
|
|
Packit |
aea12f |
uc_general_category_long_name (uc_general_category_t category)
|
|
Packit |
aea12f |
{
|
|
Packit |
aea12f |
uint32_t bitmask = category.bitmask;
|
|
Packit |
aea12f |
/* bitmask should consist of a single bit. */
|
|
Packit |
aea12f |
if (bitmask != 0)
|
|
Packit |
aea12f |
{
|
|
Packit |
aea12f |
if ((bitmask & (bitmask - 1)) == 0)
|
|
Packit |
aea12f |
{
|
|
Packit |
aea12f |
int bit;
|
|
Packit |
aea12f |
/* Take log2 using a variant of Robert Harley's method.
|
|
Packit |
aea12f |
Found by Bruno Haible 1996. */
|
|
Packit |
aea12f |
uint32_t n = bitmask;
|
|
Packit |
aea12f |
static const char ord2_tab[64] =
|
|
Packit |
aea12f |
{
|
|
Packit |
aea12f |
-1, 0, 1, 12, 2, 6, -1, 13, 3, -1, 7, -1, -1, -1, -1, 14,
|
|
Packit |
aea12f |
10, 4, -1, -1, 8, -1, -1, 25, -1, -1, -1, -1, -1, 21, 27, 15,
|
|
Packit |
aea12f |
31, 11, 5, -1, -1, -1, -1, -1, 9, -1, -1, 24, -1, -1, 20, 26,
|
|
Packit |
aea12f |
30, -1, -1, -1, -1, 23, -1, 19, 29, -1, 22, 18, 28, 17, 16, -1
|
|
Packit |
aea12f |
};
|
|
Packit |
aea12f |
n += n << 4;
|
|
Packit |
aea12f |
n += n << 6;
|
|
Packit |
aea12f |
n = (n << 16) - n;
|
|
Packit |
aea12f |
bit = ord2_tab[n >> 26];
|
|
Packit |
aea12f |
|
|
Packit |
aea12f |
if (bit < sizeof (u_category_long_name) / sizeof (u_category_long_name[0]))
|
|
Packit |
aea12f |
return u_category_long_name[bit];
|
|
Packit |
aea12f |
}
|
|
Packit |
aea12f |
else
|
|
Packit |
aea12f |
{
|
|
Packit |
aea12f |
if (bitmask == UC_CATEGORY_MASK_L)
|
|
Packit |
aea12f |
return "Letter";
|
|
Packit |
aea12f |
if (bitmask == UC_CATEGORY_MASK_LC)
|
|
Packit |
aea12f |
return "Cased Letter";
|
|
Packit |
aea12f |
if (bitmask == UC_CATEGORY_MASK_M)
|
|
Packit |
aea12f |
return "Mark";
|
|
Packit |
aea12f |
if (bitmask == UC_CATEGORY_MASK_N)
|
|
Packit |
aea12f |
return "Number";
|
|
Packit |
aea12f |
if (bitmask == UC_CATEGORY_MASK_P)
|
|
Packit |
aea12f |
return "Punctuation";
|
|
Packit |
aea12f |
if (bitmask == UC_CATEGORY_MASK_S)
|
|
Packit |
aea12f |
return "Symbol";
|
|
Packit |
aea12f |
if (bitmask == UC_CATEGORY_MASK_Z)
|
|
Packit |
aea12f |
return "Separator";
|
|
Packit |
aea12f |
if (bitmask == UC_CATEGORY_MASK_C)
|
|
Packit |
aea12f |
return "Other";
|
|
Packit |
aea12f |
}
|
|
Packit |
aea12f |
}
|
|
Packit |
aea12f |
return NULL;
|
|
Packit |
aea12f |
}
|