|
Packit |
89ede9 |
/*
|
|
Packit |
89ede9 |
* unicode.c
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Helper unicode functions/values to determine the
|
|
Packit |
89ede9 |
* types of unicode characters.
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
#include "utf8.h"
|
|
Packit |
89ede9 |
#include "unicode.h"
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*
 * unicodeinfo_t
 *
 * One record of the classification table: a Unicode code point
 * together with the UNICODE_* flag value reported for it.
 */
typedef struct {
	unsigned int   value; /* Unicode code point */
	unsigned short info;  /* UNICODE_* classification of that code point */
} unicodeinfo_t;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static unicodeinfo_t unicodeinfo[] = {
|
|
Packit |
89ede9 |
{ 48, UNICODE_NUMBER }, /* 0 */
|
|
Packit |
89ede9 |
{ 49, UNICODE_NUMBER }, /* 1 */
|
|
Packit |
89ede9 |
{ 50, UNICODE_NUMBER }, /* 2 */
|
|
Packit |
89ede9 |
{ 51, UNICODE_NUMBER }, /* 3 */
|
|
Packit |
89ede9 |
{ 52, UNICODE_NUMBER }, /* 4 */
|
|
Packit |
89ede9 |
{ 53, UNICODE_NUMBER }, /* 5 */
|
|
Packit |
89ede9 |
{ 54, UNICODE_NUMBER }, /* 6 */
|
|
Packit |
89ede9 |
{ 55, UNICODE_NUMBER }, /* 7 */
|
|
Packit |
89ede9 |
{ 56, UNICODE_NUMBER }, /* 8 */
|
|
Packit |
89ede9 |
{ 57, UNICODE_NUMBER }, /* 9 */
|
|
Packit |
89ede9 |
{ 65, UNICODE_UPPER }, /* Latin Capital A */
|
|
Packit |
89ede9 |
{ 66, UNICODE_UPPER }, /* Latin Capital B */
|
|
Packit |
89ede9 |
{ 67, UNICODE_UPPER }, /* Latin Capital C */
|
|
Packit |
89ede9 |
{ 68, UNICODE_UPPER }, /* Latin Capital D */
|
|
Packit |
89ede9 |
{ 69, UNICODE_UPPER }, /* Latin Capital E */
|
|
Packit |
89ede9 |
{ 70, UNICODE_UPPER }, /* Latin Capital F */
|
|
Packit |
89ede9 |
{ 71, UNICODE_UPPER }, /* Latin Capital G */
|
|
Packit |
89ede9 |
{ 72, UNICODE_UPPER }, /* Latin Capital H */
|
|
Packit |
89ede9 |
{ 73, UNICODE_UPPER }, /* Latin Capital I */
|
|
Packit |
89ede9 |
{ 74, UNICODE_UPPER }, /* Latin Capital J */
|
|
Packit |
89ede9 |
{ 75, UNICODE_UPPER }, /* Latin Capital K */
|
|
Packit |
89ede9 |
{ 76, UNICODE_UPPER }, /* Latin Capital L */
|
|
Packit |
89ede9 |
{ 77, UNICODE_UPPER }, /* Latin Capital M */
|
|
Packit |
89ede9 |
{ 78, UNICODE_UPPER }, /* Latin Capital N */
|
|
Packit |
89ede9 |
{ 79, UNICODE_UPPER }, /* Latin Capital O */
|
|
Packit |
89ede9 |
{ 80, UNICODE_UPPER }, /* Latin Capital P */
|
|
Packit |
89ede9 |
{ 81, UNICODE_UPPER }, /* Latin Capital Q */
|
|
Packit |
89ede9 |
{ 82, UNICODE_UPPER }, /* Latin Capital R */
|
|
Packit |
89ede9 |
{ 83, UNICODE_UPPER }, /* Latin Capital S */
|
|
Packit |
89ede9 |
{ 84, UNICODE_UPPER }, /* Latin Capital T */
|
|
Packit |
89ede9 |
{ 85, UNICODE_UPPER }, /* Latin Capital U */
|
|
Packit |
89ede9 |
{ 86, UNICODE_UPPER }, /* Latin Capital V */
|
|
Packit |
89ede9 |
{ 87, UNICODE_UPPER }, /* Latin Capital W */
|
|
Packit |
89ede9 |
{ 88, UNICODE_UPPER }, /* Latin Capital X */
|
|
Packit |
89ede9 |
{ 89, UNICODE_UPPER }, /* Latin Capital Y */
|
|
Packit |
89ede9 |
{ 90, UNICODE_UPPER }, /* Latin Capital Z */
|
|
Packit |
89ede9 |
{ 97, UNICODE_LOWER }, /* Latin Small a */
|
|
Packit |
89ede9 |
{ 98, UNICODE_LOWER }, /* Latin Small b */
|
|
Packit |
89ede9 |
{ 99, UNICODE_LOWER }, /* Latin Small c */
|
|
Packit |
89ede9 |
{ 100, UNICODE_LOWER }, /* Latin Small d */
|
|
Packit |
89ede9 |
{ 101, UNICODE_LOWER }, /* Latin Small e */
|
|
Packit |
89ede9 |
{ 102, UNICODE_LOWER }, /* Latin Small f */
|
|
Packit |
89ede9 |
{ 103, UNICODE_LOWER }, /* Latin Small g */
|
|
Packit |
89ede9 |
{ 104, UNICODE_LOWER }, /* Latin Small h */
|
|
Packit |
89ede9 |
{ 105, UNICODE_LOWER }, /* Latin Small i */
|
|
Packit |
89ede9 |
{ 106, UNICODE_LOWER }, /* Latin Small j */
|
|
Packit |
89ede9 |
{ 107, UNICODE_LOWER }, /* Latin Small k */
|
|
Packit |
89ede9 |
{ 108, UNICODE_LOWER }, /* Latin Small l */
|
|
Packit |
89ede9 |
{ 109, UNICODE_LOWER }, /* Latin Small m */
|
|
Packit |
89ede9 |
{ 110, UNICODE_LOWER }, /* Latin Small n */
|
|
Packit |
89ede9 |
{ 111, UNICODE_LOWER }, /* Latin Small o */
|
|
Packit |
89ede9 |
{ 112, UNICODE_LOWER }, /* Latin Small p */
|
|
Packit |
89ede9 |
{ 113, UNICODE_LOWER }, /* Latin Small q */
|
|
Packit |
89ede9 |
{ 114, UNICODE_LOWER }, /* Latin Small r */
|
|
Packit |
89ede9 |
{ 115, UNICODE_LOWER }, /* Latin Small s */
|
|
Packit |
89ede9 |
{ 116, UNICODE_LOWER }, /* Latin Small t */
|
|
Packit |
89ede9 |
{ 117, UNICODE_LOWER }, /* Latin Small u */
|
|
Packit |
89ede9 |
{ 118, UNICODE_LOWER }, /* Latin Small v */
|
|
Packit |
89ede9 |
{ 119, UNICODE_LOWER }, /* Latin Small w */
|
|
Packit |
89ede9 |
{ 120, UNICODE_LOWER }, /* Latin Small x */
|
|
Packit |
89ede9 |
{ 121, UNICODE_LOWER }, /* Latin Small y */
|
|
Packit |
89ede9 |
{ 122, UNICODE_LOWER }, /* Latin Small z */
|
|
Packit |
89ede9 |
{ 192, UNICODE_UPPER }, /* Latin Capital A with grave */
|
|
Packit |
89ede9 |
{ 193, UNICODE_UPPER }, /* Latin Capital A with acute */
|
|
Packit |
89ede9 |
{ 194, UNICODE_UPPER }, /* Latin Capital A with circumflex */
|
|
Packit |
89ede9 |
{ 195, UNICODE_UPPER }, /* Latin Capital A with tilde */
|
|
Packit |
89ede9 |
{ 196, UNICODE_UPPER }, /* Latin Capital A with diuresis */
|
|
Packit |
89ede9 |
{ 197, UNICODE_UPPER }, /* Latin Capital A with ring above */
|
|
Packit |
89ede9 |
{ 198, UNICODE_UPPER }, /* Latin Capital AE */
|
|
Packit |
89ede9 |
{ 199, UNICODE_UPPER }, /* Latin Capital C with cedilla */
|
|
Packit |
89ede9 |
{ 200, UNICODE_UPPER }, /* Latin Capital E with grave */
|
|
Packit |
89ede9 |
{ 201, UNICODE_UPPER }, /* Latin Capital E with acute */
|
|
Packit |
89ede9 |
{ 202, UNICODE_UPPER }, /* Latin Capital E with circumflex */
|
|
Packit |
89ede9 |
{ 203, UNICODE_UPPER }, /* Latin Capital E with diuresis */
|
|
Packit |
89ede9 |
{ 204, UNICODE_UPPER }, /* Latin Capital I with grave */
|
|
Packit |
89ede9 |
{ 205, UNICODE_UPPER }, /* Latin Capital I with acute */
|
|
Packit |
89ede9 |
{ 206, UNICODE_UPPER }, /* Latin Capital I with circumflex */
|
|
Packit |
89ede9 |
{ 207, UNICODE_UPPER }, /* Latin Capital I with diuresis */
|
|
Packit |
89ede9 |
{ 208, UNICODE_UPPER }, /* Latin Capital ETH */
|
|
Packit |
89ede9 |
{ 209, UNICODE_UPPER }, /* Latin Capital N with tilde */
|
|
Packit |
89ede9 |
{ 210, UNICODE_UPPER }, /* Latin Capital O with grave */
|
|
Packit |
89ede9 |
{ 211, UNICODE_UPPER }, /* Latin Capital O with acute */
|
|
Packit |
89ede9 |
{ 212, UNICODE_UPPER }, /* Latin Capital O with circumflex */
|
|
Packit |
89ede9 |
{ 213, UNICODE_UPPER }, /* Latin Capital O with tilde */
|
|
Packit |
89ede9 |
{ 214, UNICODE_UPPER }, /* Latin Captial O with diaeresis */
|
|
Packit |
89ede9 |
{ 216, UNICODE_UPPER }, /* Latin Capital O with stroke */
|
|
Packit |
89ede9 |
{ 217, UNICODE_UPPER }, /* Latin Capital U with grave */
|
|
Packit |
89ede9 |
{ 218, UNICODE_UPPER }, /* Latin Capital U with acute */
|
|
Packit |
89ede9 |
{ 219, UNICODE_UPPER }, /* Latin Capital U with circumflex */
|
|
Packit |
89ede9 |
{ 220, UNICODE_UPPER }, /* Latin Capital U with diaeresis */
|
|
Packit |
89ede9 |
{ 221, UNICODE_UPPER }, /* Latin Capital Y with acute */
|
|
Packit |
89ede9 |
{ 222, UNICODE_UPPER }, /* Latin Capital THORN */
|
|
Packit |
89ede9 |
{ 223, UNICODE_LOWER }, /* German sz ligature */
|
|
Packit |
89ede9 |
{ 224, UNICODE_LOWER }, /* Latin Small a with grave */
|
|
Packit |
89ede9 |
{ 225, UNICODE_LOWER }, /* Latin Small a with acute */
|
|
Packit |
89ede9 |
{ 226, UNICODE_LOWER }, /* Latin Small a with circumflex */
|
|
Packit |
89ede9 |
{ 227, UNICODE_LOWER }, /* Latin Small a with tilde */
|
|
Packit |
89ede9 |
{ 228, UNICODE_LOWER }, /* Latin Small a with diuresis */
|
|
Packit |
89ede9 |
{ 229, UNICODE_LOWER }, /* Latin Small a with ring above */
|
|
Packit |
89ede9 |
{ 230, UNICODE_LOWER }, /* Latin Small ae */
|
|
Packit |
89ede9 |
{ 231, UNICODE_LOWER }, /* Latin Small c with cedilla */
|
|
Packit |
89ede9 |
{ 232, UNICODE_LOWER }, /* Latin Small e with grave */
|
|
Packit |
89ede9 |
{ 233, UNICODE_LOWER }, /* Latin Small e with acute */
|
|
Packit |
89ede9 |
{ 234, UNICODE_LOWER }, /* Latin Small e with circumflex */
|
|
Packit |
89ede9 |
{ 235, UNICODE_LOWER }, /* Latin Small e with diuresis */
|
|
Packit |
89ede9 |
{ 236, UNICODE_LOWER }, /* Latin Small i with grave */
|
|
Packit |
89ede9 |
{ 237, UNICODE_LOWER }, /* Latin Small i with acute */
|
|
Packit |
89ede9 |
{ 238, UNICODE_LOWER }, /* Latin Small i with circumflex */
|
|
Packit |
89ede9 |
{ 239, UNICODE_LOWER }, /* Latin Small i with diuresis */
|
|
Packit |
89ede9 |
{ 240, UNICODE_LOWER }, /* Latin Small eth */
|
|
Packit |
89ede9 |
{ 241, UNICODE_LOWER }, /* Latin Small n with tilde */
|
|
Packit |
89ede9 |
{ 242, UNICODE_LOWER }, /* Latin Small o with grave */
|
|
Packit |
89ede9 |
{ 243, UNICODE_LOWER }, /* Latin Small o with acute */
|
|
Packit |
89ede9 |
{ 244, UNICODE_LOWER }, /* Latin Small o with circumflex */
|
|
Packit |
89ede9 |
{ 245, UNICODE_LOWER }, /* Latin Small o with tilde */
|
|
Packit |
89ede9 |
{ 246, UNICODE_LOWER }, /* Latin Small o with diaeresis */
|
|
Packit |
89ede9 |
{ 248, UNICODE_LOWER }, /* Latin Small o with stroke */
|
|
Packit |
89ede9 |
{ 249, UNICODE_LOWER }, /* Latin Small u with grave */
|
|
Packit |
89ede9 |
{ 250, UNICODE_LOWER }, /* Latin Small u with acute */
|
|
Packit |
89ede9 |
{ 251, UNICODE_LOWER }, /* Latin Small u with circumflex */
|
|
Packit |
89ede9 |
{ 252, UNICODE_LOWER }, /* Latin Small u with diaeresis */
|
|
Packit |
89ede9 |
{ 253, UNICODE_LOWER }, /* Latin Small y with acute */
|
|
Packit |
89ede9 |
{ 254, UNICODE_LOWER }, /* Latin Small thorn */
|
|
Packit |
89ede9 |
{ 255, UNICODE_LOWER }, /* Latin Small y with diaeresis */
|
|
Packit |
89ede9 |
{ 256, UNICODE_UPPER }, /* Latin Capital A with macron */
|
|
Packit |
89ede9 |
{ 257, UNICODE_LOWER }, /* Latin Small a with macron */
|
|
Packit |
89ede9 |
{ 258, UNICODE_UPPER }, /* Latin Capital A with breve */
|
|
Packit |
89ede9 |
{ 259, UNICODE_LOWER }, /* Latin Small a with breve */
|
|
Packit |
89ede9 |
{ 260, UNICODE_UPPER }, /* Latin Capital A with ogonek */
|
|
Packit |
89ede9 |
{ 261, UNICODE_LOWER }, /* Latin Small a with ogonek */
|
|
Packit |
89ede9 |
{ 262, UNICODE_UPPER }, /* Latin Capital C with acute */
|
|
Packit |
89ede9 |
{ 263, UNICODE_LOWER }, /* Latin Small c with acute */
|
|
Packit |
89ede9 |
{ 264, UNICODE_UPPER }, /* Latin Capital C with circumflex */
|
|
Packit |
89ede9 |
{ 265, UNICODE_LOWER }, /* Latin Small c with circumflex */
|
|
Packit |
89ede9 |
{ 266, UNICODE_UPPER }, /* Latin Capital C with dot above */
|
|
Packit |
89ede9 |
{ 267, UNICODE_LOWER }, /* Latin Small c with dot above */
|
|
Packit |
89ede9 |
{ 268, UNICODE_UPPER }, /* Latin Capital C with caron (hacek) */
|
|
Packit |
89ede9 |
{ 269, UNICODE_LOWER }, /* Latin Small c with caron (hacek) */
|
|
Packit |
89ede9 |
{ 270, UNICODE_UPPER }, /* Latin Capital D with caron (hacek) */
|
|
Packit |
89ede9 |
{ 271, UNICODE_LOWER }, /* Latin Small d with caron (hacek) */
|
|
Packit |
89ede9 |
{ 272, UNICODE_UPPER }, /* Latin Capital D with stroke */
|
|
Packit |
89ede9 |
{ 273, UNICODE_LOWER }, /* Latin Small d with stroke */
|
|
Packit |
89ede9 |
{ 274, UNICODE_UPPER }, /* Latin Capital E with macron */
|
|
Packit |
89ede9 |
{ 275, UNICODE_LOWER }, /* Latin Small e with macron */
|
|
Packit |
89ede9 |
{ 276, UNICODE_UPPER }, /* Latin Capital E with breve */
|
|
Packit |
89ede9 |
{ 277, UNICODE_LOWER }, /* Latin Small e with breve */
|
|
Packit |
89ede9 |
{ 278, UNICODE_UPPER }, /* Latin Capital E with dot above */
|
|
Packit |
89ede9 |
{ 279, UNICODE_LOWER }, /* Latin Small e with dot above */
|
|
Packit |
89ede9 |
{ 280, UNICODE_UPPER }, /* Latin Capital E with ogonek */
|
|
Packit |
89ede9 |
{ 281, UNICODE_LOWER }, /* Latin Small e with ogonek */
|
|
Packit |
89ede9 |
{ 282, UNICODE_UPPER }, /* Latin Capital E with caron (hacek) */
|
|
Packit |
89ede9 |
{ 283, UNICODE_LOWER }, /* Latin Small e with caron */
|
|
Packit |
89ede9 |
{ 284, UNICODE_UPPER }, /* Latin Capital G with circumflex */
|
|
Packit |
89ede9 |
{ 285, UNICODE_LOWER }, /* Latin Small g with circumflex */
|
|
Packit |
89ede9 |
{ 286, UNICODE_UPPER }, /* Latin Capital G with breve */
|
|
Packit |
89ede9 |
{ 287, UNICODE_LOWER }, /* Latin Small g with breve */
|
|
Packit |
89ede9 |
{ 288, UNICODE_UPPER }, /* Latin Capital G with dot above */
|
|
Packit |
89ede9 |
{ 289, UNICODE_LOWER }, /* Latin Small g with dot above */
|
|
Packit |
89ede9 |
{ 290, UNICODE_UPPER }, /* Latin Capital G with cedilla */
|
|
Packit |
89ede9 |
{ 291, UNICODE_LOWER }, /* Latin Small g with cedilla */
|
|
Packit |
89ede9 |
{ 292, UNICODE_UPPER }, /* Latin Capital H with circumflex */
|
|
Packit |
89ede9 |
{ 293, UNICODE_LOWER }, /* Latin Small h with circumflex */
|
|
Packit |
89ede9 |
{ 294, UNICODE_UPPER }, /* Latin Capital H with stroke */
|
|
Packit |
89ede9 |
{ 295, UNICODE_LOWER }, /* Latin Small h with stroke */
|
|
Packit |
89ede9 |
{ 296, UNICODE_UPPER }, /* Latin Capital I with tilde */
|
|
Packit |
89ede9 |
{ 297, UNICODE_LOWER }, /* Latin Small i with tilde */
|
|
Packit |
89ede9 |
{ 298, UNICODE_UPPER }, /* Latin Capital I with macron */
|
|
Packit |
89ede9 |
{ 299, UNICODE_LOWER }, /* Latin Small i with macron */
|
|
Packit |
89ede9 |
{ 300, UNICODE_UPPER }, /* Latin Capital I with breve */
|
|
Packit |
89ede9 |
{ 301, UNICODE_LOWER }, /* Latin Small i with breve */
|
|
Packit |
89ede9 |
{ 302, UNICODE_UPPER }, /* Latin Capital I with ogonek */
|
|
Packit |
89ede9 |
{ 303, UNICODE_LOWER }, /* Latin Small i with ogonek */
|
|
Packit |
89ede9 |
{ 304, UNICODE_UPPER }, /* Latin Capital I with dot above */
|
|
Packit |
89ede9 |
{ 305, UNICODE_LOWER }, /* Latin Small i without dot above */
|
|
Packit |
89ede9 |
{ 306, UNICODE_UPPER }, /* Latin Capital IJ */
|
|
Packit |
89ede9 |
{ 307, UNICODE_LOWER }, /* Latin Small IJ */
|
|
Packit |
89ede9 |
{ 308, UNICODE_UPPER }, /* Latin Capital J with circumflex */
|
|
Packit |
89ede9 |
{ 309, UNICODE_LOWER }, /* Latin Small j with circumflex */
|
|
Packit |
89ede9 |
{ 310, UNICODE_UPPER }, /* Latin Capital K with cedilla */
|
|
Packit |
89ede9 |
{ 311, UNICODE_LOWER }, /* Latin Small j with cedilla */
|
|
Packit |
89ede9 |
{ 312, UNICODE_LOWER }, /* Latin Small kra */
|
|
Packit |
89ede9 |
{ 313, UNICODE_UPPER }, /* Latin Capital L with acute */
|
|
Packit |
89ede9 |
{ 314, UNICODE_LOWER }, /* Latin Small l with acute */
|
|
Packit |
89ede9 |
{ 315, UNICODE_UPPER }, /* Latin Capital L with cedilla */
|
|
Packit |
89ede9 |
{ 316, UNICODE_LOWER }, /* Latin Small l with cedilla */
|
|
Packit |
89ede9 |
{ 317, UNICODE_UPPER }, /* Latin Capital L with caron */
|
|
Packit |
89ede9 |
{ 318, UNICODE_LOWER }, /* Latin Small l with caron */
|
|
Packit |
89ede9 |
{ 319, UNICODE_UPPER }, /* Latin Capital L with middle dot */
|
|
Packit |
89ede9 |
{ 320, UNICODE_LOWER }, /* Latin Small l with middle dot */
|
|
Packit |
89ede9 |
{ 321, UNICODE_UPPER }, /* Latin Capital L with stroke */
|
|
Packit |
89ede9 |
{ 322, UNICODE_LOWER }, /* Latin Small l with stroke */
|
|
Packit |
89ede9 |
{ 323, UNICODE_UPPER }, /* Latin Capital N with acute */
|
|
Packit |
89ede9 |
{ 324, UNICODE_LOWER }, /* Latin Small n with acute */
|
|
Packit |
89ede9 |
{ 325, UNICODE_UPPER }, /* Latin Capital N with cedilla */
|
|
Packit |
89ede9 |
{ 326, UNICODE_LOWER }, /* Latin Small n with cedilla */
|
|
Packit |
89ede9 |
{ 327, UNICODE_UPPER }, /* Latin Capital N with caron */
|
|
Packit |
89ede9 |
{ 328, UNICODE_LOWER }, /* Latin Small n with caron */
|
|
Packit |
89ede9 |
{ 329, UNICODE_LOWER }, /* Latin Small n preceeded by apostrophe */
|
|
Packit |
89ede9 |
{ 330, UNICODE_UPPER }, /* Latin Capital Eng */
|
|
Packit |
89ede9 |
{ 331, UNICODE_LOWER }, /* Latin Small eng */
|
|
Packit |
89ede9 |
{ 332, UNICODE_UPPER }, /* Latin Capital O with macron */
|
|
Packit |
89ede9 |
{ 333, UNICODE_LOWER }, /* Latin Small o with macron */
|
|
Packit |
89ede9 |
{ 334, UNICODE_UPPER }, /* Latin Capital O with breve */
|
|
Packit |
89ede9 |
{ 335, UNICODE_LOWER }, /* Latin Small o with breve */
|
|
Packit |
89ede9 |
{ 336, UNICODE_UPPER }, /* Latin Capital O with double acute */
|
|
Packit |
89ede9 |
{ 337, UNICODE_LOWER }, /* Latin Small o with double acute */
|
|
Packit |
89ede9 |
{ 338, UNICODE_UPPER }, /* Latin Capital OE */
|
|
Packit |
89ede9 |
{ 339, UNICODE_LOWER }, /* Latin Small oe */
|
|
Packit |
89ede9 |
{ 340, UNICODE_UPPER }, /* Latin Capital R with acute */
|
|
Packit |
89ede9 |
{ 341, UNICODE_LOWER }, /* Latin Small r with acute */
|
|
Packit |
89ede9 |
{ 342, UNICODE_UPPER }, /* Latin Capital R with cedilla */
|
|
Packit |
89ede9 |
{ 343, UNICODE_LOWER }, /* Latin Small r with cedilla */
|
|
Packit |
89ede9 |
{ 344, UNICODE_UPPER }, /* Latin Capital R with caron */
|
|
Packit |
89ede9 |
{ 345, UNICODE_LOWER }, /* Latin Small r with caron */
|
|
Packit |
89ede9 |
{ 346, UNICODE_UPPER }, /* Latin Capital S with acute */
|
|
Packit |
89ede9 |
{ 347, UNICODE_LOWER }, /* Latin Small s with acute */
|
|
Packit |
89ede9 |
{ 348, UNICODE_UPPER }, /* Latin Capital S with circumflex */
|
|
Packit |
89ede9 |
{ 349, UNICODE_LOWER }, /* Latin Small s with circumflex */
|
|
Packit |
89ede9 |
{ 350, UNICODE_UPPER }, /* Latin Capital S with cedilla */
|
|
Packit |
89ede9 |
{ 351, UNICODE_LOWER }, /* Latin Small s with cedilla */
|
|
Packit |
89ede9 |
{ 352, UNICODE_UPPER }, /* Latin Capital S with caron */
|
|
Packit |
89ede9 |
{ 353, UNICODE_LOWER }, /* Latin Small s with caron */
|
|
Packit |
89ede9 |
{ 354, UNICODE_UPPER }, /* Latin Capital T with cedilla */
|
|
Packit |
89ede9 |
{ 355, UNICODE_LOWER }, /* Latin Small t with cedilla */
|
|
Packit |
89ede9 |
{ 356, UNICODE_UPPER }, /* Latin Capital T with caron */
|
|
Packit |
89ede9 |
{ 357, UNICODE_LOWER }, /* Latin Small t with caron */
|
|
Packit |
89ede9 |
{ 358, UNICODE_UPPER }, /* Latin Capital T with stroke */
|
|
Packit |
89ede9 |
{ 359, UNICODE_LOWER }, /* Latin Small t with stroke */
|
|
Packit |
89ede9 |
{ 360, UNICODE_UPPER }, /* Latin Capital U with tilde */
|
|
Packit |
89ede9 |
{ 361, UNICODE_LOWER }, /* Latin Small u with tilde */
|
|
Packit |
89ede9 |
{ 362, UNICODE_UPPER }, /* Latin Capital U with macron */
|
|
Packit |
89ede9 |
{ 363, UNICODE_LOWER }, /* Latin Small u with macron */
|
|
Packit |
89ede9 |
{ 364, UNICODE_UPPER }, /* Latin Capital U with breve */
|
|
Packit |
89ede9 |
{ 365, UNICODE_LOWER }, /* Latin Small u with breve */
|
|
Packit |
89ede9 |
{ 366, UNICODE_UPPER }, /* Latin Capital U with ring above */
|
|
Packit |
89ede9 |
{ 367, UNICODE_LOWER }, /* Latin Small u with ring above */
|
|
Packit |
89ede9 |
{ 368, UNICODE_UPPER }, /* Latin Capital U with double acute */
|
|
Packit |
89ede9 |
{ 369, UNICODE_LOWER }, /* Latin Small u with double acute */
|
|
Packit |
89ede9 |
{ 370, UNICODE_UPPER }, /* Latin Capital U with ogonek */
|
|
Packit |
89ede9 |
{ 371, UNICODE_LOWER }, /* Latin Small u with ogonek */
|
|
Packit |
89ede9 |
{ 372, UNICODE_UPPER }, /* Latin Capital W with circumflex */
|
|
Packit |
89ede9 |
{ 373, UNICODE_LOWER }, /* Latin Small w with circumflex */
|
|
Packit |
89ede9 |
{ 374, UNICODE_UPPER }, /* Latin Capital Y with circumflex */
|
|
Packit |
89ede9 |
{ 375, UNICODE_LOWER }, /* Latin Small y with circumflex */
|
|
Packit |
89ede9 |
{ 376, UNICODE_UPPER }, /* Latin Capital Y with diaeresis */
|
|
Packit |
89ede9 |
{ 377, UNICODE_UPPER }, /* Latin Capital Z with acute */
|
|
Packit |
89ede9 |
{ 378, UNICODE_LOWER }, /* Latin Small z with acute */
|
|
Packit |
89ede9 |
{ 379, UNICODE_UPPER }, /* Latin Capital Z with dot above */
|
|
Packit |
89ede9 |
{ 380, UNICODE_LOWER }, /* Latin Small z with dot above */
|
|
Packit |
89ede9 |
{ 381, UNICODE_UPPER }, /* Latin Capital Z with caron */
|
|
Packit |
89ede9 |
{ 382, UNICODE_LOWER }, /* Latin Small z with caron */
|
|
Packit |
89ede9 |
{ 383, UNICODE_LOWER }, /* Latin Small long S */
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
{ 461, UNICODE_UPPER }, /* Latin Capital A with caron (hacek) */
|
|
Packit |
89ede9 |
{ 462, UNICODE_LOWER }, /* Latin Small a with caron (hacek) */
|
|
Packit |
89ede9 |
{ 463, UNICODE_UPPER }, /* Latin Capital I with caron (hacek) */
|
|
Packit |
89ede9 |
{ 464, UNICODE_LOWER }, /* Latin Small i with caron (hacek) */
|
|
Packit |
89ede9 |
{ 465, UNICODE_UPPER }, /* Latin Capital O with caron (hacek) */
|
|
Packit |
89ede9 |
{ 466, UNICODE_LOWER }, /* Latin Small o with caron (hacek) */
|
|
Packit |
89ede9 |
{ 467, UNICODE_UPPER }, /* Latin Capital U with caron (hacek) */
|
|
Packit |
89ede9 |
{ 468, UNICODE_LOWER }, /* Latin Small u with caron (hacek) */
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
{ 486, UNICODE_UPPER }, /* Latin Capital G with caron */
|
|
Packit |
89ede9 |
{ 487, UNICODE_LOWER }, /* Latin Small g with caron */
|
|
Packit |
89ede9 |
{ 488, UNICODE_UPPER }, /* Latin Capital J with caron */
|
|
Packit |
89ede9 |
{ 489, UNICODE_LOWER }, /* Latin Small j with caron */
|
|
Packit |
89ede9 |
{ 490, UNICODE_UPPER }, /* Latin Capital O with caron */
|
|
Packit |
89ede9 |
{ 491, UNICODE_LOWER }, /* Latin Small o with caron */
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
{ 500, UNICODE_UPPER }, /* Latin Capital G with acute */
|
|
Packit |
89ede9 |
{ 501, UNICODE_LOWER }, /* Latin Small g with caron */
|
|
Packit |
89ede9 |
};
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* Number of entries in unicodeinfo[]. */
static const int nunicodeinfo = sizeof( unicodeinfo ) / sizeof( unicodeinfo[0] );

/*
 * unicode_find()
 *
 * Binary-search unicodeinfo[] (which is sorted by ascending code point)
 * for unicode_character.
 *
 * Returns the index of the matching entry, or -1 if the code point
 * is not in the table.
 */
static int
unicode_find( unsigned int unicode_character )
{
	int lo = 0, hi = nunicodeinfo, mid;

	/* Narrow [lo,hi) down to the first entry whose value is
	 * >= unicode_character. */
	while ( lo < hi ) {
		mid = lo + ( hi - lo ) / 2;
		if ( unicodeinfo[mid].value < unicode_character )
			lo = mid + 1;
		else
			hi = mid;
	}

	/* When the target is larger than every table entry the search
	 * ends with lo == nunicodeinfo, one past the last element; the
	 * bounds test must come first so that slot is never read. */
	if ( lo < nunicodeinfo && unicodeinfo[lo].value == unicode_character )
		return lo;

	return -1;
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
unsigned short
|
|
Packit |
89ede9 |
unicode_utf8_classify( char *p )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
unsigned int unicode_character, pos = 0;
|
|
Packit |
89ede9 |
int n;
|
|
Packit |
89ede9 |
unicode_character = utf8_decode( p, &pos );
|
|
Packit |
89ede9 |
n = unicode_find( unicode_character );
|
|
Packit |
89ede9 |
if ( n==-1 ) return UNICODE_SYMBOL;
|
|
Packit |
89ede9 |
else return unicodeinfo[n].info;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
unsigned short
|
|
Packit |
89ede9 |
unicode_utf8_classify_str( str *s )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
unsigned int unicode_character, pos = 0;
|
|
Packit |
89ede9 |
unsigned short value = 0;
|
|
Packit |
89ede9 |
int n;
|
|
Packit |
89ede9 |
while ( pos < s->len ) {
|
|
Packit |
89ede9 |
unicode_character = utf8_decode( str_cstr( s ), &pos );
|
|
Packit |
89ede9 |
n = unicode_find( unicode_character );
|
|
Packit |
89ede9 |
if ( n==-1 ) value |= UNICODE_SYMBOL;
|
|
Packit |
89ede9 |
else value |= unicodeinfo[n].info;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return value;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|