|
Packit |
b89d10 |
/**********************************************************************
  unicode.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
 * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#include "regint.h"
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/*
 * A (name, ctype) pair of short integers.
 * NOTE(review): presumably `name` is an offset into a property-name string
 * pool and `ctype` is the ctype number bound to that name -- confirm against
 * the generated property data that instantiates this struct.
 */
struct PoolPropertyNameCtype {
  short int name;
  short int ctype;
};
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/*
 * Evaluates to non-zero when the ctype table entry for `code` has the bit
 * selected by CTYPE_TO_BIT(ctype) set.
 * `code` is used directly as an index into a 256-entry table, so the caller
 * must guarantee 0 <= code <= 255.
 */
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
  ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/*
 * Per-code-point ctype bit masks for code points 0x00..0xff, one entry per
 * code point, eight entries per row.  Each entry is tested with
 * CTYPE_TO_BIT() by ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE().
 */
static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
  /* 0x00 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
  /* 0x10 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */
  0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
  /* 0xb0 */
  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
  0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  /* 0xc0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  /* 0xe0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#include "st.h"
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#include "unicode_fold_data.c"
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
extern int
|
|
Packit |
b89d10 |
onigenc_unicode_mbc_case_fold(OnigEncoding enc,
|
|
Packit |
b89d10 |
OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
|
|
Packit |
b89d10 |
UChar* fold)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
const struct ByUnfoldKey* buk;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
OnigCodePoint code;
|
|
Packit |
b89d10 |
int i, len, rlen;
|
|
Packit |
b89d10 |
const UChar *p = *pp;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
|
Packit |
b89d10 |
len = enclen(enc, p);
|
|
Packit |
b89d10 |
*pp += len;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
|
Packit |
b89d10 |
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
|
Packit |
b89d10 |
if (code == 0x0130) {
|
|
Packit |
b89d10 |
return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
#if 0
|
|
Packit |
b89d10 |
if (code == 0x0049) {
|
|
Packit |
b89d10 |
return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
buk = unicode_unfold_key(code);
|
|
Packit |
b89d10 |
if (buk != 0) {
|
|
Packit |
b89d10 |
if (buk->fold_len == 1) {
|
|
Packit |
b89d10 |
return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else {
|
|
Packit |
b89d10 |
OnigCodePoint* addr;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
FOLDS_FOLD_ADDR_BUK(buk, addr);
|
|
Packit |
b89d10 |
rlen = 0;
|
|
Packit |
b89d10 |
for (i = 0; i < buk->fold_len; i++) {
|
|
Packit |
b89d10 |
OnigCodePoint c = addr[i];
|
|
Packit |
b89d10 |
len = ONIGENC_CODE_TO_MBC(enc, c, fold);
|
|
Packit |
b89d10 |
fold += len;
|
|
Packit |
b89d10 |
rlen += len;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
return rlen;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (i = 0; i < len; i++) {
|
|
Packit |
b89d10 |
*fold++ = *p++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
return len;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
static int
|
|
Packit |
b89d10 |
apply_case_fold1(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
int i, j, k, n, r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (i = from; i < to; ) {
|
|
Packit |
b89d10 |
OnigCodePoint fold = *FOLDS1_FOLD(i);
|
|
Packit |
b89d10 |
n = FOLDS1_UNFOLDS_NUM(i);
|
|
Packit |
b89d10 |
for (j = 0; j < n; j++) {
|
|
Packit |
b89d10 |
OnigCodePoint unfold = FOLDS1_UNFOLDS(i)[j];
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
r = (*f)(fold, &unfold, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
r = (*f)(unfold, &fold, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (k = 0; k < j; k++) {
|
|
Packit |
b89d10 |
OnigCodePoint unfold2 = FOLDS1_UNFOLDS(i)[k];
|
|
Packit |
b89d10 |
r = (*f)(unfold, &unfold2, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
r = (*f)(unfold2, &unfold, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
i = FOLDS1_NEXT_INDEX(i);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
static int
|
|
Packit |
b89d10 |
apply_case_fold2(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
int i, j, k, n, r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (i = from; i < to; ) {
|
|
Packit |
b89d10 |
OnigCodePoint* fold = FOLDS2_FOLD(i);
|
|
Packit |
b89d10 |
n = FOLDS2_UNFOLDS_NUM(i);
|
|
Packit |
b89d10 |
for (j = 0; j < n; j++) {
|
|
Packit |
b89d10 |
OnigCodePoint unfold = FOLDS2_UNFOLDS(i)[j];
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
r = (*f)(unfold, fold, 2, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (k = 0; k < j; k++) {
|
|
Packit |
b89d10 |
OnigCodePoint unfold2 = FOLDS2_UNFOLDS(i)[k];
|
|
Packit |
b89d10 |
r = (*f)(unfold, &unfold2, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
r = (*f)(unfold2, &unfold, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
i = FOLDS2_NEXT_INDEX(i);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
static int
|
|
Packit |
b89d10 |
apply_case_fold3(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
int i, j, k, n, r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (i = from; i < to; ) {
|
|
Packit |
b89d10 |
OnigCodePoint* fold = FOLDS3_FOLD(i);
|
|
Packit |
b89d10 |
n = FOLDS3_UNFOLDS_NUM(i);
|
|
Packit |
b89d10 |
for (j = 0; j < n; j++) {
|
|
Packit |
b89d10 |
OnigCodePoint unfold = FOLDS3_UNFOLDS(i)[j];
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
r = (*f)(unfold, fold, 3, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (k = 0; k < j; k++) {
|
|
Packit |
b89d10 |
OnigCodePoint unfold2 = FOLDS3_UNFOLDS(i)[k];
|
|
Packit |
b89d10 |
r = (*f)(unfold, &unfold2, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
r = (*f)(unfold2, &unfold, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
i = FOLDS3_NEXT_INDEX(i);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
extern int
|
|
Packit |
b89d10 |
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
|
|
Packit |
b89d10 |
OnigApplyAllCaseFoldFunc f, void* arg)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
int r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
r = apply_case_fold1(0, FOLDS1_NORMAL_END_INDEX, f, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
|
Packit |
b89d10 |
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
|
Packit |
b89d10 |
code = 0x0131;
|
|
Packit |
b89d10 |
r = (*f)(0x0049, &code, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
code = 0x0049;
|
|
Packit |
b89d10 |
r = (*f)(0x0131, &code, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
code = 0x0130;
|
|
Packit |
b89d10 |
r = (*f)(0x0069, &code, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
code = 0x0069;
|
|
Packit |
b89d10 |
r = (*f)(0x0130, &code, 1, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else {
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
r = apply_case_fold1(FOLDS1_NORMAL_END_INDEX, FOLDS1_END_INDEX, f, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
r = apply_case_fold2(0, FOLDS2_NORMAL_END_INDEX, f, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
|
Packit |
b89d10 |
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
r = apply_case_fold2(FOLDS2_NORMAL_END_INDEX, FOLDS2_END_INDEX, f, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
r = apply_case_fold3(0, FOLDS3_NORMAL_END_INDEX, f, arg);
|
|
Packit |
b89d10 |
if (r != 0) return r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
extern int
|
|
Packit |
b89d10 |
onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
|
Packit |
b89d10 |
OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
|
|
Packit |
b89d10 |
OnigCaseFoldCodeItem items[])
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
int n, m, i, j, k, len;
|
|
Packit |
b89d10 |
OnigCodePoint code, codes[3];
|
|
Packit |
b89d10 |
const struct ByUnfoldKey* buk;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
n = 0;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
|
Packit |
b89d10 |
len = enclen(enc, p);
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
|
Packit |
b89d10 |
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
|
Packit |
b89d10 |
if (code == 0x0049) {
|
|
Packit |
b89d10 |
items[0].byte_len = len;
|
|
Packit |
b89d10 |
items[0].code_len = 1;
|
|
Packit |
b89d10 |
items[0].code[0] = 0x0131;
|
|
Packit |
b89d10 |
return 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else if (code == 0x0130) {
|
|
Packit |
b89d10 |
items[0].byte_len = len;
|
|
Packit |
b89d10 |
items[0].code_len = 1;
|
|
Packit |
b89d10 |
items[0].code[0] = 0x0069;
|
|
Packit |
b89d10 |
return 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else if (code == 0x0131) {
|
|
Packit |
b89d10 |
items[0].byte_len = len;
|
|
Packit |
b89d10 |
items[0].code_len = 1;
|
|
Packit |
b89d10 |
items[0].code[0] = 0x0049;
|
|
Packit |
b89d10 |
return 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else if (code == 0x0069) {
|
|
Packit |
b89d10 |
items[0].byte_len = len;
|
|
Packit |
b89d10 |
items[0].code_len = 1;
|
|
Packit |
b89d10 |
items[0].code[0] = 0x0130;
|
|
Packit |
b89d10 |
return 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
buk = unicode_unfold_key(code);
|
|
Packit |
b89d10 |
if (buk != 0) {
|
|
Packit |
b89d10 |
if (buk->fold_len == 1) {
|
|
Packit |
b89d10 |
int un;
|
|
Packit |
b89d10 |
items[0].byte_len = len;
|
|
Packit |
b89d10 |
items[0].code_len = 1;
|
|
Packit |
b89d10 |
items[0].code[0] = *FOLDS1_FOLD(buk->index);
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
un = FOLDS1_UNFOLDS_NUM(buk->index);
|
|
Packit |
b89d10 |
for (i = 0; i < un; i++) {
|
|
Packit |
b89d10 |
OnigCodePoint unfold = FOLDS1_UNFOLDS(buk->index)[i];
|
|
Packit |
b89d10 |
if (unfold != code) {
|
|
Packit |
b89d10 |
items[n].byte_len = len;
|
|
Packit |
b89d10 |
items[n].code_len = 1;
|
|
Packit |
b89d10 |
items[n].code[0] = unfold;
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
code = items[0].code[0]; /* for multi-code to unfold search. */
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
|
Packit |
b89d10 |
OnigCodePoint cs[3][4];
|
|
Packit |
b89d10 |
int fn, ncs[3];
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if (buk->fold_len == 2) {
|
|
Packit |
b89d10 |
m = FOLDS2_UNFOLDS_NUM(buk->index);
|
|
Packit |
b89d10 |
for (i = 0; i < m; i++) {
|
|
Packit |
b89d10 |
OnigCodePoint unfold = FOLDS2_UNFOLDS(buk->index)[i];
|
|
Packit |
b89d10 |
if (unfold == code) continue;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
items[n].byte_len = len;
|
|
Packit |
b89d10 |
items[n].code_len = 1;
|
|
Packit |
b89d10 |
items[n].code[0] = unfold;
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (fn = 0; fn < 2; fn++) {
|
|
Packit |
b89d10 |
int index;
|
|
Packit |
b89d10 |
cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];
|
|
Packit |
b89d10 |
index = unicode_fold1_key(&cs[fn][0]);
|
|
Packit |
b89d10 |
if (index >= 0) {
|
|
Packit |
b89d10 |
int m = FOLDS1_UNFOLDS_NUM(index);
|
|
Packit |
b89d10 |
for (i = 0; i < m; i++) {
|
|
Packit |
b89d10 |
cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
ncs[fn] = m + 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else
|
|
Packit |
b89d10 |
ncs[fn] = 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (i = 0; i < ncs[0]; i++) {
|
|
Packit |
b89d10 |
for (j = 0; j < ncs[1]; j++) {
|
|
Packit |
b89d10 |
items[n].byte_len = len;
|
|
Packit |
b89d10 |
items[n].code_len = 2;
|
|
Packit |
b89d10 |
items[n].code[0] = cs[0][i];
|
|
Packit |
b89d10 |
items[n].code[1] = cs[1][j];
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else { /* fold_len == 3 */
|
|
Packit |
b89d10 |
m = FOLDS3_UNFOLDS_NUM(buk->index);
|
|
Packit |
b89d10 |
for (i = 0; i < m; i++) {
|
|
Packit |
b89d10 |
OnigCodePoint unfold = FOLDS3_UNFOLDS(buk->index)[i];
|
|
Packit |
b89d10 |
if (unfold == code) continue;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
items[n].byte_len = len;
|
|
Packit |
b89d10 |
items[n].code_len = 1;
|
|
Packit |
b89d10 |
items[n].code[0] = unfold;
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (fn = 0; fn < 3; fn++) {
|
|
Packit |
b89d10 |
int index;
|
|
Packit |
b89d10 |
cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];
|
|
Packit |
b89d10 |
index = unicode_fold1_key(&cs[fn][0]);
|
|
Packit |
b89d10 |
if (index >= 0) {
|
|
Packit |
b89d10 |
int m = FOLDS1_UNFOLDS_NUM(index);
|
|
Packit |
b89d10 |
for (i = 0; i < m; i++) {
|
|
Packit |
b89d10 |
cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
ncs[fn] = m + 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else
|
|
Packit |
b89d10 |
ncs[fn] = 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (i = 0; i < ncs[0]; i++) {
|
|
Packit |
b89d10 |
for (j = 0; j < ncs[1]; j++) {
|
|
Packit |
b89d10 |
for (k = 0; k < ncs[2]; k++) {
|
|
Packit |
b89d10 |
items[n].byte_len = len;
|
|
Packit |
b89d10 |
items[n].code_len = 3;
|
|
Packit |
b89d10 |
items[n].code[0] = cs[0][i];
|
|
Packit |
b89d10 |
items[n].code[1] = cs[1][j];
|
|
Packit |
b89d10 |
items[n].code[2] = cs[2][k];
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* multi char folded code is not head of another folded multi char */
|
|
Packit |
b89d10 |
return n;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else {
|
|
Packit |
b89d10 |
int index = unicode_fold1_key(&code);
|
|
Packit |
b89d10 |
if (index >= 0) {
|
|
Packit |
b89d10 |
int m = FOLDS1_UNFOLDS_NUM(index);
|
|
Packit |
b89d10 |
for (i = 0; i < m; i++) {
|
|
Packit |
b89d10 |
items[n].byte_len = len;
|
|
Packit |
b89d10 |
items[n].code_len = 1;
|
|
Packit |
b89d10 |
items[n].code[0] = FOLDS1_UNFOLDS(index)[i];
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
|
|
Packit |
b89d10 |
return n;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
p += len;
|
|
Packit |
b89d10 |
if (p < end) {
|
|
Packit |
b89d10 |
int clen;
|
|
Packit |
b89d10 |
int index;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
codes[0] = code;
|
|
Packit |
b89d10 |
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
buk = unicode_unfold_key(code);
|
|
Packit |
b89d10 |
if (buk != 0 && buk->fold_len == 1) {
|
|
Packit |
b89d10 |
codes[1] = *FOLDS1_FOLD(buk->index);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else
|
|
Packit |
b89d10 |
codes[1] = code;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
clen = enclen(enc, p);
|
|
Packit |
b89d10 |
len += clen;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
index = unicode_fold2_key(codes);
|
|
Packit |
b89d10 |
if (index >= 0) {
|
|
Packit |
b89d10 |
m = FOLDS2_UNFOLDS_NUM(index);
|
|
Packit |
b89d10 |
for (i = 0; i < m; i++) {
|
|
Packit |
b89d10 |
items[n].byte_len = len;
|
|
Packit |
b89d10 |
items[n].code_len = 1;
|
|
Packit |
b89d10 |
items[n].code[0] = FOLDS2_UNFOLDS(index)[i];
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
p += clen;
|
|
Packit |
b89d10 |
if (p < end) {
|
|
Packit |
b89d10 |
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
|
Packit |
b89d10 |
buk = unicode_unfold_key(code);
|
|
Packit |
b89d10 |
if (buk != 0 && buk->fold_len == 1) {
|
|
Packit |
b89d10 |
codes[2] = *FOLDS1_FOLD(buk->index);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else
|
|
Packit |
b89d10 |
codes[2] = code;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
clen = enclen(enc, p);
|
|
Packit |
b89d10 |
len += clen;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
index = unicode_fold3_key(codes);
|
|
Packit |
b89d10 |
if (index >= 0) {
|
|
Packit |
b89d10 |
m = FOLDS3_UNFOLDS_NUM(index);
|
|
Packit |
b89d10 |
for (i = 0; i < m; i++) {
|
|
Packit |
b89d10 |
items[n].byte_len = len;
|
|
Packit |
b89d10 |
items[n].code_len = 1;
|
|
Packit |
b89d10 |
items[n].code[0] = FOLDS3_UNFOLDS(index)[i];
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return n;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/*
 * Result of checking one adjacent pair of code points for an extended
 * grapheme cluster boundary.  The two *_UNDEF_* values mean the pair alone
 * cannot decide; onigenc_egcb_is_break_position() resolves them by scanning
 * backwards through the preceding characters.
 */
enum EGCB_BREAK_TYPE {
  EGCB_NOT_BREAK = 0,
  EGCB_BREAK     = 1,
  EGCB_BREAK_UNDEF_E_MODIFIER = 2,  /* Extend followed by E_Modifier */
  EGCB_BREAK_UNDEF_RI_RI      = 3   /* two Regional_Indicators in a row */
};
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/*
 * Grapheme-cluster-break class of a code point.
 *
 * The numeric order is significant: IS_CONTROL_CR_LF() tests the contiguous
 * range EGCB_CR..EGCB_Control, and IS_HANGUL() tests `>= EGCB_L`, so the
 * Hangul classes must stay last.
 */
enum EGCB_TYPE {
  EGCB_Other        = 0,
  EGCB_CR           = 1,
  EGCB_LF           = 2,
  EGCB_Control      = 3,
  EGCB_Extend       = 4,
  EGCB_Prepend      = 5,
  EGCB_Regional_Indicator = 6,
  EGCB_SpacingMark  = 7,
  EGCB_ZWJ          = 8,
  EGCB_E_Base         = 9,
  EGCB_E_Base_GAZ     = 10,
  EGCB_E_Modifier     = 11,
  EGCB_Glue_After_Zwj = 12,
  EGCB_L            = 13,
  EGCB_LV           = 14,
  EGCB_LVT          = 15,
  EGCB_T            = 16,
  EGCB_V            = 17
};
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
typedef struct {
|
|
Packit |
b89d10 |
OnigCodePoint start;
|
|
Packit |
b89d10 |
OnigCodePoint end;
|
|
Packit |
b89d10 |
enum EGCB_TYPE type;
|
|
Packit |
b89d10 |
} EGCB_RANGE_TYPE;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#include "unicode_egcb_data.c"
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
static enum EGCB_TYPE
|
|
Packit |
b89d10 |
egcb_get_type(OnigCodePoint code)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
OnigCodePoint low, high, x;
|
|
Packit |
b89d10 |
enum EGCB_TYPE type;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
for (low = 0, high = (OnigCodePoint )EGCB_RANGE_NUM; low < high; ) {
|
|
Packit |
b89d10 |
x = (low + high) >> 1;
|
|
Packit |
b89d10 |
if (code > EGCB_RANGES[x].end)
|
|
Packit |
b89d10 |
low = x + 1;
|
|
Packit |
b89d10 |
else
|
|
Packit |
b89d10 |
high = x;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
type = (low < (OnigCodePoint )EGCB_RANGE_NUM &&
|
|
Packit |
b89d10 |
code >= EGCB_RANGES[low].start) ?
|
|
Packit |
b89d10 |
EGCB_RANGES[low].type : EGCB_Other;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return type;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/*
 * Class predicates over enum EGCB_TYPE values (the parameter is a class,
 * not a code point).  Both rely on the numeric ordering of the enum:
 * CR, LF and Control are contiguous, and the Hangul classes are the
 * highest values.
 */
#define IS_CONTROL_CR_LF(code)   ((code) <= EGCB_Control && (code) >= EGCB_CR)
#define IS_HANGUL(code)          ((code) >= EGCB_L)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* GB1 and GB2 are outside of this function. */
|
|
Packit |
b89d10 |
static enum EGCB_BREAK_TYPE
|
|
Packit |
b89d10 |
unicode_egcb_is_break_2code(OnigCodePoint from_code, OnigCodePoint to_code)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
enum EGCB_TYPE from;
|
|
Packit |
b89d10 |
enum EGCB_TYPE to;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
from = egcb_get_type(from_code);
|
|
Packit |
b89d10 |
to = egcb_get_type(to_code);
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* short cut */
|
|
Packit |
b89d10 |
if (from == 0 && to == 0) goto GB999;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* GB3 */
|
|
Packit |
b89d10 |
if (from == EGCB_CR && to == EGCB_LF) return EGCB_NOT_BREAK;
|
|
Packit |
b89d10 |
/* GB4 */
|
|
Packit |
b89d10 |
if (IS_CONTROL_CR_LF(from)) return EGCB_BREAK;
|
|
Packit |
b89d10 |
/* GB5 */
|
|
Packit |
b89d10 |
if (IS_CONTROL_CR_LF(to)) return EGCB_BREAK;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if (IS_HANGUL(from) && IS_HANGUL(to)) {
|
|
Packit |
b89d10 |
/* GB6 */
|
|
Packit |
b89d10 |
if (from == EGCB_L && to != EGCB_T) return EGCB_NOT_BREAK;
|
|
Packit |
b89d10 |
/* GB7 */
|
|
Packit |
b89d10 |
if ((from == EGCB_LV || from == EGCB_V)
|
|
Packit |
b89d10 |
&& (to == EGCB_V || to == EGCB_T)) return EGCB_NOT_BREAK;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* GB8 */
|
|
Packit |
b89d10 |
if ((from == EGCB_LVT || from == EGCB_T) && (to == EGCB_T))
|
|
Packit |
b89d10 |
return EGCB_NOT_BREAK;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
goto GB999;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* GB9 */
|
|
Packit |
b89d10 |
if (to == EGCB_Extend || to == EGCB_ZWJ) return EGCB_NOT_BREAK;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* GB9a */
|
|
Packit |
b89d10 |
if (to == EGCB_SpacingMark) return EGCB_NOT_BREAK;
|
|
Packit |
b89d10 |
/* GB9b */
|
|
Packit |
b89d10 |
if (from == EGCB_Prepend) return EGCB_NOT_BREAK;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* GB10 */
|
|
Packit |
b89d10 |
if (to == EGCB_E_Modifier) {
|
|
Packit |
b89d10 |
if (from == EGCB_E_Base || from == EGCB_E_Base_GAZ) return EGCB_NOT_BREAK;
|
|
Packit |
b89d10 |
if (from == EGCB_Extend) return EGCB_BREAK_UNDEF_E_MODIFIER;
|
|
Packit |
b89d10 |
goto GB999;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* GB11 */
|
|
Packit |
b89d10 |
if (from == EGCB_ZWJ) {
|
|
Packit |
b89d10 |
if (to == EGCB_Glue_After_Zwj || to == EGCB_E_Base_GAZ) return EGCB_NOT_BREAK;
|
|
Packit |
b89d10 |
goto GB999;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* GB12, GB13 */
|
|
Packit |
b89d10 |
if (from == EGCB_Regional_Indicator && to == EGCB_Regional_Indicator) {
|
|
Packit |
b89d10 |
return EGCB_BREAK_UNDEF_RI_RI;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
GB999:
|
|
Packit |
b89d10 |
return EGCB_BREAK;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
extern int
|
|
Packit |
b89d10 |
onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
|
Packit |
b89d10 |
const UChar* start, const UChar* end)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
OnigCodePoint from;
|
|
Packit |
b89d10 |
OnigCodePoint to;
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
|
Packit |
b89d10 |
enum EGCB_BREAK_TYPE btype;
|
|
Packit |
b89d10 |
enum EGCB_TYPE type;
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/* GB1 and GB2 */
|
|
Packit |
b89d10 |
if (p == start) return 1;
|
|
Packit |
b89d10 |
if (p == end) return 1;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if (IS_NULL(prev)) {
|
|
Packit |
b89d10 |
prev = onigenc_get_prev_char_head(enc, start, p);
|
|
Packit |
b89d10 |
if (IS_NULL(prev)) return 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
from = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
|
Packit |
b89d10 |
to = ONIGENC_MBC_TO_CODE(enc, p, end);
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
|
Packit |
b89d10 |
if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
|
|
Packit |
b89d10 |
if (from == 0x000d && to == 0x000a) return 0;
|
|
Packit |
b89d10 |
else return 1;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
btype = unicode_egcb_is_break_2code(from, to);
|
|
Packit |
b89d10 |
switch (btype) {
|
|
Packit |
b89d10 |
case EGCB_NOT_BREAK:
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
break;
|
|
Packit |
b89d10 |
case EGCB_BREAK:
|
|
Packit |
b89d10 |
return 1;
|
|
Packit |
b89d10 |
break;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
case EGCB_BREAK_UNDEF_E_MODIFIER:
|
|
Packit |
b89d10 |
while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
|
Packit |
b89d10 |
from = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
|
Packit |
b89d10 |
type = egcb_get_type(from);
|
|
Packit |
b89d10 |
if (type == EGCB_E_Base || type == EGCB_E_Base_GAZ)
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
if (type != EGCB_Extend)
|
|
Packit |
b89d10 |
break;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
break;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
case EGCB_BREAK_UNDEF_RI_RI:
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
int n = 0;
|
|
Packit |
b89d10 |
while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
|
Packit |
b89d10 |
from = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
|
Packit |
b89d10 |
type = egcb_get_type(from);
|
|
Packit |
b89d10 |
if (type != EGCB_Regional_Indicator)
|
|
Packit |
b89d10 |
break;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
if ((n % 2) == 0) return 0;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
break;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return 1;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#else
|
|
Packit |
b89d10 |
if (from == 0x000d && to == 0x000a) return 0;
|
|
Packit |
b89d10 |
else return 1;
|
|
Packit |
b89d10 |
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
/*
  Undefine __GNUC__ to suppress the following warning in Clang:

  ./unicode_property_data.c:26730:44: warning: static variable
  'unicode_prop_name_pool_contents' is used in an inline function with
  external linkage [-Wstatic-in-inline]
  register const char *s = o + unicode_prop_name_pool;
*/
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#ifdef __clang__
|
|
Packit |
b89d10 |
#undef __GNUC__
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_PROPERTIES
|
|
Packit |
b89d10 |
#include "unicode_property_data.c"
|
|
Packit |
b89d10 |
#else
|
|
Packit |
b89d10 |
#include "unicode_property_data_posix.c"
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
#define USER_DEFINED_PROPERTY_MAX_NUM 20
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
typedef struct {
|
|
Packit |
b89d10 |
int ctype;
|
|
Packit |
b89d10 |
OnigCodePoint* ranges;
|
|
Packit |
b89d10 |
} UserDefinedPropertyValue;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
static int UserDefinedPropertyNum;
|
|
Packit |
b89d10 |
static UserDefinedPropertyValue
|
|
Packit |
b89d10 |
UserDefinedPropertyRanges[USER_DEFINED_PROPERTY_MAX_NUM];
|
|
Packit |
b89d10 |
static st_table* UserDefinedPropertyTable;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
extern int
|
|
Packit |
b89d10 |
onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
UserDefinedPropertyValue* e;
|
|
Packit |
b89d10 |
int r;
|
|
Packit |
b89d10 |
int i;
|
|
Packit |
b89d10 |
int n;
|
|
Packit |
b89d10 |
int len;
|
|
Packit |
b89d10 |
int c;
|
|
Packit |
b89d10 |
char* s;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
|
|
Packit |
b89d10 |
return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
len = (int )strlen(name);
|
|
Packit |
b89d10 |
if (len >= PROPERTY_NAME_MAX_SIZE)
|
|
Packit |
b89d10 |
return ONIGERR_TOO_LONG_PROPERTY_NAME;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
s = (char* )xmalloc(len + 1);
|
|
Packit |
b89d10 |
if (s == 0)
|
|
Packit |
b89d10 |
return ONIGERR_MEMORY;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
n = 0;
|
|
Packit |
b89d10 |
for (i = 0; i < len; i++) {
|
|
Packit |
b89d10 |
c = name[i];
|
|
Packit |
b89d10 |
if (c <= 0 || c >= 0x80) {
|
|
Packit |
b89d10 |
xfree(s);
|
|
Packit |
b89d10 |
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if (c != ' ' && c != '-' && c != '_') {
|
|
Packit |
b89d10 |
s[n] = c;
|
|
Packit |
b89d10 |
n++;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
s[n] = '\0';
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if (UserDefinedPropertyTable == 0) {
|
|
Packit |
b89d10 |
UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
e = UserDefinedPropertyRanges + UserDefinedPropertyNum;
|
|
Packit |
b89d10 |
e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum;
|
|
Packit |
b89d10 |
e->ranges = ranges;
|
|
Packit |
b89d10 |
r = onig_st_insert_strend(UserDefinedPropertyTable,
|
|
Packit |
b89d10 |
(const UChar* )s, (const UChar* )s + n,
|
|
Packit |
b89d10 |
(hash_data_type )((void* )e));
|
|
Packit |
b89d10 |
if (r < 0) return r;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
UserDefinedPropertyNum++;
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
extern int
|
|
Packit |
b89d10 |
onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
if (
|
|
Packit |
b89d10 |
#ifdef USE_UNICODE_PROPERTIES
|
|
Packit |
b89d10 |
ctype <= ONIGENC_MAX_STD_CTYPE &&
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
code < 256) {
|
|
Packit |
b89d10 |
return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if (ctype >= CODE_RANGES_NUM) {
|
|
Packit |
b89d10 |
int index = ctype - CODE_RANGES_NUM;
|
|
Packit |
b89d10 |
if (index < UserDefinedPropertyNum)
|
|
Packit |
b89d10 |
return onig_is_in_code_range((UChar* )UserDefinedPropertyRanges[index].ranges, code);
|
|
Packit |
b89d10 |
else
|
|
Packit |
b89d10 |
return ONIGERR_TYPE_BUG;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
extern int
|
|
Packit |
b89d10 |
onigenc_unicode_ctype_code_range(OnigCtype ctype, const OnigCodePoint* ranges[])
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
if (ctype >= CODE_RANGES_NUM) {
|
|
Packit |
b89d10 |
int index = ctype - CODE_RANGES_NUM;
|
|
Packit |
b89d10 |
if (index < UserDefinedPropertyNum) {
|
|
Packit |
b89d10 |
*ranges = UserDefinedPropertyRanges[index].ranges;
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
else
|
|
Packit |
b89d10 |
return ONIGERR_TYPE_BUG;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
*ranges = CodeRanges[ctype];
|
|
Packit |
b89d10 |
return 0;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
extern int
|
|
Packit |
b89d10 |
onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
|
|
Packit |
b89d10 |
const OnigCodePoint* ranges[])
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
*sb_out = 0x00;
|
|
Packit |
b89d10 |
return onigenc_unicode_ctype_code_range(ctype, ranges);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
extern int
|
|
Packit |
b89d10 |
onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
|
|
Packit |
b89d10 |
{
|
|
Packit |
b89d10 |
int len;
|
|
Packit |
b89d10 |
UChar *p;
|
|
Packit |
b89d10 |
OnigCodePoint code;
|
|
Packit |
b89d10 |
const struct PoolPropertyNameCtype* pc;
|
|
Packit |
b89d10 |
char buf[PROPERTY_NAME_MAX_SIZE];
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
p = name;
|
|
Packit |
b89d10 |
len = 0;
|
|
Packit |
b89d10 |
while (p < end) {
|
|
Packit |
b89d10 |
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
|
Packit |
b89d10 |
if (code >= 0x80)
|
|
Packit |
b89d10 |
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if (code != ' ' && code != '-' && code != '_') {
|
|
Packit |
b89d10 |
buf[len++] = (char )code;
|
|
Packit |
b89d10 |
if (len >= PROPERTY_NAME_MAX_SIZE)
|
|
Packit |
b89d10 |
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
p += enclen(enc, p);
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
buf[len] = 0;
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
if (UserDefinedPropertyTable != 0) {
|
|
Packit |
b89d10 |
UserDefinedPropertyValue* e;
|
|
Packit |
b89d10 |
e = (UserDefinedPropertyValue* )NULL;
|
|
Packit |
b89d10 |
onig_st_lookup_strend(UserDefinedPropertyTable,
|
|
Packit |
b89d10 |
(const UChar* )buf, (const UChar* )buf + len,
|
|
Packit |
b89d10 |
(hash_data_type* )((void* )(&e)));
|
|
Packit |
b89d10 |
if (e != 0) {
|
|
Packit |
b89d10 |
return e->ctype;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
pc = unicode_lookup_property_name(buf, len);
|
|
Packit |
b89d10 |
if (pc != 0) {
|
|
Packit |
b89d10 |
/* fprintf(stderr, "LOOKUP: %s: %d\n", buf, pc->ctype); */
|
|
Packit |
b89d10 |
#ifndef USE_UNICODE_PROPERTIES
|
|
Packit |
b89d10 |
if (pc->ctype > ONIGENC_MAX_STD_CTYPE)
|
|
Packit |
b89d10 |
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
|
Packit |
b89d10 |
#endif
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return (int )pc->ctype;
|
|
Packit |
b89d10 |
}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
|
Packit |
b89d10 |
}
|