Blame src/unicode.c

Packit b89d10
/**********************************************************************
Packit b89d10
  unicode.c -  Oniguruma (regular expression library)
Packit b89d10
**********************************************************************/
Packit b89d10
/*-
Packit b89d10
 * Copyright (c) 2002-2017  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
Packit b89d10
 * All rights reserved.
Packit b89d10
 *
Packit b89d10
 * Redistribution and use in source and binary forms, with or without
Packit b89d10
 * modification, are permitted provided that the following conditions
Packit b89d10
 * are met:
Packit b89d10
 * 1. Redistributions of source code must retain the above copyright
Packit b89d10
 *    notice, this list of conditions and the following disclaimer.
Packit b89d10
 * 2. Redistributions in binary form must reproduce the above copyright
Packit b89d10
 *    notice, this list of conditions and the following disclaimer in the
Packit b89d10
 *    documentation and/or other materials provided with the distribution.
Packit b89d10
 *
Packit b89d10
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
Packit b89d10
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit b89d10
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit b89d10
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
Packit b89d10
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
Packit b89d10
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
Packit b89d10
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
Packit b89d10
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
Packit b89d10
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
Packit b89d10
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
Packit b89d10
 * SUCH DAMAGE.
Packit b89d10
 */
Packit b89d10
Packit b89d10
#include "regint.h"
Packit b89d10
Packit b89d10
/* One entry of the property-name lookup: a name reference paired with the
 * character type (ctype) that the name denotes. */
struct PoolPropertyNameCtype {
  /* Reference to the property name -- presumably an offset into a name
   * string pool; confirm against the generated property data file. */
  short int name;

  /* ctype value associated with the name. */
  short int ctype;
};
Packit b89d10
Packit b89d10
/* Non-zero when the ctype bit for `ctype` is set in the table entry of
 * `code`.  `code` indexes a 256-entry table, so it must be below 256. */
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
  ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)

/* Per-code-point ctype bit masks for code points 0x00 - 0xff. */
static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
  /* 0x00 - 0x07 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 - 0x0f */
  0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
  /* 0x10 - 0x17 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 - 0x1f */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 - 0x27 */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 - 0x2f */
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 - 0x37 */
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 - 0x3f */
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 - 0x47 */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 - 0x4f */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 - 0x57 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 - 0x5f */
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 - 0x67 */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 - 0x6f */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 - 0x77 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 - 0x7f */
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 - 0x87 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
  /* 0x88 - 0x8f */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 - 0x97 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 - 0x9f */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 - 0xa7 */
  0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xa8 - 0xaf */
  0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
  /* 0xb0 - 0xb7 */
  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
  /* 0xb8 - 0xbf */
  0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  /* 0xc0 - 0xc7 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc8 - 0xcf */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 - 0xd7 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  /* 0xd8 - 0xdf */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  /* 0xe0 - 0xe7 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe8 - 0xef */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 - 0xf7 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  /* 0xf8 - 0xff */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
Packit b89d10
Packit b89d10
#include "st.h"
Packit b89d10
Packit b89d10
#include "unicode_fold_data.c"
Packit b89d10
Packit b89d10
extern int
Packit b89d10
onigenc_unicode_mbc_case_fold(OnigEncoding enc,
Packit b89d10
    OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
Packit b89d10
    UChar* fold)
Packit b89d10
{
Packit b89d10
  const struct ByUnfoldKey* buk;
Packit b89d10
Packit b89d10
  OnigCodePoint code;
Packit b89d10
  int i, len, rlen;
Packit b89d10
  const UChar *p = *pp;
Packit b89d10
Packit b89d10
  code = ONIGENC_MBC_TO_CODE(enc, p, end);
Packit b89d10
  len = enclen(enc, p);
Packit b89d10
  *pp += len;
Packit b89d10
Packit b89d10
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
Packit b89d10
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
Packit b89d10
    if (code == 0x0130) {
Packit b89d10
      return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
Packit b89d10
    }
Packit b89d10
#if 0
Packit b89d10
    if (code == 0x0049) {
Packit b89d10
      return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
Packit b89d10
    }
Packit b89d10
#endif
Packit b89d10
  }
Packit b89d10
#endif
Packit b89d10
Packit b89d10
  buk = unicode_unfold_key(code);
Packit b89d10
  if (buk != 0) {
Packit b89d10
    if (buk->fold_len == 1) {
Packit b89d10
      return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
Packit b89d10
    }
Packit b89d10
    else {
Packit b89d10
      OnigCodePoint* addr;
Packit b89d10
Packit b89d10
      FOLDS_FOLD_ADDR_BUK(buk, addr);
Packit b89d10
      rlen = 0;
Packit b89d10
      for (i = 0; i < buk->fold_len; i++) {
Packit b89d10
        OnigCodePoint c = addr[i];
Packit b89d10
        len = ONIGENC_CODE_TO_MBC(enc, c, fold);
Packit b89d10
        fold += len;
Packit b89d10
        rlen += len;
Packit b89d10
      }
Packit b89d10
      return rlen;
Packit b89d10
    }
Packit b89d10
  }
Packit b89d10
Packit b89d10
  for (i = 0; i < len; i++) {
Packit b89d10
    *fold++ = *p++;
Packit b89d10
  }
Packit b89d10
  return len;
Packit b89d10
}
Packit b89d10
Packit b89d10
static int
Packit b89d10
apply_case_fold1(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
Packit b89d10
{
Packit b89d10
  int i, j, k, n, r;
Packit b89d10
Packit b89d10
  for (i = from; i < to; ) {
Packit b89d10
    OnigCodePoint fold = *FOLDS1_FOLD(i);
Packit b89d10
    n = FOLDS1_UNFOLDS_NUM(i);
Packit b89d10
    for (j = 0; j < n; j++) {
Packit b89d10
      OnigCodePoint unfold = FOLDS1_UNFOLDS(i)[j];
Packit b89d10
Packit b89d10
      r = (*f)(fold, &unfold, 1, arg);
Packit b89d10
      if (r != 0) return r;
Packit b89d10
      r = (*f)(unfold, &fold, 1, arg);
Packit b89d10
      if (r != 0) return r;
Packit b89d10
Packit b89d10
      for (k = 0; k < j; k++) {
Packit b89d10
        OnigCodePoint unfold2 = FOLDS1_UNFOLDS(i)[k];
Packit b89d10
        r = (*f)(unfold, &unfold2, 1, arg);
Packit b89d10
        if (r != 0) return r;
Packit b89d10
        r = (*f)(unfold2, &unfold, 1, arg);
Packit b89d10
        if (r != 0) return r;
Packit b89d10
      }
Packit b89d10
    }
Packit b89d10
Packit b89d10
    i = FOLDS1_NEXT_INDEX(i);
Packit b89d10
  }
Packit b89d10
Packit b89d10
  return 0;
Packit b89d10
}
Packit b89d10
Packit b89d10
static int
Packit b89d10
apply_case_fold2(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
Packit b89d10
{
Packit b89d10
  int i, j, k, n, r;
Packit b89d10
Packit b89d10
  for (i = from; i < to; ) {
Packit b89d10
    OnigCodePoint* fold = FOLDS2_FOLD(i);
Packit b89d10
    n = FOLDS2_UNFOLDS_NUM(i);
Packit b89d10
    for (j = 0; j < n; j++) {
Packit b89d10
      OnigCodePoint unfold = FOLDS2_UNFOLDS(i)[j];
Packit b89d10
Packit b89d10
      r = (*f)(unfold, fold, 2, arg);
Packit b89d10
      if (r != 0) return r;
Packit b89d10
Packit b89d10
      for (k = 0; k < j; k++) {
Packit b89d10
        OnigCodePoint unfold2 = FOLDS2_UNFOLDS(i)[k];
Packit b89d10
        r = (*f)(unfold, &unfold2, 1, arg);
Packit b89d10
        if (r != 0) return r;
Packit b89d10
        r = (*f)(unfold2, &unfold, 1, arg);
Packit b89d10
        if (r != 0) return r;
Packit b89d10
      }
Packit b89d10
    }
Packit b89d10
Packit b89d10
    i = FOLDS2_NEXT_INDEX(i);
Packit b89d10
  }
Packit b89d10
Packit b89d10
  return 0;
Packit b89d10
}
Packit b89d10
Packit b89d10
static int
Packit b89d10
apply_case_fold3(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
Packit b89d10
{
Packit b89d10
  int i, j, k, n, r;
Packit b89d10
Packit b89d10
  for (i = from; i < to; ) {
Packit b89d10
    OnigCodePoint* fold = FOLDS3_FOLD(i);
Packit b89d10
    n = FOLDS3_UNFOLDS_NUM(i);
Packit b89d10
    for (j = 0; j < n; j++) {
Packit b89d10
      OnigCodePoint unfold = FOLDS3_UNFOLDS(i)[j];
Packit b89d10
Packit b89d10
      r = (*f)(unfold, fold, 3, arg);
Packit b89d10
      if (r != 0) return r;
Packit b89d10
Packit b89d10
      for (k = 0; k < j; k++) {
Packit b89d10
        OnigCodePoint unfold2 = FOLDS3_UNFOLDS(i)[k];
Packit b89d10
        r = (*f)(unfold, &unfold2, 1, arg);
Packit b89d10
        if (r != 0) return r;
Packit b89d10
        r = (*f)(unfold2, &unfold, 1, arg);
Packit b89d10
        if (r != 0) return r;
Packit b89d10
      }
Packit b89d10
    }
Packit b89d10
Packit b89d10
    i = FOLDS3_NEXT_INDEX(i);
Packit b89d10
  }
Packit b89d10
Packit b89d10
  return 0;
Packit b89d10
}
Packit b89d10
Packit b89d10
extern int
Packit b89d10
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
Packit b89d10
                                    OnigApplyAllCaseFoldFunc f, void* arg)
Packit b89d10
{
Packit b89d10
  int r;
Packit b89d10
Packit b89d10
  r = apply_case_fold1(0, FOLDS1_NORMAL_END_INDEX, f, arg);
Packit b89d10
  if (r != 0) return r;
Packit b89d10
Packit b89d10
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
Packit b89d10
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
Packit b89d10
    code = 0x0131;
Packit b89d10
    r = (*f)(0x0049, &code, 1, arg);
Packit b89d10
    if (r != 0) return r;
Packit b89d10
    code = 0x0049;
Packit b89d10
    r = (*f)(0x0131, &code, 1, arg);
Packit b89d10
    if (r != 0) return r;
Packit b89d10
Packit b89d10
    code = 0x0130;
Packit b89d10
    r = (*f)(0x0069, &code, 1, arg);
Packit b89d10
    if (r != 0) return r;
Packit b89d10
    code = 0x0069;
Packit b89d10
    r = (*f)(0x0130, &code, 1, arg);
Packit b89d10
    if (r != 0) return r;
Packit b89d10
  }
Packit b89d10
  else {
Packit b89d10
#endif
Packit b89d10
    r = apply_case_fold1(FOLDS1_NORMAL_END_INDEX, FOLDS1_END_INDEX, f, arg);
Packit b89d10
    if (r != 0) return r;
Packit b89d10
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
Packit b89d10
  }
Packit b89d10
#endif
Packit b89d10
Packit b89d10
  if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
Packit b89d10
    return 0;
Packit b89d10
Packit b89d10
  r = apply_case_fold2(0, FOLDS2_NORMAL_END_INDEX, f, arg);
Packit b89d10
  if (r != 0) return r;
Packit b89d10
Packit b89d10
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
Packit b89d10
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
Packit b89d10
#endif
Packit b89d10
    r = apply_case_fold2(FOLDS2_NORMAL_END_INDEX, FOLDS2_END_INDEX, f, arg);
Packit b89d10
    if (r != 0) return r;
Packit b89d10
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
Packit b89d10
  }
Packit b89d10
#endif
Packit b89d10
Packit b89d10
  r = apply_case_fold3(0, FOLDS3_NORMAL_END_INDEX, f, arg);
Packit b89d10
  if (r != 0) return r;
Packit b89d10
Packit b89d10
  return 0;
Packit b89d10
}
Packit b89d10
Packit b89d10
extern int
Packit b89d10
onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
Packit b89d10
    OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
Packit b89d10
    OnigCaseFoldCodeItem items[])
Packit b89d10
{
Packit b89d10
  int n, m, i, j, k, len;
Packit b89d10
  OnigCodePoint code, codes[3];
Packit b89d10
  const struct ByUnfoldKey* buk;
Packit b89d10
Packit b89d10
  n = 0;
Packit b89d10
Packit b89d10
  code = ONIGENC_MBC_TO_CODE(enc, p, end);
Packit b89d10
  len = enclen(enc, p);
Packit b89d10
Packit b89d10
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
Packit b89d10
  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
Packit b89d10
    if (code == 0x0049) {
Packit b89d10
      items[0].byte_len = len;
Packit b89d10
      items[0].code_len = 1;
Packit b89d10
      items[0].code[0]  = 0x0131;
Packit b89d10
      return 1;
Packit b89d10
    }
Packit b89d10
    else if (code == 0x0130) {
Packit b89d10
      items[0].byte_len = len;
Packit b89d10
      items[0].code_len = 1;
Packit b89d10
      items[0].code[0]  = 0x0069;
Packit b89d10
      return 1;
Packit b89d10
    }
Packit b89d10
    else if (code == 0x0131) {
Packit b89d10
      items[0].byte_len = len;
Packit b89d10
      items[0].code_len = 1;
Packit b89d10
      items[0].code[0]  = 0x0049;
Packit b89d10
      return 1;
Packit b89d10
    }
Packit b89d10
    else if (code == 0x0069) {
Packit b89d10
      items[0].byte_len = len;
Packit b89d10
      items[0].code_len = 1;
Packit b89d10
      items[0].code[0]  = 0x0130;
Packit b89d10
      return 1;
Packit b89d10
    }
Packit b89d10
  }
Packit b89d10
#endif
Packit b89d10
Packit b89d10
  buk = unicode_unfold_key(code);
Packit b89d10
  if (buk != 0) {
Packit b89d10
    if (buk->fold_len == 1) {
Packit b89d10
      int un;
Packit b89d10
      items[0].byte_len = len;
Packit b89d10
      items[0].code_len = 1;
Packit b89d10
      items[0].code[0]  = *FOLDS1_FOLD(buk->index);
Packit b89d10
      n++;
Packit b89d10
Packit b89d10
      un = FOLDS1_UNFOLDS_NUM(buk->index);
Packit b89d10
      for (i = 0; i < un; i++) {
Packit b89d10
        OnigCodePoint unfold = FOLDS1_UNFOLDS(buk->index)[i];
Packit b89d10
        if (unfold != code) {
Packit b89d10
          items[n].byte_len = len;
Packit b89d10
          items[n].code_len = 1;
Packit b89d10
          items[n].code[0]  = unfold;
Packit b89d10
          n++;
Packit b89d10
        }
Packit b89d10
      }
Packit b89d10
      code = items[0].code[0]; /* for multi-code to unfold search. */
Packit b89d10
    }
Packit b89d10
    else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
Packit b89d10
      OnigCodePoint cs[3][4];
Packit b89d10
      int fn, ncs[3];
Packit b89d10
Packit b89d10
      if (buk->fold_len == 2) {
Packit b89d10
        m = FOLDS2_UNFOLDS_NUM(buk->index);
Packit b89d10
        for (i = 0; i < m; i++) {
Packit b89d10
          OnigCodePoint unfold = FOLDS2_UNFOLDS(buk->index)[i];
Packit b89d10
          if (unfold == code) continue;
Packit b89d10
Packit b89d10
          items[n].byte_len = len;
Packit b89d10
          items[n].code_len = 1;
Packit b89d10
          items[n].code[0]  = unfold;
Packit b89d10
          n++;
Packit b89d10
        }
Packit b89d10
Packit b89d10
        for (fn = 0; fn < 2; fn++) {
Packit b89d10
          int index;
Packit b89d10
          cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];
Packit b89d10
          index = unicode_fold1_key(&cs[fn][0]);
Packit b89d10
          if (index >= 0) {
Packit b89d10
            int m = FOLDS1_UNFOLDS_NUM(index);
Packit b89d10
            for (i = 0; i < m; i++) {
Packit b89d10
              cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
Packit b89d10
            }
Packit b89d10
            ncs[fn] = m + 1;
Packit b89d10
          }
Packit b89d10
          else
Packit b89d10
            ncs[fn] = 1;
Packit b89d10
        }
Packit b89d10
Packit b89d10
        for (i = 0; i < ncs[0]; i++) {
Packit b89d10
          for (j = 0; j < ncs[1]; j++) {
Packit b89d10
            items[n].byte_len = len;
Packit b89d10
            items[n].code_len = 2;
Packit b89d10
            items[n].code[0]  = cs[0][i];
Packit b89d10
            items[n].code[1]  = cs[1][j];
Packit b89d10
            n++;
Packit b89d10
          }
Packit b89d10
        }
Packit b89d10
      }
Packit b89d10
      else { /* fold_len == 3 */
Packit b89d10
        m = FOLDS3_UNFOLDS_NUM(buk->index);
Packit b89d10
        for (i = 0; i < m; i++) {
Packit b89d10
          OnigCodePoint unfold = FOLDS3_UNFOLDS(buk->index)[i];
Packit b89d10
          if (unfold == code) continue;
Packit b89d10
Packit b89d10
          items[n].byte_len = len;
Packit b89d10
          items[n].code_len = 1;
Packit b89d10
          items[n].code[0]  = unfold;
Packit b89d10
          n++;
Packit b89d10
        }
Packit b89d10
Packit b89d10
        for (fn = 0; fn < 3; fn++) {
Packit b89d10
          int index;
Packit b89d10
          cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];
Packit b89d10
          index = unicode_fold1_key(&cs[fn][0]);
Packit b89d10
          if (index >= 0) {
Packit b89d10
            int m = FOLDS1_UNFOLDS_NUM(index);
Packit b89d10
            for (i = 0; i < m; i++) {
Packit b89d10
              cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
Packit b89d10
            }
Packit b89d10
            ncs[fn] = m + 1;
Packit b89d10
          }
Packit b89d10
          else
Packit b89d10
            ncs[fn] = 1;
Packit b89d10
        }
Packit b89d10
Packit b89d10
        for (i = 0; i < ncs[0]; i++) {
Packit b89d10
          for (j = 0; j < ncs[1]; j++) {
Packit b89d10
            for (k = 0; k < ncs[2]; k++) {
Packit b89d10
              items[n].byte_len = len;
Packit b89d10
              items[n].code_len = 3;
Packit b89d10
              items[n].code[0]  = cs[0][i];
Packit b89d10
              items[n].code[1]  = cs[1][j];
Packit b89d10
              items[n].code[2]  = cs[2][k];
Packit b89d10
              n++;
Packit b89d10
            }
Packit b89d10
          }
Packit b89d10
        }
Packit b89d10
      }
Packit b89d10
Packit b89d10
      /* multi char folded code is not head of another folded multi char */
Packit b89d10
      return n;
Packit b89d10
    }
Packit b89d10
  }
Packit b89d10
  else {
Packit b89d10
    int index = unicode_fold1_key(&code);
Packit b89d10
    if (index >= 0) {
Packit b89d10
      int m = FOLDS1_UNFOLDS_NUM(index);
Packit b89d10
      for (i = 0; i < m; i++) {
Packit b89d10
        items[n].byte_len = len;
Packit b89d10
        items[n].code_len = 1;
Packit b89d10
        items[n].code[0]  = FOLDS1_UNFOLDS(index)[i];
Packit b89d10
        n++;
Packit b89d10
      }
Packit b89d10
    }
Packit b89d10
  }
Packit b89d10
Packit b89d10
  if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
Packit b89d10
    return n;
Packit b89d10
Packit b89d10
  p += len;
Packit b89d10
  if (p < end) {
Packit b89d10
    int clen;
Packit b89d10
    int index;
Packit b89d10
Packit b89d10
    codes[0] = code;
Packit b89d10
    code = ONIGENC_MBC_TO_CODE(enc, p, end);
Packit b89d10
Packit b89d10
    buk = unicode_unfold_key(code);
Packit b89d10
    if (buk != 0 && buk->fold_len == 1) {
Packit b89d10
      codes[1] = *FOLDS1_FOLD(buk->index);
Packit b89d10
    }
Packit b89d10
    else
Packit b89d10
      codes[1] = code;
Packit b89d10
Packit b89d10
    clen = enclen(enc, p);
Packit b89d10
    len += clen;
Packit b89d10
Packit b89d10
    index = unicode_fold2_key(codes);
Packit b89d10
    if (index >= 0) {
Packit b89d10
      m = FOLDS2_UNFOLDS_NUM(index);
Packit b89d10
      for (i = 0; i < m; i++) {
Packit b89d10
        items[n].byte_len = len;
Packit b89d10
        items[n].code_len = 1;
Packit b89d10
        items[n].code[0]  = FOLDS2_UNFOLDS(index)[i];
Packit b89d10
        n++;
Packit b89d10
      }
Packit b89d10
    }
Packit b89d10
Packit b89d10
    p += clen;
Packit b89d10
    if (p < end) {
Packit b89d10
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
Packit b89d10
      buk = unicode_unfold_key(code);
Packit b89d10
      if (buk != 0 && buk->fold_len == 1) {
Packit b89d10
        codes[2] = *FOLDS1_FOLD(buk->index);
Packit b89d10
      }
Packit b89d10
      else
Packit b89d10
        codes[2] = code;
Packit b89d10
Packit b89d10
      clen = enclen(enc, p);
Packit b89d10
      len += clen;
Packit b89d10
Packit b89d10
      index = unicode_fold3_key(codes);
Packit b89d10
      if (index >= 0) {
Packit b89d10
        m = FOLDS3_UNFOLDS_NUM(index);
Packit b89d10
        for (i = 0; i < m; i++) {
Packit b89d10
          items[n].byte_len = len;
Packit b89d10
          items[n].code_len = 1;
Packit b89d10
          items[n].code[0]  = FOLDS3_UNFOLDS(index)[i];
Packit b89d10
          n++;
Packit b89d10
        }
Packit b89d10
      }
Packit b89d10
    }
Packit b89d10
  }
Packit b89d10
Packit b89d10
  return n;
Packit b89d10
}
Packit b89d10
Packit b89d10
Packit b89d10
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
Packit b89d10
Packit b89d10
/* Outcome of examining two adjacent code points for an extended grapheme
 * cluster boundary. */
enum EGCB_BREAK_TYPE {
  /* No boundary between the two code points. */
  EGCB_NOT_BREAK              = 0,
  /* Boundary between the two code points. */
  EGCB_BREAK                  = 1,
  /* Extend followed by E_Modifier: not decidable from the pair alone. */
  EGCB_BREAK_UNDEF_E_MODIFIER = 2,
  /* Two Regional_Indicators: not decidable from the pair alone. */
  EGCB_BREAK_UNDEF_RI_RI      = 3
};
Packit b89d10
Packit b89d10
/* Grapheme cluster break class of a code point.  The numeric order matters:
 * CR, LF and Control are contiguous (1..3) and the Hangul syllable classes
 * are the highest values (>= EGCB_L), which range checks on this enum
 * rely on. */
enum EGCB_TYPE {
  EGCB_Other              = 0,

  /* Line-ending and control classes. */
  EGCB_CR                 = 1,
  EGCB_LF                 = 2,
  EGCB_Control            = 3,

  EGCB_Extend             = 4,
  EGCB_Prepend            = 5,
  EGCB_Regional_Indicator = 6,
  EGCB_SpacingMark        = 7,
  EGCB_ZWJ                = 8,

  /* Emoji-sequence classes. */
  EGCB_E_Base             = 9,
  EGCB_E_Base_GAZ         = 10,
  EGCB_E_Modifier         = 11,
  EGCB_Glue_After_Zwj     = 12,

  /* Hangul syllable classes. */
  EGCB_L                  = 13,
  EGCB_LV                 = 14,
  EGCB_LVT                = 15,
  EGCB_T                  = 16,
  EGCB_V                  = 17
};
Packit b89d10
Packit b89d10
typedef struct {
Packit b89d10
  OnigCodePoint  start;
Packit b89d10
  OnigCodePoint  end;
Packit b89d10
  enum EGCB_TYPE type;
Packit b89d10
} EGCB_RANGE_TYPE;
Packit b89d10
Packit b89d10
#include "unicode_egcb_data.c"
Packit b89d10
Packit b89d10
static enum EGCB_TYPE
Packit b89d10
egcb_get_type(OnigCodePoint code)
Packit b89d10
{
Packit b89d10
  OnigCodePoint low, high, x;
Packit b89d10
  enum EGCB_TYPE type;
Packit b89d10
Packit b89d10
  for (low = 0, high = (OnigCodePoint )EGCB_RANGE_NUM; low < high; ) {
Packit b89d10
    x = (low + high) >> 1;
Packit b89d10
    if (code > EGCB_RANGES[x].end)
Packit b89d10
      low = x + 1;
Packit b89d10
    else
Packit b89d10
      high = x;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  type = (low < (OnigCodePoint )EGCB_RANGE_NUM &&
Packit b89d10
          code >= EGCB_RANGES[low].start) ?
Packit b89d10
    EGCB_RANGES[low].type : EGCB_Other;
Packit b89d10
Packit b89d10
  return type;
Packit b89d10
}
Packit b89d10
Packit b89d10
#define IS_CONTROL_CR_LF(code)   ((code) <= EGCB_Control && (code) >= EGCB_CR)
Packit b89d10
#define IS_HANGUL(code)          ((code) >= EGCB_L)
Packit b89d10
Packit b89d10
/* GB1 and GB2 are outside of this function. */
Packit b89d10
static enum EGCB_BREAK_TYPE
Packit b89d10
unicode_egcb_is_break_2code(OnigCodePoint from_code, OnigCodePoint to_code)
Packit b89d10
{
Packit b89d10
  enum EGCB_TYPE from;
Packit b89d10
  enum EGCB_TYPE to;
Packit b89d10
Packit b89d10
  from = egcb_get_type(from_code);
Packit b89d10
  to   = egcb_get_type(to_code);
Packit b89d10
Packit b89d10
  /* short cut */
Packit b89d10
  if (from == 0 && to == 0) goto GB999;
Packit b89d10
Packit b89d10
  /* GB3 */
Packit b89d10
  if (from == EGCB_CR && to == EGCB_LF) return EGCB_NOT_BREAK;
Packit b89d10
  /* GB4 */
Packit b89d10
  if (IS_CONTROL_CR_LF(from)) return EGCB_BREAK;
Packit b89d10
  /* GB5 */
Packit b89d10
  if (IS_CONTROL_CR_LF(to)) return EGCB_BREAK;
Packit b89d10
Packit b89d10
  if (IS_HANGUL(from) && IS_HANGUL(to)) {
Packit b89d10
    /* GB6 */
Packit b89d10
    if (from == EGCB_L && to != EGCB_T) return EGCB_NOT_BREAK;
Packit b89d10
    /* GB7 */
Packit b89d10
    if ((from == EGCB_LV || from == EGCB_V)
Packit b89d10
        && (to == EGCB_V || to == EGCB_T)) return EGCB_NOT_BREAK;
Packit b89d10
Packit b89d10
    /* GB8 */
Packit b89d10
    if ((from == EGCB_LVT || from == EGCB_T) && (to == EGCB_T))
Packit b89d10
      return EGCB_NOT_BREAK;
Packit b89d10
Packit b89d10
    goto GB999;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  /* GB9 */
Packit b89d10
  if (to == EGCB_Extend || to == EGCB_ZWJ) return EGCB_NOT_BREAK;
Packit b89d10
Packit b89d10
  /* GB9a */
Packit b89d10
  if (to == EGCB_SpacingMark) return EGCB_NOT_BREAK;
Packit b89d10
  /* GB9b */
Packit b89d10
  if (from == EGCB_Prepend) return EGCB_NOT_BREAK;
Packit b89d10
Packit b89d10
  /* GB10 */
Packit b89d10
  if (to == EGCB_E_Modifier) {
Packit b89d10
    if (from == EGCB_E_Base || from == EGCB_E_Base_GAZ) return EGCB_NOT_BREAK;
Packit b89d10
    if (from == EGCB_Extend) return EGCB_BREAK_UNDEF_E_MODIFIER;
Packit b89d10
    goto GB999;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  /* GB11 */
Packit b89d10
  if (from == EGCB_ZWJ) {
Packit b89d10
    if (to == EGCB_Glue_After_Zwj || to == EGCB_E_Base_GAZ) return EGCB_NOT_BREAK;
Packit b89d10
    goto GB999;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  /* GB12, GB13 */
Packit b89d10
  if (from == EGCB_Regional_Indicator && to == EGCB_Regional_Indicator) {
Packit b89d10
    return EGCB_BREAK_UNDEF_RI_RI;
Packit b89d10
  }
Packit b89d10
Packit b89d10
 GB999:
Packit b89d10
  return EGCB_BREAK;
Packit b89d10
}
Packit b89d10
Packit b89d10
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
Packit b89d10
Packit b89d10
extern int
Packit b89d10
onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
Packit b89d10
                               const UChar* start, const UChar* end)
Packit b89d10
{
Packit b89d10
  OnigCodePoint from;
Packit b89d10
  OnigCodePoint to;
Packit b89d10
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
Packit b89d10
  enum EGCB_BREAK_TYPE btype;
Packit b89d10
  enum EGCB_TYPE type;
Packit b89d10
#endif
Packit b89d10
Packit b89d10
  /* GB1 and GB2 */
Packit b89d10
  if (p == start) return 1;
Packit b89d10
  if (p == end)   return 1;
Packit b89d10
Packit b89d10
  if (IS_NULL(prev)) {
Packit b89d10
    prev = onigenc_get_prev_char_head(enc, start, p);
Packit b89d10
    if (IS_NULL(prev)) return 1;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  from = ONIGENC_MBC_TO_CODE(enc, prev, end);
Packit b89d10
  to   = ONIGENC_MBC_TO_CODE(enc, p, end);
Packit b89d10
Packit b89d10
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
Packit b89d10
  if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
Packit b89d10
    if (from == 0x000d && to == 0x000a) return 0;
Packit b89d10
    else return 1;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  btype = unicode_egcb_is_break_2code(from, to);
Packit b89d10
  switch (btype) {
Packit b89d10
  case EGCB_NOT_BREAK:
Packit b89d10
    return 0;
Packit b89d10
    break;
Packit b89d10
  case EGCB_BREAK:
Packit b89d10
    return 1;
Packit b89d10
    break;
Packit b89d10
Packit b89d10
  case EGCB_BREAK_UNDEF_E_MODIFIER:
Packit b89d10
    while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
Packit b89d10
      from = ONIGENC_MBC_TO_CODE(enc, prev, end);
Packit b89d10
      type = egcb_get_type(from);
Packit b89d10
      if (type == EGCB_E_Base || type == EGCB_E_Base_GAZ)
Packit b89d10
        return 0;
Packit b89d10
      if (type != EGCB_Extend)
Packit b89d10
        break;
Packit b89d10
    }
Packit b89d10
    break;
Packit b89d10
Packit b89d10
  case EGCB_BREAK_UNDEF_RI_RI:
Packit b89d10
    {
Packit b89d10
      int n = 0;
Packit b89d10
      while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
Packit b89d10
        from = ONIGENC_MBC_TO_CODE(enc, prev, end);
Packit b89d10
        type = egcb_get_type(from);
Packit b89d10
        if (type != EGCB_Regional_Indicator)
Packit b89d10
          break;
Packit b89d10
Packit b89d10
        n++;
Packit b89d10
      }
Packit b89d10
      if ((n % 2) == 0) return 0;
Packit b89d10
    }
Packit b89d10
    break;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  return 1;
Packit b89d10
Packit b89d10
#else
Packit b89d10
  if (from == 0x000d && to == 0x000a) return 0;
Packit b89d10
  else return 1;
Packit b89d10
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
Packit b89d10
}
Packit b89d10
Packit b89d10
Packit b89d10
/*
Packit b89d10
 Undefine __GNUC__ to avoid the following warning in Clang.
Packit b89d10
Packit b89d10
./unicode_property_data.c:26730:44: warning: static variable
Packit b89d10
      'unicode_prop_name_pool_contents' is used in an inline function with
Packit b89d10
      external linkage [-Wstatic-in-inline]
Packit b89d10
              register const char *s = o + unicode_prop_name_pool;
Packit b89d10
*/
Packit b89d10
Packit b89d10
#ifdef __clang__
Packit b89d10
#undef __GNUC__
Packit b89d10
#endif
Packit b89d10
Packit b89d10
#ifdef USE_UNICODE_PROPERTIES
Packit b89d10
#include "unicode_property_data.c"
Packit b89d10
#else
Packit b89d10
#include "unicode_property_data_posix.c"
Packit b89d10
#endif
Packit b89d10
Packit b89d10
#define USER_DEFINED_PROPERTY_MAX_NUM  20
Packit b89d10
Packit b89d10
typedef struct {
Packit b89d10
  int ctype;
Packit b89d10
  OnigCodePoint* ranges;
Packit b89d10
} UserDefinedPropertyValue;
Packit b89d10
Packit b89d10
static int UserDefinedPropertyNum;
Packit b89d10
static UserDefinedPropertyValue
Packit b89d10
UserDefinedPropertyRanges[USER_DEFINED_PROPERTY_MAX_NUM];
Packit b89d10
static st_table* UserDefinedPropertyTable;
Packit b89d10
Packit b89d10
extern int
Packit b89d10
onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
Packit b89d10
{
Packit b89d10
  UserDefinedPropertyValue* e;
Packit b89d10
  int r;
Packit b89d10
  int i;
Packit b89d10
  int n;
Packit b89d10
  int len;
Packit b89d10
  int c;
Packit b89d10
  char* s;
Packit b89d10
Packit b89d10
  if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
Packit b89d10
    return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
Packit b89d10
Packit b89d10
  len = (int )strlen(name);
Packit b89d10
  if (len >= PROPERTY_NAME_MAX_SIZE)
Packit b89d10
    return ONIGERR_TOO_LONG_PROPERTY_NAME;
Packit b89d10
Packit b89d10
  s = (char* )xmalloc(len + 1);
Packit b89d10
  if (s == 0)
Packit b89d10
    return ONIGERR_MEMORY;
Packit b89d10
Packit b89d10
  n = 0;
Packit b89d10
  for (i = 0; i < len; i++) {
Packit b89d10
    c = name[i];
Packit b89d10
    if (c <= 0 || c >= 0x80) {
Packit b89d10
      xfree(s);
Packit b89d10
      return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
Packit b89d10
    }
Packit b89d10
Packit b89d10
    if (c != ' ' && c != '-' && c != '_') {
Packit b89d10
      s[n] = c;
Packit b89d10
      n++;
Packit b89d10
    }
Packit b89d10
  }
Packit b89d10
  s[n] = '\0';
Packit b89d10
Packit b89d10
  if (UserDefinedPropertyTable == 0) {
Packit b89d10
    UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10);
Packit b89d10
  }
Packit b89d10
Packit b89d10
  e = UserDefinedPropertyRanges + UserDefinedPropertyNum;
Packit b89d10
  e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum;
Packit b89d10
  e->ranges = ranges;
Packit b89d10
  r = onig_st_insert_strend(UserDefinedPropertyTable,
Packit b89d10
                            (const UChar* )s, (const UChar* )s + n,
Packit b89d10
                            (hash_data_type )((void* )e));
Packit b89d10
  if (r < 0) return r;
Packit b89d10
Packit b89d10
  UserDefinedPropertyNum++;
Packit b89d10
  return 0;
Packit b89d10
}
Packit b89d10
Packit b89d10
extern int
Packit b89d10
onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
Packit b89d10
{
Packit b89d10
  if (
Packit b89d10
#ifdef USE_UNICODE_PROPERTIES
Packit b89d10
      ctype <= ONIGENC_MAX_STD_CTYPE &&
Packit b89d10
#endif
Packit b89d10
      code < 256) {
Packit b89d10
    return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
Packit b89d10
  }
Packit b89d10
Packit b89d10
  if (ctype >= CODE_RANGES_NUM) {
Packit b89d10
    int index = ctype - CODE_RANGES_NUM;
Packit b89d10
    if (index < UserDefinedPropertyNum)
Packit b89d10
      return onig_is_in_code_range((UChar* )UserDefinedPropertyRanges[index].ranges, code);
Packit b89d10
    else
Packit b89d10
      return ONIGERR_TYPE_BUG;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
Packit b89d10
}
Packit b89d10
Packit b89d10
Packit b89d10
extern int
Packit b89d10
onigenc_unicode_ctype_code_range(OnigCtype ctype, const OnigCodePoint* ranges[])
Packit b89d10
{
Packit b89d10
  if (ctype >= CODE_RANGES_NUM) {
Packit b89d10
    int index = ctype - CODE_RANGES_NUM;
Packit b89d10
    if (index < UserDefinedPropertyNum) {
Packit b89d10
      *ranges = UserDefinedPropertyRanges[index].ranges;
Packit b89d10
      return 0;
Packit b89d10
    }
Packit b89d10
    else
Packit b89d10
      return ONIGERR_TYPE_BUG;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  *ranges = CodeRanges[ctype];
Packit b89d10
  return 0;
Packit b89d10
}
Packit b89d10
Packit b89d10
extern int
Packit b89d10
onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
Packit b89d10
                                      const OnigCodePoint* ranges[])
Packit b89d10
{
Packit b89d10
  *sb_out = 0x00;
Packit b89d10
  return onigenc_unicode_ctype_code_range(ctype, ranges);
Packit b89d10
}
Packit b89d10
Packit b89d10
extern int
Packit b89d10
onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
Packit b89d10
{
Packit b89d10
  int len;
Packit b89d10
  UChar *p;
Packit b89d10
  OnigCodePoint code;
Packit b89d10
  const struct PoolPropertyNameCtype* pc;
Packit b89d10
  char buf[PROPERTY_NAME_MAX_SIZE];
Packit b89d10
Packit b89d10
  p = name;
Packit b89d10
  len = 0;
Packit b89d10
  while (p < end) {
Packit b89d10
    code = ONIGENC_MBC_TO_CODE(enc, p, end);
Packit b89d10
    if (code >= 0x80)
Packit b89d10
      return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
Packit b89d10
Packit b89d10
    if (code != ' ' && code != '-' && code != '_') {
Packit b89d10
      buf[len++] = (char )code;
Packit b89d10
      if (len >= PROPERTY_NAME_MAX_SIZE)
Packit b89d10
        return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
Packit b89d10
    }
Packit b89d10
Packit b89d10
    p += enclen(enc, p);
Packit b89d10
  }
Packit b89d10
Packit b89d10
  buf[len] = 0;
Packit b89d10
Packit b89d10
  if (UserDefinedPropertyTable != 0) {
Packit b89d10
    UserDefinedPropertyValue* e;
Packit b89d10
    e = (UserDefinedPropertyValue* )NULL;
Packit b89d10
    onig_st_lookup_strend(UserDefinedPropertyTable,
Packit b89d10
                          (const UChar* )buf, (const UChar* )buf + len,
Packit b89d10
                          (hash_data_type* )((void* )(&e)));
Packit b89d10
    if (e != 0) {
Packit b89d10
      return e->ctype;
Packit b89d10
    }
Packit b89d10
  }
Packit b89d10
Packit b89d10
  pc = unicode_lookup_property_name(buf, len);
Packit b89d10
  if (pc != 0) {
Packit b89d10
    /* fprintf(stderr, "LOOKUP: %s: %d\n", buf, pc->ctype); */
Packit b89d10
#ifndef USE_UNICODE_PROPERTIES
Packit b89d10
    if (pc->ctype > ONIGENC_MAX_STD_CTYPE)
Packit b89d10
      return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
Packit b89d10
#endif
Packit b89d10
Packit b89d10
    return (int )pc->ctype;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
Packit b89d10
}