/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2018 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* Include guard: everything up to the matching #endif is processed only the
first time this header is included in a translation unit. */
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD

/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c. */

/* These are the general character categories.

The enumerators are not given explicit values, so they are numbered
consecutively from zero in the order listed (ucp_C == 0 ... ucp_Z == 6). These
numeric values have to match the ones generated by maint/MultiStage2.py and are
assumed by the catposstab table in pcre2_compile.c, so entries must not be
reordered or inserted in the middle. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};

/* These are the particular character categories.

The enumerators are not given explicit values, so they are numbered
consecutively from zero in the order listed (ucp_Cc == 0 ... ucp_Zs == 29).
These numeric values have to match the ones generated by maint/MultiStage2.py
and are assumed by the catposstab table in pcre2_compile.c, so entries must not
be reordered or inserted in the middle. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};

/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file.

The enumerators are not given explicit values; the number in each trailing
comment is the value the enumerator receives from its position in the list.
These values have to match the ones generated by maint/MultiStage2.py, so new
entries may only be appended at the end. */

enum {
  ucp_gbCR,                    /*  0 */
  ucp_gbLF,                    /*  1 */
  ucp_gbControl,               /*  2 */
  ucp_gbExtend,                /*  3 */
  ucp_gbPrepend,               /*  4 */
  ucp_gbSpacingMark,           /*  5 */
  ucp_gbL,                     /*  6 Hangul syllable type L */
  ucp_gbV,                     /*  7 Hangul syllable type V */
  ucp_gbT,                     /*  8 Hangul syllable type T */
  ucp_gbLV,                    /*  9 Hangul syllable type LV */
  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator,     /* 11 */
  ucp_gbOther,                 /* 12 */
  ucp_gbZWJ,                   /* 13 */
  ucp_gbExtended_Pictographic  /* 14 */
};

/* These are the script identifications.

The enumerators are not given explicit values, so they are numbered
consecutively from zero in the order listed (ucp_Arabic == 0 ... ucp_Sogdian ==
147). These values have to match the ones generated by maint/MultiStage2.py.
Scripts added for a later Unicode release are appended at the end, under a
comment naming that release, so that existing values never change. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0 */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1 */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2 */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0 */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0 */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0 */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi,
  /* New for Unicode 8.0.0 */
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  /* New for Unicode 10.0.0 (no update since 8.0.0) */
  ucp_Adlam,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Masaram_Gondi,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  /* New for Unicode 11.0.0 */
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Sogdian
};

#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

/* End of pcre2_ucp.h */