Blame src/pcre2_extuni.c

Packit 504f36
/*************************************************
Packit 504f36
*      Perl-Compatible Regular Expressions       *
Packit 504f36
*************************************************/
Packit 504f36
Packit 504f36
/* PCRE is a library of functions to support regular expressions whose syntax
Packit 504f36
and semantics are as close as possible to those of the Perl 5 language.
Packit 504f36
Packit 504f36
                       Written by Philip Hazel
Packit 504f36
     Original API code Copyright (c) 1997-2012 University of Cambridge
Packit 504f36
          New API code Copyright (c) 2016-2018 University of Cambridge
Packit 504f36
Packit 504f36
-----------------------------------------------------------------------------
Packit 504f36
Redistribution and use in source and binary forms, with or without
Packit 504f36
modification, are permitted provided that the following conditions are met:
Packit 504f36
Packit 504f36
    * Redistributions of source code must retain the above copyright notice,
Packit 504f36
      this list of conditions and the following disclaimer.
Packit 504f36
Packit 504f36
    * Redistributions in binary form must reproduce the above copyright
Packit 504f36
      notice, this list of conditions and the following disclaimer in the
Packit 504f36
      documentation and/or other materials provided with the distribution.
Packit 504f36
Packit 504f36
    * Neither the name of the University of Cambridge nor the names of its
Packit 504f36
      contributors may be used to endorse or promote products derived from
Packit 504f36
      this software without specific prior written permission.
Packit 504f36
Packit 504f36
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Packit 504f36
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit 504f36
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit 504f36
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Packit 504f36
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Packit 504f36
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Packit 504f36
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Packit 504f36
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Packit 504f36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Packit 504f36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Packit 504f36
POSSIBILITY OF SUCH DAMAGE.
Packit 504f36
-----------------------------------------------------------------------------
Packit 504f36
*/
Packit 504f36
Packit 504f36
/* This module contains an internal function that is used to match a Unicode
Packit 504f36
extended grapheme sequence. It is used by both pcre2_match() and
Packit 504f36
pcre2_dfa_match(). However, it is called only when Unicode support is being
Packit 504f36
compiled. Nevertheless, we provide a dummy function when there is no Unicode
Packit 504f36
support, because some compilers do not like functionless source files. */
Packit 504f36
Packit 504f36
Packit 504f36
#ifdef HAVE_CONFIG_H
Packit 504f36
#include "config.h"
Packit 504f36
#endif
Packit 504f36
Packit 504f36
Packit 504f36
#include "pcre2_internal.h"
Packit 504f36
Packit 504f36
Packit 504f36
/* Dummy function */
Packit 504f36
Packit 504f36
#ifndef SUPPORT_UNICODE
Packit 504f36
PCRE2_SPTR
Packit 504f36
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
Packit 504f36
  PCRE2_SPTR end_subject, BOOL utf, int *xcount)
Packit 504f36
{
Packit 504f36
(void)c;
Packit 504f36
(void)eptr;
Packit 504f36
(void)start_subject;
Packit 504f36
(void)end_subject;
Packit 504f36
(void)utf;
Packit 504f36
(void)xcount;
Packit 504f36
return NULL;
Packit 504f36
}
Packit 504f36
#else
Packit 504f36
Packit 504f36
Packit 504f36
/*************************************************
Packit 504f36
*      Match an extended grapheme sequence       *
Packit 504f36
*************************************************/
Packit 504f36
Packit 504f36
/*
Packit 504f36
Arguments:
Packit 504f36
  c              the first character
Packit 504f36
  eptr           pointer to next character
Packit 504f36
  start_subject  pointer to start of subject
Packit 504f36
  end_subject    pointer to end of subject
Packit 504f36
  utf            TRUE if in UTF mode
Packit 504f36
  xcount         pointer to count of additional characters,
Packit 504f36
                   or NULL if count not needed
Packit 504f36
Packit 504f36
Returns:         pointer after the end of the sequence
Packit 504f36
*/
Packit 504f36
Packit 504f36
PCRE2_SPTR
Packit 504f36
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
Packit 504f36
  PCRE2_SPTR end_subject, BOOL utf, int *xcount)
Packit 504f36
{
Packit 504f36
int lgb = UCD_GRAPHBREAK(c);
Packit 504f36
Packit 504f36
while (eptr < end_subject)
Packit 504f36
  {
Packit 504f36
  int rgb;
Packit 504f36
  int len = 1;
Packit 504f36
  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
Packit 504f36
  rgb = UCD_GRAPHBREAK(c);
Packit 504f36
  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
Packit 504f36
Packit 504f36
  /* Not breaking between Regional Indicators is allowed only if there
Packit 504f36
  are an even number of preceding RIs. */
Packit 504f36
Packit 504f36
  if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
Packit 504f36
    {
Packit 504f36
    int ricount = 0;
Packit 504f36
    PCRE2_SPTR bptr = eptr - 1;
Packit 504f36
    if (utf) BACKCHAR(bptr);
Packit 504f36
Packit 504f36
    /* bptr is pointing to the left-hand character */
Packit 504f36
Packit 504f36
    while (bptr > start_subject)
Packit 504f36
      {
Packit 504f36
      bptr--;
Packit 504f36
      if (utf)
Packit 504f36
        {
Packit 504f36
        BACKCHAR(bptr);
Packit 504f36
        GETCHAR(c, bptr);
Packit 504f36
        }
Packit 504f36
      else
Packit 504f36
      c = *bptr;
Packit 504f36
      if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
Packit 504f36
      ricount++;
Packit 504f36
      }
Packit 504f36
    if ((ricount & 1) != 0) break;  /* Grapheme break required */
Packit 504f36
    }
Packit 504f36
Packit 504f36
  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
Packit 504f36
  allows any number of them before a following Extended_Pictographic. */
Packit 504f36
Packit 504f36
  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
Packit 504f36
       lgb != ucp_gbExtended_Pictographic)
Packit 504f36
    lgb = rgb;
Packit 504f36
Packit 504f36
  eptr += len;
Packit 504f36
  if (xcount != NULL) *xcount += 1;
Packit 504f36
  }
Packit 504f36
Packit 504f36
return eptr;
Packit 504f36
}
Packit 504f36
Packit 504f36
#endif  /* SUPPORT_UNICODE */
Packit 504f36
Packit 504f36
/* End of pcre2_extuni.c */