Blame pcre_newline.c

Packit 78a954
/*************************************************
Packit 78a954
*      Perl-Compatible Regular Expressions       *
Packit 78a954
*************************************************/
Packit 78a954
Packit 78a954
/* PCRE is a library of functions to support regular expressions whose syntax
Packit 78a954
and semantics are as close as possible to those of the Perl 5 language.
Packit 78a954
Packit 78a954
                       Written by Philip Hazel
Packit 78a954
           Copyright (c) 1997-2012 University of Cambridge
Packit 78a954
Packit 78a954
-----------------------------------------------------------------------------
Packit 78a954
Redistribution and use in source and binary forms, with or without
Packit 78a954
modification, are permitted provided that the following conditions are met:
Packit 78a954
Packit 78a954
    * Redistributions of source code must retain the above copyright notice,
Packit 78a954
      this list of conditions and the following disclaimer.
Packit 78a954
Packit 78a954
    * Redistributions in binary form must reproduce the above copyright
Packit 78a954
      notice, this list of conditions and the following disclaimer in the
Packit 78a954
      documentation and/or other materials provided with the distribution.
Packit 78a954
Packit 78a954
    * Neither the name of the University of Cambridge nor the names of its
Packit 78a954
      contributors may be used to endorse or promote products derived from
Packit 78a954
      this software without specific prior written permission.
Packit 78a954
Packit 78a954
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Packit 78a954
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit 78a954
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit 78a954
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Packit 78a954
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Packit 78a954
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Packit 78a954
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Packit 78a954
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Packit 78a954
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Packit 78a954
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Packit 78a954
POSSIBILITY OF SUCH DAMAGE.
Packit 78a954
-----------------------------------------------------------------------------
Packit 78a954
*/
Packit 78a954
Packit 78a954
Packit 78a954
/* This module contains internal functions for testing newlines when more than
Packit 78a954
one kind of newline is to be recognized. When a newline is found, its length is
Packit 78a954
returned. In principle, we could implement several newline "types", each
Packit 78a954
referring to a different set of newline characters. At present, PCRE supports
Packit 78a954
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
Packit 78a954
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
Packit 78a954
http://unicode.org/unicode/reports/tr18/. */
Packit 78a954
Packit 78a954
Packit 78a954
#ifdef HAVE_CONFIG_H
Packit 78a954
#include "config.h"
Packit 78a954
#endif
Packit 78a954
Packit 78a954
#include "pcre_internal.h"
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
/*************************************************
Packit 78a954
*      Check for newline at given position       *
Packit 78a954
*************************************************/
Packit 78a954
Packit 78a954
/* It is guaranteed that the initial value of ptr is less than the end of the
Packit 78a954
string that is being processed.
Packit 78a954
Packit 78a954
Arguments:
Packit 78a954
  ptr          pointer to possible newline
Packit 78a954
  type         the newline type
Packit 78a954
  endptr       pointer to the end of the string
Packit 78a954
  lenptr       where to return the length
Packit 78a954
  utf          TRUE if in utf mode
Packit 78a954
Packit 78a954
Returns:       TRUE or FALSE
Packit 78a954
*/
Packit 78a954
Packit 78a954
BOOL
Packit 78a954
PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
Packit 78a954
  BOOL utf)
Packit 78a954
{
Packit 78a954
pcre_uint32 c;
Packit 78a954
(void)utf;
Packit 78a954
#ifdef SUPPORT_UTF
Packit 78a954
if (utf)
Packit 78a954
  {
Packit 78a954
  GETCHAR(c, ptr);
Packit 78a954
  }
Packit 78a954
else
Packit 78a954
#endif  /* SUPPORT_UTF */
Packit 78a954
  c = *ptr;
Packit 78a954
Packit 78a954
/* Note that this function is called only for ANY or ANYCRLF. */
Packit 78a954
Packit 78a954
if (type == NLTYPE_ANYCRLF) switch(c)
Packit 78a954
  {
Packit 78a954
  case CHAR_LF: *lenptr = 1; return TRUE;
Packit 78a954
  case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
Packit 78a954
               return TRUE;
Packit 78a954
  default: return FALSE;
Packit 78a954
  }
Packit 78a954
Packit 78a954
/* NLTYPE_ANY */
Packit 78a954
Packit 78a954
else switch(c)
Packit 78a954
  {
Packit 78a954
#ifdef EBCDIC
Packit 78a954
  case CHAR_NEL:
Packit 78a954
#endif
Packit 78a954
  case CHAR_LF:
Packit 78a954
  case CHAR_VT:
Packit 78a954
  case CHAR_FF: *lenptr = 1; return TRUE;
Packit 78a954
Packit 78a954
  case CHAR_CR:
Packit 78a954
  *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
Packit 78a954
  return TRUE;
Packit 78a954
Packit 78a954
#ifndef EBCDIC
Packit 78a954
#ifdef COMPILE_PCRE8
Packit 78a954
  case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
Packit 78a954
  case 0x2028:                                       /* LS */
Packit 78a954
  case 0x2029: *lenptr = 3; return TRUE;             /* PS */
Packit 78a954
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
Packit 78a954
  case CHAR_NEL:
Packit 78a954
  case 0x2028:                                       /* LS */
Packit 78a954
  case 0x2029: *lenptr = 1; return TRUE;             /* PS */
Packit 78a954
#endif  /* COMPILE_PCRE8 */
Packit 78a954
#endif  /* Not EBCDIC */
Packit 78a954
Packit 78a954
  default: return FALSE;
Packit 78a954
  }
Packit 78a954
}
Packit 78a954
Packit 78a954
Packit 78a954
Packit 78a954
/*************************************************
Packit 78a954
*     Check for newline at previous position     *
Packit 78a954
*************************************************/
Packit 78a954
Packit 78a954
/* It is guaranteed that the initial value of ptr is greater than the start of
Packit 78a954
the string that is being processed.
Packit 78a954
Packit 78a954
Arguments:
Packit 78a954
  ptr          pointer to just after the possible newline
Packit 78a954
  type         the newline type
Packit 78a954
  startptr     pointer to the start of the string
Packit 78a954
  lenptr       where to return the length
Packit 78a954
  utf          TRUE if in utf mode
Packit 78a954
Packit 78a954
Returns:       TRUE or FALSE
Packit 78a954
*/
Packit 78a954
Packit 78a954
BOOL
Packit 78a954
PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
Packit 78a954
  BOOL utf)
Packit 78a954
{
Packit 78a954
pcre_uint32 c;
Packit 78a954
(void)utf;
Packit 78a954
ptr--;
Packit 78a954
#ifdef SUPPORT_UTF
Packit 78a954
if (utf)
Packit 78a954
  {
Packit 78a954
  BACKCHAR(ptr);
Packit 78a954
  GETCHAR(c, ptr);
Packit 78a954
  }
Packit 78a954
else
Packit 78a954
#endif  /* SUPPORT_UTF */
Packit 78a954
  c = *ptr;
Packit 78a954
Packit 78a954
/* Note that this function is called only for ANY or ANYCRLF. */
Packit 78a954
Packit 78a954
if (type == NLTYPE_ANYCRLF) switch(c)
Packit 78a954
  {
Packit 78a954
  case CHAR_LF:
Packit 78a954
  *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
Packit 78a954
  return TRUE;
Packit 78a954
Packit 78a954
  case CHAR_CR: *lenptr = 1; return TRUE;
Packit 78a954
  default: return FALSE;
Packit 78a954
  }
Packit 78a954
Packit 78a954
/* NLTYPE_ANY */
Packit 78a954
Packit 78a954
else switch(c)
Packit 78a954
  {
Packit 78a954
  case CHAR_LF:
Packit 78a954
  *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
Packit 78a954
  return TRUE;
Packit 78a954
Packit 78a954
#ifdef EBCDIC
Packit 78a954
  case CHAR_NEL:
Packit 78a954
#endif
Packit 78a954
  case CHAR_VT:
Packit 78a954
  case CHAR_FF:
Packit 78a954
  case CHAR_CR: *lenptr = 1; return TRUE;
Packit 78a954
Packit 78a954
#ifndef EBCDIC
Packit 78a954
#ifdef COMPILE_PCRE8
Packit 78a954
  case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
Packit 78a954
  case 0x2028:                                       /* LS */
Packit 78a954
  case 0x2029: *lenptr = 3; return TRUE;             /* PS */
Packit 78a954
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
Packit 78a954
  case CHAR_NEL:
Packit 78a954
  case 0x2028:                                       /* LS */
Packit 78a954
  case 0x2029: *lenptr = 1; return TRUE;             /* PS */
Packit 78a954
#endif  /* COMPILE_PCRE8 */
Packit 78a954
#endif  /* NotEBCDIC */
Packit 78a954
Packit 78a954
  default: return FALSE;
Packit 78a954
  }
Packit 78a954
}
Packit 78a954
Packit 78a954
/* End of pcre_newline.c */