Blame pcre32_valid_utf32.c

Packit 78a954
/*************************************************
Packit 78a954
*      Perl-Compatible Regular Expressions       *
Packit 78a954
*************************************************/
Packit 78a954
Packit 78a954
/* PCRE is a library of functions to support regular expressions whose syntax
Packit 78a954
and semantics are as close as possible to those of the Perl 5 language.
Packit 78a954
Packit 78a954
                       Written by Philip Hazel
Packit 78a954
           Copyright (c) 1997-2013 University of Cambridge
Packit 78a954
Packit 78a954
-----------------------------------------------------------------------------
Packit 78a954
Redistribution and use in source and binary forms, with or without
Packit 78a954
modification, are permitted provided that the following conditions are met:
Packit 78a954
Packit 78a954
    * Redistributions of source code must retain the above copyright notice,
Packit 78a954
      this list of conditions and the following disclaimer.
Packit 78a954
Packit 78a954
    * Redistributions in binary form must reproduce the above copyright
Packit 78a954
      notice, this list of conditions and the following disclaimer in the
Packit 78a954
      documentation and/or other materials provided with the distribution.
Packit 78a954
Packit 78a954
    * Neither the name of the University of Cambridge nor the names of its
Packit 78a954
      contributors may be used to endorse or promote products derived from
Packit 78a954
      this software without specific prior written permission.
Packit 78a954
Packit 78a954
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Packit 78a954
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit 78a954
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit 78a954
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Packit 78a954
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Packit 78a954
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Packit 78a954
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Packit 78a954
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Packit 78a954
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Packit 78a954
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Packit 78a954
POSSIBILITY OF SUCH DAMAGE.
Packit 78a954
-----------------------------------------------------------------------------
Packit 78a954
*/
Packit 78a954
Packit 78a954
Packit 78a954
/* This module contains an internal function for validating UTF-32 character
Packit 78a954
strings. */
Packit 78a954
Packit 78a954
Packit 78a954
#ifdef HAVE_CONFIG_H
Packit 78a954
#include "config.h"
Packit 78a954
#endif
Packit 78a954
Packit 78a954
/* Generate code with 32 bit character support. */
Packit 78a954
#define COMPILE_PCRE32
Packit 78a954
Packit 78a954
#include "pcre_internal.h"
Packit 78a954
Packit 78a954
/*************************************************
Packit 78a954
*         Validate a UTF-32 string                *
Packit 78a954
*************************************************/
Packit 78a954
Packit 78a954
/* This function is called (optionally) at the start of compile or match, to
Packit 78a954
check that a supposed UTF-32 string is actually valid. The early check means
Packit 78a954
that subsequent code can assume it is dealing with a valid string. The check
Packit 78a954
can be turned off for maximum performance, but the consequences of supplying an
Packit 78a954
invalid string are then undefined.
Packit 78a954
Packit 78a954
More information about the details of the error is passed
Packit 78a954
back in the returned value:
Packit 78a954
Packit 78a954
PCRE_UTF32_ERR0  No error
Packit 78a954
PCRE_UTF32_ERR1  Surrogate character
Packit 78a954
PCRE_UTF32_ERR2  Unused (was non-character)
Packit 78a954
PCRE_UTF32_ERR3  Character > 0x10ffff
Packit 78a954
Packit 78a954
Arguments:
Packit 78a954
  string       points to the string
Packit 78a954
  length       length of string, or negative (usually -1) if zero-terminated
Packit 78a954
  erroroffset  pointer to an error position offset variable
Packit 78a954
Packit 78a954
Returns:       = 0    if the string is a valid UTF-32 string
Packit 78a954
               > 0    otherwise, setting the offset of the bad character
Packit 78a954
*/
Packit 78a954
Packit 78a954
int
Packit 78a954
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
Packit 78a954
{
Packit 78a954
#ifdef SUPPORT_UTF
Packit 78a954
register PCRE_PUCHAR p;
Packit 78a954
register pcre_uchar c;
Packit 78a954
Packit 78a954
if (length < 0)
Packit 78a954
  {
Packit 78a954
  for (p = string; *p != 0; p++);
Packit 78a954
  length = p - string;
Packit 78a954
  }
Packit 78a954
Packit 78a954
for (p = string; length-- > 0; p++)
Packit 78a954
  {
Packit 78a954
  c = *p;
Packit 78a954
Packit 78a954
  if ((c & 0xfffff800u) != 0xd800u)
Packit 78a954
    {
Packit 78a954
    /* Normal UTF-32 code point. Neither high nor low surrogate. */
Packit 78a954
    if (c > 0x10ffffu)
Packit 78a954
      {
Packit 78a954
      *erroroffset = p - string;
Packit 78a954
      return PCRE_UTF32_ERR3;
Packit 78a954
      }
Packit 78a954
    }
Packit 78a954
  else
Packit 78a954
    {
Packit 78a954
    /* A surrogate */
Packit 78a954
    *erroroffset = p - string;
Packit 78a954
    return PCRE_UTF32_ERR1;
Packit 78a954
    }
Packit 78a954
  }
Packit 78a954
Packit 78a954
#else  /* SUPPORT_UTF */
Packit 78a954
(void)(string);  /* Keep picky compilers happy */
Packit 78a954
(void)(length);
Packit 78a954
(void)(erroroffset);
Packit 78a954
#endif /* SUPPORT_UTF */
Packit 78a954
Packit 78a954
return PCRE_UTF32_ERR0;   /* This indicates success */
Packit 78a954
}
Packit 78a954
Packit 78a954
/* End of pcre32_valid_utf32.c */