Blame pcre16_utf16_utils.c

Packit 78a954
/*************************************************
Packit 78a954
*      Perl-Compatible Regular Expressions       *
Packit 78a954
*************************************************/
Packit 78a954
Packit 78a954
/* PCRE is a library of functions to support regular expressions whose syntax
Packit 78a954
and semantics are as close as possible to those of the Perl 5 language.
Packit 78a954
Packit 78a954
                       Written by Philip Hazel
Packit 78a954
           Copyright (c) 1997-2012 University of Cambridge
Packit 78a954
Packit 78a954
-----------------------------------------------------------------------------
Packit 78a954
Redistribution and use in source and binary forms, with or without
Packit 78a954
modification, are permitted provided that the following conditions are met:
Packit 78a954
Packit 78a954
    * Redistributions of source code must retain the above copyright notice,
Packit 78a954
      this list of conditions and the following disclaimer.
Packit 78a954
Packit 78a954
    * Redistributions in binary form must reproduce the above copyright
Packit 78a954
      notice, this list of conditions and the following disclaimer in the
Packit 78a954
      documentation and/or other materials provided with the distribution.
Packit 78a954
Packit 78a954
    * Neither the name of the University of Cambridge nor the names of its
Packit 78a954
      contributors may be used to endorse or promote products derived from
Packit 78a954
      this software without specific prior written permission.
Packit 78a954
Packit 78a954
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Packit 78a954
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit 78a954
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit 78a954
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Packit 78a954
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Packit 78a954
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Packit 78a954
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Packit 78a954
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Packit 78a954
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Packit 78a954
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Packit 78a954
POSSIBILITY OF SUCH DAMAGE.
Packit 78a954
-----------------------------------------------------------------------------
Packit 78a954
*/
Packit 78a954
Packit 78a954
Packit 78a954
/* This module contains a function for converting any UTF-16 character
Packit 78a954
strings to host byte order. */
Packit 78a954
Packit 78a954
Packit 78a954
#ifdef HAVE_CONFIG_H
Packit 78a954
#include "config.h"
Packit 78a954
#endif
Packit 78a954
Packit 78a954
/* Generate code with 16 bit character support. */
Packit 78a954
#define COMPILE_PCRE16
Packit 78a954
Packit 78a954
#include "pcre_internal.h"
Packit 78a954
Packit 78a954
/*************************************************
Packit 78a954
*  Convert any UTF-16 string to host byte order  *
Packit 78a954
*************************************************/
Packit 78a954
Packit 78a954
/* This function takes a UTF-16 string and converts
Packit 78a954
it to host byte order. The length can be explicitly set,
Packit 78a954
or automatically detected for zero terminated strings.
Packit 78a954
BOMs can be kept or discarded during the conversion.
Packit 78a954
Conversion can be done in place (output == input).
Packit 78a954
Packit 78a954
Arguments:
Packit 78a954
  output     the output buffer, its size must be greater
Packit 78a954
             than or equal to that of the input string
Packit 78a954
  input      any UTF-16 string
Packit 78a954
  length     the number of 16-bit units in the input string
Packit 78a954
             can be less than zero for zero terminated strings
Packit 78a954
  host_byte_order
Packit 78a954
             A non-zero value means the input is in host byte
Packit 78a954
             order, which can be dynamically changed by BOMs later.
Packit 78a954
             Initially it contains the starting byte order and returns
Packit 78a954
             with the last byte order so it can be used for stream
Packit 78a954
             processing. It can be NULL, which sets the host byte
Packit 78a954
             order mode by default.
Packit 78a954
  keep_boms  for a non-zero value, the BOM (0xfeff) characters
Packit 78a954
             are copied as well
Packit 78a954
Packit 78a954
Returns:     the number of 16-bit units placed into the output buffer,
Packit 78a954
             including the zero-terminator
Packit 78a954
*/
Packit 78a954
Packit 78a954
int
Packit 78a954
pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *output, PCRE_SPTR16 input,
Packit 78a954
  int length, int *host_byte_order, int keep_boms)
Packit 78a954
{
Packit 78a954
#ifdef SUPPORT_UTF
Packit 78a954
/* This function converts any UTF-16 string to host byte order and optionally
Packit 78a954
removes any Byte Order Marks (BOMS). Returns with the remainig length. */
Packit 78a954
int host_bo = host_byte_order != NULL ? *host_byte_order : 1;
Packit 78a954
pcre_uchar *optr = (pcre_uchar *)output;
Packit 78a954
const pcre_uchar *iptr = (const pcre_uchar *)input;
Packit 78a954
const pcre_uchar *end;
Packit 78a954
/* The c variable must be unsigned. */
Packit 78a954
register pcre_uchar c;
Packit 78a954
Packit 78a954
if (length < 0)
Packit 78a954
  length = STRLEN_UC(iptr) + 1;
Packit 78a954
end = iptr + length;
Packit 78a954
Packit 78a954
while (iptr < end)
Packit 78a954
  {
Packit 78a954
  c = *iptr++;
Packit 78a954
  if (c == 0xfeff || c == 0xfffe)
Packit 78a954
    {
Packit 78a954
    /* Detecting the byte order of the machine is unnecessary, it is
Packit 78a954
    enough to know that the UTF-16 string has the same byte order or not. */
Packit 78a954
    host_bo = c == 0xfeff;
Packit 78a954
    if (keep_boms != 0)
Packit 78a954
      *optr++ = 0xfeff;
Packit 78a954
    else
Packit 78a954
      length--;
Packit 78a954
    }
Packit 78a954
  else
Packit 78a954
    *optr++ = host_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */
Packit 78a954
  }
Packit 78a954
if (host_byte_order != NULL)
Packit 78a954
  *host_byte_order = host_bo;
Packit 78a954
Packit 78a954
#else /* Not SUPPORT_UTF */
Packit 78a954
(void)(output);  /* Keep picky compilers happy */
Packit 78a954
(void)(input);
Packit 78a954
(void)(keep_boms);
Packit 78a954
(void)(host_byte_order);
Packit 78a954
#endif /* SUPPORT_UTF */
Packit 78a954
return length;
Packit 78a954
}
Packit 78a954
Packit 78a954
/* End of pcre16_utf16_utils.c */