Blame winpr/libwinpr/crt/utf.c

Packit 1fb8d4
/*
 * Copyright 2001-2004 Unicode, Inc.
 *
 * Disclaimer
 *
 * This source code is provided as is by Unicode, Inc. No claims are
 * made as to fitness for any particular purpose. No warranties of any
 * kind are expressed or implied. The recipient agrees to determine
 * applicability of information provided. If this file has been
 * purchased on magnetic or optical media from Unicode, Inc., the
 * sole remedy for any claim will be exchange of defective media
 * within 90 days of receipt.
 *
 * Limitations on Rights to Redistribute This Code
 *
 * Unicode, Inc. hereby grants the right to freely use the information
 * supplied in this file in the creation of products supporting the
 * Unicode Standard, and to make copies of this file in any form
 * for internal or external distribution as long as this notice
 * remains attached.
 */
Packit 1fb8d4
Packit 1fb8d4
/* ---------------------------------------------------------------------

    Conversions between UTF-32, UTF-16, and UTF-8. Source code file.
    Author: Mark E. Davis, 1994.
    Rev History: Rick McGowan, fixes & updates May 2001.
    Sept 2001: fixed const & error conditions per
    mods suggested by S. Parent & A. Lillich.
    June 2002: Tim Dodd added detection and handling of incomplete
    source sequences, enhanced error detection, added casts
    to eliminate compiler warnings.
    July 2003: slight mods to back out aggressive FFFE detection.
    Jan 2004: updated switches in from-UTF8 conversions.
    Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.

    See the header file "utf.h" for complete documentation.

------------------------------------------------------------------------ */
Packit 1fb8d4
Packit 1fb8d4
#include "utf.h"
Packit 1fb8d4
#include <winpr/endian.h>
Packit 1fb8d4
Packit Service 5a9772
#pragma GCC diagnostic push
Packit Service 5a9772
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
Packit Service 5a9772
static const int halfShift = 10; /* used for shifting by 10 bits */
Packit 1fb8d4
Packit 1fb8d4
static const DWORD halfBase = 0x0010000UL;
Packit 1fb8d4
static const DWORD halfMask = 0x3FFUL;
Packit 1fb8d4
Packit Service 5a9772
#define UNI_SUR_HIGH_START (DWORD)0xD800
Packit Service 5a9772
#define UNI_SUR_HIGH_END (DWORD)0xDBFF
Packit Service 5a9772
#define UNI_SUR_LOW_START (DWORD)0xDC00
Packit Service 5a9772
#define UNI_SUR_LOW_END (DWORD)0xDFFF
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit Service 5a9772
/**
 * Convert UTF-32 code points to UTF-16 code units.
 *
 * @param sourceStart in: first code point to convert; out: first code point
 *                    that was NOT consumed (the offending value on error).
 * @param sourceEnd   one past the last code point of the input.
 * @param targetStart in: first free WCHAR of the output; out: one past the
 *                    last WCHAR written.
 * @param targetEnd   one past the last writable WCHAR of the output.
 * @param flags       strictConversion stops at illegal input; any other value
 *                    substitutes UNI_REPLACEMENT_CHAR and continues.
 *
 * @return conversionOK when all input was consumed, targetExhausted when the
 *         output buffer is full, sourceIllegal when strict conversion hit a
 *         surrogate value or a value above UNI_MAX_LEGAL_UTF32.
 */
ConversionResult ConvertUTF32toUTF16(const DWORD** sourceStart, const DWORD* sourceEnd,
                                     WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags)
{
	ConversionResult result = conversionOK;
	const DWORD* source = *sourceStart;
	WCHAR* target = *targetStart;

	while (source < sourceEnd)
	{
		DWORD ch;

		/* Every code point needs at least one output unit. */
		if (target >= targetEnd)
		{
			result = targetExhausted;
			break;
		}

		ch = *source++;

		if (ch <= UNI_MAX_BMP)
		{
			/* Surrogate values are not legal as stand-alone UTF-32 code points. */
			if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
			{
				if (flags == strictConversion)
				{
					--source; /* return to the illegal value itself */
					result = sourceIllegal;
					break;
				}

				*target++ = UNI_REPLACEMENT_CHAR;
			}
			else
			{
				*target++ = (WCHAR)ch; /* normal case: one unit */
			}
		}
		else if (ch > UNI_MAX_LEGAL_UTF32)
		{
			if (flags == strictConversion)
			{
				/*
				 * Stop on the illegal value, exactly like the surrogate branch
				 * above. Previously the loop kept going, which left the source
				 * pointer past the bad value and allowed the status to be
				 * overwritten by a later targetExhausted.
				 */
				--source; /* return to the illegal value itself */
				result = sourceIllegal;
				break;
			}

			*target++ = UNI_REPLACEMENT_CHAR;
		}
		else
		{
			/* Supplementary code point: needs two output units. */
			if (target + 1 >= targetEnd)
			{
				--source; /* Back up source pointer! */
				result = targetExhausted;
				break;
			}

			ch -= halfBase;
			*target++ = (WCHAR)((ch >> halfShift) + UNI_SUR_HIGH_START);
			*target++ = (WCHAR)((ch & halfMask) + UNI_SUR_LOW_START);
		}
	}

	*sourceStart = source;
	*targetStart = target;
	return result;
}
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit Service 5a9772
/**
 * Convert UTF-16 code units to UTF-32 code points.
 *
 * Source units are read directly through the WCHAR pointer.
 *
 * @param sourceStart in: first code unit to convert; out: first unit that was
 *                    NOT consumed (the offending unit on error).
 * @param sourceEnd   one past the last code unit of the input.
 * @param targetStart in: first free DWORD of the output; out: one past the
 *                    last DWORD written.
 * @param targetEnd   one past the last writable DWORD of the output.
 * @param flags       strictConversion rejects unpaired surrogates; any other
 *                    value copies them through unchanged.
 *
 * @return conversionOK, sourceExhausted (input ends in a high surrogate),
 *         sourceIllegal (unpaired surrogate in strict mode) or
 *         targetExhausted (output buffer full).
 */
ConversionResult ConvertUTF16toUTF32(const WCHAR** sourceStart, const WCHAR* sourceEnd,
                                     DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags)
{
	ConversionResult result = conversionOK;
	const WCHAR* source = *sourceStart;
	DWORD* target = *targetStart;
	/* Initialised so the debug trace below never reads an indeterminate value. */
	DWORD ch = 0;
	DWORD ch2 = 0;

	while (source < sourceEnd)
	{
		/* Remembered so the whole character can be un-read on target overflow. */
		const WCHAR* oldSource = source;

		ch = *source++;
		ch2 = 0; /* no second unit seen yet for this character */

		if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
		{
			/* A high surrogate needs the following unit to be in the buffer. */
			if (source >= sourceEnd)
			{
				--source; /* return to the high surrogate */
				result = sourceExhausted;
				break;
			}

			ch2 = *source;

			if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
			{
				/* Valid pair: combine both halves into one code point. */
				ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) +
				     halfBase;
				++source;
			}
			else if (flags == strictConversion)
			{
				/* Unpaired high surrogate. */
				--source; /* return to the illegal value itself */
				result = sourceIllegal;
				break;
			}
		}
		else if (flags == strictConversion)
		{
			/* A low surrogate without a preceding high surrogate is illegal. */
			if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
			{
				--source; /* return to the illegal value itself */
				result = sourceIllegal;
				break;
			}
		}

		if (target >= targetEnd)
		{
			source = oldSource; /* Back up source pointer! */
			result = targetExhausted;
			break;
		}

		*target++ = ch;
	}

	*sourceStart = source;
	*targetStart = target;
#ifdef CVTUTF_DEBUG

	if (result == sourceIllegal)
	{
		WLOG_WARN(TAG, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x", ch, ch2);
	}

#endif
	return result;
}
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit 1fb8d4
/*
 * Indexed by the first byte of a UTF-8 sequence; yields the number of
 * trailing bytes that are supposed to follow it.
 * Note that *legal* UTF-8 sequences never have 4 or 5 trailing bytes. Those
 * entries are left in place for anyone who wants to handle the longer forms
 * that earlier algorithms allowed.
 */
static const char trailingBytesForUTF8[256] = {
	/* 0x00 - 0x7F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 - 0xBF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 - 0xDF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xE0 - 0xEF */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 - 0xFF */
	3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
Packit 1fb8d4
Packit 1fb8d4
/*
Packit 1fb8d4
 * Magic values subtracted from a buffer value during UTF8 conversion.
Packit 1fb8d4
 * This table contains as many values as there might be trailing bytes
Packit 1fb8d4
 * in a UTF-8 sequence.
Packit 1fb8d4
 */
Packit 1fb8d4
static const DWORD offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
Packit Service 5a9772
	                                      0x03C82080UL, 0xFA082080UL, 0x82082080UL };
Packit 1fb8d4
Packit 1fb8d4
/*
Packit 1fb8d4
 * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
Packit 1fb8d4
 * into the first byte, depending on how many bytes follow.  There are
Packit 1fb8d4
 * as many entries in this table as there are UTF-8 sequence types.
Packit 1fb8d4
 * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
Packit 1fb8d4
 * for *legal* UTF-8 will be 4 or fewer bytes total.
Packit 1fb8d4
 */
Packit 1fb8d4
static const BYTE firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit 1fb8d4
/* The interface converts a whole buffer to avoid function-call overhead.
 * Constants have been gathered. Loops & conditionals have been removed as
 * much as possible for efficiency, in favor of drop-through switches.
 * (See "Note A" at the bottom of the file for equivalent code.)
 * If your compiler supports it, the "isLegalUTF8" call can be turned
 * into an inline function.
 */
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit Service 5a9772
/**
 * Convert UTF-16 code units to UTF-8 bytes, or only measure the result.
 *
 * Source units are read with Data_Read_UINT16. When te is NULL nothing is
 * written and only the required number of bytes is counted.
 *
 * @param sourceStart in: first code unit to convert; out: first unit that was
 *                    NOT consumed (the offending unit on error).
 * @param sourceEnd   one past the last code unit of the input.
 * @param targetStart in: start of the output buffer. Out: if *targetStart is
 *                    non-NULL it is advanced by the number of bytes produced;
 *                    if it is NULL, the byte count itself is stored, cast to
 *                    a pointer.
 * @param te          one past the last writable output byte, or NULL to
 *                    request length computation only.
 * @param flags       strictConversion rejects unpaired surrogates.
 *
 * @return conversionOK, sourceExhausted, sourceIllegal or targetExhausted;
 *         sourceIllegal is also returned when *targetStart lies beyond te.
 */
ConversionResult ConvertUTF16toUTF8(const WCHAR** sourceStart, const WCHAR* sourceEnd,
                                    BYTE** targetStart, BYTE* te, ConversionFlags flags)
{
	const DWORD continuationMask = 0xBF;
	const DWORD continuationMark = 0x80;
	const BOOL measureOnly = te ? FALSE : TRUE;
	ConversionResult status = conversionOK;
	const WCHAR* cursor;
	size_t written = 0;  /* bytes produced (or counted) so far */
	size_t capacity = 0; /* size of the output buffer in bytes */

	if (targetStart && te)
	{
		const size_t first = (size_t)*targetStart;
		const size_t limit = (size_t)te;

		if (first > limit)
			return sourceIllegal;

		capacity = limit - first;
	}

	cursor = *sourceStart;

	while (cursor < sourceEnd)
	{
		/* Where this character began, so it can be un-read on overflow. */
		const WCHAR* charStart = cursor;
		unsigned short count;
		DWORD cp;

		Data_Read_UINT16(cursor, cp);
		cursor++;

		if ((cp >= UNI_SUR_HIGH_START) && (cp <= UNI_SUR_HIGH_END))
		{
			DWORD low;

			/* The unit after a high surrogate must be inside the buffer. */
			if (cursor >= sourceEnd)
			{
				--cursor; /* return to the high surrogate */
				status = sourceExhausted;
				break;
			}

			Data_Read_UINT16(cursor, low);

			if ((low >= UNI_SUR_LOW_START) && (low <= UNI_SUR_LOW_END))
			{
				/* Proper pair: merge into a single code point. */
				cp = ((cp - UNI_SUR_HIGH_START) << halfShift) + (low - UNI_SUR_LOW_START) +
				     halfBase;
				++cursor;
			}
			else if (flags == strictConversion)
			{
				/* Unpaired high surrogate. */
				--cursor; /* return to the illegal value itself */
				status = sourceIllegal;
				break;
			}
		}
		else if ((flags == strictConversion) && (cp >= UNI_SUR_LOW_START) &&
		         (cp <= UNI_SUR_LOW_END))
		{
			/* Stray low surrogate. */
			--cursor; /* return to the illegal value itself */
			status = sourceIllegal;
			break;
		}

		/* Number of UTF-8 bytes this code point occupies. */
		if (cp < (DWORD)0x80)
		{
			count = 1;
		}
		else if (cp < (DWORD)0x800)
		{
			count = 2;
		}
		else if (cp < (DWORD)0x10000)
		{
			count = 3;
		}
		else if (cp < (DWORD)0x110000)
		{
			count = 4;
		}
		else
		{
			count = 3;
			cp = UNI_REPLACEMENT_CHAR;
		}

		if (!measureOnly)
		{
			BYTE* slot;
			unsigned short idx;

			if ((written + count) > capacity)
			{
				cursor = charStart; /* Back up source pointer! */
				status = targetExhausted;
				break;
			}

			/* Fill the continuation bytes back to front, then the lead byte. */
			slot = &(*targetStart)[written];

			for (idx = count - 1; idx > 0; idx--)
			{
				slot[idx] = (BYTE)((cp | continuationMark) & continuationMask);
				cp >>= 6;
			}

			slot[0] = (BYTE)(cp | firstByteMark[count]);
		}

		written += count;
	}

	*sourceStart = cursor;

	if (targetStart)
	{
		if (*targetStart)
			*targetStart = &(*targetStart)[written];
		else
			*targetStart = (BYTE*)written; /* hand the byte count back as a pointer value */
	}

	return status;
}
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit 1fb8d4
/*
Packit 1fb8d4
 * Utility routine to tell whether a sequence of bytes is legal UTF-8.
Packit 1fb8d4
 * This must be called with the length pre-determined by the first byte.
Packit 1fb8d4
 * If not calling this from ConvertUTF8to*, then the length can be set by:
Packit 1fb8d4
 *  length = trailingBytesForUTF8[*source]+1;
Packit 1fb8d4
 * and the sequence is illegal right away if there aren't that many bytes
Packit 1fb8d4
 * available.
Packit 1fb8d4
 * If presented with a length > 4, this returns FALSE.  The Unicode
Packit 1fb8d4
 * definition of UTF-8 goes up to 4-byte sequences.
Packit 1fb8d4
 */
Packit 1fb8d4
Packit 1fb8d4
static BOOL isLegalUTF8(const BYTE* source, int length)
Packit 1fb8d4
{
Packit 1fb8d4
	BYTE a;
Packit 1fb8d4
	const BYTE* srcptr = source + length;
Packit 1fb8d4
Packit 1fb8d4
	switch (length)
Packit 1fb8d4
	{
Packit 1fb8d4
		default:
Packit 1fb8d4
			return FALSE;
Packit 1fb8d4
Packit 1fb8d4
		/* Everything else falls through when "TRUE"... */
Packit 1fb8d4
		case 4:
Packit Service 5a9772
			if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
Packit Service 5a9772
				return FALSE;
Packit 1fb8d4
Packit 1fb8d4
		case 3:
Packit Service 5a9772
			if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
Packit Service 5a9772
				return FALSE;
Packit 1fb8d4
Packit 1fb8d4
		case 2:
Packit Service 5a9772
			if ((a = (*--srcptr)) > 0xBF)
Packit Service 5a9772
				return FALSE;
Packit 1fb8d4
Packit 1fb8d4
			switch (*source)
Packit 1fb8d4
			{
Packit 1fb8d4
				/* no fall-through in this inner switch */
Packit 1fb8d4
				case 0xE0:
Packit Service 5a9772
					if (a < 0xA0)
Packit Service 5a9772
						return FALSE;
Packit 1fb8d4
Packit 1fb8d4
					break;
Packit 1fb8d4
Packit 1fb8d4
				case 0xED:
Packit Service 5a9772
					if (a > 0x9F)
Packit Service 5a9772
						return FALSE;
Packit 1fb8d4
Packit 1fb8d4
					break;
Packit 1fb8d4
Packit 1fb8d4
				case 0xF0:
Packit Service 5a9772
					if (a < 0x90)
Packit Service 5a9772
						return FALSE;
Packit 1fb8d4
Packit 1fb8d4
					break;
Packit 1fb8d4
Packit 1fb8d4
				case 0xF4:
Packit Service 5a9772
					if (a > 0x8F)
Packit Service 5a9772
						return FALSE;
Packit 1fb8d4
Packit 1fb8d4
					break;
Packit 1fb8d4
Packit 1fb8d4
				default:
Packit Service 5a9772
					if (a < 0x80)
Packit Service 5a9772
						return FALSE;
Packit 1fb8d4
			}
Packit 1fb8d4
Packit 1fb8d4
		case 1:
Packit Service 5a9772
			if (*source >= 0x80 && *source < 0xC2)
Packit Service 5a9772
				return FALSE;
Packit 1fb8d4
	}
Packit 1fb8d4
Packit 1fb8d4
	if (*source > 0xF4)
Packit 1fb8d4
		return FALSE;
Packit 1fb8d4
Packit 1fb8d4
	return TRUE;
Packit 1fb8d4
}
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit 1fb8d4
/*
Packit 1fb8d4
 * Exported function to return whether a UTF-8 sequence is legal or not.
Packit 1fb8d4
 * This is not used here; it's just exported.
Packit 1fb8d4
 */
Packit 1fb8d4
BOOL isLegalUTF8Sequence(const BYTE* source, const BYTE* sourceEnd)
Packit 1fb8d4
{
Packit 1fb8d4
	int length = trailingBytesForUTF8[*source] + 1;
Packit 1fb8d4
Packit 1fb8d4
	if (source + length > sourceEnd)
Packit 1fb8d4
		return FALSE;
Packit 1fb8d4
Packit 1fb8d4
	return isLegalUTF8(source, length);
Packit 1fb8d4
}
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit Service 5a9772
/**
 * Convert UTF-8 bytes to UTF-16 code units, or only measure the result.
 *
 * Output units are stored with Data_Write_UINT16. When targetEnd is NULL
 * nothing is written and only the required number of units is counted.
 *
 * @param sourceStart in: first byte to convert; out: first byte that was NOT
 *                    consumed (start of the offending sequence on error).
 * @param sourceEnd   one past the last input byte.
 * @param targetStart in: start of the output buffer. Out: if *targetStart is
 *                    non-NULL it is advanced by the number of WCHARs
 *                    produced; if it is NULL, that count multiplied by
 *                    sizeof(WCHAR) is stored, cast to a pointer.
 * @param targetEnd   one past the last writable WCHAR, or NULL to request
 *                    length computation only.
 * @param flags       strictConversion rejects encoded surrogates and values
 *                    above UNI_MAX_UTF16; any other value substitutes
 *                    UNI_REPLACEMENT_CHAR.
 *
 * @return conversionOK, sourceExhausted (truncated sequence), sourceIllegal
 *         (malformed input, or *targetStart beyond targetEnd) or
 *         targetExhausted (output buffer full).
 */
ConversionResult ConvertUTF8toUTF16(const BYTE** sourceStart, const BYTE* sourceEnd,
                                    WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags)
{
	size_t target = 0; /* index of the next WCHAR slot in *targetStart */
	size_t end = 0;    /* capacity of the output buffer, in WCHAR units */
	const BYTE* source = *sourceStart;
	const BOOL computeLength = (!targetEnd) ? TRUE : FALSE;
	ConversionResult result = conversionOK;

	if (targetStart && targetEnd)
	{
		const size_t s = (size_t)*targetStart;
		const size_t e = (size_t)targetEnd;

		if (s > e)
			return sourceIllegal;

		/*
		 * "target" counts WCHAR elements, so the limit must too. Using the raw
		 * byte distance here would let the bounds checks below accept twice as
		 * many elements as the buffer can hold.
		 */
		end = (e - s) / sizeof(WCHAR);
	}

	while (source < sourceEnd)
	{
		DWORD ch = 0;
		unsigned short i;
		const unsigned short extraBytesToRead = trailingBytesForUTF8[*source];

		/* The whole sequence must be inside the source buffer. */
		if ((source + extraBytesToRead) >= sourceEnd)
		{
			result = sourceExhausted;
			break;
		}

		/* Do this check whether lenient or strict */
		if (!isLegalUTF8(source, extraBytesToRead + 1))
		{
			result = sourceIllegal;
			break;
		}

		/*
		 * Accumulate lead and trailing bytes. This is the loop form that
		 * "Note A" at the bottom of the file gives as equivalent to the
		 * fall-through switch.
		 */
		for (i = 0; i < extraBytesToRead; i++)
		{
			ch += *source++;
			ch <<= 6;
		}

		ch += *source++;
		ch -= offsetsFromUTF8[extraBytesToRead];

		if ((target >= end) && (!computeLength))
		{
			source -= (extraBytesToRead + 1); /* Back up source pointer! */
			result = targetExhausted;
			break;
		}

		if (ch <= UNI_MAX_BMP)
		{
			/* Target is a character <= 0xFFFF */
			if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
			{
				/* Encoded surrogate values are illegal. */
				if (flags == strictConversion)
				{
					source -= (extraBytesToRead + 1); /* return to the illegal value itself */
					result = sourceIllegal;
					break;
				}

				if (!computeLength)
					Data_Write_UINT16(&(*targetStart)[target], UNI_REPLACEMENT_CHAR);

				target++;
			}
			else
			{
				if (!computeLength)
					Data_Write_UINT16(&(*targetStart)[target], ch); /* normal case */

				target++;
			}
		}
		else if (ch > UNI_MAX_UTF16)
		{
			if (flags == strictConversion)
			{
				result = sourceIllegal;
				source -= (extraBytesToRead + 1); /* return to the start */
				break;                            /* Bail out; shouldn't continue */
			}

			if (!computeLength)
				Data_Write_UINT16(&(*targetStart)[target], UNI_REPLACEMENT_CHAR);

			target++;
		}
		else
		{
			/* Supplementary code point: needs a surrogate pair (two units). */
			if ((target + 1 >= end) && (!computeLength))
			{
				source -= (extraBytesToRead + 1); /* Back up source pointer! */
				result = targetExhausted;
				break;
			}

			ch -= halfBase;

			if (!computeLength)
			{
				WCHAR wchar;

				/*
				 * The index is advanced outside the macro arguments so the
				 * result is the same even if Data_Write_UINT16 evaluates its
				 * destination argument more than once.
				 */
				wchar = (WCHAR)((ch >> halfShift) + UNI_SUR_HIGH_START);
				Data_Write_UINT16(&(*targetStart)[target], wchar);
				wchar = (WCHAR)((ch & halfMask) + UNI_SUR_LOW_START);
				Data_Write_UINT16(&(*targetStart)[target + 1], wchar);
			}

			target += 2;
		}
	}

	*sourceStart = source;

	if (targetStart && (*targetStart))
		*targetStart = &(*targetStart)[target];
	else if (targetStart)
		*targetStart = (WCHAR*)(target * sizeof(WCHAR)); /* length handed back as a pointer value */

	return result;
}
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit Service 5a9772
/**
 * Convert UTF-32 code points to UTF-8 bytes.
 *
 * @param sourceStart in: first code point to convert; out: first code point
 *                    that was NOT consumed.
 * @param sourceEnd   one past the last code point of the input.
 * @param targetStart in: first free output byte; out: one past the last byte
 *                    written.
 * @param targetEnd   one past the last writable output byte.
 * @param flags       strictConversion stops at surrogate values.
 *
 * @return conversionOK, targetExhausted, or sourceIllegal. A value above
 *         UNI_MAX_LEGAL_UTF32 is replaced by UNI_REPLACEMENT_CHAR and marks
 *         the result sourceIllegal while conversion continues.
 */
ConversionResult ConvertUTF32toUTF8(const DWORD** sourceStart, const DWORD* sourceEnd,
                                    BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags)
{
	const DWORD continuationMask = 0xBF;
	const DWORD continuationMark = 0x80;
	ConversionResult status = conversionOK;
	const DWORD* cursor = *sourceStart;
	BYTE* out = *targetStart;

	while (cursor < sourceEnd)
	{
		unsigned short count;
		unsigned short idx;
		DWORD cp = *cursor++;

		/* UTF-16 surrogate values are illegal in UTF-32 */
		if ((flags == strictConversion) && (cp >= UNI_SUR_HIGH_START) &&
		    (cp <= UNI_SUR_LOW_END))
		{
			--cursor; /* return to the illegal value itself */
			status = sourceIllegal;
			break;
		}

		/*
		 * Number of bytes the code point needs. Anything beyond the legal
		 * range becomes a (3-byte) replacement character.
		 */
		if (cp < (DWORD)0x80)
		{
			count = 1;
		}
		else if (cp < (DWORD)0x800)
		{
			count = 2;
		}
		else if (cp < (DWORD)0x10000)
		{
			count = 3;
		}
		else if (cp <= UNI_MAX_LEGAL_UTF32)
		{
			count = 4;
		}
		else
		{
			count = 3;
			cp = UNI_REPLACEMENT_CHAR;
			status = sourceIllegal;
		}

		if ((out + count) > targetEnd)
		{
			--cursor; /* Back up source pointer! */
			status = targetExhausted;
			break;
		}

		/* Fill the continuation bytes back to front, then the lead byte. */
		for (idx = count - 1; idx > 0; idx--)
		{
			out[idx] = (BYTE)((cp | continuationMark) & continuationMask);
			cp >>= 6;
		}

		out[0] = (BYTE)(cp | firstByteMark[count]);
		out += count;
	}

	*sourceStart = cursor;
	*targetStart = out;
	return status;
}
Packit 1fb8d4
Packit 1fb8d4
/* --------------------------------------------------------------------- */
Packit 1fb8d4
Packit Service 5a9772
/**
 * Convert UTF-8 bytes to UTF-32 code points.
 *
 * @param sourceStart in: first byte to convert; out: first byte that was NOT
 *                    consumed (start of the offending sequence on error).
 * @param sourceEnd   one past the last input byte.
 * @param targetStart in: first free DWORD of the output; out: one past the
 *                    last DWORD written.
 * @param targetEnd   one past the last writable DWORD of the output.
 * @param flags       strictConversion rejects encoded surrogates; any other
 *                    value replaces them with UNI_REPLACEMENT_CHAR.
 *
 * @return conversionOK, sourceExhausted (truncated sequence), sourceIllegal
 *         or targetExhausted. A decoded value above UNI_MAX_LEGAL_UTF32 is
 *         replaced by UNI_REPLACEMENT_CHAR and marks the result
 *         sourceIllegal while conversion continues.
 */
ConversionResult ConvertUTF8toUTF32(const BYTE** sourceStart, const BYTE* sourceEnd,
                                    DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags)
{
	ConversionResult status = conversionOK;
	const BYTE* cursor = *sourceStart;
	DWORD* out = *targetStart;

	while (cursor < sourceEnd)
	{
		const unsigned short trail = trailingBytesForUTF8[*cursor];
		unsigned short k;
		DWORD cp = 0;

		/* The whole sequence must be inside the source buffer. */
		if ((cursor + trail) >= sourceEnd)
		{
			status = sourceExhausted;
			break;
		}

		/* Do this check whether lenient or strict */
		if (!isLegalUTF8(cursor, trail + 1))
		{
			status = sourceIllegal;
			break;
		}

		/*
		 * Accumulate lead and trailing bytes. This is the loop form that
		 * "Note A" at the bottom of the file gives as equivalent to the
		 * fall-through switch.
		 */
		for (k = 0; k < trail; k++)
		{
			cp += *cursor++;
			cp <<= 6;
		}

		cp += *cursor++;
		cp -= offsetsFromUTF8[trail];

		if (out >= targetEnd)
		{
			cursor -= (trail + 1); /* Back up the source pointer! */
			status = targetExhausted;
			break;
		}

		if (cp > UNI_MAX_LEGAL_UTF32)
		{
			/* Anything over Plane 17 (> 0x10FFFF) is illegal. */
			status = sourceIllegal;
			*out++ = UNI_REPLACEMENT_CHAR;
		}
		else if ((cp >= UNI_SUR_HIGH_START) && (cp <= UNI_SUR_LOW_END))
		{
			/* UTF-16 surrogate values are illegal in UTF-32. */
			if (flags == strictConversion)
			{
				cursor -= (trail + 1); /* return to the illegal value itself */
				status = sourceIllegal;
				break;
			}

			*out++ = UNI_REPLACEMENT_CHAR;
		}
		else
		{
			*out++ = cp;
		}
	}

	*sourceStart = cursor;
	*targetStart = out;
	return status;
}
Packit 1fb8d4
Packit 1fb8d4
/* ---------------------------------------------------------------------

    Note A.
    The fall-through switches in UTF-8 reading code save a
    temp variable, some decrements & conditionals.  The switches
    are equivalent to the following loop:
    {
        int tmpBytesToRead = extraBytesToRead+1;
        do {
            ch += *source++;
            --tmpBytesToRead;
            if (tmpBytesToRead) ch <<= 6;
        } while (tmpBytesToRead > 0);
    }
    In UTF-8 writing code, the switches on "bytesToWrite" are
    similarly unrolled loops.

   --------------------------------------------------------------------- */
Packit Service 5a9772
#pragma GCC diagnostic pop