Tree - source-git/freerdp - CentOS Git server

source-git / freerdp

Blame winpr/libwinpr/crt/utf.c

Blob History Raw

Packit	1fb8d4	`/*`
Packit	1fb8d4	`* Copyright 2001-2004 Unicode, Inc.`
Packit	1fb8d4	`*`
Packit	1fb8d4	`* Disclaimer`
Packit	1fb8d4	`*`
Packit	1fb8d4	`* This source code is provided as is by Unicode, Inc. No claims are`
Packit	1fb8d4	`* made as to fitness for any particular purpose. No warranties of any`
Packit	1fb8d4	`* kind are expressed or implied. The recipient agrees to determine`
Packit	1fb8d4	`* applicability of information provided. If this file has been`
Packit	1fb8d4	`* purchased on magnetic or optical media from Unicode, Inc., the`
Packit	1fb8d4	`* sole remedy for any claim will be exchange of defective media`
Packit	1fb8d4	`* within 90 days of receipt.`
Packit	1fb8d4	`*`
Packit	1fb8d4	`* Limitations on Rights to Redistribute This Code`
Packit	1fb8d4	`*`
Packit	1fb8d4	`* Unicode, Inc. hereby grants the right to freely use the information`
Packit	1fb8d4	`* supplied in this file in the creation of products supporting the`
Packit	1fb8d4	`* Unicode Standard, and to make copies of this file in any form`
Packit	1fb8d4	`* for internal or external distribution as long as this notice`
Packit	1fb8d4	`* remains attached.`
Packit	1fb8d4	`*/`
Packit	1fb8d4
Packit	1fb8d4	`/* ---------------------------------------------------------------------`
Packit	1fb8d4
Packit	1fb8d4	`Conversions between UTF32, UTF-16, and UTF-8. Source code file.`
Packit	1fb8d4	`Author: Mark E. Davis, 1994.`
Packit	1fb8d4	`Rev History: Rick McGowan, fixes & updates May 2001.`
Packit	1fb8d4	`Sept 2001: fixed const & error conditions per`
Packit	1fb8d4	`mods suggested by S. Parent & A. Lillich.`
Packit	1fb8d4	`June 2002: Tim Dodd added detection and handling of incomplete`
Packit	1fb8d4	`source sequences, enhanced error detection, added casts`
Packit	1fb8d4	`to eliminate compiler warnings.`
Packit	1fb8d4	`July 2003: slight mods to back out aggressive FFFE detection.`
Packit	1fb8d4	`Jan 2004: updated switches in from-UTF8 conversions.`
Packit	1fb8d4	`Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.`
Packit	1fb8d4
Packit	1fb8d4	`See the header file "utf.h" for complete documentation.`
Packit	1fb8d4
Packit	1fb8d4	`------------------------------------------------------------------------ */`
Packit	1fb8d4
Packit	1fb8d4	`#include "utf.h"`
Packit	1fb8d4	`#include <winpr/endian.h>`
Packit	1fb8d4
Packit	1fb8d4	`static const int halfShift = 10; /* used for shifting by 10 bits */`
Packit	1fb8d4
Packit	1fb8d4	`static const DWORD halfBase = 0x0010000UL;`
Packit	1fb8d4	`static const DWORD halfMask = 0x3FFUL;`
Packit	1fb8d4
Packit	1fb8d4	`#define UNI_SUR_HIGH_START (DWORD)0xD800`
Packit	1fb8d4	`#define UNI_SUR_HIGH_END (DWORD)0xDBFF`
Packit	1fb8d4	`#define UNI_SUR_LOW_START (DWORD)0xDC00`
Packit	1fb8d4	`#define UNI_SUR_LOW_END (DWORD)0xDFFF`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
Packit	1fb8d4	`ConversionResult ConvertUTF32toUTF16(`
Packit	1fb8d4	`const DWORD** sourceStart, const DWORD* sourceEnd,`
Packit	1fb8d4	`WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`ConversionResult result = conversionOK;`
Packit	1fb8d4	`const DWORD* source = *sourceStart;`
Packit	1fb8d4	`WCHAR* target = *targetStart;`
Packit	1fb8d4
Packit	1fb8d4	`while (source < sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`DWORD ch;`
Packit	1fb8d4
Packit	1fb8d4	`if (target >= targetEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`result = targetExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`ch = *source++;`
Packit	1fb8d4
Packit	1fb8d4	`if (ch <= UNI_MAX_BMP) /* Target is a character <= 0xFFFF */`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */`
Packit	1fb8d4	`if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`if (flags == strictConversion)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`--source; /* return to the illegal value itself */`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`*target++ = UNI_REPLACEMENT_CHAR;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`target++ = (WCHAR)ch; / normal case */`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (ch > UNI_MAX_LEGAL_UTF32)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`if (flags == strictConversion)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`*target++ = UNI_REPLACEMENT_CHAR;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* target is a character in range 0xFFFF - 0x10FFFF. */`
Packit	1fb8d4	`if (target + 1 >= targetEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`--source; /* Back up source pointer! */`
Packit	1fb8d4	`result = targetExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`ch -= halfBase;`
Packit	1fb8d4	`*target++ = (WCHAR)((ch >> halfShift) + UNI_SUR_HIGH_START);`
Packit	1fb8d4	`*target++ = (WCHAR)((ch & halfMask) + UNI_SUR_LOW_START);`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`*sourceStart = source;`
Packit	1fb8d4	`*targetStart = target;`
Packit	1fb8d4	`return result;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
Packit	1fb8d4	`ConversionResult ConvertUTF16toUTF32(`
Packit	1fb8d4	`const WCHAR** sourceStart, const WCHAR* sourceEnd,`
Packit	1fb8d4	`DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`ConversionResult result = conversionOK;`
Packit	1fb8d4	`const WCHAR* source = *sourceStart;`
Packit	1fb8d4	`DWORD* target = *targetStart;`
Packit	1fb8d4	`DWORD ch, ch2;`
Packit	1fb8d4
Packit	1fb8d4	`while (source < sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`const WCHAR* oldSource = source; /* In case we have to back up because of target overflow. */`
Packit	1fb8d4	`ch = *source++;`
Packit	1fb8d4
Packit	1fb8d4	`/* If we have a surrogate pair, convert to UTF32 first. */`
Packit	1fb8d4	`if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* If the 16 bits following the high surrogate are in the source buffer... */`
Packit	1fb8d4	`if (source < sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`ch2 = *source;`
Packit	1fb8d4
Packit	1fb8d4	`/* If it's a low surrogate, convert to UTF32. */`
Packit	1fb8d4	`if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`ch = ((ch - UNI_SUR_HIGH_START) << halfShift)`
Packit	1fb8d4	`+ (ch2 - UNI_SUR_LOW_START) + halfBase;`
Packit	1fb8d4	`++source;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (flags == strictConversion) /* it's an unpaired high surrogate */`
Packit	1fb8d4	`{`
Packit	1fb8d4	`--source; /* return to the illegal value itself */`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else /* We don't have the 16 bits following the high surrogate. */`
Packit	1fb8d4	`{`
Packit	1fb8d4	`--source; /* return to the high surrogate */`
Packit	1fb8d4	`result = sourceExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (flags == strictConversion)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* UTF-16 surrogate values are illegal in UTF-32 */`
Packit	1fb8d4	`if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`--source; /* return to the illegal value itself */`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`if (target >= targetEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`source = oldSource; /* Back up source pointer! */`
Packit	1fb8d4	`result = targetExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`*target++ = ch;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`*sourceStart = source;`
Packit	1fb8d4	`*targetStart = target;`
Packit	1fb8d4	`#ifdef CVTUTF_DEBUG`
Packit	1fb8d4
Packit	1fb8d4	`if (result == sourceIllegal)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`WLOG_WARN(TAG, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x", ch, ch2);`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`#endif`
Packit	1fb8d4	`return result;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
/*
 * Lookup table indexed by the first byte of a UTF-8 sequence; the entry is
 * the number of trailing bytes expected to follow that byte.
 * Legal UTF-8 never has 4 or 5 trailing bytes; those entries are kept for
 * anyone who wants to handle the longer forms that earlier algorithms
 * allowed.
 */
static const char trailingBytesForUTF8[256] =
{
    /* 0x00 - 0xBF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 - 0xDF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0 - 0xEF */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xF0 - 0xFF */
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
Packit	1fb8d4
Packit	1fb8d4	`/*`
Packit	1fb8d4	`* Magic values subtracted from a buffer value during UTF8 conversion.`
Packit	1fb8d4	`* This table contains as many values as there might be trailing bytes`
Packit	1fb8d4	`* in a UTF-8 sequence.`
Packit	1fb8d4	`*/`
Packit	1fb8d4	`static const DWORD offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,`
Packit	1fb8d4	`0x03C82080UL, 0xFA082080UL, 0x82082080UL`
Packit	1fb8d4	`};`
Packit	1fb8d4
Packit	1fb8d4	`/*`
Packit	1fb8d4	`* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed`
Packit	1fb8d4	`* into the first byte, depending on how many bytes follow. There are`
Packit	1fb8d4	`* as many entries in this table as there are UTF-8 sequence types.`
Packit	1fb8d4	`* (I.e., one byte sequence, two byte... etc.). Remember that sequencs`
Packit	1fb8d4	`* for legal UTF-8 will be 4 or fewer bytes total.`
Packit	1fb8d4	`*/`
Packit	1fb8d4	`static const BYTE firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
Packit	1fb8d4	`/* The interface converts a whole buffer to avoid function-call overhead.`
Packit	1fb8d4	`* Constants have been gathered. Loops & conditionals have been removed as`
Packit	1fb8d4	`* much as possible for efficiency, in favor of drop-through switches.`
Packit	1fb8d4	`* (See "Note A" at the bottom of the file for equivalent code.)`
Packit	1fb8d4	`* If your compiler supports it, the "isLegalUTF8" call can be turned`
Packit	1fb8d4	`* into an inline function.`
Packit	1fb8d4	`*/`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
Packit	1fb8d4	`ConversionResult ConvertUTF16toUTF8(`
Packit	1fb8d4	`const WCHAR** sourceStart, const WCHAR* sourceEnd,`
Packit	1fb8d4	`BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`BYTE* target;`
Packit	1fb8d4	`const WCHAR* source;`
Packit	1fb8d4	`BOOL computeLength;`
Packit	1fb8d4	`ConversionResult result;`
Packit	1fb8d4	`computeLength = (!targetEnd) ? TRUE : FALSE;`
Packit	1fb8d4	`source = *sourceStart;`
Packit	1fb8d4	`target = *targetStart;`
Packit	1fb8d4	`result = conversionOK;`
Packit	1fb8d4
Packit	1fb8d4	`while (source < sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`DWORD ch;`
Packit	1fb8d4	`unsigned short bytesToWrite = 0;`
Packit	1fb8d4	`const DWORD byteMask = 0xBF;`
Packit	1fb8d4	`const DWORD byteMark = 0x80;`
Packit	1fb8d4	`const WCHAR* oldSource = source; /* In case we have to back up because of target overflow. */`
Packit	1fb8d4	`Data_Read_UINT16(source, ch);`
Packit	1fb8d4	`source++;`
Packit	1fb8d4
Packit	1fb8d4	`/* If we have a surrogate pair, convert to UTF32 first. */`
Packit	1fb8d4	`if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* If the 16 bits following the high surrogate are in the source buffer... */`
Packit	1fb8d4	`if (source < sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`DWORD ch2;`
Packit	1fb8d4	`Data_Read_UINT16(source, ch2);`
Packit	1fb8d4
Packit	1fb8d4	`/* If it's a low surrogate, convert to UTF32. */`
Packit	1fb8d4	`if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`ch = ((ch - UNI_SUR_HIGH_START) << halfShift)`
Packit	1fb8d4	`+ (ch2 - UNI_SUR_LOW_START) + halfBase;`
Packit	1fb8d4	`++source;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (flags == strictConversion)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* it's an unpaired high surrogate */`
Packit	1fb8d4	`--source; /* return to the illegal value itself */`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* We don't have the 16 bits following the high surrogate. */`
Packit	1fb8d4	`--source; /* return to the high surrogate */`
Packit	1fb8d4	`result = sourceExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (flags == strictConversion)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* UTF-16 surrogate values are illegal in UTF-32 */`
Packit	1fb8d4	`if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`--source; /* return to the illegal value itself */`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* Figure out how many bytes the result will require */`
Packit	1fb8d4	`if (ch < (DWORD) 0x80)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 1;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (ch < (DWORD) 0x800)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 2;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (ch < (DWORD) 0x10000)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 3;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (ch < (DWORD) 0x110000)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 4;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 3;`
Packit	1fb8d4	`ch = UNI_REPLACEMENT_CHAR;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`target += bytesToWrite;`
Packit	1fb8d4
Packit	1fb8d4	`if ((target > targetEnd) && (!computeLength))`
Packit	1fb8d4	`{`
Packit	1fb8d4	`source = oldSource; /* Back up source pointer! */`
Packit	1fb8d4	`target -= bytesToWrite;`
Packit	1fb8d4	`result = targetExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`if (!computeLength)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`switch (bytesToWrite)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* note: everything falls through. */`
Packit	1fb8d4	`case 4:`
Packit	1fb8d4	`*--target = (BYTE)((ch \| byteMark) & byteMask);`
Packit	1fb8d4	`ch >>= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 3:`
Packit	1fb8d4	`*--target = (BYTE)((ch \| byteMark) & byteMask);`
Packit	1fb8d4	`ch >>= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 2:`
Packit	1fb8d4	`*--target = (BYTE)((ch \| byteMark) & byteMask);`
Packit	1fb8d4	`ch >>= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 1:`
Packit	1fb8d4	`*--target = (BYTE)(ch \| firstByteMark[bytesToWrite]);`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`switch (bytesToWrite)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* note: everything falls through. */`
Packit	1fb8d4	`case 4:`
Packit	1fb8d4	`--target;`
Packit	1fb8d4
Packit	1fb8d4	`case 3:`
Packit	1fb8d4	`--target;`
Packit	1fb8d4
Packit	1fb8d4	`case 2:`
Packit	1fb8d4	`--target;`
Packit	1fb8d4
Packit	1fb8d4	`case 1:`
Packit	1fb8d4	`--target;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`target += bytesToWrite;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`*sourceStart = source;`
Packit	1fb8d4	`*targetStart = target;`
Packit	1fb8d4	`return result;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
Packit	1fb8d4	`/*`
Packit	1fb8d4	`* Utility routine to tell whether a sequence of bytes is legal UTF-8.`
Packit	1fb8d4	`* This must be called with the length pre-determined by the first byte.`
Packit	1fb8d4	`* If not calling this from ConvertUTF8to*, then the length can be set by:`
Packit	1fb8d4	`* length = trailingBytesForUTF8[*source]+1;`
Packit	1fb8d4	`* and the sequence is illegal right away if there aren't that many bytes`
Packit	1fb8d4	`* available.`
Packit	1fb8d4	`* If presented with a length > 4, this returns FALSE. The Unicode`
Packit	1fb8d4	`* definition of UTF-8 goes up to 4-byte sequences.`
Packit	1fb8d4	`*/`
Packit	1fb8d4
Packit	1fb8d4	`static BOOL isLegalUTF8(const BYTE* source, int length)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`BYTE a;`
Packit	1fb8d4	`const BYTE* srcptr = source + length;`
Packit	1fb8d4
Packit	1fb8d4	`switch (length)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`default:`
Packit	1fb8d4	`return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`/* Everything else falls through when "TRUE"... */`
Packit	1fb8d4	`case 4:`
Packit	1fb8d4	`if ((a = (*--srcptr)) < 0x80 \|\| a > 0xBF) return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`case 3:`
Packit	1fb8d4	`if ((a = (*--srcptr)) < 0x80 \|\| a > 0xBF) return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`case 2:`
Packit	1fb8d4	`if ((a = (*--srcptr)) > 0xBF) return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`switch (*source)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* no fall-through in this inner switch */`
Packit	1fb8d4	`case 0xE0:`
Packit	1fb8d4	`if (a < 0xA0) return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`break;`
Packit	1fb8d4
Packit	1fb8d4	`case 0xED:`
Packit	1fb8d4	`if (a > 0x9F) return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`break;`
Packit	1fb8d4
Packit	1fb8d4	`case 0xF0:`
Packit	1fb8d4	`if (a < 0x90) return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`break;`
Packit	1fb8d4
Packit	1fb8d4	`case 0xF4:`
Packit	1fb8d4	`if (a > 0x8F) return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`break;`
Packit	1fb8d4
Packit	1fb8d4	`default:`
Packit	1fb8d4	`if (a < 0x80) return FALSE;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`case 1:`
Packit	1fb8d4	`if (source >= 0x80 && source < 0xC2) return FALSE;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`if (*source > 0xF4)`
Packit	1fb8d4	`return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`return TRUE;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
Packit	1fb8d4	`/*`
Packit	1fb8d4	`* Exported function to return whether a UTF-8 sequence is legal or not.`
Packit	1fb8d4	`* This is not used here; it's just exported.`
Packit	1fb8d4	`*/`
Packit	1fb8d4	`BOOL isLegalUTF8Sequence(const BYTE* source, const BYTE* sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`int length = trailingBytesForUTF8[*source] + 1;`
Packit	1fb8d4
Packit	1fb8d4	`if (source + length > sourceEnd)`
Packit	1fb8d4	`return FALSE;`
Packit	1fb8d4
Packit	1fb8d4	`return isLegalUTF8(source, length);`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
Packit	1fb8d4	`ConversionResult ConvertUTF8toUTF16(`
Packit	1fb8d4	`const BYTE** sourceStart, const BYTE* sourceEnd,`
Packit	1fb8d4	`WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`WCHAR* target;`
Packit	1fb8d4	`const BYTE* source;`
Packit	1fb8d4	`BOOL computeLength;`
Packit	1fb8d4	`ConversionResult result;`
Packit	1fb8d4	`computeLength = (!targetEnd) ? TRUE : FALSE;`
Packit	1fb8d4	`result = conversionOK;`
Packit	1fb8d4	`source = *sourceStart;`
Packit	1fb8d4	`target = *targetStart;`
Packit	1fb8d4
Packit	1fb8d4	`while (source < sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`DWORD ch = 0;`
Packit	1fb8d4	`unsigned short extraBytesToRead = trailingBytesForUTF8[*source];`
Packit	1fb8d4
Packit	1fb8d4	`if ((source + extraBytesToRead) >= sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`result = sourceExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* Do this check whether lenient or strict */`
Packit	1fb8d4	`if (!isLegalUTF8(source, extraBytesToRead + 1))`
Packit	1fb8d4	`{`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/*`
Packit	1fb8d4	`* The cases all fall through. See "Note A" below.`
Packit	1fb8d4	`*/`
Packit	1fb8d4	`switch (extraBytesToRead)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`case 5:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6; /* remember, illegal UTF-8 */`
Packit	1fb8d4
Packit	1fb8d4	`case 4:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6; /* remember, illegal UTF-8 */`
Packit	1fb8d4
Packit	1fb8d4	`case 3:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 2:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 1:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 0:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`ch -= offsetsFromUTF8[extraBytesToRead];`
Packit	1fb8d4
Packit	1fb8d4	`if ((target >= targetEnd) && (!computeLength))`
Packit	1fb8d4	`{`
Packit	1fb8d4	`source -= (extraBytesToRead + 1); /* Back up source pointer! */`
Packit	1fb8d4	`result = targetExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`if (ch <= UNI_MAX_BMP)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* Target is a character <= 0xFFFF */`
Packit	1fb8d4	`/* UTF-16 surrogate values are illegal in UTF-32 */`
Packit	1fb8d4	`if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`if (flags == strictConversion)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`source -= (extraBytesToRead + 1); /* return to the illegal value itself */`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`if (!computeLength)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`Data_Write_UINT16(target, UNI_REPLACEMENT_CHAR);`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`if (!computeLength)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`Data_Write_UINT16(target, ch); /* normal case */`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (ch > UNI_MAX_UTF16)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`if (flags == strictConversion)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`source -= (extraBytesToRead + 1); /* return to the start */`
Packit	1fb8d4	`break; /* Bail out; shouldn't continue */`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`if (!computeLength)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`Data_Write_UINT16(target, UNI_REPLACEMENT_CHAR);`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* target is a character in range 0xFFFF - 0x10FFFF. */`
Packit	1fb8d4	`if ((target + 1 >= targetEnd) && (!computeLength))`
Packit	1fb8d4	`{`
Packit	1fb8d4	`source -= (extraBytesToRead + 1); /* Back up source pointer! */`
Packit	1fb8d4	`result = targetExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`ch -= halfBase;`
Packit	1fb8d4
Packit	1fb8d4	`if (!computeLength)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`WCHAR wchar;`
Packit	1fb8d4	`wchar = (ch >> halfShift) + UNI_SUR_HIGH_START;`
Packit	1fb8d4	`Data_Write_UINT16(target, wchar);`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`wchar = (ch & halfMask) + UNI_SUR_LOW_START;`
Packit	1fb8d4	`Data_Write_UINT16(target, wchar);`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`target++;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`*sourceStart = source;`
Packit	1fb8d4	`*targetStart = target;`
Packit	1fb8d4	`return result;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
Packit	1fb8d4	`ConversionResult ConvertUTF32toUTF8(`
Packit	1fb8d4	`const DWORD** sourceStart, const DWORD* sourceEnd,`
Packit	1fb8d4	`BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`ConversionResult result = conversionOK;`
Packit	1fb8d4	`const DWORD* source = *sourceStart;`
Packit	1fb8d4	`BYTE* target = *targetStart;`
Packit	1fb8d4
Packit	1fb8d4	`while (source < sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`DWORD ch;`
Packit	1fb8d4	`unsigned short bytesToWrite = 0;`
Packit	1fb8d4	`const DWORD byteMask = 0xBF;`
Packit	1fb8d4	`const DWORD byteMark = 0x80;`
Packit	1fb8d4	`ch = *source++;`
Packit	1fb8d4
Packit	1fb8d4	`if (flags == strictConversion)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/* UTF-16 surrogate values are illegal in UTF-32 */`
Packit	1fb8d4	`if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`--source; /* return to the illegal value itself */`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/*`
Packit	1fb8d4	`* Figure out how many bytes the result will require. Turn any`
Packit	1fb8d4	`* illegally large UTF32 things (> Plane 17) into replacement chars.`
Packit	1fb8d4	`*/`
Packit	1fb8d4	`if (ch < (DWORD)0x80)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 1;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (ch < (DWORD)0x800)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 2;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (ch < (DWORD)0x10000)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 3;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else if (ch <= UNI_MAX_LEGAL_UTF32)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 4;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`bytesToWrite = 3;`
Packit	1fb8d4	`ch = UNI_REPLACEMENT_CHAR;`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`target += bytesToWrite;`
Packit	1fb8d4
Packit	1fb8d4	`if (target > targetEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`--source; /* Back up source pointer! */`
Packit	1fb8d4	`target -= bytesToWrite;`
Packit	1fb8d4	`result = targetExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`switch (bytesToWrite) /* note: everything falls through. */`
Packit	1fb8d4	`{`
Packit	1fb8d4	`case 4:`
Packit	1fb8d4	`*--target = (BYTE)((ch \| byteMark) & byteMask);`
Packit	1fb8d4	`ch >>= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 3:`
Packit	1fb8d4	`*--target = (BYTE)((ch \| byteMark) & byteMask);`
Packit	1fb8d4	`ch >>= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 2:`
Packit	1fb8d4	`*--target = (BYTE)((ch \| byteMark) & byteMask);`
Packit	1fb8d4	`ch >>= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 1:`
Packit	1fb8d4	`*--target = (BYTE)(ch \| firstByteMark[bytesToWrite]);`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`target += bytesToWrite;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`*sourceStart = source;`
Packit	1fb8d4	`*targetStart = target;`
Packit	1fb8d4	`return result;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* --------------------------------------------------------------------- */`
Packit	1fb8d4
Packit	1fb8d4	`ConversionResult ConvertUTF8toUTF32(`
Packit	1fb8d4	`const BYTE** sourceStart, const BYTE* sourceEnd,`
Packit	1fb8d4	`DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`ConversionResult result = conversionOK;`
Packit	1fb8d4	`const BYTE* source = *sourceStart;`
Packit	1fb8d4	`DWORD* target = *targetStart;`
Packit	1fb8d4
Packit	1fb8d4	`while (source < sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`DWORD ch = 0;`
Packit	1fb8d4	`unsigned short extraBytesToRead = trailingBytesForUTF8[*source];`
Packit	1fb8d4
Packit	1fb8d4	`if (source + extraBytesToRead >= sourceEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`result = sourceExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* Do this check whether lenient or strict */`
Packit	1fb8d4	`if (! isLegalUTF8(source, extraBytesToRead + 1))`
Packit	1fb8d4	`{`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/*`
Packit	1fb8d4	`* The cases all fall through. See "Note A" below.`
Packit	1fb8d4	`*/`
Packit	1fb8d4	`switch (extraBytesToRead)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`case 5:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 4:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 3:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 2:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 1:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`ch <<= 6;`
Packit	1fb8d4
Packit	1fb8d4	`case 0:`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`ch -= offsetsFromUTF8[extraBytesToRead];`
Packit	1fb8d4
Packit	1fb8d4	`if (target >= targetEnd)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`source -= (extraBytesToRead + 1); /* Back up the source pointer! */`
Packit	1fb8d4	`result = targetExhausted;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`if (ch <= UNI_MAX_LEGAL_UTF32)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`/*`
Packit	1fb8d4	`* UTF-16 surrogate values are illegal in UTF-32, and anything`
Packit	1fb8d4	`* over Plane 17 (> 0x10FFFF) is illegal.`
Packit	1fb8d4	`*/`
Packit	1fb8d4	`if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`if (flags == strictConversion)`
Packit	1fb8d4	`{`
Packit	1fb8d4	`source -= (extraBytesToRead + 1); /* return to the illegal value itself */`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`break;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`*target++ = UNI_REPLACEMENT_CHAR;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else`
Packit	1fb8d4	`{`
Packit	1fb8d4	`*target++ = ch;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4	`else /* i.e., ch > UNI_MAX_LEGAL_UTF32 */`
Packit	1fb8d4	`{`
Packit	1fb8d4	`result = sourceIllegal;`
Packit	1fb8d4	`*target++ = UNI_REPLACEMENT_CHAR;`
Packit	1fb8d4	`}`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`*sourceStart = source;`
Packit	1fb8d4	`*targetStart = target;`
Packit	1fb8d4	`return result;`
Packit	1fb8d4	`}`
Packit	1fb8d4
Packit	1fb8d4	`/* ---------------------------------------------------------------------`
Packit	1fb8d4
Packit	1fb8d4	`Note A.`
Packit	1fb8d4	`The fall-through switches in UTF-8 reading code save a`
Packit	1fb8d4	`temp variable, some decrements & conditionals. The switches`
Packit	1fb8d4	`are equivalent to the following loop:`
Packit	1fb8d4	`{`
Packit	1fb8d4	`int tmpBytesToRead = extraBytesToRead+1;`
Packit	1fb8d4	`do {`
Packit	1fb8d4	`ch += *source++;`
Packit	1fb8d4	`--tmpBytesToRead;`
Packit	1fb8d4	`if (tmpBytesToRead) ch <<= 6;`
Packit	1fb8d4	`} while (tmpBytesToRead > 0);`
Packit	1fb8d4	`}`
Packit	1fb8d4	`In UTF-8 writing code, the switches on "bytesToWrite" are`
Packit	1fb8d4	`similarly unrolled loops.`
Packit	1fb8d4
Packit	1fb8d4	`--------------------------------------------------------------------- */`

source-git / freerdp

Source Code

Blame winpr/libwinpr/crt/utf.c