Blame winpr/libwinpr/crt/utf.h

Packit 1fb8d4
/*
Packit 1fb8d4
 * Copyright 2001-2004 Unicode, Inc.
Packit 1fb8d4
 * 
Packit 1fb8d4
 * Disclaimer
Packit 1fb8d4
 * 
Packit 1fb8d4
 * This source code is provided as is by Unicode, Inc. No claims are
Packit 1fb8d4
 * made as to fitness for any particular purpose. No warranties of any
Packit 1fb8d4
 * kind are expressed or implied. The recipient agrees to determine
Packit 1fb8d4
 * applicability of information provided. If this file has been
Packit 1fb8d4
 * purchased on magnetic or optical media from Unicode, Inc., the
Packit 1fb8d4
 * sole remedy for any claim will be exchange of defective media
Packit 1fb8d4
 * within 90 days of receipt.
Packit 1fb8d4
 * 
Packit 1fb8d4
 * Limitations on Rights to Redistribute This Code
Packit 1fb8d4
 * 
Packit 1fb8d4
 * Unicode, Inc. hereby grants the right to freely use the information
Packit 1fb8d4
 * supplied in this file in the creation of products supporting the
Packit 1fb8d4
 * Unicode Standard, and to make copies of this file in any form
Packit 1fb8d4
 * for internal or external distribution as long as this notice
Packit 1fb8d4
 * remains attached.
Packit 1fb8d4
 */
Packit 1fb8d4
Packit 1fb8d4
/* ---------------------------------------------------------------------
Packit 1fb8d4
Packit 1fb8d4
    Conversions between UTF-32, UTF-16, and UTF-8.  Header file.
Packit 1fb8d4
Packit 1fb8d4
    Several functions are included here, forming a complete set of
Packit 1fb8d4
    conversions between the three formats.  UTF-7 is not included
Packit 1fb8d4
    here, but is handled in a separate source file.
Packit 1fb8d4
Packit 1fb8d4
    Each of these routines takes pointers to input buffers and output
Packit 1fb8d4
    buffers.  The input buffers are const.
Packit 1fb8d4
Packit 1fb8d4
    Each routine converts the text between *sourceStart and sourceEnd,
Packit 1fb8d4
    putting the result into the buffer between *targetStart and
Packit 1fb8d4
    targetEnd. Note: the end pointers are *after* the last item: e.g. 
Packit 1fb8d4
    *(sourceEnd - 1) is the last item.
Packit 1fb8d4
Packit 1fb8d4
    The return result indicates whether the conversion was successful,
Packit 1fb8d4
    and if not, whether the problem was in the source or target buffers.
Packit 1fb8d4
    (Only the first encountered problem is indicated.)
Packit 1fb8d4
Packit 1fb8d4
    After the conversion, *sourceStart and *targetStart are both
Packit 1fb8d4
    updated to point to the end of last text successfully converted in
Packit 1fb8d4
    the respective buffers.
Packit 1fb8d4
Packit 1fb8d4
    Input parameters:
Packit 1fb8d4
    sourceStart - pointer to a pointer to the source buffer.
Packit 1fb8d4
        The contents of this are modified on return so that
Packit 1fb8d4
        it points at the next thing to be converted.
Packit 1fb8d4
    targetStart - similarly, pointer to pointer to the target buffer.
Packit 1fb8d4
    sourceEnd, targetEnd - respectively pointers to the ends of the
Packit 1fb8d4
        two buffers, for overflow checking only.
Packit 1fb8d4
Packit 1fb8d4
    These conversion functions take a ConversionFlags argument. When this
Packit 1fb8d4
    flag is set to strict, both irregular sequences and isolated surrogates
Packit 1fb8d4
    will cause an error.  When the flag is set to lenient, both irregular
Packit 1fb8d4
    sequences and isolated surrogates are converted.
Packit 1fb8d4
Packit 1fb8d4
    Whether the flag is strict or lenient, all illegal sequences will cause
Packit 1fb8d4
    an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
Packit 1fb8d4
    or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
Packit 1fb8d4
    must check for illegal sequences.
Packit 1fb8d4
Packit 1fb8d4
    When the flag is set to lenient, characters over 0x10FFFF are converted
Packit 1fb8d4
    to the replacement character; otherwise (when the flag is set to strict)
Packit 1fb8d4
    they constitute an error.
Packit 1fb8d4
Packit 1fb8d4
    Output parameters:
Packit 1fb8d4
    The value "sourceIllegal" is returned from some routines if the input
Packit 1fb8d4
    sequence is malformed.  When "sourceIllegal" is returned, the source
Packit 1fb8d4
    value will point to the illegal value that caused the problem. E.g.,
Packit 1fb8d4
    in UTF-8 when a sequence is malformed, it points to the start of the
Packit 1fb8d4
    malformed sequence.  
Packit 1fb8d4
Packit 1fb8d4
    Author: Mark E. Davis, 1994.
Packit 1fb8d4
    Rev History: Rick McGowan, fixes & updates May 2001.
Packit 1fb8d4
         Fixes & updates, Sept 2001.
Packit 1fb8d4
Packit 1fb8d4
------------------------------------------------------------------------ */
Packit 1fb8d4
Packit 1fb8d4
#ifndef FREERDP_UNICODE_CONVERT_UTF_H
Packit 1fb8d4
#define FREERDP_UNICODE_CONVERT_UTF_H
Packit 1fb8d4
Packit 1fb8d4
#include <winpr/wtypes.h>
Packit 1fb8d4
Packit 1fb8d4
/*
Packit 1fb8d4
 * Character Types:
Packit 1fb8d4
 *
Packit 1fb8d4
 * UTF8:	BYTE		8 bits
Packit 1fb8d4
 * UTF16:	WCHAR		16 bits
Packit 1fb8d4
 * UTF32:	DWORD		32 bits
Packit 1fb8d4
 */
Packit 1fb8d4
Packit 1fb8d4
/* Some fundamental constants */
Packit 1fb8d4
/* U+FFFD, the replacement character; fully parenthesized so the cast cannot
 * be split by sizeof or a postfix operator at the use site. */
#define UNI_REPLACEMENT_CHAR	((DWORD)0x0000FFFD)
Packit 1fb8d4
/* 0xFFFF, the last code point of the Basic Multilingual Plane; fully
 * parenthesized so the cast cannot be split at the use site. */
#define UNI_MAX_BMP		((DWORD)0x0000FFFF)
Packit 1fb8d4
/* 0x10FFFF, the largest code point a UTF-16 surrogate pair can encode; fully
 * parenthesized so the cast cannot be split at the use site. */
#define UNI_MAX_UTF16		((DWORD)0x0010FFFF)
Packit 1fb8d4
/* 0x7FFFFFFF, the 31-bit upper bound used for UTF-32 values; fully
 * parenthesized so the cast cannot be split at the use site. */
#define UNI_MAX_UTF32		((DWORD)0x7FFFFFFF)
Packit 1fb8d4
/* 0x10FFFF, the largest legal UTF-32 value (anything above it is illegal);
 * fully parenthesized so the cast cannot be split at the use site. */
#define UNI_MAX_LEGAL_UTF32	((DWORD)0x0010FFFF)
Packit 1fb8d4
Packit 1fb8d4
/*
 * Status code returned by the conversion routines declared in this header.
 * It tells the caller whether the conversion succeeded and, if not, whether
 * the problem was in the source or in the target buffer. Only the first
 * problem encountered is reported.
 *
 * No explicit values are assigned, so the enumerators are numbered 0..3 in
 * declaration order (conversionOK is 0).
 */
typedef enum
Packit 1fb8d4
{
Packit 1fb8d4
 	conversionOK,   /* conversion completed successfully */
Packit 1fb8d4
	sourceExhausted, /* source ended in the middle of a character */
Packit 1fb8d4
	targetExhausted, /* insufficient room in the target buffer */
Packit 1fb8d4
	sourceIllegal  /* source sequence is illegal/malformed; the source
	                  pointer is left at the offending value */
Packit 1fb8d4
} ConversionResult;
Packit 1fb8d4
Packit 1fb8d4
/*
 * Strictness selector passed to every ConvertUTF*to* routine.
 *
 * Per the description at the top of this file: with the strict setting,
 * irregular sequences and isolated surrogates cause an error; with the
 * lenient setting they are converted. Illegal sequences are reported as
 * errors under either setting.
 */
typedef enum
Packit 1fb8d4
{
Packit 1fb8d4
	strictConversion = 0, /* reject irregular sequences and lone surrogates */
Packit 1fb8d4
	lenientConversion     /* convert them instead (value 1) */
Packit 1fb8d4
} ConversionFlags;
Packit 1fb8d4
Packit 1fb8d4
/* This is for C++ and does no harm in C */
Packit 1fb8d4
#ifdef __cplusplus
Packit 1fb8d4
extern "C" {
Packit 1fb8d4
#endif
Packit 1fb8d4
Packit 1fb8d4
/*
 * Convert UTF-8 text (BYTE units) to UTF-16 (WCHAR units).
 *
 * sourceStart  in/out: *sourceStart is the first unit to convert; on return
 *              it points just past the last text successfully converted.
 * sourceEnd    one past the last source unit.
 * targetStart  in/out: *targetStart is where output begins; on return it
 *              points just past the last unit written.
 * targetEnd    one past the end of the target buffer (overflow check only).
 * flags        strictConversion or lenientConversion.
 *
 * Returns conversionOK on success, otherwise the first problem encountered.
 */
ConversionResult ConvertUTF8toUTF16(
Packit 1fb8d4
	const BYTE** sourceStart, const BYTE* sourceEnd,
Packit 1fb8d4
	WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags);
Packit 1fb8d4
Packit 1fb8d4
/*
 * Convert UTF-16 text (WCHAR units) to UTF-8 (BYTE units).
 *
 * sourceStart  in/out: *sourceStart is the first unit to convert; on return
 *              it points just past the last text successfully converted.
 * sourceEnd    one past the last source unit.
 * targetStart  in/out: *targetStart is where output begins; on return it
 *              points just past the last unit written.
 * targetEnd    one past the end of the target buffer (overflow check only).
 * flags        strictConversion or lenientConversion.
 *
 * Returns conversionOK on success, otherwise the first problem encountered.
 */
ConversionResult ConvertUTF16toUTF8(
Packit 1fb8d4
	const WCHAR** sourceStart, const WCHAR* sourceEnd,
Packit 1fb8d4
	BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags);
Packit 1fb8d4
Packit 1fb8d4
/*
 * Convert UTF-8 text (BYTE units) to UTF-32 (DWORD units).
 *
 * sourceStart  in/out: *sourceStart is the first unit to convert; on return
 *              it points just past the last text successfully converted.
 * sourceEnd    one past the last source unit.
 * targetStart  in/out: *targetStart is where output begins; on return it
 *              points just past the last unit written.
 * targetEnd    one past the end of the target buffer (overflow check only).
 * flags        strictConversion or lenientConversion.
 *
 * Returns conversionOK on success, otherwise the first problem encountered.
 */
ConversionResult ConvertUTF8toUTF32(
Packit 1fb8d4
	const BYTE** sourceStart, const BYTE* sourceEnd,
Packit 1fb8d4
	DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags);
Packit 1fb8d4
Packit 1fb8d4
/*
 * Convert UTF-32 text (DWORD units) to UTF-8 (BYTE units).
 *
 * sourceStart  in/out: *sourceStart is the first unit to convert; on return
 *              it points just past the last text successfully converted.
 * sourceEnd    one past the last source unit.
 * targetStart  in/out: *targetStart is where output begins; on return it
 *              points just past the last unit written.
 * targetEnd    one past the end of the target buffer (overflow check only).
 * flags        strictConversion or lenientConversion.
 *
 * Returns conversionOK on success, otherwise the first problem encountered.
 */
ConversionResult ConvertUTF32toUTF8(
Packit 1fb8d4
	const DWORD** sourceStart, const DWORD* sourceEnd,
Packit 1fb8d4
	BYTE** targetStart, BYTE* targetEnd, ConversionFlags flags);
Packit 1fb8d4
Packit 1fb8d4
/*
 * Convert UTF-16 text (WCHAR units) to UTF-32 (DWORD units).
 *
 * sourceStart  in/out: *sourceStart is the first unit to convert; on return
 *              it points just past the last text successfully converted.
 * sourceEnd    one past the last source unit.
 * targetStart  in/out: *targetStart is where output begins; on return it
 *              points just past the last unit written.
 * targetEnd    one past the end of the target buffer (overflow check only).
 * flags        strictConversion or lenientConversion.
 *
 * Returns conversionOK on success, otherwise the first problem encountered.
 */
ConversionResult ConvertUTF16toUTF32(
Packit 1fb8d4
	const WCHAR** sourceStart, const WCHAR* sourceEnd,
Packit 1fb8d4
	DWORD** targetStart, DWORD* targetEnd, ConversionFlags flags);
Packit 1fb8d4
Packit 1fb8d4
/*
 * Convert UTF-32 text (DWORD units) to UTF-16 (WCHAR units).
 *
 * sourceStart  in/out: *sourceStart is the first unit to convert; on return
 *              it points just past the last text successfully converted.
 * sourceEnd    one past the last source unit.
 * targetStart  in/out: *targetStart is where output begins; on return it
 *              points just past the last unit written.
 * targetEnd    one past the end of the target buffer (overflow check only).
 * flags        strictConversion or lenientConversion.
 *
 * Returns conversionOK on success, otherwise the first problem encountered.
 */
ConversionResult ConvertUTF32toUTF16(
Packit 1fb8d4
	const DWORD** sourceStart, const DWORD* sourceEnd,
Packit 1fb8d4
	WCHAR** targetStart, WCHAR* targetEnd, ConversionFlags flags);
Packit 1fb8d4
Packit 1fb8d4
/*
 * Legality check for UTF-8 input delimited by source and sourceEnd
 * (sourceEnd is presumably one past the last byte, as for the conversion
 * routines in this header).
 *
 * NOTE(review): this header does not say whether the whole range or only the
 * first encoded character is examined, nor how an empty range is treated;
 * confirm against the implementation before relying on either.
 */
BOOL isLegalUTF8Sequence(const BYTE* source, const BYTE* sourceEnd);
Packit 1fb8d4
Packit 1fb8d4
#ifdef __cplusplus
Packit 1fb8d4
}
Packit 1fb8d4
#endif
Packit 1fb8d4
Packit 1fb8d4
#endif /* FREERDP_UNICODE_CONVERT_UTF_H */
Packit 1fb8d4