|
Packit |
fd8b60 |
/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
Packit |
fd8b60 |
/*
|
|
Packit |
fd8b60 |
* Copyright (C) 2008 by the Massachusetts Institute of Technology,
|
|
Packit |
fd8b60 |
* Cambridge, MA, USA. All Rights Reserved.
|
|
Packit |
fd8b60 |
*
|
|
Packit |
fd8b60 |
* This software is being provided to you, the LICENSEE, by the
|
|
Packit |
fd8b60 |
* Massachusetts Institute of Technology (M.I.T.) under the following
|
|
Packit |
fd8b60 |
* license. By obtaining, using and/or copying this software, you agree
|
|
Packit |
fd8b60 |
* that you have read, understood, and will comply with these terms and
|
|
Packit |
fd8b60 |
* conditions:
|
|
Packit |
fd8b60 |
*
|
|
Packit |
fd8b60 |
* Export of this software from the United States of America may
|
|
Packit |
fd8b60 |
* require a specific license from the United States Government.
|
|
Packit |
fd8b60 |
* It is the responsibility of any person or organization contemplating
|
|
Packit |
fd8b60 |
* export to obtain such a license before exporting.
|
|
Packit |
fd8b60 |
*
|
|
Packit |
fd8b60 |
* WITHIN THAT CONSTRAINT, permission to use, copy, modify and distribute
|
|
Packit |
fd8b60 |
* this software and its documentation for any purpose and without fee or
|
|
Packit |
fd8b60 |
* royalty is hereby granted, provided that you agree to comply with the
|
|
Packit |
fd8b60 |
* following copyright notice and statements, including the disclaimer, and
|
|
Packit |
fd8b60 |
* that the same appear on ALL copies of the software and documentation,
|
|
Packit |
fd8b60 |
* including modifications that you make for internal use or for
|
|
Packit |
fd8b60 |
* distribution:
|
|
Packit |
fd8b60 |
*
|
|
Packit |
fd8b60 |
* THIS SOFTWARE IS PROVIDED "AS IS", AND M.I.T. MAKES NO REPRESENTATIONS
|
|
Packit |
fd8b60 |
* OR WARRANTIES, EXPRESS OR IMPLIED. By way of example, but not
|
|
Packit |
fd8b60 |
* limitation, M.I.T. MAKES NO REPRESENTATIONS OR WARRANTIES OF
|
|
Packit |
fd8b60 |
* MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF
|
|
Packit |
fd8b60 |
* THE LICENSED SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
|
|
Packit |
fd8b60 |
* PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
|
|
Packit |
fd8b60 |
*
|
|
Packit |
fd8b60 |
* The name of the Massachusetts Institute of Technology or M.I.T. may NOT
|
|
Packit |
fd8b60 |
* be used in advertising or publicity pertaining to distribution of the
|
|
Packit |
fd8b60 |
* software. Title to copyright in this software and any associated
|
|
Packit |
fd8b60 |
* documentation shall at all times remain with M.I.T., and USER agrees to
|
|
Packit |
fd8b60 |
* preserve same.
|
|
Packit |
fd8b60 |
*
|
|
Packit |
fd8b60 |
* Furthermore if you modify this software you must label
|
|
Packit |
fd8b60 |
* your software as modified software and not distribute it in such a
|
|
Packit |
fd8b60 |
* fashion that it might be confused with the original M.I.T. software.
|
|
Packit |
fd8b60 |
*/
|
|
Packit |
fd8b60 |
/*
|
|
Packit |
fd8b60 |
* Copyright 1998-2008 The OpenLDAP Foundation.
|
|
Packit |
fd8b60 |
* All rights reserved.
|
|
Packit |
fd8b60 |
*
|
|
Packit |
fd8b60 |
* Redistribution and use in source and binary forms, with or without
|
|
Packit |
fd8b60 |
* modification, are permitted only as authorized by the OpenLDAP
|
|
Packit |
fd8b60 |
* Public License.
|
|
Packit |
fd8b60 |
*
|
|
Packit |
fd8b60 |
* A copy of this license is available in file LICENSE in the
|
|
Packit |
fd8b60 |
* top-level directory of the distribution or, alternatively, at
|
|
Packit |
fd8b60 |
* <https://www.OpenLDAP.org/license.html>.
|
|
Packit |
fd8b60 |
*/
|
|
Packit |
fd8b60 |
/*
|
|
Packit |
fd8b60 |
* Copyright (C) 2000 Novell, Inc. All Rights Reserved.
|
|
Packit |
fd8b60 |
*
|
|
Packit |
fd8b60 |
* THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND TREATIES.
|
|
Packit |
fd8b60 |
* USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT TO VERSION
|
|
Packit |
fd8b60 |
* 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS AVAILABLE AT
|
|
Packit |
fd8b60 |
* HTTPS://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE" IN THE
|
|
Packit |
fd8b60 |
* TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION OF THIS
|
|
Packit |
fd8b60 |
* WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP PUBLIC
|
|
Packit |
fd8b60 |
* LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT THE
|
|
Packit |
fd8b60 |
* PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY.
|
|
Packit |
fd8b60 |
*/
|
|
Packit |
fd8b60 |
/* This work is part of OpenLDAP Software <https://www.openldap.org/>. */
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
#ifndef K5_UTF8_H
|
|
Packit |
fd8b60 |
#define K5_UTF8_H
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
#include "k5-platform.h"
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* 16-bit unsigned value type used by the krb5int_*ucs2* conversion routines. */
typedef uint16_t krb5_ucs2;
|
|
Packit |
fd8b60 |
/* 32-bit unsigned value type used by the krb5int_*ucs4* conversion routines. */
typedef uint32_t krb5_ucs4;
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out);
|
|
Packit |
fd8b60 |
size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out);
|
|
Packit |
fd8b60 |
size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/*
|
|
Packit |
fd8b60 |
* Convert a little-endian UTF-16 string to an allocated null-terminated UTF-8
|
|
Packit |
fd8b60 |
* string. nbytes is the length of utf16bytes in bytes, and must be an even
|
|
Packit |
fd8b60 |
* number. Return EINVAL on invalid input, ENOMEM on out of memory, or 0 on
|
|
Packit |
fd8b60 |
* success.
|
|
Packit |
fd8b60 |
*/
|
|
Packit |
fd8b60 |
int k5_utf16le_to_utf8(const uint8_t *utf16bytes, size_t nbytes,
|
|
Packit |
fd8b60 |
char **utf8_out);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/*
|
|
Packit |
fd8b60 |
* Convert a UTF-8 string to an allocated little-endian UTF-16 string. The
|
|
Packit |
fd8b60 |
* resulting length is in bytes and will always be even. Return EINVAL on
|
|
Packit |
fd8b60 |
* invalid input, ENOMEM on out of memory, or 0 on success.
|
|
Packit |
fd8b60 |
*/
|
|
Packit |
fd8b60 |
int k5_utf8_to_utf16le(const char *utf8, uint8_t **utf16_out,
|
|
Packit |
fd8b60 |
size_t *nbytes_out);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* returns the number of bytes in the UTF-8 string */
|
|
Packit |
fd8b60 |
size_t krb5int_utf8_bytes(const char *);
|
|
Packit |
fd8b60 |
/* returns the number of UTF-8 characters in the string */
|
|
Packit |
fd8b60 |
size_t krb5int_utf8_chars(const char *);
|
|
Packit |
fd8b60 |
/* returns the number of UTF-8 characters in the counted string */
|
|
Packit |
fd8b60 |
size_t krb5int_utf8c_chars(const char *, size_t);
|
|
Packit |
fd8b60 |
/* returns the length (in bytes) of the UTF-8 character */
|
|
Packit |
fd8b60 |
int krb5int_utf8_offset(const char *);
|
|
Packit |
fd8b60 |
/* returns the length (in bytes) indicated by the UTF-8 character */
|
|
Packit |
fd8b60 |
int krb5int_utf8_charlen(const char *);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* returns the length (in bytes) indicated by the UTF-8 character
|
|
Packit |
fd8b60 |
* also checks that shortest possible encoding was used
|
|
Packit |
fd8b60 |
*/
|
|
Packit |
fd8b60 |
int krb5int_utf8_charlen2(const char *);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* copies a UTF-8 character and returns the number of bytes copied */
|
|
Packit |
fd8b60 |
int krb5int_utf8_copy(char *, const char *);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* returns a pointer to the next UTF-8 character in the string */
|
|
Packit |
fd8b60 |
char *krb5int_utf8_next( const char *);
|
|
Packit |
fd8b60 |
/* returns a pointer to the previous UTF-8 character in the string */
|
|
Packit |
fd8b60 |
char *krb5int_utf8_prev( const char *);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* primitive ctype routines -- not aware of non-ascii characters */
|
|
Packit |
fd8b60 |
int krb5int_utf8_isascii( const char *);
|
|
Packit |
fd8b60 |
int krb5int_utf8_isalpha( const char *);
|
|
Packit |
fd8b60 |
int krb5int_utf8_isalnum( const char *);
|
|
Packit |
fd8b60 |
int krb5int_utf8_isdigit( const char *);
|
|
Packit |
fd8b60 |
int krb5int_utf8_isxdigit( const char *);
|
|
Packit |
fd8b60 |
int krb5int_utf8_isspace( const char *);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* span characters not in set, return bytes spanned */
|
|
Packit |
fd8b60 |
size_t krb5int_utf8_strcspn( const char* str, const char *set);
|
|
Packit |
fd8b60 |
/* span characters in set, return bytes spanned */
|
|
Packit |
fd8b60 |
size_t krb5int_utf8_strspn( const char* str, const char *set);
|
|
Packit |
fd8b60 |
/* return first occurrence of character in string */
|
|
Packit |
fd8b60 |
char *krb5int_utf8_strchr( const char* str, const char *chr);
|
|
Packit |
fd8b60 |
/* return first character of set in string */
|
|
Packit |
fd8b60 |
char *krb5int_utf8_strpbrk( const char* str, const char *set);
|
|
Packit |
fd8b60 |
/* reentrant tokenizer */
|
|
Packit |
fd8b60 |
char *krb5int_utf8_strtok( char* sp, const char* sep, char **last);
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* Optimizations */
|
|
Packit |
fd8b60 |
/*
 * Lookup table consulted by KRB5_UTF8_CHARLEN for non-ASCII bytes; it is
 * indexed by the byte value with its high bit cleared (byte ^ 0x80).
 */
extern const char krb5int_utf8_lentab[128];
|
|
Packit |
fd8b60 |
/*
 * Lookup table consulted by KRB5_UTF8_CHARLEN2; it is indexed by the low
 * five bits of the first byte, and the entry is masked against the byte
 * that follows it.
 */
extern const char krb5int_utf8_mintab[32];
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* Read the byte at p as an unsigned char, whatever the signedness of char. */
#define KRB5_UTF8_BV(p) (((const unsigned char *)(p))[0])
|
|
Packit |
fd8b60 |
/* Yields 1 when the byte at p has its high bit (0x80) clear, else 0. */
#define KRB5_UTF8_ISASCII(p) ((KRB5_UTF8_BV(p) & 0x80) == 0)
|
|
Packit |
fd8b60 |
/*
 * Yields 1 for an ASCII byte; otherwise yields the krb5int_utf8_lentab
 * entry selected by the byte at p with its high bit cleared.  The argument
 * is expanded more than once, so it must not have side effects.
 */
#define KRB5_UTF8_CHARLEN(p) (!KRB5_UTF8_ISASCII(p) ?                   \
                              krb5int_utf8_lentab[KRB5_UTF8_BV(p) ^ 0x80] : 1)
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* This is like CHARLEN but additionally validates to make sure
|
|
Packit |
fd8b60 |
* the char used the shortest possible encoding.
|
|
Packit |
fd8b60 |
* 'l' is used to temporarily hold the result of CHARLEN.
|
|
Packit |
fd8b60 |
*/
|
|
Packit |
fd8b60 |
/*
 * Assigns KRB5_UTF8_CHARLEN(p) to l.  Yields 0 when that length is 3 or
 * more and the krb5int_utf8_mintab entry for the first byte has no bits in
 * common with the second byte; otherwise yields l.  p is expanded several
 * times, so it must not have side effects.
 */
#define KRB5_UTF8_CHARLEN2(p, l) (                                      \
        (((l) = KRB5_UTF8_CHARLEN(p)) >= 3 &&                           \
         !(krb5int_utf8_mintab[KRB5_UTF8_BV(p) & 0x1f] & (p)[1])) ?     \
        0 : (l))
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/*
 * Yields 1 for an ASCII byte without making a call; otherwise defers to
 * krb5int_utf8_offset().  The argument is expanded more than once.
 */
#define KRB5_UTF8_OFFSET(p) (!KRB5_UTF8_ISASCII(p)                      \
                             ? krb5int_utf8_offset(p) : 1)
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/*
 * Copy the character at s to d.  An ASCII byte is stored directly and the
 * macro yields 1; anything else is handed to krb5int_utf8_copy().  Both
 * arguments may be expanded more than once.
 */
#define KRB5_UTF8_COPY(d,s) (!KRB5_UTF8_ISASCII(s)                      \
                             ? krb5int_utf8_copy((d), (s))              \
                             : ((d)[0] = (s)[0], 1))
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/*
 * Yields a char pointer one byte past p when p points at an ASCII byte;
 * otherwise defers to krb5int_utf8_next().  The argument is expanded more
 * than once.
 */
#define KRB5_UTF8_NEXT(p) (!KRB5_UTF8_ISASCII(p)                        \
                           ? krb5int_utf8_next(p) : ((char *)(p) + 1))
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* Replace p with KRB5_UTF8_NEXT(p); p must be a modifiable lvalue. */
#define KRB5_UTF8_INCR(p) ((p) = KRB5_UTF8_NEXT(p))
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* For symmetry */
|
|
Packit |
fd8b60 |
/* Thin wrapper over krb5int_utf8_prev(); there is no ASCII shortcut here. */
#define KRB5_UTF8_PREV(p) (krb5int_utf8_prev(p))
|
|
Packit |
fd8b60 |
/* Replace p with KRB5_UTF8_PREV(p); p must be a modifiable lvalue. */
#define KRB5_UTF8_DECR(p) ((p) = KRB5_UTF8_PREV(p))
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/*
|
|
Packit |
fd8b60 |
* these macros assume 'c' is an ASCII character
|
|
Packit |
fd8b60 |
* and assume the "C" locale
|
|
Packit |
fd8b60 |
*/
|
|
Packit |
fd8b60 |
/* Yields 1 when bit 0x80 of c is clear, else 0. */
#define KRB5_ASCII(c) (((c) & 0x80) == 0)
|
|
Packit |
fd8b60 |
/* True for space, horizontal tab, or newline only. */
#define KRB5_SPACE(c) (' ' == (c) || '\t' == (c) || '\n' == (c))
|
|
Packit |
fd8b60 |
/* True when c lies in the range '0'..'9'. */
#define KRB5_DIGIT(c) ('0' <= (c) && (c) <= '9')
|
|
Packit |
fd8b60 |
/* True when c lies in the range 'a'..'z'. */
#define KRB5_LOWER(c) ('a' <= (c) && (c) <= 'z')
|
|
Packit |
fd8b60 |
/* True when c lies in the range 'A'..'Z'. */
#define KRB5_UPPER(c) ('A' <= (c) && (c) <= 'Z')
|
|
Packit |
fd8b60 |
/* True for a letter of either case ('a'..'z' or 'A'..'Z'). */
#define KRB5_ALPHA(c) (KRB5_LOWER(c) || KRB5_UPPER(c))
|
|
Packit |
fd8b60 |
/* True for a letter or a decimal digit. */
#define KRB5_ALNUM(c) (KRB5_ALPHA(c) || KRB5_DIGIT(c))
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* True for a letter, a decimal digit, or a hyphen ('-'). */
#define KRB5_LDH(c) (KRB5_ALNUM(c) || (c) == '-')
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
/* True when c lies in the range 'a'..'f'. */
#define KRB5_HEXLOWER(c) ('a' <= (c) && (c) <= 'f')
|
|
Packit |
fd8b60 |
/* True when c lies in the range 'A'..'F'. */
#define KRB5_HEXUPPER(c) ('A' <= (c) && (c) <= 'F')
|
|
Packit |
fd8b60 |
/* True for a hexadecimal digit: '0'..'9', 'a'..'f', or 'A'..'F'. */
#define KRB5_HEX(c) (KRB5_DIGIT(c) || KRB5_HEXLOWER(c) || KRB5_HEXUPPER(c))
|
|
Packit |
fd8b60 |
|
|
Packit |
fd8b60 |
#endif /* K5_UTF8_H */
|