/* utf8.c - collected routines for conversion to/from utf8 */
#include "config.h"
#include "utf8.h"

/*
 * Convert one utf8 sequence to its numeric value.
 *
 * inptr: address of a pointer to the first byte of the sequence.
 * cnt:   either 0 (length unknown, no length check is made) or the
 *        number of bytes available starting at *inptr.
 * err:   receives 0 on success or a nonzero error value on failure.
 *
 * On success: update *inptr to be the first nonread character,
 *   set *err to 0, and return the obtained value.
 * On failure: leave *inptr unchanged, set *err to some nonzero error value:
 *   UTF8_BAD: bad utf8, UTF8_SHORT: input too short
 *   and return 0.
 *
 * Sequences of 1 to 6 bytes are accepted (values up to 31 bits).
 * Note: overlong encodings and surrogate values are not rejected here.
 */
unsigned long
from_utf8(char **inptr, int cnt, int *err)
{
	unsigned char *in;
	unsigned int uc, uc2;
	int need, bit;

	in = (unsigned char *)(*inptr);
	uc = *in++;

	/*
	 * Count the leading 1 bits of the first byte: 0 for a plain
	 * single-byte character, otherwise the total sequence length.
	 */
	need = 0;
	bit  = 0x80;
	while (uc & bit) {
		need++;
		bit >>= 1;
	}

	/*
	 * need == 1: a continuation byte (10xxxxxx) where a lead byte
	 *            was expected.
	 * need >  6: 0xFE or 0xFF, which never start a sequence; decoding
	 *            them would shift more than 32 bits into uc.
	 * These are checked before the length test because more input can
	 * never make them valid.
	 */
	if (need == 1 || need > 6) {
		*err = UTF8_BAD;
		return 0;
	}
	if (cnt && cnt < need) {
		*err = UTF8_SHORT;
		return 0;
	}

	/* Keep only the payload bits of the lead byte. */
	uc &= (unsigned int)(bit - 1);

	/* Fold in the need-1 continuation bytes, 6 payload bits each. */
	for (; need > 1; need--) {
		uc2 = *in++;
		if ((uc2 & 0xc0) != 0x80) {
			*err = UTF8_BAD;
			return 0;
		}
		uc = ((uc << 6) | (uc2 & 0x3f));
	}

	*inptr = (char *)in;
	*err   = 0;
	return uc;
}