Blame Unicode/Unicode.xs

Packit d0f5c2
/*
Packit d0f5c2
 $Id: Unicode.xs,v 2.17 2018/02/08 00:26:15 dankogai Exp $
Packit d0f5c2
 */
Packit d0f5c2
Packit d0f5c2
#define PERL_NO_GET_CONTEXT
Packit d0f5c2
#include "EXTERN.h"
Packit d0f5c2
#include "perl.h"
Packit d0f5c2
#include "XSUB.h"
Packit d0f5c2
#include "../Encode/encode.h"
Packit d0f5c2
Packit d0f5c2
#define FBCHAR			0xFFFd
Packit d0f5c2
#define BOM_BE			0xFeFF
Packit d0f5c2
#define BOM16LE			0xFFFe
Packit d0f5c2
#define BOM32LE			0xFFFe0000
Packit d0f5c2
#define issurrogate(x)		(0xD800 <= (x)  && (x) <= 0xDFFF )
Packit d0f5c2
#define isHiSurrogate(x)	(0xD800 <= (x)  && (x) <  0xDC00 )
Packit d0f5c2
#define isLoSurrogate(x)	(0xDC00 <= (x)  && (x) <= 0xDFFF )
Packit d0f5c2
#define invalid_ucs2(x)         ( issurrogate(x) || 0xFFFF < (x) )
Packit d0f5c2
Packit d0f5c2
/* For pre-5.14 source compatibility */
Packit d0f5c2
#ifndef UNICODE_WARN_ILLEGAL_INTERCHANGE
Packit d0f5c2
#   define UNICODE_WARN_ILLEGAL_INTERCHANGE 0
Packit d0f5c2
#   define UTF8_DISALLOW_SURROGATE 0
Packit d0f5c2
#   define UTF8_WARN_SURROGATE 0
Packit d0f5c2
#   define UTF8_DISALLOW_FE_FF 0
Packit d0f5c2
#   define UTF8_WARN_FE_FF 0
Packit d0f5c2
#   define UTF8_WARN_NONCHAR 0
Packit d0f5c2
#endif
Packit d0f5c2
Packit d0f5c2
#define PERLIO_BUFSIZ 1024 /* XXX value comes from PerlIOEncode_get_base */
Packit d0f5c2
Packit d0f5c2
/* Avoid wasting too much space in the result buffer */
Packit d0f5c2
/* static void */
Packit d0f5c2
/* shrink_buffer(SV *result) */
Packit d0f5c2
/* { */
Packit d0f5c2
/*     if (SvLEN(result) > 42 + SvCUR(result)) { */
Packit d0f5c2
/* 	char *buf; */
Packit d0f5c2
/* 	STRLEN len = 1 + SvCUR(result); /\* include the NUL byte *\/ */
Packit d0f5c2
/* 	New(0, buf, len, char); */
Packit d0f5c2
/* 	Copy(SvPVX(result), buf, len, char); */
Packit d0f5c2
/* 	Safefree(SvPVX(result)); */
Packit d0f5c2
/* 	SvPV_set(result, buf); */
Packit d0f5c2
/* 	SvLEN_set(result, len); */
Packit d0f5c2
/*     } */
Packit d0f5c2
/* } */
Packit d0f5c2
Packit d0f5c2
#define shrink_buffer(result) { \
Packit d0f5c2
    if (SvLEN(result) > 42 + SvCUR(result)) { \
Packit d0f5c2
	char *newpv; \
Packit d0f5c2
	STRLEN newlen = 1 + SvCUR(result); /* include the NUL byte */ \
Packit d0f5c2
	New(0, newpv, newlen, char); \
Packit d0f5c2
	Copy(SvPVX(result), newpv, newlen, char); \
Packit d0f5c2
	Safefree(SvPVX(result)); \
Packit d0f5c2
	SvPV_set(result, newpv); \
Packit d0f5c2
	SvLEN_set(result, newlen); \
Packit d0f5c2
    } \
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
static UV
Packit d0f5c2
enc_unpack(pTHX_ U8 **sp, U8 *e, STRLEN size, U8 endian)
Packit d0f5c2
{
Packit d0f5c2
    U8 *s = *sp;
Packit d0f5c2
    UV v = 0;
Packit d0f5c2
    if (s+size > e) {
Packit d0f5c2
	croak("Partial character %c",(char) endian);
Packit d0f5c2
    }
Packit d0f5c2
    switch(endian) {
Packit d0f5c2
    case 'N':
Packit d0f5c2
	v = *s++;
Packit d0f5c2
	v = (v << 8) | *s++;
Packit d0f5c2
    case 'n':
Packit d0f5c2
	v = (v << 8) | *s++;
Packit d0f5c2
	v = (v << 8) | *s++;
Packit d0f5c2
	break;
Packit d0f5c2
    case 'V':
Packit d0f5c2
    case 'v':
Packit d0f5c2
	v |= *s++;
Packit d0f5c2
	v |= (*s++ << 8);
Packit d0f5c2
	if (endian == 'v')
Packit d0f5c2
	    break;
Packit d0f5c2
	v |= (*s++ << 16);
Packit d0f5c2
	v |= ((UV)*s++ << 24);
Packit d0f5c2
	break;
Packit d0f5c2
    default:
Packit d0f5c2
	croak("Unknown endian %c",(char) endian);
Packit d0f5c2
	break;
Packit d0f5c2
    }
Packit d0f5c2
    *sp = s;
Packit d0f5c2
    return v;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
static void
Packit d0f5c2
enc_pack(pTHX_ SV *result, STRLEN size, U8 endian, UV value)
Packit d0f5c2
{
Packit d0f5c2
    U8 *d = (U8 *) SvPV_nolen(result);
Packit d0f5c2
Packit d0f5c2
    switch(endian) {
Packit d0f5c2
    case 'v':
Packit d0f5c2
    case 'V':
Packit d0f5c2
	d += SvCUR(result);
Packit d0f5c2
	SvCUR_set(result,SvCUR(result)+size);
Packit d0f5c2
	while (size--) {
Packit d0f5c2
	    *d++ = (U8)(value & 0xFF);
Packit d0f5c2
	    value >>= 8;
Packit d0f5c2
	}
Packit d0f5c2
	break;
Packit d0f5c2
    case 'n':
Packit d0f5c2
    case 'N':
Packit d0f5c2
	SvCUR_set(result,SvCUR(result)+size);
Packit d0f5c2
	d += SvCUR(result);
Packit d0f5c2
	while (size--) {
Packit d0f5c2
	    *--d = (U8)(value & 0xFF);
Packit d0f5c2
	    value >>= 8;
Packit d0f5c2
	}
Packit d0f5c2
	break;
Packit d0f5c2
    default:
Packit d0f5c2
	croak("Unknown endian %c",(char) endian);
Packit d0f5c2
	break;
Packit d0f5c2
    }
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
MODULE = Encode::Unicode PACKAGE = Encode::Unicode
Packit d0f5c2
Packit d0f5c2
PROTOTYPES: DISABLE
Packit d0f5c2
Packit d0f5c2
#define attr(k, l)  (hv_exists((HV *)SvRV(obj),k,l) ? \
Packit d0f5c2
    *hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef)
Packit d0f5c2
Packit d0f5c2
void
Packit d0f5c2
decode(obj, str, check = 0)
Packit d0f5c2
SV *	obj
Packit d0f5c2
SV *	str
Packit d0f5c2
IV	check
Packit d0f5c2
CODE:
Packit d0f5c2
{
Packit d0f5c2
    SV *sve      = attr("endian", 6);
Packit d0f5c2
    U8 endian    = *((U8 *)SvPV_nolen(sve));
Packit d0f5c2
    SV *svs      = attr("size", 4);
Packit d0f5c2
    int size     = SvIV(svs);
Packit d0f5c2
    int ucs2     = -1; /* only needed in the event of surrogate pairs */
Packit d0f5c2
    SV *result   = newSVpvn("",0);
Packit d0f5c2
    STRLEN usize = (size > 0 ? size : 1); /* protect against rogue size<=0 */
Packit d0f5c2
    STRLEN ulen;
Packit d0f5c2
    STRLEN resultbuflen;
Packit d0f5c2
    U8 *resultbuf;
Packit d0f5c2
    U8 *s;
Packit d0f5c2
    U8 *e;
Packit d0f5c2
    bool modify = (check && !(check & ENCODE_LEAVE_SRC));
Packit d0f5c2
    bool temp_result;
Packit d0f5c2
Packit d0f5c2
    SvGETMAGIC(str);
Packit d0f5c2
    if (!SvOK(str))
Packit d0f5c2
        XSRETURN_UNDEF;
Packit d0f5c2
    s = modify ? (U8 *)SvPV_force_nomg(str, ulen) : (U8 *)SvPV_nomg(str, ulen);
Packit d0f5c2
    if (SvUTF8(str)) {
Packit d0f5c2
        if (!modify) {
Packit d0f5c2
            SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen));
Packit d0f5c2
            SvUTF8_on(tmp);
Packit d0f5c2
            if (SvTAINTED(str))
Packit d0f5c2
                SvTAINTED_on(tmp);
Packit d0f5c2
            str = tmp;
Packit d0f5c2
            s = (U8 *)SvPVX(str);
Packit d0f5c2
        }
Packit d0f5c2
        if (ulen) {
Packit d0f5c2
            if (!utf8_to_bytes(s, &ulen))
Packit d0f5c2
                croak("Wide character");
Packit d0f5c2
            SvCUR_set(str, ulen);
Packit d0f5c2
        }
Packit d0f5c2
        SvUTF8_off(str);
Packit d0f5c2
    }
Packit d0f5c2
    e = s+ulen;
Packit d0f5c2
Packit d0f5c2
    /* Optimise for the common case of being called from PerlIOEncode_fill()
Packit d0f5c2
       with a standard length buffer. In this case the result SV's buffer is
Packit d0f5c2
       only used temporarily, so we can afford to allocate the maximum needed
Packit d0f5c2
       and not care about unused space. */
Packit d0f5c2
    temp_result = (ulen == PERLIO_BUFSIZ);
Packit d0f5c2
Packit d0f5c2
    ST(0) = sv_2mortal(result);
Packit d0f5c2
    SvUTF8_on(result);
Packit d0f5c2
Packit d0f5c2
    if (!endian && s+size <= e) {
Packit d0f5c2
	SV *sv;
Packit d0f5c2
	UV bom;
Packit d0f5c2
	endian = (size == 4) ? 'N' : 'n';
Packit d0f5c2
	bom = enc_unpack(aTHX_ &s,e,size,endian);
Packit d0f5c2
	if (bom != BOM_BE) {
Packit d0f5c2
	    if (bom == BOM16LE) {
Packit d0f5c2
		endian = 'v';
Packit d0f5c2
	    }
Packit d0f5c2
	    else if (bom == BOM32LE) {
Packit d0f5c2
		endian = 'V';
Packit d0f5c2
	    }
Packit d0f5c2
	    else {
Packit d0f5c2
               /* No BOM found, use big-endian fallback as specified in
Packit d0f5c2
                * RFC2781 and the Unicode Standard version 8.0:
Packit d0f5c2
                *
Packit d0f5c2
                *  The UTF-16 encoding scheme may or may not begin with
Packit d0f5c2
                *  a BOM. However, when there is no BOM, and in the
Packit d0f5c2
                *  absence of a higher-level protocol, the byte order
Packit d0f5c2
                *  of the UTF-16 encoding scheme is big-endian.
Packit d0f5c2
                *
Packit d0f5c2
                *  If the first two octets of the text is not 0xFE
Packit d0f5c2
                *  followed by 0xFF, and is not 0xFF followed by 0xFE,
Packit d0f5c2
                *  then the text SHOULD be interpreted as big-endian.
Packit d0f5c2
                */
Packit d0f5c2
                s -= size;
Packit d0f5c2
	    }
Packit d0f5c2
	}
Packit d0f5c2
#if 1
Packit d0f5c2
	/* Update endian for next sequence */
Packit d0f5c2
	sv = attr("renewed", 7);
Packit d0f5c2
	if (SvTRUE(sv)) {
Packit d0f5c2
	    (void)hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
Packit d0f5c2
	}
Packit d0f5c2
#endif
Packit d0f5c2
    }
Packit d0f5c2
Packit d0f5c2
    if (temp_result) {
Packit d0f5c2
	resultbuflen = 1 + ulen/usize * UTF8_MAXLEN;
Packit d0f5c2
    } else {
Packit d0f5c2
	/* Preallocate the buffer to the minimum possible space required. */
Packit d0f5c2
	resultbuflen = ulen/usize + UTF8_MAXLEN + 1;
Packit d0f5c2
    }
Packit d0f5c2
    resultbuf = (U8 *) SvGROW(result, resultbuflen);
Packit d0f5c2
Packit d0f5c2
    while (s < e && s+size <= e) {
Packit d0f5c2
	UV ord = enc_unpack(aTHX_ &s,e,size,endian);
Packit d0f5c2
	U8 *d;
Packit d0f5c2
	if (issurrogate(ord)) {
Packit d0f5c2
	    if (ucs2 == -1) {
Packit d0f5c2
		SV *sv = attr("ucs2", 4);
Packit d0f5c2
		ucs2 = SvTRUE(sv);
Packit d0f5c2
	    }
Packit d0f5c2
	    if (ucs2 || size == 4) {
Packit d0f5c2
		if (check) {
Packit d0f5c2
		    croak("%" SVf ":no surrogates allowed %" UVxf,
Packit d0f5c2
			  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
Packit d0f5c2
			  ord);
Packit d0f5c2
		}
Packit d0f5c2
		ord = FBCHAR;
Packit d0f5c2
	    }
Packit d0f5c2
	    else {
Packit d0f5c2
		UV lo;
Packit d0f5c2
		if (!isHiSurrogate(ord)) {
Packit d0f5c2
		    if (check) {
Packit d0f5c2
			croak("%" SVf ":Malformed HI surrogate %" UVxf,
Packit d0f5c2
			      *hv_fetch((HV *)SvRV(obj),"Name",4,0),
Packit d0f5c2
			      ord);
Packit d0f5c2
		    }
Packit d0f5c2
		    else {
Packit d0f5c2
			ord = FBCHAR;
Packit d0f5c2
		    }
Packit d0f5c2
		}
Packit d0f5c2
		else if (s+size > e) {
Packit d0f5c2
		    if (check) {
Packit d0f5c2
		        if (check & ENCODE_STOP_AT_PARTIAL) {
Packit d0f5c2
		             s -= size;
Packit d0f5c2
		             break;
Packit d0f5c2
		        }
Packit d0f5c2
		        else {
Packit d0f5c2
		             croak("%" SVf ":Malformed HI surrogate %" UVxf,
Packit d0f5c2
				   *hv_fetch((HV *)SvRV(obj),"Name",4,0),
Packit d0f5c2
				   ord);
Packit d0f5c2
		        }
Packit d0f5c2
		    }
Packit d0f5c2
		    else {
Packit d0f5c2
		        ord = FBCHAR;
Packit d0f5c2
		    }
Packit d0f5c2
		}
Packit d0f5c2
		else {
Packit d0f5c2
		    lo = enc_unpack(aTHX_ &s,e,size,endian);
Packit d0f5c2
		    if (!isLoSurrogate(lo)) {
Packit d0f5c2
			if (check) {
Packit d0f5c2
			    croak("%" SVf ":Malformed LO surrogate %" UVxf,
Packit d0f5c2
				  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
Packit d0f5c2
				  ord);
Packit d0f5c2
			}
Packit d0f5c2
			else {
Packit d0f5c2
			    s -= size;
Packit d0f5c2
			    ord = FBCHAR;
Packit d0f5c2
			}
Packit d0f5c2
		    }
Packit d0f5c2
		    else {
Packit d0f5c2
			ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
Packit d0f5c2
		    }
Packit d0f5c2
		}
Packit d0f5c2
	    }
Packit d0f5c2
	}
Packit d0f5c2
Packit d0f5c2
	if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) {
Packit d0f5c2
	    if (check) {
Packit d0f5c2
		croak("%" SVf ":Unicode character %" UVxf " is illegal",
Packit d0f5c2
		      *hv_fetch((HV *)SvRV(obj),"Name",4,0),
Packit d0f5c2
		      ord);
Packit d0f5c2
	    } else {
Packit d0f5c2
		ord = FBCHAR;
Packit d0f5c2
	    }
Packit d0f5c2
	}
Packit d0f5c2
Packit d0f5c2
	if (resultbuflen < SvCUR(result) + UTF8_MAXLEN + 1) {
Packit d0f5c2
	    /* Do not allocate >8Mb more than the minimum needed.
Packit d0f5c2
	       This prevents allocating too much in the rogue case of a large
Packit d0f5c2
	       input consisting initially of long sequence uft8-byte unicode
Packit d0f5c2
	       chars followed by single utf8-byte chars. */
Packit d0f5c2
            /* +1 
Packit d0f5c2
               fixes  Unicode.xs!decode_xs n-byte heap-overflow
Packit d0f5c2
              */
Packit d0f5c2
	    STRLEN remaining = (e - s)/usize + 1; /* +1 to avoid the leak */
Packit d0f5c2
	    STRLEN max_alloc = remaining + (8*1024*1024);
Packit d0f5c2
	    STRLEN est_alloc = remaining * UTF8_MAXLEN;
Packit d0f5c2
	    STRLEN newlen = SvLEN(result) + /* min(max_alloc, est_alloc) */
Packit d0f5c2
		(est_alloc > max_alloc ? max_alloc : est_alloc);
Packit d0f5c2
	    resultbuf = (U8 *) SvGROW(result, newlen);
Packit d0f5c2
	    resultbuflen = SvLEN(result);
Packit d0f5c2
	}
Packit d0f5c2
Packit d0f5c2
	d = uvchr_to_utf8_flags(resultbuf+SvCUR(result), ord,
Packit d0f5c2
                                            UNICODE_WARN_ILLEGAL_INTERCHANGE);
Packit d0f5c2
	SvCUR_set(result, d - (U8 *)SvPVX(result));
Packit d0f5c2
    }
Packit d0f5c2
Packit d0f5c2
    if (s < e) {
Packit d0f5c2
	/* unlikely to happen because it's fixed-length -- dankogai */
Packit d0f5c2
	if (check & ENCODE_WARN_ON_ERR) {
Packit d0f5c2
	    Perl_warner(aTHX_ packWARN(WARN_UTF8),"%" SVf ":Partial character",
Packit d0f5c2
			*hv_fetch((HV *)SvRV(obj),"Name",4,0));
Packit d0f5c2
	}
Packit d0f5c2
    }
Packit d0f5c2
    if (check && !(check & ENCODE_LEAVE_SRC)) {
Packit d0f5c2
	if (s < e) {
Packit d0f5c2
	    Move(s,SvPVX(str),e-s,U8);
Packit d0f5c2
	    SvCUR_set(str,(e-s));
Packit d0f5c2
	}
Packit d0f5c2
	else {
Packit d0f5c2
	    SvCUR_set(str,0);
Packit d0f5c2
	}
Packit d0f5c2
	*SvEND(str) = '\0';
Packit d0f5c2
	SvSETMAGIC(str);
Packit d0f5c2
    }
Packit d0f5c2
Packit d0f5c2
    if (!temp_result) shrink_buffer(result);
Packit d0f5c2
    if (SvTAINTED(str)) SvTAINTED_on(result); /* propagate taintedness */
Packit d0f5c2
    XSRETURN(1);
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
void
Packit d0f5c2
encode(obj, utf8, check = 0)
Packit d0f5c2
SV *	obj
Packit d0f5c2
SV *	utf8
Packit d0f5c2
IV	check
Packit d0f5c2
CODE:
Packit d0f5c2
{
Packit d0f5c2
    SV *sve = attr("endian", 6);
Packit d0f5c2
    U8 endian = *((U8 *)SvPV_nolen(sve));
Packit d0f5c2
    SV *svs = attr("size", 4);
Packit d0f5c2
    const int size = SvIV(svs);
Packit d0f5c2
    int ucs2 = -1; /* only needed if there is invalid_ucs2 input */
Packit d0f5c2
    const STRLEN usize = (size > 0 ? size : 1);
Packit d0f5c2
    SV *result = newSVpvn("", 0);
Packit d0f5c2
    STRLEN ulen;
Packit d0f5c2
    U8 *s;
Packit d0f5c2
    U8 *e;
Packit d0f5c2
    bool modify = (check && !(check & ENCODE_LEAVE_SRC));
Packit d0f5c2
    bool temp_result;
Packit d0f5c2
Packit d0f5c2
    SvGETMAGIC(utf8);
Packit d0f5c2
    if (!SvOK(utf8))
Packit d0f5c2
        XSRETURN_UNDEF;
Packit d0f5c2
    s = modify ? (U8 *)SvPV_force_nomg(utf8, ulen) : (U8 *)SvPV_nomg(utf8, ulen);
Packit d0f5c2
    if (!SvUTF8(utf8)) {
Packit d0f5c2
        if (!modify) {
Packit d0f5c2
            SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen));
Packit d0f5c2
            if (SvTAINTED(utf8))
Packit d0f5c2
                SvTAINTED_on(tmp);
Packit d0f5c2
            utf8 = tmp;
Packit d0f5c2
        }
Packit d0f5c2
        sv_utf8_upgrade_nomg(utf8);
Packit d0f5c2
        s = (U8 *)SvPV_nomg(utf8, ulen);
Packit d0f5c2
    }
Packit d0f5c2
    e = s+ulen;
Packit d0f5c2
Packit d0f5c2
    /* Optimise for the common case of being called from PerlIOEncode_flush()
Packit d0f5c2
       with a standard length buffer. In this case the result SV's buffer is
Packit d0f5c2
       only used temporarily, so we can afford to allocate the maximum needed
Packit d0f5c2
       and not care about unused space. */
Packit d0f5c2
    temp_result = (ulen == PERLIO_BUFSIZ);
Packit d0f5c2
Packit d0f5c2
    ST(0) = sv_2mortal(result);
Packit d0f5c2
Packit d0f5c2
    /* Preallocate the result buffer to the maximum possible size.
Packit d0f5c2
       ie. assume each UTF8 byte is 1 character.
Packit d0f5c2
       Then shrink the result's buffer if necesary at the end. */
Packit d0f5c2
    SvGROW(result, ((ulen+1) * usize));
Packit d0f5c2
Packit d0f5c2
    if (!endian) {
Packit d0f5c2
	SV *sv;
Packit d0f5c2
	endian = (size == 4) ? 'N' : 'n';
Packit d0f5c2
	enc_pack(aTHX_ result,size,endian,BOM_BE);
Packit d0f5c2
#if 1
Packit d0f5c2
	/* Update endian for next sequence */
Packit d0f5c2
	sv = attr("renewed", 7);
Packit d0f5c2
	if (SvTRUE(sv)) {
Packit d0f5c2
	    (void)hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
Packit d0f5c2
	}
Packit d0f5c2
#endif
Packit d0f5c2
    }
Packit d0f5c2
    while (s < e && s+UTF8SKIP(s) <= e) {
Packit d0f5c2
	STRLEN len;
Packit d0f5c2
	UV ord = utf8n_to_uvchr(s, e-s, &len, (UTF8_DISALLOW_SURROGATE
Packit d0f5c2
                                               |UTF8_WARN_SURROGATE
Packit d0f5c2
                                               |UTF8_DISALLOW_FE_FF
Packit d0f5c2
                                               |UTF8_WARN_FE_FF
Packit d0f5c2
                                               |UTF8_WARN_NONCHAR));
Packit d0f5c2
	s += len;
Packit d0f5c2
	if (size != 4 && invalid_ucs2(ord)) {
Packit d0f5c2
	    if (!issurrogate(ord)) {
Packit d0f5c2
		if (ucs2 == -1) {
Packit d0f5c2
		    SV *sv = attr("ucs2", 4);
Packit d0f5c2
		    ucs2 = SvTRUE(sv);
Packit d0f5c2
		}
Packit d0f5c2
		if (ucs2 || ord > 0x10FFFF) {
Packit d0f5c2
		    if (check) {
Packit d0f5c2
			croak("%" SVf ":code point \"\\x{%" UVxf "}\" too high",
Packit d0f5c2
				  *hv_fetch((HV *)SvRV(obj),"Name",4,0),ord);
Packit d0f5c2
		    }
Packit d0f5c2
		    enc_pack(aTHX_ result,size,endian,FBCHAR);
Packit d0f5c2
		} else {
Packit d0f5c2
		    UV hi = ((ord - 0x10000) >> 10)   + 0xD800;
Packit d0f5c2
		    UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00;
Packit d0f5c2
		    enc_pack(aTHX_ result,size,endian,hi);
Packit d0f5c2
		    enc_pack(aTHX_ result,size,endian,lo);
Packit d0f5c2
		}
Packit d0f5c2
	    }
Packit d0f5c2
	    else {
Packit d0f5c2
		/* not supposed to happen */
Packit d0f5c2
		enc_pack(aTHX_ result,size,endian,FBCHAR);
Packit d0f5c2
	    }
Packit d0f5c2
	}
Packit d0f5c2
	else {
Packit d0f5c2
	    enc_pack(aTHX_ result,size,endian,ord);
Packit d0f5c2
	}
Packit d0f5c2
    }
Packit d0f5c2
    if (s < e) {
Packit d0f5c2
	/* UTF-8 partial char happens often on PerlIO.
Packit d0f5c2
	   Since this is okay and normal, we do not warn.
Packit d0f5c2
	   But this is critical when you choose to LEAVE_SRC
Packit d0f5c2
	   in which case we die */
Packit d0f5c2
	if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)) {
Packit d0f5c2
	    Perl_croak(aTHX_ "%" SVf ":partial character is not allowed "
Packit d0f5c2
		       "when CHECK = 0x%" UVuf,
Packit d0f5c2
		       *hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
Packit d0f5c2
	}
Packit d0f5c2
    }
Packit d0f5c2
    if (check && !(check & ENCODE_LEAVE_SRC)) {
Packit d0f5c2
	if (s < e) {
Packit d0f5c2
	    Move(s,SvPVX(utf8),e-s,U8);
Packit d0f5c2
	    SvCUR_set(utf8,(e-s));
Packit d0f5c2
	}
Packit d0f5c2
	else {
Packit d0f5c2
	    SvCUR_set(utf8,0);
Packit d0f5c2
	}
Packit d0f5c2
	*SvEND(utf8) = '\0';
Packit d0f5c2
	SvSETMAGIC(utf8);
Packit d0f5c2
    }
Packit d0f5c2
Packit d0f5c2
    if (!temp_result) shrink_buffer(result);
Packit d0f5c2
    if (SvTAINTED(utf8)) SvTAINTED_on(result); /* propagate taintedness */
Packit d0f5c2
Packit d0f5c2
    XSRETURN(1);
Packit d0f5c2
}