Blame iconvdata/iso-2022-cn-ext.c

Packit 6c4009
/* Conversion module for ISO-2022-CN-EXT.
Packit 6c4009
   Copyright (C) 2000-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <dlfcn.h>
Packit 6c4009
#include <gconv.h>
Packit 6c4009
#include <stdint.h>
Packit 6c4009
#include <stdlib.h>
Packit 6c4009
#include <string.h>
Packit 6c4009
#include "gb2312.h"
Packit 6c4009
#include "iso-ir-165.h"
Packit 6c4009
#include "cns11643.h"
Packit 6c4009
#include "cns11643l1.h"
Packit 6c4009
#include "cns11643l2.h"
Packit 6c4009
#include <libc-diag.h>
Packit 6c4009
Packit 6c4009
#include <assert.h>
Packit 6c4009
Packit 6c4009
/* Escape: the first byte of every designation and single shift
   sequence handled below.  */
#define ESC	'\033'

/* The locking shifts are single bytes: SO selects the character set
   designated for SO, SI returns to ASCII.  The single shifts SS2 and
   SS3 are the two byte sequences ESC N and ESC O; each one applies to
   the two bytes which follow it.  */
#define SO	'\016'
#define SI	'\017'
#define SS2_0	ESC
#define SS2_1	'N'
#define SS3_0	ESC
#define SS3_1	'O'
Packit 6c4009
Packit 6c4009
/* Parameters for the body of the `gconv' function.  */
#define CHARSET_NAME		"ISO-2022-CN-EXT//"
#define ONE_DIRECTION		0
#define DEFINE_INIT		1
#define DEFINE_FINI		1

/* Names of the two conversion loops defined further down.  */
#define FROM_LOOP		from_iso2022cn_ext_loop
#define TO_LOOP			to_iso2022cn_ext_loop

/* Bytes consumed and produced per step when decoding: the input unit
   is between one byte and a four byte escape sequence, the output is
   always one UCS4 character.  */
#define FROM_LOOP_MIN_NEEDED_FROM	1
#define FROM_LOOP_MAX_NEEDED_FROM	4
#define FROM_LOOP_MIN_NEEDED_TO		4
#define FROM_LOOP_MAX_NEEDED_TO		4

/* The same bounds for encoding: the input is one UCS4 character.  */
#define TO_LOOP_MIN_NEEDED_FROM		4
#define TO_LOOP_MAX_NEEDED_FROM		4
#define TO_LOOP_MIN_NEEDED_TO		1
#define TO_LOOP_MAX_NEEDED_TO		6

/* Both loops receive a pointer to the COUNT element of the conversion
   state as an extra argument; SAVE_SET is the scratch variable used by
   SAVE_RESET_STATE.  */
#define PREPARE_LOOP \
  int *setp = &data->__statep->__count;					      \
  int save_set;
#define EXTRA_LOOP_ARGS		, setp
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Final bytes of the designation sequences for GB/T 12345-90,
   GB 7589-87, GB/T 13131-9X, GB 7590-87, and GB/T 13132-9X.  To the
   best of the author's knowledge these character sets are not
   registered and so have no escape sequence assigned.  The value '\0'
   tells the conversion code that they cannot be handled.  */

/* Would be designated for SO.  */
#define X12345	'\0'

/* Would be designated for SS2.  */
#define X7589	'\0'
#define X13131	'\0'

/* Would be designated for SS3.  */
#define X7590	'\0'
#define X13132	'\0'
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* The COUNT element of the state keeps track of the currently selected
   character sets and of the designations announced so far.  The value
   is made up of these fields:

     bits 0-2	character set selected with SO (ASCII_set after SI)
     bits 3-4	character set for SS2
     bits 5-7	character set for SS3
     bits 8-10	announced SO designation
     bits 11-12	announced SS2 designation
     bits 13-15	announced SS3 designation

   Every *_ann value is the matching *_set value shifted left by eight
   bits; the encoder relies on this when it computes `used << 8'.  */
enum
{
  /* Selected with SO.  */
  ASCII_set = 0,
  GB2312_set = 1,
  GB12345_set = 2,
  CNS11643_1_set = 3,
  ISO_IR_165_set = 4,
  SO_mask = 7,

  /* Reached with SS2.  */
  GB7589_set = 1 << 3,
  GB13131_set = 2 << 3,
  CNS11643_2_set = 3 << 3,
  SS2_mask = 3 << 3,

  /* Reached with SS3.  */
  GB7590_set = 1 << 5,
  GB13132_set = 2 << 5,
  CNS11643_3_set = 3 << 5,
  CNS11643_4_set = 4 << 5,
  CNS11643_5_set = 5 << 5,
  CNS11643_6_set = 6 << 5,
  CNS11643_7_set = 7 << 5,
  SS3_mask = 7 << 5,

  /* Announced SO designation.  */
  GB2312_ann = GB2312_set << 8,
  GB12345_ann = GB12345_set << 8,
  CNS11643_1_ann = CNS11643_1_set << 8,
  ISO_IR_165_ann = ISO_IR_165_set << 8,
  SO_ann = SO_mask << 8,

  /* Announced SS2 designation.  */
  GB7589_ann = GB7589_set << 8,
  GB13131_ann = GB13131_set << 8,
  CNS11643_2_ann = CNS11643_2_set << 8,
  SS2_ann = SS2_mask << 8,

  /* Announced SS3 designation.  */
  GB7590_ann = GB7590_set << 8,
  GB13132_ann = GB13132_set << 8,
  CNS11643_3_ann = CNS11643_3_set << 8,
  CNS11643_4_ann = CNS11643_4_set << 8,
  CNS11643_5_ann = CNS11643_5_set << 8,
  CNS11643_6_ann = CNS11643_6_set << 8,
  CNS11643_7_ann = CNS11643_7_set << 8,
  SS3_ann = SS3_mask << 8
};

/* All bits which describe currently selected character sets.  */
#define CURRENT_MASK (SO_mask | SS2_mask | SS3_mask)
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* This is a stateful encoding, so flushing has to bring the state back
   to the initial one.  For the input direction it is enough to reset
   the recorded state.  For the output direction an `SI' byte must be
   written first; if the output buffer has no room for it the status
   becomes __GCONV_FULL_OUTPUT and the state is left untouched.  */
#define EMIT_SHIFT_TO_INIT \
  if (data->__statep->__count >> 3 != ASCII_set)			      \
    {									      \
      if (!(FROM_DIRECTION) && __glibc_unlikely (outbuf == outend))	      \
	/* We would have to emit `SI' but there is no room for it.  */	      \
	status = __GCONV_FULL_OUTPUT;					      \
      else								      \
	{								      \
	  if (!(FROM_DIRECTION))					      \
	    {								      \
	      /* Write out the shift sequence.  */			      \
	      *outbuf++ = SI;						      \
	      if (data->__flags & __GCONV_IS_LAST)			      \
		*irreversible += 1;					      \
	    }								      \
									      \
	  /* In both directions we are now in the initial state.  */	      \
	  data->__statep->__count = ASCII_set << 3;			      \
	}								      \
    }
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* The input pointer may have to be reset, so we must be able to save
   and restore the state: a nonzero SAVE records the current state in
   `save_set', zero puts the recorded value back.  */
#define SAVE_RESET_STATE(Save) \
  if (!(Save))								      \
    *setp = save_set;							      \
  else									      \
    save_set = *setp
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* First the decoding direction, from ISO-2022-CN-EXT to UCS4: select
   the name of the loop function and its input/output byte bounds.  */
#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO
Packit 6c4009
/* Body of the decoding loop: handle one unit of ISO-2022-CN-EXT input.
   A unit is a designation escape sequence, a locking shift (SO or SI),
   a single shift followed by a two byte character, or one character
   of the currently selected set.  Designations and locking shifts only
   change `ann' and `set'; everything else stores one UCS4 character.

   The code is written for expansion inside the conversion loop: it
   leaves with `break' after storing a status in `result', or with
   `continue' when no character is produced.  The error handler macro
   is always invoked while `inptr' still points to the first byte of
   the offending sequence, and its argument is the length of that
   sequence.  */
#define BODY \
  {									      \
    uint32_t ch = *inptr;						      \
									      \
    /* This is a 7bit character set, disallow all 8bit characters.  */	      \
    if (ch > 0x7f)							      \
      STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
									      \
    /* Recognize escape sequences.  */					      \
    if (ch == ESC)							      \
      {									      \
	/* There are three kinds of escape sequences we have to handle:	      \
	   - the four byte sequences ESC $ ) F, ESC $ * F and ESC $ + F	      \
	     announcing the use of GB and CNS characters on the line;	      \
	     they are recorded in `ann'					      \
	   - the initial byte of the SS2 sequence.			      \
	   - the initial byte of the SS3 sequence.			      \
	   Ask for more input if the sequence is not complete yet.  */	      \
	if (inend - inptr < 2						      \
	    || (inptr[1] == '$'						      \
		&& (inend - inptr < 3					      \
		    || (inptr[2] == ')' && inend - inptr < 4)		      \
		    || (inptr[2] == '*' && inend - inptr < 4)		      \
		    || (inptr[2] == '+' && inend - inptr < 4)))		      \
	    || (inptr[1] == SS2_1 && inend - inptr < 4)			      \
	    || (inptr[1] == SS3_1 && inend - inptr < 4))		      \
	  {								      \
	    result = __GCONV_INCOMPLETE_INPUT;				      \
	    break;							      \
	  }								      \
	if (inptr[1] == '$'						      \
	    && ((inptr[2] == ')'					      \
		 && (inptr[3] == 'A'					      \
		     || (X12345 != '\0' && inptr[3] == X12345)		      \
		     || inptr[3] == 'E' || inptr[3] == 'G'))		      \
		|| (inptr[2] == '*'					      \
		    && ((X7589 != '\0' && inptr[3] == X7589)		      \
			|| (X13131 != '\0' && inptr[3] == X13131)	      \
			|| inptr[3] == 'H'))				      \
		|| (inptr[2] == '+'					      \
		    && ((X7590 != '\0' && inptr[3] == X7590)		      \
			|| (X13132 != '\0' && inptr[3] == X13132)	      \
			|| inptr[3] == 'I' || inptr[3] == 'J'		      \
			|| inptr[3] == 'K' || inptr[3] == 'L'		      \
			|| inptr[3] == 'M'))))				      \
	  {								      \
	    /* OK, we accept those character sets.  Remember the	      \
	       designation in the field of `ann' it belongs to.  */	      \
	    if (inptr[3] == 'A')					      \
	      ann = (ann & ~SO_ann) | GB2312_ann;			      \
	    else if (inptr[3] == 'G')					      \
	      ann = (ann & ~SO_ann) | CNS11643_1_ann;			      \
	    else if (inptr[3] == 'E')					      \
	      ann = (ann & ~SO_ann) | ISO_IR_165_ann;			      \
	    else if (X12345 != '\0' && inptr[3] == X12345)		      \
	      ann = (ann & ~SO_ann) | GB12345_ann;			      \
	    else if (inptr[3] == 'H')					      \
	      ann = (ann & ~SS2_ann) | CNS11643_2_ann;			      \
	    else if (X7589 != '\0' && inptr[3] == X7589)		      \
	      ann = (ann & ~SS2_ann) | GB7589_ann;			      \
	    else if (X13131 != '\0' && inptr[3] == X13131)		      \
	      ann = (ann & ~SS2_ann) | GB13131_ann;			      \
	    else if (inptr[3] == 'I')					      \
	      ann = (ann & ~SS3_ann) | CNS11643_3_ann;			      \
	    else if (inptr[3] == 'J')					      \
	      ann = (ann & ~SS3_ann) | CNS11643_4_ann;			      \
	    else if (inptr[3] == 'K')					      \
	      ann = (ann & ~SS3_ann) | CNS11643_5_ann;			      \
	    else if (inptr[3] == 'L')					      \
	      ann = (ann & ~SS3_ann) | CNS11643_6_ann;			      \
	    else if (inptr[3] == 'M')					      \
	      ann = (ann & ~SS3_ann) | CNS11643_7_ann;			      \
	    else if (X7590 != '\0' && inptr[3] == X7590)		      \
	      ann = (ann & ~SS3_ann) | GB7590_ann;			      \
	    else if (X13132 != '\0' && inptr[3] == X13132)		      \
	      ann = (ann & ~SS3_ann) | GB13132_ann;			      \
	    inptr += 4;							      \
	    continue;							      \
	  }								      \
      }									      \
    else if (ch == SO)							      \
      {									      \
	/* Switch to use GB2312, GB12345, CNS 11643 plane 1, or ISO-IR-165,   \
	   depending on which SO designation came last.  An SO which is	      \
	   not preceded by a designation is invalid input.  It is	      \
	   reported before the byte is consumed so that the error refers      \
	   to the SO itself and nothing but the SO is skipped.  */	      \
	if ((ann & SO_ann) == 0)					      \
	  {								      \
	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
	  }								      \
	else								      \
	  {								      \
	    ++inptr;							      \
	    switch (ann & SO_ann)					      \
	      {								      \
	      case GB2312_ann:						      \
		set = GB2312_set;					      \
		break;							      \
	      case GB12345_ann:						      \
		set = GB12345_set;					      \
		break;							      \
	      case CNS11643_1_ann:					      \
		set = CNS11643_1_set;					      \
		break;							      \
	      case ISO_IR_165_ann:					      \
		set = ISO_IR_165_set;					      \
		break;							      \
	      default:							      \
		abort ();						      \
	      }								      \
	  }								      \
	continue;							      \
      }									      \
    else if (ch == SI)							      \
      {									      \
	/* Switch to use ASCII.  */					      \
	++inptr;							      \
	set = ASCII_set;						      \
	continue;							      \
      }									      \
									      \
    /* Note that we can assume here that at least 4 bytes are available if    \
       the first byte is ESC since otherwise the first if would have been     \
       true.  */							      \
    if (ch == ESC && inptr[1] == SS2_1)					      \
      {									      \
	/* This is a character from CNS 11643 plane 2.			      \
	   XXX We could test here whether the use of this character	      \
	   set was announced.						      \
	   XXX Currently GB7589 and GB13131 are not supported.		      \
	   The two bytes are decoded through a temporary pointer so that      \
	   INPTR still points to the ESC if they are invalid.  */	      \
	const unsigned char *tmp = inptr + 2;				      \
									      \
	ch = cns11643l2_to_ucs4 (&tmp, 2, 0);				      \
	if (ch == __UNKNOWN_10646_CHAR)					      \
	  {								      \
	    STANDARD_FROM_LOOP_ERR_HANDLER (4);				      \
	  }								      \
	inptr = tmp;							      \
      }									      \
    else if (ch == ESC && inptr[1] == SS3_1)				      \
      {									      \
	/* This is a character from CNS 11643 plane 3 or higher; the	      \
	   plane is the one of the last SS3 designation.		      \
	   XXX Currently GB7590 and GB13132 are not supported.  */	      \
	unsigned char buf[3];						      \
	const unsigned char *tmp = buf;					      \
									      \
	/* Build the three byte form: plane byte plus the two input bytes.  */ \
	buf[1] = inptr[2];						      \
	buf[2] = inptr[3];						      \
	switch (ann & SS3_ann)						      \
	  {								      \
	  case CNS11643_3_ann:						      \
	    buf[0] = 0x23;						      \
	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
	    break;							      \
	  case CNS11643_4_ann:						      \
	    buf[0] = 0x24;						      \
	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
	    break;							      \
	  case CNS11643_5_ann:						      \
	    buf[0] = 0x25;						      \
	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
	    break;							      \
	  case CNS11643_6_ann:						      \
	    buf[0] = 0x26;						      \
	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
	    break;							      \
	  case CNS11643_7_ann:						      \
	    buf[0] = 0x27;						      \
	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
	    break;							      \
	  default:							      \
	    /* XXX Currently GB7590 and GB13132 are not supported.  */	      \
	    ch = __UNKNOWN_10646_CHAR;					      \
	    break;							      \
	  }								      \
	if (ch == __UNKNOWN_10646_CHAR)					      \
	  {								      \
	    STANDARD_FROM_LOOP_ERR_HANDLER (4);				      \
	  }								      \
	assert (tmp == buf + 3);					      \
	inptr += 4;							      \
      }									      \
    else if (set == ASCII_set)						      \
      {									      \
	/* Almost done, just advance the input pointer.  */		      \
	++inptr;							      \
      }									      \
    else								      \
      {									      \
	/* A two byte character of the set selected with SO.  We have	      \
	   dedicated functions for this.  */				      \
	if (inend - inptr < 2)						      \
	  {								      \
	    result = __GCONV_INCOMPLETE_INPUT;				      \
	    break;							      \
	  }								      \
	if (set == GB2312_set)						      \
	  ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);		      \
	else if (set == ISO_IR_165_set)					      \
	  ch = isoir165_to_ucs4 (&inptr, inend - inptr);		      \
	else								      \
	  {								      \
	    assert (set == CNS11643_1_set);				      \
	    ch = cns11643l1_to_ucs4 (&inptr, inend - inptr, 0);		      \
	  }								      \
									      \
	if (ch == 0)							      \
	  {								      \
	    result = __GCONV_INCOMPLETE_INPUT;				      \
	    break;							      \
	  }								      \
	else if (ch == __UNKNOWN_10646_CHAR)				      \
	  {								      \
	    STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \
	  }								      \
      }									      \
									      \
    *((uint32_t *) outptr) = ch;					      \
    outptr += sizeof (uint32_t);					      \
  }
Packit 6c4009
/* The decoding loop takes the pointer to the stored state as an extra
   parameter.  The state is split into the selected character sets
   (`set') and the announced designations (`ann') on entry and is
   merged back, shifted left by three bits, when the parameters are
   updated.  */
#define EXTRA_LOOP_DECLS	, int *setp
#define INIT_PARAMS		int set = (*setp >> 3) & CURRENT_MASK,	      \
				    ann = (*setp >> 3) & ~CURRENT_MASK
#define UPDATE_PARAMS		*setp = (ann | set) << 3
#define LOOP_NEED_FLAGS
Packit 6c4009
#include <iconv/loop.c>
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Next the encoding direction, from UCS4 to ISO-2022-CN-EXT: select
   the name of the loop function and its input/output byte bounds.  */
#define LOOPFCT			TO_LOOP
#define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO
Packit 6c4009
/* With GCC 5.3 when compiling with -Os the compiler emits a warning
   that buf[0] and buf[1] may be used uninitialized.  This can only
   concern the path which goes through the three byte array tmpbuf:
   there buf[0] and buf[1] are copied from tmpbuf[1] and tmpbuf[2],
   and that copy is made only after ucs4_to_cns11643 reported that it
   wrote three bytes, i.e. after it filled in those entries.  The
   difficulty is in getting the compiler to see this logic because
   tmpbuf[0] is involved in determining the code page and is the
   indicator that tmpbuf[2] is initialized.  The warning is therefore
   disabled around the definition of the encoder body.  */
Packit 6c4009
DIAG_PUSH_NEEDS_COMMENT;
Packit 6c4009
DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
Packit 6c4009
/* Body of the encoding loop: write one UCS4 character in
   ISO-2022-CN-EXT.

   `set' is the character set selected by the locking shifts, i.e.
   ASCII_set or one of the SO sets.  `ann' records which designations
   have been announced on the current line.  Characters of the SS2 and
   SS3 sets are written with a single shift in front of every
   character; they never change `set'.

   Every write is preceded by a check that the output buffer has room
   for it.  A shift sequence is only written when the two bytes of the
   character fit as well, so leaving with __GCONV_FULL_OUTPUT never
   leaves a dangling shift behind.  A designation may be written on its
   own because it is remembered in `ann' and not repeated.  The code is
   written for expansion inside the conversion loop; it leaves with
   `break' after storing a status in `result'.  */
#define BODY \
  {									      \
    uint32_t ch;							      \
    size_t written = 0;							      \
									      \
    ch = *((const uint32_t *) inptr);					      \
									      \
    /* First see whether we can write the character using the currently	      \
       selected character set.  */					      \
    if (ch < 0x80)							      \
      {									      \
	if (set != ASCII_set)						      \
	  {								      \
	    /* Room for one byte is guaranteed by the loop.  */		      \
	    *outptr++ = SI;						      \
	    set = ASCII_set;						      \
	    if (outptr == outend)					      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	  }								      \
									      \
	*outptr++ = ch;							      \
	written = 1;							      \
									      \
	/* At the end of the line we have to clear the `ann' flags since      \
	   every line must contain this information again.  */		      \
	if (ch == L'\n')						      \
	  ann = 0;							      \
      }									      \
    else								      \
      {									      \
	unsigned char buf[2] = { 0, 0 };				      \
	int used;							      \
									      \
	/* Start with the SO set which is selected or was announced	      \
	   last; without any hint GB2312 is tried first.  */		      \
	if (set == GB2312_set || ((ann & SO_ann) != CNS11643_1_ann	      \
				  && (ann & SO_ann) != ISO_IR_165_ann))	      \
	  {								      \
	    written = ucs4_to_gb2312 (ch, buf, 2);			      \
	    used = GB2312_set;						      \
	  }								      \
	else if (set == ISO_IR_165_set || (ann & SO_ann) == ISO_IR_165_ann)   \
	  {								      \
	    written = ucs4_to_isoir165 (ch, buf, 2);			      \
	    used = ISO_IR_165_set;					      \
	  }								      \
	else								      \
	  {								      \
	    written = ucs4_to_cns11643l1 (ch, buf, 2);			      \
	    used = CNS11643_1_set;					      \
	  }								      \
									      \
	if (written == __UNKNOWN_10646_CHAR)				      \
	  {								      \
	    /* Cannot convert it using the currently selected SO set.	      \
	       Next try the SS2 set.  */				      \
	    written = ucs4_to_cns11643l2 (ch, buf, 2);			      \
	    if (written != __UNKNOWN_10646_CHAR)			      \
	      /* Yep, that worked.  */					      \
	      used = CNS11643_2_set;					      \
	    else							      \
	      {								      \
		unsigned char tmpbuf[3];				      \
									      \
		/* The switch only exists so that `break' can leave the	      \
		   sequence of attempts as soon as one succeeds.  */	      \
		switch (0)						      \
		  {							      \
		  default:						      \
		    /* Well, see whether we have to change the SO set.  */    \
									      \
		    if (used != GB2312_set)				      \
		      {							      \
			written = ucs4_to_gb2312 (ch, buf, 2);		      \
			if (written != __UNKNOWN_10646_CHAR)		      \
			  {						      \
			    used = GB2312_set;				      \
			    break;					      \
			  }						      \
		      }							      \
									      \
		    if (used != ISO_IR_165_set)				      \
		      {							      \
			written = ucs4_to_isoir165 (ch, buf, 2);	      \
			if (written != __UNKNOWN_10646_CHAR)		      \
			  {						      \
			    used = ISO_IR_165_set;			      \
			    break;					      \
			  }						      \
		      }							      \
									      \
		    if (used != CNS11643_1_set)				      \
		      {							      \
			written = ucs4_to_cns11643l1 (ch, buf, 2);	      \
			if (written != __UNKNOWN_10646_CHAR)		      \
			  {						      \
			    used = CNS11643_1_set;			      \
			    break;					      \
			  }						      \
		      }							      \
									      \
		    /* Finally the SS3 sets: CNS 11643 planes 3 to 7.  The    \
		       first byte of the result is the plane number.  */      \
		    written = ucs4_to_cns11643 (ch, tmpbuf, 3);		      \
		    if (written == 3 && tmpbuf[0] >= 3 && tmpbuf[0] <= 7)     \
		      {							      \
			buf[0] = tmpbuf[1];				      \
			buf[1] = tmpbuf[2];				      \
			switch (tmpbuf[0])				      \
			  {						      \
			  case 3:					      \
			    used = CNS11643_3_set;			      \
			    break;					      \
			  case 4:					      \
			    used = CNS11643_4_set;			      \
			    break;					      \
			  case 5:					      \
			    used = CNS11643_5_set;			      \
			    break;					      \
			  case 6:					      \
			    used = CNS11643_6_set;			      \
			    break;					      \
			  case 7:					      \
			    used = CNS11643_7_set;			      \
			    break;					      \
			  default:					      \
			    abort ();					      \
			  }						      \
			written = 2;					      \
			break;						      \
		      }							      \
									      \
		    /* XXX Currently GB7590 and GB13132 are not supported.  */\
									      \
		    /* Even this does not work.  Error.  */		      \
		    used = ASCII_set;					      \
		  }							      \
		if (used == ASCII_set)					      \
		  {							      \
		    UNICODE_TAG_HANDLER (ch, 4);			      \
		    STANDARD_TO_LOOP_ERR_HANDLER (4);			      \
		  }							      \
	      }								      \
	  }								      \
	assert (written == 2);						      \
									      \
	/* See whether we have to emit an escape sequence.  */		      \
	if (set != used)						      \
	  {								      \
	    /* First see whether we announced that we use this		      \
	       character set.  Each designation takes four bytes.  */	      \
	    if ((used & SO_mask) != 0 && (ann & SO_ann) != (used << 8))	      \
	      {								      \
		const char *escseq;					      \
									      \
		if (outend - outptr < 4)				      \
		  {							      \
		    result = __GCONV_FULL_OUTPUT;			      \
		    break;						      \
		  }							      \
									      \
		assert (used >= 1 && used <= 4);			      \
		escseq = ")A\0\0)G)E" + (used - 1) * 2;			      \
		*outptr++ = ESC;					      \
		*outptr++ = '$';					      \
		*outptr++ = *escseq++;					      \
		*outptr++ = *escseq++;					      \
									      \
		ann = (ann & ~SO_ann) | (used << 8);			      \
	      }								      \
	    else if ((used & SS2_mask) != 0 && (ann & SS2_ann) != (used << 8))\
	      {								      \
		const char *escseq;					      \
									      \
		if (outend - outptr < 4)				      \
		  {							      \
		    result = __GCONV_FULL_OUTPUT;			      \
		    break;						      \
		  }							      \
									      \
		assert (used == CNS11643_2_set); /* XXX */		      \
		escseq = "*H";						      \
		*outptr++ = ESC;					      \
		*outptr++ = '$';					      \
		*outptr++ = *escseq++;					      \
		*outptr++ = *escseq++;					      \
									      \
		ann = (ann & ~SS2_ann) | (used << 8);			      \
	      }								      \
	    else if ((used & SS3_mask) != 0 && (ann & SS3_ann) != (used << 8))\
	      {								      \
		const char *escseq;					      \
									      \
		if (outend - outptr < 4)				      \
		  {							      \
		    result = __GCONV_FULL_OUTPUT;			      \
		    break;						      \
		  }							      \
									      \
		assert ((used >> 5) >= 3 && (used >> 5) <= 7);		      \
		escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2;		      \
		*outptr++ = ESC;					      \
		*outptr++ = '$';					      \
		*outptr++ = *escseq++;					      \
		*outptr++ = *escseq++;					      \
									      \
		ann = (ann & ~SS3_ann) | (used << 8);			      \
	      }								      \
									      \
	    /* Now the shift.  It is only written if the character fits	      \
	       as well, otherwise a retry would repeat the shift.  */	      \
	    if (used == CNS11643_2_set)					      \
	      {								      \
		if (outend - outptr < 4)				      \
		  {							      \
		    result = __GCONV_FULL_OUTPUT;			      \
		    break;						      \
		  }							      \
		*outptr++ = SS2_0;					      \
		*outptr++ = SS2_1;					      \
	      }								      \
	    else if (used >= CNS11643_3_set && used <= CNS11643_7_set)	      \
	      {								      \
		if (outend - outptr < 4)				      \
		  {							      \
		    result = __GCONV_FULL_OUTPUT;			      \
		    break;						      \
		  }							      \
		*outptr++ = SS3_0;					      \
		*outptr++ = SS3_1;					      \
	      }								      \
	    else if (set == ASCII_set)					      \
	      {								      \
		/* We only have to emit something if currently ASCII is	      \
		   selected.  Otherwise we are switching within the	      \
		   SO charset.  */					      \
		if (outend - outptr < 3)				      \
		  {							      \
		    result = __GCONV_FULL_OUTPUT;			      \
		    break;						      \
		  }							      \
		*outptr++ = SO;						      \
	      }								      \
									      \
	    /* Always test the length here since we have used up all the      \
	       guaranteed output buffer slots.  */			      \
	    if (outend - outptr < 2)					      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	  }								      \
	else if (outend - outptr < 2)					      \
	  {								      \
	    result = __GCONV_FULL_OUTPUT;				      \
	    break;							      \
	  }								      \
									      \
	*outptr++ = buf[0];						      \
	*outptr++ = buf[1];						      \
									      \
	/* SS2 and SS3 are single shifts which affect just the character      \
	   written above.  Only an SO set changes the locking shift	      \
	   state.  */							      \
	if ((used & SO_mask) != 0)					      \
	  set = used;							      \
      }									      \
									      \
    /* Now that we wrote the output increment the input pointer.  */	      \
    inptr += 4;								      \
  }
Packit 6c4009
/* Ends the diagnostic region opened with DIAG_PUSH_NEEDS_COMMENT ahead
   of the definition of the encoder BODY.  */
DIAG_POP_NEEDS_COMMENT;
Packit 6c4009
/* The encoding loop takes the pointer to the stored state as an extra
   parameter.  The state is split into the selected character sets
   (`set') and the announced designations (`ann') on entry, reloaded
   from the stored value by REINIT_PARAMS, and merged back, shifted
   left by three bits, when the parameters are updated.  */
#define EXTRA_LOOP_DECLS	, int *setp
#define INIT_PARAMS		int set = (*setp >> 3) & CURRENT_MASK,	      \
				    ann = (*setp >> 3) & ~CURRENT_MASK
#define REINIT_PARAMS		do					      \
				  {					      \
				    const int stored = *setp >> 3;	      \
				    ann = stored & ~CURRENT_MASK;	      \
				    set = stored & CURRENT_MASK;	      \
				  }					      \
				while (0)
#define UPDATE_PARAMS		*setp = (ann | set) << 3
#define LOOP_NEED_FLAGS
Packit 6c4009
#include <iconv/loop.c>
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Now define the toplevel functions.  */
Packit 6c4009
#include <iconv/skeleton.c>