Blame iconv/gconv_trans.c

Packit 6c4009
/* Transliteration using the locale's data.
Packit 6c4009
   Copyright (C) 2000-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <assert.h>
Packit 6c4009
#include <dlfcn.h>
Packit 6c4009
#include <search.h>
Packit 6c4009
#include <stdint.h>
Packit 6c4009
#include <string.h>
Packit 6c4009
#include <stdlib.h>
Packit 6c4009
Packit 6c4009
#include <libc-lock.h>
Packit 6c4009
#include "gconv_int.h"
Packit 6c4009
#include "../locale/localeinfo.h"
Packit 6c4009
Packit 6c4009
Packit 6c4009
int
Packit 6c4009
__gconv_transliterate (struct __gconv_step *step,
Packit 6c4009
		       struct __gconv_step_data *step_data,
Packit 6c4009
		       const unsigned char *inbufstart,
Packit 6c4009
		       const unsigned char **inbufp,
Packit 6c4009
		       const unsigned char *inbufend,
Packit 6c4009
		       unsigned char **outbufstart, size_t *irreversible)
Packit 6c4009
{
Packit 6c4009
  /* Find out about the locale's transliteration.  */
Packit 6c4009
  uint_fast32_t size;
Packit 6c4009
  const uint32_t *from_idx;
Packit 6c4009
  const uint32_t *from_tbl;
Packit 6c4009
  const uint32_t *to_idx;
Packit 6c4009
  const uint32_t *to_tbl;
Packit 6c4009
  const uint32_t *winbuf;
Packit 6c4009
  const uint32_t *winbufend;
Packit 6c4009
  uint_fast32_t low;
Packit 6c4009
  uint_fast32_t high;
Packit 6c4009
Packit 6c4009
  /* The input buffer.  There are actually 4-byte values.  */
Packit 6c4009
  winbuf = (const uint32_t *) *inbufp;
Packit 6c4009
  winbufend = (const uint32_t *) inbufend;
Packit 6c4009
Packit 6c4009
  __gconv_fct fct = step->__fct;
Packit 6c4009
#ifdef PTR_DEMANGLE
Packit 6c4009
  if (step->__shlib_handle != NULL)
Packit 6c4009
    PTR_DEMANGLE (fct);
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
  /* If there is no transliteration information in the locale don't do
Packit 6c4009
     anything and return the error.  */
Packit 6c4009
  size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
Packit 6c4009
  if (size == 0)
Packit 6c4009
    goto no_rules;
Packit 6c4009
Packit 6c4009
  /* Get the rest of the values.  */
Packit 6c4009
  from_idx =
Packit 6c4009
    (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
Packit 6c4009
  from_tbl =
Packit 6c4009
    (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
Packit 6c4009
  to_idx =
Packit 6c4009
    (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
Packit 6c4009
  to_tbl =
Packit 6c4009
    (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
Packit 6c4009
Packit 6c4009
  /* Test whether there is enough input.  */
Packit 6c4009
  if (winbuf + 1 > winbufend)
Packit 6c4009
    return (winbuf == winbufend
Packit 6c4009
	    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
Packit 6c4009
Packit 6c4009
  /* The array starting at FROM_IDX contains indeces to the string table
Packit 6c4009
     in FROM_TBL.  The indeces are sorted wrt to the strings.  I.e., we
Packit 6c4009
     are doing binary search.  */
Packit 6c4009
  low = 0;
Packit 6c4009
  high = size;
Packit 6c4009
  while (low < high)
Packit 6c4009
    {
Packit 6c4009
      uint_fast32_t med = (low + high) / 2;
Packit 6c4009
      uint32_t idx;
Packit 6c4009
      int cnt;
Packit 6c4009
Packit 6c4009
      /* Compare the string at this index with the string at the current
Packit 6c4009
	 position in the input buffer.  */
Packit 6c4009
      idx = from_idx[med];
Packit 6c4009
      cnt = 0;
Packit 6c4009
      do
Packit 6c4009
	{
Packit 6c4009
	  if (from_tbl[idx + cnt] != winbuf[cnt])
Packit 6c4009
	    /* Does not match.  */
Packit 6c4009
	    break;
Packit 6c4009
	  ++cnt;
Packit 6c4009
	}
Packit 6c4009
      while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
Packit 6c4009
Packit 6c4009
      if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
Packit 6c4009
	{
Packit 6c4009
	  /* Found a matching input sequence.  Now try to convert the
Packit 6c4009
	     possible replacements.  */
Packit 6c4009
	  uint32_t idx2 = to_idx[med];
Packit 6c4009
Packit 6c4009
	  do
Packit 6c4009
	    {
Packit 6c4009
	      /* Determine length of replacement.  */
Packit 6c4009
	      uint_fast32_t len = 0;
Packit 6c4009
	      int res;
Packit 6c4009
	      const unsigned char *toinptr;
Packit 6c4009
	      unsigned char *outptr;
Packit 6c4009
Packit 6c4009
	      while (to_tbl[idx2 + len] != L'\0')
Packit 6c4009
		++len;
Packit 6c4009
Packit 6c4009
	      /* Try this input text.  */
Packit 6c4009
	      toinptr = (const unsigned char *) &to_tbl[idx2];
Packit 6c4009
	      outptr = *outbufstart;
Packit 6c4009
	      res = DL_CALL_FCT (fct,
Packit 6c4009
				 (step, step_data, &toinptr,
Packit 6c4009
				  (const unsigned char *) &to_tbl[idx2 + len],
Packit 6c4009
				  &outptr, NULL, 0, 0));
Packit 6c4009
	      if (res != __GCONV_ILLEGAL_INPUT)
Packit 6c4009
		{
Packit 6c4009
		  /* If the conversion succeeds we have to increment the
Packit 6c4009
		     input buffer.  */
Packit 6c4009
		  if (res == __GCONV_EMPTY_INPUT)
Packit 6c4009
		    {
Packit 6c4009
		      *inbufp += cnt * sizeof (uint32_t);
Packit 6c4009
		      ++*irreversible;
Packit 6c4009
		      res = __GCONV_OK;
Packit 6c4009
		    }
Packit 6c4009
		  /* Do not increment the output pointer if we could not
Packit 6c4009
		     store the entire output. */
Packit 6c4009
		  if (res != __GCONV_FULL_OUTPUT)
Packit 6c4009
		    *outbufstart = outptr;
Packit 6c4009
Packit 6c4009
		  return res;
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      /* Next replacement.  */
Packit 6c4009
	      idx2 += len + 1;
Packit 6c4009
	    }
Packit 6c4009
	  while (to_tbl[idx2] != L'\0');
Packit 6c4009
Packit 6c4009
	  /* Nothing found, continue searching.  */
Packit 6c4009
	}
Packit 6c4009
      else if (cnt > 0)
Packit 6c4009
	/* This means that the input buffer contents matches a prefix of
Packit 6c4009
	   an entry.  Since we cannot match it unless we get more input,
Packit 6c4009
	   we will tell the caller about it.  */
Packit 6c4009
	return __GCONV_INCOMPLETE_INPUT;
Packit 6c4009
Packit 6c4009
      if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
Packit 6c4009
	low = med + 1;
Packit 6c4009
      else
Packit 6c4009
	high = med;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
 no_rules:
Packit 6c4009
  /* Maybe the character is supposed to be ignored.  */
Packit 6c4009
  if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
Packit 6c4009
    {
Packit 6c4009
      int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
Packit 6c4009
      const uint32_t *ranges =
Packit 6c4009
	(const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE);
Packit 6c4009
      const uint32_t wc = *(const uint32_t *) (*inbufp);
Packit 6c4009
      int i;
Packit 6c4009
Packit 6c4009
      /* Test whether there is enough input.  */
Packit 6c4009
      if (winbuf + 1 > winbufend)
Packit 6c4009
	return (winbuf == winbufend
Packit 6c4009
		? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
Packit 6c4009
Packit 6c4009
      for (i = 0; i < n; ranges += 3, ++i)
Packit 6c4009
	if (ranges[0] <= wc && wc <= ranges[1]
Packit 6c4009
	    && (wc - ranges[0]) % ranges[2] == 0)
Packit 6c4009
	  {
Packit 6c4009
	    /* Matches the range.  Ignore it.  */
Packit 6c4009
	    *inbufp += 4;
Packit 6c4009
	    ++*irreversible;
Packit 6c4009
	    return __GCONV_OK;
Packit 6c4009
	  }
Packit 6c4009
	else if (wc < ranges[0])
Packit 6c4009
	  /* There cannot be any other matching range since they are
Packit 6c4009
             sorted.  */
Packit 6c4009
	  break;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* One last chance: use the default replacement.  */
Packit 6c4009
  if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
Packit 6c4009
    {
Packit 6c4009
      const uint32_t *default_missing = (const uint32_t *)
Packit 6c4009
	_NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
Packit 6c4009
      const unsigned char *toinptr = (const unsigned char *) default_missing;
Packit 6c4009
      uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
Packit 6c4009
				       _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
Packit 6c4009
      unsigned char *outptr;
Packit 6c4009
      int res;
Packit 6c4009
Packit 6c4009
      /* Test whether there is enough input.  */
Packit 6c4009
      if (winbuf + 1 > winbufend)
Packit 6c4009
	return (winbuf == winbufend
Packit 6c4009
		? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
Packit 6c4009
Packit 6c4009
      outptr = *outbufstart;
Packit 6c4009
      res = DL_CALL_FCT (fct,
Packit 6c4009
			 (step, step_data, &toinptr,
Packit 6c4009
			  (const unsigned char *) (default_missing + len),
Packit 6c4009
			  &outptr, NULL, 0, 0));
Packit 6c4009
Packit 6c4009
      if (res != __GCONV_ILLEGAL_INPUT)
Packit 6c4009
	{
Packit 6c4009
	  /* If the conversion succeeds we have to increment the
Packit 6c4009
	     input buffer.  */
Packit 6c4009
	  if (res == __GCONV_EMPTY_INPUT)
Packit 6c4009
	    {
Packit 6c4009
	      /* This worked but is not reversible.  */
Packit 6c4009
	      ++*irreversible;
Packit 6c4009
	      *inbufp += 4;
Packit 6c4009
	      res = __GCONV_OK;
Packit 6c4009
	    }
Packit 6c4009
	  *outbufstart = outptr;
Packit 6c4009
Packit 6c4009
	  return res;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Haven't found a match.  */
Packit 6c4009
  return __GCONV_ILLEGAL_INPUT;
Packit 6c4009
}
Packit 6c4009
libc_hidden_def (__gconv_transliterate)