/* Charset name normalization.
   Copyright (C) 2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <stdlib.h>
#include <ctype.h>
#include <locale.h>
#include <stdbool.h>
#include <string.h>
#include <sys/stat.h>
#include "gconv_int.h"
#include "gconv_charset.h"


/* Return a pointer to the last suffix in the conversion code string S, or
   NULL if S does not contain one.  Valid suffixes matched by this function
   are of the form: '/' or ',' followed by arbitrary text that doesn't contain
   '/' or ','.  The returned pointer addresses the separator that starts the
   suffix.  The string is not edited in any way; the caller is expected to
   parse the suffix and remove it (by e.g. truncating the string) before the
   next call.  */
static char *
find_suffix (char *s)
{
  /* The conversion code is in the form of a triplet, separated by '/' chars.
     The third component of the triplet contains suffixes.  If we don't have
     two slashes, we don't have a suffix.  */
  size_t slash_count = 0;
  char *suffix_term = NULL;

  /* Use size_t for the index so that arbitrarily long strings cannot
     overflow it.  */
  for (size_t i = 0; s[i] != '\0'; i++)
    switch (s[i])
      {
      case '/':
        slash_count++;
        /* Fallthrough */
      case ',':
        /* Remember the most recent separator seen so far.  */
        suffix_term = &s[i];
        break;
      default:
        break;
      }

  return slash_count >= 2 ? suffix_term : NULL;
}


/* A conversion code together with the option flags extracted from its
   suffixes by gconv_parse_code.  */
struct gconv_parsed_code
{
  /* Writable, NUL-terminated conversion code.  gconv_parse_code truncates
     it in place as suffixes are processed.  */
  char *code;
  /* Set to true when a TRANSLIT suffix was found in CODE.  */
  bool translit;
  /* Set to true when an IGNORE suffix was found in CODE.  */
  bool ignore;
};


/* This function parses an iconv_open encoding PC.CODE, strips any suffixes
Packit Service 0f3533
   (such as TRANSLIT or IGNORE) from it and sets corresponding flags in it.  */
Packit Service 0f3533
static void
Packit Service 0f3533
gconv_parse_code (struct gconv_parsed_code *pc)
Packit Service 0f3533
{
Packit Service 0f3533
  pc->translit = false;
Packit Service 0f3533
  pc->ignore = false;
Packit Service 0f3533
Packit Service 0f3533
  while (1)
Packit Service 0f3533
    {
Packit Service 0f3533
      /* First drop any trailing whitespaces and separators.  */
Packit Service 0f3533
      size_t len = strlen (pc->code);
Packit Service 0f3533
      while ((len > 0)
Packit Service 0f3533
             && (isspace (pc->code[len - 1])
Packit Service 0f3533
                 || pc->code[len - 1] == ','
Packit Service 0f3533
                 || pc->code[len - 1] == '/'))
Packit Service 0f3533
        len--;
Packit Service 0f3533
Packit Service 0f3533
      pc->code[len] = '\0';
Packit Service 0f3533
Packit Service 0f3533
      if (len == 0)
Packit Service 0f3533
        return;
Packit Service 0f3533
Packit Service 0f3533
      char * suffix = find_suffix (pc->code);
Packit Service 0f3533
      if (suffix == NULL)
Packit Service 0f3533
        {
Packit Service 0f3533
          /* At this point, we have processed and removed all suffixes from the
Packit Service 0f3533
             code and what remains of the code is suffix free.  */
Packit Service 0f3533
          return;
Packit Service 0f3533
        }
Packit Service 0f3533
      else
Packit Service 0f3533
        {
Packit Service 0f3533
          /* A suffix is processed from the end of the code array going
Packit Service 0f3533
             backwards, one suffix at a time.  The suffix is an index into the
Packit Service 0f3533
             code character array and points to: one past the end of the code
Packit Service 0f3533
             and any unprocessed suffixes, and to the beginning of the suffix
Packit Service 0f3533
             currently being processed during this iteration.  We must process
Packit Service 0f3533
             this suffix and then drop it from the code by terminating the
Packit Service 0f3533
             preceding text with NULL.
Packit Service 0f3533
Packit Service 0f3533
             We want to allow and recognize suffixes such as:
Packit Service 0f3533
Packit Service 0f3533
             "/TRANSLIT"         i.e. single suffix
Packit Service 0f3533
             "//TRANSLIT"        i.e. single suffix and multiple separators
Packit Service 0f3533
             "//TRANSLIT/IGNORE" i.e. suffixes separated by "/"
Packit Service 0f3533
             "/TRANSLIT//IGNORE" i.e. suffixes separated by "//"
Packit Service 0f3533
             "//IGNORE,TRANSLIT" i.e. suffixes separated by ","
Packit Service 0f3533
             "//IGNORE,"         i.e. trailing ","
Packit Service 0f3533
             "//TRANSLIT/"       i.e. trailing "/"
Packit Service 0f3533
             "//TRANSLIT//"      i.e. trailing "//"
Packit Service 0f3533
             "/"                 i.e. empty suffix.
Packit Service 0f3533
Packit Service 0f3533
             Unknown suffixes are silently discarded and ignored.  */
Packit Service 0f3533
Packit Service 0f3533
          if ((__strcasecmp_l (suffix,
Packit Service 0f3533
                               GCONV_TRIPLE_SEPARATOR
Packit Service 0f3533
                               GCONV_TRANSLIT_SUFFIX,
Packit Service 0f3533
                               _nl_C_locobj_ptr) == 0)
Packit Service 0f3533
              || (__strcasecmp_l (suffix,
Packit Service 0f3533
                                  GCONV_SUFFIX_SEPARATOR
Packit Service 0f3533
                                  GCONV_TRANSLIT_SUFFIX,
Packit Service 0f3533
                                  _nl_C_locobj_ptr) == 0))
Packit Service 0f3533
            pc->translit = true;
Packit Service 0f3533
Packit Service 0f3533
          if ((__strcasecmp_l (suffix,
Packit Service 0f3533
                               GCONV_TRIPLE_SEPARATOR
Packit Service 0f3533
                               GCONV_IGNORE_ERRORS_SUFFIX,
Packit Service 0f3533
                               _nl_C_locobj_ptr) == 0)
Packit Service 0f3533
              || (__strcasecmp_l (suffix,
Packit Service 0f3533
                                  GCONV_SUFFIX_SEPARATOR
Packit Service 0f3533
                                  GCONV_IGNORE_ERRORS_SUFFIX,
Packit Service 0f3533
                                  _nl_C_locobj_ptr) == 0))
Packit Service 0f3533
            pc->ignore = true;
Packit Service 0f3533
Packit Service 0f3533
          /* We just processed this suffix.  We can now drop it from the
Packit Service 0f3533
             code string by truncating it at the suffix's position.  */
Packit Service 0f3533
          suffix[0] = '\0';
Packit Service 0f3533
        }
Packit Service 0f3533
    }
Packit Service 0f3533
}
Packit Service 0f3533
Packit Service 0f3533
Packit Service 0f3533
/* This function accepts the charset names of the source and destination of the
Packit Service 0f3533
   conversion and populates *conv_spec with an equivalent conversion
Packit Service 0f3533
   specification that may later be used by __gconv_open.  The charset names
Packit Service 0f3533
   might contain options in the form of suffixes that alter the conversion,
Packit Service 0f3533
   e.g. "ISO-10646/UTF-8/TRANSLIT".  It processes the charset names, ignoring
Packit Service 0f3533
   and truncating any suffix options in fromcode, and processing and truncating
Packit Service 0f3533
   any suffix options in tocode.  Supported suffix options ("TRANSLIT" or
Packit Service 0f3533
   "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
Packit Service 0f3533
   to be set to true.  Unrecognized suffix options are silently discarded.  If
Packit Service 0f3533
   the function succeeds, it returns conv_spec back to the caller.  It returns
Packit Service 0f3533
   NULL upon failure.  conv_spec must be allocated and freed by the caller.  */
Packit Service 0f3533
struct gconv_spec *
Packit Service 0f3533
__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
Packit Service 0f3533
                   const char *tocode)
Packit Service 0f3533
{
Packit Service 0f3533
  struct gconv_parsed_code pfc, ptc;
Packit Service 0f3533
  struct gconv_spec *ret = NULL;
Packit Service 0f3533
Packit Service 0f3533
  pfc.code = __strdup (fromcode);
Packit Service 0f3533
  ptc.code = __strdup (tocode);
Packit Service 0f3533
Packit Service 0f3533
  if ((pfc.code == NULL)
Packit Service 0f3533
      || (ptc.code == NULL))
Packit Service 0f3533
    goto out;
Packit Service 0f3533
Packit Service 0f3533
  gconv_parse_code (&pfc;;
Packit Service 0f3533
  gconv_parse_code (&ptc);
Packit Service 0f3533
Packit Service 0f3533
  /* We ignore suffixes in the fromcode because that is how the current
Packit Service 0f3533
     implementation has always handled them.  Only suffixes in the tocode are
Packit Service 0f3533
     processed and handled.  The reality is that invalid input in the input
Packit Service 0f3533
     character set should only be ignored if the fromcode specifies IGNORE.
Packit Service 0f3533
     The current implementation ignores invalid intput in the input character
Packit Service 0f3533
     set if the tocode contains IGNORE.  We preserve this behavior for
Packit Service 0f3533
     backwards compatibility.  In the future we may split the handling of
Packit Service 0f3533
     IGNORE to allow a finer grained specification of ignorning invalid input
Packit Service 0f3533
     and/or ignoring invalid output.  */
Packit Service 0f3533
  conv_spec->translit = ptc.translit;
Packit Service 0f3533
  conv_spec->ignore = ptc.ignore;
Packit Service 0f3533
Packit Service 0f3533
  /* 3 extra bytes because 1 extra for '\0', and 2 extra so strip might
Packit Service 0f3533
     be able to add one or two trailing '/' characters if necessary.  */
Packit Service 0f3533
  conv_spec->fromcode = malloc (strlen (fromcode) + 3);
Packit Service 0f3533
  if (conv_spec->fromcode == NULL)
Packit Service 0f3533
    goto out;
Packit Service 0f3533
Packit Service 0f3533
  conv_spec->tocode = malloc (strlen (tocode) + 3);
Packit Service 0f3533
  if (conv_spec->tocode == NULL)
Packit Service 0f3533
    {
Packit Service 0f3533
      free (conv_spec->fromcode);
Packit Service 0f3533
      conv_spec->fromcode = NULL;
Packit Service 0f3533
      goto out;
Packit Service 0f3533
    }
Packit Service 0f3533
Packit Service 0f3533
  /* Strip unrecognized characters and ensure that the code has two '/'
Packit Service 0f3533
     characters as per conversion code triplet specification.  */
Packit Service 0f3533
  strip (conv_spec->fromcode, pfc.code);
Packit Service 0f3533
  strip (conv_spec->tocode, ptc.code);
Packit Service 0f3533
  ret = conv_spec;
Packit Service 0f3533
Packit Service 0f3533
out:
Packit Service 0f3533
  free (pfc.code);
Packit Service 0f3533
  free (ptc.code);
Packit Service 0f3533
Packit Service 0f3533
  return ret;
Packit Service 0f3533
}
Packit Service 0f3533
libc_hidden_def (__gconv_create_spec)
Packit Service 98f370
Packit Service 98f370
Packit Service 98f370
void
Packit Service 98f370
__gconv_destroy_spec (struct gconv_spec *conv_spec)
Packit Service 98f370
{
Packit Service 98f370
  free (conv_spec->fromcode);
Packit Service 98f370
  free (conv_spec->tocode);
Packit Service 98f370
  return;
Packit Service 98f370
}
Packit Service 98f370
libc_hidden_def (__gconv_destroy_spec)