Blame iconv/gconv_charset.c

Packit Service a38be6
/* Charset name normalization.
Packit Service a38be6
   Copyright (C) 2020 Free Software Foundation, Inc.
Packit Service a38be6
   This file is part of the GNU C Library.
Packit Service a38be6
Packit Service a38be6
   The GNU C Library is free software; you can redistribute it and/or
Packit Service a38be6
   modify it under the terms of the GNU Lesser General Public
Packit Service a38be6
   License as published by the Free Software Foundation; either
Packit Service a38be6
   version 2.1 of the License, or (at your option) any later version.
Packit Service a38be6
Packit Service a38be6
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service a38be6
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service a38be6
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service a38be6
   Lesser General Public License for more details.
Packit Service a38be6
Packit Service a38be6
   You should have received a copy of the GNU Lesser General Public
Packit Service a38be6
   License along with the GNU C Library; if not, see
Packit Service a38be6
   <http://www.gnu.org/licenses/>.  */
Packit Service a38be6
Packit Service a38be6
Packit Service a38be6
#include <stdlib.h>
Packit Service a38be6
#include <ctype.h>
Packit Service a38be6
#include <locale.h>
Packit Service a38be6
#include <stdbool.h>
Packit Service a38be6
#include <string.h>
Packit Service a38be6
#include <sys/stat.h>
Packit Service a38be6
#include "gconv_int.h"
Packit Service a38be6
#include "gconv_charset.h"
Packit Service a38be6
Packit Service a38be6
Packit Service a38be6
/* Return a pointer to the last suffix in the conversion code string S, or
   NULL if S does not contain at least two '/' characters.  Valid suffixes
   matched by this function are of the form: '/' or ',' followed by arbitrary
   text that doesn't contain '/' or ','; the returned pointer addresses that
   leading separator.  The string is not edited in any way.  The caller is
   expected to parse the suffix and remove it (by e.g. truncating the string)
   before the next call.  */
static char *
find_suffix (char *s)
{
  /* The conversion code is in the form of a triplet, separated by '/' chars.
     The third component of the triplet contains suffixes.  If we don't have
     two slashes, we don't have a suffix.  */

  size_t slash_count = 0;
  char *suffix_term = NULL;

  /* Use size_t for the index and the counter so that neither can overflow,
     however long the string is.  */
  for (size_t i = 0; s[i] != '\0'; i++)
    switch (s[i])
      {
        case '/':
          slash_count++;
          /* Fallthrough */
        case ',':
          /* Remember the most recent separator seen so far.  */
          suffix_term = &s[i];
          break;
        default:
          break;
      }

  if (slash_count >= 2)
    return suffix_term;

  return NULL;
}
Packit Service a38be6
Packit Service a38be6
Packit Service a38be6
/* A conversion code string together with the suffix options parsed out of
   it by gconv_parse_code.  */
struct gconv_parsed_code
{
  /* Writable, NUL-terminated conversion code.  gconv_parse_code truncates
     it in place as suffixes are removed.  */
  char *code;
  /* True if a TRANSLIT suffix was found in the code.  */
  bool translit;
  /* True if an IGNORE suffix was found in the code.  */
  bool ignore;
};
Packit Service a38be6
Packit Service a38be6
Packit Service a38be6
/* This function parses an iconv_open encoding PC.CODE, strips any suffixes
Packit Service a38be6
   (such as TRANSLIT or IGNORE) from it and sets corresponding flags in it.  */
Packit Service a38be6
static void
Packit Service a38be6
gconv_parse_code (struct gconv_parsed_code *pc)
Packit Service a38be6
{
Packit Service a38be6
  pc->translit = false;
Packit Service a38be6
  pc->ignore = false;
Packit Service a38be6
Packit Service a38be6
  while (1)
Packit Service a38be6
    {
Packit Service a38be6
      /* First drop any trailing whitespaces and separators.  */
Packit Service a38be6
      size_t len = strlen (pc->code);
Packit Service a38be6
      while ((len > 0)
Packit Service a38be6
             && (isspace (pc->code[len - 1])
Packit Service a38be6
                 || pc->code[len - 1] == ','
Packit Service a38be6
                 || pc->code[len - 1] == '/'))
Packit Service a38be6
        len--;
Packit Service a38be6
Packit Service a38be6
      pc->code[len] = '\0';
Packit Service a38be6
Packit Service a38be6
      if (len == 0)
Packit Service a38be6
        return;
Packit Service a38be6
Packit Service a38be6
      char * suffix = find_suffix (pc->code);
Packit Service a38be6
      if (suffix == NULL)
Packit Service a38be6
        {
Packit Service a38be6
          /* At this point, we have processed and removed all suffixes from the
Packit Service a38be6
             code and what remains of the code is suffix free.  */
Packit Service a38be6
          return;
Packit Service a38be6
        }
Packit Service a38be6
      else
Packit Service a38be6
        {
Packit Service a38be6
          /* A suffix is processed from the end of the code array going
Packit Service a38be6
             backwards, one suffix at a time.  The suffix is an index into the
Packit Service a38be6
             code character array and points to: one past the end of the code
Packit Service a38be6
             and any unprocessed suffixes, and to the beginning of the suffix
Packit Service a38be6
             currently being processed during this iteration.  We must process
Packit Service a38be6
             this suffix and then drop it from the code by terminating the
Packit Service a38be6
             preceding text with NULL.
Packit Service a38be6
Packit Service a38be6
             We want to allow and recognize suffixes such as:
Packit Service a38be6
Packit Service a38be6
             "/TRANSLIT"         i.e. single suffix
Packit Service a38be6
             "//TRANSLIT"        i.e. single suffix and multiple separators
Packit Service a38be6
             "//TRANSLIT/IGNORE" i.e. suffixes separated by "/"
Packit Service a38be6
             "/TRANSLIT//IGNORE" i.e. suffixes separated by "//"
Packit Service a38be6
             "//IGNORE,TRANSLIT" i.e. suffixes separated by ","
Packit Service a38be6
             "//IGNORE,"         i.e. trailing ","
Packit Service a38be6
             "//TRANSLIT/"       i.e. trailing "/"
Packit Service a38be6
             "//TRANSLIT//"      i.e. trailing "//"
Packit Service a38be6
             "/"                 i.e. empty suffix.
Packit Service a38be6
Packit Service a38be6
             Unknown suffixes are silently discarded and ignored.  */
Packit Service a38be6
Packit Service a38be6
          if ((__strcasecmp_l (suffix,
Packit Service a38be6
                               GCONV_TRIPLE_SEPARATOR
Packit Service a38be6
                               GCONV_TRANSLIT_SUFFIX,
Packit Service a38be6
                               _nl_C_locobj_ptr) == 0)
Packit Service a38be6
              || (__strcasecmp_l (suffix,
Packit Service a38be6
                                  GCONV_SUFFIX_SEPARATOR
Packit Service a38be6
                                  GCONV_TRANSLIT_SUFFIX,
Packit Service a38be6
                                  _nl_C_locobj_ptr) == 0))
Packit Service a38be6
            pc->translit = true;
Packit Service a38be6
Packit Service a38be6
          if ((__strcasecmp_l (suffix,
Packit Service a38be6
                               GCONV_TRIPLE_SEPARATOR
Packit Service a38be6
                               GCONV_IGNORE_ERRORS_SUFFIX,
Packit Service a38be6
                               _nl_C_locobj_ptr) == 0)
Packit Service a38be6
              || (__strcasecmp_l (suffix,
Packit Service a38be6
                                  GCONV_SUFFIX_SEPARATOR
Packit Service a38be6
                                  GCONV_IGNORE_ERRORS_SUFFIX,
Packit Service a38be6
                                  _nl_C_locobj_ptr) == 0))
Packit Service a38be6
            pc->ignore = true;
Packit Service a38be6
Packit Service a38be6
          /* We just processed this suffix.  We can now drop it from the
Packit Service a38be6
             code string by truncating it at the suffix's position.  */
Packit Service a38be6
          suffix[0] = '\0';
Packit Service a38be6
        }
Packit Service a38be6
    }
Packit Service a38be6
}
Packit Service a38be6
Packit Service a38be6
Packit Service a38be6
/* This function accepts the charset names of the source and destination of the
Packit Service a38be6
   conversion and populates *conv_spec with an equivalent conversion
Packit Service a38be6
   specification that may later be used by __gconv_open.  The charset names
Packit Service a38be6
   might contain options in the form of suffixes that alter the conversion,
Packit Service a38be6
   e.g. "ISO-10646/UTF-8/TRANSLIT".  It processes the charset names, ignoring
Packit Service a38be6
   and truncating any suffix options in fromcode, and processing and truncating
Packit Service a38be6
   any suffix options in tocode.  Supported suffix options ("TRANSLIT" or
Packit Service a38be6
   "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
Packit Service a38be6
   to be set to true.  Unrecognized suffix options are silently discarded.  If
Packit Service a38be6
   the function succeeds, it returns conv_spec back to the caller.  It returns
Packit Service a38be6
   NULL upon failure.  conv_spec must be allocated and freed by the caller.  */
Packit Service a38be6
struct gconv_spec *
Packit Service a38be6
__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
Packit Service a38be6
                   const char *tocode)
Packit Service a38be6
{
Packit Service a38be6
  struct gconv_parsed_code pfc, ptc;
Packit Service a38be6
  struct gconv_spec *ret = NULL;
Packit Service a38be6
Packit Service a38be6
  pfc.code = __strdup (fromcode);
Packit Service a38be6
  ptc.code = __strdup (tocode);
Packit Service a38be6
Packit Service a38be6
  if ((pfc.code == NULL)
Packit Service a38be6
      || (ptc.code == NULL))
Packit Service a38be6
    goto out;
Packit Service a38be6
Packit Service a38be6
  gconv_parse_code (&pfc;;
Packit Service a38be6
  gconv_parse_code (&ptc);
Packit Service a38be6
Packit Service a38be6
  /* We ignore suffixes in the fromcode because that is how the current
Packit Service a38be6
     implementation has always handled them.  Only suffixes in the tocode are
Packit Service a38be6
     processed and handled.  The reality is that invalid input in the input
Packit Service a38be6
     character set should only be ignored if the fromcode specifies IGNORE.
Packit Service a38be6
     The current implementation ignores invalid intput in the input character
Packit Service a38be6
     set if the tocode contains IGNORE.  We preserve this behavior for
Packit Service a38be6
     backwards compatibility.  In the future we may split the handling of
Packit Service a38be6
     IGNORE to allow a finer grained specification of ignorning invalid input
Packit Service a38be6
     and/or ignoring invalid output.  */
Packit Service a38be6
  conv_spec->translit = ptc.translit;
Packit Service a38be6
  conv_spec->ignore = ptc.ignore;
Packit Service a38be6
Packit Service a38be6
  /* 3 extra bytes because 1 extra for '\0', and 2 extra so strip might
Packit Service a38be6
     be able to add one or two trailing '/' characters if necessary.  */
Packit Service a38be6
  conv_spec->fromcode = malloc (strlen (fromcode) + 3);
Packit Service a38be6
  if (conv_spec->fromcode == NULL)
Packit Service a38be6
    goto out;
Packit Service a38be6
Packit Service a38be6
  conv_spec->tocode = malloc (strlen (tocode) + 3);
Packit Service a38be6
  if (conv_spec->tocode == NULL)
Packit Service a38be6
    {
Packit Service a38be6
      free (conv_spec->fromcode);
Packit Service a38be6
      conv_spec->fromcode = NULL;
Packit Service a38be6
      goto out;
Packit Service a38be6
    }
Packit Service a38be6
Packit Service a38be6
  /* Strip unrecognized characters and ensure that the code has two '/'
Packit Service a38be6
     characters as per conversion code triplet specification.  */
Packit Service a38be6
  strip (conv_spec->fromcode, pfc.code);
Packit Service a38be6
  strip (conv_spec->tocode, ptc.code);
Packit Service a38be6
  ret = conv_spec;
Packit Service a38be6
Packit Service a38be6
out:
Packit Service a38be6
  free (pfc.code);
Packit Service a38be6
  free (ptc.code);
Packit Service a38be6
Packit Service a38be6
  return ret;
Packit Service a38be6
}
Packit Service a38be6
libc_hidden_def (__gconv_create_spec)