Blame locale/programs/charmap.c

Packit 6c4009
/* Copyright (C) 1996-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
Packit 6c4009
Packit 6c4009
   This program is free software; you can redistribute it and/or modify
Packit 6c4009
   it under the terms of the GNU General Public License as published
Packit 6c4009
   by the Free Software Foundation; version 2 of the License, or
Packit 6c4009
   (at your option) any later version.
Packit 6c4009
Packit 6c4009
   This program is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 6c4009
   GNU General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU General Public License
Packit 6c4009
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#ifdef HAVE_CONFIG_H
Packit 6c4009
# include <config.h>
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#include <ctype.h>
Packit 6c4009
#include <errno.h>
Packit 6c4009
#include <libintl.h>
Packit 6c4009
#include <limits.h>
Packit 6c4009
#include <stdio.h>
Packit 6c4009
#include <stdlib.h>
Packit 6c4009
#include <string.h>
Packit 6c4009
#include <stdint.h>
Packit 6c4009
Packit 6c4009
#include "localedef.h"
Packit 6c4009
#include "linereader.h"
Packit 6c4009
#include "charmap.h"
Packit 6c4009
#include "charmap-dir.h"
Packit 6c4009
Packit 6c4009
#include <assert.h>
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Define the lookup function.  */
Packit 6c4009
#include "charmap-kw.h"
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Prototypes for local functions.  */
Packit 6c4009
static struct charmap_t *parse_charmap (struct linereader *cmfile,
Packit 6c4009
					int verbose, int be_quiet);
Packit 6c4009
static void new_width (struct linereader *cmfile, struct charmap_t *result,
Packit 6c4009
		       const char *from, const char *to,
Packit 6c4009
		       unsigned long int width);
Packit 6c4009
static void charmap_new_char (struct linereader *lr, struct charmap_t *cm,
Packit 6c4009
			      size_t nbytes, unsigned char *bytes,
Packit 6c4009
			      const char *from, const char *to,
Packit 6c4009
			      int decimal_ellipsis, int step);
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Set to true by charmap_read when the charmap it loaded fails the
   ASCII compatibility test for the ISO C basic character set.  */
bool enc_not_ascii_compatible;
Packit 6c4009
Packit 6c4009
Packit 6c4009
#ifdef NEED_NULL_POINTER
Packit 6c4009
static const char *null_pointer;
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
static struct linereader *
Packit 6c4009
cmlr_open (const char *directory, const char *name, kw_hash_fct_t hf)
Packit 6c4009
{
Packit 6c4009
  FILE *fp;
Packit 6c4009
Packit 6c4009
  fp = charmap_open (directory, name);
Packit 6c4009
  if (fp == NULL)
Packit 6c4009
    return NULL;
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      size_t dlen = strlen (directory);
Packit 6c4009
      int add_slash = (dlen == 0 || directory[dlen - 1] != '/');
Packit 6c4009
      size_t nlen = strlen (name);
Packit 6c4009
      char *pathname;
Packit 6c4009
      char *p;
Packit 6c4009
Packit 6c4009
      pathname = alloca (dlen + add_slash + nlen + 1);
Packit 6c4009
      p = stpcpy (pathname, directory);
Packit 6c4009
      if (add_slash)
Packit 6c4009
	*p++ = '/';
Packit 6c4009
      stpcpy (p, name);
Packit 6c4009
Packit 6c4009
      return lr_create (fp, pathname, hf);
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
struct charmap_t *
Packit 6c4009
charmap_read (const char *filename, int verbose, int error_not_found,
Packit 6c4009
	      int be_quiet, int use_default)
Packit 6c4009
{
Packit 6c4009
  struct charmap_t *result = NULL;
Packit 6c4009
Packit 6c4009
  if (filename != NULL)
Packit 6c4009
    {
Packit 6c4009
      struct linereader *cmfile;
Packit 6c4009
Packit 6c4009
      /* First try the name as found in the parameter.  */
Packit 6c4009
      cmfile = lr_open (filename, charmap_hash);
Packit 6c4009
      if (cmfile == NULL)
Packit 6c4009
	{
Packit 6c4009
	  /* No successful.  So start looking through the directories
Packit 6c4009
	     in the I18NPATH if this is a simple name.  */
Packit 6c4009
	  if (strchr (filename, '/') == NULL)
Packit 6c4009
	    {
Packit 6c4009
	      char *i18npath = getenv ("I18NPATH");
Packit 6c4009
	      if (i18npath != NULL && *i18npath != '\0')
Packit 6c4009
		{
Packit 6c4009
		  const size_t pathlen = strlen (i18npath);
Packit 6c4009
		  char i18npathbuf[pathlen + 1];
Packit 6c4009
		  char path[pathlen + sizeof ("/charmaps")];
Packit 6c4009
		  char *next;
Packit 6c4009
		  i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1);
Packit 6c4009
Packit 6c4009
		  while (cmfile == NULL
Packit 6c4009
			 && (next = strsep (&i18npath, ":")) != NULL)
Packit 6c4009
		    {
Packit 6c4009
		      stpcpy (stpcpy (path, next), "/charmaps");
Packit 6c4009
		      cmfile = cmlr_open (path, filename, charmap_hash);
Packit 6c4009
Packit 6c4009
		      if (cmfile == NULL)
Packit 6c4009
			/* Try without the "/charmaps" part.  */
Packit 6c4009
			cmfile = cmlr_open (next, filename, charmap_hash);
Packit 6c4009
		    }
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      if (cmfile == NULL)
Packit 6c4009
		/* Try the default directory.  */
Packit 6c4009
		cmfile = cmlr_open (CHARMAP_PATH, filename, charmap_hash);
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      if (cmfile != NULL)
Packit 6c4009
	result = parse_charmap (cmfile, verbose, be_quiet);
Packit 6c4009
Packit 6c4009
      if (result == NULL && error_not_found)
Packit 6c4009
	record_error (0, errno,
Packit 6c4009
		      _("character map file `%s' not found"),
Packit 6c4009
		      filename);
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (result == NULL && filename != NULL && strchr (filename, '/') == NULL)
Packit 6c4009
    {
Packit 6c4009
      /* OK, one more try.  We also accept the names given to the
Packit 6c4009
	 character sets in the files.  Sometimes they differ from the
Packit 6c4009
	 file name.  */
Packit 6c4009
      CHARMAP_DIR *dir;
Packit 6c4009
Packit 6c4009
      dir = charmap_opendir (CHARMAP_PATH);
Packit 6c4009
      if (dir != NULL)
Packit 6c4009
	{
Packit 6c4009
	  const char *dirent;
Packit 6c4009
Packit 6c4009
	  while ((dirent = charmap_readdir (dir)) != NULL)
Packit 6c4009
	    {
Packit 6c4009
	      char **aliases;
Packit 6c4009
	      char **p;
Packit 6c4009
	      int found;
Packit 6c4009
Packit 6c4009
	      aliases = charmap_aliases (CHARMAP_PATH, dirent);
Packit 6c4009
	      found = 0;
Packit 6c4009
	      for (p = aliases; *p; p++)
Packit 6c4009
		if (strcasecmp (*p, filename) == 0)
Packit 6c4009
		  {
Packit 6c4009
		    found = 1;
Packit 6c4009
		    break;
Packit 6c4009
		  }
Packit 6c4009
	      charmap_free_aliases (aliases);
Packit 6c4009
Packit 6c4009
	      if (found)
Packit 6c4009
		{
Packit 6c4009
		  struct linereader *cmfile;
Packit 6c4009
Packit 6c4009
		  cmfile = cmlr_open (CHARMAP_PATH, dirent, charmap_hash);
Packit 6c4009
		  if (cmfile != NULL)
Packit 6c4009
		    result = parse_charmap (cmfile, verbose, be_quiet);
Packit 6c4009
Packit 6c4009
		  break;
Packit 6c4009
		}
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  charmap_closedir (dir);
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (result == NULL && DEFAULT_CHARMAP != NULL)
Packit 6c4009
    {
Packit 6c4009
      struct linereader *cmfile;
Packit 6c4009
Packit 6c4009
      cmfile = cmlr_open (CHARMAP_PATH, DEFAULT_CHARMAP, charmap_hash);
Packit 6c4009
      if (cmfile != NULL)
Packit 6c4009
	result = parse_charmap (cmfile, verbose, be_quiet);
Packit 6c4009
Packit 6c4009
      if (result == NULL)
Packit 6c4009
	record_error (4, errno,
Packit 6c4009
		      _("default character map file `%s' not found"),
Packit 6c4009
		      DEFAULT_CHARMAP);
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (result != NULL && result->code_set_name == NULL)
Packit 6c4009
    /* The input file does not specify a code set name.  This
Packit 6c4009
       shouldn't happen but we should cope with it.  */
Packit 6c4009
    result->code_set_name = basename (filename);
Packit 6c4009
Packit 6c4009
  /* Test of ASCII compatibility of locale encoding.
Packit 6c4009
Packit 6c4009
     Verify that the encoding to be used in a locale is ASCII compatible,
Packit 6c4009
     at least for the graphic characters, excluding the control characters,
Packit 6c4009
     '$' and '@'.  This constraint comes from an ISO C 99 restriction.
Packit 6c4009
Packit 6c4009
     ISO C 99 section 7.17.(2) (about wchar_t):
Packit 6c4009
       the null character shall have the code value zero and each member of
Packit 6c4009
       the basic character set shall have a code value equal to its value
Packit 6c4009
       when used as the lone character in an integer character constant.
Packit 6c4009
     ISO C 99 section 5.2.1.(3):
Packit 6c4009
       Both the basic source and basic execution character sets shall have
Packit 6c4009
       the following members: the 26 uppercase letters of the Latin alphabet
Packit 6c4009
            A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
Packit 6c4009
       the 26 lowercase letters of the Latin alphabet
Packit 6c4009
            a b c d e f g h i j k l m n o p q r s t u v w x y z
Packit 6c4009
       the 10 decimal digits
Packit 6c4009
            0 1 2 3 4 5 6 7 8 9
Packit 6c4009
       the following 29 graphic characters
Packit 6c4009
            ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
Packit 6c4009
       the space character, and control characters representing horizontal
Packit 6c4009
       tab, vertical tab, and form feed.
Packit 6c4009
Packit 6c4009
     Therefore, for all members of the "basic character set", the 'char' code
Packit 6c4009
     must have the same value as the 'wchar_t' code, which in glibc is the
Packit 6c4009
     same as the Unicode code, which for all of the enumerated characters
Packit 6c4009
     is identical to the ASCII code. */
Packit 6c4009
  if (result != NULL && use_default)
Packit 6c4009
    {
Packit 6c4009
      static const char basic_charset[] =
Packit 6c4009
	{
Packit 6c4009
	  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
Packit 6c4009
	  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
Packit 6c4009
	  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
Packit 6c4009
	  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
Packit 6c4009
	  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
Packit 6c4009
	  '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
Packit 6c4009
	  '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
Packit 6c4009
	  '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
Packit 6c4009
	};
Packit 6c4009
      int failed = 0;
Packit 6c4009
      const char *p = basic_charset;
Packit 6c4009
Packit 6c4009
      do
Packit 6c4009
	{
Packit 6c4009
	  struct charseq *seq = charmap_find_symbol (result, p, 1);
Packit 6c4009
Packit 6c4009
	  if (seq == NULL || seq->ucs4 != (uint32_t) *p)
Packit 6c4009
	    failed = 1;
Packit 6c4009
	}
Packit 6c4009
      while (*p++ != '\0');
Packit 6c4009
Packit 6c4009
      if (failed)
Packit 6c4009
	{
Packit 6c4009
	  /* A user may disable the ASCII compatibility warning check,
Packit 6c4009
	     but we must remember that the encoding is not ASCII
Packit 6c4009
	     compatible, since it may have other implications.  Later
Packit 6c4009
	     we will set _NL_CTYPE_MAP_TO_NONASCII from this value.  */
Packit 6c4009
	  if (warn_ascii)
Packit 6c4009
	    record_warning (_(
Packit 6c4009
"character map `%s' is not ASCII compatible, locale not ISO C compliant "
Packit 6c4009
"[--no-warnings=ascii]"),
Packit 6c4009
			    result->code_set_name);
Packit 6c4009
	  enc_not_ascii_compatible = true;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  return result;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
static struct charmap_t *
Packit 6c4009
parse_charmap (struct linereader *cmfile, int verbose, int be_quiet)
Packit 6c4009
{
Packit 6c4009
  struct charmap_t *result;
Packit 6c4009
  int state;
Packit 6c4009
  enum token_t expected_tok = tok_error;
Packit 6c4009
  const char *expected_str = NULL;
Packit 6c4009
  char *from_name = NULL;
Packit 6c4009
  char *to_name = NULL;
Packit 6c4009
  enum token_t ellipsis = 0;
Packit 6c4009
  int step = 1;
Packit 6c4009
Packit 6c4009
  /* We don't want symbolic names in string to be translated.  */
Packit 6c4009
  cmfile->translate_strings = 0;
Packit 6c4009
Packit 6c4009
  /* Allocate room for result.  */
Packit 6c4009
  result = (struct charmap_t *) xmalloc (sizeof (struct charmap_t));
Packit 6c4009
  memset (result, '\0', sizeof (struct charmap_t));
Packit 6c4009
  /* The default DEFAULT_WIDTH is 1.  */
Packit 6c4009
  result->width_default = 1;
Packit 6c4009
Packit 6c4009
#define obstack_chunk_alloc malloc
Packit 6c4009
#define obstack_chunk_free free
Packit 6c4009
  obstack_init (&result->mem_pool);
Packit 6c4009
Packit 6c4009
  if (init_hash (&result->char_table, 256)
Packit 6c4009
      || init_hash (&result->byte_table, 256))
Packit 6c4009
    {
Packit 6c4009
      free (result);
Packit 6c4009
      return NULL;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* We use a state machine to describe the charmap description file
Packit 6c4009
     format.  */
Packit 6c4009
  state = 1;
Packit 6c4009
  while (1)
Packit 6c4009
    {
Packit 6c4009
      /* What's on?  */
Packit 6c4009
      struct token *now = lr_token (cmfile, NULL, NULL, NULL, verbose);
Packit 6c4009
      enum token_t nowtok = now->tok;
Packit 6c4009
      struct token *arg;
Packit 6c4009
Packit 6c4009
      if (nowtok == tok_eof)
Packit 6c4009
	break;
Packit 6c4009
Packit 6c4009
      switch (state)
Packit 6c4009
	{
Packit 6c4009
	case 1:
Packit 6c4009
	  /* The beginning.  We expect the special declarations, EOL or
Packit 6c4009
	     `CHARMAP'.  */
Packit 6c4009
	  if (nowtok == tok_eol)
Packit 6c4009
	    /* Ignore empty lines.  */
Packit 6c4009
	    continue;
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_charmap)
Packit 6c4009
	    {
Packit 6c4009
	      from_name = NULL;
Packit 6c4009
	      to_name = NULL;
Packit 6c4009
Packit 6c4009
	      /* We have to set up the real work.  Fill in some
Packit 6c4009
		 default values.  */
Packit 6c4009
	      if (result->mb_cur_max == 0)
Packit 6c4009
		result->mb_cur_max = 1;
Packit 6c4009
	      if (result->mb_cur_min == 0)
Packit 6c4009
		result->mb_cur_min = result->mb_cur_max;
Packit 6c4009
	      if (result->mb_cur_min > result->mb_cur_max)
Packit 6c4009
		{
Packit 6c4009
		  record_error (0, 0, _("\
Packit 6c4009
%s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
Packit 6c4009
				cmfile->fname);
Packit 6c4009
Packit 6c4009
		  result->mb_cur_min = result->mb_cur_max;
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 1);
Packit 6c4009
Packit 6c4009
	      state = 2;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max
Packit 6c4009
	      && nowtok != tok_mb_cur_min && nowtok != tok_escape_char
Packit 6c4009
	      && nowtok != tok_comment_char && nowtok != tok_g0esc
Packit 6c4009
	      && nowtok != tok_g1esc && nowtok != tok_g2esc
Packit 6c4009
	      && nowtok != tok_g3esc && nowtok != tok_repertoiremap
Packit 6c4009
	      && nowtok != tok_include)
Packit 6c4009
	    {
Packit 6c4009
	      lr_error (cmfile, _("syntax error in prolog: %s"),
Packit 6c4009
			_("invalid definition"));
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 0);
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* We know that we need an argument.  */
Packit 6c4009
	  arg = lr_token (cmfile, NULL, NULL, NULL, verbose);
Packit 6c4009
Packit 6c4009
	  switch (nowtok)
Packit 6c4009
	    {
Packit 6c4009
	    case tok_code_set_name:
Packit 6c4009
	    case tok_repertoiremap:
Packit 6c4009
	      if (arg->tok != tok_ident && arg->tok != tok_string)
Packit 6c4009
		{
Packit 6c4009
		badarg:
Packit 6c4009
		  lr_error (cmfile, _("syntax error in prolog: %s"),
Packit 6c4009
			    _("bad argument"));
Packit 6c4009
Packit 6c4009
		  lr_ignore_rest (cmfile, 0);
Packit 6c4009
		  continue;
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      if (nowtok == tok_code_set_name)
Packit 6c4009
		result->code_set_name = obstack_copy0 (&result->mem_pool,
Packit 6c4009
						       arg->val.str.startmb,
Packit 6c4009
						       arg->val.str.lenmb);
Packit 6c4009
	      else
Packit 6c4009
		result->repertoiremap = obstack_copy0 (&result->mem_pool,
Packit 6c4009
						       arg->val.str.startmb,
Packit 6c4009
						       arg->val.str.lenmb);
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 1);
Packit 6c4009
	      continue;
Packit 6c4009
Packit 6c4009
	    case tok_mb_cur_max:
Packit 6c4009
	    case tok_mb_cur_min:
Packit 6c4009
	      if (arg->tok != tok_number)
Packit 6c4009
		goto badarg;
Packit 6c4009
Packit 6c4009
	      if ((nowtok == tok_mb_cur_max
Packit 6c4009
		       && result->mb_cur_max != 0)
Packit 6c4009
		      || (nowtok == tok_mb_cur_max
Packit 6c4009
			  && result->mb_cur_max != 0))
Packit 6c4009
		lr_error (cmfile, _("duplicate definition of <%s>"),
Packit 6c4009
			  nowtok == tok_mb_cur_min
Packit 6c4009
			  ? "mb_cur_min" : "mb_cur_max");
Packit 6c4009
Packit 6c4009
	      if (arg->val.num < 1)
Packit 6c4009
		{
Packit 6c4009
		  lr_error (cmfile,
Packit 6c4009
			    _("value for <%s> must be 1 or greater"),
Packit 6c4009
			    nowtok == tok_mb_cur_min
Packit 6c4009
			    ? "mb_cur_min" : "mb_cur_max");
Packit 6c4009
Packit 6c4009
		  lr_ignore_rest (cmfile, 0);
Packit 6c4009
		  continue;
Packit 6c4009
		}
Packit 6c4009
	      if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0
Packit 6c4009
		   && (int) arg->val.num < result->mb_cur_min)
Packit 6c4009
		  || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0
Packit 6c4009
		      && (int) arg->val.num > result->mb_cur_max))
Packit 6c4009
		{
Packit 6c4009
		  lr_error (cmfile, _("\
Packit 6c4009
value of <%s> must be greater or equal than the value of <%s>"),
Packit 6c4009
			    "mb_cur_max", "mb_cur_min");
Packit 6c4009
Packit 6c4009
		  lr_ignore_rest (cmfile, 0);
Packit 6c4009
		  continue;
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      if (nowtok == tok_mb_cur_max)
Packit 6c4009
		result->mb_cur_max = arg->val.num;
Packit 6c4009
	      else
Packit 6c4009
		result->mb_cur_min = arg->val.num;
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 1);
Packit 6c4009
	      continue;
Packit 6c4009
Packit 6c4009
	    case tok_escape_char:
Packit 6c4009
	    case tok_comment_char:
Packit 6c4009
	      if (arg->tok != tok_ident)
Packit 6c4009
		goto badarg;
Packit 6c4009
Packit 6c4009
	      if (arg->val.str.lenmb != 1)
Packit 6c4009
		{
Packit 6c4009
		  lr_error (cmfile, _("\
Packit 6c4009
argument to <%s> must be a single character"),
Packit 6c4009
			    nowtok == tok_escape_char ? "escape_char"
Packit 6c4009
						      : "comment_char");
Packit 6c4009
Packit 6c4009
		  lr_ignore_rest (cmfile, 0);
Packit 6c4009
		  continue;
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      if (nowtok == tok_escape_char)
Packit 6c4009
		cmfile->escape_char = *arg->val.str.startmb;
Packit 6c4009
	      else
Packit 6c4009
		cmfile->comment_char = *arg->val.str.startmb;
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 1);
Packit 6c4009
	      continue;
Packit 6c4009
Packit 6c4009
	    case tok_g0esc:
Packit 6c4009
	    case tok_g1esc:
Packit 6c4009
	    case tok_g2esc:
Packit 6c4009
	    case tok_g3esc:
Packit 6c4009
	    case tok_escseq:
Packit 6c4009
	      lr_ignore_rest (cmfile, 0); /* XXX */
Packit 6c4009
	      continue;
Packit 6c4009
Packit 6c4009
	    case tok_include:
Packit 6c4009
	      lr_error (cmfile, _("\
Packit 6c4009
character sets with locking states are not supported"));
Packit 6c4009
	      exit (4);
Packit 6c4009
Packit 6c4009
	    default:
Packit 6c4009
	      /* Cannot happen.  */
Packit 6c4009
	      assert (! "Should not happen");
Packit 6c4009
	    }
Packit 6c4009
	  break;
Packit 6c4009
Packit 6c4009
	case 2:
Packit 6c4009
	  /* We have seen `CHARMAP' and now are in the body.  Each line
Packit 6c4009
	     must have the format "%s %s %s\n" or "%s...%s %s %s\n".  */
Packit 6c4009
	  if (nowtok == tok_eol)
Packit 6c4009
	    /* Ignore empty lines.  */
Packit 6c4009
	    continue;
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_end)
Packit 6c4009
	    {
Packit 6c4009
	      expected_tok = tok_charmap;
Packit 6c4009
	      expected_str = "CHARMAP";
Packit 6c4009
	      state = 90;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
Packit 6c4009
	    {
Packit 6c4009
	      lr_error (cmfile, _("syntax error in %s definition: %s"),
Packit 6c4009
			"CHARMAP", _("no symbolic name given"));
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 0);
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* If the previous line was not completely correct free the
Packit 6c4009
	     used memory.  */
Packit 6c4009
	  if (from_name != NULL)
Packit 6c4009
	    obstack_free (&result->mem_pool, from_name);
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_bsymbol)
Packit 6c4009
	    from_name = (char *) obstack_copy0 (&result->mem_pool,
Packit 6c4009
						now->val.str.startmb,
Packit 6c4009
						now->val.str.lenmb);
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      obstack_printf (&result->mem_pool, "U%08X",
Packit 6c4009
			      cmfile->token.val.ucs4);
Packit 6c4009
	      obstack_1grow (&result->mem_pool, '\0');
Packit 6c4009
	      from_name = (char *) obstack_finish (&result->mem_pool);
Packit 6c4009
	    }
Packit 6c4009
	  to_name = NULL;
Packit 6c4009
Packit 6c4009
	  state = 3;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 3:
Packit 6c4009
	  /* We have two possibilities: We can see an ellipsis or an
Packit 6c4009
	     encoding value.  */
Packit 6c4009
	  if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
Packit 6c4009
	      || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2
Packit 6c4009
	      || nowtok == tok_ellipsis2_2)
Packit 6c4009
	    {
Packit 6c4009
	      ellipsis = nowtok;
Packit 6c4009
	      if (nowtok == tok_ellipsis4_2)
Packit 6c4009
		{
Packit 6c4009
		  step = 2;
Packit 6c4009
		  nowtok = tok_ellipsis4;
Packit 6c4009
		}
Packit 6c4009
	      else if (nowtok == tok_ellipsis2_2)
Packit 6c4009
		{
Packit 6c4009
		  step = 2;
Packit 6c4009
		  nowtok = tok_ellipsis2;
Packit 6c4009
		}
Packit 6c4009
	      state = 4;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
	  /* FALLTHROUGH */
Packit 6c4009
Packit 6c4009
	case 5:
Packit 6c4009
	  if (nowtok != tok_charcode)
Packit 6c4009
	    {
Packit 6c4009
	      lr_error (cmfile, _("syntax error in %s definition: %s"),
Packit 6c4009
			"CHARMAP", _("invalid encoding given"));
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 0);
Packit 6c4009
Packit 6c4009
	      state = 2;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (now->val.charcode.nbytes < result->mb_cur_min)
Packit 6c4009
	    lr_error (cmfile, _("too few bytes in character encoding"));
Packit 6c4009
	  else if (now->val.charcode.nbytes > result->mb_cur_max)
Packit 6c4009
	    lr_error (cmfile, _("too many bytes in character encoding"));
Packit 6c4009
	  else
Packit 6c4009
	    charmap_new_char (cmfile, result, now->val.charcode.nbytes,
Packit 6c4009
			      now->val.charcode.bytes, from_name, to_name,
Packit 6c4009
			      ellipsis != tok_ellipsis2, step);
Packit 6c4009
Packit 6c4009
	  /* Ignore trailing comment silently.  */
Packit 6c4009
	  lr_ignore_rest (cmfile, 0);
Packit 6c4009
Packit 6c4009
	  from_name = NULL;
Packit 6c4009
	  to_name = NULL;
Packit 6c4009
	  ellipsis = tok_none;
Packit 6c4009
	  step = 1;
Packit 6c4009
Packit 6c4009
	  state = 2;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 4:
Packit 6c4009
	  if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
Packit 6c4009
	    {
Packit 6c4009
	      lr_error (cmfile, _("syntax error in %s definition: %s"),
Packit 6c4009
			"CHARMAP",
Packit 6c4009
			_("no symbolic name given for end of range"));
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 0);
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* Copy the to-name in a safe place.  */
Packit 6c4009
	  if (nowtok == tok_bsymbol)
Packit 6c4009
	    to_name = (char *) obstack_copy0 (&result->mem_pool,
Packit 6c4009
					      cmfile->token.val.str.startmb,
Packit 6c4009
					      cmfile->token.val.str.lenmb);
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      obstack_printf (&result->mem_pool, "U%08X",
Packit 6c4009
			      cmfile->token.val.ucs4);
Packit 6c4009
	      obstack_1grow (&result->mem_pool, '\0');
Packit 6c4009
	      to_name = (char *) obstack_finish (&result->mem_pool);
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  state = 5;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 90:
Packit 6c4009
	  if (nowtok != expected_tok)
Packit 6c4009
	    lr_error (cmfile, _("\
Packit 6c4009
%1$s: definition does not end with `END %1$s'"), expected_str);
Packit 6c4009
Packit 6c4009
	  lr_ignore_rest (cmfile, nowtok == expected_tok);
Packit 6c4009
	  state = 91;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 91:
Packit 6c4009
	  /* Waiting for WIDTH... */
Packit 6c4009
	  if (nowtok == tok_eol)
Packit 6c4009
	    /* Ignore empty lines.  */
Packit 6c4009
	    continue;
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_width_default)
Packit 6c4009
	    {
Packit 6c4009
	      state = 92;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_width)
Packit 6c4009
	    {
Packit 6c4009
	      lr_ignore_rest (cmfile, 1);
Packit 6c4009
	      state = 93;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_width_variable)
Packit 6c4009
	    {
Packit 6c4009
	      lr_ignore_rest (cmfile, 1);
Packit 6c4009
	      state = 98;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  lr_error (cmfile, _("\
Packit 6c4009
only WIDTH definitions are allowed to follow the CHARMAP definition"));
Packit 6c4009
Packit 6c4009
	  lr_ignore_rest (cmfile, 0);
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 92:
Packit 6c4009
	  if (nowtok != tok_number)
Packit 6c4009
	    lr_error (cmfile, _("value for %s must be an integer"),
Packit 6c4009
		      "WIDTH_DEFAULT");
Packit 6c4009
	  else
Packit 6c4009
	    result->width_default = now->val.num;
Packit 6c4009
Packit 6c4009
	  lr_ignore_rest (cmfile, nowtok == tok_number);
Packit 6c4009
Packit 6c4009
	  state = 91;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 93:
Packit 6c4009
	  /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
Packit 6c4009
	     "%s...%s %d\n".  */
Packit 6c4009
	  if (nowtok == tok_eol)
Packit 6c4009
	    /* ignore empty lines.  */
Packit 6c4009
	    continue;
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_end)
Packit 6c4009
	    {
Packit 6c4009
	      expected_tok = tok_width;
Packit 6c4009
	      expected_str = "WIDTH";
Packit 6c4009
	      state = 90;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
Packit 6c4009
	    {
Packit 6c4009
	      lr_error (cmfile, _("syntax error in %s definition: %s"),
Packit 6c4009
			"WIDTH", _("no symbolic name given"));
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 0);
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (from_name != NULL)
Packit 6c4009
	    obstack_free (&result->mem_pool, from_name);
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_bsymbol)
Packit 6c4009
	    from_name = (char *) obstack_copy0 (&result->mem_pool,
Packit 6c4009
						now->val.str.startmb,
Packit 6c4009
						now->val.str.lenmb);
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      obstack_printf (&result->mem_pool, "U%08X",
Packit 6c4009
			      cmfile->token.val.ucs4);
Packit 6c4009
	      obstack_1grow (&result->mem_pool, '\0');
Packit 6c4009
	      from_name = (char *) obstack_finish (&result->mem_pool);
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  to_name = NULL;
Packit 6c4009
Packit 6c4009
	  state = 94;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 94:
Packit 6c4009
	  if (nowtok == tok_ellipsis3)
Packit 6c4009
	    {
Packit 6c4009
	      state = 95;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit Bot 04b074
	  /* Fall through.  */
Packit 6c4009
Packit 6c4009
	case 96:
Packit 6c4009
	  if (nowtok != tok_number)
Packit 6c4009
	    lr_error (cmfile, _("value for %s must be an integer"),
Packit 6c4009
		      "WIDTH");
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      /* Store width for chars.  */
Packit 6c4009
	      new_width (cmfile, result, from_name, to_name, now->val.num);
Packit 6c4009
Packit 6c4009
	      from_name = NULL;
Packit 6c4009
	      to_name = NULL;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  lr_ignore_rest (cmfile, nowtok == tok_number);
Packit 6c4009
Packit 6c4009
	  state = 93;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 95:
Packit 6c4009
	  if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
Packit 6c4009
	    {
Packit 6c4009
	      lr_error (cmfile, _("syntax error in %s definition: %s"),
Packit 6c4009
			"WIDTH", _("no symbolic name given for end of range"));
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 0);
Packit 6c4009
Packit 6c4009
	      state = 93;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_bsymbol)
Packit 6c4009
	    to_name = (char *) obstack_copy0 (&result->mem_pool,
Packit 6c4009
					      now->val.str.startmb,
Packit 6c4009
					      now->val.str.lenmb);
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      obstack_printf (&result->mem_pool, "U%08X",
Packit 6c4009
			      cmfile->token.val.ucs4);
Packit 6c4009
	      obstack_1grow (&result->mem_pool, '\0');
Packit 6c4009
	      to_name = (char *) obstack_finish (&result->mem_pool);
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  state = 96;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 98:
Packit 6c4009
	  /* We now expect `END WIDTH_VARIABLE' or lines of the format
Packit 6c4009
	     "%s\n" or "%s...%s\n".  */
Packit 6c4009
	  if (nowtok == tok_eol)
Packit 6c4009
	    /* ignore empty lines.  */
Packit 6c4009
	    continue;
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_end)
Packit 6c4009
	    {
Packit 6c4009
	      expected_tok = tok_width_variable;
Packit 6c4009
	      expected_str = "WIDTH_VARIABLE";
Packit 6c4009
	      state = 90;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
Packit 6c4009
	    {
Packit 6c4009
	      lr_error (cmfile, _("syntax error in %s definition: %s"),
Packit 6c4009
			"WIDTH_VARIABLE", _("no symbolic name given"));
Packit 6c4009
Packit 6c4009
	      lr_ignore_rest (cmfile, 0);
Packit 6c4009
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (from_name != NULL)
Packit 6c4009
	    obstack_free (&result->mem_pool, from_name);
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_bsymbol)
Packit 6c4009
	    from_name = (char *) obstack_copy0 (&result->mem_pool,
Packit 6c4009
						now->val.str.startmb,
Packit 6c4009
						now->val.str.lenmb);
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      obstack_printf (&result->mem_pool, "U%08X",
Packit 6c4009
			      cmfile->token.val.ucs4);
Packit 6c4009
	      obstack_1grow (&result->mem_pool, '\0');
Packit 6c4009
	      from_name = (char *) obstack_finish (&result->mem_pool);
Packit 6c4009
	    }
Packit 6c4009
	  to_name = NULL;
Packit 6c4009
Packit 6c4009
	  state = 99;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 99:
Packit 6c4009
	  if (nowtok == tok_ellipsis3)
Packit 6c4009
	    state = 100;
Packit 6c4009
Packit 6c4009
	  /* Store info.  */
Packit 6c4009
	  from_name = NULL;
Packit 6c4009
Packit 6c4009
	  /* Warn */
Packit 6c4009
	  state = 98;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	case 100:
Packit 6c4009
	  if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
Packit 6c4009
	    {
Packit 6c4009
	      lr_error (cmfile, _("syntax error in %s definition: %s"),
Packit 6c4009
			"WIDTH_VARIABLE",
Packit 6c4009
			_("no symbolic name given for end of range"));
Packit 6c4009
	      lr_ignore_rest (cmfile, 0);
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (nowtok == tok_bsymbol)
Packit 6c4009
	    to_name = (char *) obstack_copy0 (&result->mem_pool,
Packit 6c4009
					      now->val.str.startmb,
Packit 6c4009
					      now->val.str.lenmb);
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      obstack_printf (&result->mem_pool, "U%08X",
Packit 6c4009
			      cmfile->token.val.ucs4);
Packit 6c4009
	      obstack_1grow (&result->mem_pool, '\0');
Packit 6c4009
	      to_name = (char *) obstack_finish (&result->mem_pool);
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* XXX Enter value into table.  */
Packit 6c4009
Packit 6c4009
	  lr_ignore_rest (cmfile, 1);
Packit 6c4009
Packit 6c4009
	  state = 98;
Packit 6c4009
	  continue;
Packit 6c4009
Packit 6c4009
	default:
Packit 6c4009
	  record_error (5, 0, _("%s: error in state machine"),
Packit 6c4009
			__FILE__);
Packit 6c4009
	  /* NOTREACHED */
Packit 6c4009
	}
Packit 6c4009
      break;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (state != 91)
Packit 6c4009
    record_error (0, 0, _("%s: premature end of file"),
Packit 6c4009
		  cmfile->fname);
Packit 6c4009
Packit 6c4009
  lr_close (cmfile);
Packit 6c4009
Packit 6c4009
  return result;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
new_width (struct linereader *cmfile, struct charmap_t *result,
Packit 6c4009
	   const char *from, const char *to, unsigned long int width)
Packit 6c4009
{
Packit 6c4009
  struct charseq *from_val;
Packit 6c4009
  struct charseq *to_val;
Packit 6c4009
Packit 6c4009
  from_val = charmap_find_value (result, from, strlen (from));
Packit 6c4009
  if (from_val == NULL)
Packit 6c4009
    {
Packit 6c4009
      lr_error (cmfile, _("unknown character `%s'"), from);
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (to == NULL)
Packit 6c4009
    to_val = from_val;
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      to_val = charmap_find_value (result, to, strlen (to));
Packit 6c4009
      if (to_val == NULL)
Packit 6c4009
	{
Packit 6c4009
	  lr_error (cmfile, _("unknown character `%s'"), to);
Packit 6c4009
	  return;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Make sure the number of bytes for the end points of the range
Packit 6c4009
	 is correct.  */
Packit 6c4009
      if (from_val->nbytes != to_val->nbytes)
Packit 6c4009
	{
Packit 6c4009
	  lr_error (cmfile, _("\
Packit 6c4009
number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"),
Packit 6c4009
		    from_val->nbytes, to_val->nbytes);
Packit 6c4009
	  return;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (result->nwidth_rules >= result->nwidth_rules_max)
Packit 6c4009
    {
Packit 6c4009
      size_t new_size = result->nwidth_rules + 32;
Packit 6c4009
      struct width_rule *new_rules =
Packit 6c4009
	(struct width_rule *) obstack_alloc (&result->mem_pool,
Packit 6c4009
					     (new_size
Packit 6c4009
					      * sizeof (struct width_rule)));
Packit 6c4009
Packit 6c4009
      memcpy (new_rules, result->width_rules,
Packit 6c4009
	      result->nwidth_rules_max * sizeof (struct width_rule));
Packit 6c4009
Packit 6c4009
      result->width_rules = new_rules;
Packit 6c4009
      result->nwidth_rules_max = new_size;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  result->width_rules[result->nwidth_rules].from = from_val;
Packit 6c4009
  result->width_rules[result->nwidth_rules].to = to_val;
Packit 6c4009
  result->width_rules[result->nwidth_rules].width = (unsigned int) width;
Packit 6c4009
  ++result->nwidth_rules;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
struct charseq *
Packit 6c4009
charmap_find_value (const struct charmap_t *cm, const char *name, size_t len)
Packit 6c4009
{
Packit 6c4009
  void *result;
Packit 6c4009
Packit 6c4009
  return (find_entry ((hash_table *) &cm->char_table, name, len, &result)
Packit 6c4009
	  < 0 ? NULL : (struct charseq *) result);
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
charmap_new_char (struct linereader *lr, struct charmap_t *cm,
Packit 6c4009
		  size_t nbytes, unsigned char *bytes,
Packit 6c4009
		  const char *from, const char *to,
Packit 6c4009
		  int decimal_ellipsis, int step)
Packit 6c4009
{
Packit 6c4009
  hash_table *ht = &cm->char_table;
Packit 6c4009
  hash_table *bt = &cm->byte_table;
Packit 6c4009
  struct obstack *ob = &cm->mem_pool;
Packit 6c4009
  char *from_end;
Packit 6c4009
  char *to_end;
Packit 6c4009
  const char *cp;
Packit 6c4009
  int prefix_len, len1, len2;
Packit 6c4009
  unsigned int from_nr, to_nr, cnt;
Packit 6c4009
  struct charseq *newp;
Packit 6c4009
Packit 6c4009
  len1 = strlen (from);
Packit 6c4009
Packit 6c4009
  if (to == NULL)
Packit 6c4009
    {
Packit 6c4009
      newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
Packit 6c4009
      newp->nbytes = nbytes;
Packit 6c4009
      memcpy (newp->bytes, bytes, nbytes);
Packit 6c4009
      newp->name = from;
Packit 6c4009
Packit 6c4009
      newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
Packit 6c4009
      if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9))
Packit 6c4009
	{
Packit 6c4009
	  /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
Packit 6c4009
	     xxxx and xxxxxxxx are hexadecimal numbers.  In this case
Packit 6c4009
	     we use the value of xxxx or xxxxxxxx as the UCS4 value of
Packit 6c4009
	     this character and we don't have to consult the repertoire
Packit 6c4009
	     map.
Packit 6c4009
Packit 6c4009
	     If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
Packit 6c4009
	     and xxxxxxxx also give the code point in UCS4 but this must
Packit 6c4009
	     be in the private, i.e., unassigned, area.  This should be
Packit 6c4009
	     used for characters which do not (yet) have an equivalent
Packit 6c4009
	     in ISO 10646 and Unicode.  */
Packit 6c4009
	  char *endp;
Packit 6c4009
Packit 6c4009
	  errno = 0;
Packit 6c4009
	  newp->ucs4 = strtoul (from + 1, &endp, 16);
Packit 6c4009
	  if (endp - from != len1
Packit 6c4009
	      || (newp->ucs4 == ~((uint32_t) 0) && errno == ERANGE)
Packit 6c4009
	      || newp->ucs4 >= 0x80000000)
Packit 6c4009
	    /* This wasn't successful.  Signal this name cannot be a
Packit 6c4009
	       correct UCS value.  */
Packit 6c4009
	    newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      insert_entry (ht, from, len1, newp);
Packit 6c4009
      insert_entry (bt, newp->bytes, nbytes, newp);
Packit 6c4009
      /* Please note that it isn't a bug if a symbol is defined more
Packit 6c4009
	 than once.  All later definitions are simply discarded.  */
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* We have a range: the names must have names with equal prefixes
Packit 6c4009
     and an equal number of digits, where the second number is greater
Packit 6c4009
     or equal than the first.  */
Packit 6c4009
  len2 = strlen (to);
Packit 6c4009
Packit 6c4009
  if (len1 != len2)
Packit 6c4009
    {
Packit 6c4009
    illegal_range:
Packit 6c4009
      lr_error (lr, _("invalid names for character range"));
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  cp = &from[len1 - 1];
Packit 6c4009
  if (decimal_ellipsis)
Packit 6c4009
    while (isdigit (*cp) && cp >= from)
Packit 6c4009
      --cp;
Packit 6c4009
  else
Packit 6c4009
    while (isxdigit (*cp) && cp >= from)
Packit 6c4009
      {
Packit 6c4009
	if (!isdigit (*cp) && !isupper (*cp))
Packit 6c4009
	  lr_error (lr, _("\
Packit 6c4009
hexadecimal range format should use only capital characters"));
Packit 6c4009
	--cp;
Packit 6c4009
      }
Packit 6c4009
Packit 6c4009
  prefix_len = (cp - from) + 1;
Packit 6c4009
Packit 6c4009
  if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0)
Packit 6c4009
    goto illegal_range;
Packit 6c4009
Packit 6c4009
  errno = 0;
Packit 6c4009
  from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16);
Packit 6c4009
  if (*from_end != '\0' || (from_nr == UINT_MAX && errno == ERANGE)
Packit 6c4009
      || ((to_nr = strtoul (&to[prefix_len], &to_end,
Packit 6c4009
			    decimal_ellipsis ? 10 : 16)) == UINT_MAX
Packit 6c4009
	  && errno == ERANGE)
Packit 6c4009
      || *to_end != '\0')
Packit 6c4009
    {
Packit 6c4009
      lr_error (lr, _("<%s> and <%s> are invalid names for range"), from, to);
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (from_nr > to_nr)
Packit 6c4009
    {
Packit 6c4009
      lr_error (lr, _("upper limit in range is smaller than lower limit"));
Packit 6c4009
      return;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  for (cnt = from_nr; cnt <= to_nr; cnt += step)
Packit 6c4009
    {
Packit 6c4009
      char *name_end;
Packit 6c4009
      obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X",
Packit 6c4009
		      prefix_len, from, len1 - prefix_len, cnt);
Packit 6c4009
      obstack_1grow (ob, '\0');
Packit 6c4009
      name_end = obstack_finish (ob);
Packit 6c4009
Packit 6c4009
      newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
Packit 6c4009
      newp->nbytes = nbytes;
Packit 6c4009
      memcpy (newp->bytes, bytes, nbytes);
Packit 6c4009
      newp->name = name_end;
Packit 6c4009
Packit 6c4009
      newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
Packit 6c4009
      if ((name_end[0] == 'U' || name_end[0] == 'P')
Packit 6c4009
	  && (len1 == 5 || len1 == 9))
Packit 6c4009
	{
Packit 6c4009
	  /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
Packit 6c4009
	     xxxx and xxxxxxxx are hexadecimal numbers.  In this case
Packit 6c4009
	     we use the value of xxxx or xxxxxxxx as the UCS4 value of
Packit 6c4009
	     this character and we don't have to consult the repertoire
Packit 6c4009
	     map.
Packit 6c4009
Packit 6c4009
	     If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
Packit 6c4009
	     and xxxxxxxx also give the code point in UCS4 but this must
Packit 6c4009
	     be in the private, i.e., unassigned, area.  This should be
Packit 6c4009
	     used for characters which do not (yet) have an equivalent
Packit 6c4009
	     in ISO 10646 and Unicode.  */
Packit 6c4009
	  char *endp;
Packit 6c4009
Packit 6c4009
	  errno = 0;
Packit 6c4009
	  newp->ucs4 = strtoul (name_end + 1, &endp, 16);
Packit 6c4009
	  if (endp - name_end != len1
Packit 6c4009
	      || (newp->ucs4 == ~((uint32_t) 0) && errno == ERANGE)
Packit 6c4009
	      || newp->ucs4 >= 0x80000000)
Packit 6c4009
	    /* This wasn't successful.  Signal this name cannot be a
Packit 6c4009
	       correct UCS value.  */
Packit 6c4009
	    newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      insert_entry (ht, name_end, len1, newp);
Packit 6c4009
      insert_entry (bt, newp->bytes, nbytes, newp);
Packit 6c4009
      /* Please note we don't examine the return value since it is no error
Packit 6c4009
	 if we have two definitions for a symbol.  */
Packit 6c4009
Packit 6c4009
      /* Increment the value in the byte sequence.  */
Packit 6c4009
      if (++bytes[nbytes - 1] == '\0')
Packit 6c4009
	{
Packit 6c4009
	  int b = nbytes - 2;
Packit 6c4009
Packit 6c4009
	  do
Packit 6c4009
	    if (b < 0)
Packit 6c4009
	      {
Packit 6c4009
		lr_error (lr,
Packit 6c4009
			  _("resulting bytes for range not representable."));
Packit 6c4009
		return;
Packit 6c4009
	      }
Packit 6c4009
	  while (++bytes[b--] == 0);
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
struct charseq *
Packit 6c4009
charmap_find_symbol (const struct charmap_t *cm, const char *bytes,
Packit 6c4009
		     size_t nbytes)
Packit 6c4009
{
Packit 6c4009
  void *result;
Packit 6c4009
Packit 6c4009
  return (find_entry ((hash_table *) &cm->byte_table, bytes, nbytes, &result)
Packit 6c4009
	  < 0 ? NULL : (struct charseq *) result);
Packit 6c4009
}