Blame locale/programs/linereader.c

Packit 6c4009
/* Copyright (C) 1996-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
Packit 6c4009
Packit 6c4009
   This program is free software; you can redistribute it and/or modify
Packit 6c4009
   it under the terms of the GNU General Public License as published
Packit 6c4009
   by the Free Software Foundation; version 2 of the License, or
Packit 6c4009
   (at your option) any later version.
Packit 6c4009
Packit 6c4009
   This program is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 6c4009
   GNU General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU General Public License
Packit 6c4009
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#ifdef HAVE_CONFIG_H
Packit 6c4009
# include <config.h>
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#include <assert.h>
Packit 6c4009
#include <ctype.h>
Packit 6c4009
#include <errno.h>
Packit 6c4009
#include <libintl.h>
Packit 6c4009
#include <stdarg.h>
Packit 6c4009
#include <stdlib.h>
Packit 6c4009
#include <string.h>
Packit 6c4009
#include <stdint.h>
Packit 6c4009
Packit 6c4009
#include "localedef.h"
Packit 6c4009
#include "charmap.h"
Packit 6c4009
#include "error.h"
Packit 6c4009
#include "linereader.h"
Packit 6c4009
#include "locfile.h"
Packit 6c4009
Packit 6c4009
/* Forward declarations of the token readers private to this file.
   They are all called from lr_token.  */
static struct token *get_toplvl_escape (struct linereader *lr);
static struct token *get_symname (struct linereader *lr);
static struct token *get_ident (struct linereader *lr);
static struct token *get_string (struct linereader *lr,
				 const struct charmap_t *charmap,
				 struct localedef_t *locale,
				 const struct repertoire_t *repertoire,
				 int verbose);
Packit 6c4009
Packit 6c4009
Packit 6c4009
struct linereader *
Packit 6c4009
lr_open (const char *fname, kw_hash_fct_t hf)
Packit 6c4009
{
Packit 6c4009
  FILE *fp;
Packit 6c4009
Packit 6c4009
  if (fname == NULL || strcmp (fname, "-") == 0
Packit 6c4009
      || strcmp (fname, "/dev/stdin") == 0)
Packit 6c4009
    return lr_create (stdin, "<stdin>", hf);
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      fp = fopen (fname, "rm");
Packit 6c4009
      if (fp == NULL)
Packit 6c4009
	return NULL;
Packit 6c4009
      return lr_create (fp, fname, hf);
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
struct linereader *
Packit 6c4009
lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
Packit 6c4009
{
Packit 6c4009
  struct linereader *result;
Packit 6c4009
  int n;
Packit 6c4009
Packit 6c4009
  result = (struct linereader *) xmalloc (sizeof (*result));
Packit 6c4009
Packit 6c4009
  result->fp = fp;
Packit 6c4009
  result->fname = xstrdup (fname);
Packit 6c4009
  result->buf = NULL;
Packit 6c4009
  result->bufsize = 0;
Packit 6c4009
  result->lineno = 1;
Packit 6c4009
  result->idx = 0;
Packit 6c4009
  result->comment_char = '#';
Packit 6c4009
  result->escape_char = '\\';
Packit 6c4009
  result->translate_strings = 1;
Packit 6c4009
  result->return_widestr = 0;
Packit 6c4009
Packit 6c4009
  n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
Packit 6c4009
  if (n < 0)
Packit 6c4009
    {
Packit 6c4009
      int save = errno;
Packit 6c4009
      fclose (result->fp);
Packit 6c4009
      free ((char *) result->fname);
Packit 6c4009
      free (result);
Packit 6c4009
      errno = save;
Packit 6c4009
      return NULL;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
Packit 6c4009
    n -= 2;
Packit 6c4009
Packit 6c4009
  result->buf[n] = '\0';
Packit 6c4009
  result->bufact = n;
Packit 6c4009
  result->hash_fct = hf;
Packit 6c4009
Packit 6c4009
  return result;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
int
Packit 6c4009
lr_eof (struct linereader *lr)
Packit 6c4009
{
Packit 6c4009
  return lr->bufact = 0;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
void
Packit 6c4009
lr_ignore_rest (struct linereader *lr, int verbose)
Packit 6c4009
{
Packit 6c4009
  if (verbose)
Packit 6c4009
    {
Packit 6c4009
      while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
Packit 6c4009
	     && lr->buf[lr->idx] != lr->comment_char)
Packit 6c4009
	if (lr->buf[lr->idx] == '\0')
Packit 6c4009
	  {
Packit 6c4009
	    if (lr_next (lr) < 0)
Packit 6c4009
	      return;
Packit 6c4009
	  }
Packit 6c4009
	else
Packit 6c4009
	  ++lr->idx;
Packit 6c4009
Packit 6c4009
      if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
Packit 6c4009
	  && lr->buf[lr->idx] != lr->comment_char)
Packit 6c4009
	lr_error (lr, _("trailing garbage at end of line"));
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Ignore continued line.  */
Packit 6c4009
  while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
Packit 6c4009
    if (lr_next (lr) < 0)
Packit 6c4009
      break;
Packit 6c4009
Packit 6c4009
  lr->idx = lr->bufact;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
void
Packit 6c4009
lr_close (struct linereader *lr)
Packit 6c4009
{
Packit 6c4009
  fclose (lr->fp);
Packit 6c4009
  free (lr->buf);
Packit 6c4009
  free (lr);
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
int
Packit 6c4009
lr_next (struct linereader *lr)
Packit 6c4009
{
Packit 6c4009
  int n;
Packit 6c4009
Packit 6c4009
  n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
Packit 6c4009
  if (n < 0)
Packit 6c4009
    return -1;
Packit 6c4009
Packit 6c4009
  ++lr->lineno;
Packit 6c4009
Packit 6c4009
  if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
Packit 6c4009
    {
Packit 6c4009
#if 0
Packit 6c4009
      /* XXX Is this correct?  */
Packit 6c4009
      /* An escaped newline character is substituted with a single <SP>.  */
Packit 6c4009
      --n;
Packit 6c4009
      lr->buf[n - 1] = ' ';
Packit 6c4009
#else
Packit 6c4009
      n -= 2;
Packit 6c4009
#endif
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  lr->buf[n] = '\0';
Packit 6c4009
  lr->bufact = n;
Packit 6c4009
  lr->idx = 0;
Packit 6c4009
Packit 6c4009
  return 0;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Defined in error.c: counter incremented each time `error' is
   called.  */
extern unsigned int error_message_count;

/* Name of the executing program; the calling program is expected to
   define and set it.  */
extern char *program_name;
Packit 6c4009
Packit 6c4009
Packit 6c4009
struct token *
Packit 6c4009
lr_token (struct linereader *lr, const struct charmap_t *charmap,
Packit 6c4009
	  struct localedef_t *locale, const struct repertoire_t *repertoire,
Packit 6c4009
	  int verbose)
Packit 6c4009
{
Packit 6c4009
  int ch;
Packit 6c4009
Packit 6c4009
  while (1)
Packit 6c4009
    {
Packit 6c4009
      do
Packit 6c4009
	{
Packit 6c4009
	  ch = lr_getc (lr);
Packit 6c4009
Packit 6c4009
	  if (ch == EOF)
Packit 6c4009
	    {
Packit 6c4009
	      lr->token.tok = tok_eof;
Packit 6c4009
	      return &lr->token;
Packit 6c4009
	    };
Packit 6c4009
Packit 6c4009
	  if (ch == '\n')
Packit 6c4009
	    {
Packit 6c4009
	      lr->token.tok = tok_eol;
Packit 6c4009
	      return &lr->token;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      while (isspace (ch));
Packit 6c4009
Packit 6c4009
      if (ch != lr->comment_char)
Packit 6c4009
	break;
Packit 6c4009
Packit 6c4009
      /* Is there an newline at the end of the buffer?  */
Packit 6c4009
      if (lr->buf[lr->bufact - 1] != '\n')
Packit 6c4009
	{
Packit 6c4009
	  /* No.  Some people want this to mean that only the line in
Packit 6c4009
	     the file not the logical, concatenated line is ignored.
Packit 6c4009
	     Let's try this.  */
Packit 6c4009
	  lr->idx = lr->bufact;
Packit 6c4009
	  continue;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Ignore rest of line.  */
Packit 6c4009
      lr_ignore_rest (lr, 0);
Packit 6c4009
      lr->token.tok = tok_eol;
Packit 6c4009
      return &lr->token;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Match escape sequences.  */
Packit 6c4009
  if (ch == lr->escape_char)
Packit 6c4009
    return get_toplvl_escape (lr);
Packit 6c4009
Packit 6c4009
  /* Match ellipsis.  */
Packit 6c4009
  if (ch == '.')
Packit 6c4009
    {
Packit 6c4009
      if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
Packit 6c4009
	{
Packit 6c4009
	  int cnt;
Packit 6c4009
	  for (cnt = 0; cnt < 10; ++cnt)
Packit 6c4009
	    lr_getc (lr);
Packit 6c4009
	  lr->token.tok = tok_ellipsis4_2;
Packit 6c4009
	  return &lr->token;
Packit 6c4009
	}
Packit 6c4009
      if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
Packit 6c4009
	{
Packit 6c4009
	  lr_getc (lr);
Packit 6c4009
	  lr_getc (lr);
Packit 6c4009
	  lr_getc (lr);
Packit 6c4009
	  lr->token.tok = tok_ellipsis4;
Packit 6c4009
	  return &lr->token;
Packit 6c4009
	}
Packit 6c4009
      if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
Packit 6c4009
	{
Packit 6c4009
	  lr_getc (lr);
Packit 6c4009
	  lr_getc (lr);
Packit 6c4009
	  lr->token.tok = tok_ellipsis3;
Packit 6c4009
	  return &lr->token;
Packit 6c4009
	}
Packit 6c4009
      if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
Packit 6c4009
	{
Packit 6c4009
	  int cnt;
Packit 6c4009
	  for (cnt = 0; cnt < 6; ++cnt)
Packit 6c4009
	    lr_getc (lr);
Packit 6c4009
	  lr->token.tok = tok_ellipsis2_2;
Packit 6c4009
	  return &lr->token;
Packit 6c4009
	}
Packit 6c4009
      if (lr->buf[lr->idx] == '.')
Packit 6c4009
	{
Packit 6c4009
	  lr_getc (lr);
Packit 6c4009
	  lr->token.tok = tok_ellipsis2;
Packit 6c4009
	  return &lr->token;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  switch (ch)
Packit 6c4009
    {
Packit 6c4009
    case '<':
Packit 6c4009
      return get_symname (lr);
Packit 6c4009
Packit 6c4009
    case '0' ... '9':
Packit 6c4009
      lr->token.tok = tok_number;
Packit 6c4009
      lr->token.val.num = ch - '0';
Packit 6c4009
Packit 6c4009
      while (isdigit (ch = lr_getc (lr)))
Packit 6c4009
	{
Packit 6c4009
	  lr->token.val.num *= 10;
Packit 6c4009
	  lr->token.val.num += ch - '0';
Packit 6c4009
	}
Packit 6c4009
      if (isalpha (ch))
Packit 6c4009
	lr_error (lr, _("garbage at end of number"));
Packit 6c4009
      lr_ungetn (lr, 1);
Packit 6c4009
Packit 6c4009
      return &lr->token;
Packit 6c4009
Packit 6c4009
    case ';':
Packit 6c4009
      lr->token.tok = tok_semicolon;
Packit 6c4009
      return &lr->token;
Packit 6c4009
Packit 6c4009
    case ',':
Packit 6c4009
      lr->token.tok = tok_comma;
Packit 6c4009
      return &lr->token;
Packit 6c4009
Packit 6c4009
    case '(':
Packit 6c4009
      lr->token.tok = tok_open_brace;
Packit 6c4009
      return &lr->token;
Packit 6c4009
Packit 6c4009
    case ')':
Packit 6c4009
      lr->token.tok = tok_close_brace;
Packit 6c4009
      return &lr->token;
Packit 6c4009
Packit 6c4009
    case '"':
Packit 6c4009
      return get_string (lr, charmap, locale, repertoire, verbose);
Packit 6c4009
Packit 6c4009
    case '-':
Packit 6c4009
      ch = lr_getc (lr);
Packit 6c4009
      if (ch == '1')
Packit 6c4009
	{
Packit 6c4009
	  lr->token.tok = tok_minus1;
Packit 6c4009
	  return &lr->token;
Packit 6c4009
	}
Packit 6c4009
      lr_ungetn (lr, 2);
Packit 6c4009
      break;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  return get_ident (lr);
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
static struct token *
Packit 6c4009
get_toplvl_escape (struct linereader *lr)
Packit 6c4009
{
Packit 6c4009
  /* This is supposed to be a numeric value.  We return the
Packit 6c4009
     numerical value and the number of bytes.  */
Packit 6c4009
  size_t start_idx = lr->idx - 1;
Packit 6c4009
  unsigned char *bytes = lr->token.val.charcode.bytes;
Packit 6c4009
  size_t nbytes = 0;
Packit 6c4009
  int ch;
Packit 6c4009
Packit 6c4009
  do
Packit 6c4009
    {
Packit 6c4009
      unsigned int byte = 0;
Packit 6c4009
      unsigned int base = 8;
Packit 6c4009
Packit 6c4009
      ch = lr_getc (lr);
Packit 6c4009
Packit 6c4009
      if (ch == 'd')
Packit 6c4009
	{
Packit 6c4009
	  base = 10;
Packit 6c4009
	  ch = lr_getc (lr);
Packit 6c4009
	}
Packit 6c4009
      else if (ch == 'x')
Packit 6c4009
	{
Packit 6c4009
	  base = 16;
Packit 6c4009
	  ch = lr_getc (lr);
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      if ((base == 16 && !isxdigit (ch))
Packit 6c4009
	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
Packit 6c4009
	{
Packit 6c4009
	esc_error:
Packit 6c4009
	  lr->token.val.str.startmb = &lr->buf[start_idx];
Packit 6c4009
Packit 6c4009
	  while (ch != EOF && !isspace (ch))
Packit 6c4009
	    ch = lr_getc (lr);
Packit 6c4009
	  lr->token.val.str.lenmb = lr->idx - start_idx;
Packit 6c4009
Packit 6c4009
	  lr->token.tok = tok_error;
Packit 6c4009
	  return &lr->token;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      if (isdigit (ch))
Packit 6c4009
	byte = ch - '0';
Packit 6c4009
      else
Packit 6c4009
	byte = tolower (ch) - 'a' + 10;
Packit 6c4009
Packit 6c4009
      ch = lr_getc (lr);
Packit 6c4009
      if ((base == 16 && !isxdigit (ch))
Packit 6c4009
	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
Packit 6c4009
	goto esc_error;
Packit 6c4009
Packit 6c4009
      byte *= base;
Packit 6c4009
      if (isdigit (ch))
Packit 6c4009
	byte += ch - '0';
Packit 6c4009
      else
Packit 6c4009
	byte += tolower (ch) - 'a' + 10;
Packit 6c4009
Packit 6c4009
      ch = lr_getc (lr);
Packit 6c4009
      if (base != 16 && isdigit (ch))
Packit 6c4009
	{
Packit 6c4009
	  byte *= base;
Packit 6c4009
	  byte += ch - '0';
Packit 6c4009
Packit 6c4009
	  ch = lr_getc (lr);
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      bytes[nbytes++] = byte;
Packit 6c4009
    }
Packit 6c4009
  while (ch == lr->escape_char
Packit 6c4009
	 && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
Packit 6c4009
Packit 6c4009
  if (!isspace (ch))
Packit 6c4009
    lr_error (lr, _("garbage at end of character code specification"));
Packit 6c4009
Packit 6c4009
  lr_ungetn (lr, 1);
Packit 6c4009
Packit 6c4009
  lr->token.tok = tok_charcode;
Packit 6c4009
  lr->token.val.charcode.nbytes = nbytes;
Packit 6c4009
Packit 6c4009
  return &lr->token;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Append the single byte CH to the multibyte buffer of the enclosing
   function.  Expects the locals `buf' (heap buffer), `bufact' (bytes
   used) and `bufmax' (capacity) to be in scope; the capacity is doubled
   when the buffer is full.  */
#define ADDC(ch) \
  do									      \
    {									      \
      if (bufact == bufmax)						      \
	buf = xrealloc (buf, bufmax *= 2);				      \
      buf[bufact++] = (ch);						      \
    }									      \
  while (0)
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Append the L bytes starting at S to the multibyte buffer of the
   enclosing function.  Expects the locals `buf' (heap buffer), `bufact'
   (bytes used) and `bufmax' (capacity) to be in scope.

   When the data does not fit, the capacity becomes twice the larger of
   the old capacity and L.  Since `bufact' never exceeds the old
   capacity this always leaves room for the copy.  The write position
   `bufact' is only ever advanced by L; the previous version overwrote
   it while growing, which corrupted the string and could write past
   the end of the buffer.  */
#define ADDS(s, l) \
  do									      \
    {									      \
      size_t _l = (l);							      \
      if (bufact + _l > bufmax)						      \
	{								      \
	  if (bufmax < _l)						      \
	    bufmax = _l;						      \
	  bufmax *= 2;							      \
	  buf = xrealloc (buf, bufmax);					      \
	}								      \
      memcpy (&buf[bufact], (s), _l);					      \
      bufact += _l;							      \
    }									      \
  while (0)
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Append the wide character CH to the wide buffer of the enclosing
   function.  Expects the locals `buf2' (heap buffer), `buf2act'
   (elements used) and `buf2max' (capacity in elements) to be in scope;
   the capacity is doubled when the buffer is full, with four bytes
   allocated per element.  */
#define ADDWC(ch) \
  do									      \
    {									      \
      if (buf2act == buf2max)						      \
	buf2 = xrealloc (buf2, (buf2max *= 2) * 4);			      \
      buf2[buf2act++] = (ch);						      \
    }									      \
  while (0)
Packit 6c4009
Packit 6c4009
Packit 6c4009
static struct token *
Packit 6c4009
get_symname (struct linereader *lr)
Packit 6c4009
{
Packit 6c4009
  /* Symbol in brackets.  We must distinguish three kinds:
Packit 6c4009
     1. reserved words
Packit 6c4009
     2. ISO 10646 position values
Packit 6c4009
     3. all other.  */
Packit 6c4009
  char *buf;
Packit 6c4009
  size_t bufact = 0;
Packit 6c4009
  size_t bufmax = 56;
Packit 6c4009
  const struct keyword_t *kw;
Packit 6c4009
  int ch;
Packit 6c4009
Packit 6c4009
  buf = (char *) xmalloc (bufmax);
Packit 6c4009
Packit 6c4009
  do
Packit 6c4009
    {
Packit 6c4009
      ch = lr_getc (lr);
Packit 6c4009
      if (ch == lr->escape_char)
Packit 6c4009
	{
Packit 6c4009
	  int c2 = lr_getc (lr);
Packit 6c4009
	  ADDC (c2);
Packit 6c4009
Packit 6c4009
	  if (c2 == '\n')
Packit 6c4009
	    ch = '\n';
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	ADDC (ch);
Packit 6c4009
    }
Packit 6c4009
  while (ch != '>' && ch != '\n');
Packit 6c4009
Packit 6c4009
  if (ch == '\n')
Packit 6c4009
    lr_error (lr, _("unterminated symbolic name"));
Packit 6c4009
Packit 6c4009
  /* Test for ISO 10646 position value.  */
Packit 6c4009
  if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
Packit 6c4009
    {
Packit 6c4009
      char *cp = buf + 1;
Packit 6c4009
      while (cp < &buf[bufact - 1] && isxdigit (*cp))
Packit 6c4009
	++cp;
Packit 6c4009
Packit 6c4009
      if (cp == &buf[bufact - 1])
Packit 6c4009
	{
Packit 6c4009
	  /* Yes, it is.  */
Packit 6c4009
	  lr->token.tok = tok_ucs4;
Packit 6c4009
	  lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
Packit 6c4009
Packit 6c4009
	  return &lr->token;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* It is a symbolic name.  Test for reserved words.  */
Packit 6c4009
  kw = lr->hash_fct (buf, bufact - 1);
Packit 6c4009
Packit 6c4009
  if (kw != NULL && kw->symname_or_ident == 1)
Packit 6c4009
    {
Packit 6c4009
      lr->token.tok = kw->token;
Packit 6c4009
      free (buf);
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      lr->token.tok = tok_bsymbol;
Packit 6c4009
Packit 6c4009
      buf = xrealloc (buf, bufact + 1);
Packit 6c4009
      buf[bufact] = '\0';
Packit 6c4009
Packit 6c4009
      lr->token.val.str.startmb = buf;
Packit 6c4009
      lr->token.val.str.lenmb = bufact - 1;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  return &lr->token;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
static struct token *
Packit 6c4009
get_ident (struct linereader *lr)
Packit 6c4009
{
Packit 6c4009
  char *buf;
Packit 6c4009
  size_t bufact;
Packit 6c4009
  size_t bufmax = 56;
Packit 6c4009
  const struct keyword_t *kw;
Packit 6c4009
  int ch;
Packit 6c4009
Packit 6c4009
  buf = xmalloc (bufmax);
Packit 6c4009
  bufact = 0;
Packit 6c4009
Packit 6c4009
  ADDC (lr->buf[lr->idx - 1]);
Packit 6c4009
Packit 6c4009
  while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
Packit 6c4009
	 && ch != '<' && ch != ',' && ch != EOF)
Packit 6c4009
    {
Packit 6c4009
      if (ch == lr->escape_char)
Packit 6c4009
	{
Packit 6c4009
	  ch = lr_getc (lr);
Packit 6c4009
	  if (ch == '\n' || ch == EOF)
Packit 6c4009
	    {
Packit 6c4009
	      lr_error (lr, _("invalid escape sequence"));
Packit 6c4009
	      break;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      ADDC (ch);
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  lr_ungetc (lr, ch);
Packit 6c4009
Packit 6c4009
  kw = lr->hash_fct (buf, bufact);
Packit 6c4009
Packit 6c4009
  if (kw != NULL && kw->symname_or_ident == 0)
Packit 6c4009
    {
Packit 6c4009
      lr->token.tok = kw->token;
Packit 6c4009
      free (buf);
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      lr->token.tok = tok_ident;
Packit 6c4009
Packit 6c4009
      buf = xrealloc (buf, bufact + 1);
Packit 6c4009
      buf[bufact] = '\0';
Packit 6c4009
Packit 6c4009
      lr->token.val.str.startmb = buf;
Packit 6c4009
      lr->token.val.str.lenmb = bufact;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  return &lr->token;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
static struct token *
Packit 6c4009
get_string (struct linereader *lr, const struct charmap_t *charmap,
Packit 6c4009
	    struct localedef_t *locale, const struct repertoire_t *repertoire,
Packit 6c4009
	    int verbose)
Packit 6c4009
{
Packit 6c4009
  int return_widestr = lr->return_widestr;
Packit 6c4009
  char *buf;
Packit 6c4009
  wchar_t *buf2 = NULL;
Packit 6c4009
  size_t bufact;
Packit 6c4009
  size_t bufmax = 56;
Packit 6c4009
Packit 6c4009
  /* We must return two different strings.  */
Packit 6c4009
  buf = xmalloc (bufmax);
Packit 6c4009
  bufact = 0;
Packit 6c4009
Packit 6c4009
  /* We know it'll be a string.  */
Packit 6c4009
  lr->token.tok = tok_string;
Packit 6c4009
Packit 6c4009
  /* If we need not translate the strings (i.e., expand <...> parts)
Packit 6c4009
     we can run a simple loop.  */
Packit 6c4009
  if (!lr->translate_strings)
Packit 6c4009
    {
Packit 6c4009
      int ch;
Packit 6c4009
Packit 6c4009
      buf2 = NULL;
Packit 6c4009
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
Packit 6c4009
	ADDC (ch);
Packit 6c4009
Packit 6c4009
      /* Catch errors with trailing escape character.  */
Packit 6c4009
      if (bufact > 0 && buf[bufact - 1] == lr->escape_char
Packit 6c4009
	  && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
Packit 6c4009
	{
Packit 6c4009
	  lr_error (lr, _("illegal escape sequence at end of string"));
Packit 6c4009
	  --bufact;
Packit 6c4009
	}
Packit 6c4009
      else if (ch == '\n' || ch == EOF)
Packit 6c4009
	lr_error (lr, _("unterminated string"));
Packit 6c4009
Packit 6c4009
      ADDC ('\0');
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      int illegal_string = 0;
Packit 6c4009
      size_t buf2act = 0;
Packit 6c4009
      size_t buf2max = 56 * sizeof (uint32_t);
Packit 6c4009
      int ch;
Packit 6c4009
Packit 6c4009
      /* We have to provide the wide character result as well.  */
Packit 6c4009
      if (return_widestr)
Packit 6c4009
	buf2 = xmalloc (buf2max);
Packit 6c4009
Packit 6c4009
      /* Read until the end of the string (or end of the line or file).  */
Packit 6c4009
      while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
Packit 6c4009
	{
Packit 6c4009
	  size_t startidx;
Packit 6c4009
	  uint32_t wch;
Packit 6c4009
	  struct charseq *seq;
Packit 6c4009
Packit 6c4009
	  if (ch != '<')
Packit 6c4009
	    {
Packit 6c4009
	      /* The standards leave it up to the implementation to decide
Packit 6c4009
		 what to do with character which stand for themself.  We
Packit 6c4009
		 could jump through hoops to find out the value relative to
Packit 6c4009
		 the charmap and the repertoire map, but instead we leave
Packit 6c4009
		 it up to the locale definition author to write a better
Packit 6c4009
		 definition.  We assume here that every character which
Packit 6c4009
		 stands for itself is encoded using ISO 8859-1.  Using the
Packit 6c4009
		 escape character is allowed.  */
Packit 6c4009
	      if (ch == lr->escape_char)
Packit 6c4009
		{
Packit 6c4009
		  ch = lr_getc (lr);
Packit 6c4009
		  if (ch == '\n' || ch == EOF)
Packit 6c4009
		    break;
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      ADDC (ch);
Packit 6c4009
	      if (return_widestr)
Packit 6c4009
		ADDWC ((uint32_t) ch);
Packit 6c4009
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* Now we have to search for the end of the symbolic name, i.e.,
Packit 6c4009
	     the closing '>'.  */
Packit 6c4009
	  startidx = bufact;
Packit 6c4009
	  while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
Packit 6c4009
	    {
Packit 6c4009
	      if (ch == lr->escape_char)
Packit 6c4009
		{
Packit 6c4009
		  ch = lr_getc (lr);
Packit 6c4009
		  if (ch == '\n' || ch == EOF)
Packit 6c4009
		    break;
Packit 6c4009
		}
Packit 6c4009
	      ADDC (ch);
Packit 6c4009
	    }
Packit 6c4009
	  if (ch == '\n' || ch == EOF)
Packit 6c4009
	    /* Not a correct string.  */
Packit 6c4009
	    break;
Packit 6c4009
	  if (bufact == startidx)
Packit 6c4009
	    {
Packit 6c4009
	      /* <> is no correct name.  Ignore it and also signal an
Packit 6c4009
		 error.  */
Packit 6c4009
	      illegal_string = 1;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* It might be a Uxxxx symbol.  */
Packit 6c4009
	  if (buf[startidx] == 'U'
Packit 6c4009
	      && (bufact - startidx == 5 || bufact - startidx == 9))
Packit 6c4009
	    {
Packit 6c4009
	      char *cp = buf + startidx + 1;
Packit 6c4009
	      while (cp < &buf[bufact] && isxdigit (*cp))
Packit 6c4009
		++cp;
Packit 6c4009
Packit 6c4009
	      if (cp == &buf[bufact])
Packit 6c4009
		{
Packit 6c4009
		  char utmp[10];
Packit 6c4009
Packit 6c4009
		  /* Yes, it is.  */
Packit 6c4009
		  ADDC ('\0');
Packit 6c4009
		  wch = strtoul (buf + startidx + 1, NULL, 16);
Packit 6c4009
Packit 6c4009
		  /* Now forget about the name we just added.  */
Packit 6c4009
		  bufact = startidx;
Packit 6c4009
Packit 6c4009
		  if (return_widestr)
Packit 6c4009
		    ADDWC (wch);
Packit 6c4009
Packit 6c4009
		  /* See whether the charmap contains the Uxxxxxxxx names.  */
Packit 6c4009
		  snprintf (utmp, sizeof (utmp), "U%08X", wch);
Packit 6c4009
		  seq = charmap_find_value (charmap, utmp, 9);
Packit 6c4009
Packit 6c4009
		  if (seq == NULL)
Packit 6c4009
		    {
Packit 6c4009
		     /* No, this isn't the case.  Now determine from
Packit 6c4009
			the repertoire the name of the character and
Packit 6c4009
			find it in the charmap.  */
Packit 6c4009
		      if (repertoire != NULL)
Packit 6c4009
			{
Packit 6c4009
			  const char *symbol;
Packit 6c4009
Packit 6c4009
			  symbol = repertoire_find_symbol (repertoire, wch);
Packit 6c4009
Packit 6c4009
			  if (symbol != NULL)
Packit 6c4009
			    seq = charmap_find_value (charmap, symbol,
Packit 6c4009
						      strlen (symbol));
Packit 6c4009
			}
Packit 6c4009
Packit 6c4009
		      if (seq == NULL)
Packit 6c4009
			{
Packit 6c4009
#ifndef NO_TRANSLITERATION
Packit 6c4009
			  /* Transliterate if possible.  */
Packit 6c4009
			  if (locale != NULL)
Packit 6c4009
			    {
Packit 6c4009
			      uint32_t *translit;
Packit 6c4009
Packit 6c4009
			      if ((locale->avail & CTYPE_LOCALE) == 0)
Packit 6c4009
				{
Packit 6c4009
				  /* Load the CTYPE data now.  */
Packit 6c4009
				  int old_needed = locale->needed;
Packit 6c4009
Packit 6c4009
				  locale->needed = 0;
Packit 6c4009
				  locale = load_locale (LC_CTYPE,
Packit 6c4009
							locale->name,
Packit 6c4009
							locale->repertoire_name,
Packit 6c4009
							charmap, locale);
Packit 6c4009
				  locale->needed = old_needed;
Packit 6c4009
				}
Packit 6c4009
Packit 6c4009
			      if ((locale->avail & CTYPE_LOCALE) != 0
Packit 6c4009
				  && ((translit = find_translit (locale,
Packit 6c4009
								 charmap, wch))
Packit 6c4009
				      != NULL))
Packit 6c4009
				/* The CTYPE data contains a matching
Packit 6c4009
				   transliteration.  */
Packit 6c4009
				{
Packit 6c4009
				  int i;
Packit 6c4009
Packit 6c4009
				  for (i = 0; translit[i] != 0; ++i)
Packit 6c4009
				    {
Packit 6c4009
				      char utmp[10];
Packit 6c4009
Packit 6c4009
				      snprintf (utmp, sizeof (utmp), "U%08X",
Packit 6c4009
						translit[i]);
Packit 6c4009
				      seq = charmap_find_value (charmap, utmp,
Packit 6c4009
								9);
Packit 6c4009
				      assert (seq != NULL);
Packit 6c4009
				      ADDS (seq->bytes, seq->nbytes);
Packit 6c4009
				    }
Packit 6c4009
Packit 6c4009
				  continue;
Packit 6c4009
				}
Packit 6c4009
			    }
Packit 6c4009
#endif	/* NO_TRANSLITERATION */
Packit 6c4009
Packit 6c4009
			  /* Not a known name.  */
Packit 6c4009
			  illegal_string = 1;
Packit 6c4009
			}
Packit 6c4009
		    }
Packit 6c4009
Packit 6c4009
		  if (seq != NULL)
Packit 6c4009
		    ADDS (seq->bytes, seq->nbytes);
Packit 6c4009
Packit 6c4009
		  continue;
Packit 6c4009
		}
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* We now have the symbolic name in buf[startidx] to
Packit 6c4009
	     buf[bufact-1].  Now find out the value for this character
Packit 6c4009
	     in the charmap as well as in the repertoire map (in this
Packit 6c4009
	     order).  */
Packit 6c4009
	  seq = charmap_find_value (charmap, &buf[startidx],
Packit 6c4009
				    bufact - startidx);
Packit 6c4009
Packit 6c4009
	  if (seq == NULL)
Packit 6c4009
	    {
Packit 6c4009
	      /* This name is not in the charmap.  */
Packit 6c4009
	      lr_error (lr, _("symbol `%.*s' not in charmap"),
Packit 6c4009
			(int) (bufact - startidx), &buf[startidx]);
Packit 6c4009
	      illegal_string = 1;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (return_widestr)
Packit 6c4009
	    {
Packit 6c4009
	      /* Now the same for the multibyte representation.  */
Packit 6c4009
	      if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
Packit 6c4009
		wch = seq->ucs4;
Packit 6c4009
	      else
Packit 6c4009
		{
Packit 6c4009
		  wch = repertoire_find_value (repertoire, &buf[startidx],
Packit 6c4009
					       bufact - startidx);
Packit 6c4009
		  if (seq != NULL)
Packit 6c4009
		    seq->ucs4 = wch;
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      if (wch == ILLEGAL_CHAR_VALUE)
Packit 6c4009
		{
Packit 6c4009
		  /* This name is not in the repertoire map.  */
Packit 6c4009
		  lr_error (lr, _("symbol `%.*s' not in repertoire map"),
Packit 6c4009
			    (int) (bufact - startidx), &buf[startidx]);
Packit 6c4009
		  illegal_string = 1;
Packit 6c4009
		}
Packit 6c4009
	      else
Packit 6c4009
		ADDWC (wch);
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* Now forget about the name we just added.  */
Packit 6c4009
	  bufact = startidx;
Packit 6c4009
Packit 6c4009
	  /* And copy the bytes.  */
Packit 6c4009
	  if (seq != NULL)
Packit 6c4009
	    ADDS (seq->bytes, seq->nbytes);
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      if (ch == '\n' || ch == EOF)
Packit 6c4009
	{
Packit 6c4009
	  lr_error (lr, _("unterminated string"));
Packit 6c4009
	  illegal_string = 1;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      if (illegal_string)
Packit 6c4009
	{
Packit 6c4009
	  free (buf);
Packit 6c4009
	  free (buf2);
Packit 6c4009
	  lr->token.val.str.startmb = NULL;
Packit 6c4009
	  lr->token.val.str.lenmb = 0;
Packit 6c4009
	  lr->token.val.str.startwc = NULL;
Packit 6c4009
	  lr->token.val.str.lenwc = 0;
Packit 6c4009
Packit 6c4009
	  return &lr->token;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      ADDC ('\0');
Packit 6c4009
Packit 6c4009
      if (return_widestr)
Packit 6c4009
	{
Packit 6c4009
	  ADDWC (0);
Packit 6c4009
	  lr->token.val.str.startwc = xrealloc (buf2,
Packit 6c4009
						buf2act * sizeof (uint32_t));
Packit 6c4009
	  lr->token.val.str.lenwc = buf2act;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  lr->token.val.str.startmb = xrealloc (buf, bufact);
Packit 6c4009
  lr->token.val.str.lenmb = bufact;
Packit 6c4009
Packit 6c4009
  return &lr->token;
Packit 6c4009
}