/* xgettext common functions.
   Copyright (C) 2001-2003, 2005-2006, 2008-2009, 2011, 2015 Free
   Software Foundation, Inc.
   Written by Peter Miller <millerp@canb.auug.org.au>
   and Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef _XGETTEXT_H
#define _XGETTEXT_H

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

#if HAVE_ICONV
#include <iconv.h>
#endif

#include "message.h"
#include "pos.h"
#include "str-list.h"

/* Declare 'line_comment' and 'input_syntax'.  */
#include "read-catalog.h"


#ifdef __cplusplus
extern "C" {
#endif


/* If true, omit the header entry.
   If false, keep the header entry present in the input.
   Note that this flag is declared as 'int', not 'bool'.  */
extern int xgettext_omit_header;

/* NOTE(review): not documented in this header.  The name suggests that it
   enables matching of keywords as substrings -- confirm against the
   definition.  */
extern bool substring_match;


/* Calling convention for a given keyword.
   Records which argument positions of a call carry the msgid, the
   msgid_plural and the msgctxt, plus the comments to be extracted
   automatically.  */
struct callshape
{
  int argnum1; /* argument number to use for msgid */
  int argnum2; /* argument number to use for msgid_plural */
  int argnumc; /* argument number to use for msgctxt */
  bool argnum1_glib_context; /* argument argnum1 has the syntax "ctxt|msgid" */
  bool argnum2_glib_context; /* argument argnum2 has the syntax "ctxt|msgid" */
  int argtotal; /* total number of arguments */
  string_list_ty xcomments; /* auto-extracted comments */
};

/* Split keyword spec into keyword, argnum1, argnum2, argnumc.
   SPEC is the keyword specification to be split; SHAPEP points to the
   'struct callshape' that is to describe the calling convention.
   NOTE(review): *ENDP presumably receives the end of the keyword part of
   SPEC -- confirm against the definition.  */
extern void split_keywordspec (const char *spec, const char **endp,
                               struct callshape *shapep);

/* Set of alternative calling conventions for a given keyword.
   This is a variable-sized object: 'shapes' is declared with a single
   element but holds 'nshapes' elements.
   NOTE(review): do not convert 'shapes' into a C99 flexible array member
   without auditing every allocation of this type, since that would change
   sizeof (struct callshapes).  */
struct callshapes
{
  const char *keyword;          /* the keyword, not NUL terminated */
  size_t keyword_len;           /* the keyword's length */
  size_t nshapes;               /* number of elements in 'shapes' */
  struct callshape shapes[1];   /* actually nshapes elements */
};

/* Insert a (keyword, callshape) pair into a hash table mapping keyword to
   'struct callshapes *'.
   The keyword is passed as the pointer KEYWORD together with its length
   KEYWORD_LEN.  SHAPE is only read through a pointer to const.  */
extern void insert_keyword_callshape (hash_table *table,
                                      const char *keyword, size_t keyword_len,
                                      const struct callshape *shape);


/* Context representing some flags.
   Three formatstring types (primary, secondary, tertiary) are tracked.
   For each of them, is_format<N> stores an 'enum is_format' value in a
   3-bit bit-field and pass_format<N> stores a boolean in a 1-bit
   bit-field; all of them are declared as 'unsigned int'.  */
typedef struct flag_context_ty flag_context_ty;
struct flag_context_ty
{
  /* Regarding the primary formatstring type.  */
  /*enum is_format*/ unsigned int is_format1    : 3;
  /*bool*/           unsigned int pass_format1  : 1;
  /* Regarding the secondary formatstring type.  */
  /*enum is_format*/ unsigned int is_format2    : 3;
  /*bool*/           unsigned int pass_format2  : 1;
  /* Regarding the tertiary formatstring type.  */
  /*enum is_format*/ unsigned int is_format3    : 3;
  /*bool*/           unsigned int pass_format3  : 1;
};
/* Null context.  */
extern flag_context_ty null_context;
/* Transparent context.  */
extern flag_context_ty passthrough_context;
/* Compute an inherited context.
   The outer_context is assumed to have all pass_format* flags = false.
   The result will then also have all pass_format* flags = false.
   Both contexts are passed by value, and the result is returned by
   value.  */
extern flag_context_ty
       inherited_context (flag_context_ty outer_context,
                          flag_context_ty modifier_context);

/* Context representing some flags, for each possible argument number.
   This is a linked list, sorted according to the argument number.
   Each node carries the flags of one argument number.  */
typedef struct flag_context_list_ty flag_context_list_ty;
struct flag_context_list_ty
{
  int argnum;                   /* current argument number, > 0 */
  flag_context_ty flags;        /* flags for current argument */
  flag_context_list_ty *next;   /* link to the following list node */
};

/* Iterator through a flag_context_list_ty.
   It pairs an argument number with a pointer into the list.  */
typedef struct flag_context_list_iterator_ty flag_context_list_iterator_ty;
struct flag_context_list_iterator_ty
{
  int argnum;                           /* current argument number, > 0 */
  const flag_context_list_ty* head;     /* tail of list */
};
/* Predefined iterators.
   NOTE(review): presumably they yield null_context and passthrough_context,
   respectively, for every argument -- confirm against the definitions.  */
extern flag_context_list_iterator_ty null_context_list_iterator;
extern flag_context_list_iterator_ty passthrough_context_list_iterator;
/* Returns, by value, an iterator for LIST.  */
extern flag_context_list_iterator_ty
       flag_context_list_iterator (flag_context_list_ty *list);
/* Returns a flag_context_ty by value.  The iterator is passed by pointer.
   NOTE(review): presumably returns the flags for the iterator's current
   argument number and then steps *ITER to the next one -- confirm.  */
extern flag_context_ty
       flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter);

/* For nearly each backend, we have a separate table mapping a keyword to
   a flag_context_list_ty *.  */
typedef hash_table /* char[] -> flag_context_list_ty * */
        flag_context_list_table_ty;
/* Looks up the key KEY, of length KEYLEN, in FLAG_TABLE.
   NOTE(review): the result when no entry exists is not stated here;
   presumably NULL -- confirm against the definition.  */
extern flag_context_list_ty *
       flag_context_list_table_lookup (flag_context_list_table_ty *flag_table,
                                       const void *key, size_t keylen);
/* Record a flag in the appropriate backend's table.
   NOTE(review): the syntax of OPTIONSTRING is not described in this
   header.  */
extern void xgettext_record_flag (const char *optionstring);


/* Context while building up lexical tokens.
   In this header it is passed to non_ascii_error_message and
   from_current_source_encoding and stored in struct mixed_string_buffer,
   where it is documented as serving error message purposes.  */
typedef enum
  {
    lc_outside, /* Initial context: outside of comments and strings.  */
    lc_comment, /* Inside a comment.  */
    lc_string,  /* Inside a string literal.  */

    /* For embedded XML in programming code, like E4X in JavaScript.  */
    lc_xml_open_tag,   /* Inside an opening tag of an XML element.  */
    lc_xml_close_tag,  /* Inside a closing tag of an XML element.  */
    lc_xml_content     /* Inside an XML text node.  */
  }
  lexical_context_ty;

/* Error message about non-ASCII character in a specific lexical context.
   LCONTEXT is the lexical context; FILE_NAME and LINE_NUMBER give the
   position concerned.
   NOTE(review): ownership of the returned string is not stated here;
   the non-const 'char *' return type suggests it may be freshly
   allocated -- confirm before freeing.  */
extern char *non_ascii_error_message (lexical_context_ty lcontext,
                                      const char *file_name,
                                      size_t line_number);


/* Canonicalized encoding name for all input files.  */
extern const char *xgettext_global_source_encoding;

#if HAVE_ICONV
/* Converter from xgettext_global_source_encoding to UTF-8 (except from
   ASCII or UTF-8, when this conversion is a no-op).
   Only declared when HAVE_ICONV is true.  */
extern iconv_t xgettext_global_source_iconv;
#endif

/* Canonicalized encoding name for the current input file.  */
extern const char *xgettext_current_source_encoding;

#if HAVE_ICONV
/* Converter from xgettext_current_source_encoding to UTF-8 (except from
   ASCII or UTF-8, when this conversion is a no-op).
   Only declared when HAVE_ICONV is true.  */
extern iconv_t xgettext_current_source_iconv;
#endif

/* Convert the given string from xgettext_current_source_encoding to
   the output file encoding (i.e. ASCII or UTF-8).
   The resulting string is either the argument string, or freshly allocated.
   Since the result may be the argument itself, a caller must compare the
   two pointers before treating the result as separately owned.
   The lcontext, file_name and line_number are only used for error message
   purposes.  */
extern char *from_current_source_encoding (const char *string,
                                           lexical_context_ty lcontext,
                                           const char *file_name,
                                           size_t line_number);


/* List of messages whose msgids must not be extracted, or NULL.
   Used by remember_a_message().  */
extern message_list_ty *exclude;


/* Comment handling for backends which support combining adjacent strings
   even across lines.
   In these backends we cannot use the xgettext_comment* functions directly,
   because in multiline string expressions like
           "string1" +
           "string2"
   the newline between "string1" and "string2" would cause a call to
   xgettext_comment_reset(), thus destroying the accumulated comments
   that we need a little later, when we have concatenated the two strings
   and pass them to remember_a_message().
   Instead, we do the bookkeeping of the accumulated comments directly,
   and save a pointer to the accumulated comments when we read "string1".
   In order to avoid excessive copying of strings, we use reference
   counting.  */

/* A string list together with a reference count.
   The count is incremented by add_reference and decremented by
   drop_reference, which destroys the list and frees the object once the
   count is no longer greater than 1.  */
typedef struct refcounted_string_list_ty refcounted_string_list_ty;
struct refcounted_string_list_ty
{
  unsigned int refcount;            /* number of references held */
  struct string_list_ty contents;   /* the shared string list */
};

/* Takes one more reference on RSLP and returns RSLP.
   A NULL argument is accepted and handed back unchanged.  */
static inline refcounted_string_list_ty *
add_reference (refcounted_string_list_ty *rslp)
{
  if (rslp == NULL)
    return NULL;

  rslp->refcount += 1;
  return rslp;
}

/* Gives up one reference on RSLP.  A NULL argument is ignored.
   As long as more than one reference is held, only the counter is
   decremented.  Otherwise the contained string list is destroyed and the
   object itself is freed.  */
static inline void
drop_reference (refcounted_string_list_ty *rslp)
{
  if (rslp == NULL)
    return;

  if (rslp->refcount <= 1)
    {
      /* This was the last reference: release the contents, then the
         container.  */
      string_list_destroy (&rslp->contents);
      free (rslp);
      return;
    }

  rslp->refcount -= 1;
}

/* The comments accumulated so far, as a reference-counted string list.
   NOTE(review): presumably NULL while no comment has been accumulated --
   confirm against the definition.  */
extern refcounted_string_list_ty *savable_comment;
/* NOTE(review): presumably appends STR to savable_comment -- confirm.  */
extern void savable_comment_add (const char *str);
/* NOTE(review): presumably discards the accumulated comments held in
   savable_comment -- confirm.  */
extern void savable_comment_reset (void);

/* Convert character encoding of COMMENT according to the current
   source encoding.  Returns a new refcounted_string_list_ty.
   NOTE(review): POS presumably supplies the position used in error
   messages -- confirm.  */
extern refcounted_string_list_ty *
       savable_comment_convert_encoding (refcounted_string_list_ty *comment,
                                         lex_pos_ty *pos);


/* Kinds of escape sequences in a literal string.
   The non-zero values are distinct single bits.
   NOTE(review): presumably they may be combined with bitwise OR --
   confirm against the users of this type.  */
enum literalstring_escape_type
{
  LET_NONE = 0,
  LET_ANSI_C = 1 << 0,
  LET_UNICODE = 1 << 1
};

/* Holds a function pointer for parsing a literal string.
   The 'parse' function receives the string, a position and an escape type,
   and returns a 'char *'.
   NOTE(review): ownership of the returned string is not stated here.  */
struct literalstring_parser
{
  char * (*parse) (const char *string, lex_pos_ty *pos,
                   enum literalstring_escape_type type);
};

/* Add a message to the list of extracted messages.
   MSGCTXT must be either NULL or a malloc()ed string; its ownership is passed
   to the callee.
   MSGID must be a malloc()ed string; its ownership is passed to the callee.
   POS->file_name must be allocated with indefinite extent.
   EXTRACTED_COMMENT is a comment that needs to be copied into the POT file,
   or NULL.
   COMMENT may be savable_comment, or it may be a saved copy of savable_comment
   (then add_reference must be used when saving it, and drop_reference while
   dropping it).  Clear savable_comment.
   Return the new or found message, or NULL if the message is excluded.  */
extern message_ty *remember_a_message (message_list_ty *mlp,
                                       char *msgctxt,
                                       char *msgid,
                                       flag_context_ty context,
                                       lex_pos_ty *pos,
                                       const char *extracted_comment,
                                       refcounted_string_list_ty *comment);

/* Add an msgid_plural to a message previously returned by
   remember_a_message.
   MP is that message.
   STRING must be a malloc()ed string; its ownership is passed to the callee.
   POS->file_name must be allocated with indefinite extent.
   COMMENT may be savable_comment, or it may be a saved copy of savable_comment
   (then add_reference must be used when saving it, and drop_reference while
   dropping it).  Clear savable_comment.  */
extern void remember_a_message_plural (message_ty *mp,
                                       char *string,
                                       flag_context_ty context,
                                       lex_pos_ty *pos,
                                       refcounted_string_list_ty *comment);

/* Represents the progressive parsing of an argument list w.r.t. a single
   'struct callshape'.
   The first seven fields have the same names and types as the fields of
   'struct callshape'; here an argument number of 0 means that the argument
   has already been seen.  The remaining fields record what has been
   collected for msgctxt, msgid and msgid_plural: the string itself, its
   escape type and its position, and for msgid and msgid_plural also a
   flag context.  */
struct partial_call
{
  int argnumc;                  /* number of context argument, 0 when seen */
  int argnum1;                  /* number of singular argument, 0 when seen */
  int argnum2;                  /* number of plural argument, 0 when seen */
  bool argnum1_glib_context;    /* argument argnum1 has the syntax "ctxt|msgid" */
  bool argnum2_glib_context;    /* argument argnum2 has the syntax "ctxt|msgid" */
  int argtotal;                 /* total number of arguments, 0 if unspecified */
  string_list_ty xcomments;     /* auto-extracted comments */
  char *msgctxt;                /* context - owned string, or NULL */
  enum literalstring_escape_type msgctxt_escape;
  lex_pos_ty msgctxt_pos;
  char *msgid;                  /* msgid - owned string, or NULL */
  enum literalstring_escape_type msgid_escape;
  flag_context_ty msgid_context;
  lex_pos_ty msgid_pos;
  /* Reference-counted comment list associated with msgid.  */
  refcounted_string_list_ty *msgid_comment;
  char *msgid_plural;           /* msgid_plural - owned string, or NULL */
  enum literalstring_escape_type msgid_plural_escape;
  flag_context_ty msgid_plural_context;
  lex_pos_ty msgid_plural_pos;
};

/* Represents the progressive parsing of an argument list w.r.t. an entire
   'struct callshapes'.
   'alternative' is declared with a single element while the number of
   alternatives is kept in 'nalternatives'.
   NOTE(review): presumably this is a variable-sized object like
   'struct callshapes', with 'alternative' really holding 'nalternatives'
   elements -- confirm against arglist_parser_alloc.  */
struct arglist_parser
{
  message_list_ty *mlp;         /* list where the message shall be added */
  const char *keyword;          /* the keyword, not NUL terminated */
  size_t keyword_len;           /* the keyword's length */
  size_t nalternatives;         /* number of partial_call alternatives */
  struct partial_call alternative[1]; /* partial_call alternatives */
};

/* Creates a fresh arglist_parser recognizing calls.
   You can pass shapes = NULL for a parser not recognizing any calls.  */
extern struct arglist_parser * arglist_parser_alloc (message_list_ty *mlp,
                                                     const struct callshapes *shapes);
/* Clones an arglist_parser.  */
extern struct arglist_parser * arglist_parser_clone (struct arglist_parser *ap);
/* Adds a string argument to an arglist_parser.  ARGNUM must be > 0.
   STRING must be a malloc()ed string; its ownership is passed to the callee.
   FILE_NAME must be allocated with indefinite extent.
   COMMENT may be savable_comment, or it may be a saved copy of savable_comment
   (then add_reference must be used when saving it, and drop_reference while
   dropping it).  Clear savable_comment.  */
extern void arglist_parser_remember (struct arglist_parser *ap,
                                     int argnum, char *string,
                                     flag_context_ty context,
                                     char *file_name, size_t line_number,
                                     refcounted_string_list_ty *comment);
/* Adds an uninterpreted string argument to an arglist_parser.  ARGNUM
   must be > 0.
   STRING must be a malloc()ed string; its ownership is passed to the callee.
   FILE_NAME must be allocated with indefinite extent.
   COMMENT may be savable_comment, or it may be a saved copy of savable_comment
   (then add_reference must be used when saving it, and drop_reference while
   dropping it).  Clear savable_comment.
   TYPE is the escape type of STRING.  */
extern void arglist_parser_remember_literal (struct arglist_parser *ap,
                                             int argnum, char *string,
                                             flag_context_ty context,
                                             char *file_name, size_t line_number,
                                             refcounted_string_list_ty *comment,
                                             enum literalstring_escape_type type);
/* Tests whether an arglist_parser is not waiting for more arguments after
   argument ARGNUM.  */
extern bool arglist_parser_decidedp (struct arglist_parser *ap, int argnum);
/* Terminates the processing of an arglist_parser after argument ARGNUM and
   deletes it.  AP must not be used afterwards.  */
extern void arglist_parser_done (struct arglist_parser *ap, int argnum);


/* A string buffer type that allows appending bytes (in the
   xgettext_current_source_encoding) or Unicode characters.
   Returns the entire string in UTF-8 encoding.
   Two buffers are kept: one for the part already converted to UTF-8 and
   one for the part still in the source encoding.  */

struct mixed_string_buffer
{
  /* The part of the string that has already been converted to UTF-8.  */
  char *utf8_buffer;
  size_t utf8_buflen;
  size_t utf8_allocated;
  /* The first half of a UTF-16 surrogate character.  */
  unsigned short utf16_surr;
  /* The part of the string that is still in the source encoding.  */
  char *curr_buffer;
  size_t curr_buflen;
  size_t curr_allocated;
  /* The lexical context.  Used only for error message purposes.  */
  lexical_context_ty lcontext;
  const char *logical_file_name;
  /* Note: an 'int' here, whereas other declarations in this header take
     line numbers as 'size_t'.  */
  int line_number;
};

/* Creates a fresh mixed_string_buffer.
   LCONTEXT, LOGICAL_FILE_NAME and LINE_NUMBER correspond to the fields of
   the same names in struct mixed_string_buffer.  */
extern struct mixed_string_buffer *
       mixed_string_buffer_alloc (lexical_context_ty lcontext,
                                  const char *logical_file_name,
                                  int line_number);

/* Appends a character to a mixed_string_buffer.
   NOTE(review): C is presumably a byte in the
   xgettext_current_source_encoding, as opposed to the Unicode variant
   below -- confirm.  */
extern void mixed_string_buffer_append_char (struct mixed_string_buffer *bp,
                                             int c);

/* Appends a Unicode character to a mixed_string_buffer.  */
extern void mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp,
                                                int c);

/* Frees mixed_string_buffer and returns the accumulated string in UTF-8.
   BP must not be used afterwards.
   NOTE(review): the returned string is presumably owned by the caller --
   confirm.  */
extern char * mixed_string_buffer_done (struct mixed_string_buffer *bp);


#ifdef __cplusplus
}
#endif


#endif /* _XGETTEXT_H */