/* Blob Blame History Raw  (repository web-viewer residue; not part of the header) */
/* This header file is in the public domain. */
#ifndef ENCA_H
#define ENCA_H

#include <stdlib.h>
/* According to autoconf stdlib may not be enough for size_t */
#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* Enumerated types */

/*
 * EncaSurface: bit flags stored in the `surface` member of EncaEncoding.
 *
 * Every plain flag occupies exactly one bit.  The ENCA_SURFACE_MASK_*
 * members are the bitwise OR of the flags in their group.
 */
typedef enum { /*< flags >*/
  /* EOL_* group: bits 0-4. */
  ENCA_SURFACE_EOL_CR     = 0x0001,
  ENCA_SURFACE_EOL_LF     = 0x0002,
  ENCA_SURFACE_EOL_CRLF   = 0x0004,
  ENCA_SURFACE_EOL_MIX    = 0x0008,
  ENCA_SURFACE_EOL_BIN    = 0x0010,
  ENCA_SURFACE_MASK_EOL   = ENCA_SURFACE_EOL_CR | ENCA_SURFACE_EOL_LF
                            | ENCA_SURFACE_EOL_CRLF | ENCA_SURFACE_EOL_MIX
                            | ENCA_SURFACE_EOL_BIN,

  /* PERM_* group: bits 5-7. */
  ENCA_SURFACE_PERM_21    = 0x0020,
  ENCA_SURFACE_PERM_4321  = 0x0040,
  ENCA_SURFACE_PERM_MIX   = 0x0080,
  ENCA_SURFACE_MASK_PERM  = ENCA_SURFACE_PERM_21 | ENCA_SURFACE_PERM_4321
                            | ENCA_SURFACE_PERM_MIX,

  /* Stand-alone flags: bits 8, 13 and 14 (bits 9-12 are unused here). */
  ENCA_SURFACE_QP         = 0x0100,
  ENCA_SURFACE_REMOVE     = 0x2000,
  ENCA_SURFACE_UNKNOWN    = 0x4000,

  /* Union of both group masks plus QP and REMOVE.
   * Note: ENCA_SURFACE_UNKNOWN is NOT part of this mask. */
  ENCA_SURFACE_MASK_ALL   = ENCA_SURFACE_MASK_EOL | ENCA_SURFACE_MASK_PERM
                            | ENCA_SURFACE_QP | ENCA_SURFACE_REMOVE
} EncaSurface;

/*
 * EncaNameStyle: selector passed as the `whatname` argument of
 * enca_charset_name() and enca_get_surface_name().
 *
 * The values are consecutive integers starting at 0.
 */
typedef enum {
  ENCA_NAME_STYLE_ENCA    = 0,
  ENCA_NAME_STYLE_RFC1345 = 1,
  ENCA_NAME_STYLE_CSTOCS  = 2,
  ENCA_NAME_STYLE_ICONV   = 3,
  ENCA_NAME_STYLE_HUMAN   = 4,
  ENCA_NAME_STYLE_MIME    = 5
} EncaNameStyle;

/*
 * EncaCharsetFlags: bit flags returned by enca_charset_properties() and
 * tested by the enca_charset_is_*() convenience macros.
 *
 * Each flag occupies a single bit (bits 0-8).
 */
typedef enum { /*< flags >*/
  ENCA_CHARSET_7BIT       = 0x0001,
  ENCA_CHARSET_8BIT       = 0x0002,
  ENCA_CHARSET_16BIT      = 0x0004,
  ENCA_CHARSET_32BIT      = 0x0008,
  ENCA_CHARSET_FIXED      = 0x0010,
  ENCA_CHARSET_VARIABLE   = 0x0020,
  ENCA_CHARSET_BINARY     = 0x0040,
  ENCA_CHARSET_REGULAR    = 0x0080,
  ENCA_CHARSET_MULTIBYTE  = 0x0100
} EncaCharsetFlags;

/*
 * EncaErrno: error codes.  ENCA_EOK is zero; the remaining codes follow
 * consecutively.
 *
 * NOTE(review): presumably these are the values reported by enca_errno()
 * and accepted by enca_strerror() -- both use plain int; confirm.
 */
typedef enum {
  ENCA_EOK       = 0,
  ENCA_EINVALUE  = 1,
  ENCA_EEMPTY    = 2,
  ENCA_EFILTERED = 3,
  ENCA_ENOCS8    = 4,
  ENCA_ESIGNIF   = 5,
  ENCA_EWINNER   = 6,
  ENCA_EGARBAGE  = 7
} EncaErrno;

#define ENCA_CS_UNKNOWN (-1)

#define ENCA_NOT_A_CHAR 0xffff

/* Published (opaque) typedefs */
/* EncaAnalyser is a pointer to struct _EncaAnalyserState.  That struct is
 * only forward-declared in this header, so its members cannot be accessed
 * through this interface. */
typedef struct _EncaAnalyserState *EncaAnalyser;

/* Public (transparent) typedefs */
typedef struct _EncaEncoding EncaEncoding;

/* An encoding is a (charset, surface) pair:
 *   charset - integer charset id (ENCA_CS_UNKNOWN is the "not known" id);
 *   surface - EncaSurface bit flags.
 * Returned by value from enca_analyse(), enca_analyse_const() and
 * enca_parse_encoding_name(). */
struct _EncaEncoding { int charset; EncaSurface surface; };

/* Basic interface. */

/* Returns an analyser handle for the language named by langname.
 * NOTE(review): failure behaviour (e.g. a NULL result for an unsupported
 * language) is not visible in this header -- confirm. */
EncaAnalyser  enca_analyser_alloc (const char *langname);

/* Counterpart of enca_analyser_alloc(); takes the handle to dispose of. */
void          enca_analyser_free  (EncaAnalyser analyser);

/* Analyses the sample described by buffer/size and returns the result as
 * an EncaEncoding (charset id + surface flags).
 * NOTE(review): buffer is non-const here, unlike enca_analyse_const();
 * presumably the data may be modified during analysis -- confirm. */
EncaEncoding  enca_analyse        (EncaAnalyser analyser,
                                   unsigned char *buffer,
                                   size_t size);

/* Same signature as enca_analyse() except that buffer is const. */
EncaEncoding  enca_analyse_const  (EncaAnalyser analyser,
                                   const unsigned char *buffer,
                                   size_t size);

/* NOTE(review): meaning of the int result is not visible in this header. */
int           enca_double_utf8_check (EncaAnalyser analyser,
                                      const unsigned char *buffer,
                                      size_t size);

/* NOTE(review): ownership and termination convention of the returned int
 * array are not visible in this header -- confirm before freeing or
 * iterating. */
int*          enca_double_utf8_get_candidates (EncaAnalyser analyser);

/* Returns an int error code for the analyser.
 * NOTE(review): presumably an EncaErrno value -- confirm. */
int           enca_errno          (EncaAnalyser analyser);

/* Returns a read-only string for error code errnum. */
const char*   enca_strerror       (EncaAnalyser analyser,
                                   int errnum);

/* Options.
 *
 * Every option is exposed as a setter/getter pair operating on an analyser.
 * Most setters return void.  enca_set_significant() and
 * enca_set_threshold() return int instead.
 * NOTE(review): presumably a status code reporting whether the value was
 * accepted -- confirm against the implementation. */
void          enca_set_multibyte              (EncaAnalyser analyser,
                                               int multibyte);
int           enca_get_multibyte              (EncaAnalyser analyser);
void          enca_set_interpreted_surfaces   (EncaAnalyser analyser,
                                               int interpreted_surfaces);
int           enca_get_interpreted_surfaces   (EncaAnalyser analyser);
void          enca_set_ambiguity              (EncaAnalyser analyser,
                                               int ambiguity);
int           enca_get_ambiguity              (EncaAnalyser analyser);
void          enca_set_filtering              (EncaAnalyser analyser,
                                               int filtering);
int           enca_get_filtering              (EncaAnalyser analyser);
void          enca_set_garbage_test           (EncaAnalyser analyser,
                                               int garbage_test);
int           enca_get_garbage_test           (EncaAnalyser analyser);
void          enca_set_termination_strictness (EncaAnalyser analyser,
                                               int termination_strictness);
int           enca_get_termination_strictness (EncaAnalyser analyser);
int           enca_set_significant            (EncaAnalyser analyser,
                                               size_t significant);
size_t        enca_get_significant            (EncaAnalyser analyser);
int           enca_set_threshold              (EncaAnalyser analyser,
                                               double threshold);
double        enca_get_threshold              (EncaAnalyser analyser);

/* Names and properties. */

/* Returns a read-only name for charset, in the style selected by whatname. */
const char*       enca_charset_name            (int charset,
                                                EncaNameStyle whatname);

/* Returns an array of read-only alias strings for charset; n is an output
 * parameter.
 * NOTE(review): presumably *n receives the number of aliases, and ownership
 * of the array itself is not visible here -- confirm. */
const char**      enca_get_charset_aliases     (int charset,
                                                size_t *n);

/* Returns a name for surface in the style selected by whatname.
 * NOTE(review): the result is a non-const char*, unlike
 * enca_charset_name(); presumably a caller-owned allocation -- confirm. */
char*             enca_get_surface_name        (EncaSurface surface,
                                                EncaNameStyle whatname);

/* Converts an encoding name string to an EncaEncoding value. */
EncaEncoding      enca_parse_encoding_name     (const char *name);

/* Returns EncaSurface flags for charset. */
EncaSurface       enca_charset_natural_surface (int charset);

/* Returns EncaCharsetFlags for charset; the enca_charset_is_*() macros
 * are thin wrappers around this call. */
EncaCharsetFlags  enca_charset_properties      (int charset);

/*
 * Convenience predicates on a charset id.
 *
 * enca_charset_is_known() compares cs with ENCA_CS_UNKNOWN and yields 0 or 1.
 *
 * Every other macro calls enca_charset_properties(cs) and masks the result
 * with one EncaCharsetFlags bit.  The value is therefore either 0 or that
 * flag's bit value (not normalised to 1): use it as a truth value, do not
 * compare it with 1.
 *
 * Each macro expands its argument exactly once.
 */
#define enca_charset_is_known(cs) \
  ((cs) != ENCA_CS_UNKNOWN)
#define enca_charset_is_7bit(cs) \
  (enca_charset_properties(cs) & ENCA_CHARSET_7BIT)
#define enca_charset_is_8bit(cs) \
  (enca_charset_properties(cs) & ENCA_CHARSET_8BIT)
#define enca_charset_is_16bit(cs) \
  (enca_charset_properties(cs) & ENCA_CHARSET_16BIT)
#define enca_charset_is_32bit(cs) \
  (enca_charset_properties(cs) & ENCA_CHARSET_32BIT)
#define enca_charset_is_fixed(cs) \
  (enca_charset_properties(cs) & ENCA_CHARSET_FIXED)
#define enca_charset_is_variable(cs) \
  (enca_charset_properties(cs) & ENCA_CHARSET_VARIABLE)
#define enca_charset_is_binary(cs) \
  (enca_charset_properties(cs) & ENCA_CHARSET_BINARY)
#define enca_charset_is_regular(cs) \
  (enca_charset_properties(cs) & ENCA_CHARSET_REGULAR)
#define enca_charset_is_multibyte(cs) \
  (enca_charset_properties(cs) & ENCA_CHARSET_MULTIBYTE)

/* Auxiliary functions. */

/* NOTE(review): presumably nonzero when a UCS-2 map exists for charset --
 * confirm. */
int           enca_charset_has_ucs2_map  (int charset);

/* Writes map data for charset into the caller-supplied buffer.
 * NOTE(review): the required buffer length is not visible in this header
 * (presumably one entry per byte value, i.e. 256) -- confirm before
 * calling. */
int           enca_charset_ucs2_map      (int charset,
                                          unsigned int *buffer);

/* Returns a charset count as size_t. */
size_t        enca_number_of_charsets    (void);

/* Returns a read-only language string for the analyser. */
const char*   enca_analyser_language     (EncaAnalyser analyser);

/* Returns a read-only English name for the language identified by lang. */
const char*   enca_language_english_name (const char *lang);

/* Returns an array of read-only language strings; n is an output parameter.
 * NOTE(review): presumably *n receives the element count, and ownership of
 * the array is not visible here -- confirm. */
const char**  enca_get_languages         (size_t *n);

/* Returns an array of charset ids for the language langname; n is an
 * output parameter.
 * NOTE(review): same open questions about *n and array ownership as for
 * enca_get_languages() -- confirm. */
int*          enca_get_language_charsets (const char *langname,
                                          size_t *n);
#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif