<?xml version="1.0"?>
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd">
<refentry id="libenca-Internal-Functions">
<refmeta>
<refentrytitle role="top_of_page" id="libenca-Internal-Functions.top_of_page">internal</refentrytitle>
<manvolnum>3</manvolnum>
<refmiscinfo>LIBENCA Library</refmiscinfo>
</refmeta>
<refnamediv>
<refname>internal</refname>
<refpurpose><para>
internal functions
</para></refpurpose>
</refnamediv>
<refsect1 id="libenca-Internal-Functions.functions" role="functions_proto">
<title role="functions_proto.title">Functions</title>
<informaltable pgwide="1" frame="none">
<tgroup cols="2">
<colspec colname="functions_return" colwidth="150px"/>
<colspec colname="functions_name"/>
<tbody>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><phrase role="c_punctuation">(</phrase><link linkend="EncaHookFunc">*EncaHookFunc</link><phrase role="c_punctuation">)</phrase> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><phrase role="c_punctuation">(</phrase><link linkend="EncaGuessFunc">*EncaGuessFunc</link><phrase role="c_punctuation">)</phrase> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="ELEMENTS:CAPS">ELEMENTS</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="MAKE-HOOK-LINE:CAPS">MAKE_HOOK_LINE</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="LF:CAPS">LF</link></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="CR:CAPS">CR</link></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link> *
</entry><entry role="function_name"><link linkend="enca-malloc">enca_malloc</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link> *
</entry><entry role="function_name"><link linkend="enca-realloc">enca_realloc</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="enca-free">enca_free</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="NEW:CAPS">NEW</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="RENEW:CAPS">RENEW</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="char"><returnvalue>char</returnvalue></link> *
</entry><entry role="function_name"><link linkend="enca-strdup">enca_strdup</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type">const <link linkend="char"><returnvalue>char</returnvalue></link> *
</entry><entry role="function_name"><link linkend="enca-strstr">enca_strstr</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="char"><returnvalue>char</returnvalue></link> *
</entry><entry role="function_name"><link linkend="enca-stpcpy">enca_stpcpy</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="char"><returnvalue>char</returnvalue></link> *
</entry><entry role="function_name"><link linkend="enca-strconcat">enca_strconcat</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="char"><returnvalue>char</returnvalue></link> *
</entry><entry role="function_name"><link linkend="enca-strappend">enca_strappend</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="enca-csname">enca_csname</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-name-to-charset">enca_name_to_charset</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="EncaSurface"><returnvalue>EncaSurface</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-name-to-surface">enca_name_to_surface</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-language-init">enca_language_init</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-language-destroy">enca_language_destroy</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="double"><returnvalue>double</returnvalue></link> *
</entry><entry role="function_name"><link linkend="enca-get-charset-similarity-matrix">enca_get_charset_similarity_matrix</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-charsets-subset-identical">enca_charsets_subset_identical</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="size-t"><returnvalue>size_t</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-filter-boxdraw">enca_filter_boxdraw</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-language-hook-ncs">enca_language_hook_ncs</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-language-hook-eol">enca_language_hook_eol</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-guess-init">enca_guess_init</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-guess-destroy">enca_guess_destroy</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="EncaSurface"><returnvalue>EncaSurface</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-eol-surface">enca_eol_surface</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-find-max-sec">enca_find_max_sec</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-double-utf8-init">enca_double_utf8_init</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-double-utf8-destroy">enca_double_utf8_destroy</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-pair-init">enca_pair_init</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-pair-destroy">enca_pair_destroy</link> <phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-pair-analyse">enca_pair_analyse</link> <phrase role="c_punctuation">()</phrase></entry></row>
</tbody>
</tgroup>
</informaltable>
</refsect1>
<refsect1 id="libenca-Internal-Functions.other" role="other_proto">
<title role="other_proto.title">Types and Values</title>
<informaltable role="enum_members_table" pgwide="1" frame="none">
<tgroup cols="2">
<colspec colname="name" colwidth="150px"/>
<colspec colname="description"/>
<tbody>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaAnalyserOptions">EncaAnalyserOptions</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaAnalyserState">EncaAnalyserState</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaCharsetInfo">EncaCharsetInfo</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaLanguageInfo">EncaLanguageInfo</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaLanguageHookData1CS">EncaLanguageHookData1CS</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaLanguageHookDataEOL">EncaLanguageHookDataEOL</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaUTFCheckData">EncaUTFCheckData</link></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="EPSILON:CAPS">EPSILON</link></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="FILL-NONLETTER:CAPS">FILL_NONLETTER</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-BE:CAPS">ENCA_LANGUAGE_BE</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-BG:CAPS">ENCA_LANGUAGE_BG</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-CS:CAPS">ENCA_LANGUAGE_CS</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-ET:CAPS">ENCA_LANGUAGE_ET</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-HR:CAPS">ENCA_LANGUAGE_HR</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-HU:CAPS">ENCA_LANGUAGE_HU</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-LT:CAPS">ENCA_LANGUAGE_LT</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-LV:CAPS">ENCA_LANGUAGE_LV</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-PL:CAPS">ENCA_LANGUAGE_PL</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-RU:CAPS">ENCA_LANGUAGE_RU</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-SK:CAPS">ENCA_LANGUAGE_SK</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-SL:CAPS">ENCA_LANGUAGE_SL</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-UK:CAPS">ENCA_LANGUAGE_UK</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-ZH:CAPS">ENCA_LANGUAGE_ZH</link></entry></row>
</tbody>
</tgroup>
</informaltable>
</refsect1>
<refsect1 id="libenca-Internal-Functions.description" role="desc">
<title role="desc.title">Description</title>
<para>
Do not use outside Enca library.
</para>
</refsect1>
<refsect1 id="libenca-Internal-Functions.functions_details" role="details">
<title role="details.title">Functions</title>
<refsect2 id="EncaHookFunc" role="function">
<title>EncaHookFunc ()</title>
<indexterm zone="EncaHookFunc"><primary>EncaHookFunc</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
<phrase role="c_punctuation">(</phrase>*EncaHookFunc<phrase role="c_punctuation">)</phrase> (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Language hook function type.</para>
<para>Launches language specific hooks for a particular language.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser state whose charset ratings are to be modified.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero if charset ratings have been actually modified, zero
otherwise.</para>
</refsect3></refsect2>
<refsect2 id="EncaGuessFunc" role="function">
<title>EncaGuessFunc ()</title>
<indexterm zone="EncaGuessFunc"><primary>EncaGuessFunc</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
<phrase role="c_punctuation">(</phrase>*EncaGuessFunc<phrase role="c_punctuation">)</phrase> (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Special (multibyte) encoding check function type.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser state whose buffer should be checked.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero if analyser->result has been set, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="ELEMENTS:CAPS" role="macro">
<title>ELEMENTS()</title>
<indexterm zone="ELEMENTS:CAPS"><primary>ELEMENTS</primary></indexterm>
<programlisting language="C">#define ELEMENTS(array) (sizeof(array)/sizeof((array)[0]))
</programlisting>
<para>Compute the number of elements of a static array.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>array</para></entry>
<entry role="parameter_description"><para>An array whose size is to be computed.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> the number of elements.</para>
</refsect3></refsect2>
<refsect2 id="MAKE-HOOK-LINE:CAPS" role="macro">
<title>MAKE_HOOK_LINE()</title>
<indexterm zone="MAKE-HOOK-LINE:CAPS"><primary>MAKE_HOOK_LINE</primary></indexterm>
<programlisting language="C">#define MAKE_HOOK_LINE(name)</programlisting>
<para>Ugly code `beautifier' macro for language hooks.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>name</para></entry>
<entry role="parameter_description"><para>A charset name in a form suitable for use as a C-style identifier.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="LF:CAPS" role="macro">
<title>LF</title>
<indexterm zone="LF:CAPS"><primary>LF</primary></indexterm>
<programlisting language="C">#define LF ((unsigned char)'\n')
</programlisting>
<para>Line feed character (End-of-line on Unix).</para>
</refsect2>
<refsect2 id="CR:CAPS" role="macro">
<title>CR</title>
<indexterm zone="CR:CAPS"><primary>CR</primary></indexterm>
<programlisting language="C">#define CR ((unsigned char)'\r')
</programlisting>
<para>Carriage return character (End-of-line on Macintosh).</para>
</refsect2>
<refsect2 id="enca-malloc" role="function">
<title>enca_malloc ()</title>
<indexterm zone="enca-malloc"><primary>enca_malloc</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link> *
enca_malloc (<parameter><link linkend="size-t"><type>size_t</type></link> size</parameter>);</programlisting>
<para>Allocates memory, always successfully (on failure, aborts the program).</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>size</para></entry>
<entry role="parameter_description"><para>The number of bytes to allocate.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the newly allocated memory.</para>
</refsect3></refsect2>
<refsect2 id="enca-realloc" role="function">
<title>enca_realloc ()</title>
<indexterm zone="enca-realloc"><primary>enca_realloc</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link> *
enca_realloc (<parameter><link linkend="void"><type>void</type></link> *ptr</parameter>,
<parameter><link linkend="size-t"><type>size_t</type></link> size</parameter>);</programlisting>
<para>Reallocates memory, always successfully (on failure, aborts the program).</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>ptr</para></entry>
<entry role="parameter_description"><para>Pointer to block of previously allocated memory.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>size</para></entry>
<entry role="parameter_description"><para>The number of bytes to resize the block to.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the newly allocated memory, <link linkend="NULL:CAPS"><type>NULL</type></link> when <parameter>size</parameter>
is zero.</para>
</refsect3></refsect2>
<refsect2 id="enca-free" role="macro">
<title>enca_free()</title>
<indexterm zone="enca-free"><primary>enca_free</primary></indexterm>
<programlisting language="C">#define enca_free(ptr)</programlisting>
<para>Frees memory pointed to by <parameter>ptr</parameter>
with <link linkend="free"><function>free()</function></link> hack and assigns it a safe value,
thus may be called more than once.</para>
<para><parameter>ptr</parameter>
MUST be l-value.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>ptr</para></entry>
<entry role="parameter_description"><para>Pointer to memory to free.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="NEW:CAPS" role="macro">
<title>NEW()</title>
<indexterm zone="NEW:CAPS"><primary>NEW</primary></indexterm>
<programlisting language="C">#define NEW(type,n) ((type*)enca_malloc((n)*sizeof(type)))
</programlisting>
<para>An <link linkend="enca-malloc"><function>enca_malloc()</function></link> wrapper.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>type</para></entry>
<entry role="parameter_description"><para>Data type to allocate.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>n</para></entry>
<entry role="parameter_description"><para>Number of elements to allocate.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the newly allocated memory.</para>
</refsect3></refsect2>
<refsect2 id="RENEW:CAPS" role="macro">
<title>RENEW()</title>
<indexterm zone="RENEW:CAPS"><primary>RENEW</primary></indexterm>
<programlisting language="C">#define RENEW(ptr,type,n) ((type*)enca_realloc((ptr),(n)*sizeof(type)))
</programlisting>
<para>An <link linkend="enca-realloc"><function>enca_realloc()</function></link> wrapper.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>ptr</para></entry>
<entry role="parameter_description"><para>Pointer to already allocated memory or <link linkend="NULL:CAPS"><type>NULL</type></link>.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>type</para></entry>
<entry role="parameter_description"><para>Data type to allocate.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>n</para></entry>
<entry role="parameter_description"><para>Number of elements to resize the memory to.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the reallocated memory (or pointer safe to call <link linkend="free"><function>free()</function></link>
on when <parameter>n</parameter>
is zero).</para>
</refsect3></refsect2>
<refsect2 id="enca-strdup" role="function">
<title>enca_strdup ()</title>
<indexterm zone="enca-strdup"><primary>enca_strdup</primary></indexterm>
<programlisting language="C"><link linkend="char"><returnvalue>char</returnvalue></link> *
enca_strdup (<parameter>const <link linkend="char"><type>char</type></link> *s</parameter>);</programlisting>
<para>Duplicates string.</para>
<para>Will be defined as <link linkend="strdup"><function>strdup()</function></link> when system provides it.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>s</para></entry>
<entry role="parameter_description"><para>A string.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The newly allocated string copy.</para>
</refsect3></refsect2>
<refsect2 id="enca-strstr" role="function">
<title>enca_strstr ()</title>
<indexterm zone="enca-strstr"><primary>enca_strstr</primary></indexterm>
<programlisting language="C">const <link linkend="char"><returnvalue>char</returnvalue></link> *
enca_strstr (<parameter>const <link linkend="char"><type>char</type></link> *haystack</parameter>,
<parameter>const <link linkend="char"><type>char</type></link> *needle</parameter>);</programlisting>
<para>Finds an occurrence of a substring in a string.</para>
<para>Will be defined as <link linkend="strstr"><function>strstr()</function></link> when system provides it.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>haystack</para></entry>
<entry role="parameter_description"><para>A string where to search.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>needle</para></entry>
<entry role="parameter_description"><para>A string to find.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the first occurrence of <parameter>needle</parameter>
in <parameter>haystack</parameter>
; <link linkend="NULL:CAPS"><type>NULL</type></link> if
not found.</para>
</refsect3></refsect2>
<refsect2 id="enca-stpcpy" role="function">
<title>enca_stpcpy ()</title>
<indexterm zone="enca-stpcpy"><primary>enca_stpcpy</primary></indexterm>
<programlisting language="C"><link linkend="char"><returnvalue>char</returnvalue></link> *
enca_stpcpy (<parameter><link linkend="char"><type>char</type></link> *dest</parameter>,
<parameter>const <link linkend="char"><type>char</type></link> *src</parameter>);</programlisting>
<para>Appends a string to the end of another string, returning a pointer to
the terminating zero byte.</para>
<para>Will be defined as <link linkend="stpcpy"><function>stpcpy()</function></link> when system provides it.</para>
<para>Caller is responsible for providing <parameter>dest</parameter>
long enough to hold the result.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>dest</para></entry>
<entry role="parameter_description"><para>A string.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>src</para></entry>
<entry role="parameter_description"><para>A string to append.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the terminating zero byte of resulting string.</para>
</refsect3></refsect2>
<refsect2 id="enca-strconcat" role="function">
<title>enca_strconcat ()</title>
<indexterm zone="enca-strconcat"><primary>enca_strconcat</primary></indexterm>
<programlisting language="C"><link linkend="char"><returnvalue>char</returnvalue></link> *
enca_strconcat (<parameter>const <link linkend="char"><type>char</type></link> *str</parameter>,
<parameter>...</parameter>);</programlisting>
<para>Concatenates arbitrary (but at least one) number of strings.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>str</para></entry>
<entry role="parameter_description"><para>A string.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>...</para></entry>
<entry role="parameter_description"><para>A <link linkend="NULL-terminated"><type>NULL-terminated</type></link> list of strings to append.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> All the strings concatenated together.</para>
</refsect3></refsect2>
<refsect2 id="enca-strappend" role="function">
<title>enca_strappend ()</title>
<indexterm zone="enca-strappend"><primary>enca_strappend</primary></indexterm>
<programlisting language="C"><link linkend="char"><returnvalue>char</returnvalue></link> *
enca_strappend (<parameter><link linkend="char"><type>char</type></link> *str</parameter>,
<parameter>...</parameter>);</programlisting>
<para>Appends arbitrary number of strings to a string.</para>
<para>The string <parameter>str</parameter>
is destroyed (reallocated), the others are kept.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>str</para></entry>
<entry role="parameter_description"><para>A string.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>...</para></entry>
<entry role="parameter_description"><para>A <link linkend="NULL-terminated"><type>NULL-terminated</type></link> list of strings to append.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> All the strings concatenated together.</para>
</refsect3></refsect2>
<refsect2 id="enca-csname" role="macro">
<title>enca_csname()</title>
<indexterm zone="enca-csname"><primary>enca_csname</primary></indexterm>
<programlisting language="C">#define enca_csname(cs) enca_charset_name((cs), ENCA_NAME_STYLE_ENCA)
</programlisting>
<para>A shorthand for printing names with <link linkend="ENCA-NAME-STYLE-ENCA:CAPS"><type>ENCA_NAME_STYLE_ENCA</type></link>.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>cs</para></entry>
<entry role="parameter_description"><para>A charset id.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-name-to-charset" role="function">
<title>enca_name_to_charset ()</title>
<indexterm zone="enca-name-to-charset"><primary>enca_name_to_charset</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_name_to_charset (<parameter>const <link linkend="char"><type>char</type></link> *csname</parameter>);</programlisting>
<para>Transforms charset name to numeric charset id.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>csname</para></entry>
<entry role="parameter_description"><para>The charset name.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The charset id; <link linkend="ENCA-CS-UNKNOWN:CAPS"><type>ENCA_CS_UNKNOWN</type></link> when the name is not recognized.</para>
</refsect3></refsect2>
<refsect2 id="enca-name-to-surface" role="function">
<title>enca_name_to_surface ()</title>
<indexterm zone="enca-name-to-surface"><primary>enca_name_to_surface</primary></indexterm>
<programlisting language="C"><link linkend="EncaSurface"><returnvalue>EncaSurface</returnvalue></link>
enca_name_to_surface (<parameter>const <link linkend="char"><type>char</type></link> *sname</parameter>);</programlisting>
<para>Transforms surface name to numeric surface id.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>sname</para></entry>
<entry role="parameter_description"><para>The surface name.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The surface id; <link linkend="ENCA-SURFACE-UNKNOWN:CAPS"><literal>ENCA_SURFACE_UNKNOWN</literal></link> when the name is not
recognized.</para>
</refsect3></refsect2>
<refsect2 id="enca-language-init" role="function">
<title>enca_language_init ()</title>
<indexterm zone="enca-language-init"><primary>enca_language_init</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_language_init (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>,
<parameter>const <link linkend="char"><type>char</type></link> *langname</parameter>);</programlisting>
<para>Initializes analyser for language <parameter>langname</parameter>
.</para>
<para>Assumes <parameter>analyser</parameter>
is uninitialized, calling with an initialized <parameter>analyser</parameter>
leads to memory leak.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state to be initialized for this language.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>langname</para></entry>
<entry role="parameter_description"><para>Two-letter ISO-639 language code.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero on success, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="enca-language-destroy" role="function">
<title>enca_language_destroy ()</title>
<indexterm zone="enca-language-destroy"><primary>enca_language_destroy</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_language_destroy (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Destroys the language part of analyser state <parameter>analyser</parameter>
.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state whose language part should be destroyed.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-get-charset-similarity-matrix" role="function">
<title>enca_get_charset_similarity_matrix ()</title>
<indexterm zone="enca-get-charset-similarity-matrix"><primary>enca_get_charset_similarity_matrix</primary></indexterm>
<programlisting language="C"><link linkend="double"><returnvalue>double</returnvalue></link> *
enca_get_charset_similarity_matrix (<parameter>const <link linkend="EncaLanguageInfo"><type>EncaLanguageInfo</type></link> *lang</parameter>);</programlisting>
<para>Computes character weight similarity matrix for language <parameter>lang</parameter>
.</para>
<para>sim[i,j] is normalized to sim[i,i] thus:</para>
<itemizedlist>
<listitem><para>a row i contains "probabilities" that different languages will look like the
i-th one</para></listitem>
<listitem>
<para>a column i contains "probabilities" that the i-th language will look like
the other languages.</para>
</listitem>
</itemizedlist>
<para>For all practical applications, the higher one of sim[i,j] and sim[j,i]
is important.</para>
<para>Note: this is not used anywhere, only by simtable.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>lang</para></entry>
<entry role="parameter_description"><para>A language.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The matrix, its size is determined by <parameter>lang->ncharsets</parameter>
; <link linkend="NULL:CAPS"><type>NULL</type></link>
for language with no charsets.</para>
</refsect3></refsect2>
<refsect2 id="enca-charsets-subset-identical" role="function">
<title>enca_charsets_subset_identical ()</title>
<indexterm zone="enca-charsets-subset-identical"><primary>enca_charsets_subset_identical</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_charsets_subset_identical (<parameter><link linkend="int"><type>int</type></link> charset1</parameter>,
<parameter><link linkend="int"><type>int</type></link> charset2</parameter>,
<parameter>const <link linkend="size-t"><type>size_t</type></link> *counts</parameter>);</programlisting>
<para>Checks whether all characters with nonzero count have the same meaning
in both charsets.</para>
<para>In other words, it checks whether conversion of sample containing only
these characters from <parameter>charset1</parameter>
to <parameter>charset2</parameter>
would be identity.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>charset1</para></entry>
<entry role="parameter_description"><para>A charset.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>charset2</para></entry>
<entry role="parameter_description"><para>Another charset.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>counts</para></entry>
<entry role="parameter_description"><para>An array of size 0x100 containing character counts.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero if charsets are identical on the subset, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="enca-filter-boxdraw" role="function">
<title>enca_filter_boxdraw ()</title>
<indexterm zone="enca-filter-boxdraw"><primary>enca_filter_boxdraw</primary></indexterm>
<programlisting language="C"><link linkend="size-t"><returnvalue>size_t</returnvalue></link>
enca_filter_boxdraw (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>,
<parameter>unsigned <link linkend="char"><type>char</type></link> fill_char</parameter>);</programlisting>
<para>Runs boxdrawing characters filter on the sample buffer of <parameter>analyser</parameter>
for each charset of its language.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser whose charsets should be considered for filtration.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>fill_char</para></entry>
<entry role="parameter_description"><para>Replacement character for filtered bytes.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Number of characters filtered out.</para>
</refsect3></refsect2>
<refsect2 id="enca-language-hook-ncs" role="function">
<title>enca_language_hook_ncs ()</title>
<indexterm zone="enca-language-hook-ncs"><primary>enca_language_hook_ncs</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_language_hook_ncs (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>,
<parameter><link linkend="size-t"><type>size_t</type></link> ncs</parameter>,
<parameter><link linkend="EncaLanguageHookData1CS"><type>EncaLanguageHookData1CS</type></link> *hookdata</parameter>);</programlisting>
<para>Decide between two charsets differing only in a few characters.</para>
<para>If the two most probable charsets correspond to <parameter>hookdata</parameter>
charsets,
give the characters in which they differ half the weight of all other characters
together, thus making it possible to decide between the two very similar charsets.</para>
<para>It also recomputes <parameter>order</parameter>
when something changes.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser whose charset ratings are to be modified.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>ncs</para></entry>
<entry role="parameter_description"><para>The number of charsets.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>hookdata</para></entry>
<entry role="parameter_description"><para>What characters of which charsets should be given the extra
weight.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero when <parameter>ratings</parameter>
were actually modified, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="enca-language-hook-eol" role="function">
<title>enca_language_hook_eol ()</title>
<indexterm zone="enca-language-hook-eol"><primary>enca_language_hook_eol</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_language_hook_eol (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>,
<parameter><link linkend="size-t"><type>size_t</type></link> ncs</parameter>,
<parameter><link linkend="EncaLanguageHookDataEOL"><type>EncaLanguageHookDataEOL</type></link> *hookdata</parameter>);</programlisting>
<para>Decide between two charsets differing only in EOL type or other surface.</para>
<para>The (surface mask, charset) pairs are scanned in order. If a matching
surface is found, ratings of all other charsets in the list are zeroed.
So you can place a surface mask of all 1s at the end to match when nothing
else matches.</para>
<para>All the charsets have to have the same rating, or nothing happens.</para>
<para>It also recomputes <parameter>order</parameter>
when something changes.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser whose charset ratings are to be modified.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>ncs</para></entry>
<entry role="parameter_description"><para>The number of charsets.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>hookdata</para></entry>
<entry role="parameter_description"><para>Which charsets should be decided between based
on the EOL type.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero when <parameter>ratings</parameter>
were actually modified, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="enca-guess-init" role="function">
<title>enca_guess_init ()</title>
<indexterm zone="enca-guess-init"><primary>enca_guess_init</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_guess_init (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Allocates and initializes analyser state, sets options to defaults.</para>
<para>Assumes <parameter>analyser</parameter>
is uninitialized, calling with an initialized <parameter>analyser</parameter>
leads to memory leak, but <parameter>analyser->lang</parameter>
must be already initialized.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser to initialize.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-guess-destroy" role="function">
<title>enca_guess_destroy ()</title>
<indexterm zone="enca-guess-destroy"><primary>enca_guess_destroy</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_guess_destroy (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Frees memory owned by analyser state.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser to destroy.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-eol-surface" role="function">
<title>enca_eol_surface ()</title>
<indexterm zone="enca-eol-surface"><primary>enca_eol_surface</primary></indexterm>
<programlisting language="C"><link linkend="EncaSurface"><returnvalue>EncaSurface</returnvalue></link>
enca_eol_surface (<parameter>const unsigned <link linkend="char"><type>char</type></link> *buffer</parameter>,
<parameter><link linkend="size-t"><type>size_t</type></link> size</parameter>,
<parameter>const <link linkend="size-t"><type>size_t</type></link> *counts</parameter>);</programlisting>
<para>Find EOL type of sample in <parameter>buffer</parameter>
.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>buffer</para></entry>
<entry role="parameter_description"><para>A buffer whose EOL type is to be detected.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>size</para></entry>
<entry role="parameter_description"><para>Size of <parameter>buffer</parameter>
.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>counts</para></entry>
<entry role="parameter_description"><para>Character counts.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The EOL surface flags.</para>
</refsect3></refsect2>
<refsect2 id="enca-find-max-sec" role="function">
<title>enca_find_max_sec ()</title>
<indexterm zone="enca-find-max-sec"><primary>enca_find_max_sec</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_find_max_sec (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Updates <parameter>analyser->order</parameter>
according to charset <parameter>ratings</parameter>
.</para>
<para>XXX: This should be stable sort. The ordering is defined by
data/&lt;lang&gt;/&lt;lang&gt;.h header file which is in turn defined by ordering in
the appropriate script (doit.sh). Silly.</para>
<para>Must not be called with <parameter>analyser</parameter>
with no regular charsets.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>An analyser.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-double-utf8-init" role="function">
<title>enca_double_utf8_init ()</title>
<indexterm zone="enca-double-utf8-init"><primary>enca_double_utf8_init</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_double_utf8_init (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Initializes double-UTF-8 check.</para>
<para>In fact it initializes the fields to <link linkend="NULL:CAPS"><type>NULL</type></link>'s, they are actually initialized
only when needed.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state to be initialized.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-double-utf8-destroy" role="function">
<title>enca_double_utf8_destroy ()</title>
<indexterm zone="enca-double-utf8-destroy"><primary>enca_double_utf8_destroy</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_double_utf8_destroy (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Destroys the double-UTF-8 check part of analyser state <parameter>analyser</parameter>
.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state whose double-UTF-8 check part should be destroyed.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-pair-init" role="function">
<title>enca_pair_init ()</title>
<indexterm zone="enca-pair-init"><primary>enca_pair_init</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_pair_init (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Initializes pair statistics data.</para>
<para>In fact it just sets everything to <link linkend="NULL:CAPS"><type>NULL</type></link>, to be initialized when needed.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state to be initialized.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-pair-destroy" role="function">
<title>enca_pair_destroy ()</title>
<indexterm zone="enca-pair-destroy"><primary>enca_pair_destroy</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_pair_destroy (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Destroys the pair statistics part of analyser state <parameter>analyser</parameter>
.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state whose pair statistics part should be destroyed.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-pair-analyse" role="function">
<title>enca_pair_analyse ()</title>
<indexterm zone="enca-pair-analyse"><primary>enca_pair_analyse</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_pair_analyse (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Performs pair-frequency based analysis, provided that the language supports
it (does nothing otherwise).</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser containing the sample for pair frequency analysis.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero when the character set was successfully determined,
<parameter>analyser</parameter>
-><parameter>result</parameter>
.<parameter>charset</parameter>
is then directly modified.</para>
</refsect3></refsect2>
</refsect1>
<refsect1 id="libenca-Internal-Functions.other_details" role="details">
<title role="details.title">Types and Values</title>
<refsect2 id="EncaAnalyserOptions" role="struct">
<title>struct EncaAnalyserOptions</title>
<indexterm zone="EncaAnalyserOptions"><primary>EncaAnalyserOptions</primary></indexterm>
<programlisting language="C">struct EncaAnalyserOptions {
int const_buffer;
size_t min_chars;
double threshold;
int multibyte_enabled;
int interpreted_surfaces;
int ambiguous_mode;
int filtering;
int test_garbageness;
int termination_strictness;
};
</programlisting>
<para>Analyser options, a part of analyser state.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaAnalyserOptions.const-buffer">const_buffer</structfield>;</para></entry>
<entry role="struct_member_description"><para>Treat buffer as const? Otherwise its content can be,
and probably will be, modified.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaAnalyserOptions.min-chars">min_chars</structfield>;</para></entry>
<entry role="struct_member_description"><para>Minimal number of significant characters.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="double"><type>double</type></link> <structfield id="EncaAnalyserOptions.threshold">threshold</structfield>;</para></entry>
<entry role="struct_member_description"><para>Minimal ratio between winner and the second.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaAnalyserOptions.multibyte-enabled">multibyte_enabled</structfield>;</para></entry>
<entry role="struct_member_description"><para>Check for multibyte encodings?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaAnalyserOptions.interpreted-surfaces">interpreted_surfaces</structfield>;</para></entry>
<entry role="struct_member_description"><para>Allow surfaces causing fundamental reinterpretation?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaAnalyserOptions.ambiguous-mode">ambiguous_mode</structfield>;</para></entry>
<entry role="struct_member_description"><para>Ambiguous mode?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaAnalyserOptions.filtering">filtering</structfield>;</para></entry>
<entry role="struct_member_description"><para>Allow binary and box-drawing filters?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaAnalyserOptions.test-garbageness">test_garbageness</structfield>;</para></entry>
<entry role="struct_member_description"><para>Do test garbageness?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaAnalyserOptions.termination-strictness">termination_strictness</structfield>;</para></entry>
<entry role="struct_member_description"><para>Disallow broken multibyte sequences at buffer end?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaAnalyserState" role="struct">
<title>struct EncaAnalyserState</title>
<indexterm zone="EncaAnalyserState"><primary>EncaAnalyserState</primary></indexterm>
<programlisting language="C">struct EncaAnalyserState {
/* Language data. */
const EncaLanguageInfo *lang;
size_t ncharsets;
int *charsets;
/* Analyser state. */
EncaErrno gerrno;
size_t size;
unsigned char *buffer;
EncaEncoding result;
size_t *counts;
size_t bin;
size_t up;
double *ratings;
size_t *order;
size_t size2;
unsigned char *buffer2;
/* Double-UTF-8 data. */
EncaUTFCheckData *utfch;
int *utfbuf;
/* Pair frequency data */
unsigned char *pair2bits;
size_t *bitcounts;
size_t *pairratings;
/* LCUC data XXX: unused (yet) */
size_t *lcbits;
size_t *ucbits;
/* Options. */
EncaAnalyserOptions options;
};
</programlisting>
<para>The internal analyser state.</para>
<para>Passed as an opaque object (`this') to analyser calls.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para>const <link linkend="EncaLanguageInfo"><type>EncaLanguageInfo</type></link> *<structfield id="EncaAnalyserState.lang">lang</structfield>;</para></entry>
<entry role="struct_member_description"><para>Language information.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaAnalyserState.ncharsets">ncharsets</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of 8bit charsets in this language.
(Equal to <parameter>lang->ncharsets</parameter>
.)</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> *<structfield id="EncaAnalyserState.charsets">charsets</structfield>;</para></entry>
<entry role="struct_member_description"><para>8bit charset id's [<parameter>ncharsets</parameter>
].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaErrno"><type>EncaErrno</type></link> <structfield id="EncaAnalyserState.gerrno">gerrno</structfield>;</para></entry>
<entry role="struct_member_description"><para>Guessing error code (errno).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaAnalyserState.size">size</structfield>;</para></entry>
<entry role="struct_member_description"><para>Size of buffer.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned <link linkend="char"><type>char</type></link> *<structfield id="EncaAnalyserState.buffer">buffer</structfield>;</para></entry>
<entry role="struct_member_description"><para>Buffer whose encoding is to be detected [<parameter>size</parameter>
].
(Owned by outer world.)</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaEncoding"><type>EncaEncoding</type></link> <structfield id="EncaAnalyserState.result">result</structfield>;</para></entry>
<entry role="struct_member_description"><para>Result returned to caller.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> *<structfield id="EncaAnalyserState.counts">counts</structfield>;</para></entry>
<entry role="struct_member_description"><para>Character counts [0x100].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaAnalyserState.bin">bin</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of `binary' characters.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaAnalyserState.up">up</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of 8bit characters.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="double"><type>double</type></link> *<structfield id="EncaAnalyserState.ratings">ratings</structfield>;</para></entry>
<entry role="struct_member_description"><para>8bit charset ratings [<parameter>ncharsets</parameter>
].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> *<structfield id="EncaAnalyserState.order">order</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset indices (not id's) sorted by ratings in descending order
[ncharsets].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaAnalyserState.size2">size2</structfield>;</para></entry>
<entry role="struct_member_description"><para>Size of buffer2.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned <link linkend="char"><type>char</type></link> *<structfield id="EncaAnalyserState.buffer2">buffer2</structfield>;</para></entry>
<entry role="struct_member_description"><para>A temporary secondary buffer [<parameter>size2</parameter>
].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaUTFCheckData"><type>EncaUTFCheckData</type></link> *<structfield id="EncaAnalyserState.utfch">utfch</structfield>;</para></entry>
<entry role="struct_member_description"><para>Double-UTF-8 test data [<parameter>ncharsets</parameter>
].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> *<structfield id="EncaAnalyserState.utfbuf">utfbuf</structfield>;</para></entry>
<entry role="struct_member_description"><para>Double-UTF-8 buffer for various UCS-2 character counting [0x10000].
(Magic: see <link linkend="mark-scratch-buffer"><function>mark_scratch_buffer()</function></link> for description.)</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned <link linkend="char"><type>char</type></link> *<structfield id="EncaAnalyserState.pair2bits">pair2bits</structfield>;</para></entry>
<entry role="struct_member_description"><para>Character pair map to charsets [0x100000] (indexed
0x100*first + second). Each bit corresponds to one charset,
when set, the pair is `good' for the given charset. The
type is char, so it breaks for <parameter>ncharsets</parameter>
> 8, but it should
not be accessed from outer world, so it can be easily enlarged
to more bits.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> *<structfield id="EncaAnalyserState.bitcounts">bitcounts</structfield>;</para></entry>
<entry role="struct_member_description"><para>Counts for each possible bit combination in <parameter>pair2bits</parameter>
[0x1 &lt;&lt; ncharsets].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> *<structfield id="EncaAnalyserState.pairratings">pairratings</structfield>;</para></entry>
<entry role="struct_member_description"><para>Counts of `good' pairs per charset [<parameter>ncharsets</parameter>
].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> *<structfield id="EncaAnalyserState.lcbits">lcbits</structfield>;</para></entry>
<entry role="struct_member_description"><para>If a character is lowercase in some charset, the corresponding bit
is set [0x100].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> *<structfield id="EncaAnalyserState.ucbits">ucbits</structfield>;</para></entry>
<entry role="struct_member_description"><para>If a character is uppercase in some charset, the corresponding bit
is set [0x100].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaAnalyserOptions"><type>EncaAnalyserOptions</type></link> <structfield id="EncaAnalyserState.options">options</structfield>;</para></entry>
<entry role="struct_member_description"><para>Analyser options.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaCharsetInfo" role="struct">
<title>struct EncaCharsetInfo</title>
<indexterm zone="EncaCharsetInfo"><primary>EncaCharsetInfo</primary></indexterm>
<programlisting language="C">struct EncaCharsetInfo {
int enca;
int rfc1345;
int cstocs;
int iconv;
int mime;
const char *human;
unsigned int flags;
unsigned int nsurface;
};
</programlisting>
<para>General charset information.</para>
<para>All the <link linkend="int"><type>int</type></link> fields are indices in <link linkend="ALIAS-LIST:CAPS"><type>ALIAS_LIST</type></link>[].</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaCharsetInfo.enca">enca</structfield>;</para></entry>
<entry role="struct_member_description"><para>Default, implicit name in enca.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaCharsetInfo.rfc1345">rfc1345</structfield>;</para></entry>
<entry role="struct_member_description"><para>RFC1345 charset name.
(For charsets not in RFC1345, some canonical name is invented.)</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaCharsetInfo.cstocs">cstocs</structfield>;</para></entry>
<entry role="struct_member_description"><para>Cstocs charset name or -1.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaCharsetInfo.iconv">iconv</structfield>;</para></entry>
<entry role="struct_member_description"><para>Iconv charset name or -1.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaCharsetInfo.mime">mime</structfield>;</para></entry>
<entry role="struct_member_description"><para>Preferred MIME charset name or -1.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>const <link linkend="char"><type>char</type></link> *<structfield id="EncaCharsetInfo.human">human</structfield>;</para></entry>
<entry role="struct_member_description"><para>Human comprehensible description.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned <link linkend="int"><type>int</type></link> <structfield id="EncaCharsetInfo.flags">flags</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset properties (7bit, 8bit, multibyte, ...).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned <link linkend="int"><type>int</type></link> <structfield id="EncaCharsetInfo.nsurface">nsurface</structfield>;</para></entry>
<entry role="struct_member_description"><para>Natural surface (`implied' in recode).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaLanguageInfo" role="struct">
<title>struct EncaLanguageInfo</title>
<indexterm zone="EncaLanguageInfo"><primary>EncaLanguageInfo</primary></indexterm>
<programlisting language="C">struct EncaLanguageInfo {
const char *name;
const char *humanname;
size_t ncharsets;
const char *const *csnames;
const unsigned short int *const *weights;
const unsigned short int *significant;
const unsigned char *const *letters;
const unsigned char **const *pairs;
long int weight_sum;
EncaHookFunc hook;
EncaHookFunc eolhook;
EncaHookFunc lcuchook;
EncaHookFunc ratinghook;
};
</programlisting>
<para>Language specific data.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para>const <link linkend="char"><type>char</type></link> *<structfield id="EncaLanguageInfo.name">name</structfield>;</para></entry>
<entry role="struct_member_description"><para>Language name, or more precisely, locale name.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>const <link linkend="char"><type>char</type></link> *<structfield id="EncaLanguageInfo.humanname">humanname</structfield>;</para></entry>
<entry role="struct_member_description"><para>Normal human-readable [English] language name.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaLanguageInfo.ncharsets">ncharsets</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of charsets in this language.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>const <link linkend="char"><type>char</type></link> *const  *<structfield id="EncaLanguageInfo.csnames">csnames</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset names [<parameter>ncharsets</parameter>
].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>long <link linkend="int"><type>int</type></link> <structfield id="EncaLanguageInfo.weight-sum">weight_sum</structfield>;</para></entry>
<entry role="struct_member_description"><para>Sum of all <parameter>weights</parameter>
(is the same for all charsets).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaHookFunc"><type>EncaHookFunc</type></link> <structfield id="EncaLanguageInfo.hook">hook</structfield>;</para></entry>
<entry role="struct_member_description"><para>Hook function (deciding hard cases).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaHookFunc"><type>EncaHookFunc</type></link> <structfield id="EncaLanguageInfo.eolhook">eolhook</structfield>;</para></entry>
<entry role="struct_member_description"><para>EOL hook function (deciding ambiguous cases based on EOL type).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaHookFunc"><type>EncaHookFunc</type></link> <structfield id="EncaLanguageInfo.lcuchook">lcuchook</structfield>;</para></entry>
<entry role="struct_member_description"></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaHookFunc"><type>EncaHookFunc</type></link> <structfield id="EncaLanguageInfo.ratinghook">ratinghook</structfield>;</para></entry>
<entry role="struct_member_description"><para>Helper to calculate ratings for weightingless languages.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaLanguageHookData1CS" role="struct">
<title>struct EncaLanguageHookData1CS</title>
<indexterm zone="EncaLanguageHookData1CS"><primary>EncaLanguageHookData1CS</primary></indexterm>
<programlisting language="C">struct EncaLanguageHookData1CS {
const char *name;
size_t size;
const unsigned char *list;
size_t cs;
};
</programlisting>
<para>Container for data needed by <link linkend="enca-language-hook-ncs"><function>enca_language_hook_ncs()</function></link>.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para>const <link linkend="char"><type>char</type></link> *<structfield id="EncaLanguageHookData1CS.name">name</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset name.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaLanguageHookData1CS.size">size</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of characters in <parameter>list</parameter>
.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaLanguageHookData1CS.cs">cs</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset number. This is an index in <parameter>analyser</parameter>
arrays (like <parameter>charsets</parameter>
),
NOT a charset id.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaLanguageHookDataEOL" role="struct">
<title>struct EncaLanguageHookDataEOL</title>
<indexterm zone="EncaLanguageHookDataEOL"><primary>EncaLanguageHookDataEOL</primary></indexterm>
<programlisting language="C">struct EncaLanguageHookDataEOL {
const char *name;
EncaSurface eol;
size_t cs;
};
</programlisting>
<para>Container for data needed by <link linkend="enca-language-hook-eol"><function>enca_language_hook_eol()</function></link>.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para>const <link linkend="char"><type>char</type></link> *<structfield id="EncaLanguageHookDataEOL.name">name</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset name.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaSurface"><type>EncaSurface</type></link> <structfield id="EncaLanguageHookDataEOL.eol">eol</structfield>;</para></entry>
<entry role="struct_member_description"><para>The corresponding <link linkend="EncaSurface"><type>EncaSurface</type></link> bit.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaLanguageHookDataEOL.cs">cs</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset number. This is an index in <parameter>analyser</parameter>
arrays (like <parameter>charsets</parameter>
),
NOT a charset id.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaUTFCheckData" role="struct">
<title>struct EncaUTFCheckData</title>
<indexterm zone="EncaUTFCheckData"><primary>EncaUTFCheckData</primary></indexterm>
<programlisting language="C">struct EncaUTFCheckData {
double rating;
size_t size;
int result;
int *ucs2;
int *weights;
};
</programlisting>
<para>Data needed by double-UTF-8 check, per language charset.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para><link linkend="double"><type>double</type></link> <structfield id="EncaUTFCheckData.rating">rating</structfield>;</para></entry>
<entry role="struct_member_description"><para>Total rating for this charset.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link> <structfield id="EncaUTFCheckData.size">size</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of UCS-2 characters.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> <structfield id="EncaUTFCheckData.result">result</structfield>;</para></entry>
<entry role="struct_member_description"><para>Nonzero when the sample is probably Doubly-UTF-8 encoded from
this charset.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> *<structfield id="EncaUTFCheckData.ucs2">ucs2</structfield>;</para></entry>
<entry role="struct_member_description"><para>List of significant UCS-2 characters, in order [<parameter>size</parameter>
].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link> *<structfield id="EncaUTFCheckData.weights">weights</structfield>;</para></entry>
<entry role="struct_member_description"><para>Weights for double-UTF-8 check [<parameter>size</parameter>
]. Positive means normal
UTF-8, negative doubly-encoded.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EPSILON:CAPS" role="macro">
<title>EPSILON</title>
<indexterm zone="EPSILON:CAPS"><primary>EPSILON</primary></indexterm>
<programlisting language="C">#define EPSILON 0.000001
</programlisting>
<para>`Zero' for float comparison (and to prevent division by zero, etc.).</para>
</refsect2>
<refsect2 id="FILL-NONLETTER:CAPS" role="macro">
<title>FILL_NONLETTER</title>
<indexterm zone="FILL-NONLETTER:CAPS"><primary>FILL_NONLETTER</primary></indexterm>
<programlisting language="C">#define FILL_NONLETTER '.'
</programlisting>
<para>Replacement character for non-letters in pair frequencies.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-BE:CAPS" role="variable">
<title>ENCA_LANGUAGE_BE</title>
<indexterm zone="ENCA-LANGUAGE-BE:CAPS"><primary>ENCA_LANGUAGE_BE</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_BE;
</programlisting>
<para>Belarusian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-BG:CAPS" role="variable">
<title>ENCA_LANGUAGE_BG</title>
<indexterm zone="ENCA-LANGUAGE-BG:CAPS"><primary>ENCA_LANGUAGE_BG</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_BG;
</programlisting>
<para>Bulgarian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-CS:CAPS" role="variable">
<title>ENCA_LANGUAGE_CS</title>
<indexterm zone="ENCA-LANGUAGE-CS:CAPS"><primary>ENCA_LANGUAGE_CS</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_CS;
</programlisting>
<para>Czech language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-ET:CAPS" role="variable">
<title>ENCA_LANGUAGE_ET</title>
<indexterm zone="ENCA-LANGUAGE-ET:CAPS"><primary>ENCA_LANGUAGE_ET</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_ET;
</programlisting>
<para>Estonian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-HR:CAPS" role="variable">
<title>ENCA_LANGUAGE_HR</title>
<indexterm zone="ENCA-LANGUAGE-HR:CAPS"><primary>ENCA_LANGUAGE_HR</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_HR;
</programlisting>
<para>Croatian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-HU:CAPS" role="variable">
<title>ENCA_LANGUAGE_HU</title>
<indexterm zone="ENCA-LANGUAGE-HU:CAPS"><primary>ENCA_LANGUAGE_HU</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_HU;
</programlisting>
<para>Hungarian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-LT:CAPS" role="variable">
<title>ENCA_LANGUAGE_LT</title>
<indexterm zone="ENCA-LANGUAGE-LT:CAPS"><primary>ENCA_LANGUAGE_LT</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_LT;
</programlisting>
<para>Lithuanian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-LV:CAPS" role="variable">
<title>ENCA_LANGUAGE_LV</title>
<indexterm zone="ENCA-LANGUAGE-LV:CAPS"><primary>ENCA_LANGUAGE_LV</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_LV;
</programlisting>
<para>Latvian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-PL:CAPS" role="variable">
<title>ENCA_LANGUAGE_PL</title>
<indexterm zone="ENCA-LANGUAGE-PL:CAPS"><primary>ENCA_LANGUAGE_PL</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_PL;
</programlisting>
<para>Polish language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-RU:CAPS" role="variable">
<title>ENCA_LANGUAGE_RU</title>
<indexterm zone="ENCA-LANGUAGE-RU:CAPS"><primary>ENCA_LANGUAGE_RU</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_RU;
</programlisting>
<para>Russian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-SK:CAPS" role="variable">
<title>ENCA_LANGUAGE_SK</title>
<indexterm zone="ENCA-LANGUAGE-SK:CAPS"><primary>ENCA_LANGUAGE_SK</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_SK;
</programlisting>
<para>Slovak language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-SL:CAPS" role="variable">
<title>ENCA_LANGUAGE_SL</title>
<indexterm zone="ENCA-LANGUAGE-SL:CAPS"><primary>ENCA_LANGUAGE_SL</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_SL;
</programlisting>
<para>Slovene language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-UK:CAPS" role="variable">
<title>ENCA_LANGUAGE_UK</title>
<indexterm zone="ENCA-LANGUAGE-UK:CAPS"><primary>ENCA_LANGUAGE_UK</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_UK;
</programlisting>
<para>Ukrainian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-ZH:CAPS" role="variable">
<title>ENCA_LANGUAGE_ZH</title>
<indexterm zone="ENCA-LANGUAGE-ZH:CAPS"><primary>ENCA_LANGUAGE_ZH</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_ZH;
</programlisting>
<para>Chinese language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
</refsect1>
</refentry>