<?xml version="1.0"?>
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" 
               "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd">
<refentry id="libenca-Internal-Functions">
<refmeta>
<refentrytitle role="top_of_page" id="libenca-Internal-Functions.top_of_page">internal</refentrytitle>
<manvolnum>3</manvolnum>
<refmiscinfo>LIBENCA Library</refmiscinfo>
</refmeta>
<refnamediv>
<refname>internal</refname>
<refpurpose><para>
internal functions
</para></refpurpose>
</refnamediv>

<refsect1 id="libenca-Internal-Functions.functions" role="functions_proto">
<title role="functions_proto.title">Functions</title>
<informaltable pgwide="1" frame="none">
<tgroup cols="2">
<colspec colname="functions_return" colwidth="150px"/>
<colspec colname="functions_name"/>
<tbody>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><phrase role="c_punctuation">(</phrase><link linkend="EncaHookFunc">*EncaHookFunc</link><phrase role="c_punctuation">)</phrase>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><phrase role="c_punctuation">(</phrase><link linkend="EncaGuessFunc">*EncaGuessFunc</link><phrase role="c_punctuation">)</phrase>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="ELEMENTS:CAPS">ELEMENTS</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="MAKE-HOOK-LINE:CAPS">MAKE_HOOK_LINE</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="LF:CAPS">LF</link></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="CR:CAPS">CR</link></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>&#160;*
</entry><entry role="function_name"><link linkend="enca-malloc">enca_malloc</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>&#160;*
</entry><entry role="function_name"><link linkend="enca-realloc">enca_realloc</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="enca-free">enca_free</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="NEW:CAPS">NEW</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="RENEW:CAPS">RENEW</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
</entry><entry role="function_name"><link linkend="enca-strdup">enca_strdup</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type">const <link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
</entry><entry role="function_name"><link linkend="enca-strstr">enca_strstr</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
</entry><entry role="function_name"><link linkend="enca-stpcpy">enca_stpcpy</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
</entry><entry role="function_name"><link linkend="enca-strconcat">enca_strconcat</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
</entry><entry role="function_name"><link linkend="enca-strappend">enca_strappend</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="enca-csname">enca_csname</link><phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-name-to-charset">enca_name_to_charset</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="EncaSurface"><returnvalue>EncaSurface</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-name-to-surface">enca_name_to_surface</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-language-init">enca_language_init</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-language-destroy">enca_language_destroy</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="double"><returnvalue>double</returnvalue></link>&#160;*
</entry><entry role="function_name"><link linkend="enca-get-charset-similarity-matrix">enca_get_charset_similarity_matrix</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-charsets-subset-identical">enca_charsets_subset_identical</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="size-t"><returnvalue>size_t</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-filter-boxdraw">enca_filter_boxdraw</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-language-hook-ncs">enca_language_hook_ncs</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-language-hook-eol">enca_language_hook_eol</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-guess-init">enca_guess_init</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-guess-destroy">enca_guess_destroy</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="EncaSurface"><returnvalue>EncaSurface</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-eol-surface">enca_eol_surface</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-find-max-sec">enca_find_max_sec</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-double-utf8-init">enca_double_utf8_init</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-double-utf8-destroy">enca_double_utf8_destroy</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-pair-init">enca_pair_init</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="void"><returnvalue>void</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-pair-destroy">enca_pair_destroy</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>
<row><entry role="function_type"><link linkend="int"><returnvalue>int</returnvalue></link>
</entry><entry role="function_name"><link linkend="enca-pair-analyse">enca_pair_analyse</link>&#160;<phrase role="c_punctuation">()</phrase></entry></row>

</tbody>
</tgroup>
</informaltable>
</refsect1>
<refsect1 id="libenca-Internal-Functions.other" role="other_proto">
<title role="other_proto.title">Types and Values</title>
<informaltable role="enum_members_table" pgwide="1" frame="none">
<tgroup cols="2">
<colspec colname="name" colwidth="150px"/>
<colspec colname="description"/>
<tbody>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaAnalyserOptions">EncaAnalyserOptions</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaAnalyserState">EncaAnalyserState</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaCharsetInfo">EncaCharsetInfo</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaLanguageInfo">EncaLanguageInfo</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaLanguageHookData1CS">EncaLanguageHookData1CS</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaLanguageHookDataEOL">EncaLanguageHookDataEOL</link></entry></row>
<row><entry role="datatype_keyword">struct</entry><entry role="function_name"><link linkend="EncaUTFCheckData">EncaUTFCheckData</link></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="EPSILON:CAPS">EPSILON</link></entry></row>
<row><entry role="define_keyword">#define</entry><entry role="function_name"><link linkend="FILL-NONLETTER:CAPS">FILL_NONLETTER</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-BE:CAPS">ENCA_LANGUAGE_BE</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-BG:CAPS">ENCA_LANGUAGE_BG</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-CS:CAPS">ENCA_LANGUAGE_CS</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-ET:CAPS">ENCA_LANGUAGE_ET</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-HR:CAPS">ENCA_LANGUAGE_HR</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-HU:CAPS">ENCA_LANGUAGE_HU</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-LT:CAPS">ENCA_LANGUAGE_LT</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-LV:CAPS">ENCA_LANGUAGE_LV</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-PL:CAPS">ENCA_LANGUAGE_PL</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-RU:CAPS">ENCA_LANGUAGE_RU</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-SK:CAPS">ENCA_LANGUAGE_SK</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-SL:CAPS">ENCA_LANGUAGE_SL</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-UK:CAPS">ENCA_LANGUAGE_UK</link></entry></row>
<row><entry role="variable_type">extern const EncaLanguageInfo </entry><entry role="function_name"><link linkend="ENCA-LANGUAGE-ZH:CAPS">ENCA_LANGUAGE_ZH</link></entry></row>

</tbody>
</tgroup>
</informaltable>
</refsect1>


<refsect1 id="libenca-Internal-Functions.description" role="desc">
<title role="desc.title">Description</title>
<para>
Do not use outside Enca library.
</para>

</refsect1>
<refsect1 id="libenca-Internal-Functions.functions_details" role="details">
<title role="details.title">Functions</title>
<refsect2 id="EncaHookFunc" role="function">
<title>EncaHookFunc&#160;()</title>
<indexterm zone="EncaHookFunc"><primary>EncaHookFunc</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
<phrase role="c_punctuation">(</phrase>*EncaHookFunc<phrase role="c_punctuation">)</phrase> (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Language hook function type.</para>
<para>Launches language specific hooks for a particular language.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser state whose charset ratings are to be modified.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero if charset ratings have been actually modified, zero
otherwise.</para>
</refsect3></refsect2>
<refsect2 id="EncaGuessFunc" role="function">
<title>EncaGuessFunc&#160;()</title>
<indexterm zone="EncaGuessFunc"><primary>EncaGuessFunc</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
<phrase role="c_punctuation">(</phrase>*EncaGuessFunc<phrase role="c_punctuation">)</phrase> (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Special (multibyte) encoding check function type.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser state whose buffer should be checked.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero if analyser->result has been set, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="ELEMENTS:CAPS" role="macro">
<title>ELEMENTS()</title>
<indexterm zone="ELEMENTS:CAPS"><primary>ELEMENTS</primary></indexterm>
<programlisting language="C">#define ELEMENTS(array) (sizeof(array)/sizeof((array)[0]))
</programlisting>
<para>Compute the number of elements of a static array.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>array</para></entry>
<entry role="parameter_description"><para>An array whose size is to be computed.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> the number of elements.</para>
</refsect3></refsect2>
<refsect2 id="MAKE-HOOK-LINE:CAPS" role="macro">
<title>MAKE_HOOK_LINE()</title>
<indexterm zone="MAKE-HOOK-LINE:CAPS"><primary>MAKE_HOOK_LINE</primary></indexterm>
<programlisting language="C">#define             MAKE_HOOK_LINE(name)</programlisting>
<para>Ugly code `beautifier' macro for language hooks.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>name</para></entry>
<entry role="parameter_description"><para>A charset name in C-style identifier suitable form.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="LF:CAPS" role="macro">
<title>LF</title>
<indexterm zone="LF:CAPS"><primary>LF</primary></indexterm>
<programlisting language="C">#define LF ((unsigned char)'\n')
</programlisting>
<para>Line feed character (End-of-line on Unix).</para>
</refsect2>
<refsect2 id="CR:CAPS" role="macro">
<title>CR</title>
<indexterm zone="CR:CAPS"><primary>CR</primary></indexterm>
<programlisting language="C">#define CR ((unsigned char)'\r')
</programlisting>
<para>Carriage return character (End-of-line on Macintosh).</para>
</refsect2>
<refsect2 id="enca-malloc" role="function">
<title>enca_malloc&#160;()</title>
<indexterm zone="enca-malloc"><primary>enca_malloc</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>&#160;*
enca_malloc (<parameter><link linkend="size-t"><type>size_t</type></link> size</parameter>);</programlisting>
<para>Allocates memory, always successfully (when fails, aborts program).</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>size</para></entry>
<entry role="parameter_description"><para>The number of bytes to allocate.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the newly allocated memory.</para>
</refsect3></refsect2>
<refsect2 id="enca-realloc" role="function">
<title>enca_realloc&#160;()</title>
<indexterm zone="enca-realloc"><primary>enca_realloc</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>&#160;*
enca_realloc (<parameter><link linkend="void"><type>void</type></link> *ptr</parameter>,
              <parameter><link linkend="size-t"><type>size_t</type></link> size</parameter>);</programlisting>
<para>Reallocates memory, always successfully (when fails, aborts program).</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>ptr</para></entry>
<entry role="parameter_description"><para>Pointer to block of previously allocated memory.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>size</para></entry>
<entry role="parameter_description"><para>The number of bytes to resize the block to.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the newly allocated memory, <link linkend="NULL:CAPS"><type>NULL</type></link> when <parameter>size</parameter>
is zero.</para>
</refsect3></refsect2>
<refsect2 id="enca-free" role="macro">
<title>enca_free()</title>
<indexterm zone="enca-free"><primary>enca_free</primary></indexterm>
<programlisting language="C">#define             enca_free(ptr)</programlisting>
<para>Frees memory pointed to by <parameter>ptr</parameter>
 with <link linkend="free"><function>free()</function></link> hack and assigns it a safe value,
thus may be called more than once.</para>
<para><parameter>ptr</parameter>
 MUST be l-value.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>ptr</para></entry>
<entry role="parameter_description"><para>Pointer to memory to free.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="NEW:CAPS" role="macro">
<title>NEW()</title>
<indexterm zone="NEW:CAPS"><primary>NEW</primary></indexterm>
<programlisting language="C">#define NEW(type,n) ((type*)enca_malloc((n)*sizeof(type)))
</programlisting>
<para>An <link linkend="enca-malloc"><function>enca_malloc()</function></link> wrapper.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>type</para></entry>
<entry role="parameter_description"><para>Data type to allocate.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>n</para></entry>
<entry role="parameter_description"><para>Number of elements to allocate.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the newly allocated memory.</para>
</refsect3></refsect2>
<refsect2 id="RENEW:CAPS" role="macro">
<title>RENEW()</title>
<indexterm zone="RENEW:CAPS"><primary>RENEW</primary></indexterm>
<programlisting language="C">#define RENEW(ptr,type,n) ((type*)enca_realloc((ptr),(n)*sizeof(type)))
</programlisting>
<para>An <link linkend="enca-realloc"><function>enca_realloc()</function></link> wrapper.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>ptr</para></entry>
<entry role="parameter_description"><para>Pointer to already allocated memory or <link linkend="NULL:CAPS"><type>NULL</type></link>.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>type</para></entry>
<entry role="parameter_description"><para>Data type to allocate.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>n</para></entry>
<entry role="parameter_description"><para>Number of elements to resize the memory to.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the reallocated memory (or pointer safe to call <link linkend="free"><function>free()</function></link>
on when <parameter>n</parameter>
is zero).</para>
</refsect3></refsect2>
<refsect2 id="enca-strdup" role="function">
<title>enca_strdup&#160;()</title>
<indexterm zone="enca-strdup"><primary>enca_strdup</primary></indexterm>
<programlisting language="C"><link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
enca_strdup (<parameter>const <link linkend="char"><type>char</type></link> *s</parameter>);</programlisting>
<para>Duplicates string.</para>
<para>Will be defined as <link linkend="strdup"><function>strdup()</function></link> when system provides it.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>s</para></entry>
<entry role="parameter_description"><para>A string.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The newly allocated string copy.</para>
</refsect3></refsect2>
<refsect2 id="enca-strstr" role="function">
<title>enca_strstr&#160;()</title>
<indexterm zone="enca-strstr"><primary>enca_strstr</primary></indexterm>
<programlisting language="C">const <link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
enca_strstr (<parameter>const <link linkend="char"><type>char</type></link> *haystack</parameter>,
             <parameter>const <link linkend="char"><type>char</type></link> *needle</parameter>);</programlisting>
<para>Finds occurrence of a substring in a string.</para>
<para>Will be defined as <link linkend="strstr"><function>strstr()</function></link> when system provides it.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>haystack</para></entry>
<entry role="parameter_description"><para>A string where to search.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>needle</para></entry>
<entry role="parameter_description"><para>A string to find.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the first occurrence of <parameter>needle</parameter>
in <parameter>haystack</parameter>
; <link linkend="NULL:CAPS"><type>NULL</type></link> if
not found.</para>
</refsect3></refsect2>
<refsect2 id="enca-stpcpy" role="function">
<title>enca_stpcpy&#160;()</title>
<indexterm zone="enca-stpcpy"><primary>enca_stpcpy</primary></indexterm>
<programlisting language="C"><link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
enca_stpcpy (<parameter><link linkend="char"><type>char</type></link> *dest</parameter>,
             <parameter>const <link linkend="char"><type>char</type></link> *src</parameter>);</programlisting>
<para>Appends a string to the end of another string, returning a pointer to
the terminating zero byte.</para>
<para>Will be defined as <link linkend="stpcpy"><function>stpcpy()</function></link> when system provides it.</para>
<para>The caller is responsible for providing <parameter>dest</parameter>
 long enough to hold the result.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>dest</para></entry>
<entry role="parameter_description"><para>A string.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>src</para></entry>
<entry role="parameter_description"><para>A string to append.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Pointer to the terminating zero byte of resulting string.</para>
</refsect3></refsect2>
<refsect2 id="enca-strconcat" role="function">
<title>enca_strconcat&#160;()</title>
<indexterm zone="enca-strconcat"><primary>enca_strconcat</primary></indexterm>
<programlisting language="C"><link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
enca_strconcat (<parameter>const <link linkend="char"><type>char</type></link> *str</parameter>,
                <parameter>...</parameter>);</programlisting>
<para>Concatenates arbitrary (but at least one) number of strings.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>str</para></entry>
<entry role="parameter_description"><para>A string.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>...</para></entry>
<entry role="parameter_description"><para>A <link linkend="NULL-terminated"><type>NULL-terminated</type></link> list of strings to append.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> All the strings concatenated together.</para>
</refsect3></refsect2>
<refsect2 id="enca-strappend" role="function">
<title>enca_strappend&#160;()</title>
<indexterm zone="enca-strappend"><primary>enca_strappend</primary></indexterm>
<programlisting language="C"><link linkend="char"><returnvalue>char</returnvalue></link>&#160;*
enca_strappend (<parameter><link linkend="char"><type>char</type></link> *str</parameter>,
                <parameter>...</parameter>);</programlisting>
<para>Appends arbitrary number of strings to a string.</para>
<para>The string <parameter>str</parameter>
 is destroyed (reallocated), the others are kept.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>str</para></entry>
<entry role="parameter_description"><para>A string.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>...</para></entry>
<entry role="parameter_description"><para>A <link linkend="NULL-terminated"><type>NULL-terminated</type></link> list of strings to append.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> All the strings concatenated together.</para>
</refsect3></refsect2>
<refsect2 id="enca-csname" role="macro">
<title>enca_csname()</title>
<indexterm zone="enca-csname"><primary>enca_csname</primary></indexterm>
<programlisting language="C">#define enca_csname(cs) enca_charset_name((cs), ENCA_NAME_STYLE_ENCA)
</programlisting>
<para>A shorthand for printing names with <link linkend="ENCA-NAME-STYLE-ENCA:CAPS"><type>ENCA_NAME_STYLE_ENCA</type></link>.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>cs</para></entry>
<entry role="parameter_description"><para>A charset id.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-name-to-charset" role="function">
<title>enca_name_to_charset&#160;()</title>
<indexterm zone="enca-name-to-charset"><primary>enca_name_to_charset</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_name_to_charset (<parameter>const <link linkend="char"><type>char</type></link> *csname</parameter>);</programlisting>
<para>Transforms charset name to numeric charset id.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>csname</para></entry>
<entry role="parameter_description"><para>The charset name.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The charset id; <link linkend="ENCA-CS-UNKNOWN:CAPS"><type>ENCA_CS_UNKNOWN</type></link> when the name is not recognized.</para>
</refsect3></refsect2>
<refsect2 id="enca-name-to-surface" role="function">
<title>enca_name_to_surface&#160;()</title>
<indexterm zone="enca-name-to-surface"><primary>enca_name_to_surface</primary></indexterm>
<programlisting language="C"><link linkend="EncaSurface"><returnvalue>EncaSurface</returnvalue></link>
enca_name_to_surface (<parameter>const <link linkend="char"><type>char</type></link> *sname</parameter>);</programlisting>
<para>Transforms surface name to numeric surface id.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>sname</para></entry>
<entry role="parameter_description"><para>The surface name.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The surface id; <link linkend="ENCA-SURFACE-UNKNOWN:CAPS"><literal>ENCA_SURFACE_UNKNOWN</literal></link> when the name is not
recognized.</para>
</refsect3></refsect2>
<refsect2 id="enca-language-init" role="function">
<title>enca_language_init&#160;()</title>
<indexterm zone="enca-language-init"><primary>enca_language_init</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_language_init (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>,
                    <parameter>const <link linkend="char"><type>char</type></link> *langname</parameter>);</programlisting>
<para>Initializes analyser for language <parameter>langname</parameter>
.</para>
<para>Assumes <parameter>analyser</parameter>
 is uninitialized; calling with an initialized <parameter>analyser</parameter>

leads to a memory leak.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state to be initialized for this language.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>langname</para></entry>
<entry role="parameter_description"><para>Two-letter ISO-639 language code.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero on success, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="enca-language-destroy" role="function">
<title>enca_language_destroy&#160;()</title>
<indexterm zone="enca-language-destroy"><primary>enca_language_destroy</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_language_destroy (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Destroys the language part of analyser state <parameter>analyser</parameter>
.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state whose language part should be destroyed.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-get-charset-similarity-matrix" role="function">
<title>enca_get_charset_similarity_matrix&#160;()</title>
<indexterm zone="enca-get-charset-similarity-matrix"><primary>enca_get_charset_similarity_matrix</primary></indexterm>
<programlisting language="C"><link linkend="double"><returnvalue>double</returnvalue></link>&#160;*
enca_get_charset_similarity_matrix (<parameter>const <link linkend="EncaLanguageInfo"><type>EncaLanguageInfo</type></link> *lang</parameter>);</programlisting>
<para>Computes character weight similarity matrix for language <parameter>lang</parameter>
.</para>
<para>sim[i,j] is normalized to sim[i,i] thus:</para>
<itemizedlist>
<listitem><para>a row i contains "probabilities" that different languages will look like the
i-th one</para></listitem>
<listitem>
<para>a column i contains "probabilities" that the i-th language will look like
the other languages.</para>
</listitem>
</itemizedlist>
<para>For all practical applications, the higher one of sim[i,j] and sim[j,i]
is important.</para>
<para>Note: this is not used anywhere, only by simtable.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>lang</para></entry>
<entry role="parameter_description"><para>A language.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The matrix, its size is determined by <parameter>lang->ncharsets</parameter>
; <link linkend="NULL:CAPS"><type>NULL</type></link>
for language with no charsets.</para>
</refsect3></refsect2>
<refsect2 id="enca-charsets-subset-identical" role="function">
<title>enca_charsets_subset_identical&#160;()</title>
<indexterm zone="enca-charsets-subset-identical"><primary>enca_charsets_subset_identical</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_charsets_subset_identical (<parameter><link linkend="int"><type>int</type></link> charset1</parameter>,
                                <parameter><link linkend="int"><type>int</type></link> charset2</parameter>,
                                <parameter>const <link linkend="size-t"><type>size_t</type></link> *counts</parameter>);</programlisting>
<para>Checks whether all characters with nonzero count have the same meaning
in both charsets.</para>
<para>In other words, it checks whether conversion of sample containing only
these characters from <parameter>charset1</parameter>
 to <parameter>charset2</parameter>
 would be identity.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>charset1</para></entry>
<entry role="parameter_description"><para>A charset.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>charset2</para></entry>
<entry role="parameter_description"><para>Another charset.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>counts</para></entry>
<entry role="parameter_description"><para>An array of size 0x100 containing character counts.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero if charsets are identical on the subset, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="enca-filter-boxdraw" role="function">
<title>enca_filter_boxdraw&#160;()</title>
<indexterm zone="enca-filter-boxdraw"><primary>enca_filter_boxdraw</primary></indexterm>
<programlisting language="C"><link linkend="size-t"><returnvalue>size_t</returnvalue></link>
enca_filter_boxdraw (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>,
                     <parameter>unsigned <link linkend="char"><type>char</type></link> fill_char</parameter>);</programlisting>
<para>Runs boxdrawing characters filter on <parameter>buffer</parameter>
 for each charset in <parameter>language</parameter>
.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser whose charsets should be considered for filtration.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>fill_char</para></entry>
<entry role="parameter_description"><para>Replacement character for filtered bytes.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Number of characters filtered out.</para>
</refsect3></refsect2>
<refsect2 id="enca-language-hook-ncs" role="function">
<title>enca_language_hook_ncs&#160;()</title>
<indexterm zone="enca-language-hook-ncs"><primary>enca_language_hook_ncs</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_language_hook_ncs (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>,
                        <parameter><link linkend="size-t"><type>size_t</type></link> ncs</parameter>,
                        <parameter><link linkend="EncaLanguageHookData1CS"><type>EncaLanguageHookData1CS</type></link> *hookdata</parameter>);</programlisting>
<para>Decide between two charsets differing only in a few characters.</para>
<para>If the two most probable charsets correspond to <parameter>hookdata</parameter>
 charsets,
give the characters in which they differ half the weight of all other characters
together, thus allowing to decide between the two very similar charsets.</para>
<para>It also recomputes <parameter>order</parameter>
 when something changes.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser whose charset ratings are to be modified.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>ncs</para></entry>
<entry role="parameter_description"><para>The number of charsets.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>hookdata</para></entry>
<entry role="parameter_description"><para>What characters of which charsets should be given the extra
weight.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero when <parameter>ratings</parameter>
were actually modified, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="enca-language-hook-eol" role="function">
<title>enca_language_hook_eol&#160;()</title>
<indexterm zone="enca-language-hook-eol"><primary>enca_language_hook_eol</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_language_hook_eol (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>,
                        <parameter><link linkend="size-t"><type>size_t</type></link> ncs</parameter>,
                        <parameter><link linkend="EncaLanguageHookDataEOL"><type>EncaLanguageHookDataEOL</type></link> *hookdata</parameter>);</programlisting>
<para>Decide between two charsets differing only in EOL type or other surface.</para>
<para>The (surface mask, charset) pairs are scanned in order. If a matching
surface is found, ratings of all other charsets in the list are zeroed.
So you can place a surface mask of all 1s at the end to match when nothing
else matches.</para>
<para>All the charsets have to have the same rating, or nothing happens.</para>
<para>It also recomputes <parameter>order</parameter>
 when something changes.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser whose charset ratings are to be modified.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>ncs</para></entry>
<entry role="parameter_description"><para>The number of charsets.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>hookdata</para></entry>
<entry role="parameter_description"><para>What characters of which charsets should be decided based
on the EOL type.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero when <parameter>ratings</parameter>
were actually modified, zero otherwise.</para>
</refsect3></refsect2>
<refsect2 id="enca-guess-init" role="function">
<title>enca_guess_init&#160;()</title>
<indexterm zone="enca-guess-init"><primary>enca_guess_init</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_guess_init (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Allocates and initializes analyser state, sets options to defaults.</para>
<para>Assumes <parameter>analyser</parameter>
 is uninitialized; calling with an initialized <parameter>analyser</parameter>

leads to a memory leak, but <parameter>analyser->lang</parameter>
 must be already initialized.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser to initialize.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-guess-destroy" role="function">
<title>enca_guess_destroy&#160;()</title>
<indexterm zone="enca-guess-destroy"><primary>enca_guess_destroy</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_guess_destroy (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Frees memory owned by analyser state.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser to destroy.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-eol-surface" role="function">
<title>enca_eol_surface&#160;()</title>
<indexterm zone="enca-eol-surface"><primary>enca_eol_surface</primary></indexterm>
<programlisting language="C"><link linkend="EncaSurface"><returnvalue>EncaSurface</returnvalue></link>
enca_eol_surface (<parameter>const unsigned <link linkend="char"><type>char</type></link> *buffer</parameter>,
                  <parameter><link linkend="size-t"><type>size_t</type></link> size</parameter>,
                  <parameter>const <link linkend="size-t"><type>size_t</type></link> *counts</parameter>);</programlisting>
<para>Find EOL type of sample in <parameter>buffer</parameter>
.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>buffer</para></entry>
<entry role="parameter_description"><para>A buffer whose EOL type is to be detected.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>size</para></entry>
<entry role="parameter_description"><para>Size of <parameter>buffer</parameter>
.</para></entry>
<entry role="parameter_annotations"></entry></row>
<row><entry role="parameter_name"><para>counts</para></entry>
<entry role="parameter_description"><para>Character counts.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> The EOL surface flags.</para>
</refsect3></refsect2>
<refsect2 id="enca-find-max-sec" role="function">
<title>enca_find_max_sec&#160;()</title>
<indexterm zone="enca-find-max-sec"><primary>enca_find_max_sec</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_find_max_sec (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Updates <parameter>analyser->order</parameter>
 according to charset <parameter>ratings</parameter>
.</para>
<para>XXX: This should be a stable sort.  The ordering is defined by
data/&lt;lang>/&lt;lang>.h header file which is in turn defined by ordering in
the appropriate script (doit.sh).  Silly.</para>
<para>Must not be called with <parameter>analyser</parameter>
 with no regular charsets.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>An analyser.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-double-utf8-init" role="function">
<title>enca_double_utf8_init&#160;()</title>
<indexterm zone="enca-double-utf8-init"><primary>enca_double_utf8_init</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_double_utf8_init (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Initializes double-UTF-8 check.</para>
<para>In fact it initializes the fields to <link linkend="NULL:CAPS"><type>NULL</type></link>'s, they are actually initialized
only when needed.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state to be initialized.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-double-utf8-destroy" role="function">
<title>enca_double_utf8_destroy&#160;()</title>
<indexterm zone="enca-double-utf8-destroy"><primary>enca_double_utf8_destroy</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_double_utf8_destroy (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Destroys the double-UTF-8 check part of analyser state <parameter>analyser</parameter>
.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state whose double-UTF-8 check part should be destroyed.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-pair-init" role="function">
<title>enca_pair_init&#160;()</title>
<indexterm zone="enca-pair-init"><primary>enca_pair_init</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_pair_init (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Initializes pair statistics data.</para>
<para>In fact it just sets everything to <link linkend="NULL:CAPS"><type>NULL</type></link>, to be initialized when needed.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state to be initialized.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-pair-destroy" role="function">
<title>enca_pair_destroy&#160;()</title>
<indexterm zone="enca-pair-destroy"><primary>enca_pair_destroy</primary></indexterm>
<programlisting language="C"><link linkend="void"><returnvalue>void</returnvalue></link>
enca_pair_destroy (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Destroys the pair statistics part of analyser state <parameter>analyser</parameter>
.</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyzer state whose pair statistics part should be destroyed.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3></refsect2>
<refsect2 id="enca-pair-analyse" role="function">
<title>enca_pair_analyse&#160;()</title>
<indexterm zone="enca-pair-analyse"><primary>enca_pair_analyse</primary></indexterm>
<programlisting language="C"><link linkend="int"><returnvalue>int</returnvalue></link>
enca_pair_analyse (<parameter><link linkend="EncaAnalyserState"><type>EncaAnalyserState</type></link> *analyser</parameter>);</programlisting>
<para>Performs pair-frequency based analysis, provided that the language supports
it (does nothing otherwise).</para>
<refsect3 role="parameters">
<title>Parameters</title>
<informaltable role="parameters_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="parameters_name" colwidth="150px"/>
<colspec colname="parameters_description"/>
<colspec colname="parameters_annotations" colwidth="200px"/>
<tbody>
<row><entry role="parameter_name"><para>analyser</para></entry>
<entry role="parameter_description"><para>Analyser containing the sample for pair frequency analysis.</para></entry>
<entry role="parameter_annotations"></entry></row>
</tbody></tgroup></informaltable>
</refsect3><refsect3 role="returns">
<title>Returns</title>
<para> Nonzero when the character set was successfully determined,
<parameter>analyser</parameter>
-><parameter>result</parameter>
.<parameter>charset</parameter>
is then directly modified.</para>
</refsect3></refsect2>

</refsect1>
<refsect1 id="libenca-Internal-Functions.other_details" role="details">
<title role="details.title">Types and Values</title>
<refsect2 id="EncaAnalyserOptions" role="struct">
<title>struct EncaAnalyserOptions</title>
<indexterm zone="EncaAnalyserOptions"><primary>EncaAnalyserOptions</primary></indexterm>
<programlisting language="C">struct EncaAnalyserOptions {
  int const_buffer;
  size_t min_chars;
  double threshold;
  int multibyte_enabled;
  int interpreted_surfaces;
  int ambiguous_mode;
  int filtering;
  int test_garbageness;
  int termination_strictness;
};
</programlisting>
<para>Analyser options, a part of analyser state.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaAnalyserOptions.const-buffer">const_buffer</structfield>;</para></entry>
<entry role="struct_member_description"><para>Treat buffer as const?  Otherwise its content can be,
and probably will be, modified.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaAnalyserOptions.min-chars">min_chars</structfield>;</para></entry>
<entry role="struct_member_description"><para>Minimal number of significant characters.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="double"><type>double</type></link>&#160;<structfield id="EncaAnalyserOptions.threshold">threshold</structfield>;</para></entry>
<entry role="struct_member_description"><para>Minimal ratio between winner and the second.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaAnalyserOptions.multibyte-enabled">multibyte_enabled</structfield>;</para></entry>
<entry role="struct_member_description"><para>Check for multibyte encodings?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaAnalyserOptions.interpreted-surfaces">interpreted_surfaces</structfield>;</para></entry>
<entry role="struct_member_description"><para>Allow surfaces causing fundamental reinterpretation?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaAnalyserOptions.ambiguous-mode">ambiguous_mode</structfield>;</para></entry>
<entry role="struct_member_description"><para>Ambiguous mode?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaAnalyserOptions.filtering">filtering</structfield>;</para></entry>
<entry role="struct_member_description"><para>Allow binary and box-drawing filters?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaAnalyserOptions.test-garbageness">test_garbageness</structfield>;</para></entry>
<entry role="struct_member_description"><para>Do test garbageness?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaAnalyserOptions.termination-strictness">termination_strictness</structfield>;</para></entry>
<entry role="struct_member_description"><para>Disallow broken multibyte sequences at buffer end?</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaAnalyserState" role="struct">
<title>struct EncaAnalyserState</title>
<indexterm zone="EncaAnalyserState"><primary>EncaAnalyserState</primary></indexterm>
<programlisting language="C">struct EncaAnalyserState {
  /* Language data. */
  const EncaLanguageInfo *lang;
  size_t ncharsets;
  int *charsets;
  /* Analyser state. */
  EncaErrno gerrno;
  size_t size;
  unsigned char *buffer;
  EncaEncoding result;
  size_t *counts;
  size_t bin;
  size_t up;
  double *ratings;
  size_t *order;
  size_t size2;
  unsigned char *buffer2;
  /* Double-UTF-8 data. */
  EncaUTFCheckData *utfch;
  int *utfbuf;
  /* Pair frequency data */
  unsigned char *pair2bits;
  size_t *bitcounts;
  size_t *pairratings;
  /* LCUC data XXX: unused (yet) */
  size_t *lcbits;
  size_t *ucbits;
  /* Options. */
  EncaAnalyserOptions options;
};
</programlisting>
<para>The internal analyser state.</para>
<para>Passed as an opaque object (`this') to analyser calls.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para>const&#160;<link linkend="EncaLanguageInfo"><type>EncaLanguageInfo</type></link>&#160;*<structfield id="EncaAnalyserState.lang">lang</structfield>;</para></entry>
<entry role="struct_member_description"><para>Language information.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaAnalyserState.ncharsets">ncharsets</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of 8bit charsets in this language.
(Equal to <parameter>lang-&gt;ncharsets</parameter>.)</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;*<structfield id="EncaAnalyserState.charsets">charsets</structfield>;</para></entry>
<entry role="struct_member_description"><para>8bit charset id's [<parameter>ncharsets</parameter>].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaErrno"><type>EncaErrno</type></link>&#160;<structfield id="EncaAnalyserState.gerrno">gerrno</structfield>;</para></entry>
<entry role="struct_member_description"><para>Guessing error code.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaAnalyserState.size">size</structfield>;</para></entry>
<entry role="struct_member_description"><para>Size of buffer.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned&#160;<link linkend="char"><type>char</type></link>&#160;*<structfield id="EncaAnalyserState.buffer">buffer</structfield>;</para></entry>
<entry role="struct_member_description"><para>Buffer whose encoding is to be detected [<parameter>size</parameter>].
(Owned by outer world.)</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaEncoding"><type>EncaEncoding</type></link>&#160;<structfield id="EncaAnalyserState.result">result</structfield>;</para></entry>
<entry role="struct_member_description"><para>Result returned to caller.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;*<structfield id="EncaAnalyserState.counts">counts</structfield>;</para></entry>
<entry role="struct_member_description"><para>Character counts [0x100].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaAnalyserState.bin">bin</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of `binary' characters.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaAnalyserState.up">up</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of 8bit characters.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="double"><type>double</type></link>&#160;*<structfield id="EncaAnalyserState.ratings">ratings</structfield>;</para></entry>
<entry role="struct_member_description"><para>8bit charset ratings [<parameter>ncharsets</parameter>].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;*<structfield id="EncaAnalyserState.order">order</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset indices (not id's) sorted by ratings in descending order
[ncharsets].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaAnalyserState.size2">size2</structfield>;</para></entry>
<entry role="struct_member_description"><para>Size of buffer2.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned&#160;<link linkend="char"><type>char</type></link>&#160;*<structfield id="EncaAnalyserState.buffer2">buffer2</structfield>;</para></entry>
<entry role="struct_member_description"><para>A temporary secondary buffer [<parameter>size2</parameter>].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaUTFCheckData"><type>EncaUTFCheckData</type></link>&#160;*<structfield id="EncaAnalyserState.utfch">utfch</structfield>;</para></entry>
<entry role="struct_member_description"><para>Double-UTF-8 test data [<parameter>ncharsets</parameter>].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;*<structfield id="EncaAnalyserState.utfbuf">utfbuf</structfield>;</para></entry>
<entry role="struct_member_description"><para>Double-UTF-8 buffer for various UCS-2 character counting [0x10000].
(Magic: see <link linkend="mark-scratch-buffer"><function>mark_scratch_buffer()</function></link> for description.)</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned&#160;<link linkend="char"><type>char</type></link>&#160;*<structfield id="EncaAnalyserState.pair2bits">pair2bits</structfield>;</para></entry>
<entry role="struct_member_description"><para>Character pair map to charsets [0x100000] (indexed
0x100*first + second).  Each bit corresponds to one charset;
when set, the pair is `good' for the given charset.  The
type is char, so it breaks for <parameter>ncharsets</parameter> &gt; 8, but it should
not be accessed from outer world, so it can be easily enlarged
to more bits.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;*<structfield id="EncaAnalyserState.bitcounts">bitcounts</structfield>;</para></entry>
<entry role="struct_member_description"><para>Counts for each possible bit combination in <parameter>pair2bits</parameter>
[0x1 &lt;&lt; ncharsets].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;*<structfield id="EncaAnalyserState.pairratings">pairratings</structfield>;</para></entry>
<entry role="struct_member_description"><para>Counts of `good' pairs per charset [<parameter>ncharsets</parameter>].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;*<structfield id="EncaAnalyserState.lcbits">lcbits</structfield>;</para></entry>
<entry role="struct_member_description"><para>If a character is lowercase in some charset, the corresponding bit
is set [0x100].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;*<structfield id="EncaAnalyserState.ucbits">ucbits</structfield>;</para></entry>
<entry role="struct_member_description"><para>If a character is uppercase in some charset, the corresponding bit
is set [0x100].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaAnalyserOptions"><type>EncaAnalyserOptions</type></link>&#160;<structfield id="EncaAnalyserState.options">options</structfield>;</para></entry>
<entry role="struct_member_description"><para>Analyser options.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaCharsetInfo" role="struct">
<title>struct EncaCharsetInfo</title>
<indexterm zone="EncaCharsetInfo"><primary>EncaCharsetInfo</primary></indexterm>
<programlisting language="C">struct EncaCharsetInfo {
  int enca;
  int rfc1345;
  int cstocs;
  int iconv;
  int mime;
  const char *human;
  unsigned int flags;
  unsigned int nsurface;
};
</programlisting>
<para>General charset information.</para>
<para>All the <link linkend="int"><type>int</type></link> fields are indices in <link linkend="ALIAS-LIST:CAPS"><type>ALIAS_LIST</type></link>[].</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaCharsetInfo.enca">enca</structfield>;</para></entry>
<entry role="struct_member_description"><para>Default, implicit name in enca.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaCharsetInfo.rfc1345">rfc1345</structfield>;</para></entry>
<entry role="struct_member_description"><para>RFC1345 charset name.
(For charsets not in RFC1345, some canonical name is invented.)</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaCharsetInfo.cstocs">cstocs</structfield>;</para></entry>
<entry role="struct_member_description"><para>Cstocs charset name or -1.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaCharsetInfo.iconv">iconv</structfield>;</para></entry>
<entry role="struct_member_description"><para>Iconv charset name or -1.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaCharsetInfo.mime">mime</structfield>;</para></entry>
<entry role="struct_member_description"><para>Preferred MIME charset name or -1.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>const&#160;<link linkend="char"><type>char</type></link>&#160;*<structfield id="EncaCharsetInfo.human">human</structfield>;</para></entry>
<entry role="struct_member_description"><para>Human comprehensible description.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned&#160;<link linkend="int"><type>int</type></link>&#160;<structfield id="EncaCharsetInfo.flags">flags</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset properties (7bit, 8bit, multibyte, ...).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>unsigned&#160;<link linkend="int"><type>int</type></link>&#160;<structfield id="EncaCharsetInfo.nsurface">nsurface</structfield>;</para></entry>
<entry role="struct_member_description"><para>Natural surface (`implied' in recode).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaLanguageInfo" role="struct">
<title>struct EncaLanguageInfo</title>
<indexterm zone="EncaLanguageInfo"><primary>EncaLanguageInfo</primary></indexterm>
<programlisting language="C">struct EncaLanguageInfo {
  const char *name;
  const char *humanname;
  size_t ncharsets;
  const char *const *csnames;
  const unsigned short int *const *weights;
  const unsigned short int *significant;
  const unsigned char *const *letters;
  const unsigned char **const *pairs;
  long int weight_sum;
  EncaHookFunc hook;
  EncaHookFunc eolhook;
  EncaHookFunc lcuchook;
  EncaHookFunc ratinghook;
};
</programlisting>
<para>Language specific data.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para>const&#160;<link linkend="char"><type>char</type></link>&#160;*<structfield id="EncaLanguageInfo.name">name</structfield>;</para></entry>
<entry role="struct_member_description"><para>Language name, or more precisely, locale name.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>const&#160;<link linkend="char"><type>char</type></link>&#160;*<structfield id="EncaLanguageInfo.humanname">humanname</structfield>;</para></entry>
<entry role="struct_member_description"><para>Normal human-readable [English] language name.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaLanguageInfo.ncharsets">ncharsets</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of charsets in this language.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>const&#160;<link linkend="char"><type>char</type></link>&#160;*const&#160;&#160;*<structfield id="EncaLanguageInfo.csnames">csnames</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset names [<parameter>ncharsets</parameter>].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para>long&#160;<link linkend="int"><type>int</type></link>&#160;<structfield id="EncaLanguageInfo.weight-sum">weight_sum</structfield>;</para></entry>
<entry role="struct_member_description"><para>Sum of all <parameter>weights</parameter> (is the same for all charsets).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaHookFunc"><type>EncaHookFunc</type></link>&#160;<structfield id="EncaLanguageInfo.hook">hook</structfield>;</para></entry>
<entry role="struct_member_description"><para>Hook function (deciding hard cases).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaHookFunc"><type>EncaHookFunc</type></link>&#160;<structfield id="EncaLanguageInfo.eolhook">eolhook</structfield>;</para></entry>
<entry role="struct_member_description"><para>EOL hook function (deciding ambiguous cases based on EOL type).</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaHookFunc"><type>EncaHookFunc</type></link>&#160;<structfield id="EncaLanguageInfo.lcuchook">lcuchook</structfield>;</para></entry>
<entry role="struct_member_description"></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaHookFunc"><type>EncaHookFunc</type></link>&#160;<structfield id="EncaLanguageInfo.ratinghook">ratinghook</structfield>;</para></entry>
<entry role="struct_member_description"><para>Helper to calculate ratings for weightingless languages.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaLanguageHookData1CS" role="struct">
<title>struct EncaLanguageHookData1CS</title>
<indexterm zone="EncaLanguageHookData1CS"><primary>EncaLanguageHookData1CS</primary></indexterm>
<programlisting language="C">struct EncaLanguageHookData1CS {
  const char *name;
  size_t size;
  const unsigned char *list;
  size_t cs;
};
</programlisting>
<para>Container for data needed by <link linkend="enca-language-hook-ncs"><function>enca_language_hook_ncs()</function></link>.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para>const&#160;<link linkend="char"><type>char</type></link>&#160;*<structfield id="EncaLanguageHookData1CS.name">name</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset name.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaLanguageHookData1CS.size">size</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of characters in <parameter>list</parameter>.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaLanguageHookData1CS.cs">cs</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset number.  This is an index in <parameter>analyser</parameter> arrays
(like <parameter>charsets</parameter>), NOT a charset id.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaLanguageHookDataEOL" role="struct">
<title>struct EncaLanguageHookDataEOL</title>
<indexterm zone="EncaLanguageHookDataEOL"><primary>EncaLanguageHookDataEOL</primary></indexterm>
<programlisting language="C">struct EncaLanguageHookDataEOL {
  const char *name;
  EncaSurface eol;
  size_t cs;
};
</programlisting>
<para>Container for data needed by <link linkend="enca-language-hook-eol"><function>enca_language_hook_eol()</function></link>.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para>const&#160;<link linkend="char"><type>char</type></link>&#160;*<structfield id="EncaLanguageHookDataEOL.name">name</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset name.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="EncaSurface"><type>EncaSurface</type></link>&#160;<structfield id="EncaLanguageHookDataEOL.eol">eol</structfield>;</para></entry>
<entry role="struct_member_description"><para>The corresponding <link linkend="EncaSurface"><type>EncaSurface</type></link> bit.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaLanguageHookDataEOL.cs">cs</structfield>;</para></entry>
<entry role="struct_member_description"><para>Charset number.  This is an index in <parameter>analyser</parameter> arrays
(like <parameter>charsets</parameter>), NOT a charset id.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EncaUTFCheckData" role="struct">
<title>struct EncaUTFCheckData</title>
<indexterm zone="EncaUTFCheckData"><primary>EncaUTFCheckData</primary></indexterm>
<programlisting language="C">struct EncaUTFCheckData {
  double rating;
  size_t size;
  int result;
  int *ucs2;
  int *weights;
};
</programlisting>
<para>Data needed by double-UTF-8 check, per language charset.</para>
<refsect3 role="struct_members">
<title>Members</title>
<informaltable role="struct_members_table" pgwide="1" frame="none">
<tgroup cols="3">
<colspec colname="struct_members_name" colwidth="300px"/>
<colspec colname="struct_members_description"/>
<colspec colname="struct_members_annotations" colwidth="200px"/>
<tbody>
<row><entry role="struct_member_name"><para><link linkend="double"><type>double</type></link>&#160;<structfield id="EncaUTFCheckData.rating">rating</structfield>;</para></entry>
<entry role="struct_member_description"><para>Total rating for this charset.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="size-t"><type>size_t</type></link>&#160;<structfield id="EncaUTFCheckData.size">size</structfield>;</para></entry>
<entry role="struct_member_description"><para>Number of UCS-2 characters.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;<structfield id="EncaUTFCheckData.result">result</structfield>;</para></entry>
<entry role="struct_member_description"><para>Nonzero when the sample is probably Doubly-UTF-8 encoded from
this charset.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;*<structfield id="EncaUTFCheckData.ucs2">ucs2</structfield>;</para></entry>
<entry role="struct_member_description"><para>List of significant UCS-2 characters, in order [<parameter>size</parameter>].</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
<row><entry role="struct_member_name"><para><link linkend="int"><type>int</type></link>&#160;*<structfield id="EncaUTFCheckData.weights">weights</structfield>;</para></entry>
<entry role="struct_member_description"><para>Weights for double-UTF-8 check [<parameter>size</parameter>].  Positive means normal
UTF-8, negative doubly-encoded.</para></entry>
<entry role="struct_member_annotations"></entry>
</row>
</tbody></tgroup></informaltable>
</refsect3>
</refsect2>
<refsect2 id="EPSILON:CAPS" role="macro">
<title>EPSILON</title>
<indexterm zone="EPSILON:CAPS"><primary>EPSILON</primary></indexterm>
<programlisting language="C">#define EPSILON 0.000001
</programlisting>
<para>`Zero' for float comparison (and to prevent division by zero, etc.).</para>
</refsect2>
<refsect2 id="FILL-NONLETTER:CAPS" role="macro">
<title>FILL_NONLETTER</title>
<indexterm zone="FILL-NONLETTER:CAPS"><primary>FILL_NONLETTER</primary></indexterm>
<programlisting language="C">#define FILL_NONLETTER '.'
</programlisting>
<para>Replacement character for non-letters in pair frequencies.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-BE:CAPS" role="variable">
<title>ENCA_LANGUAGE_BE</title>
<indexterm zone="ENCA-LANGUAGE-BE:CAPS"><primary>ENCA_LANGUAGE_BE</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_BE;
</programlisting>
<para>Belarusian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-BG:CAPS" role="variable">
<title>ENCA_LANGUAGE_BG</title>
<indexterm zone="ENCA-LANGUAGE-BG:CAPS"><primary>ENCA_LANGUAGE_BG</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_BG;
</programlisting>
<para>Bulgarian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-CS:CAPS" role="variable">
<title>ENCA_LANGUAGE_CS</title>
<indexterm zone="ENCA-LANGUAGE-CS:CAPS"><primary>ENCA_LANGUAGE_CS</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_CS;
</programlisting>
<para>Czech language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-ET:CAPS" role="variable">
<title>ENCA_LANGUAGE_ET</title>
<indexterm zone="ENCA-LANGUAGE-ET:CAPS"><primary>ENCA_LANGUAGE_ET</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_ET;
</programlisting>
<para>Estonian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-HR:CAPS" role="variable">
<title>ENCA_LANGUAGE_HR</title>
<indexterm zone="ENCA-LANGUAGE-HR:CAPS"><primary>ENCA_LANGUAGE_HR</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_HR;
</programlisting>
<para>Croatian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-HU:CAPS" role="variable">
<title>ENCA_LANGUAGE_HU</title>
<indexterm zone="ENCA-LANGUAGE-HU:CAPS"><primary>ENCA_LANGUAGE_HU</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_HU;
</programlisting>
<para>Hungarian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-LT:CAPS" role="variable">
<title>ENCA_LANGUAGE_LT</title>
<indexterm zone="ENCA-LANGUAGE-LT:CAPS"><primary>ENCA_LANGUAGE_LT</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_LT;
</programlisting>
<para>Lithuanian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-LV:CAPS" role="variable">
<title>ENCA_LANGUAGE_LV</title>
<indexterm zone="ENCA-LANGUAGE-LV:CAPS"><primary>ENCA_LANGUAGE_LV</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_LV;
</programlisting>
<para>Latvian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-PL:CAPS" role="variable">
<title>ENCA_LANGUAGE_PL</title>
<indexterm zone="ENCA-LANGUAGE-PL:CAPS"><primary>ENCA_LANGUAGE_PL</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_PL;
</programlisting>
<para>Polish language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-RU:CAPS" role="variable">
<title>ENCA_LANGUAGE_RU</title>
<indexterm zone="ENCA-LANGUAGE-RU:CAPS"><primary>ENCA_LANGUAGE_RU</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_RU;
</programlisting>
<para>Russian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-SK:CAPS" role="variable">
<title>ENCA_LANGUAGE_SK</title>
<indexterm zone="ENCA-LANGUAGE-SK:CAPS"><primary>ENCA_LANGUAGE_SK</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_SK;
</programlisting>
<para>Slovak language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-SL:CAPS" role="variable">
<title>ENCA_LANGUAGE_SL</title>
<indexterm zone="ENCA-LANGUAGE-SL:CAPS"><primary>ENCA_LANGUAGE_SL</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_SL;
</programlisting>
<para>Slovene language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-UK:CAPS" role="variable">
<title>ENCA_LANGUAGE_UK</title>
<indexterm zone="ENCA-LANGUAGE-UK:CAPS"><primary>ENCA_LANGUAGE_UK</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_UK;
</programlisting>
<para>Ukrainian language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>
<refsect2 id="ENCA-LANGUAGE-ZH:CAPS" role="variable">
<title>ENCA_LANGUAGE_ZH</title>
<indexterm zone="ENCA-LANGUAGE-ZH:CAPS"><primary>ENCA_LANGUAGE_ZH</primary></indexterm>
<programlisting language="C">extern const EncaLanguageInfo ENCA_LANGUAGE_ZH;
</programlisting>
<para>Chinese language.</para>
<para>Everything the world out there needs to know about this language.</para>
</refsect2>

</refsect1>

</refentry>