% [web-viewer residue, not part of the package: "Blob Blame History Raw"]
% Modified from Evgenie Medvedev <medvedev at project7.ru> by A. Shipunov (plantago@herba.msu.ru)

% Package identification.
% \NeedsTeXFormat states explicitly that this file requires LaTeX2e,
% so that running it under another format fails with a clear message.
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{xecyr}%
        [2008/03/08 v1.0 Definitions for Cyrillic Babel/XeLaTeX compatibility]

% Package options.
%   ext / noext : set \if@EXT; when true, xltxtra is loaded further down
%                 (with the cm-default option passed to fontspec).
%   mis / nomis : set \if@MIS; when true, misccorr is loaded further down
%                 and the XeTeX input encoding is switched to KOI8-R.
% Defaults: ext, nomis.
\newif\if@EXT
\DeclareOption{ext}{\@EXTtrue}
\DeclareOption{noext}{\@EXTfalse}
\newif\if@MIS
\DeclareOption{mis}{\@MIStrue}
\DeclareOption{nomis}{\@MISfalse}

\ExecuteOptions{ext,nomis}
% \ProcessOptions looks ahead for an optional `*'; the \relax ends that
% look-ahead immediately instead of letting it scan into following code.
\ProcessOptions\relax

% Option `mis': load misccorr, then switch the XeTeX input encoding
% to KOI8-R.
% NOTE(review): the encoding switch is issued from inside this file and
% only after misccorr has been loaded -- confirm which input it is meant
% to affect.
\if@MIS
	\RequirePackage{misccorr}
	\XeTeXinputencoding "KOI8-R"
\fi

% Option `ext' (on by default): hand the cm-default option to fontspec,
% then load xltxtra.
\if@EXT
  \PassOptionsToPackage{cm-default}{fontspec}
  \RequirePackage{xltxtra}
\fi

% Always loaded; presumably the provider of the \DeclareUTFcharacter and
% \UTFencname macros used in the tables below.
\RequirePackage{xunicode}

%%% Cyrillic letter TeX definitions for XeTeX
% This definition set is complete for all Slavic Cyrillic 
% languages (Russian, Ukrainian, Belorussian,
% Rusyn, Serbian, Macedonian and Bulgarian alphabets).
%
% It has not been confirmed as complete, but might be, for these
% non-Slavic Cyrillic languages:
% Ossetian, Khalkha, Buryat, Kalmyk, Kyrgyz, Tatar, Uzbek,
% Azerbaijani, Kazakh, Abkhaz, Chukchi.

%%% Part I
%%% Cyrillic letters defined as a continuous stretch 
%%% in Unicode and most common 8-bit encodings which define them:

% Basic alphabet, listed in code-point order.
% Capital letters, U+0410 .. U+042F:
\DeclareUTFcharacter[\UTFencname]{x0410}{\CYRA}        % А
\DeclareUTFcharacter[\UTFencname]{x0411}{\CYRB}        % Б
\DeclareUTFcharacter[\UTFencname]{x0412}{\CYRV}        % В
\DeclareUTFcharacter[\UTFencname]{x0413}{\CYRG}        % Г
\DeclareUTFcharacter[\UTFencname]{x0414}{\CYRD}        % Д
\DeclareUTFcharacter[\UTFencname]{x0415}{\CYRE}        % Е
\DeclareUTFcharacter[\UTFencname]{x0416}{\CYRZH}       % Ж
\DeclareUTFcharacter[\UTFencname]{x0417}{\CYRZ}        % З
\DeclareUTFcharacter[\UTFencname]{x0418}{\CYRI}        % И
\DeclareUTFcharacter[\UTFencname]{x0419}{\CYRISHRT}    % Й
\DeclareUTFcharacter[\UTFencname]{x041A}{\CYRK}        % К
\DeclareUTFcharacter[\UTFencname]{x041B}{\CYRL}        % Л
\DeclareUTFcharacter[\UTFencname]{x041C}{\CYRM}        % М
\DeclareUTFcharacter[\UTFencname]{x041D}{\CYRN}        % Н
\DeclareUTFcharacter[\UTFencname]{x041E}{\CYRO}        % О
\DeclareUTFcharacter[\UTFencname]{x041F}{\CYRP}        % П
\DeclareUTFcharacter[\UTFencname]{x0420}{\CYRR}        % Р
\DeclareUTFcharacter[\UTFencname]{x0421}{\CYRS}        % С
\DeclareUTFcharacter[\UTFencname]{x0422}{\CYRT}        % Т
\DeclareUTFcharacter[\UTFencname]{x0423}{\CYRU}        % У
\DeclareUTFcharacter[\UTFencname]{x0424}{\CYRF}        % Ф
\DeclareUTFcharacter[\UTFencname]{x0425}{\CYRH}        % Х
\DeclareUTFcharacter[\UTFencname]{x0426}{\CYRC}        % Ц
\DeclareUTFcharacter[\UTFencname]{x0427}{\CYRCH}       % Ч
\DeclareUTFcharacter[\UTFencname]{x0428}{\CYRSH}       % Ш
\DeclareUTFcharacter[\UTFencname]{x0429}{\CYRSHCH}     % Щ
\DeclareUTFcharacter[\UTFencname]{x042A}{\CYRHRDSN}    % Ъ
\DeclareUTFcharacter[\UTFencname]{x042B}{\CYRERY}      % Ы
\DeclareUTFcharacter[\UTFencname]{x042C}{\CYRSFTSN}    % Ь
\DeclareUTFcharacter[\UTFencname]{x042D}{\CYREREV}     % Э
\DeclareUTFcharacter[\UTFencname]{x042E}{\CYRYU}       % Ю
\DeclareUTFcharacter[\UTFencname]{x042F}{\CYRYA}       % Я
% Small letters, U+0430 .. U+044F:
\DeclareUTFcharacter[\UTFencname]{x0430}{\cyra}        % а
\DeclareUTFcharacter[\UTFencname]{x0431}{\cyrb}        % б
\DeclareUTFcharacter[\UTFencname]{x0432}{\cyrv}        % в
\DeclareUTFcharacter[\UTFencname]{x0433}{\cyrg}        % г
\DeclareUTFcharacter[\UTFencname]{x0434}{\cyrd}        % д
\DeclareUTFcharacter[\UTFencname]{x0435}{\cyre}        % е
\DeclareUTFcharacter[\UTFencname]{x0436}{\cyrzh}       % ж
\DeclareUTFcharacter[\UTFencname]{x0437}{\cyrz}        % з
\DeclareUTFcharacter[\UTFencname]{x0438}{\cyri}        % и
\DeclareUTFcharacter[\UTFencname]{x0439}{\cyrishrt}    % й
\DeclareUTFcharacter[\UTFencname]{x043A}{\cyrk}        % к
\DeclareUTFcharacter[\UTFencname]{x043B}{\cyrl}        % л
\DeclareUTFcharacter[\UTFencname]{x043C}{\cyrm}        % м
\DeclareUTFcharacter[\UTFencname]{x043D}{\cyrn}        % н
\DeclareUTFcharacter[\UTFencname]{x043E}{\cyro}        % о
\DeclareUTFcharacter[\UTFencname]{x043F}{\cyrp}        % п
\DeclareUTFcharacter[\UTFencname]{x0440}{\cyrr}        % р
\DeclareUTFcharacter[\UTFencname]{x0441}{\cyrs}        % с
\DeclareUTFcharacter[\UTFencname]{x0442}{\cyrt}        % т
\DeclareUTFcharacter[\UTFencname]{x0443}{\cyru}        % у
\DeclareUTFcharacter[\UTFencname]{x0444}{\cyrf}        % ф
\DeclareUTFcharacter[\UTFencname]{x0445}{\cyrh}        % х
\DeclareUTFcharacter[\UTFencname]{x0446}{\cyrc}        % ц
\DeclareUTFcharacter[\UTFencname]{x0447}{\cyrch}       % ч
\DeclareUTFcharacter[\UTFencname]{x0448}{\cyrsh}       % ш
\DeclareUTFcharacter[\UTFencname]{x0449}{\cyrshch}     % щ
\DeclareUTFcharacter[\UTFencname]{x044A}{\cyrhrdsn}    % ъ
\DeclareUTFcharacter[\UTFencname]{x044B}{\cyrery}      % ы
\DeclareUTFcharacter[\UTFencname]{x044C}{\cyrsftsn}    % ь
\DeclareUTFcharacter[\UTFencname]{x044D}{\cyrerev}     % э
\DeclareUTFcharacter[\UTFencname]{x044E}{\cyryu}       % ю
\DeclareUTFcharacter[\UTFencname]{x044F}{\cyrya}       % я

%%% Part II
%%% Cyrillic letters not defined as continuous stretches,
%%% but available in common 8-bit cyrillic encodings,
%%% like cp1251 and iso-8859-5, sorted by languages 
%%% they're used in for ease of checking:

% Each line maps one Unicode code point to the LaTeX command name of the
% corresponding Cyrillic letter (capital first, then small), grouped by
% the languages that use the letter.

% Common to many languages:
\DeclareUTFcharacter[\UTFencname]{x0401}{\CYRYO}       % Ё
\DeclareUTFcharacter[\UTFencname]{x0451}{\cyryo}       % ё
% Belorussian-Uzbek:
\DeclareUTFcharacter[\UTFencname]{x040E}{\CYRUSHRT}    % Ў
\DeclareUTFcharacter[\UTFencname]{x045E}{\cyrushrt}    % ў
% Ukrainian-Rusyn:
\DeclareUTFcharacter[\UTFencname]{x0490}{\CYRGUP}      % Ґ
\DeclareUTFcharacter[\UTFencname]{x0491}{\cyrgup}      % ґ
% In the LaTeX Cyrillic naming scheme \CYRYI/\cyryi is the letter "yi"
% (Ї, U+0407/U+0457), while \CYRII/\cyrii is the dotted "i"
% (І, U+0406/U+0456). The two names must not be interchanged, otherwise
% text built from these commands prints ї where і is meant and vice versa.
\DeclareUTFcharacter[\UTFencname]{x0407}{\CYRYI}       % Ї
\DeclareUTFcharacter[\UTFencname]{x0457}{\cyryi}       % ї
\DeclareUTFcharacter[\UTFencname]{x0404}{\CYRIE}       % Є
\DeclareUTFcharacter[\UTFencname]{x0454}{\cyrie}       % є
% Ukrainian-Belorussian-Rusyn-Kazakh:
\DeclareUTFcharacter[\UTFencname]{x0406}{\CYRII}       % І
\DeclareUTFcharacter[\UTFencname]{x0456}{\cyrii}       % і
% Serbian-Macedonian-Azerbaijani:
\DeclareUTFcharacter[\UTFencname]{x0408}{\CYRJE}       % Ј
\DeclareUTFcharacter[\UTFencname]{x0458}{\cyrje}       % ј
% Serbian-Macedonian-Abkhaz:
\DeclareUTFcharacter[\UTFencname]{x040F}{\CYRDZHE}     % Џ
\DeclareUTFcharacter[\UTFencname]{x045F}{\cyrdzhe}     % џ
% Serbian-Macedonian:
\DeclareUTFcharacter[\UTFencname]{x0409}{\CYRLJE}      % Љ
\DeclareUTFcharacter[\UTFencname]{x0459}{\cyrlje}      % љ
\DeclareUTFcharacter[\UTFencname]{x040A}{\CYRNJE}      % Њ
\DeclareUTFcharacter[\UTFencname]{x045A}{\cyrnje}      % њ
% Serbian:
\DeclareUTFcharacter[\UTFencname]{x0402}{\CYRDJE}      % Ђ
\DeclareUTFcharacter[\UTFencname]{x0452}{\cyrdje}      % ђ
\DeclareUTFcharacter[\UTFencname]{x040B}{\CYRTSHE}     % Ћ
\DeclareUTFcharacter[\UTFencname]{x045B}{\cyrtshe}     % ћ
% Macedonian:
\DeclareUTFcharacter[\UTFencname]{x0405}{\CYRDZE}      % Ѕ
\DeclareUTFcharacter[\UTFencname]{x0455}{\cyrdze}      % ѕ
\DeclareUTFcharacter[\UTFencname]{x0403}{\CYRGJE}      % Ѓ
\DeclareUTFcharacter[\UTFencname]{x0453}{\cyrgje}      % ѓ
\DeclareUTFcharacter[\UTFencname]{x040C}{\CYRKJE}      % Ќ
\DeclareUTFcharacter[\UTFencname]{x045C}{\cyrkje}      % ќ

%%% Part III
%%% Non-Slavic Cyrillic letters, waiting for their experts
%%% to be confirmed as correct.
%%% From here on, we're out of the frequently used 8-bit land
%%% and into mostly Unicode territory.

% Each line maps one Unicode code point to a LaTeX Cyrillic command name
% (capital first, then small). The group headings name the languages the
% letters are listed under.

% Azerbaijani-Abkhaz-Kalmyk-Kazakh-Tatar:
\DeclareUTFcharacter[\UTFencname]{x04D8}{\CYRSCHWA}    % Ә
\DeclareUTFcharacter[\UTFencname]{x04D9}{\cyrschwa}    % ә
% Azerbaijani-Kyrgyz-Buryat-Kazakh-Tatar:
\DeclareUTFcharacter[\UTFencname]{x04BA}{\CYRSHHA}     % Һ
\DeclareUTFcharacter[\UTFencname]{x04BB}{\cyrshha}     % һ
% Azerbaijani-Kyrgyz-Buryat-Kalmyk-Kazakh-Tatar:
\DeclareUTFcharacter[\UTFencname]{x04E8}{\CYROTLD}     % Ө
\DeclareUTFcharacter[\UTFencname]{x04E9}{\cyrotld}     % ө
\DeclareUTFcharacter[\UTFencname]{x04AE}{\CYRY}        % Ү
\DeclareUTFcharacter[\UTFencname]{x04AF}{\cyry}        % ү
% Azerbaijani-Tajik-Kazakh-Uzbek:
\DeclareUTFcharacter[\UTFencname]{x0492}{\CYRGHCRS}    % Ғ
\DeclareUTFcharacter[\UTFencname]{x0493}{\cyrghcrs}    % ғ
% Tajik-Abkhaz-Kazakh-Uzbek:
\DeclareUTFcharacter[\UTFencname]{x049A}{\CYRKDSC}     % Қ
\DeclareUTFcharacter[\UTFencname]{x049B}{\cyrkdsc}     % қ
% Tajik-Abkhaz-Uzbek:
\DeclareUTFcharacter[\UTFencname]{x04B2}{\CYRHDSC}     % Ҳ
\DeclareUTFcharacter[\UTFencname]{x04B3}{\cyrhdsc}     % ҳ
% Tajik-Abkhaz:
\DeclareUTFcharacter[\UTFencname]{x04B6}{\CYRCHRDSC}   % Ҷ
\DeclareUTFcharacter[\UTFencname]{x04B7}{\cyrchrdsc}   % ҷ
% Kalmyk-Kyrgyz-Tatar:
\DeclareUTFcharacter[\UTFencname]{x04A2}{\CYRNDSC}     % Ң
\DeclareUTFcharacter[\UTFencname]{x04A3}{\cyrndsc}     % ң
% Kalmyk-Tatar:
\DeclareUTFcharacter[\UTFencname]{x0496}{\CYRZHDSC}    % Җ
\DeclareUTFcharacter[\UTFencname]{x0497}{\cyrzhdsc}    % җ
% Ossetian:
\DeclareUTFcharacter[\UTFencname]{x04D4}{\CYRAE}       % Ӕ
\DeclareUTFcharacter[\UTFencname]{x04D5}{\cyrae}       % ӕ
% Azerbaijani:
\DeclareUTFcharacter[\UTFencname]{x04B8}{\CYRCHVCRS}   % Ҹ
\DeclareUTFcharacter[\UTFencname]{x04B9}{\cyrchvcrs}   % ҹ
\DeclareUTFcharacter[\UTFencname]{x049C}{\CYRKVCRS}    % Ҝ
\DeclareUTFcharacter[\UTFencname]{x049D}{\cyrkvcrs}    % ҝ
% Kazakh:
\DeclareUTFcharacter[\UTFencname]{x04B0}{\CYRYHCRS}    % Ұ
\DeclareUTFcharacter[\UTFencname]{x04B1}{\cyryhcrs}    % ұ
% Abkhaz:
\DeclareUTFcharacter[\UTFencname]{x0494}{\CYRGHK}      % Ҕ
\DeclareUTFcharacter[\UTFencname]{x0495}{\cyrghk}      % ҕ
\DeclareUTFcharacter[\UTFencname]{x04E0}{\CYRABHDZE}   % Ӡ
\DeclareUTFcharacter[\UTFencname]{x04E1}{\cyrabhdze}   % ӡ
\DeclareUTFcharacter[\UTFencname]{x049E}{\CYRKHCRS}    % Ҟ
\DeclareUTFcharacter[\UTFencname]{x049F}{\cyrkhcrs}    % ҟ
\DeclareUTFcharacter[\UTFencname]{x04A6}{\CYRPHK}      % Ҧ
\DeclareUTFcharacter[\UTFencname]{x04A7}{\cyrphk}      % ҧ
\DeclareUTFcharacter[\UTFencname]{x04AC}{\CYRTDSC}     % Ҭ
\DeclareUTFcharacter[\UTFencname]{x04AD}{\cyrtdsc}     % ҭ
\DeclareUTFcharacter[\UTFencname]{x04B4}{\CYRTETSE}    % Ҵ
\DeclareUTFcharacter[\UTFencname]{x04B5}{\cyrtetse}    % ҵ
\DeclareUTFcharacter[\UTFencname]{x04BC}{\CYRABHCH}    % Ҽ
\DeclareUTFcharacter[\UTFencname]{x04BD}{\cyrabhch}    % ҽ
\DeclareUTFcharacter[\UTFencname]{x04BE}{\CYRABHCHDSC} % Ҿ
\DeclareUTFcharacter[\UTFencname]{x04BF}{\cyrabhchdsc} % ҿ
\DeclareUTFcharacter[\UTFencname]{x04A8}{\CYRABHHA}    % Ҩ
\DeclareUTFcharacter[\UTFencname]{x04A9}{\cyrabhha}    % ҩ
% Chukchi:
\DeclareUTFcharacter[\UTFencname]{x04C3}{\CYRKHK}      % Ӄ
\DeclareUTFcharacter[\UTFencname]{x04C4}{\cyrkhk}      % ӄ
\DeclareUTFcharacter[\UTFencname]{x04C7}{\CYRNHK}      % Ӈ
\DeclareUTFcharacter[\UTFencname]{x04C8}{\cyrnhk}      % ӈ

%% P.S. Tajik and Chuvash, while mentioned, 
%% have letters which are apparently missing and are NOT complete.

%%% Part IV  
%%% Historical letters, needed to support old orthographies.
% Pre-reform letters in code-point order: yat, big yus, fita, izhitsa.
\DeclareUTFcharacter[\UTFencname]{x0462}{\CYRYAT}      % Ѣ
\DeclareUTFcharacter[\UTFencname]{x0463}{\cyryat}      % ѣ
\DeclareUTFcharacter[\UTFencname]{x046A}{\CYRBYUS}     % Ѫ
\DeclareUTFcharacter[\UTFencname]{x046B}{\cyrbyus}     % ѫ
\DeclareUTFcharacter[\UTFencname]{x0472}{\CYRFITA}     % Ѳ
\DeclareUTFcharacter[\UTFencname]{x0473}{\cyrfita}     % ѳ
\DeclareUTFcharacter[\UTFencname]{x0474}{\CYRIZH}      % Ѵ
\DeclareUTFcharacter[\UTFencname]{x0475}{\cyrizh}      % ѵ

%%% Part V
%%% UNRECOGNISED NON-SLAVIC CYRILLIC LETTERS
%%% Belong to languages not mentioned herein.

% Palochka: only this single form is declared here.
\DeclareUTFcharacter[\UTFencname]{x04C0}{\CYRpalochka} % Ӏ
\DeclareUTFcharacter[\UTFencname]{x0498}{\CYRZDSC}     % Ҙ
\DeclareUTFcharacter[\UTFencname]{x0499}{\cyrzdsc}     % ҙ
\DeclareUTFcharacter[\UTFencname]{x04A0}{\CYRKBEAK}    % Ҡ
\DeclareUTFcharacter[\UTFencname]{x04A1}{\cyrkbeak}    % ҡ
\DeclareUTFcharacter[\UTFencname]{x04A4}{\CYRNG}       % Ҥ
\DeclareUTFcharacter[\UTFencname]{x04A5}{\cyrng}       % ҥ
\DeclareUTFcharacter[\UTFencname]{x04AA}{\CYRSDSC}     % Ҫ
\DeclareUTFcharacter[\UTFencname]{x04AB}{\cyrsdsc}     % ҫ
\DeclareUTFcharacter[\UTFencname]{x04CB}{\CYRCHLDSC}   % Ӌ
\DeclareUTFcharacter[\UTFencname]{x04CC}{\cyrchldsc}   % ӌ

%% These are probably improperly named either in TeX or in Unicode,
%% since their Unicode names and TeX names do not match.
%% TeX definitely treats them as the same symbol though.
\DeclareUTFcharacter[\UTFencname]{x04CD}{\CYRMDSC}     % Ӎ
\DeclareUTFcharacter[\UTFencname]{x04CE}{\cyrmdsc}     % ӎ
\DeclareUTFcharacter[\UTFencname]{x04C5}{\CYRLDSC}     % Ӆ
\DeclareUTFcharacter[\UTFencname]{x04C6}{\cyrldsc}     % ӆ

%% These two letters (Q and W, each in capital and small form) are part
%% of proposed Unicode for Kurdish and their codepoints may or may not
%% change upon inclusion.
\DeclareUTFcharacter[\UTFencname]{x051A}{\CYRQ}        % Ԛ
\DeclareUTFcharacter[\UTFencname]{x051B}{\cyrq}        % ԛ
\DeclareUTFcharacter[\UTFencname]{x051C}{\CYRW}        % Ԝ
\DeclareUTFcharacter[\UTFencname]{x051D}{\cyrw}        % ԝ

%%% Part VI
%%% PUNCTUATION AND UNCLEAR CASES
%%% Exotic (punctuation, letters...)
%%% (this list is probably incomplete)

% A dash - Defined in cyrillic.mtx to be emdash
% and should have been emdash all along.
% NOTE: \cyrdash is defined again with \def at the end of this file,
% presumably overriding whatever top-level meaning this declaration
% gives it.
\DeclareUTFcharacter[\UTFencname]{x2014}{\cyrdash} % —

% Exotic angle brackets.
% Might still be unprintable for you.
\DeclareUTFcharacter[\UTFencname]{x27E8}{\cyrlangle} % ⟨
\DeclareUTFcharacter[\UTFencname]{x27E9}{\cyrrangle} % ⟩

% This particular association is not certain but I guess
% we'll hear about it when someone trips over it.
\DeclareUTFcharacter[\UTFencname]{x0510}{\CYREPS} % Ԑ
\DeclareUTFcharacter[\UTFencname]{x0511}{\cyreps} % ԑ

% Can't confirm those, but I'm positive the codepoints are correct:
\DeclareUTFcharacter[\UTFencname]{x04F6}{\CYRGDSC} % Ӷ
\DeclareUTFcharacter[\UTFencname]{x04F7}{\cyrgdsc} % ӷ
\DeclareUTFcharacter[\UTFencname]{x04FC}{\CYRHHK} % Ӽ
\DeclareUTFcharacter[\UTFencname]{x04FD}{\cyrhhk} % ӽ
\DeclareUTFcharacter[\UTFencname]{x0512}{\CYRLHK} % Ԓ
\DeclareUTFcharacter[\UTFencname]{x0513}{\cyrlhk} % ԓ

% I could not assign these in a way that would make sense,
% since I couldn't find matches in cyrillic unicode table.

%\DeclareUTFcharacter[\UTFencname]{x????}{\CYRNLHK}
%\DeclareUTFcharacter[\UTFencname]{x????}{\cyrnlhk}
%\DeclareUTFcharacter[\UTFencname]{x????}{\CYRRDSC}
%\DeclareUTFcharacter[\UTFencname]{x????}{\cyrrdsc}

% Oddities:
% I don't see a letter like that in Unicode cyrillic table.
% I've no idea what is it doing in a cyrillic encoding either.
%\DeclareUTFcharacter[\UTFencname]{x????}{\CYRDELTA}
%\DeclareUTFcharacter[\UTFencname]{x????}{\cyrdelta}

% Quotation marks.
% The << and >> ligatures work only with the latest versions of
% "tex-text.map" and "tex-text.tec"
% (http://scripts.sil.org/svn-view/xetex/TRUNK).
% Low/high double quotes:
\DeclareUTFcharacter[\UTFencname]{x201E}{\glqq} % „
\DeclareUTFcharacter[\UTFencname]{x201C}{\grqq} % “
% Guillemets:
\DeclareUTFcharacter[\UTFencname]{x00AB}{\flqq} % «
\DeclareUTFcharacter[\UTFencname]{x00BB}{\frqq} % »

% \cyrdash: a box of fixed width .8em holding two en-dashes, one pushed to
% each edge by the \hss between them.
% Plain \def is used, so any earlier definition of \cyrdash is overwritten
% without an error.
% To make it (and "--- -like shortcuts) work, "xecyr" must be loaded _after_ babel.
\def\cyrdash{\hbox to.8em{--\hss--}}

\endinput