|
rpm-build |
0f2925 |
(*
|
|
rpm-build |
0f2925 |
* UTF-8 - UTF-8 encoded Unicode string
|
|
rpm-build |
0f2925 |
* Copyright 2002, 2003 (C) Yamagata Yoriyuki.
|
|
rpm-build |
0f2925 |
*
|
|
rpm-build |
0f2925 |
* This library is free software; you can redistribute it and/or
|
|
rpm-build |
0f2925 |
* modify it under the terms of the GNU Lesser General Public
|
|
rpm-build |
0f2925 |
* License as published by the Free Software Foundation; either
|
|
rpm-build |
0f2925 |
* version 2.1 of the License, or (at your option) any later version,
|
|
rpm-build |
0f2925 |
* with the special exception on linking described in file LICENSE.
|
|
rpm-build |
0f2925 |
*
|
|
rpm-build |
0f2925 |
* This library is distributed in the hope that it will be useful,
|
|
rpm-build |
0f2925 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
rpm-build |
0f2925 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
rpm-build |
0f2925 |
* Lesser General Public License for more details.
|
|
rpm-build |
0f2925 |
*
|
|
rpm-build |
0f2925 |
* You should have received a copy of the GNU Lesser General Public
|
|
rpm-build |
0f2925 |
* License along with this library; if not, write to the Free Software
|
|
rpm-build |
0f2925 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
rpm-build |
0f2925 |
*)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
open UChar
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
type t = string
|
|
rpm-build |
0f2925 |
type index = int
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let look s i =
|
|
rpm-build |
0f2925 |
let n' =
|
|
rpm-build |
0f2925 |
let n = Char.code s.[i] in
|
|
rpm-build |
0f2925 |
if n < 0x80 then n else
|
|
rpm-build |
0f2925 |
if n <= 0xdf then
|
|
rpm-build |
0f2925 |
(n - 0xc0) lsl 6 lor (0x7f land (Char.code s.[i + 1]))
|
|
rpm-build |
0f2925 |
else if n <= 0xef then
|
|
rpm-build |
0f2925 |
let n' = n - 0xe0 in
|
|
rpm-build |
0f2925 |
let m0 = Char.code s.[i + 2] in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 1)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
n' lsl 6 lor (0x7f land m0)
|
|
rpm-build |
0f2925 |
else if n <= 0xf7 then
|
|
rpm-build |
0f2925 |
let n' = n - 0xf0 in
|
|
rpm-build |
0f2925 |
let m0 = Char.code s.[i + 3] in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 1)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 2)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
n' lsl 6 lor (0x7f land m0)
|
|
rpm-build |
0f2925 |
else if n <= 0xfb then
|
|
rpm-build |
0f2925 |
let n' = n - 0xf8 in
|
|
rpm-build |
0f2925 |
let m0 = Char.code s.[i + 4] in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 1)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 2)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 3)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
n' lsl 6 lor (0x7f land m0)
|
|
rpm-build |
0f2925 |
else if n <= 0xfd then
|
|
rpm-build |
0f2925 |
let n' = n - 0xfc in
|
|
rpm-build |
0f2925 |
let m0 = Char.code s.[i + 5] in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 1)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 2)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 3)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
let m = Char.code (String.unsafe_get s (i + 4)) in
|
|
rpm-build |
0f2925 |
let n' = n' lsl 6 lor (0x7f land m) in
|
|
rpm-build |
0f2925 |
n' lsl 6 lor (0x7f land m0)
|
|
rpm-build |
0f2925 |
else invalid_arg "UTF8.look"
|
|
rpm-build |
0f2925 |
in
|
|
rpm-build |
0f2925 |
Obj.magic n'
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let rec search_head s i =
|
|
rpm-build |
0f2925 |
if i >= String.length s then i else
|
|
rpm-build |
0f2925 |
let n = Char.code (String.unsafe_get s i) in
|
|
rpm-build |
0f2925 |
if n < 0x80 || n >= 0xc2 then i else
|
|
rpm-build |
0f2925 |
search_head s (i + 1)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let next s i =
|
|
rpm-build |
0f2925 |
let n = Char.code s.[i] in
|
|
rpm-build |
0f2925 |
if n < 0x80 then i + 1 else
|
|
rpm-build |
0f2925 |
if n < 0xc0 then search_head s (i + 1) else
|
|
rpm-build |
0f2925 |
if n <= 0xdf then i + 2
|
|
rpm-build |
0f2925 |
else if n <= 0xef then i + 3
|
|
rpm-build |
0f2925 |
else if n <= 0xf7 then i + 4
|
|
rpm-build |
0f2925 |
else if n <= 0xfb then i + 5
|
|
rpm-build |
0f2925 |
else if n <= 0xfd then i + 6
|
|
rpm-build |
0f2925 |
else invalid_arg "UTF8.next"
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let rec search_head_backward s i =
|
|
rpm-build |
0f2925 |
if i < 0 then -1 else
|
|
rpm-build |
0f2925 |
let n = Char.code s.[i] in
|
|
rpm-build |
0f2925 |
if n < 0x80 || n >= 0xc2 then i else
|
|
rpm-build |
0f2925 |
search_head_backward s (i - 1)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let prev s i = search_head_backward s (i - 1)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let move s i n =
|
|
rpm-build |
0f2925 |
if n >= 0 then
|
|
rpm-build |
0f2925 |
let rec loop i n = if n <= 0 then i else loop (next s i) (n - 1) in
|
|
rpm-build |
0f2925 |
loop i n
|
|
rpm-build |
0f2925 |
else
|
|
rpm-build |
0f2925 |
let rec loop i n = if n >= 0 then i else loop (prev s i) (n + 1) in
|
|
rpm-build |
0f2925 |
loop i n
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let rec nth_aux s i n =
|
|
rpm-build |
0f2925 |
if n = 0 then i else
|
|
rpm-build |
0f2925 |
nth_aux s (next s i) (n - 1)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let nth s n = nth_aux s 0 n
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let substring s i n =
|
|
rpm-build |
0f2925 |
let j = nth s i in
|
|
rpm-build |
0f2925 |
let j' = (nth_aux s j n) - 1 in
|
|
rpm-build |
0f2925 |
String.sub s j (j' - j + 1)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let last s = search_head_backward s (String.length s - 1)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let out_of_range s i = i < 0 || i >= String.length s
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let compare_index _ i j = i - j
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let get s n = look s (nth s n)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let add_uchar buf u =
|
|
rpm-build |
0f2925 |
let masq = 0b111111 in
|
|
rpm-build |
0f2925 |
let k = int_of_uchar u in
|
|
rpm-build |
0f2925 |
if k < 0 || k >= 0x4000000 then begin
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.chr (0xfc + (k lsr 30)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 24) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 18) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)));
|
|
rpm-build |
0f2925 |
end else if k <= 0x7f then
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr k)
|
|
rpm-build |
0f2925 |
else if k <= 0x7ff then begin
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0xc0 lor (k lsr 6)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)))
|
|
rpm-build |
0f2925 |
end else if k <= 0xffff then begin
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0xe0 lor (k lsr 12)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)));
|
|
rpm-build |
0f2925 |
end else if k <= 0x1fffff then begin
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0xf0 + (k lsr 18)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)));
|
|
rpm-build |
0f2925 |
end else begin
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0xf8 + (k lsr 24)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 18) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
|
|
rpm-build |
0f2925 |
Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)));
|
|
rpm-build |
0f2925 |
end
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let init len f =
|
|
rpm-build |
0f2925 |
let buf = Buffer.create len in
|
|
rpm-build |
0f2925 |
for c = 0 to len - 1 do add_uchar buf (f c) done;
|
|
rpm-build |
0f2925 |
Buffer.contents buf
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let rec length_aux s c i =
|
|
rpm-build |
0f2925 |
if i >= String.length s then c else
|
|
rpm-build |
0f2925 |
let n = Char.code (String.unsafe_get s i) in
|
|
rpm-build |
0f2925 |
let k =
|
|
rpm-build |
0f2925 |
if n < 0x80 then 1 else
|
|
rpm-build |
0f2925 |
if n < 0xc0 then invalid_arg "UTF8.length" else
|
|
rpm-build |
0f2925 |
if n < 0xe0 then 2 else
|
|
rpm-build |
0f2925 |
if n < 0xf0 then 3 else
|
|
rpm-build |
0f2925 |
if n < 0xf8 then 4 else
|
|
rpm-build |
0f2925 |
if n < 0xfc then 5 else
|
|
rpm-build |
0f2925 |
if n < 0xfe then 6 else
|
|
rpm-build |
0f2925 |
invalid_arg "UTF8.length" in
|
|
rpm-build |
0f2925 |
length_aux s (c + 1) (i + k)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let length s = length_aux s 0 0
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let rec iter_aux proc s i =
|
|
rpm-build |
0f2925 |
if i >= String.length s then () else
|
|
rpm-build |
0f2925 |
let u = look s i in
|
|
rpm-build |
0f2925 |
proc u;
|
|
rpm-build |
0f2925 |
iter_aux proc s (next s i)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let iter proc s = iter_aux proc s 0
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let compare s1 s2 = Pervasives.compare s1 s2
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
exception Malformed_code
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let validate s =
|
|
rpm-build |
0f2925 |
let rec trail c i a =
|
|
rpm-build |
0f2925 |
if c = 0 then a else
|
|
rpm-build |
0f2925 |
if i >= String.length s then raise Malformed_code else
|
|
rpm-build |
0f2925 |
let n = Char.code (String.unsafe_get s i) in
|
|
rpm-build |
0f2925 |
if n < 0x80 || n >= 0xc0 then raise Malformed_code else
|
|
rpm-build |
0f2925 |
trail (c - 1) (i + 1) (a lsl 6 lor (n - 0x80)) in
|
|
rpm-build |
0f2925 |
let rec main i =
|
|
rpm-build |
0f2925 |
if i >= String.length s then () else
|
|
rpm-build |
0f2925 |
let n = Char.code (String.unsafe_get s i) in
|
|
rpm-build |
0f2925 |
if n < 0x80 then main (i + 1) else
|
|
rpm-build |
0f2925 |
if n < 0xc2 then raise Malformed_code else
|
|
rpm-build |
0f2925 |
if n <= 0xdf then
|
|
rpm-build |
0f2925 |
if trail 1 (i + 1) (n - 0xc0) < 0x80 then raise Malformed_code else
|
|
rpm-build |
0f2925 |
main (i + 2)
|
|
rpm-build |
0f2925 |
else if n <= 0xef then
|
|
rpm-build |
0f2925 |
if trail 2 (i + 1) (n - 0xe0) < 0x800 then raise Malformed_code else
|
|
rpm-build |
0f2925 |
main (i + 3)
|
|
rpm-build |
0f2925 |
else if n <= 0xf7 then
|
|
rpm-build |
0f2925 |
if trail 3 (i + 1) (n - 0xf0) < 0x10000 then raise Malformed_code else
|
|
rpm-build |
0f2925 |
main (i + 4)
|
|
rpm-build |
0f2925 |
else if n <= 0xfb then
|
|
rpm-build |
0f2925 |
if trail 4 (i + 1) (n - 0xf8) < 0x200000 then raise Malformed_code else
|
|
rpm-build |
0f2925 |
main (i + 5)
|
|
rpm-build |
0f2925 |
else if n <= 0xfd then
|
|
rpm-build |
0f2925 |
let n = trail 5 (i + 1) (n - 0xfc) in
|
|
rpm-build |
0f2925 |
if n lsr 16 < 0x400 then raise Malformed_code else
|
|
rpm-build |
0f2925 |
main (i + 6)
|
|
rpm-build |
0f2925 |
else raise Malformed_code in
|
|
rpm-build |
0f2925 |
main 0
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
module Buf =
|
|
rpm-build |
0f2925 |
struct
|
|
rpm-build |
0f2925 |
include Buffer
|
|
rpm-build |
0f2925 |
type buf = t
|
|
rpm-build |
0f2925 |
let add_char = add_uchar
|
|
rpm-build |
0f2925 |
end
|