|
rpm-build |
0f2925 |
(*
|
|
rpm-build |
0f2925 |
* Bitset - Efficient bit sets
|
|
rpm-build |
0f2925 |
* Copyright (C) 2003 Nicolas Cannasse
|
|
rpm-build |
0f2925 |
*
|
|
rpm-build |
0f2925 |
* This library is free software; you can redistribute it and/or
|
|
rpm-build |
0f2925 |
* modify it under the terms of the GNU Lesser General Public
|
|
rpm-build |
0f2925 |
* License as published by the Free Software Foundation; either
|
|
rpm-build |
0f2925 |
* version 2.1 of the License, or (at your option) any later version,
|
|
rpm-build |
0f2925 |
* with the special exception on linking described in file LICENSE.
|
|
rpm-build |
0f2925 |
*
|
|
rpm-build |
0f2925 |
* This library is distributed in the hope that it will be useful,
|
|
rpm-build |
0f2925 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
rpm-build |
0f2925 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
rpm-build |
0f2925 |
* Lesser General Public License for more details.
|
|
rpm-build |
0f2925 |
*
|
|
rpm-build |
0f2925 |
* You should have received a copy of the GNU Lesser General Public
|
|
rpm-build |
0f2925 |
* License along with this library; if not, write to the Free Software
|
|
rpm-build |
0f2925 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
rpm-build |
0f2925 |
*)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
open ExtBytes
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
type intern
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let bcreate : int -> intern = Obj.magic Bytes.create
|
|
rpm-build |
0f2925 |
external fast_get : intern -> int -> int = "%string_unsafe_get"
|
|
rpm-build |
0f2925 |
let fast_set : intern -> int -> int -> unit = Obj.magic Bytes.unsafe_set
|
|
rpm-build |
0f2925 |
external fast_bool : int -> bool = "%identity"
|
|
rpm-build |
0f2925 |
let fast_blit : intern -> int -> intern -> int -> int -> unit = Obj.magic Bytes.blit
|
|
rpm-build |
0f2925 |
let fast_fill : intern -> int -> int -> int -> unit = Obj.magic Bytes.fill
|
|
rpm-build |
0f2925 |
let fast_length : intern -> int= Obj.magic Bytes.length
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let bget s ndx =
|
|
rpm-build |
0f2925 |
assert (ndx >= 0 && ndx < fast_length s);
|
|
rpm-build |
0f2925 |
fast_get s ndx
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let bset s ndx v =
|
|
rpm-build |
0f2925 |
assert (ndx >= 0 && ndx < fast_length s);
|
|
rpm-build |
0f2925 |
fast_set s ndx v
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let bblit src srcoff dst dstoff len =
|
|
rpm-build |
0f2925 |
assert (srcoff >= 0 && dstoff >= 0 && len >= 0);
|
|
rpm-build |
0f2925 |
fast_blit src srcoff dst dstoff len
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let bfill dst start len c =
|
|
rpm-build |
0f2925 |
assert (start >= 0 && len >= 0);
|
|
rpm-build |
0f2925 |
fast_fill dst start len c
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
exception Negative_index of string
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
type t = {
|
|
rpm-build |
0f2925 |
mutable data : intern;
|
|
rpm-build |
0f2925 |
mutable len : int;
|
|
rpm-build |
0f2925 |
}
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let error fname = raise (Negative_index fname)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let empty() =
|
|
rpm-build |
0f2925 |
{
|
|
rpm-build |
0f2925 |
data = bcreate 0;
|
|
rpm-build |
0f2925 |
len = 0;
|
|
rpm-build |
0f2925 |
}
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let int_size = 7 (* value used to round up index *)
|
|
rpm-build |
0f2925 |
let log_int_size = 3 (* number of shifts *)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let create n =
|
|
rpm-build |
0f2925 |
if n < 0 then error "create";
|
|
rpm-build |
0f2925 |
let size = (n+int_size) lsr log_int_size in
|
|
rpm-build |
0f2925 |
let b = bcreate size in
|
|
rpm-build |
0f2925 |
bfill b 0 size 0;
|
|
rpm-build |
0f2925 |
{
|
|
rpm-build |
0f2925 |
data = b;
|
|
rpm-build |
0f2925 |
len = size;
|
|
rpm-build |
0f2925 |
}
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let copy t =
|
|
rpm-build |
0f2925 |
let b = bcreate t.len in
|
|
rpm-build |
0f2925 |
bblit t.data 0 b 0 t.len;
|
|
rpm-build |
0f2925 |
{
|
|
rpm-build |
0f2925 |
data = b;
|
|
rpm-build |
0f2925 |
len = t.len
|
|
rpm-build |
0f2925 |
}
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let clone = copy
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let set t x =
|
|
rpm-build |
0f2925 |
if x < 0 then error "set";
|
|
rpm-build |
0f2925 |
let pos = x lsr log_int_size and delta = x land int_size in
|
|
rpm-build |
0f2925 |
let size = t.len in
|
|
rpm-build |
0f2925 |
if pos >= size then begin
|
|
rpm-build |
0f2925 |
let b = bcreate (pos+1) in
|
|
rpm-build |
0f2925 |
bblit t.data 0 b 0 size;
|
|
rpm-build |
0f2925 |
bfill b size (pos - size + 1) 0;
|
|
rpm-build |
0f2925 |
t.len <- pos + 1;
|
|
rpm-build |
0f2925 |
t.data <- b;
|
|
rpm-build |
0f2925 |
end;
|
|
rpm-build |
0f2925 |
bset t.data pos ((bget t.data pos) lor (1 lsl delta))
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let unset t x =
|
|
rpm-build |
0f2925 |
if x < 0 then error "unset";
|
|
rpm-build |
0f2925 |
let pos = x lsr log_int_size and delta = x land int_size in
|
|
rpm-build |
0f2925 |
if pos < t.len then
|
|
rpm-build |
0f2925 |
bset t.data pos ((bget t.data pos) land (0xFF lxor (1 lsl delta)))
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let toggle t x =
|
|
rpm-build |
0f2925 |
if x < 0 then error "toggle";
|
|
rpm-build |
0f2925 |
let pos = x lsr log_int_size and delta = x land int_size in
|
|
rpm-build |
0f2925 |
let size = t.len in
|
|
rpm-build |
0f2925 |
if pos >= size then begin
|
|
rpm-build |
0f2925 |
let b = bcreate (pos+1) in
|
|
rpm-build |
0f2925 |
bblit t.data 0 b 0 size;
|
|
rpm-build |
0f2925 |
bfill b size (pos - size + 1) 0;
|
|
rpm-build |
0f2925 |
t.len <- pos + 1;
|
|
rpm-build |
0f2925 |
t.data <- b;
|
|
rpm-build |
0f2925 |
end;
|
|
rpm-build |
0f2925 |
bset t.data pos ((bget t.data pos) lxor (1 lsl delta))
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let put t = function
|
|
rpm-build |
0f2925 |
| true -> set t
|
|
rpm-build |
0f2925 |
| false -> unset t
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let is_set t x =
|
|
rpm-build |
0f2925 |
if x < 0 then error "is_set";
|
|
rpm-build |
0f2925 |
let pos = x lsr log_int_size and delta = x land int_size in
|
|
rpm-build |
0f2925 |
let size = t.len in
|
|
rpm-build |
0f2925 |
if pos < size then
|
|
rpm-build |
0f2925 |
fast_bool (((bget t.data pos) lsr delta) land 1)
|
|
rpm-build |
0f2925 |
else
|
|
rpm-build |
0f2925 |
false
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
exception Break_int of int
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
(* Find highest set element or raise Not_found *)
|
|
rpm-build |
0f2925 |
let find_msb t =
|
|
rpm-build |
0f2925 |
(* Find highest set bit in a byte. Does not work with zero. *)
|
|
rpm-build |
0f2925 |
let byte_msb b =
|
|
rpm-build |
0f2925 |
assert (b <> 0);
|
|
rpm-build |
0f2925 |
let rec loop n =
|
|
rpm-build |
0f2925 |
if b land (1 lsl n) = 0 then
|
|
rpm-build |
0f2925 |
loop (n-1)
|
|
rpm-build |
0f2925 |
else n in
|
|
rpm-build |
0f2925 |
loop 7 in
|
|
rpm-build |
0f2925 |
let n = t.len - 1
|
|
rpm-build |
0f2925 |
and buf = t.data in
|
|
rpm-build |
0f2925 |
try
|
|
rpm-build |
0f2925 |
for i = n downto 0 do
|
|
rpm-build |
0f2925 |
let byte = bget buf i in
|
|
rpm-build |
0f2925 |
if byte <> 0 then raise (Break_int ((i lsl log_int_size)+(byte_msb byte)))
|
|
rpm-build |
0f2925 |
done;
|
|
rpm-build |
0f2925 |
raise Not_found
|
|
rpm-build |
0f2925 |
with
|
|
rpm-build |
0f2925 |
Break_int n -> n
|
|
rpm-build |
0f2925 |
| _ -> raise Not_found
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let compare t1 t2 =
|
|
rpm-build |
0f2925 |
let some_msb b = try Some (find_msb b) with Not_found -> None in
|
|
rpm-build |
0f2925 |
match (some_msb t1, some_msb t2) with
|
|
rpm-build |
0f2925 |
(None, Some _) -> -1 (* 0-y -> -1 *)
|
|
rpm-build |
0f2925 |
| (Some _, None) -> 1 (* x-0 -> 1 *)
|
|
rpm-build |
0f2925 |
| (None, None) -> 0 (* 0-0 -> 0 *)
|
|
rpm-build |
0f2925 |
| (Some a, Some b) -> (* x-y *)
|
|
rpm-build |
0f2925 |
if a < b then -1
|
|
rpm-build |
0f2925 |
else if a > b then 1
|
|
rpm-build |
0f2925 |
else
|
|
rpm-build |
0f2925 |
begin
|
|
rpm-build |
0f2925 |
(* MSBs differ, we need to scan arrays until we find a
|
|
rpm-build |
0f2925 |
difference *)
|
|
rpm-build |
0f2925 |
let ndx = a lsr log_int_size in
|
|
rpm-build |
0f2925 |
assert (ndx < t1.len && ndx < t2.len);
|
|
rpm-build |
0f2925 |
try
|
|
rpm-build |
0f2925 |
for i = ndx downto 0 do
|
|
rpm-build |
0f2925 |
let b1 = bget t1.data i
|
|
rpm-build |
0f2925 |
and b2 = bget t2.data i in
|
|
rpm-build |
0f2925 |
if b1 <> b2 then raise (Break_int (compare b1 b2))
|
|
rpm-build |
0f2925 |
done;
|
|
rpm-build |
0f2925 |
0
|
|
rpm-build |
0f2925 |
with
|
|
rpm-build |
0f2925 |
Break_int res -> res
|
|
rpm-build |
0f2925 |
end
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let equals t1 t2 =
|
|
rpm-build |
0f2925 |
compare t1 t2 = 0
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let partial_count t x =
|
|
rpm-build |
0f2925 |
let rec nbits x =
|
|
rpm-build |
0f2925 |
if x = 0 then
|
|
rpm-build |
0f2925 |
0
|
|
rpm-build |
0f2925 |
else if fast_bool (x land 1) then
|
|
rpm-build |
0f2925 |
1 + (nbits (x lsr 1))
|
|
rpm-build |
0f2925 |
else
|
|
rpm-build |
0f2925 |
nbits (x lsr 1)
|
|
rpm-build |
0f2925 |
in
|
|
rpm-build |
0f2925 |
let size = t.len in
|
|
rpm-build |
0f2925 |
let pos = x lsr log_int_size and delta = x land int_size in
|
|
rpm-build |
0f2925 |
let rec loop n acc =
|
|
rpm-build |
0f2925 |
if n = size then
|
|
rpm-build |
0f2925 |
acc
|
|
rpm-build |
0f2925 |
else
|
|
rpm-build |
0f2925 |
let x = bget t.data n in
|
|
rpm-build |
0f2925 |
loop (n+1) (acc + nbits x)
|
|
rpm-build |
0f2925 |
in
|
|
rpm-build |
0f2925 |
if pos >= size then
|
|
rpm-build |
0f2925 |
0
|
|
rpm-build |
0f2925 |
else
|
|
rpm-build |
0f2925 |
loop (pos+1) (nbits ((bget t.data pos) lsr delta))
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let count t =
|
|
rpm-build |
0f2925 |
partial_count t 0
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
(* Find the first set bit in the bit array *)
|
|
rpm-build |
0f2925 |
let find_first_set b n =
|
|
rpm-build |
0f2925 |
(* TODO there are many ways to speed this up. Lookup table would be
|
|
rpm-build |
0f2925 |
one way to speed this up. *)
|
|
rpm-build |
0f2925 |
let find_lsb b =
|
|
rpm-build |
0f2925 |
assert (b <> 0);
|
|
rpm-build |
0f2925 |
let rec loop n =
|
|
rpm-build |
0f2925 |
if b land (1 lsl n) <> 0 then n else loop (n+1) in
|
|
rpm-build |
0f2925 |
loop 0 in
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let buf = b.data in
|
|
rpm-build |
0f2925 |
let rec find_bit byte_ndx bit_offs =
|
|
rpm-build |
0f2925 |
if byte_ndx >= b.len then
|
|
rpm-build |
0f2925 |
None
|
|
rpm-build |
0f2925 |
else
|
|
rpm-build |
0f2925 |
let byte = (bget buf byte_ndx) lsr bit_offs in
|
|
rpm-build |
0f2925 |
if byte = 0 then
|
|
rpm-build |
0f2925 |
find_bit (byte_ndx + 1) 0
|
|
rpm-build |
0f2925 |
else
|
|
rpm-build |
0f2925 |
Some ((find_lsb byte) + (byte_ndx lsl log_int_size) + bit_offs) in
|
|
rpm-build |
0f2925 |
find_bit (n lsr log_int_size) (n land int_size)
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let enum t =
|
|
rpm-build |
0f2925 |
let rec make n =
|
|
rpm-build |
0f2925 |
let cur = ref n in
|
|
rpm-build |
0f2925 |
let rec next () =
|
|
rpm-build |
0f2925 |
match find_first_set t !cur with
|
|
rpm-build |
0f2925 |
Some elem ->
|
|
rpm-build |
0f2925 |
cur := (elem+1);
|
|
rpm-build |
0f2925 |
elem
|
|
rpm-build |
0f2925 |
| None ->
|
|
rpm-build |
0f2925 |
raise Enum.No_more_elements in
|
|
rpm-build |
0f2925 |
Enum.make
|
|
rpm-build |
0f2925 |
~next
|
|
rpm-build |
0f2925 |
~count:(fun () -> partial_count t !cur)
|
|
rpm-build |
0f2925 |
~clone:(fun () -> make !cur)
|
|
rpm-build |
0f2925 |
in
|
|
rpm-build |
0f2925 |
make 0
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let raw_create size =
|
|
rpm-build |
0f2925 |
let b = bcreate size in
|
|
rpm-build |
0f2925 |
bfill b 0 size 0;
|
|
rpm-build |
0f2925 |
{ data = b; len = size }
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let inter a b =
|
|
rpm-build |
0f2925 |
let max_size = max a.len b.len in
|
|
rpm-build |
0f2925 |
let d = raw_create max_size in
|
|
rpm-build |
0f2925 |
let sl = min a.len b.len in
|
|
rpm-build |
0f2925 |
let abuf = a.data
|
|
rpm-build |
0f2925 |
and bbuf = b.data in
|
|
rpm-build |
0f2925 |
(* Note: rest of the array is set to zero automatically *)
|
|
rpm-build |
0f2925 |
for i = 0 to sl-1 do
|
|
rpm-build |
0f2925 |
bset d.data i ((bget abuf i) land (bget bbuf i))
|
|
rpm-build |
0f2925 |
done;
|
|
rpm-build |
0f2925 |
d
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
(* Note: rest of the array is handled automatically correct, since we
|
|
rpm-build |
0f2925 |
took a copy of the bigger set. *)
|
|
rpm-build |
0f2925 |
let union a b =
|
|
rpm-build |
0f2925 |
let d = if a.len > b.len then copy a else copy b in
|
|
rpm-build |
0f2925 |
let sl = min a.len b.len in
|
|
rpm-build |
0f2925 |
let abuf = a.data
|
|
rpm-build |
0f2925 |
and bbuf = b.data in
|
|
rpm-build |
0f2925 |
for i = 0 to sl-1 do
|
|
rpm-build |
0f2925 |
bset d.data i ((bget abuf i) lor (bget bbuf i))
|
|
rpm-build |
0f2925 |
done;
|
|
rpm-build |
0f2925 |
d
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let diff a b =
|
|
rpm-build |
0f2925 |
let maxlen = max a.len b.len in
|
|
rpm-build |
0f2925 |
let buf = bcreate maxlen in
|
|
rpm-build |
0f2925 |
bblit a.data 0 buf 0 a.len;
|
|
rpm-build |
0f2925 |
let sl = min a.len b.len in
|
|
rpm-build |
0f2925 |
let abuf = a.data
|
|
rpm-build |
0f2925 |
and bbuf = b.data in
|
|
rpm-build |
0f2925 |
for i = 0 to sl-1 do
|
|
rpm-build |
0f2925 |
bset buf i ((bget abuf i) land (lnot (bget bbuf i)))
|
|
rpm-build |
0f2925 |
done;
|
|
rpm-build |
0f2925 |
{ data = buf; len = maxlen }
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let sym_diff a b =
|
|
rpm-build |
0f2925 |
let maxlen = max a.len b.len in
|
|
rpm-build |
0f2925 |
let buf = bcreate maxlen in
|
|
rpm-build |
0f2925 |
(* Copy larger (assumes missing bits are zero) *)
|
|
rpm-build |
0f2925 |
bblit (if a.len > b.len then a.data else b.data) 0 buf 0 maxlen;
|
|
rpm-build |
0f2925 |
let sl = min a.len b.len in
|
|
rpm-build |
0f2925 |
let abuf = a.data
|
|
rpm-build |
0f2925 |
and bbuf = b.data in
|
|
rpm-build |
0f2925 |
for i = 0 to sl-1 do
|
|
rpm-build |
0f2925 |
bset buf i ((bget abuf i) lxor (bget bbuf i))
|
|
rpm-build |
0f2925 |
done;
|
|
rpm-build |
0f2925 |
{ data = buf; len = maxlen }
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
(* TODO the following set operations can be made faster if you do the
|
|
rpm-build |
0f2925 |
set operation in-place instead of taking a copy. But be careful
|
|
rpm-build |
0f2925 |
when the sizes of the bitvector strings differ. *)
|
|
rpm-build |
0f2925 |
let intersect t t' =
|
|
rpm-build |
0f2925 |
let d = inter t t' in
|
|
rpm-build |
0f2925 |
t.data <- d.data;
|
|
rpm-build |
0f2925 |
t.len <- d.len
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let differentiate t t' =
|
|
rpm-build |
0f2925 |
let d = diff t t' in
|
|
rpm-build |
0f2925 |
t.data <- d.data;
|
|
rpm-build |
0f2925 |
t.len <- d.len
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let unite t t' =
|
|
rpm-build |
0f2925 |
let d = union t t' in
|
|
rpm-build |
0f2925 |
t.data <- d.data;
|
|
rpm-build |
0f2925 |
t.len <- d.len
|
|
rpm-build |
0f2925 |
|
|
rpm-build |
0f2925 |
let differentiate_sym t t' =
|
|
rpm-build |
0f2925 |
let d = sym_diff t t' in
|
|
rpm-build |
0f2925 |
t.data <- d.data;
|
|
rpm-build |
0f2925 |
t.len <- d.len
|