Blame src/bitSet.ml

rpm-build 0f2925
(*
rpm-build 0f2925
 * Bitset - Efficient bit sets
rpm-build 0f2925
 * Copyright (C) 2003 Nicolas Cannasse
rpm-build 0f2925
 *
rpm-build 0f2925
 * This library is free software; you can redistribute it and/or
rpm-build 0f2925
 * modify it under the terms of the GNU Lesser General Public
rpm-build 0f2925
 * License as published by the Free Software Foundation; either
rpm-build 0f2925
 * version 2.1 of the License, or (at your option) any later version,
rpm-build 0f2925
 * with the special exception on linking described in file LICENSE.
rpm-build 0f2925
 *
rpm-build 0f2925
 * This library is distributed in the hope that it will be useful,
rpm-build 0f2925
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
rpm-build 0f2925
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
rpm-build 0f2925
 * Lesser General Public License for more details.
rpm-build 0f2925
 *
rpm-build 0f2925
 * You should have received a copy of the GNU Lesser General Public
rpm-build 0f2925
 * License along with this library; if not, write to the Free Software
rpm-build 0f2925
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
rpm-build 0f2925
 *)
rpm-build 0f2925
rpm-build 0f2925
open ExtBytes
rpm-build 0f2925
rpm-build 0f2925
type intern
rpm-build 0f2925
rpm-build 0f2925
let bcreate : int -> intern = Obj.magic Bytes.create
rpm-build 0f2925
external fast_get : intern -> int -> int = "%string_unsafe_get"
rpm-build 0f2925
let fast_set : intern -> int -> int -> unit = Obj.magic Bytes.unsafe_set
rpm-build 0f2925
external fast_bool : int -> bool = "%identity"
rpm-build 0f2925
let fast_blit : intern -> int -> intern -> int -> int -> unit = Obj.magic Bytes.blit
rpm-build 0f2925
let fast_fill : intern -> int -> int -> int -> unit = Obj.magic Bytes.fill
rpm-build 0f2925
let fast_length : intern -> int= Obj.magic Bytes.length
rpm-build 0f2925
rpm-build 0f2925
let bget s ndx =
rpm-build 0f2925
  assert (ndx >= 0 && ndx < fast_length s);
rpm-build 0f2925
  fast_get s ndx
rpm-build 0f2925
rpm-build 0f2925
let bset s ndx v =
rpm-build 0f2925
  assert (ndx >= 0 && ndx < fast_length s);
rpm-build 0f2925
  fast_set s ndx v
rpm-build 0f2925
rpm-build 0f2925
let bblit src srcoff dst dstoff len = 
rpm-build 0f2925
  assert (srcoff >= 0 && dstoff >= 0 && len >= 0);
rpm-build 0f2925
  fast_blit src srcoff dst dstoff len
rpm-build 0f2925
rpm-build 0f2925
let bfill dst start len c = 
rpm-build 0f2925
  assert (start >= 0 && len >= 0);
rpm-build 0f2925
  fast_fill dst start len c
rpm-build 0f2925
rpm-build 0f2925
exception Negative_index of string
rpm-build 0f2925
rpm-build 0f2925
type t = {
rpm-build 0f2925
  mutable data : intern;
rpm-build 0f2925
  mutable len : int;
rpm-build 0f2925
}
rpm-build 0f2925
rpm-build 0f2925
let error fname = raise (Negative_index fname)
rpm-build 0f2925
rpm-build 0f2925
let empty() =
rpm-build 0f2925
  {
rpm-build 0f2925
    data = bcreate 0;
rpm-build 0f2925
    len = 0;
rpm-build 0f2925
  }
rpm-build 0f2925
rpm-build 0f2925
let int_size = 7 (* value used to round up index *)
rpm-build 0f2925
let log_int_size = 3 (* number of shifts *)
rpm-build 0f2925
rpm-build 0f2925
let create n =
rpm-build 0f2925
  if n < 0 then error "create";
rpm-build 0f2925
  let size = (n+int_size) lsr log_int_size in
rpm-build 0f2925
  let b = bcreate size in
rpm-build 0f2925
  bfill b 0 size 0;
rpm-build 0f2925
  {
rpm-build 0f2925
    data = b;
rpm-build 0f2925
    len = size;
rpm-build 0f2925
  }
rpm-build 0f2925
rpm-build 0f2925
let copy t =
rpm-build 0f2925
  let b = bcreate t.len in
rpm-build 0f2925
  bblit t.data 0 b 0 t.len;
rpm-build 0f2925
  {
rpm-build 0f2925
    data = b;
rpm-build 0f2925
    len = t.len
rpm-build 0f2925
  }
rpm-build 0f2925
rpm-build 0f2925
let clone = copy
rpm-build 0f2925
rpm-build 0f2925
let set t x =
rpm-build 0f2925
  if x < 0 then error "set";
rpm-build 0f2925
  let pos = x lsr log_int_size and delta = x land int_size in
rpm-build 0f2925
  let size = t.len in
rpm-build 0f2925
  if pos >= size then begin
rpm-build 0f2925
    let b = bcreate (pos+1) in
rpm-build 0f2925
    bblit t.data 0 b 0 size;
rpm-build 0f2925
    bfill b size (pos - size + 1) 0;
rpm-build 0f2925
    t.len <- pos + 1;
rpm-build 0f2925
    t.data <- b;
rpm-build 0f2925
  end;
rpm-build 0f2925
  bset t.data pos ((bget t.data pos) lor (1 lsl delta))
rpm-build 0f2925
rpm-build 0f2925
let unset t x =
rpm-build 0f2925
  if x < 0 then error "unset";
rpm-build 0f2925
  let pos = x lsr log_int_size and delta = x land int_size in
rpm-build 0f2925
  if pos < t.len then
rpm-build 0f2925
    bset t.data pos ((bget t.data pos) land (0xFF lxor (1 lsl delta)))
rpm-build 0f2925
rpm-build 0f2925
let toggle t x =
rpm-build 0f2925
  if x < 0 then error "toggle";
rpm-build 0f2925
  let pos = x lsr log_int_size and delta = x land int_size in
rpm-build 0f2925
  let size = t.len in
rpm-build 0f2925
  if pos >= size then begin
rpm-build 0f2925
    let b = bcreate (pos+1) in
rpm-build 0f2925
    bblit t.data 0 b 0 size;
rpm-build 0f2925
    bfill b size (pos - size + 1) 0;
rpm-build 0f2925
    t.len <- pos + 1;
rpm-build 0f2925
    t.data <- b;
rpm-build 0f2925
  end;
rpm-build 0f2925
  bset t.data pos ((bget t.data pos) lxor (1 lsl delta))
rpm-build 0f2925
rpm-build 0f2925
let put t = function
rpm-build 0f2925
  | true -> set t
rpm-build 0f2925
  | false -> unset t
rpm-build 0f2925
rpm-build 0f2925
let is_set t x =
rpm-build 0f2925
  if x < 0 then error "is_set";
rpm-build 0f2925
  let pos = x lsr log_int_size and delta = x land int_size in
rpm-build 0f2925
  let size = t.len in
rpm-build 0f2925
  if pos < size then
rpm-build 0f2925
  fast_bool (((bget t.data pos) lsr delta) land 1)
rpm-build 0f2925
  else
rpm-build 0f2925
  false
rpm-build 0f2925
rpm-build 0f2925
rpm-build 0f2925
exception Break_int of int
rpm-build 0f2925
rpm-build 0f2925
(* Find highest set element or raise Not_found *)
rpm-build 0f2925
let find_msb t =
rpm-build 0f2925
  (* Find highest set bit in a byte.  Does not work with zero. *)
rpm-build 0f2925
  let byte_msb b = 
rpm-build 0f2925
    assert (b <> 0);
rpm-build 0f2925
    let rec loop n = 
rpm-build 0f2925
      if b land (1 lsl n) = 0 then
rpm-build 0f2925
        loop (n-1)
rpm-build 0f2925
      else n in
rpm-build 0f2925
    loop 7 in
rpm-build 0f2925
  let n = t.len - 1
rpm-build 0f2925
  and buf = t.data in
rpm-build 0f2925
  try 
rpm-build 0f2925
    for i = n downto 0 do
rpm-build 0f2925
      let byte = bget buf i in
rpm-build 0f2925
      if byte <> 0 then raise (Break_int ((i lsl log_int_size)+(byte_msb byte)))
rpm-build 0f2925
    done;
rpm-build 0f2925
    raise Not_found
rpm-build 0f2925
  with 
rpm-build 0f2925
    Break_int n -> n
rpm-build 0f2925
  | _ -> raise Not_found
rpm-build 0f2925
rpm-build 0f2925
let compare t1 t2 =
rpm-build 0f2925
  let some_msb b = try Some (find_msb b) with Not_found -> None in
rpm-build 0f2925
  match (some_msb t1, some_msb t2) with
rpm-build 0f2925
    (None, Some _) -> -1 (* 0-y -> -1 *)
rpm-build 0f2925
  | (Some _, None) -> 1  (* x-0 ->  1 *)
rpm-build 0f2925
  | (None, None) -> 0    (* 0-0 ->  0 *)
rpm-build 0f2925
  | (Some a, Some b) ->  (* x-y *)
rpm-build 0f2925
      if a < b then -1
rpm-build 0f2925
      else if a > b then 1
rpm-build 0f2925
      else
rpm-build 0f2925
        begin
rpm-build 0f2925
          (* MSBs differ, we need to scan arrays until we find a
rpm-build 0f2925
             difference *)
rpm-build 0f2925
          let ndx = a lsr log_int_size in 
rpm-build 0f2925
          assert (ndx < t1.len && ndx < t2.len);
rpm-build 0f2925
          try
rpm-build 0f2925
            for i = ndx downto 0 do
rpm-build 0f2925
              let b1 = bget t1.data i 
rpm-build 0f2925
              and b2 = bget t2.data i in
rpm-build 0f2925
              if b1 <> b2 then raise (Break_int (compare b1 b2))
rpm-build 0f2925
            done;
rpm-build 0f2925
            0
rpm-build 0f2925
          with
rpm-build 0f2925
            Break_int res -> res
rpm-build 0f2925
        end
rpm-build 0f2925
rpm-build 0f2925
let equals t1 t2 =
rpm-build 0f2925
  compare t1 t2 = 0
rpm-build 0f2925
rpm-build 0f2925
let partial_count t x =
rpm-build 0f2925
  let rec nbits x =
rpm-build 0f2925
    if x = 0 then
rpm-build 0f2925
      0
rpm-build 0f2925
    else if fast_bool (x land 1) then
rpm-build 0f2925
      1 + (nbits (x lsr 1))
rpm-build 0f2925
    else
rpm-build 0f2925
      nbits (x lsr 1)
rpm-build 0f2925
  in
rpm-build 0f2925
  let size = t.len in
rpm-build 0f2925
  let pos = x lsr log_int_size and delta = x land int_size in
rpm-build 0f2925
  let rec loop n acc =
rpm-build 0f2925
    if n = size then
rpm-build 0f2925
      acc
rpm-build 0f2925
    else
rpm-build 0f2925
      let x = bget t.data n in
rpm-build 0f2925
      loop (n+1) (acc + nbits x)
rpm-build 0f2925
  in
rpm-build 0f2925
  if pos >= size then
rpm-build 0f2925
    0
rpm-build 0f2925
  else
rpm-build 0f2925
    loop (pos+1) (nbits ((bget t.data pos) lsr delta))
rpm-build 0f2925
rpm-build 0f2925
let count t =
rpm-build 0f2925
  partial_count t 0
rpm-build 0f2925
rpm-build 0f2925
(* Find the first set bit in the bit array *)
rpm-build 0f2925
let find_first_set b n =
rpm-build 0f2925
  (* TODO there are many ways to speed this up.  Lookup table would be
rpm-build 0f2925
     one way to speed this up. *)
rpm-build 0f2925
  let find_lsb b =
rpm-build 0f2925
    assert (b <> 0);
rpm-build 0f2925
    let rec loop n =
rpm-build 0f2925
      if b land (1 lsl n) <> 0 then n else loop (n+1) in
rpm-build 0f2925
    loop 0 in
rpm-build 0f2925
rpm-build 0f2925
  let buf = b.data in
rpm-build 0f2925
  let rec find_bit byte_ndx bit_offs =
rpm-build 0f2925
    if byte_ndx >= b.len then
rpm-build 0f2925
      None
rpm-build 0f2925
    else
rpm-build 0f2925
      let byte = (bget buf byte_ndx) lsr bit_offs in
rpm-build 0f2925
      if byte = 0 then
rpm-build 0f2925
        find_bit (byte_ndx + 1) 0
rpm-build 0f2925
      else
rpm-build 0f2925
        Some ((find_lsb byte) + (byte_ndx lsl log_int_size) + bit_offs) in
rpm-build 0f2925
  find_bit (n lsr log_int_size) (n land int_size)
rpm-build 0f2925
      
rpm-build 0f2925
let enum t =
rpm-build 0f2925
  let rec make n =
rpm-build 0f2925
    let cur = ref n in
rpm-build 0f2925
    let rec next () =
rpm-build 0f2925
      match find_first_set t !cur with
rpm-build 0f2925
        Some elem ->
rpm-build 0f2925
          cur := (elem+1);
rpm-build 0f2925
          elem
rpm-build 0f2925
      | None ->
rpm-build 0f2925
          raise Enum.No_more_elements in
rpm-build 0f2925
    Enum.make
rpm-build 0f2925
      ~next
rpm-build 0f2925
      ~count:(fun () -> partial_count t !cur)
rpm-build 0f2925
      ~clone:(fun () -> make !cur)
rpm-build 0f2925
  in
rpm-build 0f2925
  make 0
rpm-build 0f2925
rpm-build 0f2925
let raw_create size = 
rpm-build 0f2925
  let b = bcreate size in
rpm-build 0f2925
  bfill b 0 size 0;
rpm-build 0f2925
  { data = b; len = size }
rpm-build 0f2925
rpm-build 0f2925
let inter a b =
rpm-build 0f2925
  let max_size = max a.len b.len in
rpm-build 0f2925
  let d = raw_create max_size in
rpm-build 0f2925
  let sl = min a.len b.len in
rpm-build 0f2925
  let abuf = a.data
rpm-build 0f2925
  and bbuf = b.data in
rpm-build 0f2925
  (* Note: rest of the array is set to zero automatically *)
rpm-build 0f2925
  for i = 0 to sl-1 do
rpm-build 0f2925
    bset d.data i ((bget abuf i) land (bget bbuf i))
rpm-build 0f2925
  done;
rpm-build 0f2925
  d
rpm-build 0f2925
rpm-build 0f2925
(* Note: rest of the array is handled automatically correct, since we
rpm-build 0f2925
   took a copy of the bigger set. *)
rpm-build 0f2925
let union a b = 
rpm-build 0f2925
  let d = if a.len > b.len then copy a else copy b in
rpm-build 0f2925
  let sl = min a.len b.len in
rpm-build 0f2925
  let abuf = a.data
rpm-build 0f2925
  and bbuf = b.data in
rpm-build 0f2925
  for i = 0 to sl-1 do
rpm-build 0f2925
    bset d.data i ((bget abuf i) lor (bget bbuf i))
rpm-build 0f2925
  done;
rpm-build 0f2925
  d
rpm-build 0f2925
rpm-build 0f2925
let diff a b = 
rpm-build 0f2925
  let maxlen = max a.len b.len in
rpm-build 0f2925
  let buf = bcreate maxlen in
rpm-build 0f2925
  bblit a.data 0 buf 0 a.len;
rpm-build 0f2925
  let sl = min a.len b.len in
rpm-build 0f2925
  let abuf = a.data
rpm-build 0f2925
  and bbuf = b.data in
rpm-build 0f2925
  for i = 0 to sl-1 do
rpm-build 0f2925
    bset buf i ((bget abuf i) land (lnot (bget bbuf i)))
rpm-build 0f2925
  done;
rpm-build 0f2925
  { data = buf; len = maxlen }
rpm-build 0f2925
rpm-build 0f2925
let sym_diff a b = 
rpm-build 0f2925
  let maxlen = max a.len b.len in
rpm-build 0f2925
  let buf = bcreate maxlen in
rpm-build 0f2925
  (* Copy larger (assumes missing bits are zero) *)
rpm-build 0f2925
  bblit (if a.len > b.len then a.data else b.data) 0 buf 0 maxlen;
rpm-build 0f2925
  let sl = min a.len b.len in
rpm-build 0f2925
  let abuf = a.data
rpm-build 0f2925
  and bbuf = b.data in
rpm-build 0f2925
  for i = 0 to sl-1 do
rpm-build 0f2925
    bset buf i ((bget abuf i) lxor (bget bbuf i))
rpm-build 0f2925
  done;
rpm-build 0f2925
  { data = buf; len = maxlen }
rpm-build 0f2925
rpm-build 0f2925
(* TODO the following set operations can be made faster if you do the
rpm-build 0f2925
   set operation in-place instead of taking a copy.  But be careful
rpm-build 0f2925
   when the sizes of the bitvector strings differ. *)
rpm-build 0f2925
let intersect t t' =
rpm-build 0f2925
  let d = inter t t' in
rpm-build 0f2925
  t.data <- d.data;
rpm-build 0f2925
  t.len <- d.len
rpm-build 0f2925
rpm-build 0f2925
let differentiate t t' =
rpm-build 0f2925
  let d = diff t t' in
rpm-build 0f2925
  t.data <- d.data;
rpm-build 0f2925
  t.len <- d.len
rpm-build 0f2925
rpm-build 0f2925
let unite t t' =
rpm-build 0f2925
  let d = union t t' in
rpm-build 0f2925
  t.data <- d.data;
rpm-build 0f2925
  t.len <- d.len
rpm-build 0f2925
rpm-build 0f2925
let differentiate_sym t t' =
rpm-build 0f2925
  let d = sym_diff t t' in
rpm-build 0f2925
  t.data <- d.data;
rpm-build 0f2925
  t.len <- d.len