(* * Bitset - Efficient bit sets * Copyright (C) 2003 Nicolas Cannasse * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version, * with the special exception on linking described in file LICENSE. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) open ExtBytes type intern let bcreate : int -> intern = Obj.magic Bytes.create external fast_get : intern -> int -> int = "%string_unsafe_get" let fast_set : intern -> int -> int -> unit = Obj.magic Bytes.unsafe_set external fast_bool : int -> bool = "%identity" let fast_blit : intern -> int -> intern -> int -> int -> unit = Obj.magic Bytes.blit let fast_fill : intern -> int -> int -> int -> unit = Obj.magic Bytes.fill let fast_length : intern -> int= Obj.magic Bytes.length let bget s ndx = assert (ndx >= 0 && ndx < fast_length s); fast_get s ndx let bset s ndx v = assert (ndx >= 0 && ndx < fast_length s); fast_set s ndx v let bblit src srcoff dst dstoff len = assert (srcoff >= 0 && dstoff >= 0 && len >= 0); fast_blit src srcoff dst dstoff len let bfill dst start len c = assert (start >= 0 && len >= 0); fast_fill dst start len c exception Negative_index of string type t = { mutable data : intern; mutable len : int; } let error fname = raise (Negative_index fname) let empty() = { data = bcreate 0; len = 0; } let int_size = 7 (* value used to round up index *) let log_int_size = 3 (* number of shifts *) let create n = if n < 0 then error "create"; let size = (n+int_size) lsr log_int_size in let b = bcreate size in bfill b 0 size 0; { data = b; len = size; } let copy t = let b = bcreate t.len in bblit t.data 0 b 0 t.len; { data = b; len = t.len } let clone = copy let set t x = if x < 0 then error "set"; let pos = x lsr log_int_size and delta = x land int_size in let size = t.len in if pos >= size then begin let b = bcreate (pos+1) in bblit t.data 0 b 0 size; bfill b size (pos - size + 1) 0; t.len <- pos + 1; t.data <- b; end; bset t.data pos ((bget t.data pos) lor (1 lsl delta)) let unset t x = if x < 0 then error "unset"; let pos = x lsr log_int_size and delta = x land int_size in if pos < t.len then bset t.data pos ((bget t.data pos) land (0xFF lxor (1 lsl delta))) let toggle t x = if x < 0 then error "toggle"; let pos = x lsr log_int_size and delta = x land int_size in let size = t.len in if pos >= size then begin let b = bcreate (pos+1) in bblit t.data 0 b 0 size; bfill b size (pos - size + 1) 0; t.len <- pos + 1; t.data <- b; end; bset t.data pos ((bget t.data pos) lxor (1 lsl delta)) let put t = function | true -> set t | false -> unset t let is_set t x = if x < 0 then error "is_set"; let pos = x lsr log_int_size and delta = x land int_size in let size = t.len in if pos < size then fast_bool (((bget t.data pos) lsr delta) land 1) else false exception Break_int of int (* Find highest set element or raise Not_found *) let find_msb t = (* Find highest set bit in a byte. Does not work with zero. *) let byte_msb b = assert (b <> 0); let rec loop n = if b land (1 lsl n) = 0 then loop (n-1) else n in loop 7 in let n = t.len - 1 and buf = t.data in try for i = n downto 0 do let byte = bget buf i in if byte <> 0 then raise (Break_int ((i lsl log_int_size)+(byte_msb byte))) done; raise Not_found with Break_int n -> n | _ -> raise Not_found let compare t1 t2 = let some_msb b = try Some (find_msb b) with Not_found -> None in match (some_msb t1, some_msb t2) with (None, Some _) -> -1 (* 0-y -> -1 *) | (Some _, None) -> 1 (* x-0 -> 1 *) | (None, None) -> 0 (* 0-0 -> 0 *) | (Some a, Some b) -> (* x-y *) if a < b then -1 else if a > b then 1 else begin (* MSBs differ, we need to scan arrays until we find a difference *) let ndx = a lsr log_int_size in assert (ndx < t1.len && ndx < t2.len); try for i = ndx downto 0 do let b1 = bget t1.data i and b2 = bget t2.data i in if b1 <> b2 then raise (Break_int (compare b1 b2)) done; 0 with Break_int res -> res end let equals t1 t2 = compare t1 t2 = 0 let partial_count t x = let rec nbits x = if x = 0 then 0 else if fast_bool (x land 1) then 1 + (nbits (x lsr 1)) else nbits (x lsr 1) in let size = t.len in let pos = x lsr log_int_size and delta = x land int_size in let rec loop n acc = if n = size then acc else let x = bget t.data n in loop (n+1) (acc + nbits x) in if pos >= size then 0 else loop (pos+1) (nbits ((bget t.data pos) lsr delta)) let count t = partial_count t 0 (* Find the first set bit in the bit array *) let find_first_set b n = (* TODO there are many ways to speed this up. Lookup table would be one way to speed this up. *) let find_lsb b = assert (b <> 0); let rec loop n = if b land (1 lsl n) <> 0 then n else loop (n+1) in loop 0 in let buf = b.data in let rec find_bit byte_ndx bit_offs = if byte_ndx >= b.len then None else let byte = (bget buf byte_ndx) lsr bit_offs in if byte = 0 then find_bit (byte_ndx + 1) 0 else Some ((find_lsb byte) + (byte_ndx lsl log_int_size) + bit_offs) in find_bit (n lsr log_int_size) (n land int_size) let enum t = let rec make n = let cur = ref n in let rec next () = match find_first_set t !cur with Some elem -> cur := (elem+1); elem | None -> raise Enum.No_more_elements in Enum.make ~next ~count:(fun () -> partial_count t !cur) ~clone:(fun () -> make !cur) in make 0 let raw_create size = let b = bcreate size in bfill b 0 size 0; { data = b; len = size } let inter a b = let max_size = max a.len b.len in let d = raw_create max_size in let sl = min a.len b.len in let abuf = a.data and bbuf = b.data in (* Note: rest of the array is set to zero automatically *) for i = 0 to sl-1 do bset d.data i ((bget abuf i) land (bget bbuf i)) done; d (* Note: rest of the array is handled automatically correct, since we took a copy of the bigger set. *) let union a b = let d = if a.len > b.len then copy a else copy b in let sl = min a.len b.len in let abuf = a.data and bbuf = b.data in for i = 0 to sl-1 do bset d.data i ((bget abuf i) lor (bget bbuf i)) done; d let diff a b = let maxlen = max a.len b.len in let buf = bcreate maxlen in bblit a.data 0 buf 0 a.len; let sl = min a.len b.len in let abuf = a.data and bbuf = b.data in for i = 0 to sl-1 do bset buf i ((bget abuf i) land (lnot (bget bbuf i))) done; { data = buf; len = maxlen } let sym_diff a b = let maxlen = max a.len b.len in let buf = bcreate maxlen in (* Copy larger (assumes missing bits are zero) *) bblit (if a.len > b.len then a.data else b.data) 0 buf 0 maxlen; let sl = min a.len b.len in let abuf = a.data and bbuf = b.data in for i = 0 to sl-1 do bset buf i ((bget abuf i) lxor (bget bbuf i)) done; { data = buf; len = maxlen } (* TODO the following set operations can be made faster if you do the set operation in-place instead of taking a copy. But be careful when the sizes of the bitvector strings differ. *) let intersect t t' = let d = inter t t' in t.data <- d.data; t.len <- d.len let differentiate t t' = let d = diff t t' in t.data <- d.data; t.len <- d.len let unite t t' = let d = union t t' in t.data <- d.data; t.len <- d.len let differentiate_sym t t' = let d = sym_diff t t' in t.data <- d.data; t.len <- d.len