Blob Blame History Raw
(***********************************************************************)
(*                                                                     *)
(*                             ocamlbuild                              *)
(*                                                                     *)
(*  Nicolas Pouillard, Berke Durak, projet Gallium, INRIA Rocquencourt *)
(*                                                                     *)
(*  Copyright 2007 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the GNU Library General Public License, with    *)
(*  the special exception on linking described in file ../LICENSE.     *)
(*                                                                     *)
(***********************************************************************)


(* Original author: Berke Durak *)
(* Glob *)
open My_std;;
open Bool;;
include Glob_ast;;
open Glob_lexer;;

let sf = Printf.sprintf;;

let brute_limit = 10;;

(*** string_of_token *)
let string_of_token = function
| ATOM _ -> "ATOM"
| AND -> "AND"
| OR -> "OR"
| NOT -> "NOT"
| LPAR -> "LPAR"
| RPAR -> "RPAR"
| TRUE -> "TRUE"
| FALSE -> "FALSE"
| EOF -> "EOF"
;;
(* ***)
(*** match_character_class *)
let match_character_class cl c =
  Bool.eval
    begin function (c1,c2) ->
      c1 <= c && c <= c2
    end
    cl
;;
(* ***)
(*** NFA *)
module NFA =
  struct
    type transition =
    | QCLASS of character_class
    | QEPSILON
    ;;

    module IS = Set.Make(struct type t = int let compare (x:t) y = compare x y let print = Format.pp_print_int end);;
    module ISM = Map.Make(struct type t = IS.t let compare = IS.compare let print = IS.print end);;

    type machine = {
      mc_qi : IS.t;
      mc_table : (character_class * IS.t) list array;
      mc_qf : int;
      mc_power_table : (char, IS.t ISM.t) Hashtbl.t
    }

    (*** build' *)
    let build' p =
      let count = ref 0 in
      let transitions = ref [] in
      let epsilons : (int * int) list ref = ref [] in
      let state () = let id = !count in incr count; id in
      let ( --> ) q1 t q2 =
        match t with
        | QEPSILON -> epsilons := (q1,q2) :: !epsilons; q1
        | QCLASS cl -> transitions := (q1,cl,q2) :: !transitions; q1
      in
      (* Build the transitions corresponding to the given pattern and arriving
       * on state qf. Return the original state. *)
      let rec loop qf = function
        | Epsilon  -> qf
        | Word u   ->
            let m = String.length u in
            let q0 = state () in
            let rec loop q i =
              if i = m then
                q0
              else
                begin
                  let q' =
                    if i = m - 1 then
                      qf
                    else
                      state ()
                  in
                  let _ = (q --> QCLASS(Atom(u.[i], u.[i]))) q' in
                  loop q' (i + 1)
                end
            in
            loop q0 0
        | Class cl ->
            let q1 = state () in
            (q1 --> QCLASS cl) qf
        | Star p ->
            (* The fucking Kleene star *)
            let q2 = state () in
            let q1 = loop q2 p in (* q1 -{p}-> q2 *)
            let _ = (q1 --> QEPSILON) qf in
            let _ = (q2 --> QEPSILON) q1 in
            let _ = (q2 --> QEPSILON) q1 in
            q1
        | Concat(p1,p2) ->
            let q12 = state () in
            let q1  = loop q12 p1 in (* q1  -{p1}-> q12 *)
            let q2  = loop qf  p2 in (* q2  -{p2}-> qf *)
            let _   = (q12 --> QEPSILON) q2 in
            q1
        | Union pl ->
            let qi = state () in
            List.iter
              begin fun p ->
                let q = loop qf p in           (* q -{p2}-> qf *)
                let _ = (qi --> QEPSILON) q in (* qi -{}---> q  *)
                ()
              end
              pl;
            qi
      in
      let qf = state () in
      let qi = loop qf p in
      let m = !count in

      (* Compute epsilon closure *)
      let graph = Array.make m IS.empty in
      List.iter
        begin fun (q,q') ->
          graph.(q) <- IS.add q' graph.(q)
        end
        !epsilons;

      let closure = Array.make m IS.empty in
      let rec transitive past = function
      | [] -> past
      | q :: future ->
          let past' = IS.add q past in
          let future' =
            IS.fold
              begin fun q' future' ->
                (* q -{}--> q' *)
                if IS.mem q' past' then
                  future'
                else
                  q' :: future'
              end
              graph.(q)
              future
          in
          transitive past' future'
      in
      for i = 0 to m - 1 do
        closure.(i) <- transitive IS.empty [i] (* O(n^2), I know *)
      done;

      (* Finally, build the table *)
      let table = Array.make m [] in
      List.iter
        begin fun (q,t,q') ->
          table.(q) <- (t, closure.(q')) :: table.(q)
        end
        !transitions;

      (graph, closure,
      { mc_qi = closure.(qi);
        mc_table = table;
        mc_qf = qf;
        mc_power_table = Hashtbl.create 37 })
    ;;
    let build x = let (_,_, machine) = build' x in machine;;
    (* ***)
    (*** run *)
    let run ?(trace=false) machine u =
      let m = String.length u in
      let apply qs c =
        try
          let t = Hashtbl.find machine.mc_power_table c in
          ISM.find qs t
        with
        | Not_found ->
            let qs' =
              IS.fold
                begin fun q qs' ->
                  List.fold_left
                    begin fun qs' (cl,qs'') ->
                      if match_character_class cl c then
                        IS.union qs' qs''
                      else
                        qs'
                    end
                    qs'
                    machine.mc_table.(q)
                end
                qs
                IS.empty
            in
            let t =
              try
                Hashtbl.find machine.mc_power_table c
              with
              | Not_found -> ISM.empty
            in
            Hashtbl.replace machine.mc_power_table c (ISM.add qs qs' t);
            qs'
      in
      let rec loop qs i =
        if IS.is_empty qs then
          false
        else
          begin
            if i = m then
              IS.mem machine.mc_qf qs
            else
              begin
                let c = u.[i] in
                if trace then
                  begin
                    Printf.printf "%d %C {" i c;
                    IS.iter (fun q -> Printf.printf " %d" q) qs;
                    Printf.printf " }\n%!"
                  end;
                let qs' = apply qs c in
                loop qs' (i + 1)
              end
          end
      in
      loop machine.mc_qi 0
    ;;
    (* ***)
  end
;;
(* ***)
(*** Brute *)
module Brute =
  struct
    exception Succeed;;
    exception Fail;;
    exception Too_hard;;

    (*** match_pattern *)
    let match_pattern counter p u =
      let m = String.length u in
      (** [loop i n p] returns [true] iff the word [u.(i .. i + n - 1)] is in the
       ** language generated by the pattern [p].
       ** We must have 0 <= i and i + n <= m *)
      let rec loop (i,n,p) =
        assert (0 <= i && 0 <= n && i + n <= m);
        incr counter;
        if !counter >= brute_limit then raise Too_hard;
        match p with
        | Word v   ->
            String.length v = n &&
            begin
              let rec check j = j = n || (v.[j] = u.[i + j] && check (j + 1))
              in
              check 0
            end
        | Epsilon  -> n = 0
        | Star(Class True) -> true
        | Star(Class cl) ->
            let rec check k =
              if k = n then
                true
              else
                (match_character_class cl u.[i + k]) && check (k + 1)
            in
            check 0
        | Star _ -> raise Too_hard
        | Class cl -> n = 1 && match_character_class cl u.[i]
        | Concat(p1,p2) ->
            let rec scan j =
              j <= n && ((loop (i,j,p1) && loop (i+j, n - j,p2)) || scan (j + 1))
            in
            scan 0
        | Union pl -> List.exists (fun p' -> loop (i,n,p')) pl
      in
      loop (0,m,p)
    ;;
    (* ***)
end
;;
(* ***)
(*** fast_pattern_contents, fast_pattern, globber *)
type fast_pattern_contents =
| Brute of int ref * pattern
| Machine of NFA.machine
;;
type fast_pattern = fast_pattern_contents ref;;
type globber = fast_pattern atom Bool.boolean;;
(* ***)
(*** fast_pattern_of_pattern *)
let fast_pattern_of_pattern p = ref (Brute(ref 0, p));;
(* ***)
(*** add_dir *)
let add_dir dir x =
  match dir with
  | None -> x
  | Some(dir) ->
      match x with
      | Constant(s) ->
          Constant(My_std.filename_concat dir s)
      | Pattern(p) ->
          Pattern(Concat(Word(My_std.filename_concat dir ""), p))
;;
(* ***)
(*** add_ast_dir *)
let add_ast_dir dir x =
  match dir with
  | None -> x
  | Some dir ->
      let slash = Class(Atom('/','/')) in
      let any = Class True in
      let q = Union[Epsilon; Concat(slash, Star any)] in (* ( /** )? *)
      And[Atom(Pattern(ref (Brute(ref 0, Concat(Word dir, q))))); x]
;;
(* ***)
(*** parse *)
let parse ?dir u =
  let l = Lexing.from_string u in
  let tok = ref None in
  let f =
    fun () ->
      match !tok with
      | None -> token l
      | Some x ->
          tok := None;
          x
  in
  let g t =
    match !tok with
    | None -> tok := Some t
    | Some t' ->
        raise (Parse_error(sf "Trying to unput token %s while %s is active" (string_of_token t) (string_of_token t')))
  in
  let read x =
    let y = f () in
    if x = y then
      ()
    else
      raise (Parse_error(sf "Unexpected token, expecting %s, got %s" (string_of_token x) (string_of_token y)))
  in
  let rec atomizer continuation = match f () with
  | NOT    -> atomizer (fun x -> continuation (Not x))
  | ATOM x ->
      begin
        let a =
          match add_dir dir x with
          | Constant u -> Constant u
          | Pattern p -> Pattern(fast_pattern_of_pattern p)
        in
        continuation (Atom a)
      end
  | TRUE   -> continuation True
  | FALSE  -> continuation False
  | LPAR   ->
      let y = parse_s () in
      read RPAR;
      continuation y
  | t      -> raise (Parse_error(sf "Unexpected token %s in atomizer" (string_of_token t)))
  and parse_s1 x = match f () with
  | OR     -> let y = parse_s () in Or[x; y]
  | AND    -> parse_t x
  | t      -> g t; x
  and parse_t1 x y = match f () with
  | OR     -> let z = parse_s () in Or[And[x;y]; z]
  | AND    -> parse_t (And[x;y])
  | t      -> g t; And[x;y]
  and parse_s () = atomizer parse_s1
  and parse_t x = atomizer (parse_t1 x)
  in
  let x = parse_s () in
  read EOF;
  add_ast_dir dir x
;;
(* ***)
(*** eval *)
let eval g u =
  Bool.eval
    begin function
      | Constant v -> u = v
      | Pattern kind ->
          match !kind with
          | Brute(count, p) ->
            begin
              let do_nfa () =
                let m = NFA.build p in
                kind := Machine m;
                NFA.run m u
              in
              if !count >= brute_limit then
                do_nfa ()
              else
                try
                  Brute.match_pattern count p u
                with
                | Brute.Too_hard -> do_nfa ()
            end
          | Machine m -> NFA.run m u
    end
    g
(* ***)
(*** Debug *)
(*let (Atom(Pattern x)) = parse "<{a,b}>";;
#install_printer IS.print;;
#install_printer ISM.print;;
let (graph, closure, machine) = build' x;;*)
(* ***)