Blame compiler/lexer.mll

Packit bd2e5d
(***********************************************************************)
Packit bd2e5d
(*                                                                     *)
Packit bd2e5d
(*                 MLTk, Tcl/Tk interface of OCaml                     *)
Packit bd2e5d
(*                                                                     *)
Packit bd2e5d
(*    Francois Rouaix, Francois Pessaux, Jun Furuse and Pierre Weis    *)
Packit bd2e5d
(*               projet Cristal, INRIA Rocquencourt                    *)
Packit bd2e5d
(*            Jacques Garrigue, Kyoto University RIMS                  *)
Packit bd2e5d
(*                                                                     *)
Packit bd2e5d
(*  Copyright 2002 Institut National de Recherche en Informatique et   *)
Packit bd2e5d
(*  en Automatique and Kyoto University.  All rights reserved.         *)
Packit bd2e5d
(*  This file is distributed under the terms of the GNU Library        *)
Packit bd2e5d
(*  General Public License, with the special exception on linking      *)
Packit bd2e5d
(*  described in file ../LICENSE.                                      *)
Packit bd2e5d
(*                                                                     *)
Packit bd2e5d
(***********************************************************************)
Packit bd2e5d
Packit bd2e5d
(* $Id$ *)
Packit bd2e5d
Packit bd2e5d
{
Packit bd2e5d
open StdLabels
Packit bd2e5d
open Lexing
Packit bd2e5d
open Parser
Packit bd2e5d
Packit bd2e5d
exception Lexical_error of string
Packit bd2e5d
let current_line = ref 1
Packit bd2e5d
Packit bd2e5d
Packit bd2e5d
(* The table of keywords *)
Packit bd2e5d
Packit bd2e5d
let keyword_table = (Hashtbl.create 149 : (string, token) Hashtbl.t)
Packit bd2e5d
Packit bd2e5d
let _ = List.iter
Packit bd2e5d
  ~f:(fun (str,tok) -> Hashtbl.add keyword_table str tok)
Packit bd2e5d
  [
Packit bd2e5d
  "int", TYINT;
Packit bd2e5d
  "float", TYFLOAT;
Packit bd2e5d
  "bool", TYBOOL;
Packit bd2e5d
  "char", TYCHAR;
Packit bd2e5d
  "string", TYSTRING;
Packit bd2e5d
  "list", LIST;
Packit bd2e5d
  "as", AS;
Packit bd2e5d
  "variant", VARIANT;
Packit bd2e5d
  "widget", WIDGET;
Packit bd2e5d
  "option", OPTION;
Packit bd2e5d
  "type", TYPE;
Packit bd2e5d
  "subtype", SUBTYPE;
Packit bd2e5d
  "function", FUNCTION;
Packit bd2e5d
  "module", MODULE;
Packit bd2e5d
  "external", EXTERNAL;
Packit bd2e5d
  "sequence", SEQUENCE;
Packit bd2e5d
  "unsafe", UNSAFE
Packit bd2e5d
]
Packit bd2e5d
Packit bd2e5d
Packit bd2e5d
(* To buffer string literals *)
Packit bd2e5d
Packit bd2e5d
let initial_string_buffer = Bytes.create 256
Packit bd2e5d
let string_buff = ref initial_string_buffer
Packit bd2e5d
let string_index = ref 0
Packit bd2e5d
Packit bd2e5d
let reset_string_buffer () =
Packit bd2e5d
  string_buff := initial_string_buffer;
Packit bd2e5d
  string_index := 0;
Packit bd2e5d
  ()
Packit bd2e5d
Packit bd2e5d
let store_string_char c =
Packit bd2e5d
  if !string_index >= Bytes.length (!string_buff) then begin
Packit bd2e5d
    let new_buff = Bytes.create (Bytes.length (!string_buff) * 2) in
Packit bd2e5d
      Bytes.blit ~src:(!string_buff) ~src_pos:0 ~dst:new_buff ~dst_pos:0
Packit bd2e5d
                  ~len:(Bytes.length (!string_buff));
Packit bd2e5d
      string_buff := new_buff
Packit bd2e5d
  end;
Packit bd2e5d
  Bytes.set (!string_buff) (!string_index) c;
Packit bd2e5d
  incr string_index
Packit bd2e5d
Packit bd2e5d
let get_stored_string () =
Packit bd2e5d
  let s = Bytes.sub_string (!string_buff) 0 (!string_index) in
Packit bd2e5d
    string_buff := initial_string_buffer;
Packit bd2e5d
    s
Packit bd2e5d
(* To translate escape sequences *)
Packit bd2e5d
Packit bd2e5d
let char_for_backslash = function
Packit bd2e5d
    'n' -> '\010'
Packit bd2e5d
  | 'r' -> '\013'
Packit bd2e5d
  | 'b' -> '\008'
Packit bd2e5d
  | 't' -> '\009'
Packit bd2e5d
  | c   -> c
Packit bd2e5d
Packit bd2e5d
let char_for_decimal_code lexbuf i =
Packit bd2e5d
  Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
Packit bd2e5d
               10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
Packit bd2e5d
                    (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48))
Packit bd2e5d
Packit bd2e5d
let saved_string_start = ref 0
Packit bd2e5d
Packit bd2e5d
}
Packit bd2e5d
Packit bd2e5d
rule main = parse
Packit bd2e5d
    '\010' { incr current_line; main lexbuf }
Packit bd2e5d
  | [' ' '\013' '\009' '\026' '\012'] +
Packit bd2e5d
      { main lexbuf }
Packit bd2e5d
  | ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255' ]
Packit bd2e5d
    ( '_' ? ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255' (*'*) '0'-'9' ] ) *
Packit bd2e5d
      { let s = Lexing.lexeme lexbuf in
Packit bd2e5d
          try
Packit bd2e5d
            Hashtbl.find keyword_table s
Packit bd2e5d
          with Not_found ->
Packit bd2e5d
            IDENT s }
Packit bd2e5d
Packit bd2e5d
  | "\""
Packit bd2e5d
      { reset_string_buffer();
Packit bd2e5d
        (* Start of token is start of string. *)
Packit bd2e5d
        saved_string_start := lexbuf.lex_start_pos;
Packit bd2e5d
        string lexbuf;
Packit bd2e5d
        lexbuf.lex_start_pos <- !saved_string_start;
Packit bd2e5d
        STRING (get_stored_string()) }
Packit bd2e5d
  | "(" { LPAREN }
Packit bd2e5d
  | ")" { RPAREN }
Packit bd2e5d
  | "[" { LBRACKET }
Packit bd2e5d
  | "]" { RBRACKET }
Packit bd2e5d
  | "{" { LBRACE }
Packit bd2e5d
  | "}" { RBRACE }
Packit bd2e5d
  | "," { COMMA }
Packit bd2e5d
  | ";" { SEMICOLON }
Packit bd2e5d
  | ":" {COLON}
Packit bd2e5d
  | "?" {QUESTION}
Packit bd2e5d
  | "/" {SLASH}
Packit bd2e5d
  | "%" { comment lexbuf; main lexbuf }
Packit bd2e5d
  | "##line" { line lexbuf; main lexbuf }
Packit bd2e5d
  | eof { EOF }
Packit bd2e5d
  | _
Packit bd2e5d
      { raise (Lexical_error("illegal character")) }
Packit bd2e5d
Packit bd2e5d
Packit bd2e5d
and string = parse
Packit bd2e5d
    '"'
Packit bd2e5d
      { () }
Packit bd2e5d
  | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] +
Packit bd2e5d
      { string lexbuf }
Packit bd2e5d
  | '\\' ['\\' '"' 'n' 't' 'b' 'r']
Packit bd2e5d
      { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
Packit bd2e5d
        string lexbuf }
Packit bd2e5d
  | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
Packit bd2e5d
      { store_string_char(char_for_decimal_code lexbuf 1);
Packit bd2e5d
         string lexbuf }
Packit bd2e5d
  | eof
Packit bd2e5d
      { raise (Lexical_error("string not terminated")) }
Packit bd2e5d
  | '\010'
Packit bd2e5d
      { incr current_line;
Packit bd2e5d
        store_string_char(Lexing.lexeme_char lexbuf 0);
Packit bd2e5d
        string lexbuf }
Packit bd2e5d
  | _
Packit bd2e5d
      { store_string_char(Lexing.lexeme_char lexbuf 0);
Packit bd2e5d
        string lexbuf }
Packit bd2e5d
Packit bd2e5d
and comment = parse
Packit bd2e5d
   '\010' { incr current_line }
Packit bd2e5d
 | eof  { () }
Packit bd2e5d
 | _ { comment lexbuf }
Packit bd2e5d
Packit bd2e5d
and linenum = parse
Packit bd2e5d
 | ['0'-'9']+ {
Packit bd2e5d
            let next_line = int_of_string (Lexing.lexeme lexbuf) in
Packit bd2e5d
            current_line := next_line - 1
Packit bd2e5d
          }
Packit bd2e5d
 | _ { raise (Lexical_error("illegal ##line directive: no line number"))}
Packit bd2e5d
Packit bd2e5d
and line = parse
Packit bd2e5d
 | [' ' '\t']* { linenum lexbuf }