Blame lenses/xml.aug

Packit Service a2ae7a
(* XML lens for Augeas
Packit Service a2ae7a
   Author: Francis Giraldeau <francis.giraldeau@usherbrooke.ca>
Packit Service a2ae7a
Packit Service a2ae7a
   Reference: http://www.w3.org/TR/2006/REC-xml11-20060816/
Packit Service a2ae7a
*)
Packit Service a2ae7a
Packit Service a2ae7a
module Xml =
Packit Service a2ae7a
Packit Service a2ae7a
autoload xfm
Packit Service a2ae7a
Packit Service a2ae7a
(************************************************************************
Packit Service a2ae7a
 *                           Utilities lens
Packit Service a2ae7a
 *************************************************************************)
Packit Service a2ae7a
Packit Service a2ae7a
let dels (s:string)   = del s s
Packit Service a2ae7a
let spc               = /[ \t\r\n]+/
Packit Service a2ae7a
let osp               = /[ \t\r\n]*/
Packit Service a2ae7a
let sep_spc           = del /[ \t\r\n]+/ " "
Packit Service a2ae7a
let sep_osp           = del /[ \t\r\n]*/ ""
Packit Service a2ae7a
let sep_eq            = del /[ \t\r\n]*=[ \t\r\n]*/ "="
Packit Service a2ae7a
Packit Service a2ae7a
let nmtoken             = /[a-zA-Z:_][a-zA-Z0-9:_.-]*/
Packit Service a2ae7a
let word                = /[a-zA-Z][a-zA-Z0-9._-]*/
Packit Service a2ae7a
let char                = /.|(\r?\n)/
Packit Service a2ae7a
(* if we hide the quotes, then we can only accept single or double quotes *)
Packit Service a2ae7a
(* otherwise a put ambiguity is raised *)
Packit Service a2ae7a
let sto_dquote          = dels "\"" . store /[^"]*/ . dels "\"" (* " *)
Packit Service a2ae7a
let sto_squote          = dels "'" . store /[^']*/ . dels "'"
Packit Service a2ae7a
Packit Service a2ae7a
let comment             = [ label "#comment" .
Packit Service a2ae7a
                            dels "
Packit Service a2ae7a
                            store /([^-]|-[^-])*/ .
Packit Service a2ae7a
                            dels "-->" ]
Packit Service a2ae7a
Packit Service a2ae7a
let pi_target           = nmtoken - /[Xx][Mm][Ll]/
Packit Service a2ae7a
let empty               = Util.empty
Packit Service a2ae7a
let del_end             = del />[\r?\n]?/ ">\n"
Packit Service a2ae7a
let del_end_simple      = dels ">"
Packit Service a2ae7a
Packit Service a2ae7a
(* This is siplified version of processing instruction
Packit Service a2ae7a
 * pi has to not start or end with a white space and the string
Packit Service a2ae7a
 * must not contain "?>". We restrict too much by not allowing any
Packit Service a2ae7a
 * "?" nor ">" in PI
Packit Service a2ae7a
 *)
Packit Service a2ae7a
let pi                  = /[^ \r\n\t]|[^ \r\n\t][^?>]*[^ \r\n\t]/
Packit Service a2ae7a
Packit Service a2ae7a
(************************************************************************
Packit Service a2ae7a
 *                            Attributes
Packit Service a2ae7a
 *************************************************************************)
Packit Service a2ae7a
Packit Service a2ae7a
Packit Service a2ae7a
let decl          = [ label "#decl" . sep_spc .
Packit Service a2ae7a
                      store /[^> \t\n\r]|[^> \t\n\r][^>\t\n\r]*[^> \t\n\r]/ ]
Packit Service a2ae7a
Packit Service a2ae7a
let decl_def (r:regexp) (b:lens) = [ dels "<" . key r .
Packit Service a2ae7a
                                     sep_spc . store nmtoken .
Packit Service a2ae7a
                                     b . sep_osp . del_end_simple ]
Packit Service a2ae7a
Packit Service a2ae7a
let elem_def      = decl_def /!ELEMENT/ decl
Packit Service a2ae7a
Packit Service a2ae7a
let enum          = "(" . osp . nmtoken . ( osp . "|" . osp . nmtoken )* . osp . ")"
Packit Service a2ae7a
Packit Service a2ae7a
let att_type      = /CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS/ |
Packit Service a2ae7a
                     enum
Packit Service a2ae7a
Packit Service a2ae7a
let id_def        = [ sep_spc . key /PUBLIC/ .
Packit Service a2ae7a
                      [ label "#literal" . sep_spc . sto_dquote ]* ] |
Packit Service a2ae7a
                    [ sep_spc . key /SYSTEM/ . sep_spc . sto_dquote ]
Packit Service a2ae7a
Packit Service a2ae7a
let notation_def  = decl_def /!NOTATION/ id_def
Packit Service a2ae7a
Packit Service a2ae7a
let att_def       = counter "att_id" .
Packit Service a2ae7a
                    [ sep_spc . seq "att_id" .
Packit Service a2ae7a
                      [ label "#name" . store word . sep_spc ] .
Packit Service a2ae7a
                      [ label "#type" . store att_type . sep_spc ] .
Packit Service a2ae7a
                      ([ key   /#REQUIRED|#IMPLIED/ ] |
Packit Service a2ae7a
                       [ label "#FIXED" . del /#FIXED[ \r\n\t]*|/ "" . sto_dquote ]) ]*
Packit Service a2ae7a
Packit Service a2ae7a
let att_list_def = decl_def /!ATTLIST/ att_def
Packit Service a2ae7a
Packit Service a2ae7a
let entity_def   =
Packit Service a2ae7a
  let literal (lbl:string) = [ sep_spc . label lbl . sto_dquote ] in
Packit Service a2ae7a
  decl_def /!ENTITY/
Packit Service a2ae7a
    ( literal "#decl"
Packit Service a2ae7a
    | [ sep_spc . key /SYSTEM/ . literal "#systemliteral" ]
Packit Service a2ae7a
    | [ sep_spc . key /PUBLIC/ . literal "#pubidliteral"
Packit Service a2ae7a
                               . literal "#systemliteral" ] )
Packit Service a2ae7a
Packit Service a2ae7a
let decl_def_item = elem_def | entity_def | att_list_def | notation_def
Packit Service a2ae7a
Packit Service a2ae7a
let decl_outer    = sep_osp . del /\[[ \n\t\r]*/ "[\n" .
Packit Service a2ae7a
                    (decl_def_item . sep_osp )* . dels "]"
Packit Service a2ae7a
Packit Service a2ae7a
(* let dtd_def       = [ sep_spc . key "SYSTEM" . sep_spc . sto_dquote ] *)
Packit Service a2ae7a
Packit Service a2ae7a
let doctype       = decl_def /!DOCTYPE/ (decl_outer|id_def)
Packit Service a2ae7a
Packit Service a2ae7a
(* General shape of an attribute
Packit Service a2ae7a
 * q   is the regexp matching the quote character for the value
Packit Service a2ae7a
 * qd  is the default quote character
Packit Service a2ae7a
 * brx is what the actual attribute value must match *)
Packit Service a2ae7a
let attval (q:regexp) (qd:string) (brx:regexp) =
Packit Service a2ae7a
  let quote = del q qd in
Packit Service a2ae7a
  let body = store brx in
Packit Service a2ae7a
  [ sep_spc . key nmtoken . sep_eq . square quote body quote ]
Packit Service a2ae7a
Packit Service a2ae7a
(* We treat attributes according to one of the following three patterns:
Packit Service a2ae7a
   attval1 : values that must be quoted with single quotes
Packit Service a2ae7a
   attval2 : values that must be quoted with double quotes
Packit Service a2ae7a
   attval3 : values that can be quoted with either *)
Packit Service a2ae7a
let attributes    =
Packit Service a2ae7a
  let attval1 = attval "'" "'" /[^']*"[^']*/ in (* " *)
Packit Service a2ae7a
  let attval2 = attval "\"" "\"" /[^"]*'[^"]*/ in
Packit Service a2ae7a
  let attval3 = attval /['"]/ "\"" /(\\\\|[^'\"])*/ in (* " *)
Packit Service a2ae7a
  [ label "#attribute" . (attval1|attval2|attval3)+ ]
Packit Service a2ae7a
Packit Service a2ae7a
let prolog        = [ label "#declaration" .
Packit Service a2ae7a
                      dels "
Packit Service a2ae7a
                      attributes .
Packit Service a2ae7a
                      sep_osp .
Packit Service a2ae7a
                      dels "?>" ]
Packit Service a2ae7a
Packit Service a2ae7a
Packit Service a2ae7a
(************************************************************************
Packit Service a2ae7a
 *                            Tags
Packit Service a2ae7a
 *************************************************************************)
Packit Service a2ae7a
Packit Service a2ae7a
(* we consider entities as simple text *)
Packit Service a2ae7a
let text_re   = /[^<]+/ - /([^<]*\]\]>[^<]*)/
Packit Service a2ae7a
let text      = [ label "#text" . store text_re ]
Packit Service a2ae7a
let cdata     = [ label "#CDATA" . dels "
Packit Service a2ae7a
                  store (char* - (char* . "]]>" . char*)) . dels "]]>" ]
Packit Service a2ae7a
Packit Service a2ae7a
(* the value of nmtoken_del is always the nmtoken_key string *)
Packit Service a2ae7a
let nmtoken_key = key nmtoken
Packit Service a2ae7a
let nmtoken_del = del nmtoken "a"
Packit Service a2ae7a
Packit Service a2ae7a
let element (body:lens) =
Packit Service a2ae7a
    let h = attributes? . sep_osp . dels ">" . body* . dels "</" in
Packit Service a2ae7a
        [ dels "<" . square nmtoken_key h nmtoken_del . sep_osp . del_end ]
Packit Service a2ae7a
Packit Service a2ae7a
let empty_element = [ dels "<" . nmtoken_key . value "#empty" .
Packit Service a2ae7a
                      attributes? . sep_osp . del /\/>[\r?\n]?/ "/>\n" ]
Packit Service a2ae7a
Packit Service a2ae7a
let pi_instruction = [ dels "
Packit Service a2ae7a
                       [ label "#target" . store pi_target ] .
Packit Service a2ae7a
                       [ sep_spc . label "#instruction" . store pi ]? .
Packit Service a2ae7a
                       sep_osp . del /\?>/ "?>" ]
Packit Service a2ae7a
Packit Service a2ae7a
(* Typecheck is weaker on rec lens, detected by unfolding *)
Packit Service a2ae7a
(*
Packit Service a2ae7a
let content1 = element text
Packit Service a2ae7a
let rec content2 = element (content1|text|comment)
Packit Service a2ae7a
*)
Packit Service a2ae7a
Packit Service a2ae7a
let rec content = element (text|comment|content|empty_element|pi_instruction|cdata)
Packit Service a2ae7a
Packit Service a2ae7a
(* Constraints are weaker here, but it's better than being too strict *)
Packit Service a2ae7a
let doc = (sep_osp . (prolog  | comment | doctype | pi_instruction))* .
Packit Service a2ae7a
          ((sep_osp . content) | (sep_osp . empty_element)) .
Packit Service a2ae7a
          (sep_osp . (comment | pi_instruction ))* . sep_osp
Packit Service a2ae7a
Packit Service a2ae7a
let lns = doc | Util.empty?
Packit Service a2ae7a
Packit Service a2ae7a
let filter = (incl "/etc/xml/*.xml")
Packit Service a2ae7a
    . (incl "/etc/xml/catalog")
Packit Service a2ae7a
Packit Service a2ae7a
let xfm = transform lns filter