Blame doc/devhelp/libxml2-xmlregexp.html

Packit 423ecb
Packit 423ecb
<html>
Packit 423ecb
  <head>
Packit 423ecb
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
Packit 423ecb
    <title>xmlregexp: regular expressions handling</title>
Packit 423ecb
    <meta name="generator" content="Libxml2 devhelp stylesheet"/>
Packit 423ecb
    <link rel="start" href="index.html" title="libxml2 Reference Manual"/>
Packit 423ecb
    <link rel="up" href="general.html" title="API"/>
Packit 423ecb
    <link rel="stylesheet" href="style.css" type="text/css"/>
Packit 423ecb
    <link rel="chapter" href="general.html" title="API"/>
Packit 423ecb
  </head>
Packit 423ecb
  <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
Packit 423ecb
    
Packit 423ecb
      
Packit 423ecb
        
Packit 423ecb
          
Packit 423ecb
            Prev
Packit 423ecb
          
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb
          
Packit 423ecb
            Up
Packit 423ecb
          
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb
          
Packit 423ecb
            Home
Packit 423ecb
          
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb
          
Packit 423ecb
            Next
Packit 423ecb
          
Packit 423ecb
        
Packit 423ecb
        libxml2 Reference Manual
Packit 423ecb
      
Packit 423ecb
    
Packit 423ecb
    

Packit 423ecb
      xmlregexp
Packit 423ecb
    
Packit 423ecb
    

xmlregexp - regular expressions handling

Packit 423ecb
    

basic API for libxml regular expressions handling used for XML Schemas and validation.

Packit 423ecb
    

Author(s): Daniel Veillard

Packit 423ecb
    
Packit 423ecb
      

Synopsis

Packit 423ecb
      
typedef xmlRegExecCtxt * xmlRegExecCtxtPtr;
Packit 423ecb
typedef struct _xmlExpCtxt xmlExpCtxt;
Packit 423ecb
typedef xmlExpNode * xmlExpNodePtr;
Packit 423ecb
typedef xmlExpCtxt * xmlExpCtxtPtr;
Packit 423ecb
typedef enum xmlExpNodeType;
Packit 423ecb
typedef struct _xmlRegExecCtxt xmlRegExecCtxt;
Packit 423ecb
typedef struct _xmlExpNode xmlExpNode;
Packit 423ecb
typedef struct _xmlRegexp xmlRegexp;
Packit 423ecb
typedef xmlRegexp * xmlRegexpPtr;
Packit 423ecb
typedef void xmlRegExecCallbacks		(xmlRegExecCtxtPtr exec, 
const xmlChar * token,
void * transdata,
void * inputdata);
Packit 423ecb
xmlRegExecCtxtPtr	xmlRegNewExecCtxt	(xmlRegexpPtr comp, 
xmlRegExecCallbacks callback,
void * data);
Packit 423ecb
xmlExpNodePtr	xmlExpNewOr		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr left,
xmlExpNodePtr right);
Packit 423ecb
void	xmlRegFreeRegexp		(xmlRegexpPtr regexp);
Packit 423ecb
void	xmlExpRef			(xmlExpNodePtr exp);
Packit 423ecb
int	xmlRegexpIsDeterminist		(xmlRegexpPtr comp);
Packit 423ecb
int	xmlRegExecErrInfo		(xmlRegExecCtxtPtr exec, 
const xmlChar ** string,
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal);
Packit 423ecb
void	xmlRegFreeExecCtxt		(xmlRegExecCtxtPtr exec);
Packit 423ecb
int	xmlExpCtxtNbCons		(xmlExpCtxtPtr ctxt);
Packit 423ecb
int	xmlExpSubsume			(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
xmlExpNodePtr sub);
Packit 423ecb
int	xmlRegExecPushString2		(xmlRegExecCtxtPtr exec, 
const xmlChar * value,
const xmlChar * value2,
void * data);
Packit 423ecb
int	xmlRegExecNextValues		(xmlRegExecCtxtPtr exec, 
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal);
Packit 423ecb
xmlExpNodePtr	xmlExpExpDerive		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
xmlExpNodePtr sub);
Packit 423ecb
int	xmlExpIsNillable		(xmlExpNodePtr exp);
Packit 423ecb
void	xmlExpFreeCtxt			(xmlExpCtxtPtr ctxt);
Packit 423ecb
void	xmlExpDump			(xmlBufferPtr buf, 
xmlExpNodePtr expr);
Packit 423ecb
xmlExpNodePtr	xmlExpNewSeq		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr left,
xmlExpNodePtr right);
Packit 423ecb
void	xmlExpFree			(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp);
Packit 423ecb
xmlExpNodePtr	xmlExpNewRange		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr subset,
int min,
int max);
Packit 423ecb
xmlRegexpPtr	xmlRegexpCompile	(const xmlChar * regexp);
Packit 423ecb
xmlExpNodePtr	xmlExpNewAtom		(xmlExpCtxtPtr ctxt, 
const xmlChar * name,
int len);
Packit 423ecb
int	xmlRegexpExec			(xmlRegexpPtr comp, 
const xmlChar * content);
Packit 423ecb
int	xmlRegExecPushString		(xmlRegExecCtxtPtr exec, 
const xmlChar * value,
void * data);
Packit 423ecb
int	xmlExpGetStart			(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar ** tokList,
int len);
Packit 423ecb
xmlExpNodePtr	xmlExpParse		(xmlExpCtxtPtr ctxt, 
const char * expr);
Packit 423ecb
xmlExpCtxtPtr	xmlExpNewCtxt		(int maxNodes, 
xmlDictPtr dict);
Packit 423ecb
int	xmlExpGetLanguage		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar ** langList,
int len);
Packit 423ecb
xmlExpNodePtr	xmlExpStringDerive	(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar * str,
int len);
Packit 423ecb
int	xmlExpCtxtNbNodes		(xmlExpCtxtPtr ctxt);
Packit 423ecb
int	xmlExpMaxToken			(xmlExpNodePtr expr);
Packit 423ecb
void	xmlRegexpPrint			(FILE * output, 
xmlRegexpPtr regexp);
Packit 423ecb
Packit 423ecb
    
Packit 423ecb
    
Packit 423ecb
      

Description

Packit 423ecb
    
Packit 423ecb
    
Packit 423ecb
      

Details

Packit 423ecb
      
Packit 423ecb
        

Structure xmlExpCtxt

struct _xmlExpCtxt {
Packit 423ecb
The content of this structure is not made public by the API.
Packit 423ecb
} xmlExpCtxt;
Packit 423ecb

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Typedef xmlExpCtxtPtr

xmlExpCtxt * xmlExpCtxtPtr;
Packit 423ecb

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Structure xmlExpNode

struct _xmlExpNode {
Packit 423ecb
The content of this structure is not made public by the API.
Packit 423ecb
} xmlExpNode;
Packit 423ecb

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Typedef xmlExpNodePtr

xmlExpNode * xmlExpNodePtr;
Packit 423ecb

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Enum xmlExpNodeType

enum xmlExpNodeType {
Packit 423ecb
    XML_EXP_EMPTY = 0
Packit 423ecb
    XML_EXP_FORBID = 1
Packit 423ecb
    XML_EXP_ATOM = 2
Packit 423ecb
    XML_EXP_SEQ = 3
Packit 423ecb
    XML_EXP_OR = 4
Packit 423ecb
    XML_EXP_COUNT = 5
Packit 423ecb
};
Packit 423ecb

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Structure xmlRegExecCtxt

struct _xmlRegExecCtxt {
Packit 423ecb
The content of this structure is not made public by the API.
Packit 423ecb
} xmlRegExecCtxt;
Packit 423ecb

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Typedef xmlRegExecCtxtPtr

xmlRegExecCtxt * xmlRegExecCtxtPtr;
Packit 423ecb

A libxml progressive regular expression evaluation context

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Structure xmlRegexp

struct _xmlRegexp {
Packit 423ecb
The content of this structure is not made public by the API.
Packit 423ecb
} xmlRegexp;
Packit 423ecb

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Typedef xmlRegexpPtr

xmlRegexp * xmlRegexpPtr;
Packit 423ecb

A libxml regular expression; these can actually be far more complex than POSIX regular expressions.

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Function type xmlRegExecCallbacks

void	xmlRegExecCallbacks		(xmlRegExecCtxtPtr exec, 
const xmlChar * token,
void * transdata,
void * inputdata)
Packit 423ecb

Callback function when doing a transition in the automata

Packit 423ecb
<tt>exec</tt>:the regular expression context
<tt>token</tt>:the current token string
<tt>transdata</tt>:transition data
<tt>inputdata</tt>:input data
Packit 423ecb
        
Packit 423ecb
        

Variable emptyExp

xmlExpNodePtr emptyExp;
Packit 423ecb

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        

Variable forbiddenExp

xmlExpNodePtr forbiddenExp;
Packit 423ecb

Packit 423ecb
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Debugging facility provides the number of allocated nodes over lifetime

Packit 423ecb
<tt>ctxt</tt>:an expression context
<tt>Returns</tt>:the number of nodes ever allocated or -1 in case of error
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Debugging facility provides the number of allocated nodes at that point

Packit 423ecb
<tt>ctxt</tt>:an expression context
<tt>Returns</tt>:the number of nodes in use or -1 in case of error
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Serialize the expression as compiled to the buffer

Packit 423ecb
<tt>buf</tt>:a buffer to receive the output
<tt>expr</tt>:the compiled expression
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Evaluates the expression resulting from @exp consuming a sub expression @sub. Based on algebraic derivation and sometimes direct Brzozowski derivation, it usually takes less than linear time and can handle expressions generating infinite languages.

Packit 423ecb
<tt>ctxt</tt>:the expressions context
<tt>exp</tt>:the enclosing expression
<tt>sub</tt>:the subexpression
<tt>Returns</tt>:the resulting expression or NULL in case of internal error, the result must be freed
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Dereference the expression

Packit 423ecb
<tt>ctxt</tt>:the expression context
<tt>exp</tt>:the expression
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Free an expression context

Packit 423ecb
<tt>ctxt</tt>:an expression context
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Find all the strings used in @exp and store them in @langList

Packit 423ecb
<tt>ctxt</tt>:the expression context
<tt>exp</tt>:the expression
<tt>langList</tt>:where to store the tokens
<tt>len</tt>:the allocated length of @langList
<tt>Returns</tt>:the number of unique strings found, -1 in case of errors and -2 if there are more than @len strings
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Find all the strings that appear at the start of the languages accepted by @exp and store them in @tokList. E.g. for (a, b) | c it will return the list [a, c]

Packit 423ecb
<tt>ctxt</tt>:the expression context
<tt>exp</tt>:the expression
<tt>tokList</tt>:where to store the tokens
<tt>len</tt>:the allocated length of @tokList
<tt>Returns</tt>:the number of unique strings found, -1 in case of errors and -2 if there are more than @len strings
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Finds if the expression is nillable, i.e. if it accepts the empty sequence

Packit 423ecb
<tt>exp</tt>:the expression
<tt>Returns</tt>:1 if nillable, 0 if not and -1 in case of error
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Indicate the maximum number of inputs an expression can accept

Packit 423ecb
<tt>expr</tt>:a compiled expression
<tt>Returns</tt>:the maximum length or -1 in case of error
Packit 423ecb
        
Packit 423ecb
        

xmlExpNewAtom ()

xmlExpNodePtr	xmlExpNewAtom		(xmlExpCtxtPtr ctxt, 
const xmlChar * name,
int len)
Packit 423ecb

Get the atom associated to this name from that context

Packit 423ecb
<tt>ctxt</tt>:the expression context
<tt>name</tt>:the atom name
<tt>len</tt>:the atom name length in bytes (or -1)
<tt>Returns</tt>:the node or NULL in case of error
Packit 423ecb
        
Packit 423ecb
        

xmlExpNewCtxt ()

xmlExpCtxtPtr	xmlExpNewCtxt		(int maxNodes, 
xmlDictPtr dict)
Packit 423ecb

Creates a new context for manipulating expressions

Packit 423ecb
<tt>maxNodes</tt>:the maximum number of nodes
<tt>dict</tt>:optional dictionary to use internally
<tt>Returns</tt>:the context or NULL in case of error
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Get the atom associated to the choice @left | @right. Note that @left and @right are consumed in the operation; to keep a handle on them use xmlExpRef() and use xmlExpFree() to release them. This is true even in case of failure (unless ctxt == NULL).

Packit 423ecb
<tt>ctxt</tt>:the expression context
<tt>left</tt>:left expression
<tt>right</tt>:right expression
<tt>Returns</tt>:the node or NULL in case of error
Packit 423ecb
        
Packit 423ecb
        

xmlExpNewRange ()

xmlExpNodePtr	xmlExpNewRange		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr subset,
int min,
int max)
Packit 423ecb

Get the atom associated to the range (@subset){@min, @max}. Note that @subset is consumed in the operation; to keep a handle on it use xmlExpRef() and use xmlExpFree() to release it. This is true even in case of failure (unless ctxt == NULL).

Packit 423ecb
<tt>ctxt</tt>:the expression context
<tt>subset</tt>:the expression to be repeated
<tt>min</tt>:the lower bound for the repetition
<tt>max</tt>:the upper bound for the repetition, -1 means infinite
<tt>Returns</tt>:the node or NULL in case of error
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Get the atom associated to the sequence @left , @right. Note that @left and @right are consumed in the operation; to keep a handle on them use xmlExpRef() and use xmlExpFree() to release them. This is true even in case of failure (unless ctxt == NULL).

Packit 423ecb
<tt>ctxt</tt>:the expression context
<tt>left</tt>:left expression
<tt>right</tt>:right expression
<tt>Returns</tt>:the node or NULL in case of error
Packit 423ecb
        
Packit 423ecb
        

xmlExpParse ()

xmlExpNodePtr	xmlExpParse		(xmlExpCtxtPtr ctxt, 
const char * expr)
Packit 423ecb

Minimal parser for regexps, it understands the following constructs: - string terminals - choice operator | - sequence operator , - subexpressions (...) - usual cardinality operators + * and ? - finite sequences { min, max } - infinite sequences { min, * }. Only minimal checking is performed; in particular, string values are not checked.

Packit 423ecb
<tt>ctxt</tt>:the expressions context
<tt>expr</tt>:the 0 terminated string
<tt>Returns</tt>:a new expression or NULL in case of failure
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Increase the reference count of the expression

Packit 423ecb
<tt>exp</tt>:the expression
Packit 423ecb
        
Packit 423ecb
        

xmlExpStringDerive ()

xmlExpNodePtr	xmlExpStringDerive	(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar * str,
int len)
Packit 423ecb

Do one step of Brzozowski derivation of the expression @exp with respect to the input string

Packit 423ecb
<tt>ctxt</tt>:the expression context
<tt>exp</tt>:the expression
<tt>str</tt>:the string
<tt>len</tt>:the string len in bytes if available
<tt>Returns</tt>:the resulting expression or NULL in case of internal error
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Check whether @exp accepts all the languages accepted by @sub, the input being a subexpression.

Packit 423ecb
<tt>ctxt</tt>:the expressions context
<tt>exp</tt>:the enclosing expression
<tt>sub</tt>:the subexpression
<tt>Returns</tt>:1 if true 0 if false and -1 in case of failure.
Packit 423ecb
        
Packit 423ecb
        

xmlRegExecErrInfo ()

int	xmlRegExecErrInfo		(xmlRegExecCtxtPtr exec, 
const xmlChar ** string,
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal)
Packit 423ecb

Extract error information from the regexp execution. The parameter @string will be updated with the value pushed and not accepted. The parameter @values must point to an array of @nbval string pointers; on return, @nbval will contain the number of possible strings in that state and the @values array will be updated with them. The string values will be freed with the @exec context and don't need to be deallocated.

Packit 423ecb
<tt>exec</tt>:a regexp execution context generating an error
<tt>string</tt>:return value for the error string
<tt>nbval</tt>:pointer to the number of accepted values IN/OUT
<tt>nbneg</tt>:return number of negative transitions
<tt>values</tt>:pointer to the array of acceptable values
<tt>terminal</tt>:return value if this was a terminal state
<tt>Returns</tt>:0 in case of success or -1 in case of error.
Packit 423ecb
        
Packit 423ecb
        

xmlRegExecNextValues ()

int	xmlRegExecNextValues		(xmlRegExecCtxtPtr exec, 
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal)
Packit 423ecb

Extract information from the regexp execution. The parameter @values must point to an array of @nbval string pointers; on return, @nbval will contain the number of possible strings in that state and the @values array will be updated with them. The string values will be freed with the @exec context and don't need to be deallocated.

Packit 423ecb
<tt>exec</tt>:a regexp execution context
<tt>nbval</tt>:pointer to the number of accepted values IN/OUT
<tt>nbneg</tt>:return number of negative transitions
<tt>values</tt>:pointer to the array of acceptable values
<tt>terminal</tt>:return value if this was a terminal state
<tt>Returns</tt>:0 in case of success or -1 in case of error.
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Push one input token in the execution context

Packit 423ecb
<tt>exec</tt>:a regexp execution context or NULL to indicate the end
<tt>value</tt>:a string token input
<tt>data</tt>:data associated to the token to reuse in callbacks
<tt>Returns</tt>:1 if the regexp reached a final state, 0 if non-final, and a negative value in case of error.
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Push one input token in the execution context

Packit 423ecb
<tt>exec</tt>:a regexp execution context or NULL to indicate the end
<tt>value</tt>:the first string token input
<tt>value2</tt>:the second string token input
<tt>data</tt>:data associated to the token to reuse in callbacks
<tt>Returns</tt>:1 if the regexp reached a final state, 0 if non-final, and a negative value in case of error.
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Free the structures associated to a regular expression evaluation context.

Packit 423ecb
<tt>exec</tt>:a regular expression evaluation context
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Free a regexp

Packit 423ecb
<tt>regexp</tt>:the regexp
Packit 423ecb
        
Packit 423ecb
        

xmlRegNewExecCtxt ()

xmlRegExecCtxtPtr	xmlRegNewExecCtxt	(xmlRegexpPtr comp, 
xmlRegExecCallbacks callback,
void * data)
Packit 423ecb

Build a context used for progressive evaluation of a regexp.

Packit 423ecb
<tt>comp</tt>:a precompiled regular expression
<tt>callback</tt>:a callback function used for handling progress in the automata matching phase
<tt>data</tt>:the context data associated to the callback in this context
<tt>Returns</tt>:the new context
Packit 423ecb
        
Packit 423ecb
        

xmlRegexpCompile ()

xmlRegexpPtr	xmlRegexpCompile	(const xmlChar * regexp)
Packit 423ecb

Parses a regular expression conforming to XML Schemas Part 2 Datatype Appendix F and builds an automaton suitable for testing strings against that regular expression

Packit 423ecb
<tt>regexp</tt>:a regular expression string
<tt>Returns</tt>:the compiled expression or NULL in case of error
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Check if the regular expression generates the value

Packit 423ecb
<tt>comp</tt>:the compiled regular expression
<tt>content</tt>:the value to check against the regular expression
<tt>Returns</tt>:1 if it matches, 0 if not and a negative value in case of error
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Check if the regular expression is deterministic

Packit 423ecb
<tt>comp</tt>:the compiled regular expression
<tt>Returns</tt>:1 if it yes, 0 if not and a negative value in case of error
Packit 423ecb
        
Packit 423ecb
        
Packit 423ecb

Print the content of the compiled regular expression

Packit 423ecb
<tt>output</tt>:the file for the output debug
<tt>regexp</tt>:the compiled regexp
Packit 423ecb
        
Packit 423ecb
      
Packit 423ecb
    
Packit 423ecb
  </body>
Packit 423ecb
</html>