|
Packit |
423ecb |
|
|
Packit |
423ecb |
<html>
|
|
Packit |
423ecb |
<head>
|
|
Packit |
423ecb |
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
|
Packit |
423ecb |
<title>xmlregexp: regular expressions handling</title>
|
|
Packit |
423ecb |
<meta name="generator" content="Libxml2 devhelp stylesheet"/>
|
|
Packit |
423ecb |
<link rel="start" href="index.html" title="libxml2 Reference Manual"/>
|
|
Packit |
423ecb |
<link rel="up" href="general.html" title="API"/>
|
|
Packit |
423ecb |
<link rel="stylesheet" href="style.css" type="text/css"/>
|
|
Packit |
423ecb |
<link rel="chapter" href="general.html" title="API"/>
|
|
Packit |
423ecb |
</head>
|
|
Packit |
423ecb |
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
libxml2 Reference Manual
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlregexp
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
xmlregexp - regular expressions handling
|
|
Packit |
423ecb |
basic API for libxml regular expressions handling used for XML Schemas and validation.
|
|
Packit |
423ecb |
Author(s): Daniel Veillard
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Synopsis
|
|
Packit |
423ecb |
typedef xmlRegExecCtxt * xmlRegExecCtxtPtr;
|
|
Packit |
423ecb |
typedef struct _xmlExpCtxt xmlExpCtxt;
|
|
Packit |
423ecb |
typedef xmlExpNode * xmlExpNodePtr;
|
|
Packit |
423ecb |
typedef xmlExpCtxt * xmlExpCtxtPtr;
|
|
Packit |
423ecb |
typedef enum xmlExpNodeType;
|
|
Packit |
423ecb |
typedef struct _xmlRegExecCtxt xmlRegExecCtxt;
|
|
Packit |
423ecb |
typedef struct _xmlExpNode xmlExpNode;
|
|
Packit |
423ecb |
typedef struct _xmlRegexp xmlRegexp;
|
|
Packit |
423ecb |
typedef xmlRegexp * xmlRegexpPtr;
|
|
Packit |
423ecb |
typedef void xmlRegExecCallbacks (xmlRegExecCtxtPtr exec, const xmlChar * token, void * transdata, void * inputdata);
|
|
Packit |
423ecb |
xmlRegExecCtxtPtr xmlRegNewExecCtxt (xmlRegexpPtr comp, xmlRegExecCallbacks callback, void * data);
|
|
Packit |
423ecb |
xmlExpNodePtr xmlExpNewOr (xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right);
|
|
Packit |
423ecb |
void xmlRegFreeRegexp (xmlRegexpPtr regexp);
|
|
Packit |
423ecb |
void xmlExpRef (xmlExpNodePtr exp);
|
|
Packit |
423ecb |
int xmlRegexpIsDeterminist (xmlRegexpPtr comp);
|
|
Packit |
423ecb |
int xmlRegExecErrInfo (xmlRegExecCtxtPtr exec, const xmlChar ** string, int * nbval, int * nbneg, xmlChar ** values, int * terminal);
|
|
Packit |
423ecb |
void xmlRegFreeExecCtxt (xmlRegExecCtxtPtr exec);
|
|
Packit |
423ecb |
int xmlExpCtxtNbCons (xmlExpCtxtPtr ctxt);
|
|
Packit |
423ecb |
int xmlExpSubsume (xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub);
|
|
Packit |
423ecb |
int xmlRegExecPushString2 (xmlRegExecCtxtPtr exec, const xmlChar * value, const xmlChar * value2, void * data);
|
|
Packit |
423ecb |
int xmlRegExecNextValues (xmlRegExecCtxtPtr exec, int * nbval, int * nbneg, xmlChar ** values, int * terminal);
|
|
Packit |
423ecb |
xmlExpNodePtr xmlExpExpDerive (xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub);
|
|
Packit |
423ecb |
int xmlExpIsNillable (xmlExpNodePtr exp);
|
|
Packit |
423ecb |
void xmlExpFreeCtxt (xmlExpCtxtPtr ctxt);
|
|
Packit |
423ecb |
void xmlExpDump (xmlBufferPtr buf, xmlExpNodePtr expr);
|
|
Packit |
423ecb |
xmlExpNodePtr xmlExpNewSeq (xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right);
|
|
Packit |
423ecb |
void xmlExpFree (xmlExpCtxtPtr ctxt, xmlExpNodePtr exp);
|
|
Packit |
423ecb |
xmlExpNodePtr xmlExpNewRange (xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max);
|
|
Packit |
423ecb |
xmlRegexpPtr xmlRegexpCompile (const xmlChar * regexp);
|
|
Packit |
423ecb |
xmlExpNodePtr xmlExpNewAtom (xmlExpCtxtPtr ctxt, const xmlChar * name, int len);
|
|
Packit |
423ecb |
int xmlRegexpExec (xmlRegexpPtr comp, const xmlChar * content);
|
|
Packit |
423ecb |
int xmlRegExecPushString (xmlRegExecCtxtPtr exec, const xmlChar * value, void * data);
|
|
Packit |
423ecb |
int xmlExpGetStart (xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar ** tokList, int len);
|
|
Packit |
423ecb |
xmlExpNodePtr xmlExpParse (xmlExpCtxtPtr ctxt, const char * expr);
|
|
Packit |
423ecb |
xmlExpCtxtPtr xmlExpNewCtxt (int maxNodes, xmlDictPtr dict);
|
|
Packit |
423ecb |
int xmlExpGetLanguage (xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar ** langList, int len);
|
|
Packit |
423ecb |
xmlExpNodePtr xmlExpStringDerive (xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar * str, int len);
|
|
Packit |
423ecb |
int xmlExpCtxtNbNodes (xmlExpCtxtPtr ctxt);
|
|
Packit |
423ecb |
int xmlExpMaxToken (xmlExpNodePtr expr);
|
|
Packit |
423ecb |
void xmlRegexpPrint (FILE * output, xmlRegexpPtr regexp);
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Description
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Details
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
The content of this structure is not made public by the API.
|
|
Packit |
423ecb |
} xmlExpCtxt;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
The content of this structure is not made public by the API.
|
|
Packit |
423ecb |
} xmlExpNode;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
XML_EXP_EMPTY = 0
|
|
Packit |
423ecb |
XML_EXP_FORBID = 1
|
|
Packit |
423ecb |
XML_EXP_ATOM = 2
|
|
Packit |
423ecb |
XML_EXP_SEQ = 3
|
|
Packit |
423ecb |
XML_EXP_OR = 4
|
|
Packit |
423ecb |
XML_EXP_COUNT = 5
|
|
Packit |
423ecb |
};
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
The content of this structure is not made public by the API.
|
|
Packit |
423ecb |
} xmlRegExecCtxt;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
A libxml progressive regular expression evaluation context
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
The content of this structure is not made public by the API.
|
|
Packit |
423ecb |
} xmlRegexp;
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
A libxml regular expression; these can actually be far more complex than POSIX regular expressions.
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Callback function when doing a transition in the automata
|
|
Packit |
423ecb |
<tt>exec</tt>: | the regular expression context | <tt>token</tt>: | the current token string | <tt>transdata</tt>: | transition data | <tt>inputdata</tt>: | input data |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Debugging facility provides the number of allocated nodes over lifetime
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | an expression context | <tt>Returns</tt>: | the number of nodes ever allocated or -1 in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Debugging facility provides the number of allocated nodes at that point
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | an expression context | <tt>Returns</tt>: | the number of nodes in use or -1 in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Serialize the expression as compiled to the buffer
|
|
Packit |
423ecb |
<tt>buf</tt>: | a buffer to receive the output | <tt>expr</tt>: | the compiled expression |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Evaluates the expression resulting from @exp consuming a sub expression @sub. Based on algebraic derivation and sometimes direct Brzozowski derivation, it usually takes less than linear time and can handle expressions generating infinite languages.
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expressions context | <tt>exp</tt>: | the englobing expression | <tt>sub</tt>: | the subexpression | <tt>Returns</tt>: | the resulting expression or NULL in case of internal error, the result must be freed |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Dereference the expression
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expression context | <tt>exp</tt>: | the expression |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Free an expression context
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | an expression context |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Find all the strings used in @exp and store them in @langList
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expression context | <tt>exp</tt>: | the expression | <tt>langList</tt>: | where to store the tokens | <tt>len</tt>: | the allocated length of @langList | <tt>Returns</tt>: | the number of unique strings found, -1 in case of errors and -2 if there are more than @len strings |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Find all the strings that appear at the start of the languages accepted by @exp and store them in @tokList. E.g. for (a, b) | c it will return the list [a, c]
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expression context | <tt>exp</tt>: | the expression | <tt>tokList</tt>: | where to store the tokens | <tt>len</tt>: | the allocated length of @tokList | <tt>Returns</tt>: | the number of unique strings found, -1 in case of errors and -2 if there are more than @len strings |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Finds if the expression is nillable, i.e. if it accepts the empty sequence
|
|
Packit |
423ecb |
<tt>exp</tt>: | the expression | <tt>Returns</tt>: | 1 if nillable, 0 if not and -1 in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Indicate the maximum number of inputs an expression can accept
|
|
Packit |
423ecb |
<tt>expr</tt>: | a compiled expression | <tt>Returns</tt>: | the maximum length or -1 in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Get the atom associated to this name from that context
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expression context | <tt>name</tt>: | the atom name | <tt>len</tt>: | the atom name length in byte (or -1); | <tt>Returns</tt>: | the node or NULL in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Creates a new context for manipulating expressions
|
|
Packit |
423ecb |
<tt>maxNodes</tt>: | the maximum number of nodes | <tt>dict</tt>: | optional dictionary to use internally | <tt>Returns</tt>: | the context or NULL in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Get the atom associated to the choice @left | @right. Note that @left and @right are consumed in the operation, to keep a handle on them use xmlExpRef() and use xmlExpFree() to release them, this is true even in case of failure (unless ctxt == NULL).
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expression context | <tt>left</tt>: | left expression | <tt>right</tt>: | right expression | <tt>Returns</tt>: | the node or NULL in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Get the atom associated to the range (@subset){@min, @max}. Note that @subset is consumed in the operation, to keep a handle on it use xmlExpRef() and use xmlExpFree() to release it, this is true even in case of failure (unless ctxt == NULL).
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expression context | <tt>subset</tt>: | the expression to be repeated | <tt>min</tt>: | the lower bound for the repetition | <tt>max</tt>: | the upper bound for the repetition, -1 means infinite | <tt>Returns</tt>: | the node or NULL in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Get the atom associated to the sequence @left , @right. Note that @left and @right are consumed in the operation, to keep a handle on them use xmlExpRef() and use xmlExpFree() to release them, this is true even in case of failure (unless ctxt == NULL).
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expression context | <tt>left</tt>: | left expression | <tt>right</tt>: | right expression | <tt>Returns</tt>: | the node or NULL in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Minimal parser for regexps, it understands the following constructs: - string terminals - choice operator | - sequence operator , - subexpressions (...) - usual cardinality operators + * and ? - finite sequences { min, max } - infinite sequences { min, * }. Only minimal checking is done; in particular, there is no checking of string values.
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expressions context | <tt>expr</tt>: | the 0 terminated string | <tt>Returns</tt>: | a new expression or NULL in case of failure |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Increase the reference count of the expression
|
|
Packit |
423ecb |
<tt>exp</tt>: | the expression |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Do one step of Brzozowski derivation of the expression @exp with respect to the input string
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expression context | <tt>exp</tt>: | the expression | <tt>str</tt>: | the string | <tt>len</tt>: | the string len in bytes if available | <tt>Returns</tt>: | the resulting expression or NULL in case of internal error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Check whether @exp accepts all the languages accepted by @sub, the input being a subexpression.
|
|
Packit |
423ecb |
<tt>ctxt</tt>: | the expressions context | <tt>exp</tt>: | the englobing expression | <tt>sub</tt>: | the subexpression | <tt>Returns</tt>: | 1 if true 0 if false and -1 in case of failure. |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Extract error information from the regexp execution. The parameter @string will be updated with the value pushed and not accepted. The parameter @values must point to an array of @nbval string pointers; on return, nbval will contain the number of possible strings in that state and the @values array will be updated with them. The string values will be freed with the @exec context and don't need to be deallocated.
|
|
Packit |
423ecb |
<tt>exec</tt>: | a regexp execution context generating an error | <tt>string</tt>: | return value for the error string | <tt>nbval</tt>: | pointer to the number of accepted values IN/OUT | <tt>nbneg</tt>: | return number of negative transitions | <tt>values</tt>: | pointer to the array of acceptable values | <tt>terminal</tt>: | return value if this was a terminal state | <tt>Returns</tt>: | 0 in case of success or -1 in case of error. |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Extract information from the regexp execution. The parameter @values must point to an array of @nbval string pointers; on return, nbval will contain the number of possible strings in that state and the @values array will be updated with them. The string values will be freed with the @exec context and don't need to be deallocated.
|
|
Packit |
423ecb |
<tt>exec</tt>: | a regexp execution context | <tt>nbval</tt>: | pointer to the number of accepted values IN/OUT | <tt>nbneg</tt>: | return number of negative transitions | <tt>values</tt>: | pointer to the array of acceptable values | <tt>terminal</tt>: | return value if this was a terminal state | <tt>Returns</tt>: | 0 in case of success or -1 in case of error. |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Push one input token in the execution context
|
|
Packit |
423ecb |
<tt>exec</tt>: | a regexp execution context or NULL to indicate the end | <tt>value</tt>: | a string token input | <tt>data</tt>: | data associated to the token to reuse in callbacks | <tt>Returns</tt>: | 1 if the regexp reached a final state, 0 if non-final, and a negative value in case of error. |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Push one input token in the execution context
|
|
Packit |
423ecb |
<tt>exec</tt>: | a regexp execution context or NULL to indicate the end | <tt>value</tt>: | the first string token input | <tt>value2</tt>: | the second string token input | <tt>data</tt>: | data associated to the token to reuse in callbacks | <tt>Returns</tt>: | 1 if the regexp reached a final state, 0 if non-final, and a negative value in case of error. |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Free the structures associated to a regular expression evaluation context.
|
|
Packit |
423ecb |
<tt>exec</tt>: | a regular expression evaluation context |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Free a regexp
|
|
Packit |
423ecb |
<tt>regexp</tt>: | the regexp |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Build a context used for progressive evaluation of a regexp.
|
|
Packit |
423ecb |
<tt>comp</tt>: | a precompiled regular expression | <tt>callback</tt>: | a callback function used for handling progress in the automaton matching phase | <tt>data</tt>: | the context data associated to the callback in this context | <tt>Returns</tt>: | the new context |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Parses a regular expression conforming to XML Schemas Part 2 Datatype Appendix F and builds an automaton suitable for testing strings against that regular expression
|
|
Packit |
423ecb |
<tt>regexp</tt>: | a regular expression string | <tt>Returns</tt>: | the compiled expression or NULL in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Check if the regular expression generates the value
|
|
Packit |
423ecb |
<tt>comp</tt>: | the compiled regular expression | <tt>content</tt>: | the value to check against the regular expression | <tt>Returns</tt>: | 1 if it matches, 0 if not and a negative value in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Check if the regular expression is deterministic
|
|
Packit |
423ecb |
<tt>comp</tt>: | the compiled regular expression | <tt>Returns</tt>: | 1 if yes, 0 if not and a negative value in case of error |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
Print the content of the compiled regular expression
|
|
Packit |
423ecb |
<tt>output</tt>: | the file for the output debug | <tt>regexp</tt>: | the compiled regexp |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
|
|
Packit |
423ecb |
</body>
|
|
Packit |
423ecb |
</html>
|