Blame doc/example.html

Packit 423ecb
Packit 423ecb
Packit 423ecb
<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><link rel="SHORTCUT ICON" href="/favicon.ico" /><style type="text/css">
Packit 423ecb
TD {font-family: Verdana,Arial,Helvetica}
Packit 423ecb
BODY {font-family: Verdana,Arial,Helvetica; margin-top: 2em; margin-left: 0em; margin-right: 0em}
Packit 423ecb
H1 {font-family: Verdana,Arial,Helvetica}
Packit 423ecb
H2 {font-family: Verdana,Arial,Helvetica}
Packit 423ecb
H3 {font-family: Verdana,Arial,Helvetica}
Packit 423ecb
A:link, A:visited, A:active { text-decoration: underline }
Packit 423ecb
</style><title>A real example</title></head><body bgcolor="#8b7765" text="#000000" link="#a06060" vlink="#000000">
Action against software patents | Gnome2 Logo | W3C Logo | Red Hat Logo
Made with Libxml2 Logo

The XML C parser and toolkit of Gnome

A real example

<center>Developer Menu</center>
<form action="search.php" enctype="application/x-www-form-urlencoded" method="get"><input name="query" type="text" size="20" value="" /><input name="submit" type="submit" value="Search ..." /></form>
<center>API Indexes</center>
<center>Related links</center>

Here is a real size example, where the actual content of the application

Packit 423ecb
data is not kept in the DOM tree but uses internal structures. It is based on
Packit 423ecb
a proposal to keep a database of jobs related to Gnome, with an XML based
Packit 423ecb
storage structure. Here is an XML encoded jobs
Packit 423ecb
base:

<?xml version="1.0"?>
Packit 423ecb
<gjob:Helping xmlns:gjob="http://www.gnome.org/some-location">
Packit 423ecb
  <gjob:Jobs>
Packit 423ecb
Packit 423ecb
    <gjob:Job>
Packit 423ecb
      <gjob:Project ID="3"/>
Packit 423ecb
      <gjob:Application>GBackup</gjob:Application>
Packit 423ecb
      <gjob:Category>Development</gjob:Category>
Packit 423ecb
Packit 423ecb
      <gjob:Update>
Packit 423ecb
        <gjob:Status>Open</gjob:Status>
Packit 423ecb
        <gjob:Modified>Mon, 07 Jun 1999 20:27:45 -0400 MET DST</gjob:Modified>
Packit 423ecb
        <gjob:Salary>USD 0.00</gjob:Salary>
Packit 423ecb
      </gjob:Update>
Packit 423ecb
Packit 423ecb
      <gjob:Developers>
Packit 423ecb
        <gjob:Developer>
Packit 423ecb
        </gjob:Developer>
Packit 423ecb
      </gjob:Developers>
Packit 423ecb
Packit 423ecb
      <gjob:Contact>
Packit 423ecb
        <gjob:Person>Nathan Clemons</gjob:Person>
Packit 423ecb
        <gjob:Email>nathan@windsofstorm.net</gjob:Email>
Packit 423ecb
        <gjob:Company>
Packit 423ecb
        </gjob:Company>
Packit 423ecb
        <gjob:Organisation>
Packit 423ecb
        </gjob:Organisation>
Packit 423ecb
        <gjob:Webpage>
Packit 423ecb
        </gjob:Webpage>
Packit 423ecb
        <gjob:Snailmail>
Packit 423ecb
        </gjob:Snailmail>
Packit 423ecb
        <gjob:Phone>
Packit 423ecb
        </gjob:Phone>
Packit 423ecb
      </gjob:Contact>
Packit 423ecb
Packit 423ecb
      <gjob:Requirements>
Packit 423ecb
      The program should be released as free software, under the GPL.
Packit 423ecb
      </gjob:Requirements>
Packit 423ecb
Packit 423ecb
      <gjob:Skills>
Packit 423ecb
      </gjob:Skills>
Packit 423ecb
Packit 423ecb
      <gjob:Details>
Packit 423ecb
      A GNOME based system that will allow a superuser to configure 
Packit 423ecb
      compressed and uncompressed files and/or file systems to be backed 
Packit 423ecb
      up with a supported media in the system.  This should be able to 
Packit 423ecb
      perform via find commands generating a list of files that are passed 
Packit 423ecb
      to tar, dd, cpio, cp, gzip, etc., to be directed to the tape machine 
Packit 423ecb
      or via operations performed on the filesystem itself. Email 
Packit 423ecb
      notification and GUI status display very important.
Packit 423ecb
      </gjob:Details>
Packit 423ecb
Packit 423ecb
    </gjob:Job>
Packit 423ecb
Packit 423ecb
  </gjob:Jobs>
Packit 423ecb
</gjob:Helping>

While loading the XML file into an internal DOM tree is a matter of

Packit 423ecb
calling only a couple of functions, browsing the tree to gather the data and
Packit 423ecb
generate the internal structures is harder, and more error prone.

The suggested principle is to be tolerant with respect to the input

Packit 423ecb
structure. For example, the ordering of the attributes is not significant;
Packit 423ecb
the XML specification is clear about it. It's also usually a good idea not to
Packit 423ecb
depend on the order of the children of a given node, unless doing so really makes
Packit 423ecb
things harder. Here is some code to parse the information for a person:

/*
Packit 423ecb
 * A person record
 *
 * Every field is a char pointer. The parsePerson() sample in this document
 * fills in only name and email; the remaining fields are left as the zero
 * bytes written by its memset().
Packit 423ecb
 */
Packit 423ecb
typedef struct person {
Packit 423ecb
    char *name;         /* set by parsePerson() from the "Person" element */
Packit 423ecb
    char *email;        /* set by parsePerson() from the "Email" element */
Packit 423ecb
    char *company;      /* not filled by the parsePerson() sample shown here */
Packit 423ecb
    char *organisation; /* not filled by the parsePerson() sample shown here */
Packit 423ecb
    char *smail;        /* not filled by the parsePerson() sample shown here */
Packit 423ecb
    char *webPage;      /* not filled by the parsePerson() sample shown here */
Packit 423ecb
    char *phone;        /* not filled by the parsePerson() sample shown here */
Packit 423ecb
} person, *personPtr;
Packit 423ecb
Packit 423ecb
/*
Packit 423ecb
 * And the code needed to parse it
 *
 * parsePerson() allocates a zero-filled person record and walks the direct
 * children of cur. A child named "Person" whose namespace pointer equals ns
 * supplies ret->name, and a child named "Email" in that namespace supplies
 * ret->email; every other child is ignored and every other field stays zeroed.
 *
 * doc: document handed to xmlNodeListGetString() for each extracted value
 * ns:  namespace pointer compared by identity (cur->ns == ns)
 * cur: parent node; it is dereferenced without a NULL check
 *
 * Returns the new record, or NULL (after printing "out of memory" to stderr)
 * when malloc() fails.
 *
 * If the same element appears more than once, the later value replaces the
 * earlier pointer and nothing here releases the earlier one.
 *
 * NOTE(review): libxml2 presumably declares node names and the result of
 * xmlNodeListGetString() as xmlChar strings, so the strcmp() calls and the
 * assignments to char * fields likely need casts or xmlStrcmp() - confirm
 * against the libxml2 headers.
 * NOTE(review): the caller presumably owns the returned strings and has to
 * release them with the libxml2 deallocator - confirm.
Packit 423ecb
 */
Packit 423ecb
personPtr parsePerson(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) {
Packit 423ecb
    personPtr ret = NULL;
Packit 423ecb
Packit 423ecb
/* DEBUG is not defined anywhere in this excerpt */
DEBUG("parsePerson\n");
Packit 423ecb
    /*
Packit 423ecb
     * allocate the struct
Packit 423ecb
     */
Packit 423ecb
    ret = (personPtr) malloc(sizeof(person));
Packit 423ecb
    if (ret == NULL) {
Packit 423ecb
        fprintf(stderr,"out of memory\n");
Packit 423ecb
        return(NULL);
Packit 423ecb
    }
Packit 423ecb
    /* zero every field so anything not parsed below reads as empty */
    memset(ret, 0, sizeof(person));
Packit 423ecb
Packit 423ecb
    /* We don't care what the top level element name is */
Packit 423ecb
    cur = cur->xmlChildrenNode;
Packit 423ecb
    /* visit each direct child; order of the children does not matter */
    while (cur != NULL) {
Packit 423ecb
        if ((!strcmp(cur->name, "Person")) && (cur->ns == ns))
Packit 423ecb
            ret->name = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
Packit 423ecb
        if ((!strcmp(cur->name, "Email")) && (cur->ns == ns))
Packit 423ecb
            ret->email = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
Packit 423ecb
        cur = cur->next;
Packit 423ecb
    }
Packit 423ecb
Packit 423ecb
    return(ret);
Packit 423ecb
}

Here are a couple of things to notice:

    Packit 423ecb
      
  • Usually a recursive parsing style is the most convenient one: XML data
  • Packit 423ecb
        is by nature subject to repetitive constructs and usually exhibits highly
    Packit 423ecb
        structured patterns.
    Packit 423ecb
      
  • Note the two arguments of type xmlDocPtr and xmlNsPtr,
  • Packit 423ecb
        i.e. the pointer to the global XML document and the namespace reserved for
    Packit 423ecb
        the application. Document-wide information is needed, for example, to
    Packit 423ecb
        decode entities and it's a good coding practice to define a namespace for
    Packit 423ecb
        your application's set of data and test that the elements and attributes
    Packit 423ecb
        you're analyzing actually pertain to your application space. This is
    Packit 423ecb
        done by a simple equality test (cur->ns == ns).
    Packit 423ecb
      
  • To retrieve text and attribute values, you can use the function
  • Packit 423ecb
        xmlNodeListGetString to gather all the text and entity reference
    Packit 423ecb
        nodes generated by the DOM output and produce a single text string.
    Packit 423ecb

    Here is another piece of code used to parse another level of the

    Packit 423ecb
    structure:

    #include <libxml/tree.h>
    Packit 423ecb
    /*
    Packit 423ecb
     * a Description for a Job
     *
     * The parseJob() sample in this document fills projectID, application,
     * category and contact. It never touches nbDevelopers or developers, so
     * those keep the zero bytes written by its memset().
    Packit 423ecb
     */
    Packit 423ecb
    typedef struct job {
    Packit 423ecb
        char *projectID;    /* "ID" attribute of the "Project" element */
    Packit 423ecb
        char *application;  /* text of the "Application" element */
    Packit 423ecb
        char *category;     /* text of the "Category" element */
    Packit 423ecb
        personPtr contact;  /* result of parsePerson() on the "Contact" element */
    Packit 423ecb
        int nbDevelopers;   /* not set by the parseJob() sample shown here */
    Packit 423ecb
        personPtr developers[100]; /* using dynamic alloc is left as an exercise */
    Packit 423ecb
    } job, *jobPtr;
    Packit 423ecb
    Packit 423ecb
    /*
    Packit 423ecb
     * And the code needed to parse it
     *
     * parseJob() allocates a zero-filled job record and walks the direct
     * children of cur, looking only at children whose namespace pointer
     * equals ns:
     *   "Project"     -> projectID from the "ID" attribute; a missing
     *                    attribute prints "Project has no ID" to stderr and
     *                    parsing continues
     *   "Application" -> application, via xmlNodeListGetString()
     *   "Category"    -> category, via xmlNodeListGetString()
     *   "Contact"     -> contact, via parsePerson(); a NULL result is stored
     *                    as is
     * Any other child is ignored, so nbDevelopers and developers are never
     * populated by this code.
     *
     * doc: document handed to xmlNodeListGetString() and parsePerson()
     * ns:  namespace pointer compared by identity (cur->ns == ns)
     * cur: parent node; it is dereferenced without a NULL check
     *
     * Returns the new record, or NULL (after printing "out of memory" to
     * stderr) when malloc() fails.
     *
     * NOTE(review): libxml2 presumably uses xmlChar strings for node names,
     * attribute names and returned values, so the strcmp() calls, the "ID"
     * argument and the assignments to char * fields likely need casts or
     * the xmlStr* helpers - confirm against the libxml2 headers.
    Packit 423ecb
     */
    Packit 423ecb
    jobPtr parseJob(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) {
    Packit 423ecb
        jobPtr ret = NULL;
    Packit 423ecb
    Packit 423ecb
    /* DEBUG is not defined anywhere in this excerpt */
    DEBUG("parseJob\n");
    Packit 423ecb
        /*
    Packit 423ecb
         * allocate the struct
    Packit 423ecb
         */
    Packit 423ecb
        ret = (jobPtr) malloc(sizeof(job));
    Packit 423ecb
        if (ret == NULL) {
    Packit 423ecb
            fprintf(stderr,"out of memory\n");
    Packit 423ecb
            return(NULL);
    Packit 423ecb
        }
    Packit 423ecb
        /* zero every field so anything not parsed below reads as empty */
        memset(ret, 0, sizeof(job));
    Packit 423ecb
    Packit 423ecb
        /* We don't care what the top level element name is */
    Packit 423ecb
        cur = cur->xmlChildrenNode;
    Packit 423ecb
        while (cur != NULL) {
    Packit 423ecb
            
    Packit 423ecb
            if ((!strcmp(cur->name, "Project")) && (cur->ns == ns)) {
    Packit 423ecb
                ret->projectID = xmlGetProp(cur, "ID");
    Packit 423ecb
                if (ret->projectID == NULL) {
    Packit 423ecb
                    fprintf(stderr, "Project has no ID\n");
    Packit 423ecb
                }
    Packit 423ecb
            }
    Packit 423ecb
            if ((!strcmp(cur->name, "Application")) && (cur->ns == ns))
    Packit 423ecb
                ret->application = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
    Packit 423ecb
            if ((!strcmp(cur->name, "Category")) && (cur->ns == ns))
    Packit 423ecb
                ret->category = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
    Packit 423ecb
            /* the nested contact record is delegated to parsePerson() */
            if ((!strcmp(cur->name, "Contact")) && (cur->ns == ns))
    Packit 423ecb
                ret->contact = parsePerson(doc, ns, cur);
    Packit 423ecb
            cur = cur->next;
    Packit 423ecb
        }
    Packit 423ecb
    Packit 423ecb
        return(ret);
    Packit 423ecb
    }

    Once you are used to it, writing this kind of code is quite simple, but

    Packit 423ecb
    boring. Ultimately, it could be possible to write stubbers taking either C
    Packit 423ecb
    data structure definitions, a set of XML examples or an XML DTD and producing
    Packit 423ecb
    the code needed to import and export the content between C data and XML
    Packit 423ecb
    storage. This is left as an exercise to the reader :-)

    Feel free to use the code for the full C

    Packit 423ecb
    parsing example as a template; it is also available with a Makefile in the
    Packit 423ecb
    Gnome SVN base under libxml2/example

    Daniel Veillard

    </body></html>