/*
* xml.c
*
* Copyright (c) Chris Putnam 2004-2018
*
* Source code released under the GPL version 2
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "is_ws.h"
#include "strsearch.h"
#include "xml.h"
/*
 * xml_pns
 *
 * Optional namespace prefix.  While it is NULL, xml_tag_matches() compares
 * tags exactly as given and xml_find_end() looks for "</tag>".  When it is
 * set, both use the prefixed form "xml_pns:tag" instead.
 */
char *xml_pns = NULL;
/*
 * xml_init
 *
 * Prepare a caller-provided node for use: the tag, the value and both
 * attribute lists are initialized and the node is linked to nothing.
 */
void
xml_init( xml *node )
{
	/* not part of any tree yet */
	node->next = NULL;
	node->down = NULL;

	/* parallel lists: attributes[i] is paired with attribute_values[i] */
	slist_init( &(node->attribute_values) );
	slist_init( &(node->attributes) );

	str_init( &(node->value) );
	str_init( &(node->tag) );
}
/*
 * xml_new
 *
 * Allocate a node on the heap and initialize it with xml_init().
 * Returns NULL when the allocation fails.
 */
static xml *
xml_new( void )
{
	xml *fresh = malloc( sizeof( *fresh ) );

	if ( fresh == NULL ) return NULL;

	xml_init( fresh );
	return fresh;
}
/*
 * xml_delete
 *
 * Destroy a heap-allocated node: xml_free() releases what the node holds
 * (including the nodes below and after it), then the node itself is freed.
 */
static void
xml_delete( xml *node )
{
	xml *doomed = node;

	xml_free( doomed );	/* contents, children and following siblings */
	free( doomed );		/* the node structure itself */
}
/*
 * xml_free
 *
 * Release everything a node holds -- tag, value, both attribute lists,
 * the subtree below it and every sibling after it -- but not the node
 * structure itself (xml_delete() does that for heap-allocated nodes).
 *
 * A NULL node is ignored.  On return node->down and node->next are NULL,
 * so they no longer point at freed memory.
 *
 * The sibling chain is walked in a loop rather than recursively: the
 * recursion depth is then bounded by the nesting depth of the document
 * and not by the number of siblings in a list.
 */
void
xml_free( xml *node )
{
	xml *sibling, *following;

	if ( !node ) return;

	str_free( &(node->tag) );
	str_free( &(node->value) );
	slist_free( &(node->attributes) );
	slist_free( &(node->attribute_values) );

	if ( node->down ) xml_delete( node->down );
	node->down = NULL;

	sibling = node->next;
	node->next = NULL;
	while ( sibling ) {
		following = sibling->next;
		/* detach first, so xml_delete() releases only this sibling */
		sibling->next = NULL;
		xml_delete( sibling );
		sibling = following;
	}
}
/*
 * Kinds of tag distinguished by the parser.
 */
enum {
	XML_DESCRIPTOR,		/* <?.....>      */
	XML_COMMENT,		/* <!-- .... --> */
	XML_OPEN,		/* <A>           */
	XML_CLOSE,		/* </A>          */
	XML_OPENCLOSE		/* <A/>          */
};

/*
 * xml_is_terminator
 *
 * Returns 1 if p points at a sequence that ends a tag of kind *type,
 * 0 otherwise:
 *
 *   ">"   ends any tag
 *   "/>"  ends XML_OPEN and XML_OPENCLOSE tags; an XML_OPEN tag is
 *         re-classified as XML_OPENCLOSE through *type
 *   "?>"  ends XML_DESCRIPTOR tags
 *   "!>"  ends XML_COMMENT tags
 */
static int
xml_is_terminator( char *p, int *type )
{
	char first = p[0];

	if ( first == '>' ) return 1;

	/* every other terminator is two characters long and ends in '>';
	 * p[1] is only read once p[0] is known not to be the NUL */
	if ( first != '/' && first != '?' && first != '!' ) return 0;
	if ( p[1] != '>' ) return 0;

	switch ( first ) {
	case '/':
		if ( *type == XML_OPENCLOSE ) return 1;
		if ( *type == XML_OPEN ) {
			*type = XML_OPENCLOSE;
			return 1;
		}
		return 0;
	case '?':
		return ( *type == XML_DESCRIPTOR ) ? 1 : 0;
	default: /* '!' */
		return ( *type == XML_COMMENT ) ? 1 : 0;
	}
}
/*
 * xml_add_attribute
 *
 * Append one name/value pair to the node's attribute lists.  A NULL
 * name or value is stored as the empty string.
 *
 * Returns 1 on success and 0 if either list could not be extended; in
 * the latter case the name is removed again so that both lists keep the
 * same number of entries.
 */
static int
xml_add_attribute( xml *node, char *attribute, char *attribute_value )
{
	char *name = attribute ? attribute : "";
	char *val  = attribute_value ? attribute_value : "";

	if ( slist_addc( &(node->attributes), name ) == NULL ) return 0;

	if ( slist_addc( &(node->attribute_values), val ) == NULL ) {
		/* undo the name that was just appended */
		(void) slist_remove( &(node->attributes), node->attributes.n-1 );
		return 0;
	}

	return 1;
}
/*
 * xml_processattrib
 *
 * Parse the attribute list of a tag, starting at p, and add every
 * name/value pair to node with xml_add_attribute().
 *
 *   - names end at '=', a space, a tab or a tag terminator
 *   - values may be wrapped in double or single quotes; a quoted value
 *     may contain spaces, tabs, '=' and terminator characters
 *   - a name without a value is stored with an empty value
 *   - a value without a name is dropped
 *
 * type is handed to xml_is_terminator(), which may change it from
 * XML_OPEN to XML_OPENCLOSE when it meets "/>".
 *
 * Returns a pointer to the terminator that ended the list, or to the
 * terminating NUL if the input ran out first.
 */
static char *
xml_processattrib( char *p, xml *node, int *type )
{
	char quote_character = '\"';
	int inquotes = 0;
	str aname, aval;

	str_init( &aname );
	str_init( &aval );

	while ( *p && !xml_is_terminator( p, type ) ) {

		/* get attribute name */
		while ( *p==' ' || *p=='\t' ) p++;
		while ( *p && !strchr( "= \t", *p ) && !xml_is_terminator( p, type ) ){
			str_addchar( &aname, *p );
			p++;
		}

		/* equals sign */
		while ( *p==' ' || *p=='\t' ) p++;
		if ( *p=='=' ) p++;
		while ( *p==' ' || *p=='\t' ) p++;

		/* get attribute value */
		if ( *p=='\"' || *p=='\'' ) {
			/* The closing quote must match the one that opened THIS
			 * value.  Recording it for both kinds of quote keeps a
			 * double-quoted value that follows a single-quoted one
			 * from running on to the end of the input. */
			quote_character = *p;
			inquotes=1;
			p++;
		}
		while ( *p && ((!xml_is_terminator(p,type) && !strchr("= \t", *p ))||inquotes)){
			if ( *p==quote_character ) inquotes=0;
			else str_addchar( &aval, *p );
			p++;
		}

		if ( str_has_value( &aname ) ) {
			xml_add_attribute( node, str_cstr( &aname ), str_cstr( &aval ) );
		}

		/* reuse the buffers for the next pair */
		str_empty( &aname );
		str_empty( &aval );
	}

	str_free( &aname );
	str_free( &aval );

	return p;
}
/*
 * xml_processtag
 *
 * Parse one tag.  p must point right after the '<'.
 *
 * On return *type holds the kind of tag that was found:
 *
 *   XML_COMMENT     <!-- .... -->   (anything that starts with "<!")
 *   XML_DESCRIPTOR  <?.....>
 *   XML_OPEN        <A>
 *   XML_CLOSE       </A>
 *   XML_OPENCLOSE   <A/>
 *
 * For every kind except XML_COMMENT the tag name is copied into
 * node->tag (without the leading '?' for descriptors, with the leading
 * '/' for closing tags) and attributes that follow a space or a tab are
 * added to node by xml_processattrib().
 *
 * A "<!--" comment is skipped up to its closing "-->", so a '>' inside
 * the comment text does not end it.  If no "-->" follows, and for every
 * other "<!" construct, the tag ends at the first '>'.
 *
 * Returns a pointer just past the closing '>', or to the terminating
 * NUL if the input ran out first.
 */
static char *
xml_processtag( char *p, xml *node, int *type )
{
	str tag;

	str_init( &tag );

	if ( *p=='!' ) {
		*type = XML_COMMENT;
		if ( !strncmp( p, "!--", 3 ) ) {
			/* searching from p+1 lets the degenerate "<!-->" end at
			 * its own '>' as it always did */
			char *end = strstr( p+1, "-->" );
			if ( end ) p = end + 2;	/* the '>' of "-->" */
		}
	}
	else {
		if ( *p=='?' ) {
			*type = XML_DESCRIPTOR;
			p++; /* skip '?' */
		}
		else if ( *p=='/' ) *type = XML_CLOSE;
		else *type = XML_OPEN;

		/* tag name, then optional attributes */
		while ( *p && !strchr( " \t", *p ) && !xml_is_terminator(p,type) )
			str_addchar( &tag, *p++ );
		if ( *p==' ' || *p=='\t' )
			p = xml_processattrib( p, node, type );
	}

	/* step over whatever is left of the tag, including its '>' */
	while ( *p && *p!='>' ) p++;
	if ( *p=='>' ) p++;

	str_strcpy( &(node->tag), &tag );
	str_free( &tag );

	return p;
}
/*
 * xml_appendnode
 *
 * Make nnode the last child of onode: it becomes onode->down when onode
 * has no children yet, otherwise it is hooked onto the end of the
 * existing children's ->next chain.
 */
static void
xml_appendnode( xml *onode, xml *nnode )
{
	xml **slot = &(onode->down);

	/* find the first empty link in the chain of children */
	while ( *slot ) slot = &((*slot)->next);

	*slot = nnode;
}
/*
 * xml_parse
 *
 * Parse the text at p into the tree rooted at onode.
 *
 * Text found between tags is appended to onode->value.  Leading white
 * space is skipped unless onode's tag is "style" (compared without
 * regard to case), where it is kept.  Every opening, self-closing or
 * descriptor tag becomes a new child of onode; the content of an
 * opening tag is parsed recursively into that child.  Comments are
 * discarded.  A closing tag ends the current level; it is not checked
 * against onode's own tag.
 *
 * Returns a pointer just past the closing tag that ended this level, or
 * to the terminating NUL.  If a node cannot be allocated, parsing stops
 * and a pointer to the terminating NUL is returned, so the tree built so
 * far stays valid but is incomplete.
 */
char *
xml_parse( char *p, xml *onode )
{
	const char *otag = str_cstr( &(onode->tag) );
	int type, is_style;
	xml *nnode;

	/* retain white space for <style> tags in endnote xml;
	 * onode's tag does not change while its content is parsed */
	is_style = ( otag && !strcasecmp( otag, "style" ) );

	while ( *p ) {

		/* text up to the next tag */
		while ( *p && *p!='<' ) {
			if ( onode->value.len>0 || is_style || !is_ws( *p ) )
				str_addchar( &(onode->value), *p );
			p++;
		}
		if ( *p!='<' ) break;

		nnode = xml_new();
		if ( !nnode ) {
			/* out of memory: give up on the rest of the input at
			 * every nesting level instead of dereferencing NULL */
			p += strlen( p );
			break;
		}

		p = xml_processtag( p+1, nnode, &type );

		if ( type==XML_OPEN || type==XML_OPENCLOSE || type==XML_DESCRIPTOR ) {
			xml_appendnode( onode, nnode );
			if ( type==XML_OPEN )
				p = xml_parse( p, nnode );
		} else {
			/* closing tags and comments are not kept in the tree */
			xml_delete( nnode );
			/* assume the closing tag belongs to onode for now */
			if ( type==XML_CLOSE ) break;
		}
	}

	return p;
}
/*
 * xml_draw
 *
 * Debugging aid: print node, its attributes, its subtree and all of its
 * following siblings to stdout.  n is the nesting level; every line is
 * preceded by n spaces.  A NULL node prints nothing.
 *
 * Siblings are walked in a loop, so recursion only happens when going
 * down a level.  A string that is NULL is printed as the empty string,
 * because handing NULL to a %s conversion is undefined behavior.
 */
void
xml_draw( xml *node, int n )
{
	const char *tag, *value, *aname, *aval;
	slist_index j;
	int i;

	for ( ; node; node = node->next ) {

		tag   = str_cstr( &(node->tag) );
		value = str_cstr( &(node->value) );

		for ( i=0; i<n; ++i ) printf( " " );
		printf("n=%d tag='%s' value='%s'\n", n,
			tag ? tag : "", value ? value : "" );

		for ( j=0; j<node->attributes.n; ++j ) {
			aname = slist_cstr( &(node->attributes), j );
			aval  = slist_cstr( &(node->attribute_values), j );
			for ( i=0; i<n; ++i ) printf( " " );
			printf( " attribute='%s' value='%s'\n",
				aname ? aname : "", aval ? aval : "" );
		}

		if ( node->down ) xml_draw( node->down, n+1 );
	}
}
/*
 * xml_find_start
 *
 * Find the first start tag for 'tag' in buffer, written either as
 * "<tag " (attributes follow) or as "<tag>".
 *
 * Both spellings are searched and the match that comes first in the
 * buffer is returned.  Searching "<tag>" only when "<tag " does not
 * occur anywhere would skip a bare start tag that precedes one carrying
 * attributes.
 *
 * Returns a pointer to the '<' of the match, or NULL if neither form
 * occurs.
 */
char *
xml_find_start( char *buffer, char *tag )
{
	char *with_attrs, *bare = NULL;
	str starttag;

	str_initstrsc( &starttag, "<", tag, " ", NULL );
	with_attrs = strsearch( buffer, str_cstr( &starttag ) );

	if ( starttag.data && starttag.len > 0 ) {
		/* turn "<tag " into "<tag>" in place */
		starttag.data[ starttag.len-1 ] = '>';
		bare = strsearch( buffer, str_cstr( &starttag ) );
	}

	str_free( &starttag );

	if ( !with_attrs ) return bare;
	if ( !bare ) return with_attrs;
	return ( bare < with_attrs ) ? bare : with_attrs;
}
/*
 * xml_find_end
 *
 * Find the end tag for 'tag' in buffer: "</tag>", or "</xml_pns:tag>"
 * when a namespace prefix is set in xml_pns.
 *
 * Returns a pointer just past the '>' that closes the end tag, or NULL
 * if the end tag does not occur.
 */
char *
xml_find_end( char *buffer, char *tag )
{
	str endtag;
	char *hit;

	if ( !xml_pns )
		str_initstrsc( &endtag, "</", tag, ">", NULL );
	else
		str_initstrsc( &endtag, "</", xml_pns, ":", tag, ">", NULL );

	hit = strsearch( buffer, str_cstr( &endtag ) );

	if ( hit && *hit ) {
		/* step off the first character of the match, then move on
		 * until the character just passed is the closing '>' */
		for ( hit++; *hit && hit[-1]!='>'; hit++ )
			;
	}

	str_free( &endtag );

	return hit;
}
/*
 * xml_tag_matches_simple
 *
 * Returns 1 if the node's tag has the same length as 'tag' and equals
 * it when compared without regard to case, 0 otherwise.
 */
static int
xml_tag_matches_simple( xml* node, const char *tag )
{
	/* the length test comes first; the strings are only compared when
	 * the lengths agree */
	if ( node->tag.len==strlen( tag ) &&
	     !strcasecmp( str_cstr( &(node->tag) ), tag ) )
		return 1;

	return 0;
}
/*
 * xml_tag_matches_pns
 *
 * Like xml_tag_matches_simple(), but the node's tag is compared with
 * the namespace-qualified name "xml_pns:tag".
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
xml_tag_matches_pns( xml* node, const char *tag )
{
	str qualified;
	int same;

	str_initstrsc( &qualified, xml_pns, ":", tag, NULL );

	same = ( node->tag.len==qualified.len &&
	         !strcasecmp( str_cstr( &(node->tag) ), str_cstr( &qualified ) ) );

	str_free( &qualified );

	return same;
}
/*
 * xml_tag_matches
 *
 * Returns 1 if the node's tag matches 'tag', 0 otherwise.  When a
 * namespace prefix is set in xml_pns the node's tag has to match
 * "xml_pns:tag"; without one it has to match 'tag' itself.
 */
int
xml_tag_matches( xml *node, const char *tag )
{
	return xml_pns ? xml_tag_matches_pns( node, tag )
	               : xml_tag_matches_simple( node, tag );
}
/*
 * xml_tag_matches_has_value
 *
 * Returns 1 if the node's tag matches 'tag' (see xml_tag_matches()) and
 * the node has a value (see xml_has_value()), 0 otherwise.
 */
int
xml_tag_matches_has_value( xml *node, const char *tag )
{
	if ( !xml_tag_matches( node, tag ) ) return 0;
	if ( !xml_has_value( node ) ) return 0;
	return 1;
}
/*
 * xml_has_attribute
 *
 * Returns 1 if the node carries an attribute whose name equals
 * 'attribute' and whose value equals 'attribute_value', both compared
 * without regard to case; 0 otherwise.
 */
int
xml_has_attribute( xml *node, const char *attribute, const char *attribute_value )
{
	slist_index i = 0;
	char *name, *val;

	while ( i < node->attributes.n ) {
		name = slist_cstr( &(node->attributes), i );
		val  = slist_cstr( &(node->attribute_values), i );

		/* entries without a name or a value can never match */
		if ( name && val &&
		     !strcasecmp( name, attribute ) &&
		     !strcasecmp( val, attribute_value ) )
			return 1;

		++i;
	}

	return 0;
}
/*
 * xml_tag_has_attribute
 *
 * Returns non-zero if the node's tag matches 'tag' and the node carries
 * the given attribute/value pair (see xml_has_attribute()); 0 otherwise.
 */
int
xml_tag_has_attribute( xml *node, const char *tag, const char *attribute, const char *attribute_value )
{
	if ( xml_tag_matches( node, tag ) )
		return xml_has_attribute( node, attribute, attribute_value );

	return 0;
}
/*
 * xml_attribute
 *
 * Look up an attribute by name with slist_findc().
 *
 * Returns the entry of the value list at the position where the name
 * was found, or NULL if the node has no attribute of that name.
 */
str *
xml_attribute( xml *node, const char *attribute )
{
	slist_index where = slist_findc( &(node->attributes), attribute );

	if ( !slist_wasnotfound( &(node->attributes), where ) )
		return slist_str( &(node->attribute_values), where );

	return NULL;
}
/*
 * xml_has_value
 *
 * Returns 1 if node is not NULL and str_has_value() reports a value for
 * it, 0 otherwise.
 */
int
xml_has_value( xml *node )
{
	if ( !node ) return 0;

	return str_has_value( &(node->value) ) ? 1 : 0;
}
/*
 * xml_tag
 *
 * Returns the address of the node's own tag string (not a copy).
 */
str *
xml_tag( xml *node )
{
	str *tag = &node->tag;

	return tag;
}
/*
 * xml_tag_cstr
 *
 * Returns the node's tag as a C string, as delivered by str_cstr().
 */
char *
xml_tag_cstr( xml *node )
{
	str *tag = &node->tag;

	return str_cstr( tag );
}
/*
 * xml_value
 *
 * Returns the address of the node's own value string (not a copy).
 */
str *
xml_value( xml *node )
{
	str *value = &node->value;

	return value;
}
/*
 * xml_value_cstr
 *
 * Returns the node's value as a C string, as delivered by str_cstr().
 */
char *
xml_value_cstr( xml *node )
{
	str *value = &node->value;

	return str_cstr( value );
}