Blame bibutils/xml.c

Packit 89ede9
/*
Packit 89ede9
 * xml.c
Packit 89ede9
 *
Packit 89ede9
 * Copyright (c) Chris Putnam 2004-2018
Packit 89ede9
 *
Packit 89ede9
 * Source code released under the GPL version 2
Packit 89ede9
 *
Packit 89ede9
 */
Packit 89ede9
#include <stdio.h>
Packit 89ede9
#include <stdlib.h>
Packit 89ede9
#include <string.h>
Packit 89ede9
#include "is_ws.h"
Packit 89ede9
#include "strsearch.h"
Packit 89ede9
#include "xml.h"
Packit 89ede9
Packit 89ede9
/*
 * Optional namespace prefix. When non-NULL, xml_find_end() and
 * xml_tag_matches() look for "<xml_pns>:<tag>" instead of the bare tag.
 */
char *xml_pns = NULL;
Packit 89ede9
Packit 89ede9
void
Packit 89ede9
xml_init( xml *node )
Packit 89ede9
{
Packit 89ede9
	str_init( &(node->tag) );
Packit 89ede9
	str_init( &(node->value) );
Packit 89ede9
	slist_init( &(node->attributes) );
Packit 89ede9
	slist_init( &(node->attribute_values) );
Packit 89ede9
	node->down = NULL;
Packit 89ede9
	node->next = NULL;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*
 * xml_new
 *
 * Allocate a node on the heap and initialize it with xml_init().
 * Returns NULL if the allocation fails.
 */
static xml *
xml_new( void )
{
	xml *fresh = malloc( sizeof( *fresh ) );

	if ( fresh==NULL ) return NULL;

	xml_init( fresh );
	return fresh;
}
Packit 89ede9
Packit 89ede9
static void
Packit 89ede9
xml_delete( xml *node )
Packit 89ede9
{
Packit 89ede9
	xml_free( node );
Packit 89ede9
	free( node );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
void
Packit 89ede9
xml_free( xml *node )
Packit 89ede9
{
Packit 89ede9
	str_free( &(node->tag) );
Packit 89ede9
	str_free( &(node->value) );
Packit 89ede9
	slist_free( &(node->attributes) );
Packit 89ede9
	slist_free( &(node->attribute_values) );
Packit 89ede9
	if ( node->down ) xml_delete( node->down );
Packit 89ede9
	if ( node->next ) xml_delete( node->next );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/* Kinds of tag recognized by xml_processtag(). */
enum {
	XML_DESCRIPTOR = 0,
	XML_COMMENT    = 1,
	XML_OPEN       = 2,
	XML_CLOSE      = 3,
	XML_OPENCLOSE  = 4
};
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
xml_is_terminator( char *p, int *type )
Packit 89ede9
{
Packit 89ede9
	if ( *p=='>' ) {
Packit 89ede9
		return 1;
Packit 89ede9
	} else if ( *p=='/' && *(p+1)=='>' ) {
Packit 89ede9
		if ( *type==XML_OPENCLOSE ) return 1;
Packit 89ede9
		else if ( *type==XML_OPEN ) {
Packit 89ede9
			*type = XML_OPENCLOSE;
Packit 89ede9
			return 1;
Packit 89ede9
		}
Packit 89ede9
	} else if ( *p=='?' && *(p+1)=='>' && *type==XML_DESCRIPTOR ) {
Packit 89ede9
		return 1;
Packit 89ede9
	} else if ( *p=='!' && *(p+1)=='>' && *type==XML_COMMENT ) {
Packit 89ede9
		return 1;
Packit 89ede9
	}
Packit 89ede9
	return 0;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
xml_add_attribute( xml *node, char *attribute, char *attribute_value  )
Packit 89ede9
{
Packit 89ede9
	str *s;
Packit 89ede9
Packit 89ede9
	if ( attribute )
Packit 89ede9
		s = slist_addc( &(node->attributes), attribute );
Packit 89ede9
	else
Packit 89ede9
		s = slist_addc( &(node->attributes), "" );
Packit 89ede9
	if ( s==NULL ) return 0;
Packit 89ede9
Packit 89ede9
	if ( attribute_value )
Packit 89ede9
		s = slist_addc( &(node->attribute_values), attribute_value );
Packit 89ede9
	else
Packit 89ede9
		s = slist_addc( &(node->attribute_values), "" );
Packit 89ede9
	if ( s==NULL ) {
Packit 89ede9
		(void) slist_remove( &(node->attributes), node->attributes.n-1 );
Packit 89ede9
		return 0;
Packit 89ede9
	}
Packit 89ede9
	return 1;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static char *
Packit 89ede9
xml_processattrib( char *p, xml *node, int *type )
Packit 89ede9
{
Packit 89ede9
	char quote_character = '\"';
Packit 89ede9
	int inquotes = 0;
Packit 89ede9
	str aname, aval;
Packit 89ede9
Packit 89ede9
	str_init( &aname );
Packit 89ede9
	str_init( &aval );
Packit 89ede9
Packit 89ede9
	while ( *p && !xml_is_terminator( p, type ) ) {
Packit 89ede9
Packit 89ede9
		/* get attribute name */
Packit 89ede9
		while ( *p==' ' || *p=='\t' ) p++;
Packit 89ede9
		while ( *p && !strchr( "= \t", *p ) && !xml_is_terminator( p, type ) ){
Packit 89ede9
			str_addchar( &aname, *p );
Packit 89ede9
			p++;
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
		/* equals sign */
Packit 89ede9
		while ( *p==' ' || *p=='\t' ) p++;
Packit 89ede9
		if ( *p=='=' ) p++;
Packit 89ede9
		while ( *p==' ' || *p=='\t' ) p++;
Packit 89ede9
Packit 89ede9
		/* get attribute value */
Packit 89ede9
		if ( *p=='\"' || *p=='\'' ) {
Packit 89ede9
			if ( *p=='\'' ) quote_character = *p;
Packit 89ede9
			inquotes=1;
Packit 89ede9
			p++;
Packit 89ede9
		}
Packit 89ede9
		while ( *p && ((!xml_is_terminator(p,type) && !strchr("= \t", *p ))||inquotes)){
Packit 89ede9
			if ( *p==quote_character ) inquotes=0;
Packit 89ede9
			else str_addchar( &aval, *p );
Packit 89ede9
			p++;
Packit 89ede9
		}
Packit 89ede9
		if ( str_has_value( &aname ) ) {
Packit 89ede9
			xml_add_attribute( node, str_cstr( &aname ), str_cstr( &aval ) );
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
		str_empty( &aname );
Packit 89ede9
		str_empty( &aval );
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	str_free( &aname );
Packit 89ede9
	str_free( &aval );
Packit 89ede9
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*
Packit 89ede9
 * xml_processtag
Packit 89ede9
 *
Packit 89ede9
 *                        start right after '<'
Packit 89ede9
 *                        *
Packit 89ede9
 *      XML_COMMENT      
Packit 89ede9
 * 	XML_DESCRIPTOR   
Packit 89ede9
 * 	XML_OPEN         
Packit 89ede9
 * 	XML_CLOSE        
Packit 89ede9
 * 	XML_OPENCLOSE    
Packit 89ede9
 */
Packit 89ede9
static char *
Packit 89ede9
xml_processtag( char *p, xml *node, int *type )
Packit 89ede9
{
Packit 89ede9
	str tag;
Packit 89ede9
Packit 89ede9
	str_init( &tag );
Packit 89ede9
Packit 89ede9
	if ( *p=='!' ) {
Packit 89ede9
		*type = XML_COMMENT;
Packit 89ede9
		while ( *p && *p!='>' ) p++;
Packit 89ede9
	}
Packit 89ede9
	else if ( *p=='?' ) {
Packit 89ede9
		*type = XML_DESCRIPTOR;
Packit 89ede9
		p++; /* skip '?' */
Packit 89ede9
		while ( *p && !strchr( " \t", *p ) && !xml_is_terminator(p,type) )
Packit 89ede9
			str_addchar( &tag, *p++ );
Packit 89ede9
		if ( *p==' ' || *p=='\t' )
Packit 89ede9
			p = xml_processattrib( p, node, type );
Packit 89ede9
	}
Packit 89ede9
	else if ( *p=='/' ) {
Packit 89ede9
		*type = XML_CLOSE;
Packit 89ede9
		while ( *p && !strchr( " \t", *p ) && !xml_is_terminator(p,type) )
Packit 89ede9
			str_addchar( &tag, *p++ );
Packit 89ede9
		if ( *p==' ' || *p=='\t' ) 
Packit 89ede9
			p = xml_processattrib( p, node, type );
Packit 89ede9
	}
Packit 89ede9
	else {
Packit 89ede9
		*type = XML_OPEN;
Packit 89ede9
		while ( *p && !strchr( " \t", *p ) && !xml_is_terminator(p,type) )
Packit 89ede9
			str_addchar( &tag, *p++ );
Packit 89ede9
		if ( *p==' ' || *p=='\t' ) 
Packit 89ede9
			p = xml_processattrib( p, node, type );
Packit 89ede9
	}
Packit 89ede9
	while ( *p && *p!='>' ) p++;
Packit 89ede9
	if ( *p=='>' ) p++;
Packit 89ede9
Packit 89ede9
	str_strcpy( &(node->tag), &tag );
Packit 89ede9
Packit 89ede9
	str_free( &tag );
Packit 89ede9
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static void
Packit 89ede9
xml_appendnode( xml *onode, xml *nnode )
Packit 89ede9
{
Packit 89ede9
	if ( !onode->down ) onode->down = nnode;
Packit 89ede9
	else {
Packit 89ede9
		xml *p = onode->down;
Packit 89ede9
		while ( p->next ) p = p->next;
Packit 89ede9
		p->next = nnode;
Packit 89ede9
	}
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
char *
Packit 89ede9
xml_parse( char *p, xml *onode )
Packit 89ede9
{
Packit 89ede9
	int type, is_style = 0;
Packit 89ede9
	xml *nnode;
Packit 89ede9
Packit 89ede9
	while ( *p ) {
Packit 89ede9
Packit 89ede9
		/* retain white space for <style> tags in endnote xml */
Packit 89ede9
		if ( str_cstr( &(onode->tag) ) &&
Packit 89ede9
			!strcasecmp( str_cstr( &(onode->tag) ),"style") ) is_style=1;
Packit 89ede9
Packit 89ede9
		while ( *p && *p!='<' ) {
Packit 89ede9
			if ( onode->value.len>0 || is_style || !is_ws( *p ) )
Packit 89ede9
				str_addchar( &(onode->value), *p );
Packit 89ede9
			p++;
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
		if ( *p=='<' ) {
Packit 89ede9
			nnode = xml_new();
Packit 89ede9
			p = xml_processtag( p+1, nnode, &type );
Packit 89ede9
			if ( type==XML_OPEN || type==XML_OPENCLOSE || type==XML_DESCRIPTOR ) {
Packit 89ede9
				xml_appendnode( onode, nnode );
Packit 89ede9
				if ( type==XML_OPEN )
Packit 89ede9
					p = xml_parse( p, nnode );
Packit 89ede9
			} else if ( type==XML_CLOSE ) {
Packit 89ede9
				/*check to see if it's closing for this one*/
Packit 89ede9
				xml_delete( nnode );
Packit 89ede9
				goto out; /* assume it's right for now */
Packit 89ede9
			} else {
Packit 89ede9
				xml_delete( nnode );
Packit 89ede9
			}
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
void
Packit 89ede9
xml_draw( xml *node, int n )
Packit 89ede9
{
Packit 89ede9
	slist_index j;
Packit 89ede9
	int i;
Packit 89ede9
Packit 89ede9
	if ( !node ) return;
Packit 89ede9
Packit 89ede9
	for ( i=0; i
Packit 89ede9
Packit 89ede9
	printf("n=%d tag='%s' value='%s'\n", n, str_cstr( &(node->tag) ), str_cstr( &(node->value) ) );
Packit 89ede9
Packit 89ede9
	for ( j=0; j<node->attributes.n; ++j ) {
Packit 89ede9
		for ( i=0; i
Packit 89ede9
		printf( "    attribute='%s' value='%s'\n",
Packit 89ede9
			slist_cstr( &(node->attributes), j ),
Packit 89ede9
			slist_cstr( &(node->attribute_values), j )
Packit 89ede9
		);
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	if ( node->down ) xml_draw( node->down, n+1 );
Packit 89ede9
	if ( node->next ) xml_draw( node->next, n );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
char *
Packit 89ede9
xml_find_start( char *buffer, char *tag )
Packit 89ede9
{
Packit 89ede9
	str starttag;
Packit 89ede9
	char *p;
Packit 89ede9
Packit 89ede9
	str_initstrsc( &starttag, "<", tag, " ", NULL );
Packit 89ede9
Packit 89ede9
	p = strsearch( buffer, str_cstr( &starttag ) );
Packit 89ede9
	if ( !p ) {
Packit 89ede9
		starttag.data[ starttag.len-1 ] = '>';
Packit 89ede9
		p = strsearch( buffer, str_cstr( &starttag ) );
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	str_free( &starttag );
Packit 89ede9
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
char *
Packit 89ede9
xml_find_end( char *buffer, char *tag )
Packit 89ede9
{
Packit 89ede9
	str endtag;
Packit 89ede9
	char *p;
Packit 89ede9
Packit 89ede9
	if ( xml_pns )
Packit 89ede9
		str_initstrsc( &endtag, "</", xml_pns, ":", tag, ">", NULL );
Packit 89ede9
	else
Packit 89ede9
		str_initstrsc( &endtag, "</", tag, ">", NULL );
Packit 89ede9
Packit 89ede9
	p = strsearch( buffer, str_cstr( &endtag ) );
Packit 89ede9
	if ( p && *p ) {
Packit 89ede9
		if ( *p ) p++;  /* skip <random_tag></end> combo */
Packit 89ede9
		while ( *p && *(p-1)!='>' ) p++;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	str_free( &endtag );
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
xml_tag_matches_simple( xml* node, const char *tag )
Packit 89ede9
{
Packit 89ede9
	if ( node->tag.len!=strlen( tag ) ) return 0;
Packit 89ede9
	if ( strcasecmp( str_cstr( &(node->tag) ), tag ) ) return 0;
Packit 89ede9
	return 1;
Packit 89ede9
}
Packit 89ede9
static int
Packit 89ede9
xml_tag_matches_pns( xml* node, const char *tag )
Packit 89ede9
{
Packit 89ede9
	int found = 0;
Packit 89ede9
	str pnstag;
Packit 89ede9
Packit 89ede9
	str_initstrsc( &pnstag, xml_pns, ":", tag, NULL );
Packit 89ede9
	if ( node->tag.len==pnstag.len &&
Packit 89ede9
			!strcasecmp( str_cstr( &(node->tag) ), str_cstr( &pnstag ) ) )
Packit 89ede9
		found = 1;
Packit 89ede9
	str_free( &pnstag );
Packit 89ede9
Packit 89ede9
	return found;
Packit 89ede9
}
Packit 89ede9
int
Packit 89ede9
xml_tag_matches( xml *node, const char *tag )
Packit 89ede9
{
Packit 89ede9
	if ( xml_pns ) return xml_tag_matches_pns   ( node, tag );
Packit 89ede9
	else           return xml_tag_matches_simple( node, tag );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
int
Packit 89ede9
xml_tag_matches_has_value( xml *node, const char *tag )
Packit 89ede9
{
Packit 89ede9
	if ( xml_tag_matches( node, tag ) && xml_has_value( node ) ) return 1;
Packit 89ede9
	return 0;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
int
Packit 89ede9
xml_has_attribute( xml *node, const char *attribute, const char *attribute_value )
Packit 89ede9
{
Packit 89ede9
	slist_index i;
Packit 89ede9
	char *a, *v;
Packit 89ede9
Packit 89ede9
	for ( i=0; i<node->attributes.n; ++i ) {
Packit 89ede9
		a = slist_cstr( &(node->attributes), i );
Packit 89ede9
		v = slist_cstr( &(node->attribute_values), i );
Packit 89ede9
		if ( !a || !v ) continue;
Packit 89ede9
		if ( !strcasecmp( a, attribute ) && !strcasecmp( v, attribute_value ) )
Packit 89ede9
			return 1;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	return 0;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
int
Packit 89ede9
xml_tag_has_attribute( xml *node, const char *tag, const char *attribute, const char *attribute_value )
Packit 89ede9
{
Packit 89ede9
	if ( !xml_tag_matches( node, tag ) ) return 0;
Packit 89ede9
	return xml_has_attribute( node, attribute, attribute_value );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
str *
Packit 89ede9
xml_attribute( xml *node, const char *attribute )
Packit 89ede9
{
Packit 89ede9
	slist_index n;
Packit 89ede9
Packit 89ede9
	n = slist_findc( &(node->attributes), attribute );
Packit 89ede9
	if ( slist_wasnotfound( &(node->attributes), n ) ) return NULL;
Packit 89ede9
	else return slist_str( &(node->attribute_values), n );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
int
Packit 89ede9
xml_has_value( xml *node )
Packit 89ede9
{
Packit 89ede9
	if ( node && str_has_value( &(node->value) ) ) return 1;
Packit 89ede9
	return 0;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
str *
Packit 89ede9
xml_tag( xml *node )
Packit 89ede9
{
Packit 89ede9
	return &(node->tag);
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
char *
Packit 89ede9
xml_tag_cstr( xml *node )
Packit 89ede9
{
Packit 89ede9
	return str_cstr( &(node->tag) );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
str *
Packit 89ede9
xml_value( xml *node )
Packit 89ede9
{
Packit 89ede9
	return &(node->value);
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
char *
Packit 89ede9
xml_value_cstr( xml *node )
Packit 89ede9
{
Packit 89ede9
	return str_cstr( &(node->value) );
Packit 89ede9
}