Blame bibutils/xml_encoding.c

Packit 89ede9
/*
Packit 89ede9
 * xml_encoding.c
Packit 89ede9
 *
Packit 89ede9
 * Copyright (c) Chris Putnam 2007-2018
Packit 89ede9
 *
Packit 89ede9
 * Source code released under the GPL version 2
Packit 89ede9
 *
Packit 89ede9
 */
Packit 89ede9
#include <stdio.h>
Packit 89ede9
#include <stdlib.h>
Packit 89ede9
#include <string.h>
Packit 89ede9
#include "charsets.h"
Packit 89ede9
#include "str.h"
Packit 89ede9
#include "str_conv.h"
Packit 89ede9
#include "xml.h"
Packit 89ede9
#include "xml_encoding.h"
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
xml_getencodingr( xml *node )
Packit 89ede9
{
Packit 89ede9
	int n = CHARSET_UNKNOWN, m;
Packit 89ede9
	str *s;
Packit 89ede9
	char *t;
Packit 89ede9
Packit 89ede9
	if ( xml_tag_matches( node, "xml" ) ) {
Packit 89ede9
		s = xml_attribute( node, "encoding" );
Packit 89ede9
		if ( str_has_value( s ) ) {
Packit 89ede9
			t = str_cstr( s );
Packit 89ede9
			if ( !strcasecmp( t, "UTF-8" ) )
Packit 89ede9
				n = CHARSET_UNICODE;
Packit 89ede9
			else if ( !strcasecmp( t, "UTF8" ) )
Packit 89ede9
				n = CHARSET_UNICODE;
Packit 89ede9
			else if ( !strcasecmp( t, "GB18030" ) )
Packit 89ede9
				n = CHARSET_GB18030;
Packit 89ede9
			else n = charset_find( t );
Packit 89ede9
			if ( n==CHARSET_UNKNOWN ) {
Packit 89ede9
				fprintf( stderr, "Warning: did not recognize encoding '%s'\n", t );
Packit 89ede9
			}
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
        if ( node->down ) {
Packit 89ede9
		m = xml_getencodingr( node->down );
Packit 89ede9
		if ( m!=CHARSET_UNKNOWN ) n = m;
Packit 89ede9
	}
Packit 89ede9
        if ( node->next ) {
Packit 89ede9
		m = xml_getencodingr( node->next );
Packit 89ede9
		if ( m!=CHARSET_UNKNOWN ) n = m;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	return n;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
int
Packit 89ede9
xml_getencoding( str *s )
Packit 89ede9
{
Packit 89ede9
	int file_charset = CHARSET_UNKNOWN;
Packit 89ede9
	str descriptor;
Packit 89ede9
	xml descriptxml;
Packit 89ede9
	char *p, *q;
Packit 89ede9
Packit 89ede9
	p = strstr( str_cstr( s ), "
Packit 89ede9
	if ( !p ) p = strstr( str_cstr( s ), "
Packit 89ede9
	if ( p ) {
Packit 89ede9
		q = strstr( p, "?>" );
Packit 89ede9
		if ( q ) {
Packit 89ede9
			str_init( &descriptor );
Packit 89ede9
			str_segcpy( &descriptor, p, q+2 );
Packit 89ede9
			xml_init( &descriptxml );
Packit 89ede9
			xml_parse( str_cstr( &descriptor ), &descriptxml );
Packit 89ede9
			file_charset = xml_getencodingr( &descriptxml );
Packit 89ede9
			xml_free( &descriptxml );
Packit 89ede9
			str_free( &descriptor );
Packit 89ede9
			str_segdel( s, p, q+2 );
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
	return file_charset;
Packit 89ede9
}