|
Packit |
89ede9 |
/*
|
|
Packit |
89ede9 |
* xml_getencoding.c
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Copyright (c) Chris Putnam 2007-2018
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Source code released under the GPL version 2
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
#include <stdio.h>
|
|
Packit |
89ede9 |
#include <stdlib.h>
|
|
Packit |
89ede9 |
#include <string.h>
|
|
Packit |
89ede9 |
#include "charsets.h"
|
|
Packit |
89ede9 |
#include "str.h"
|
|
Packit |
89ede9 |
#include "str_conv.h"
|
|
Packit |
89ede9 |
#include "xml.h"
|
|
Packit |
89ede9 |
#include "xml_encoding.h"
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
xml_getencodingr( xml *node )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int n = CHARSET_UNKNOWN, m;
|
|
Packit |
89ede9 |
str *s;
|
|
Packit |
89ede9 |
char *t;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "xml" ) ) {
|
|
Packit |
89ede9 |
s = xml_attribute( node, "encoding" );
|
|
Packit |
89ede9 |
if ( str_has_value( s ) ) {
|
|
Packit |
89ede9 |
t = str_cstr( s );
|
|
Packit |
89ede9 |
if ( !strcasecmp( t, "UTF-8" ) )
|
|
Packit |
89ede9 |
n = CHARSET_UNICODE;
|
|
Packit |
89ede9 |
else if ( !strcasecmp( t, "UTF8" ) )
|
|
Packit |
89ede9 |
n = CHARSET_UNICODE;
|
|
Packit |
89ede9 |
else if ( !strcasecmp( t, "GB18030" ) )
|
|
Packit |
89ede9 |
n = CHARSET_GB18030;
|
|
Packit |
89ede9 |
else n = charset_find( t );
|
|
Packit |
89ede9 |
if ( n==CHARSET_UNKNOWN ) {
|
|
Packit |
89ede9 |
fprintf( stderr, "Warning: did not recognize encoding '%s'\n", t );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->down ) {
|
|
Packit |
89ede9 |
m = xml_getencodingr( node->down );
|
|
Packit |
89ede9 |
if ( m!=CHARSET_UNKNOWN ) n = m;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->next ) {
|
|
Packit |
89ede9 |
m = xml_getencodingr( node->next );
|
|
Packit |
89ede9 |
if ( m!=CHARSET_UNKNOWN ) n = m;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return n;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
int
|
|
Packit |
89ede9 |
xml_getencoding( str *s )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int file_charset = CHARSET_UNKNOWN;
|
|
Packit |
89ede9 |
str descriptor;
|
|
Packit |
89ede9 |
xml descriptxml;
|
|
Packit |
89ede9 |
char *p, *q;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
p = strstr( str_cstr( s ), "
|
|
Packit |
89ede9 |
if ( !p ) p = strstr( str_cstr( s ), "
|
|
Packit |
89ede9 |
if ( p ) {
|
|
Packit |
89ede9 |
q = strstr( p, "?>" );
|
|
Packit |
89ede9 |
if ( q ) {
|
|
Packit |
89ede9 |
str_init( &descriptor );
|
|
Packit |
89ede9 |
str_segcpy( &descriptor, p, q+2 );
|
|
Packit |
89ede9 |
xml_init( &descriptxml );
|
|
Packit |
89ede9 |
xml_parse( str_cstr( &descriptor ), &descriptxml );
|
|
Packit |
89ede9 |
file_charset = xml_getencodingr( &descriptxml );
|
|
Packit |
89ede9 |
xml_free( &descriptxml );
|
|
Packit |
89ede9 |
str_free( &descriptor );
|
|
Packit |
89ede9 |
str_segdel( s, p, q+2 );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return file_charset;
|
|
Packit |
89ede9 |
}
|