|
Packit |
89ede9 |
/*
|
|
Packit |
89ede9 |
* wordin.c
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Copyright (c) Chris Putnam 2010-2018
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Source code released under the GPL version 2
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
#include <stdio.h>
|
|
Packit |
89ede9 |
#include <stdlib.h>
|
|
Packit |
89ede9 |
#include "is_ws.h"
|
|
Packit |
89ede9 |
#include "str.h"
|
|
Packit |
89ede9 |
#include "str_conv.h"
|
|
Packit |
89ede9 |
#include "fields.h"
|
|
Packit |
89ede9 |
#include "xml.h"
|
|
Packit |
89ede9 |
#include "xml_encoding.h"
|
|
Packit |
89ede9 |
#include "bibformats.h"
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int wordin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
|
|
Packit |
89ede9 |
static int wordin_processf( fields *wordin, char *data, char *filename, long nref, param *p );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*****************************************************
|
|
Packit |
89ede9 |
PUBLIC: void wordin_initparams()
|
|
Packit |
89ede9 |
*****************************************************/
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
void
|
|
Packit |
89ede9 |
wordin_initparams( param *p, const char *progname )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
p->readformat = BIBL_WORDIN;
|
|
Packit |
89ede9 |
p->charsetin = BIBL_CHARSET_DEFAULT;
|
|
Packit |
89ede9 |
p->charsetin_src = BIBL_SRC_DEFAULT;
|
|
Packit |
89ede9 |
p->latexin = 0;
|
|
Packit |
89ede9 |
p->xmlin = 1;
|
|
Packit |
89ede9 |
p->utf8in = 1;
|
|
Packit |
89ede9 |
p->nosplittitle = 0;
|
|
Packit |
89ede9 |
p->verbose = 0;
|
|
Packit |
89ede9 |
p->addcount = 0;
|
|
Packit |
89ede9 |
p->output_raw = BIBL_RAW_WITHMAKEREFID |
|
|
Packit |
89ede9 |
BIBL_RAW_WITHCHARCONVERT;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
p->readf = wordin_readf;
|
|
Packit |
89ede9 |
p->processf = wordin_processf;
|
|
Packit |
89ede9 |
p->cleanf = NULL;
|
|
Packit |
89ede9 |
p->typef = NULL;
|
|
Packit |
89ede9 |
p->convertf = NULL;
|
|
Packit |
89ede9 |
p->all = NULL;
|
|
Packit |
89ede9 |
p->nall = 0;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
slist_init( &(p->asis) );
|
|
Packit |
89ede9 |
slist_init( &(p->corps) );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( !progname ) p->progname = NULL;
|
|
Packit |
89ede9 |
else p->progname = strdup( progname );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*****************************************************
|
|
Packit |
89ede9 |
PUBLIC: int wordin_readf()
|
|
Packit |
89ede9 |
*****************************************************/
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*
 * wordin_findstartwrapper()
 *
 * Look for the opening "b:Source" element in buf and hand back whatever
 * xml_find_start() returns for it.  ntype is accepted but never read or
 * written here.
 */
static char *
wordin_findstartwrapper( char *buf, int *ntype )
{
	char *start;

	(void) ntype;
	start = xml_find_start( buf, "b:Source" );
	return start;
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*
 * wordin_findendwrapper()
 *
 * Look for the end of the "b:Source" element in buf and hand back
 * whatever xml_find_end() returns for it.  ntype is accepted but unused.
 */
static char *
wordin_findendwrapper( char *buf, int ntype )
{
	char *end;

	(void) ntype;
	end = xml_find_end( buf, "b:Source" );
	return end;
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
wordin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
str tmp;
|
|
Packit |
89ede9 |
char *startptr = NULL, *endptr;
|
|
Packit |
89ede9 |
int haveref = 0, inref = 0, file_charset = CHARSET_UNKNOWN, m, type = 1;
|
|
Packit |
89ede9 |
str_init( &tmp );
|
|
Packit |
89ede9 |
while ( !haveref && str_fget( fp, buf, bufsize, bufpos, line ) ) {
|
|
Packit |
89ede9 |
if ( str_cstr( line ) ) {
|
|
Packit |
89ede9 |
m = xml_getencoding( line );
|
|
Packit |
89ede9 |
if ( m!=CHARSET_UNKNOWN ) file_charset = m;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( str_cstr( line ) ) {
|
|
Packit |
89ede9 |
startptr = wordin_findstartwrapper( str_cstr( line ), &type );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( startptr || inref ) {
|
|
Packit |
89ede9 |
if ( inref ) str_strcat( &tmp, line );
|
|
Packit |
89ede9 |
else {
|
|
Packit |
89ede9 |
str_strcatc( &tmp, startptr );
|
|
Packit |
89ede9 |
inref = 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
endptr = wordin_findendwrapper( str_cstr( &tmp ), type );
|
|
Packit |
89ede9 |
if ( endptr ) {
|
|
Packit |
89ede9 |
str_segcpy( reference, str_cstr( &tmp ), endptr );
|
|
Packit |
89ede9 |
haveref = 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
str_free( &tmp );
|
|
Packit |
89ede9 |
*fcharset = file_charset;
|
|
Packit |
89ede9 |
return haveref;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*****************************************************
|
|
Packit |
89ede9 |
PUBLIC: int wordin_processf()
|
|
Packit |
89ede9 |
*****************************************************/
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*
 * xml_convert
 *
 * One row of a tag-translation table: an input XML tag, an optional
 * attribute="value" qualifier, and the output tag it maps to.
 */
typedef struct xml_convert {
	char *in;      /* input tag to look for */
	char *a;       /* attribute name, when a qualifier is needed */
	char *aval;    /* value that attribute must carry */
	char *out;     /* output tag */
	int   level;   /* presumably the level paired with the output tag -- TODO confirm */
} xml_convert;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* wordin_person_last()
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* From an xml list, extract the value from the first entry
|
|
Packit |
89ede9 |
* of <b:Last>xxxx</b:Last> and copy into name
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Additional <b:Last>yyyyy</b:Last> will be ignored.
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Returns BIBL_ERR_MEMERR on memory error, BIBL_OK otherwise.
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
wordin_person_last( xml *node, str *name )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
while ( node && !xml_tag_matches( node, "b:Last" ) )
|
|
Packit |
89ede9 |
node = node->next;
|
|
Packit |
89ede9 |
if ( xml_has_value( node ) ) {
|
|
Packit |
89ede9 |
str_strcpy( name, xml_value( node ) );
|
|
Packit |
89ede9 |
if ( str_memerr( name ) ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* wordin_person_first()
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* From an xml list, extract the value of any
|
|
Packit |
89ede9 |
* <b:First>xxxx</b:First> and append "|xxxx" to name.
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Returns BIBL_ERR_MEMERR on memory error, BIBL_OK otherwise
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
wordin_person_first( xml *node, str *name )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
for ( ; node; node=node->next ) {
|
|
Packit |
89ede9 |
if ( !xml_tag_matches( node, "b:First" ) ) continue;
|
|
Packit |
89ede9 |
if ( xml_has_value( node ) ) {
|
|
Packit |
89ede9 |
if ( str_has_value( name ) ) str_addchar( name, '|' );
|
|
Packit |
89ede9 |
str_strcat( name, xml_value( node ) );
|
|
Packit |
89ede9 |
if ( str_memerr( name ) ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
wordin_person( xml *node, fields *info, char *type )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status, ret = BIBL_OK;
|
|
Packit |
89ede9 |
str name;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_init( &name );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
status = wordin_person_last( node, &name );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) {
|
|
Packit |
89ede9 |
ret = status;
|
|
Packit |
89ede9 |
goto out;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
status = wordin_person_first( node, &name );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) {
|
|
Packit |
89ede9 |
ret = status;
|
|
Packit |
89ede9 |
goto out;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
status = fields_add( info, type, str_cstr( &name ), 0 );
|
|
Packit |
89ede9 |
if ( status != FIELDS_OK ) ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
out:
|
|
Packit |
89ede9 |
str_free( &name );
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
wordin_people( xml *node, fields *info, char *type )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int ret = BIBL_OK;
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "b:Author" ) && node->down ) {
|
|
Packit |
89ede9 |
ret = wordin_people( node->down, info, type );
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:NameList" ) && node->down ) {
|
|
Packit |
89ede9 |
ret = wordin_people( node->down, info, type );
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:Person" ) ) {
|
|
Packit |
89ede9 |
if ( node->down ) ret = wordin_person( node->down, info, type );
|
|
Packit |
89ede9 |
if ( ret!=BIBL_OK ) return ret;
|
|
Packit |
89ede9 |
if ( node->next ) ret = wordin_people( node->next, info, type );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
wordin_pages( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int i, status, ret = BIBL_OK;
|
|
Packit |
89ede9 |
str sp, ep;
|
|
Packit |
89ede9 |
char *p;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
strs_init( &sp, &ep, NULL );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
p = xml_value_cstr( node );
|
|
Packit |
89ede9 |
while ( *p && *p!='-' )
|
|
Packit |
89ede9 |
str_addchar( &sp, *p++ );
|
|
Packit |
89ede9 |
if ( str_memerr( &sp ) ) {
|
|
Packit |
89ede9 |
ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
goto out;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( *p=='-' ) p++;
|
|
Packit |
89ede9 |
while ( *p )
|
|
Packit |
89ede9 |
str_addchar( &ep, *p++ );
|
|
Packit |
89ede9 |
if ( str_memerr( &ep ) ) {
|
|
Packit |
89ede9 |
ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
goto out;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( str_has_value( &sp ) ) {
|
|
Packit |
89ede9 |
status = fields_add( info, "PAGES:START", str_cstr( &sp ), 1 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) {
|
|
Packit |
89ede9 |
ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
goto out;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( str_has_value( &ep ) ) {
|
|
Packit |
89ede9 |
if ( sp.len > ep.len ) {
|
|
Packit |
89ede9 |
for ( i=sp.len-ep.len; i
|
|
Packit |
89ede9 |
sp.data[i] = ep.data[i-sp.len+ep.len];
|
|
Packit |
89ede9 |
status = fields_add( info, "PAGES:STOP", str_cstr( &sp ), 1 );
|
|
Packit |
89ede9 |
} else
|
|
Packit |
89ede9 |
status = fields_add( info, "PAGES:STOP", str_cstr( &ep ), 1 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) {
|
|
Packit |
89ede9 |
ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
goto out;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
out:
|
|
Packit |
89ede9 |
strs_free( &sp, &ep, NULL );
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
wordin_reference( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status, ret = BIBL_OK;
|
|
Packit |
89ede9 |
if ( xml_has_value( node ) ) {
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "b:Tag" ) ) {
|
|
Packit |
89ede9 |
status = fields_add( info, "REFNUM", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:SourceType" ) ) {
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:City" ) ) {
|
|
Packit |
89ede9 |
status = fields_add( info, "ADDRESS", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:Publisher" ) ) {
|
|
Packit |
89ede9 |
status = fields_add( info, "PUBLISHER", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:Title" ) ) {
|
|
Packit |
89ede9 |
status = fields_add( info, "TITLE", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:JournalName" ) ) {
|
|
Packit |
89ede9 |
status = fields_add( info, "TITLE", xml_value_cstr( node ), 1 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:Volume" ) ) {
|
|
Packit |
89ede9 |
status = fields_add( info, "VOLUME", xml_value_cstr( node ), 1 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:Comments" ) ) {
|
|
Packit |
89ede9 |
status = fields_add( info, "NOTES", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:Pages" ) ) {
|
|
Packit |
89ede9 |
ret = wordin_pages( node, info );
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:Author" ) && node->down ) {
|
|
Packit |
89ede9 |
ret = wordin_people( node->down, info, "AUTHOR" );
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "b:Editor" ) && node->down ) {
|
|
Packit |
89ede9 |
ret = wordin_people( node->down, info, "EDITOR" );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( ret==BIBL_OK && node->next ) wordin_reference( node->next, info );
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
wordin_assembleref( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int ret = BIBL_OK;
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "b:Source" ) ) {
|
|
Packit |
89ede9 |
if ( node->down ) ret = wordin_reference( node->down, info );
|
|
Packit |
89ede9 |
} else if ( str_is_empty( &(node->tag) ) && node->down ) {
|
|
Packit |
89ede9 |
ret = wordin_assembleref( node->down, info );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
wordin_processf( fields *wordin, char *data, char *filename, long nref, param *p )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status, ret = 1;
|
|
Packit |
89ede9 |
xml top;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
xml_init( &top );
|
|
Packit |
89ede9 |
xml_parse( data, &top );
|
|
Packit |
89ede9 |
status = wordin_assembleref( &top, wordin );
|
|
Packit |
89ede9 |
xml_free( &top );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( status==BIBL_ERR_MEMERR ) ret = 0;
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|