/*
 * isiin.c
 *
 * Copyright (c) Chris Putnam 2004-2018
 *
 * Program and source code released under the GPL version 2
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "is_ws.h"
#include "str.h"
#include "str_conv.h"
#include "name.h"
#include "fields.h"
#include "reftypes.h"
#include "bibformats.h"
#include "generic.h"
Packit 89ede9
Packit 89ede9
extern variants isi_all[];
Packit 89ede9
extern int isi_nall;
Packit 89ede9
Packit 89ede9
static int isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
Packit 89ede9
static int isiin_typef( fields *isiin, char *filename, int nref, param *p );
Packit 89ede9
static int isiin_convertf( fields *isiin, fields *info, int reftype, param *p );
Packit 89ede9
static int isiin_processf( fields *isiin, char *p, char *filename, long nref, param *pm );
Packit 89ede9
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: void isiin_initparams()
Packit 89ede9
*****************************************************/
Packit 89ede9
void
Packit 89ede9
isiin_initparams( param *p, const char *progname )
Packit 89ede9
{
Packit 89ede9
	p->readformat       = BIBL_ISIIN;
Packit 89ede9
	p->charsetin        = BIBL_CHARSET_DEFAULT;
Packit 89ede9
	p->charsetin_src    = BIBL_SRC_DEFAULT;
Packit 89ede9
	p->latexin          = 0;
Packit 89ede9
	p->xmlin            = 0;
Packit 89ede9
	p->utf8in           = 0;
Packit 89ede9
	p->nosplittitle     = 0;
Packit 89ede9
	p->verbose          = 0;
Packit 89ede9
	p->addcount         = 0;
Packit 89ede9
	p->output_raw       = 0;
Packit 89ede9
Packit 89ede9
	p->readf    = isiin_readf;
Packit 89ede9
	p->processf = isiin_processf;
Packit 89ede9
	p->cleanf   = NULL;
Packit 89ede9
	p->typef    = isiin_typef;
Packit 89ede9
	p->convertf = isiin_convertf;
Packit 89ede9
	p->all      = isi_all;
Packit 89ede9
	p->nall     = isi_nall;
Packit 89ede9
Packit 89ede9
	slist_init( &(p->asis) );
Packit 89ede9
	slist_init( &(p->corps) );
Packit 89ede9
Packit 89ede9
	if ( !progname ) p->progname = NULL;
Packit 89ede9
	else p->progname = strdup( progname );
Packit 89ede9
}
/*****************************************************
 PUBLIC: int isiin_readf()
*****************************************************/

/* ISI definition of a tag is strict:
 *   char 1 = uppercase alphabetic character
 *   char 2 = uppercase alphabetic character or digit
 *
 * Returns 1 if buf starts with such a two-character tag, 0 otherwise.
 * buf must be a NUL-terminated string; the second character is only
 * examined when the first one is an uppercase letter, so an empty
 * string is never read past its terminator.
 */
static int
is_isi_tag( const char *buf )
{
	if ( buf[0]<'A' || buf[0]>'Z' ) return 0;
	if ( buf[1]>='A' && buf[1]<='Z' ) return 1;
	if ( buf[1]>='0' && buf[1]<='9' ) return 1;
	return 0;
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
Packit 89ede9
{
Packit 89ede9
	if ( line->len ) return 1;
Packit 89ede9
	else return str_fget( fp, buf, bufsize, bufpos, line );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
Packit 89ede9
{
Packit 89ede9
	int haveref = 0, inref = 0;
Packit 89ede9
	char *p;
Packit 89ede9
	*fcharset = CHARSET_UNKNOWN;
Packit 89ede9
	while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {
Packit 89ede9
		if ( !line->data ) continue;
Packit 89ede9
		p = &(line->data[0]);
Packit 89ede9
		/* Recognize UTF8 BOM */
Packit 89ede9
		if ( line->len > 2 &&
Packit 89ede9
				(unsigned char)(p[0])==0xEF &&
Packit 89ede9
				(unsigned char)(p[1])==0xBB &&
Packit 89ede9
				(unsigned char)(p[2])==0xBF ) {
Packit 89ede9
			*fcharset = CHARSET_UNICODE;
Packit 89ede9
			p += 3;
Packit 89ede9
		}
Packit 89ede9
		/* Each reference ends with 'ER ' */
Packit 89ede9
		if ( is_isi_tag( p ) ) {
Packit 89ede9
			if ( !strncmp( p, "FN ", 3 ) ) {
Packit 89ede9
				if (strncasecmp( p, "FN ISI Export Format",20)){
Packit 89ede9
					fprintf( stderr, ": warning file FN type not '%s' not recognized.\n", /*r->progname,*/ p );
Packit 89ede9
				}
Packit 89ede9
			} else if ( !strncmp( p, "VR ", 3 ) ) {
Packit 89ede9
				if ( strncasecmp( p, "VR 1.0", 6 ) ) {
Packit 89ede9
					fprintf(stderr,": warning file version number '%s' not recognized, expected 'VR 1.0'\n", /*r->progname,*/ p );
Packit 89ede9
				}
Packit 89ede9
			} else if ( !strncmp( p, "ER", 2 ) ) haveref = 1;
Packit 89ede9
			else {
Packit 89ede9
				str_addchar( reference, '\n' );
Packit 89ede9
				str_strcatc( reference, p );
Packit 89ede9
				inref = 1;
Packit 89ede9
			}
Packit 89ede9
			str_empty( line );
Packit 89ede9
		}
Packit 89ede9
		/* not a tag, but we'll append to the last values */
Packit 89ede9
		else if ( inref ) {
Packit 89ede9
			str_addchar( reference, '\n' );
Packit 89ede9
			str_strcatc( reference, p );
Packit 89ede9
			str_empty( line );
Packit 89ede9
		}
Packit 89ede9
		else {
Packit 89ede9
			str_empty( line );
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
	return haveref;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: int isiin_processf()
Packit 89ede9
*****************************************************/
Packit 89ede9
Packit 89ede9
static char *
Packit 89ede9
process_tagged_line( str *tag, str *data, char *p )
Packit 89ede9
{
Packit 89ede9
	int i;
Packit 89ede9
Packit 89ede9
	/* collect tag and skip past it */
Packit 89ede9
	i = 0;
Packit 89ede9
	while ( i<2 && *p && *p!='\r' && *p!='\n') {
Packit 89ede9
		str_addchar( tag, *p++ );
Packit 89ede9
		i++;
Packit 89ede9
	}
Packit 89ede9
	while ( *p==' ' || *p=='\t' ) p++;
Packit 89ede9
	while ( *p && *p!='\r' && *p!='\n' )
Packit 89ede9
		str_addchar( data, *p++ );
Packit 89ede9
	str_trimendingws( data );
Packit 89ede9
	while ( *p=='\r' || *p=='\n' ) p++;
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static char *
Packit 89ede9
process_untagged_line( str *data, char *p )
Packit 89ede9
{
Packit 89ede9
	while ( *p==' ' || *p=='\t' ) p++;
Packit 89ede9
	while ( *p && *p!='\r' && *p!='\n' )
Packit 89ede9
		str_addchar( data, *p++ );
Packit 89ede9
	str_trimendingws( data );
Packit 89ede9
	while ( *p=='\r' || *p=='\n' ) p++;
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
add_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
Packit 89ede9
{
Packit 89ede9
	int status;
Packit 89ede9
Packit 89ede9
	if ( str_has_value( tag ) && str_has_value( value ) ) {
Packit 89ede9
		status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), 0 );
Packit 89ede9
		if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
Packit 89ede9
		*tag_added = 1;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	else {
Packit 89ede9
		*tag_added = 0;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	return BIBL_OK;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
merge_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
Packit 89ede9
{
Packit 89ede9
	int n, status;
Packit 89ede9
Packit 89ede9
	if ( str_has_value( value ) ) {
Packit 89ede9
Packit 89ede9
		if ( *tag_added==1 ) {
Packit 89ede9
Packit 89ede9
			n = fields_num( isiin );
Packit 89ede9
			if ( n==0 ) return BIBL_OK;
Packit 89ede9
Packit 89ede9
			/* only one AU or AF for list of authors */
Packit 89ede9
			if ( !strcmp( str_cstr( tag ), "AU" ) ) {
Packit 89ede9
				status = fields_add( isiin, "AU", str_cstr( value ), 0 );
Packit 89ede9
				if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
Packit 89ede9
			} else if ( !strcmp( str_cstr( tag ), "AF" ) ) {
Packit 89ede9
				status = fields_add( isiin, "AF", str_cstr( value ), 0 );
Packit 89ede9
				if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
Packit 89ede9
			}
Packit 89ede9
			/* otherwise append multiline data */
Packit 89ede9
			else {
Packit 89ede9
				str_addchar( &(isiin->data[n-1]),' ');
Packit 89ede9
				str_strcat( &(isiin->data[n-1]), value );
Packit 89ede9
				if ( str_memerr( &(isiin->data[n-1]) ) ) return BIBL_ERR_MEMERR;
Packit 89ede9
			}
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
		else {
Packit 89ede9
                        status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), 0 );
Packit 89ede9
                        if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
Packit 89ede9
                        *tag_added = 1;
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	return BIBL_OK;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
isiin_processf( fields *isiin, char *p, char *filename, long nref, param *pm )
Packit 89ede9
{
Packit 89ede9
	int status, tag_added = 0, ret = 1;
Packit 89ede9
	str tag, value;
Packit 89ede9
Packit 89ede9
	strs_init( &tag, &value, NULL );
Packit 89ede9
Packit 89ede9
	while ( *p ) {
Packit 89ede9
Packit 89ede9
		/* ...with tag, add */
Packit 89ede9
		if ( is_isi_tag( p ) ) {
Packit 89ede9
			str_empty( &tag );
Packit 89ede9
			str_empty( &value );
Packit 89ede9
			p = process_tagged_line( &tag, &value, p );
Packit 89ede9
			status = add_tag_value( isiin, &tag, &value, &tag_added );
Packit 89ede9
			if ( status!=BIBL_OK ) {
Packit 89ede9
				ret = 0;
Packit 89ede9
				goto out;
Packit 89ede9
			}
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
		/* ...untagged, merge -- one AU or AF for list of authors */
Packit 89ede9
		else {
Packit 89ede9
			str_empty( &value );
Packit 89ede9
			p = process_untagged_line( &value, p );
Packit 89ede9
			status = merge_tag_value( isiin, &tag, &value, &tag_added );
Packit 89ede9
			if ( status!=BIBL_OK ) {
Packit 89ede9
				ret = 0;
Packit 89ede9
				goto out;
Packit 89ede9
			}
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	strs_free( &value, &tag, NULL );
Packit 89ede9
	return ret;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: int isiin_typef()
Packit 89ede9
*****************************************************/
Packit 89ede9
static int
Packit 89ede9
isiin_typef( fields *isiin, char *filename, int nref, param *p )
Packit 89ede9
{
Packit 89ede9
	int ntypename, nrefname, is_default;
Packit 89ede9
	char *refname = "", *typename="";
Packit 89ede9
Packit 89ede9
	ntypename = fields_find( isiin, "PT", LEVEL_MAIN );
Packit 89ede9
	nrefname  = fields_find( isiin, "UT", LEVEL_MAIN );
Packit 89ede9
Packit 89ede9
	if ( nrefname!=FIELDS_NOTFOUND )  refname  = fields_value( isiin, nrefname,  FIELDS_CHRP_NOUSE );
Packit 89ede9
	if ( ntypename!=FIELDS_NOTFOUND ) typename = fields_value( isiin, ntypename, FIELDS_CHRP_NOUSE );
Packit 89ede9
Packit 89ede9
	return get_reftype( typename, nref, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: int isiin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
Packit 89ede9
*****************************************************/
Packit 89ede9
Packit 89ede9
/* pull off authors first--use AF before AU */
Packit 89ede9
static int
Packit 89ede9
isiin_addauthors( fields *isiin, fields *info, int reftype, variants *all, int nall, slist *asis, slist *corps )
Packit 89ede9
{
Packit 89ede9
	char *newtag, *authortype, use_af[]="AF", use_au[]="AU";
Packit 89ede9
	int level, i, n, has_af=0, has_au=0, nfields, ok;
Packit 89ede9
	str *t, *d;
Packit 89ede9
Packit 89ede9
	nfields = fields_num( isiin );
Packit 89ede9
	for ( i=0; i
Packit 89ede9
		t = fields_tag( isiin, i, FIELDS_STRP );
Packit 89ede9
		if ( !strcasecmp( t->data, "AU" ) ) has_au++;
Packit 89ede9
		if ( !strcasecmp( t->data, "AF" ) ) has_af++;
Packit 89ede9
	}
Packit 89ede9
	if ( has_af ) authortype = use_af;
Packit 89ede9
	else if ( has_au ) authortype = use_au;
Packit 89ede9
	else return BIBL_OK; /* no authors */
Packit 89ede9
Packit 89ede9
	for ( i=0; i
Packit 89ede9
		t = fields_tag( isiin, i, FIELDS_STRP );
Packit 89ede9
		if ( strcasecmp( t->data, authortype ) ) continue;
Packit 89ede9
		d = fields_value( isiin, i, FIELDS_STRP );
Packit 89ede9
		n = process_findoldtag( authortype, reftype, all, nall );
Packit 89ede9
		level = ((all[reftype]).tags[n]).level;
Packit 89ede9
		newtag = all[reftype].tags[n].newstr;
Packit 89ede9
		ok = name_add( info, newtag, d->data, level, asis, corps );
Packit 89ede9
		if ( !ok ) return BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
	return BIBL_OK;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
isiin_keyword( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
Packit 89ede9
{
Packit 89ede9
	int fstatus, status = BIBL_OK;
Packit 89ede9
	char *p = invalue->data;
Packit 89ede9
	str keyword;
Packit 89ede9
Packit 89ede9
	str_init( &keyword );
Packit 89ede9
	while ( *p ) {
Packit 89ede9
		p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
Packit 89ede9
		if ( str_memerr( &keyword ) ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
		if ( str_has_value( &keyword ) ) {
Packit 89ede9
			fstatus = fields_add( bibout, outtag, keyword.data, level );
Packit 89ede9
			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	str_free( &keyword );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static void
Packit 89ede9
isiin_report_notag( param *p, char *tag )
Packit 89ede9
{
Packit 89ede9
	if ( p->verbose && strcmp( tag, "PT" ) ) {
Packit 89ede9
		if ( p->progname ) fprintf( stderr, "%s: ", p->progname );
Packit 89ede9
		fprintf( stderr, "Did not identify ISI tag '%s'\n", tag );
Packit 89ede9
	}
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
isiin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
Packit 89ede9
{
Packit 89ede9
	static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
Packit 89ede9
		[ 0 ... NUM_REFTYPES-1 ] = generic_null,
Packit 89ede9
		[ SIMPLE       ] = generic_simple,
Packit 89ede9
		[ TITLE        ] = generic_title,
Packit 89ede9
		[ PERSON       ] = generic_person,
Packit 89ede9
		[ SERIALNO     ] = generic_serialno,
Packit 89ede9
		[ DATE         ] = generic_simple,
Packit 89ede9
		[ NOTES        ] = generic_notes,
Packit 89ede9
		[ KEYWORD      ] = isiin_keyword,
Packit 89ede9
	};
Packit 89ede9
Packit 89ede9
	int process, level, i, nfields, status;
Packit 89ede9
	str *intag, *invalue;
Packit 89ede9
	char *outtag;
Packit 89ede9
Packit 89ede9
	status = isiin_addauthors( bibin, bibout, reftype, p->all, p->nall, &(p->asis), &(p->corps) );
Packit 89ede9
	if ( status!=BIBL_OK ) return status;
Packit 89ede9
Packit 89ede9
	nfields = fields_num( bibin );
Packit 89ede9
	for ( i=0; i
Packit 89ede9
Packit 89ede9
		intag = fields_tag( bibin, i, FIELDS_STRP );
Packit 89ede9
		if ( !strcasecmp( str_cstr( intag ), "AU" ) || !strcasecmp( str_cstr( intag ), "AF" ) )
Packit 89ede9
			continue;
Packit 89ede9
Packit 89ede9
		if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
Packit 89ede9
			isiin_report_notag( p, str_cstr( intag ) );
Packit 89ede9
			continue;
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
		invalue = fields_value( bibin, i, FIELDS_STRP );
Packit 89ede9
Packit 89ede9
		status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
Packit 89ede9
		if ( status!=BIBL_OK ) return status;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	if ( status==BIBL_OK && p->verbose ) fields_report( bibout, stderr );
Packit 89ede9
Packit 89ede9
	return status;
Packit 89ede9
}