Blob Blame History Raw
/*
 * modsin.c
 *
 * Copyright (c) Chris Putnam 2004-2018
 *
 * Source code released under the GPL version 2
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "is_ws.h"
#include "str.h"
#include "str_conv.h"
#include "xml.h"
#include "xml_encoding.h"
#include "fields.h"
#include "name.h"
#include "reftypes.h"
#include "modstypes.h"
#include "bu_auth.h"
#include "marc_auth.h"
#include "url.h"
#include "iso639_1.h"
#include "iso639_2.h"
#include "iso639_3.h"
#include "bibutils.h"
#include "bibformats.h"

static int modsin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
static int modsin_processf( fields *medin, char *data, char *filename, long nref, param *p );

/*****************************************************
 PUBLIC: void modsin_initparams()
*****************************************************/
void
modsin_initparams( param *p, const char *progname )
{

	p->readformat       = BIBL_MODSIN;
	p->format_opts      = 0;
	p->charsetin        = BIBL_CHARSET_UNICODE;
	p->charsetin_src    = BIBL_SRC_DEFAULT;
	p->latexin          = 0;
	p->utf8in           = 1;
	p->xmlin            = 1;
	p->nosplittitle     = 0;
	p->verbose          = 0;
	p->addcount         = 0;
	p->singlerefperfile = 0;
	p->output_raw       = BIBL_RAW_WITHMAKEREFID |
	                      BIBL_RAW_WITHCHARCONVERT;

	p->readf    = modsin_readf;
	p->processf = modsin_processf;
	p->cleanf   = NULL;
	p->typef    = NULL;
	p->convertf = NULL;
	p->all      = NULL;
	p->nall     = 0;

	slist_init( &(p->asis) );
	slist_init( &(p->corps) );

	if ( !progname ) p->progname = NULL;
	else p->progname = strdup( progname );
}

/*****************************************************
 PUBLIC: int modsin_processf()
*****************************************************/

static char modsns[]="mods";

static int
modsin_detailr( xml *node, str *value )
{
	int status = BIBL_OK;
	if ( xml_has_value( node ) ) {
		if ( value->len ) str_addchar( value, ' ' );
		str_strcat( value, xml_value( node ) );
		if ( str_memerr( value ) ) return BIBL_ERR_MEMERR;
	}
	if ( node->down ) {
		status = modsin_detailr( node->down, value );
		if ( status!=BIBL_OK ) return status;
	}
	if ( node->next )
		status = modsin_detailr( node->next, value );
	return status;
}

static int
modsin_detail( xml *node, fields *info, int level )
{
	str type, value, *tp;
	int fstatus, status = BIBL_OK;
	if ( node->down ) {
		strs_init( &type, &value, NULL );
		tp = xml_attribute( node, "type" );
		if ( tp ) {
			str_strcpy( &type, tp );
			str_toupper( &type );
			if ( str_memerr( &type ) ) goto out;
		}
		status = modsin_detailr( node->down, &value );
		if ( status!=BIBL_OK ) goto out;
		if ( type.data && !strcasecmp( type.data, "PAGE" ) ) {
			fstatus = fields_add( info, "PAGES:START", value.data, level );
		} else {
			fstatus = fields_add( info, type.data, value.data, level );
		}
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
out:
		strs_free( &type, &value, NULL );
	}
	return status;
}

static int
modsin_date( xml *node, fields *info, int level, int part )
{
	int fstatus, status = BIBL_OK;
	char *tag, *p;
	str s;

	str_init( &s );

	p = xml_value_cstr( node );

	if ( p ) {

		p = str_cpytodelim( &s, skip_ws( p ), "-", 1 );
		if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; }
		if ( str_has_value( &s ) ) {
			tag = ( part ) ? "PARTDATE:YEAR" : "DATE:YEAR";
			fstatus =  fields_add( info, tag, str_cstr( &s ), level );
			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
		}

		p = str_cpytodelim( &s, skip_ws( p ), "-", 1 );
		if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; }
		if ( str_has_value( &s ) ) {
			tag = ( part ) ? "PARTDATE:MONTH" : "DATE:MONTH";
			fstatus =  fields_add( info, tag, str_cstr( &s ), level );
			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
		}

		(void) str_cpytodelim( &s, skip_ws( p ), "", 0 );
		if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; }
		if ( str_has_value( &s ) ) {
			tag = ( part ) ? "PARTDATE:DAY" : "DATE:DAY";
			fstatus =  fields_add( info, tag, str_cstr( &s ), level );
			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
		}

	}

out:
	str_free( &s );
	return status;
}

static int
modsin_pager( xml *node, str *sp, str *ep, str *tp, str *lp )
{
	int status = BIBL_OK;
	if ( xml_tag_matches_has_value( node, "start" ) ) {
		str_strcpy( sp, xml_value( node ) );
		if ( str_memerr( sp ) ) return BIBL_ERR_MEMERR;
	} else if ( xml_tag_matches_has_value( node, "end" ) ) {
		str_strcpy( ep, xml_value( node ) );
		if ( str_memerr( ep ) ) return BIBL_ERR_MEMERR;
	} else if ( xml_tag_matches_has_value( node, "total" ) ) {
		str_strcpy( tp, xml_value( node ) );
		if ( str_memerr( tp ) ) return BIBL_ERR_MEMERR;
	} else if ( xml_tag_matches_has_value( node, "list" ) ) {
		str_strcpy( lp, xml_value( node ) );
		if ( str_memerr( lp ) ) return BIBL_ERR_MEMERR;
	}
	if ( node->down ) {
		status = modsin_pager( node->down, sp, ep, tp, lp );
		if ( status!=BIBL_OK ) return status;
	}
	if ( node->next )
		status = modsin_pager( node->next, sp, ep, tp, lp );
	return status;
}

static int
modsin_page( xml *node, fields *info, int level )
{
	int fstatus, status = BIBL_OK;
	str sp, ep, tp, lp;
	xml *dnode = node->down;

	if ( !dnode ) return BIBL_OK;

	strs_init( &sp, &ep, &tp, &lp, NULL );

	status = modsin_pager( dnode, &sp, &ep, &tp, &lp );
	if ( status!=BIBL_OK ) goto out;

	if ( str_has_value( &sp ) || str_has_value( &ep ) ) {
		if ( str_has_value( &sp ) ) {
			fstatus = fields_add( info, "PAGES:START", str_cstr( &sp ), level );
			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
		}
		if ( str_has_value( &ep ) ) {
			fstatus = fields_add( info, "PAGES:STOP", str_cstr( &ep ), level );
			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
		}
	} else if ( str_has_value( &lp ) ) {
		fstatus = fields_add( info, "PAGES:START", str_cstr( &lp ), level );
		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
	}
	if ( str_has_value( &tp ) ) {
		fstatus = fields_add( info, "PAGES:TOTAL", str_cstr( &tp ), level );
		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
	}
out:
	strs_free( &sp, &ep, &tp, &lp, NULL );
	return status;
}

static int
modsin_titler( xml *node, str *title, str *subtitle )
{
	int status = BIBL_OK;
	if ( xml_tag_matches_has_value( node, "title" ) ) {
		if ( str_has_value( title ) ) str_strcatc( title, " : " );
		str_strcat( title, xml_value( node ) );
		if ( str_memerr( title ) ) return BIBL_ERR_MEMERR;
	} else if ( xml_tag_matches_has_value( node, "subTitle" ) ) {
		str_strcat( subtitle, xml_value( node ) );
		if ( str_memerr( subtitle ) ) return BIBL_ERR_MEMERR;
	}
	if ( node->down ) {
		status = modsin_titler( node->down, title, subtitle );
		if ( status!=BIBL_OK ) return status;
	}
	if ( node->next )
		status = modsin_titler( node->next, title, subtitle );
	return status;
}

static int
modsin_title( xml *node, fields *info, int level )
{
	char *titletag[2][2] = {
		{ "TITLE",    "SHORTTITLE" },
		{ "SUBTITLE", "SHORTSUBTITLE" },
	};
	int fstatus, status = BIBL_OK;
	str title, subtitle;
	xml *dnode;
	int abbr;

	dnode = node->down;
	if ( !dnode ) return status;

	strs_init( &title, &subtitle, NULL );
	abbr = xml_tag_has_attribute( node, "titleInfo", "type", "abbreviated" );

	status = modsin_titler( dnode, &title, &subtitle );
	if ( status!=BIBL_OK ) goto out;

	if ( str_has_value( &title ) ) {
		fstatus = fields_add( info, titletag[0][abbr], str_cstr( &title ), level );
		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
	}

	if ( str_has_value( &subtitle ) ) {
		fstatus = fields_add( info, titletag[1][abbr], str_cstr( &subtitle ), level );
		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
	}

out:
	strs_free( &title, &subtitle, NULL );
	return status;
}

/* modsin_marcrole_convert()
 *
 * Map MARC-authority roles for people or organizations associated
 * with a reference to internal roles.
 *
 * Take input strings with roles separated by '|' characters, e.g.
 * "author" or "author|creator" or "edt" or "editor|edt".
 */
static int
modsin_marcrole_convert( str *s, char *suffix, str *out )
{
	int i, sstatus, status = BIBL_OK;
	slist tokens;
	char *p;

	slist_init( &tokens );

	/* ...default to author on an empty string */
	if ( str_is_empty( s ) ) {
		str_strcpyc( out, "AUTHOR" );
	}

	else {
		sstatus = slist_tokenize( &tokens, s, "|", 1 );
		if ( sstatus!=SLIST_OK ) {
			status = BIBL_ERR_MEMERR;
			goto done;
		}
		/* ...take first match */
		for ( i=0; i<tokens.n; ++i ) {
			p = marc_convertrole( slist_cstr( &tokens, i ) );
			if ( p ) {
				str_strcpyc( out, p );
				goto done;
			}
		}
		/* ...otherwise just copy input */
		str_strcpy( out, slist_str( &tokens, 0 ) );
		str_toupper( out );
	}

done:
	if ( suffix ) str_strcatc( out, suffix );
	slist_free( &tokens );
	if ( str_memerr( out ) ) return BIBL_ERR_MEMERR;
	return status;
}

static int
modsin_asis_corp_r( xml *node, str *name, str *role )
{
	int status = BIBL_OK;
	if ( xml_tag_matches_has_value( node, "namePart" ) ) {
		str_strcpy( name, xml_value( node ) );
		if ( str_memerr( name ) ) return BIBL_ERR_MEMERR;
	} else if ( xml_tag_matches_has_value( node, "roleTerm" ) ) {
		if ( role->len ) str_addchar( role, '|' );
		str_strcat( role, xml_value( node ) );
		if ( str_memerr( role ) ) return BIBL_ERR_MEMERR;
	}
	if ( node->down ) {
		status = modsin_asis_corp_r( node->down, name, role );
		if ( status!=BIBL_OK ) return status;
	}
	if ( node->next )
		status = modsin_asis_corp_r( node->next, name, role );
	return status;
}

static int
modsin_asis_corp( xml *node, fields *info, int level, char *suffix )
{
	int fstatus, status = BIBL_OK;
	str name, roles, role_out;
	xml *dnode = node->down;
	if ( dnode ) {
		strs_init( &name, &roles, &role_out, NULL );
		status = modsin_asis_corp_r( dnode, &name, &roles );
		if ( status!=BIBL_OK ) goto out;
		status = modsin_marcrole_convert( &roles, suffix, &role_out );
		if ( status!=BIBL_OK ) goto out;
		fstatus = fields_add( info, str_cstr( &role_out ), str_cstr( &name ), level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
out:
		strs_free( &name, &roles, &role_out, NULL );
	}
	return status;
}

static int
modsin_roler( xml *node, str *roles )
{
	if ( xml_has_value( node ) ) {
		if ( roles->len ) str_addchar( roles, '|' );
		str_strcat( roles, xml_value( node ) );
	}
	if ( str_memerr( roles ) ) return BIBL_ERR_MEMERR;
	else return BIBL_OK;
}

static int
modsin_personr( xml *node, str *familyname, str *givenname, str *suffix )
{
	int status = BIBL_OK;

	if ( !xml_has_value( node ) ) return status;

	if ( xml_tag_has_attribute( node, "namePart", "type", "family" ) ) {
		if ( str_has_value( familyname ) ) str_addchar( familyname, ' ' );
		str_strcat( familyname, xml_value( node ) );
		if ( str_memerr( familyname ) ) status = BIBL_ERR_MEMERR;
	}

	else if ( xml_tag_has_attribute( node, "namePart", "type", "suffix"         ) ||
	          xml_tag_has_attribute( node, "namePart", "type", "termsOfAddress" ) ) {
		if ( str_has_value( suffix ) ) str_addchar( suffix, ' ' );
		str_strcat( suffix, xml_value( node ) );
		if ( str_memerr( suffix ) ) status = BIBL_ERR_MEMERR;
	}

	else if ( xml_tag_has_attribute( node, "namePart", "type", "date" ) ) {
		/* no nothing */
	}

	else {
		if ( str_has_value( givenname ) ) str_addchar( givenname, '|' );
		str_strcat( givenname, xml_value( node ) );
		if ( str_memerr( givenname ) ) status = BIBL_ERR_MEMERR;
	}

	return status;
}

static int
modsin_person( xml *node, fields *info, int level )
{
	str familyname, givenname, name, suffix, roles, role_out;
	int fstatus, status = BIBL_OK;
	xml *dnode, *rnode;

	dnode = node->down;
	if ( !dnode ) return status;

	strs_init( &name, &familyname, &givenname, &suffix, &roles, &role_out, NULL );

	while ( dnode ) {

		if ( xml_tag_matches( dnode, "namePart" ) ) {
			status = modsin_personr( dnode, &familyname, &givenname, &suffix );
			if ( status!=BIBL_OK ) goto out;
		}

		else if ( xml_tag_matches( dnode, "role" ) ) {
			rnode = dnode->down;
			while ( rnode ) {
				if ( xml_tag_matches( rnode, "roleTerm" ) ) {
					status = modsin_roler( rnode, &roles );
					if ( status!=BIBL_OK ) goto out;
				}
				rnode = rnode->next;
			}
		}

		dnode = dnode->next;

	}

	/*
	 * Handle:
	 *          <namePart type='given'>Noah A.</namePart>
	 *          <namePart type='family'>Smith</namePart>
	 * without mangling the order of "Noah A."
	 */
	if ( str_has_value( &familyname ) ) {
		str_strcpy( &name, &familyname );
		if ( givenname.len ) {
			str_addchar( &name, '|' );
			str_strcat( &name, &givenname );
		}
	}

	/*
	 * Handle:
	 *          <namePart>Noah A. Smith</namePart>
	 * with name order mangling.
	 */
	else {
		if ( str_has_value( &givenname ) )
			name_parse( &name, &givenname, NULL, NULL );
	}

	if ( str_has_value( &suffix ) ) {
		str_strcatc( &name, "||" );
		str_strcat( &name, &suffix );
	}

	if ( str_memerr( &name ) ) {
		status=BIBL_ERR_MEMERR;
		goto out;
	}

	status = modsin_marcrole_convert( &roles, NULL, &role_out );
	if ( status!=BIBL_OK ) goto out;

	fstatus = fields_add_can_dup( info, str_cstr( &role_out ), str_cstr( &name ), level );
	if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;

out:
	strs_free( &name, &familyname, &givenname, &suffix, &roles, &role_out, NULL );
	return status;
}

static int
modsin_placeterm_text( xml *node, fields *info, int level, int school )
{
	char address_tag[] = "ADDRESS";
	char school_tag[]  = "SCHOOL";
	char *tag;
	int fstatus;

	tag = ( school ) ? school_tag : address_tag;

	fstatus = fields_add( info, tag, xml_value_cstr( node ), level );
	if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;

	return BIBL_OK;
}

static int
modsin_placeterm_code( xml *node, fields *info, int level )
{
	int fstatus, status = BIBL_OK;
	str s, *auth;

	str_init( &s );

	auth = xml_attribute( node, "authority" );
	if ( auth && auth->len ) {
		str_strcpy( &s, auth );
		str_addchar( &s, '|' );
	}
	str_strcat( &s, xml_value( node ) );

	if ( str_memerr( &s ) ) {
		status = BIBL_ERR_MEMERR;
		goto out;
	}

	fstatus = fields_add( info, "CODEDADDRESS", str_cstr( &s ), level );
	if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
out:
	str_free( &s );
	return status;
}

static int
modsin_placeterm( xml *node, fields *info, int level, int school )
{
	int status = BIBL_OK;
	str *type;

	type = xml_attribute( node, "type" );
	if ( str_has_value( type ) ) {
		if ( !strcmp( str_cstr( type ), "text" ) )
			status = modsin_placeterm_text( node, info, level, school );
		else if ( !strcmp( str_cstr( type ), "code" ) )
			status = modsin_placeterm_code( node, info, level );
	}

	return status;
}

static int
modsin_placer( xml *node, fields *info, int level, int school )
{
	int status = BIBL_OK;

	if ( xml_tag_has_attribute( node, "place", "type", "school" ) ) {
		school = 1;
	} else if ( xml_tag_matches( node, "placeTerm" ) ) {
		status = modsin_placeterm( node, info, level, school );
	}

	if ( node->down ) {
		status = modsin_placer( node->down, info, level, school );
		if ( status!=BIBL_OK ) return status;
	}

	if ( node->next ) status = modsin_placer( node->next, info, level, school );

	return status;
}

static int
modsin_origininfor( xml *node, fields *info, int level, str *pub, str *add, str *addc, str *ed, str *iss )
{
	int status = BIBL_OK;

	if ( xml_tag_matches( node, "dateIssued" ) ) {
		status = modsin_date( node, info, level, 0 );
	} else if ( xml_tag_matches( node, "place" ) ) {
		status = modsin_placer( node, info, level, 0 );
	} else if ( xml_tag_matches_has_value( node, "publisher" ) ) {
		str_strcat( pub, xml_value( node ) );
		if ( str_memerr( pub ) ) return BIBL_ERR_MEMERR;
	} else if ( xml_tag_matches_has_value( node, "edition" ) ) {
		str_strcat( ed, xml_value( node ) );
		if( str_memerr( ed ) ) return BIBL_ERR_MEMERR;
	} else if ( xml_tag_matches_has_value( node, "issuance" ) ) {
		str_strcat( iss, xml_value( node ) );
		if ( str_memerr( iss ) ) return BIBL_ERR_MEMERR;
	}
	if ( status!=BIBL_OK ) return status;

	if ( node->down ) {
		status = modsin_origininfor( node->down, info, level, pub, add, addc, ed, iss );
		if ( status!=BIBL_OK ) return status;
	}

	if ( node->next )
		status = modsin_origininfor( node->next, info, level, pub, add, addc, ed, iss );

	return status;
}

static int
modsin_origininfo( xml *node, fields *info, int level )
{
	str publisher, address, addcode, edition, issuance;
	int fstatus, status = BIBL_OK;
	if ( node->down ) {
		strs_init( &publisher, &address, &addcode, &edition, &issuance, NULL );
		status = modsin_origininfor( node->down, info, level, &publisher, 
				&address, &addcode, &edition, &issuance );
		if ( status!=BIBL_OK ) goto out;
		if ( str_has_value( &publisher ) ) {
			fstatus = fields_add( info, "PUBLISHER", str_cstr( &publisher ), level );
			if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; }
		}
		if ( str_has_value( &address ) ) {
			fstatus = fields_add( info, "ADDRESS", str_cstr( &address ), level );
			if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; }
		}
		if ( str_has_value( &addcode ) ) {
			fstatus = fields_add( info, "CODEDADDRESS", str_cstr( &addcode ), level );
			if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; }
		}
		if ( str_has_value( &edition ) ) {
			fstatus = fields_add( info, "EDITION", str_cstr( &edition ), level );
			if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; }
		}
		if ( str_has_value( &issuance ) ) {
			fstatus = fields_add( info, "ISSUANCE", str_cstr( &issuance ), level );
			if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; }
		}
out:
		strs_free( &publisher, &address, &addcode, &edition, &issuance, NULL );
	}
	return status;
}

static int
modsin_subjectr( xml *node, fields *info, int level )
{
	int fstatus, status = BIBL_OK;
	if ( xml_tag_has_attribute( node, "topic", "class", "primary" ) && xml_has_value( node ) ) {
		fstatus = fields_add( info, "EPRINTCLASS", xml_value_cstr( node ), level );
		if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	}
	else if ( xml_tag_matches_has_value( node, "topic" ) ) {
		fstatus = fields_add( info, "KEYWORD", xml_value_cstr( node ), level );
		if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	}
	else if ( xml_tag_matches_has_value( node, "geographic" ) ) {
		fstatus = fields_add( info, "KEYWORD", xml_value_cstr( node ), level );
		if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	}
	if ( node->down ) {
		status = modsin_subjectr( node->down, info, level );
		if ( status!=BIBL_OK ) return status;
	}
	if ( node->next ) status = modsin_subjectr( node->next, info, level );
	return status;
}

static int
modsin_subject( xml *node, fields *info, int level )
{
	int status = BIBL_OK;
	if ( node->down ) status = modsin_subjectr( node->down, info, level );
	return status;
}

static int
modsin_id1( xml *node, fields *info, int level )
{
	int fstatus;
	str *ns;
	ns = xml_attribute( node, "ID" );
	if ( str_has_value( ns ) ) {
		fstatus = fields_add( info, "REFNUM", str_cstr( ns ), level );
		if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	}
	return BIBL_OK;
}

/* modsin_genre()
 *
 * MARC authority terms tagged with "GENRE:MARC"
 * bibutils authority terms tagged with "GENRE:BIBUTILS"
 * unknown terms tagged with "GENRE:UNKNOWN"
 */
static int
modsin_genre( xml *node, fields *info, int level )
{
	int fstatus;
	char *d;

	if ( !xml_has_value( node ) ) return BIBL_OK;

	d = xml_value_cstr( node );

	/* ...handle special genres in KTH DivA */
	if ( !strcmp( d, "conferenceProceedings" ) || !strcmp( d, "conferencePaper" ) )
		d = "conference publication";
	else if ( !strcmp( d, "artisticOutput" ) || !strcmp( d, "other" ) )
		d = "miscellaneous";
	else if ( !strcmp( d, "studentThesis" ) )
		d = "thesis";
	else if ( !strcmp( d, "monographDoctoralThesis" ) )
		d = "Ph.D. thesis";
	else if ( !strcmp( d, "comprehensiveDoctoralThesis" ) )
		d = "Ph.D. thesis";
	else if ( !strcmp( d, "monographLicentiateThesis" ) )
		d = "Licentiate thesis";
	else if ( !strcmp( d, "comprehensiveLicentiateThesis" ) )
		d = "Licentiate thesis";

	if ( is_marc_genre( d ) )
		fstatus = fields_add( info, "GENRE:MARC", d, level );
	else if ( is_bu_genre( d ) )
		fstatus = fields_add( info, "GENRE:BIBUTILS", d, level );
	else
		fstatus = fields_add( info, "GENRE:UNKNOWN", d, level );

	if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	else return BIBL_OK;
}

/* in MODS version 3.5
 * <languageTerm type="text">....</languageTerm>
 * <languageTerm type="code" authority="xxx">...</languageTerm>
 * xxx = rfc3066
 * xxx = iso639-2b
 * xxx = iso639-3
 * xxx = rfc4646
 * xxx = rfc5646
 */
static int
modsin_languager( xml *node, fields *info, int level )
{
	int fstatus, status = BIBL_OK;
	char *d = NULL;
	if ( xml_tag_matches( node, "languageTerm" ) ) {
		if ( xml_has_value( node ) ) {
			if ( xml_has_attribute( node, "type", "code" ) ) {
				if ( xml_has_attribute( node, "authority", "iso639-1" ) )
					d = iso639_1_from_code( xml_value_cstr( node ) );
				else if ( xml_has_attribute( node, "authority", "iso639-2b" ) )
					d = iso639_2_from_code( xml_value_cstr( node ) );
				else if ( xml_has_attribute( node, "authority", "iso639-3" ))
					d = iso639_3_from_code( xml_value_cstr( node ) );
			}
			if ( !d ) d  = xml_value_cstr( node );
			fstatus = fields_add( info, "LANGUAGE", d, level );
			if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
		}
	}
	if ( node->next ) status = modsin_languager( node->next, info, level );
	return status;
}

static int
modsin_language( xml *node, fields *info, int level )
{
	int fstatus, status = BIBL_OK;
	/* Old versions of MODS had <language>English</language> */
	if ( xml_has_value( node ) ) {
		fstatus = fields_add( info, "LANGUAGE", xml_value_cstr( node ), level );
		if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	}

	/* New versions of MODS have <language><languageTerm>English</languageTerm></language> */
	if ( node->down ) status = modsin_languager( node->down, info, level );
	return status;
}

static int
modsin_simple( xml *node, fields *info, char *tag, int level )
{
	int fstatus;
	if ( xml_has_value( node ) ) {
		fstatus = fields_add( info, tag, xml_value_cstr( node ), level );
		if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	}
	return BIBL_OK;
}

static int
modsin_locationr( xml *node, fields *info, int level )
{
	int fstatus, status = BIBL_OK;
	char *url        = "URL";
	char *fileattach = "FILEATTACH";
	char *tag=NULL;

	if ( xml_tag_matches( node, "url" ) ) {
		if ( xml_has_attribute( node, "access", "raw object" ) )
			tag = fileattach;
		else
			tag = url;
	}
	else if ( xml_tag_matches( node, "physicalLocation" ) ) {
		if ( xml_has_attribute( node, "type", "school" ) )
			tag = "SCHOOL";
		else
			tag = "LOCATION";
	}

	if ( tag == url ) {
		status = urls_split_and_add( xml_value_cstr( node ), info, level );
		if ( status!=BIBL_OK ) return status;
	}
	else if ( tag ) {
		fstatus = fields_add( info, tag, xml_value_cstr( node ), level );
		if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	}

	if ( node->down ) {
		status = modsin_locationr( node->down, info, level );
		if ( status!=BIBL_OK ) return status;
	}
	if ( node->next ) status = modsin_locationr( node->next, info, level );
	return status;
}

static int
modsin_location( xml *node, fields *info, int level )
{
	int status = BIBL_OK;
	if ( node->down ) status = modsin_locationr( node->down, info, level );
	return status;
}

static int
modsin_descriptionr( xml *node, str *s )
{
	int status = BIBL_OK;
	if ( xml_tag_matches( node, "extent" ) ||
	     xml_tag_matches( node, "note" ) ) {
		str_strcpy( s, &(node->value) );
		if ( str_memerr( s ) ) return BIBL_ERR_MEMERR;
	}
	if ( node->down ) {
		status = modsin_descriptionr( node->down, s );
		if ( status!=BIBL_OK ) return status;
	}
	if ( node->next ) status = modsin_descriptionr( node->next, s );
	return status;
}

static int
modsin_description( xml *node, fields *info, int level )
{
	int fstatus, status = BIBL_OK;
	str s;
	str_init( &s );
	if ( node->down ) {
		status = modsin_descriptionr( node->down, &s );
		if ( status!=BIBL_OK ) goto out;
	} else {
		if ( node->value.len > 0 )
			str_strcpy( &s, &(node->value) );
		if ( str_memerr( &s ) ) {
			status = BIBL_ERR_MEMERR;
			goto out;
		}
	}
	if ( str_has_value( &s ) ) {
		fstatus = fields_add( info, "DESCRIPTION", str_cstr( &s ), level );
		if ( fstatus!=FIELDS_OK ) {
			status = BIBL_ERR_MEMERR;
			goto out;
		}
	}
out:
	str_free( &s );
	return status;
}

static int
modsin_partr( xml *node, fields *info, int level )
{
	int status = BIBL_OK;
	if ( xml_tag_matches( node, "detail" ) )
		status = modsin_detail( node, info, level );
	else if ( xml_tag_has_attribute( node, "extent", "unit", "page" ) )
		status = modsin_page( node, info, level );
	else if ( xml_tag_has_attribute( node, "extent", "unit", "pages" ) )
		status = modsin_page( node, info, level );
	else if ( xml_tag_matches( node, "date" ) )
		status = modsin_date( node, info, level, 1 );
	if ( status!=BIBL_OK ) return status;
	if ( node->next ) status = modsin_partr( node->next, info, level );
	return status;
}

static int
modsin_part( xml *node, fields *info, int level )
{
	if ( node->down ) return modsin_partr( node->down, info, level );
	return BIBL_OK;
}

/* <classification authority="lcc">Q3 .A65</classification> */
static int
modsin_classification( xml *node, fields *info, int level )
{
	int fstatus, status = BIBL_OK;
	char *tag;
	if ( xml_has_value( node ) ) {
		if ( xml_tag_has_attribute( node, "classification", "authority", "lcc" ) )
			tag = "LCC";
		else
			tag = "CLASSIFICATION";
		fstatus = fields_add( info, tag, xml_value_cstr( node ), level );
		if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	}
	if ( node->down ) status = modsin_classification( node->down, info, level );
	return status;
}

static int
modsin_recordinfo( xml *node, fields *info, int level )
{
	int fstatus;
	xml *curr;

	/* extract recordIdentifier */
	curr = node;
	while ( curr ) {
		if ( xml_tag_matches_has_value( curr, "recordIdentifier" ) ) {
			fstatus = fields_add( info, "REFNUM", xml_value_cstr( curr ), level );
			if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
		}
		curr = curr->next;
	}
	return BIBL_OK;
}

static int
modsin_identifier( xml *node, fields *info, int level )
{
	convert ids[] = {
		{ "citekey",       "REFNUM",      0, 0 },
		{ "issn",          "ISSN",        0, 0 },
		{ "coden",         "CODEN",       0, 0 },
		{ "isbn",          "ISBN",        0, 0 },
		{ "doi",           "DOI",         0, 0 },
		{ "url",           "URL",         0, 0 },
		{ "uri",           "URL",         0, 0 },
		{ "pmid",          "PMID",        0, 0 },
		{ "pubmed",        "PMID",        0, 0 },
		{ "medline",       "MEDLINE",     0, 0 },
		{ "pmc",           "PMC",         0, 0 },
		{ "arXiv",         "ARXIV",       0, 0 },
		{ "MRnumber",      "MRNUMBER",    0, 0 },
		{ "pii",           "PII",         0, 0 },
		{ "isi",           "ISIREFNUM",   0, 0 },
		{ "serial number", "SERIALNUMBER",0, 0 },
		{ "accessnum",     "ACCESSNUM",   0, 0 },
		{ "jstor",         "JSTOR",       0, 0 },
	};
	int i, fstatus, n = sizeof( ids ) / sizeof( ids[0] );
	if ( node->value.len==0 ) return BIBL_OK;
	for ( i=0; i<n; ++i ) {
		if ( xml_tag_has_attribute( node, "identifier", "type", ids[i].mods ) ) {
			fstatus = fields_add( info, ids[i].internal, xml_value_cstr( node ), level );
			if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
		}
	}
	return BIBL_OK;
}

static int
modsin_mods( xml *node, fields *info, int level )
{
	convert simple[] = {
		{ "note",            "NOTES",    0, 0 },
		{ "abstract",        "ABSTRACT", 0, 0 },
		{ "bibtex-annote",   "ANNOTE",   0, 0 },
		{ "typeOfResource",  "RESOURCE", 0, 0 },
		{ "tableOfContents", "CONTENTS", 0, 0 },
	};
	int nsimple = sizeof( simple ) / sizeof( simple[0] );
	int i, found = 0, status = BIBL_OK;

	for ( i=0; i<nsimple && found==0; i++ ) {
		if ( xml_tag_matches( node, simple[i].mods ) ) {
			status = modsin_simple( node, info, simple[i].internal, level );
			if ( status!=BIBL_OK ) return status;
			found = 1;
		}
	}

	if ( !found ) {
		if ( xml_tag_matches( node, "titleInfo" ) )
			modsin_title( node, info, level );
		else if ( xml_tag_has_attribute( node, "name", "type", "personal" ) )
			status = modsin_person( node, info, level );
		else if ( xml_tag_has_attribute( node, "name", "type", "corporate" ) )
			status = modsin_asis_corp( node, info, level, ":CORP" );
		else if ( xml_tag_matches( node, "name" ) )
			status = modsin_asis_corp( node, info, level, ":ASIS" );
		else if ( xml_tag_matches( node, "recordInfo" ) && node->down )
			status = modsin_recordinfo( node->down, info, level );
		else if  ( xml_tag_matches( node, "part" ) )
			modsin_part( node, info, level );
		else if ( xml_tag_matches( node, "identifier" ) )
			status = modsin_identifier( node, info, level );
		else if ( xml_tag_matches( node, "originInfo" ) )
			status = modsin_origininfo( node, info, level );
		else if ( xml_tag_matches( node, "language" ) )
			status = modsin_language( node, info, level );
		else if ( xml_tag_matches( node, "genre" ) )
			status = modsin_genre( node, info, level );
		else if ( xml_tag_matches( node, "date" ) )
			status = modsin_date( node, info, level, 0 );
		else if ( xml_tag_matches( node, "subject" ) )
			status = modsin_subject( node, info, level );
		else if ( xml_tag_matches( node, "classification" ) )
			status = modsin_classification( node, info, level );
		else if ( xml_tag_matches( node, "location" ) )
			status = modsin_location( node, info, level );
		else if ( xml_tag_matches( node, "physicalDescription" ) )
			status = modsin_description( node, info, level );
		else if ( xml_tag_has_attribute( node, "relatedItem", "type", "host" ) ||
			  xml_tag_has_attribute( node, "relatedItem", "type", "series" ) ) {
			if ( node->down ) status = modsin_mods( node->down, info, level+1 );
		}
		else if ( xml_tag_has_attribute( node, "relatedItem", "type", "original" ) ) {
			if ( node->down ) status = modsin_mods( node->down, info, LEVEL_ORIG );
		}

		if ( status!=BIBL_OK ) return status;
	}

	if ( node->next ) status = modsin_mods( node->next, info, level );

	return status;
}

static int
modsin_assembleref( xml *node, fields *info )
{
	int status = BIBL_OK;
	if ( xml_tag_matches( node, "mods" ) ) {
		status = modsin_id1( node, info, 0 );
		if ( status!=BIBL_OK ) return status;
		if ( node->down ) {
			status = modsin_mods( node->down, info, 0 );
			if ( status!=BIBL_OK ) return status;
		}
	} else if ( node->down ) {
		status = modsin_assembleref( node->down, info );
		if ( status!=BIBL_OK ) return status;
	}
	if ( node->next ) status = modsin_assembleref( node->next, info );
	return status;
}

static int
modsin_processf( fields *modsin, char *data, char *filename, long nref, param *p )
{
	int status;
	xml top;

	xml_init( &top );
	xml_parse( data, &top );
	status = modsin_assembleref( &top, modsin );
	xml_free( &top );

	if ( status==BIBL_OK ) return 1;
	else return 0;
}

/*****************************************************
 PUBLIC: int modsin_readf()
*****************************************************/

static char *
modsin_startptr( char *p )
{
	char *startptr;
	startptr = xml_find_start( p, "mods:mods" );
	if ( startptr ) {
		/* set namespace if found */
		xml_pns = modsns;
	} else {
		startptr = xml_find_start( p, "mods" );
		if ( startptr ) xml_pns = NULL;
	}
	return startptr;
}

static char *
modsin_endptr( char *p )
{
	return xml_find_end( p, "mods" );
}

static int
modsin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
{
	str tmp;
	int m, file_charset = CHARSET_UNKNOWN;
	char *startptr = NULL, *endptr = NULL;

	str_init( &tmp );

	do {
		if ( line->data ) str_strcat( &tmp, line );
		if ( str_has_value( &tmp ) ) {
			m = xml_getencoding( &tmp );
			if ( m!=CHARSET_UNKNOWN ) file_charset = m;
			startptr = modsin_startptr( tmp.data );
			endptr = modsin_endptr( tmp.data );
		} else startptr = endptr = NULL;
		str_empty( line );
		if ( startptr && endptr ) {
			str_segcpy( reference, startptr, endptr );
			str_strcpyc( line, endptr );
		}
	} while ( !endptr && str_fget( fp, buf, bufsize, bufpos, line ) );

	str_free( &tmp );
	*fcharset = file_charset;
	return ( reference->len > 0 );
}