/*
 * bibtexin.c
 *
 * Copyright (c) Chris Putnam 2003-2018
 *
 * Program and source code released under the GPL version 2
 *
 */
Packit 89ede9
#include <stdio.h>
Packit 89ede9
#include <stdlib.h>
Packit 89ede9
#include <string.h>
Packit 89ede9
#include <ctype.h>
Packit 89ede9
#include "is_ws.h"
Packit 89ede9
#include "str.h"
Packit 89ede9
#include "utf8.h"
Packit 89ede9
#include "str_conv.h"
Packit 89ede9
#include "fields.h"
Packit 89ede9
#include "slist.h"
Packit 89ede9
#include "name.h"
Packit 89ede9
#include "title.h"
Packit 89ede9
#include "url.h"
Packit 89ede9
#include "reftypes.h"
Packit 89ede9
#include "bibformats.h"
Packit 89ede9
#include "generic.h"
Packit 89ede9
Packit 89ede9
static slist find    = { 0, 0, 0, NULL };
Packit 89ede9
static slist replace = { 0, 0, 0, NULL };
Packit 89ede9
Packit 89ede9
extern variants bibtex_all[];
Packit 89ede9
extern int bibtex_nall;
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: void bibtexin_initparams()
Packit 89ede9
*****************************************************/
Packit 89ede9
Packit 89ede9
static int  bibtexin_convertf( fields *bibin, fields *info, int reftype, param *p );
Packit 89ede9
static int  bibtexin_processf( fields *bibin, char *data, char *filename, long nref, param *p );
Packit 89ede9
static int  bibtexin_cleanf( bibl *bin, param *p );
Packit 89ede9
static int  bibtexin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
Packit 89ede9
static int  bibtexin_typef( fields *bibin, char *filename, int nrefs, param *p );
Packit 89ede9
Packit 89ede9
void
Packit 89ede9
bibtexin_initparams( param *p, const char *progname )
Packit 89ede9
{
Packit 89ede9
	p->readformat       = BIBL_BIBTEXIN;
Packit 89ede9
	p->charsetin        = BIBL_CHARSET_DEFAULT;
Packit 89ede9
	p->charsetin_src    = BIBL_SRC_DEFAULT;
Packit 89ede9
	p->latexin          = 1;
Packit 89ede9
	p->xmlin            = 0;
Packit 89ede9
	p->utf8in           = 0;
Packit 89ede9
	p->nosplittitle     = 0;
Packit 89ede9
	p->verbose          = 0;
Packit 89ede9
	p->addcount         = 0;
Packit 89ede9
	p->output_raw       = 0;
Packit 89ede9
Packit 89ede9
	p->readf    = bibtexin_readf;
Packit 89ede9
	p->processf = bibtexin_processf;
Packit 89ede9
	p->cleanf   = bibtexin_cleanf;
Packit 89ede9
	p->typef    = bibtexin_typef;
Packit 89ede9
	p->convertf = bibtexin_convertf;
Packit 89ede9
	p->all      = bibtex_all;
Packit 89ede9
	p->nall     = bibtex_nall;
Packit 89ede9
Packit 89ede9
	slist_init( &(p->asis) );
Packit 89ede9
	slist_init( &(p->corps) );
Packit 89ede9
Packit 89ede9
	if ( !progname ) p->progname = NULL;
Packit 89ede9
	else p->progname = strdup( progname );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: int bibtexin_readf()
Packit 89ede9
*****************************************************/
Packit 89ede9
Packit 89ede9
/*
Packit 89ede9
 * readf can "read too far", so we store this information in line, thus
Packit 89ede9
 * the next new text is in line, either from having read too far or
Packit 89ede9
 * from the next chunk obtained via str_fget()
Packit 89ede9
 *
Packit 89ede9
 * return 1 on success, 0 on error/end-of-file
Packit 89ede9
 *
Packit 89ede9
 */
Packit 89ede9
static int
Packit 89ede9
readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
Packit 89ede9
{
Packit 89ede9
	if ( line->len ) return 1;
Packit 89ede9
	else return str_fget( fp, buf, bufsize, bufpos, line );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*
Packit 89ede9
 * readf()
Packit 89ede9
 *
Packit 89ede9
 * returns zero if cannot get reference and hit end of-file
Packit 89ede9
 * returns 1 if last reference in file, 2 if reference within file
Packit 89ede9
 */
Packit 89ede9
static int
Packit 89ede9
bibtexin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
Packit 89ede9
{
Packit 89ede9
	int haveref = 0;
Packit 89ede9
	char *p;
Packit 89ede9
	*fcharset = CHARSET_UNKNOWN;
Packit 89ede9
	while ( haveref!=2 && readmore( fp, buf, bufsize, bufpos, line ) ) {
Packit 89ede9
		if ( line->len == 0 ) continue; /* blank line */
Packit 89ede9
		p = &(line->data[0]);
Packit 89ede9
		/* Recognize UTF8 BOM */
Packit 89ede9
		if ( line->len > 2 && 
Packit 89ede9
				(unsigned char)(p[0])==0xEF &&
Packit 89ede9
				(unsigned char)(p[1])==0xBB &&
Packit 89ede9
				(unsigned char)(p[2])==0xBF ) {
Packit 89ede9
			*fcharset = CHARSET_UNICODE;
Packit 89ede9
			p += 3;
Packit 89ede9
		}
Packit 89ede9
		p = skip_ws( p );
Packit 89ede9
		if ( *p == '%' ) { /* commented out line */
Packit 89ede9
			str_empty( line );
Packit 89ede9
			continue;
Packit 89ede9
		}
Packit 89ede9
		if ( *p == '@' ) haveref++;
Packit 89ede9
		if ( haveref && haveref<2 ) {
Packit 89ede9
			str_strcatc( reference, p );
Packit 89ede9
			str_addchar( reference, '\n' );
Packit 89ede9
			str_empty( line );
Packit 89ede9
		} else if ( !haveref ) str_empty( line );
Packit 89ede9
	
Packit 89ede9
	}
Packit 89ede9
	return haveref;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: int bibtexin_processf()
Packit 89ede9
*****************************************************/
Packit 89ede9
Packit 89ede9
static char*
Packit 89ede9
process_bibtextype( char *p, str *type )
Packit 89ede9
{
Packit 89ede9
	str tmp;
Packit 89ede9
	str_init( &tmp );
Packit 89ede9
Packit 89ede9
	if ( *p=='@' ) p++;
Packit 89ede9
	p = str_cpytodelim( &tmp, p, "{( \t\r\n", 0 );
Packit 89ede9
	p = skip_ws( p );
Packit 89ede9
	if ( *p=='{' || *p=='(' ) p++;
Packit 89ede9
	p = skip_ws( p );
Packit 89ede9
Packit 89ede9
	if ( str_has_value( &tmp ) ) str_strcpy( type, &tmp );
Packit 89ede9
	else str_empty( type );
Packit 89ede9
Packit 89ede9
	str_free( &tmp );
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static char*
Packit 89ede9
process_bibtexid( char *p, str *id )
Packit 89ede9
{
Packit 89ede9
	char *start_p = p;
Packit 89ede9
	str tmp;
Packit 89ede9
Packit 89ede9
	str_init( &tmp );
Packit 89ede9
	p = str_cpytodelim( &tmp, p, ",", 1 );
Packit 89ede9
Packit 89ede9
	if ( str_has_value( &tmp ) ) {
Packit 89ede9
		if ( strchr( tmp.data, '=' ) ) {
Packit 89ede9
			/* Endnote writes bibtex files w/o fields, try to
Packit 89ede9
			 * distinguish via presence of an equal sign.... if
Packit 89ede9
			 * it's there, assume that it's a tag/data pair instead
Packit 89ede9
			 * and roll back.
Packit 89ede9
			 */
Packit 89ede9
			p = start_p;
Packit 89ede9
			str_empty( id );
Packit 89ede9
		} else {
Packit 89ede9
			str_strcpy( id, &tmp );
Packit 89ede9
		}
Packit 89ede9
	} else {
Packit 89ede9
		str_empty( id );
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	str_free( &tmp );
Packit 89ede9
	return skip_ws( p );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static char *
Packit 89ede9
bibtex_tag( char *p, str *tag )
Packit 89ede9
{
Packit 89ede9
	p = str_cpytodelim( tag, skip_ws( p ), "= \t\r\n", 0 );
Packit 89ede9
	if ( str_memerr( tag ) ) return NULL;
Packit 89ede9
	return skip_ws( p );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static char *
Packit 89ede9
bibtex_data( char *p, fields *bibin, slist *tokens, long nref, param *pm )
Packit 89ede9
{
Packit 89ede9
	unsigned int nbracket = 0, nquotes = 0;
Packit 89ede9
	char *startp = p;
Packit 89ede9
	str tok, *t;
Packit 89ede9
Packit 89ede9
	str_init( &tok );
Packit 89ede9
	while ( p && *p ) {
Packit 89ede9
		if ( !nquotes && !nbracket ) {
Packit 89ede9
			if ( *p==',' || *p=='=' || *p=='}' || *p==')' )
Packit 89ede9
				goto out;
Packit 89ede9
		}
Packit 89ede9
		if ( *p=='\"' && nbracket==0 && ( p==startp || *(p-1)!='\\' ) ) {
Packit 89ede9
			nquotes = !nquotes;
Packit 89ede9
			str_addchar( &tok, *p );
Packit 89ede9
			if ( !nquotes ) {
Packit 89ede9
				if ( str_memerr( &tok ) ) { p=NULL; goto out; }
Packit 89ede9
				t = slist_add( tokens, &tok );
Packit 89ede9
				if ( !t ) { p=NULL; goto out0; }
Packit 89ede9
				str_empty( &tok );
Packit 89ede9
			}
Packit 89ede9
		} else if ( *p=='#' && !nquotes && !nbracket ) {
Packit 89ede9
			if ( str_has_value( &tok ) ) {
Packit 89ede9
				if ( str_memerr( &tok ) ) { p=NULL; goto out; }
Packit 89ede9
				t = slist_add( tokens, &tok );
Packit 89ede9
				if ( !t ) { p=NULL; goto out0; }
Packit 89ede9
			}
Packit 89ede9
			str_strcpyc( &tok, "#" );
Packit 89ede9
			t = slist_add( tokens, &tok );
Packit 89ede9
			if ( !t ) { p=NULL; goto out0; }
Packit 89ede9
			str_empty( &tok );
Packit 89ede9
		} else if ( *p=='{' && !nquotes && ( p==startp || *(p-1)!='\\' ) ) {
Packit 89ede9
			nbracket++;
Packit 89ede9
			str_addchar( &tok, *p );
Packit 89ede9
		} else if ( *p=='}' && !nquotes && ( p==startp || *(p-1)!='\\' ) ) {
Packit 89ede9
			nbracket--;
Packit 89ede9
			str_addchar( &tok, *p );
Packit 89ede9
			if ( nbracket==0 ) {
Packit 89ede9
				if ( str_memerr( &tok ) ) { p=NULL; goto out; }
Packit 89ede9
				t = slist_add( tokens, &tok );
Packit 89ede9
				if ( !t ) { p=NULL; goto out; }
Packit 89ede9
				str_empty( &tok );
Packit 89ede9
			}
Packit 89ede9
		} else if ( !is_ws( *p ) || nquotes || nbracket ) {
Packit 89ede9
			if ( !is_ws( *p ) ) str_addchar( &tok, *p );
Packit 89ede9
			else {
Packit 89ede9
				if ( tok.len!=0 && *p!='\n' && *p!='\r' )
Packit 89ede9
					str_addchar( &tok, *p );
Packit 89ede9
				else if ( tok.len!=0 && (*p=='\n' || *p=='\r')) {
Packit 89ede9
					str_addchar( &tok, ' ' );
Packit 89ede9
					while ( is_ws( *(p+1) ) ) p++;
Packit 89ede9
				}
Packit 89ede9
			}
Packit 89ede9
		} else if ( is_ws( *p ) ) {
Packit 89ede9
			if ( tok.len ) {
Packit 89ede9
				if ( str_memerr( &tok ) ) { p=NULL; goto out; }
Packit 89ede9
				t = slist_add( tokens, &tok );
Packit 89ede9
				if ( !t ) { p=NULL; goto out; }
Packit 89ede9
				str_empty( &tok );
Packit 89ede9
			}
Packit 89ede9
		}
Packit 89ede9
		p++;
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	if ( nbracket!=0 ) {
Packit 89ede9
		fprintf( stderr, "%s: Mismatch in number of brackets in reference %ld.\n", pm->progname, nref );
Packit 89ede9
	}
Packit 89ede9
	if ( nquotes!=0 ) {
Packit 89ede9
		fprintf( stderr, "%s: Mismatch in number of quotes in reference %ld.\n", pm->progname, nref );
Packit 89ede9
	}
Packit 89ede9
	if ( str_has_value( &tok ) ) {
Packit 89ede9
		if ( str_memerr( &tok ) ) { p = NULL; goto out; }
Packit 89ede9
		t = slist_add( tokens, &tok );
Packit 89ede9
		if ( !t ) p = NULL;
Packit 89ede9
	}
Packit 89ede9
out0:
Packit 89ede9
	str_free( &tok );
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/* replace_strings()
Packit 89ede9
 *
Packit 89ede9
 * do string replacement -- only if unprotected by quotation marks or curly brackets
Packit 89ede9
 */
Packit 89ede9
static void
Packit 89ede9
replace_strings( slist *tokens, fields *bibin, param *pm )
Packit 89ede9
{
Packit 89ede9
	int i, n, ok;
Packit 89ede9
	char *q;
Packit 89ede9
	str *s;
Packit 89ede9
	i = 0;
Packit 89ede9
	while ( i < tokens->n ) {
Packit 89ede9
		s = slist_str( tokens, i );
Packit 89ede9
		if ( !strcmp( s->data, "#" ) ) {
Packit 89ede9
		} else if ( s->data[0]!='\"' && s->data[0]!='{' ) {
Packit 89ede9
			n = slist_find( &find, s );
Packit 89ede9
			if ( n!=-1 ) {
Packit 89ede9
				str_strcpy( s, slist_str( &replace, n ) );
Packit 89ede9
			} else {
Packit 89ede9
				q = s->data;
Packit 89ede9
				ok = 1;
Packit 89ede9
				while ( *q && ok ) {
Packit 89ede9
					if ( !isdigit( *q ) ) ok = 0;
Packit 89ede9
					q++;
Packit 89ede9
				}
Packit 89ede9
			}
Packit 89ede9
		}
Packit 89ede9
		i++;
Packit 89ede9
	}
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
string_concatenate( slist *tokens, fields *bibin, long nref, param *pm )
Packit 89ede9
{
Packit 89ede9
	int i, status;
Packit 89ede9
	str *s, *t;
Packit 89ede9
	i = 0;
Packit 89ede9
	while ( i < tokens->n ) {
Packit 89ede9
		s = slist_str( tokens, i );
Packit 89ede9
		if ( !strcmp( s->data, "#" ) ) {
Packit 89ede9
			if ( i==0 || i==tokens->n-1 ) {
Packit 89ede9
				fprintf( stderr, "%s: Warning: Stray string concatenation "
Packit 89ede9
					"('#' character) in reference %ld\n", pm->progname, nref );
Packit 89ede9
				status = slist_remove( tokens, i );
Packit 89ede9
				if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
Packit 89ede9
				continue;
Packit 89ede9
			}
Packit 89ede9
			s = slist_str( tokens, i-1 );
Packit 89ede9
			if ( s->data[0]!='\"' && s->data[s->len-1]!='\"' )
Packit 89ede9
				fprintf( stderr, "%s: Warning: String concentation should "
Packit 89ede9
					"be used in context of quotations marks in reference %ld\n", pm->progname, nref );
Packit 89ede9
			t = slist_str( tokens, i+1 );
Packit 89ede9
			if ( t->data[0]!='\"' && t->data[s->len-1]!='\"' )
Packit 89ede9
				fprintf( stderr, "%s: Warning: String concentation should "
Packit 89ede9
					"be used in context of quotations marks in reference %ld\n", pm->progname, nref );
Packit 89ede9
			if ( ( s->data[s->len-1]=='\"' && t->data[0]=='\"') || (s->data[s->len-1]=='}' && t->data[0]=='{') ) {
Packit 89ede9
				str_trimend( s, 1 );
Packit 89ede9
				str_trimbegin( t, 1 );
Packit 89ede9
				str_strcat( s, t );
Packit 89ede9
			} else {
Packit 89ede9
				str_strcat( s, t );
Packit 89ede9
			}
Packit 89ede9
			status = slist_remove( tokens, i );
Packit 89ede9
			if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
Packit 89ede9
			status = slist_remove( tokens, i );
Packit 89ede9
			if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
Packit 89ede9
		} else i++;
Packit 89ede9
	}
Packit 89ede9
	return BIBL_OK;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/* return NULL on memory error */
Packit 89ede9
static char *
Packit 89ede9
process_bibtexline( char *p, str *tag, str *data, uchar stripquotes, fields *bibin, long nref, param *pm )
Packit 89ede9
{
Packit 89ede9
	int i, status;
Packit 89ede9
	slist tokens;
Packit 89ede9
	str *s;
Packit 89ede9
Packit 89ede9
	str_empty( data );
Packit 89ede9
Packit 89ede9
	p = bibtex_tag( p, tag );
Packit 89ede9
Packit 89ede9
	if ( str_is_empty( tag ) ) {
Packit 89ede9
		/* ...skip this line */
Packit 89ede9
		while ( *p && *p!='\n' && *p!='\r' ) p++;
Packit 89ede9
		while ( *p=='\n' || *p=='\r' ) p++;
Packit 89ede9
		return p;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	slist_init( &tokens );
Packit 89ede9
Packit 89ede9
	if ( *p=='=' ) {
Packit 89ede9
		p = bibtex_data( p+1, bibin, &tokens, nref, pm );
Packit 89ede9
		if ( p==NULL ) goto out;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	replace_strings( &tokens, bibin, pm );
Packit 89ede9
Packit 89ede9
	status = string_concatenate( &tokens, bibin, nref, pm );
Packit 89ede9
	if ( status!=BIBL_OK ) {
Packit 89ede9
		p = NULL;
Packit 89ede9
		goto out;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	for ( i=0; i
Packit 89ede9
		s = slist_str( &tokens, i );
Packit 89ede9
		if ( ( stripquotes && s->data[0]=='\"' && s->data[s->len-1]=='\"' ) ||
Packit 89ede9
		     ( s->data[0]=='{' && s->data[s->len-1]=='}' ) ) {
Packit 89ede9
			str_trimbegin( s, 1 );
Packit 89ede9
			str_trimend( s, 1 );
Packit 89ede9
		}
Packit 89ede9
		str_strcat( data, slist_str( &tokens, i ) );
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	slist_free( &tokens );
Packit 89ede9
	return p;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/* process_cite()
Packit 89ede9
 *
Packit 89ede9
 */
Packit 89ede9
static int
Packit 89ede9
process_cite( fields *bibin, char *p, char *filename, long nref, param *pm )
Packit 89ede9
{
Packit 89ede9
	int fstatus, status = BIBL_OK;
Packit 89ede9
	str type, id, tag, data;
Packit 89ede9
Packit 89ede9
	strs_init( &type, &id, &tag, &data, NULL );
Packit 89ede9
Packit 89ede9
	p = process_bibtextype( p, &type );
Packit 89ede9
	p = process_bibtexid( p, &id );
Packit 89ede9
Packit 89ede9
	if ( str_is_empty( &type ) || str_is_empty( &id ) ) goto out;
Packit 89ede9
Packit 89ede9
	fstatus = fields_add( bibin, "INTERNAL_TYPE", str_cstr( &type ), 0 );
Packit 89ede9
	if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
Packit 89ede9
	fstatus = fields_add( bibin, "REFNUM", str_cstr( &id), 0 );
Packit 89ede9
	if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
Packit 89ede9
	while ( *p ) {
Packit 89ede9
		p = process_bibtexline( p, &tag, &data, 1, bibin, nref, pm );
Packit 89ede9
		if ( p==NULL ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
		/* no anonymous or empty fields allowed */
Packit 89ede9
		if ( str_has_value( &tag ) && str_has_value( &data ) ) {
Packit 89ede9
			fstatus = fields_add( bibin, str_cstr( &tag ), str_cstr( &data ), 0 );
Packit 89ede9
			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
		}
Packit 89ede9
		strs_empty( &tag, &data, NULL );
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	strs_free( &type, &id, &tag, &data, NULL );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/* process_string()
Packit 89ede9
 *
Packit 89ede9
 * Handle lines like:
Packit 89ede9
 *
Packit 89ede9
 * '@STRING{TL = {Tetrahedron Lett.}}'
Packit 89ede9
 *
Packit 89ede9
 * p should point to just after '@STRING'
Packit 89ede9
 *
Packit 89ede9
 * In BibTeX, if a string is defined several times, the last one is kept.
Packit 89ede9
 *
Packit 89ede9
 */
Packit 89ede9
static int
Packit 89ede9
process_string( char *p, long nref, param *pm )
Packit 89ede9
{
Packit 89ede9
	int n, status = BIBL_OK;
Packit 89ede9
	str s1, s2, *t;
Packit 89ede9
	strs_init( &s1, &s2, NULL );
Packit 89ede9
	while ( *p && *p!='{' && *p!='(' ) p++;
Packit 89ede9
	if ( *p=='{' || *p=='(' ) p++;
Packit 89ede9
	p = process_bibtexline( skip_ws( p ), &s1, &s2, 0, NULL, nref, pm );
Packit 89ede9
	if ( p==NULL ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
	if ( str_has_value( &s2 ) ) {
Packit 89ede9
		str_findreplace( &s2, "\\ ", " " );
Packit 89ede9
	}
Packit 89ede9
	if ( str_has_value( &s1 ) ) {
Packit 89ede9
		n = slist_find( &find, &s1 );
Packit 89ede9
		if ( n==-1 ) {
Packit 89ede9
			t = slist_add( &find, &s1 );
Packit 89ede9
			if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
			if ( str_has_value( &s2 ) ) t = slist_add( &replace, &s2 );
Packit 89ede9
			else t = slist_addc( &replace, "" );
Packit 89ede9
			if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
		} else {
Packit 89ede9
			if ( str_has_value( &s2 ) ) t = slist_set( &replace, n, &s2 );
Packit 89ede9
			else t = slist_setc( &replace, n, "" );
Packit 89ede9
			if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	strs_free( &s1, &s2, NULL );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/* bibtexin_processf()
Packit 89ede9
 *
Packit 89ede9
 * Handle '@STRING', '@reftype', and ignore '@COMMENT'
Packit 89ede9
 */
Packit 89ede9
static int
Packit 89ede9
bibtexin_processf( fields *bibin, char *data, char *filename, long nref, param *p )
Packit 89ede9
{
Packit 89ede9
	if ( !strncasecmp( data, "@STRING", 7 ) ) {
Packit 89ede9
		process_string( data+7, nref, p );
Packit 89ede9
		return 0;
Packit 89ede9
	} else if ( !strncasecmp( data, "@COMMENT", 8 ) ) {
Packit 89ede9
		/* Not sure if these are real Bibtex, but not references */
Packit 89ede9
		return 0;
Packit 89ede9
	} else {
Packit 89ede9
		process_cite( bibin, data, filename, nref, p );
Packit 89ede9
		return 1;
Packit 89ede9
	}
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: int bibtexin_cleanf()
Packit 89ede9
*****************************************************/
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtex_protected( str *data )
Packit 89ede9
{
Packit 89ede9
	if ( data->data[0]=='{' && data->data[data->len-1]=='}' ) return 1;
Packit 89ede9
	if ( data->data[0]=='\"' && data->data[data->len-1]=='\"' ) return 1;
Packit 89ede9
	return 0;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtex_split( slist *tokens, str *s )
Packit 89ede9
{
Packit 89ede9
	int i, n = s->len, nbrackets = 0, status = BIBL_OK;
Packit 89ede9
	str tok, *t;
Packit 89ede9
Packit 89ede9
	str_init( &tok );
Packit 89ede9
Packit 89ede9
	for ( i=0; i
Packit 89ede9
		if ( s->data[i]=='{' && ( i==0 || s->data[i-1]!='\\' ) ) {
Packit 89ede9
			nbrackets++;
Packit 89ede9
			str_addchar( &tok, '{' );
Packit 89ede9
		} else if ( s->data[i]=='}' && ( i==0 || s->data[i-1]!='\\' ) ) {
Packit 89ede9
			nbrackets--;
Packit 89ede9
			str_addchar( &tok, '}' );
Packit 89ede9
		} else if ( !is_ws( s->data[i] ) || nbrackets ) {
Packit 89ede9
			str_addchar( &tok, s->data[i] );
Packit 89ede9
		} else if ( is_ws( s->data[i] ) ) {
Packit 89ede9
			if ( str_has_value( &tok ) ) {
Packit 89ede9
				t = slist_add( tokens, &tok );
Packit 89ede9
				if ( !t ) {
Packit 89ede9
					status = BIBL_ERR_MEMERR;
Packit 89ede9
					goto out;
Packit 89ede9
				}
Packit 89ede9
			}
Packit 89ede9
			str_empty( &tok );
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
	if ( str_has_value( &tok ) ) {
Packit 89ede9
		t = slist_add( tokens, &tok );
Packit 89ede9
		if ( !t ) {
Packit 89ede9
			status = BIBL_ERR_MEMERR;
Packit 89ede9
			goto out;
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	for ( i=0; i<tokens->n; ++i ) {
Packit 89ede9
		str_trimstartingws( slist_str( tokens, i ) );
Packit 89ede9
		str_trimendingws( slist_str( tokens, i ) );
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	str_free( &tok );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtex_addtitleurl( fields *info, str *in )
Packit 89ede9
{
Packit 89ede9
	int fstatus, status = BIBL_OK;
Packit 89ede9
	str s;
Packit 89ede9
	char *p;
Packit 89ede9
Packit 89ede9
	str_init( &s );
Packit 89ede9
Packit 89ede9
	/* ...skip past "\href{" and copy to "}" */
Packit 89ede9
	p = str_cpytodelim( &s, in->data + 6, "}", 1 );
Packit 89ede9
	if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
Packit 89ede9
	/* ...add to URL */
Packit 89ede9
	fstatus = fields_add( info, "URL", s.data, 0 );
Packit 89ede9
	if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
Packit 89ede9
	/* ...return deleted fragment to str in */
Packit 89ede9
	(void) str_cpytodelim( &s, p, "", 0 );
Packit 89ede9
	if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
	str_swapstrings( &s, in );
Packit 89ede9
Packit 89ede9
out:
Packit 89ede9
	str_free( &s );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
is_url_tag( str *tag )
Packit 89ede9
{
Packit 89ede9
	if ( str_has_value( tag ) ) {
Packit 89ede9
		if ( !strcasecmp( str_cstr( tag ), "url" ) ) return 1;
Packit 89ede9
	}
Packit 89ede9
	return 0;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
is_name_tag( str *tag )
Packit 89ede9
{
Packit 89ede9
	if ( str_has_value( tag ) ) {
Packit 89ede9
		if ( !strcasecmp( str_cstr( tag ), "author" ) ) return 1;
Packit 89ede9
		if ( !strcasecmp( str_cstr( tag ), "editor" ) ) return 1;
Packit 89ede9
	}
Packit 89ede9
	return 0;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static void
Packit 89ede9
bibtex_process_tilde( str *s )
Packit 89ede9
{
Packit 89ede9
	char *p, *q;
Packit 89ede9
	int n = 0;
Packit 89ede9
Packit 89ede9
	p = q = s->data;
Packit 89ede9
	if ( !p ) return;
Packit 89ede9
	while ( *p ) {
Packit 89ede9
		if ( *p=='~' ) {
Packit 89ede9
			*q = ' ';
Packit 89ede9
		} else if ( *p=='\\' && *(p+1)=='~' ) {
Packit 89ede9
			n++;
Packit 89ede9
			p++;
Packit 89ede9
			*q = '~';
Packit 89ede9
		} else {
Packit 89ede9
			*q = *p;
Packit 89ede9
		}
Packit 89ede9
		p++;
Packit 89ede9
		q++;
Packit 89ede9
	}
Packit 89ede9
	*q = '\0';
Packit 89ede9
	s->len -= n;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static void
Packit 89ede9
bibtex_process_bracket( str *s )
Packit 89ede9
{
Packit 89ede9
	char *p, *q;
Packit 89ede9
	int n = 0;
Packit 89ede9
Packit 89ede9
	p = q = s->data;
Packit 89ede9
	if ( !p ) return;
Packit 89ede9
	while ( *p ) {
Packit 89ede9
		if ( *p=='\\' && ( *(p+1)=='{' || *(p+1)=='}' ) ) {
Packit 89ede9
			n++;
Packit 89ede9
			p++;
Packit 89ede9
			*q = *p;
Packit 89ede9
			q++;
Packit 89ede9
		} else if ( *p=='{' || *p=='}' ) {
Packit 89ede9
			n++;
Packit 89ede9
		} else {
Packit 89ede9
			*q = *p;
Packit 89ede9
			q++;
Packit 89ede9
		}
Packit 89ede9
		p++;
Packit 89ede9
	}
Packit 89ede9
	*q = '\0';
Packit 89ede9
	s->len -= n;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static void
Packit 89ede9
bibtex_cleantoken( str *s )
Packit 89ede9
{
Packit 89ede9
	/* 'textcomp' annotations */
Packit 89ede9
	str_findreplace( s, "\\textit", "" );
Packit 89ede9
	str_findreplace( s, "\\textbf", "" );
Packit 89ede9
	str_findreplace( s, "\\textsl", "" );
Packit 89ede9
	str_findreplace( s, "\\textsc", "" );
Packit 89ede9
	str_findreplace( s, "\\textsf", "" );
Packit 89ede9
	str_findreplace( s, "\\texttt", "" );
Packit 89ede9
	str_findreplace( s, "\\textsubscript", "" );
Packit 89ede9
	str_findreplace( s, "\\textsuperscript", "" );
Packit 89ede9
	str_findreplace( s, "\\emph", "" );
Packit 89ede9
	str_findreplace( s, "\\url", "" );
Packit 89ede9
	str_findreplace( s, "\\mbox", "" );
Packit 89ede9
Packit 89ede9
	/* Other text annotations */
Packit 89ede9
	str_findreplace( s, "\\it ", "" );
Packit 89ede9
	str_findreplace( s, "\\em ", "" );
Packit 89ede9
Packit 89ede9
	str_findreplace( s, "\\%", "%" );
Packit 89ede9
	str_findreplace( s, "\\$", "$" );
Packit 89ede9
	while ( str_findreplace( s, "  ", " " ) ) {}
Packit 89ede9
Packit 89ede9
	/* 'textcomp' annotations that we don't want to substitute on output*/
Packit 89ede9
	str_findreplace( s, "\\textdollar", "$" );
Packit 89ede9
	str_findreplace( s, "\\textunderscore", "_" );
Packit 89ede9
Packit 89ede9
	bibtex_process_bracket( s );
Packit 89ede9
	bibtex_process_tilde( s );
Packit 89ede9
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtex_cleandata( str *tag, str *s, fields *info, param *p )
Packit 89ede9
{
Packit 89ede9
	int i, status;
Packit 89ede9
	slist tokens;
Packit 89ede9
	str *tok;
Packit 89ede9
	if ( str_is_empty( s ) ) return BIBL_OK;
Packit 89ede9
	/* protect url from undergoing any parsing */
Packit 89ede9
	if ( is_url_tag( tag ) ) return BIBL_OK;
Packit 89ede9
	slist_init( &tokens );
Packit 89ede9
	status = bibtex_split( &tokens, s );
Packit 89ede9
	if ( status!=BIBL_OK ) goto out;
Packit 89ede9
	for ( i=0; i
Packit 89ede9
		tok = slist_str( &tokens, i );
Packit 89ede9
		if ( bibtex_protected( tok ) ) {
Packit 89ede9
			if (!strncasecmp(tok->data,"\\href{", 6)) {
Packit 89ede9
				bibtex_addtitleurl( info, tok );
Packit 89ede9
			}
Packit 89ede9
		}
Packit 89ede9
		if ( p->latexin && !is_name_tag( tag ) && !is_url_tag( tag ) )
Packit 89ede9
			bibtex_cleantoken( tok );
Packit 89ede9
	}
Packit 89ede9
	str_empty( s );
Packit 89ede9
	for ( i=0; i
Packit 89ede9
		tok = slist_str( &tokens, i );
Packit 89ede9
		if ( i>0 ) str_addchar( s, ' ' );
Packit 89ede9
		str_strcat( s, tok );
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	slist_free( &tokens );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_cleanref( fields *bibin, param *p )
Packit 89ede9
{
Packit 89ede9
	int i, n, status;
Packit 89ede9
	str *t, *d;
Packit 89ede9
	n = fields_num( bibin );
Packit 89ede9
	for ( i=0; i
Packit 89ede9
		t = fields_tag( bibin, i, FIELDS_STRP_NOUSE );
Packit 89ede9
		d = fields_value( bibin, i, FIELDS_STRP_NOUSE );
Packit 89ede9
		status = bibtex_cleandata( t, d, bibin, p );
Packit 89ede9
		if ( status!=BIBL_OK ) return status;
Packit 89ede9
	}
Packit 89ede9
	return BIBL_OK;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static long
Packit 89ede9
bibtexin_findref( bibl *bin, char *citekey )
Packit 89ede9
{
Packit 89ede9
	int n;
Packit 89ede9
	long i;
Packit 89ede9
	for ( i=0; i<bin->nrefs; ++i ) {
Packit 89ede9
		n = fields_find( bin->ref[i], "refnum", LEVEL_ANY );
Packit 89ede9
		if ( n==FIELDS_NOTFOUND ) continue;
Packit 89ede9
		if ( !strcmp( bin->ref[i]->data[n].data, citekey ) ) return i;
Packit 89ede9
	}
Packit 89ede9
	return -1;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static void
Packit 89ede9
bibtexin_nocrossref( bibl *bin, long i, int n, param *p )
Packit 89ede9
{
Packit 89ede9
	int n1 = fields_find( bin->ref[i], "REFNUM", LEVEL_ANY );
Packit 89ede9
	if ( p->progname ) fprintf( stderr, "%s: ", p->progname );
Packit 89ede9
	fprintf( stderr, "Cannot find cross-reference '%s'",
Packit 89ede9
			bin->ref[i]->data[n].data );
Packit 89ede9
	if ( n1!=FIELDS_NOTFOUND ) fprintf( stderr, " for reference '%s'\n",
Packit 89ede9
			bin->ref[i]->data[n1].data );
Packit 89ede9
	fprintf( stderr, "\n" );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_crossref_oneref( fields *bibref, fields *bibcross )
Packit 89ede9
{
Packit 89ede9
	int j, n, nl, ntype, fstatus, status = BIBL_OK;
Packit 89ede9
	char *type, *nt, *nv;
Packit 89ede9
Packit 89ede9
	ntype = fields_find( bibref, "INTERNAL_TYPE", LEVEL_ANY );
Packit 89ede9
	type = ( char * ) fields_value( bibref, ntype, FIELDS_CHRP_NOUSE );
Packit 89ede9
Packit 89ede9
	n = fields_num( bibcross );
Packit 89ede9
	for ( j=0; j
Packit 89ede9
		nt = ( char * ) fields_tag( bibcross, j, FIELDS_CHRP_NOUSE );
Packit 89ede9
		if ( !strcasecmp( nt, "INTERNAL_TYPE" ) ) continue;
Packit 89ede9
		if ( !strcasecmp( nt, "REFNUM" ) ) continue;
Packit 89ede9
		if ( !strcasecmp( nt, "TITLE" ) ) {
Packit 89ede9
			if ( !strcasecmp( type, "Inproceedings" ) ||
Packit 89ede9
			     !strcasecmp( type, "Incollection" ) )
Packit 89ede9
				nt = "booktitle";
Packit 89ede9
		}
Packit 89ede9
		nv = ( char * ) fields_value( bibcross, j, FIELDS_CHRP_NOUSE );
Packit 89ede9
		nl = fields_level( bibcross, j ) + 1;
Packit 89ede9
		fstatus = fields_add( bibref, nt, nv, nl );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) {
Packit 89ede9
			status = BIBL_ERR_MEMERR;
Packit 89ede9
			goto out;
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_crossref( bibl *bin, param *p )
Packit 89ede9
{
Packit 89ede9
	int i, n, ncross, status = BIBL_OK;
Packit 89ede9
	fields *bibref, *bibcross;
Packit 89ede9
Packit 89ede9
	for ( i=0; i<bin->nrefs; ++i ) {
Packit 89ede9
		bibref = bin->ref[i];
Packit 89ede9
		n = fields_find( bibref, "CROSSREF", LEVEL_ANY );
Packit 89ede9
		if ( n==FIELDS_NOTFOUND ) continue;
Packit 89ede9
		fields_setused( bibref, n );
Packit 89ede9
		ncross = bibtexin_findref( bin, (char*) fields_value( bibref, n, FIELDS_CHRP ) );
Packit 89ede9
		if ( ncross==-1 ) {
Packit 89ede9
			bibtexin_nocrossref( bin, i, n, p );
Packit 89ede9
			continue;
Packit 89ede9
		}
Packit 89ede9
		bibcross = bin->ref[ncross];
Packit 89ede9
		status = bibtexin_crossref_oneref( bibref, bibcross );
Packit 89ede9
		if ( status!=BIBL_OK ) goto out;
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_cleanf( bibl *bin, param *p )
Packit 89ede9
{
Packit 89ede9
	int status = BIBL_OK;
Packit 89ede9
	long i;
Packit 89ede9
Packit 89ede9
        for ( i=0; i<bin->nrefs; ++i )
Packit 89ede9
		status = bibtexin_cleanref( bin->ref[i], p );
Packit 89ede9
	bibtexin_crossref( bin, p );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: int bibtexin_typef()
Packit 89ede9
*****************************************************/
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_typef( fields *bibin, char *filename, int nrefs, param *p )
Packit 89ede9
{
Packit 89ede9
	int ntypename, nrefname, is_default;
Packit 89ede9
	char *refname = "", *typename = "";
Packit 89ede9
Packit 89ede9
	ntypename = fields_find( bibin, "INTERNAL_TYPE", LEVEL_MAIN );
Packit 89ede9
	nrefname  = fields_find( bibin, "REFNUM",        LEVEL_MAIN );
Packit 89ede9
	if ( nrefname!=FIELDS_NOTFOUND )  refname  = fields_value( bibin, nrefname,  FIELDS_CHRP_NOUSE );
Packit 89ede9
	if ( ntypename!=FIELDS_NOTFOUND ) typename = fields_value( bibin, ntypename, FIELDS_CHRP_NOUSE );
Packit 89ede9
Packit 89ede9
	return get_reftype( typename, nrefs, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*****************************************************
Packit 89ede9
 PUBLIC: int bibtexin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
Packit 89ede9
*****************************************************/
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtex_matches_list( fields *bibout, char *tag, char *suffix, str *data, int level,
Packit 89ede9
		slist *names, int *match )
Packit 89ede9
{
Packit 89ede9
	int i, fstatus, status = BIBL_OK;
Packit 89ede9
	str newtag;
Packit 89ede9
Packit 89ede9
	*match = 0;
Packit 89ede9
	if ( names->n==0 ) return status;
Packit 89ede9
Packit 89ede9
	str_init( &newtag );
Packit 89ede9
Packit 89ede9
	for ( i=0; i<names->n; ++i ) {
Packit 89ede9
		if ( strcmp( str_cstr( data ), slist_cstr( names, i ) ) ) continue;
Packit 89ede9
		str_initstrc( &newtag, tag );
Packit 89ede9
		str_strcatc( &newtag, suffix );
Packit 89ede9
		fstatus = fields_add( bibout, str_cstr( &newtag ), str_cstr( data ), level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) {
Packit 89ede9
			status = BIBL_ERR_MEMERR;
Packit 89ede9
			goto out;
Packit 89ede9
		}
Packit 89ede9
		*match = 1;
Packit 89ede9
		goto out;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
out:
Packit 89ede9
	str_free( &newtag );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/**** bibtexin_btorg ****/
Packit 89ede9
Packit 89ede9
/*
Packit 89ede9
 * BibTeX uses 'organization' in lieu of publisher if that field is missing.
Packit 89ede9
 * Otherwise output as
Packit 89ede9
 * <name type="corporate">
Packit 89ede9
 *    <namePart>The organization</namePart>
Packit 89ede9
 *    <role>
Packit 89ede9
 *       <roleTerm authority="marcrelator" type="text">organizer of meeting</roleTerm>
Packit 89ede9
 *    </role>
Packit 89ede9
 * </name>
Packit 89ede9
 */
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_btorg( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
Packit 89ede9
{
Packit 89ede9
	int n, fstatus;
Packit 89ede9
	n = fields_find( bibin, "publisher", LEVEL_ANY );
Packit 89ede9
	if ( n==FIELDS_NOTFOUND )
Packit 89ede9
		fstatus = fields_add( bibout, "PUBLISHER", str_cstr( invalue ), level );
Packit 89ede9
	else
Packit 89ede9
		fstatus = fields_add( bibout, "ORGANIZER:CORP", str_cstr( invalue ), level );
Packit 89ede9
	if ( fstatus==FIELDS_OK ) return BIBL_OK;
Packit 89ede9
	else return BIBL_ERR_MEMERR;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/**** bibtexin_btsente() ****/
Packit 89ede9
Packit 89ede9
/*
Packit 89ede9
 * sentelink = {file://localhost/full/path/to/file.pdf,Sente,PDF}
Packit 89ede9
 *
Packit 89ede9
 * Sente is an academic reference manager for MacOSX and Apple iPad.
Packit 89ede9
 */
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_btsente( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
Packit 89ede9
{
Packit 89ede9
	int fstatus, status = BIBL_OK;
Packit 89ede9
	str link;
Packit 89ede9
Packit 89ede9
	str_init( &link );
Packit 89ede9
	str_cpytodelim( &link, skip_ws( invalue->data ), ",", 0 );
Packit 89ede9
	str_trimendingws( &link );
Packit 89ede9
	if ( str_memerr( &link ) ) status = BIBL_ERR_MEMERR;
Packit 89ede9
Packit 89ede9
	if ( status==BIBL_OK && link.len ) {
Packit 89ede9
		fstatus = fields_add( bibout, "FILEATTACH", str_cstr( &link ), level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	str_free( &link );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/**** bibtexin_linkedfile() ****/
Packit 89ede9
Packit 89ede9
/* Return the number of ':' characters in the NUL-terminated string 'p'.
 * The string is only read, so the parameter is const-qualified. */
static int
count_colons( const char *p )
{
	int n = 0;

	for ( ; *p; ++p )
		if ( *p==':' ) n++;

	return n;
}
Packit 89ede9
Packit 89ede9
/* Return the index of the first ':' in 'p', or the length of 'p' when
 * it contains no colon. */
static int
first_colon( const char *p )
{
	return (int) strcspn( p, ":" );
}
Packit 89ede9
Packit 89ede9
/* Return the index of the last ':' in 'p', or 0 when 'p' contains no
 * colon.  An empty string also yields 0; the previous strlen(p)-1
 * computation wrapped around in size_t for that input. */
static int
last_colon( const char *p )
{
	const char *q = strrchr( p, ':' );

	if ( q==NULL ) return 0;
	return (int) ( q - p );
}
Packit 89ede9
Packit 89ede9
/*
Packit 89ede9
 * file={Description:/full/path/to/file.pdf:PDF}
Packit 89ede9
 */
Packit 89ede9
static int
Packit 89ede9
bibtexin_linkedfile( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
Packit 89ede9
{
Packit 89ede9
	int fstatus, status = BIBL_OK;
Packit 89ede9
	char *p = invalue->data;
Packit 89ede9
	int i, n, n1, n2;
Packit 89ede9
	str link;
Packit 89ede9
Packit 89ede9
	n = count_colons( p );
Packit 89ede9
	if ( n > 1 ) {
Packit 89ede9
		/* A DOS file can contain a colon ":C:/....pdf:PDF" */
Packit 89ede9
		/* Extract after 1st and up to last colons */
Packit 89ede9
		n1 = first_colon( p ) + 1;
Packit 89ede9
		n2 = last_colon( p );
Packit 89ede9
		str_init( &link );
Packit 89ede9
		for ( i=n1; i
Packit 89ede9
			str_addchar( &link, p[i] );
Packit 89ede9
		}
Packit 89ede9
		str_trimstartingws( &link );
Packit 89ede9
		str_trimendingws( &link );
Packit 89ede9
		if ( str_memerr( &link ) ) {
Packit 89ede9
			status = BIBL_ERR_MEMERR;
Packit 89ede9
			goto out;
Packit 89ede9
		}
Packit 89ede9
		if ( link.len ) {
Packit 89ede9
			fstatus = fields_add( bibout, "FILEATTACH", link.data, level );
Packit 89ede9
			if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
		}
Packit 89ede9
out:
Packit 89ede9
		str_free( &link );
Packit 89ede9
	} else {
Packit 89ede9
		/* This field isn't formatted properly, so just copy directly */
Packit 89ede9
		fstatus = fields_add( bibout, "FILEATTACH", p, level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
	return status;
Packit 89ede9
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/**** bibtexin_howpublished() ****/
Packit 89ede9
Packit 89ede9
/*    howpublished={},
Packit 89ede9
 *
Packit 89ede9
 * Normally indicates the manner in which something was
Packit 89ede9
 * published in lieu of a formal publisher, so typically
Packit 89ede9
 * 'howpublished' and 'publisher' will never be in the
Packit 89ede9
 * same reference.
Packit 89ede9
 *
Packit 89ede9
 * Occassionally, people put Diploma thesis information
Packit 89ede9
 * into the field, so check that first.
Packit 89ede9
 *
Packit 89ede9
 * Returns BIBL_OK or BIBL_ERR_MEMERR
Packit 89ede9
 */
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_howpublished( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
Packit 89ede9
{
Packit 89ede9
	int fstatus, status = BIBL_OK;
Packit 89ede9
	if ( !strncasecmp( str_cstr( invalue ), "Diplom", 6 ) ) {
Packit 89ede9
		fstatus = fields_replace_or_add( bibout, "GENRE:BIBUTILS", "Diploma thesis", level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
	else if ( !strncasecmp( str_cstr( invalue ), "HSabilitation", 13 ) ) {
Packit 89ede9
		fstatus = fields_replace_or_add( bibout, "GENRE:BIBUTILS", "Habilitation thesis", level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
	else if ( !strncasecmp( str_cstr( invalue ), "Licentiate", 10 ) ) {
Packit 89ede9
		fstatus = fields_replace_or_add( bibout, "GENRE:BIBUTILS", "Licentiate thesis", level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
	else if ( is_embedded_link( str_cstr( invalue ) ) ) {
Packit 89ede9
		status =  urls_split_and_add( str_cstr( invalue ), bibout, level );
Packit 89ede9
	}
Packit 89ede9
	else {
Packit 89ede9
		fstatus = fields_add( bibout, "PUBLISHER", str_cstr( invalue ), level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/**** bibtexin_eprint() ****/
Packit 89ede9
Packit 89ede9
/* Try to capture situations like
Packit 89ede9
 *
Packit 89ede9
 * eprint="1605.02026",
Packit 89ede9
 * archivePrefix="arXiv",
Packit 89ede9
 *
Packit 89ede9
 * or
Packit 89ede9
 *
Packit 89ede9
 * eprint="13211131",
Packit 89ede9
 * eprinttype="medline",
Packit 89ede9
 *
Packit 89ede9
 * If we don't know anything, concatenate archivePrefix:eprint
Packit 89ede9
 * and push into URL. (Could be wrong)
Packit 89ede9
 *
Packit 89ede9
 * If no info, just push eprint into URL. (Could be wrong)
Packit 89ede9
 */
Packit 89ede9
static int
Packit 89ede9
process_eprint_with_prefix( fields *bibout, char *prefix, str *value, int level )
Packit 89ede9
{
Packit 89ede9
	int fstatus, status = BIBL_OK;
Packit 89ede9
	str merge;
Packit 89ede9
Packit 89ede9
	if ( !strcmp( prefix, "arXiv" ) ) {
Packit 89ede9
		fstatus = fields_add( bibout, "ARXIV", value->data, level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	else if ( !strcmp( prefix, "jstor" ) ) {
Packit 89ede9
		fstatus = fields_add( bibout, "JSTOR", value->data, level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	else if ( !strcmp( prefix, "medline" ) ) {
Packit 89ede9
		fstatus = fields_add( bibout, "MEDLINE", value->data, level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	else if ( !strcmp( prefix, "pubmed" ) ) {
Packit 89ede9
		fstatus = fields_add( bibout, "PMID", value->data, level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	/* ...if this is unknown prefix, merge prefix & eprint */
Packit 89ede9
	else {
Packit 89ede9
		str_init( &merge );
Packit 89ede9
		str_mergestrs( &merge, prefix, ":", value->data, NULL );
Packit 89ede9
		fstatus = fields_add( bibout, "URL", merge.data, level );
Packit 89ede9
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
Packit 89ede9
		str_free( &merge );
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
static int
Packit 89ede9
process_eprint_without_prefix( fields *bibout, str *value, int level )
Packit 89ede9
{
Packit 89ede9
	int fstatus;
Packit 89ede9
Packit 89ede9
	/* ...no archivePrefix, need to handle just 'eprint' tag */
Packit 89ede9
	fstatus = fields_add( bibout, "URL", value->data, level );
Packit 89ede9
Packit 89ede9
	if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
Packit 89ede9
	else return BIBL_OK;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_eprint( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
Packit 89ede9
{
Packit 89ede9
	char *prefix;
Packit 89ede9
	int n;
Packit 89ede9
Packit 89ede9
	/* ...do we have an archivePrefix too? */
Packit 89ede9
	n = fields_find( bibin, "ARCHIVEPREFIX", level );
Packit 89ede9
	if ( n==FIELDS_NOTFOUND ) n = fields_find( bibin, "EPRINTTYPE", level );
Packit 89ede9
	if ( n!=FIELDS_NOTFOUND ) {
Packit 89ede9
		prefix = fields_value( bibin, n, FIELDS_CHRP );
Packit 89ede9
		return process_eprint_with_prefix( bibout, prefix, invalue, level );
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	/* ...no we don't */
Packit 89ede9
	return process_eprint_without_prefix( bibout, invalue, level );
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/**** bibtexin_keyword() ****/
Packit 89ede9
Packit 89ede9
/* Split keywords="" with semicolons.
Packit 89ede9
 * Commas are also frequently used, but will break
Packit 89ede9
 * entries like:
Packit 89ede9
 *       keywords="Microscopy, Confocal"
Packit 89ede9
 * Returns BIBL_OK or BIBL_ERR_MEMERR
Packit 89ede9
 */
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_keyword( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
Packit 89ede9
{
Packit 89ede9
	int fstatus, status = BIBL_OK;
Packit 89ede9
	str keyword;
Packit 89ede9
	char *p;
Packit 89ede9
Packit 89ede9
	p = invalue->data;
Packit 89ede9
	str_init( &keyword );
Packit 89ede9
Packit 89ede9
	while ( *p ) {
Packit 89ede9
		p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
Packit 89ede9
		str_trimendingws( &keyword );
Packit 89ede9
		if ( str_memerr( &keyword ) ) {
Packit 89ede9
			status = BIBL_ERR_MEMERR;
Packit 89ede9
			goto out;
Packit 89ede9
		}
Packit 89ede9
		if ( keyword.len ) {
Packit 89ede9
			fstatus = fields_add( bibout, "KEYWORD", keyword.data, level );
Packit 89ede9
			if ( fstatus!=FIELDS_OK ) {
Packit 89ede9
				status = BIBL_ERR_MEMERR;
Packit 89ede9
				goto out;
Packit 89ede9
			}
Packit 89ede9
		}
Packit 89ede9
	}
Packit 89ede9
out:
Packit 89ede9
	str_free( &keyword );
Packit 89ede9
	return status;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/*
Packit 89ede9
 * bibtex_names( bibout, newtag, field, level);
Packit 89ede9
 *
Packit 89ede9
 * split names in author list separated by and's (use '|' character)
Packit 89ede9
 * and add names
Packit 89ede9
 *
Packit 89ede9
 * returns BIBL_OK on success, BIBL_ERR_MEMERR on memory error
Packit 89ede9
 */
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_person( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
Packit 89ede9
{
Packit 89ede9
	int begin, end, ok, n, etal, i, status, match;
Packit 89ede9
	slist tokens;
Packit 89ede9
Packit 89ede9
	/* If we match the asis or corps list add and bail. */
Packit 89ede9
	status = bibtex_matches_list( bibout, outtag, ":ASIS", invalue, level, &(pm->asis), &match );
Packit 89ede9
	if ( match==1 || status!=BIBL_OK ) return status;
Packit 89ede9
	status = bibtex_matches_list( bibout, outtag, ":CORP", invalue, level, &(pm->corps), &match );
Packit 89ede9
	if ( match==1 || status!=BIBL_OK ) return status;
Packit 89ede9
Packit 89ede9
	slist_init( &tokens );
Packit 89ede9
Packit 89ede9
	bibtex_split( &tokens, invalue );
Packit 89ede9
	for ( i=0; i
Packit 89ede9
		bibtex_cleantoken( slist_str( &tokens, i ) );
Packit 89ede9
Packit 89ede9
	etal = name_findetal( &tokens );
Packit 89ede9
Packit 89ede9
	begin = 0;
Packit 89ede9
	n = tokens.n - etal;
Packit 89ede9
	while ( begin < n ) {
Packit 89ede9
Packit 89ede9
		end = begin + 1;
Packit 89ede9
Packit 89ede9
		while ( end < n && strcasecmp( slist_cstr( &tokens, end ), "and" ) )
Packit 89ede9
			end++;
Packit 89ede9
Packit 89ede9
		if ( end - begin == 1 ) {
Packit 89ede9
			ok = name_addsingleelement( bibout, outtag, slist_cstr( &tokens, begin ), level, 0 );
Packit 89ede9
			if ( !ok ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
		} else {
Packit 89ede9
			ok = name_addmultielement( bibout, outtag, &tokens, begin, end, level );
Packit 89ede9
			if ( !ok ) { status = BIBL_ERR_MEMERR; goto out; }
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
		begin = end + 1;
Packit 89ede9
Packit 89ede9
		/* Handle repeated 'and' errors: authors="G. F. Author and and B. K. Author" */
Packit 89ede9
		while ( begin < n && !strcasecmp( slist_cstr( &tokens, begin ), "and" ) )
Packit 89ede9
			begin++;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	if ( etal ) {
Packit 89ede9
		ok = name_addsingleelement( bibout, outtag, "et al.", level, 0 );
Packit 89ede9
		if ( !ok ) status = BIBL_ERR_MEMERR;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
out:
Packit 89ede9
	slist_free( &tokens );
Packit 89ede9
	return status;
Packit 89ede9
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
/**** bibtexin_title() ****/
Packit 89ede9
Packit 89ede9
/* bibtexin_titleinbook_isbooktitle()
Packit 89ede9
 *
Packit 89ede9
 * Normally, the title field of inbook refers to the book.  The
Packit 89ede9
 * section in a @inbook reference is untitled.  If it's titled,
Packit 89ede9
 * the @incollection should be used.  For example, in:
Packit 89ede9
 *
Packit 89ede9
 * @inbook{
Packit 89ede9
 *    title="xxx"
Packit 89ede9
 * }
Packit 89ede9
 *
Packit 89ede9
 * the booktitle is "xxx".
Packit 89ede9
 *
Packit 89ede9
 * However, @inbook is frequently abused (and treated like
Packit 89ede9
 * @incollection) so that title and booktitle are present
Packit 89ede9
 * and title is now 'supposed' to refer to the section.  For example:
Packit 89ede9
 *
Packit 89ede9
 * @inbook{
Packit 89ede9
 *     title="yyy",
Packit 89ede9
 *     booktitle="xxx"
Packit 89ede9
 * }
Packit 89ede9
 *
Packit 89ede9
 * Therefore report whether or not booktitle is present as well
Packit 89ede9
 * as title in @inbook references.  If not, then make 'title'
Packit 89ede9
 * correspond to the title of the book, not the section.
Packit 89ede9
 *
Packit 89ede9
 */
Packit 89ede9
static int
Packit 89ede9
bibtexin_titleinbook_isbooktitle( fields *bibin, char *intag )
Packit 89ede9
{
Packit 89ede9
	int n;
Packit 89ede9
Packit 89ede9
	/* ...look only at 'title="xxx"' elements */
Packit 89ede9
	if ( strcasecmp( intag, "TITLE" ) ) return 0;
Packit 89ede9
Packit 89ede9
	/* ...look only at '@inbook' references */
Packit 89ede9
	n = fields_find( bibin, "INTERNAL_TYPE", LEVEL_ANY );
Packit 89ede9
	if ( n==FIELDS_NOTFOUND ) return 0;
Packit 89ede9
	if ( strcasecmp( fields_value( bibin, n, FIELDS_CHRP ), "INBOOK" ) ) return 0;
Packit 89ede9
Packit 89ede9
	/* ...look to see if 'booktitle="yyy"' exists */
Packit 89ede9
	n = fields_find( bibin, "BOOKTITLE", LEVEL_ANY );
Packit 89ede9
	if ( n==FIELDS_NOTFOUND ) return 0;
Packit 89ede9
	else return 1;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_title( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
Packit 89ede9
{
Packit 89ede9
	int ok;
Packit 89ede9
Packit 89ede9
	if ( bibtexin_titleinbook_isbooktitle( bibin, intag->data ) ) level=LEVEL_MAIN;
Packit 89ede9
	ok = title_process( bibout, "TITLE", invalue->data, level, pm->nosplittitle );
Packit 89ede9
	if ( ok ) return BIBL_OK;
Packit 89ede9
	else return BIBL_ERR_MEMERR;
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static void
Packit 89ede9
bibtexin_notag( param *p, char *tag )
Packit 89ede9
{
Packit 89ede9
	if ( p->verbose && strcmp( tag, "INTERNAL_TYPE" ) ) {
Packit 89ede9
		if ( p->progname ) fprintf( stderr, "%s: ", p->progname );
Packit 89ede9
		fprintf( stderr, "Cannot find tag '%s'\n", tag );
Packit 89ede9
	}
Packit 89ede9
}
Packit 89ede9
Packit 89ede9
static int
Packit 89ede9
bibtexin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
Packit 89ede9
{
Packit 89ede9
	static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
Packit 89ede9
		[ 0 ... NUM_REFTYPES-1 ] = generic_null,
Packit 89ede9
		[ SIMPLE       ] = generic_simple,
Packit 89ede9
		[ TITLE        ] = bibtexin_title,
Packit 89ede9
		[ PERSON       ] = bibtexin_person,
Packit 89ede9
		[ PAGES        ] = generic_pages,
Packit 89ede9
		[ KEYWORD      ] = bibtexin_keyword,
Packit 89ede9
		[ EPRINT       ] = bibtexin_eprint,
Packit 89ede9
		[ HOWPUBLISHED ] = bibtexin_howpublished,
Packit 89ede9
		[ LINKEDFILE   ] = bibtexin_linkedfile,
Packit 89ede9
		[ NOTES        ] = generic_notes,
Packit 89ede9
		[ GENRE        ] = generic_genre,
Packit 89ede9
		[ BT_SENTE     ] = bibtexin_btsente,
Packit 89ede9
		[ BT_ORG       ] = bibtexin_btorg,
Packit 89ede9
		[ URL          ] = generic_url
Packit 89ede9
	};
Packit 89ede9
Packit 89ede9
	int process, level, i, nfields, status = BIBL_OK;
Packit 89ede9
	str *intag, *invalue;
Packit 89ede9
	char *outtag;
Packit 89ede9
Packit 89ede9
	nfields = fields_num( bibin );
Packit 89ede9
	for ( i=0; i
Packit 89ede9
Packit 89ede9
		if ( fields_used( bibin, i ) )   continue; /* e.g. successful crossref */
Packit 89ede9
		if ( fields_notag( bibin, i ) )  continue;
Packit 89ede9
		if ( fields_nodata( bibin, i ) ) continue;
Packit 89ede9
Packit 89ede9
		intag   = fields_tag( bibin, i, FIELDS_STRP );
Packit 89ede9
		invalue = fields_value( bibin, i, FIELDS_STRP );
Packit 89ede9
Packit 89ede9
		if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
Packit 89ede9
			bibtexin_notag( p, str_cstr( intag ) );
Packit 89ede9
			continue;
Packit 89ede9
		}
Packit 89ede9
Packit 89ede9
		status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
Packit 89ede9
		if ( status!=BIBL_OK ) return status;
Packit 89ede9
	}
Packit 89ede9
Packit 89ede9
	if ( status==BIBL_OK && p->verbose ) fields_report( bibout, stderr );
Packit 89ede9
Packit 89ede9
	return status;
Packit 89ede9
}