/*
 * name.c
 *
 * mangle names w/ and w/o commas
 *
 * Copyright (c) Chris Putnam 2004-2018
 *
 * Source code released under the GPL version 2
 *
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include "utf8.h"
#include "unicode.h"
#include "is_ws.h"
#include "str.h"
#include "fields.h"
#include "slist.h"
#include "intlist.h"
#include "name.h"

/* name_build_withcomma()
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* reconstruct parsed names in format: 'family|given|given||suffix'
|
|
Packit |
89ede9 |
* to 'family suffix, given given
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
void
|
|
Packit |
89ede9 |
name_build_withcomma( str *s, char *p )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int nseps = 0, nch;
|
|
Packit |
89ede9 |
char *suffix, *stopat;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_empty( s );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
suffix = strstr( p, "||" );
|
|
Packit |
89ede9 |
if ( suffix ) stopat = suffix;
|
|
Packit |
89ede9 |
else stopat = strchr( p, '\0' );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
while ( p != stopat ) {
|
|
Packit |
89ede9 |
nch = 0;
|
|
Packit |
89ede9 |
if ( nseps==1 ) {
|
|
Packit |
89ede9 |
if ( suffix ) {
|
|
Packit |
89ede9 |
str_strcatc( s, " " );
|
|
Packit |
89ede9 |
str_strcatc( s, suffix+2 );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
str_addchar( s, ',' );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( nseps ) str_addchar( s, ' ' );
|
|
Packit |
89ede9 |
while ( p!=stopat && *p!='|' ) {
|
|
Packit |
89ede9 |
str_addchar( s, *p++ );
|
|
Packit |
89ede9 |
nch++;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( p!=stopat && *p=='|' ) p++;
|
|
Packit |
89ede9 |
if ( nseps!=0 && nch==1 ) str_addchar( s, '.' );
|
|
Packit |
89ede9 |
nseps++;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* name_findetal()
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Returns number of final tokens to be skipped in processing
|
|
Packit |
89ede9 |
* of name lists.
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
int
|
|
Packit |
89ede9 |
name_findetal( slist *tokens )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
str *s1, *s2;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( tokens->n==0 ) return 0;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...check last entry for full 'et al.' or variant */
|
|
Packit |
89ede9 |
s2 = slist_str( tokens, tokens->n - 1 );
|
|
Packit |
89ede9 |
if ( !strcasecmp( s2->data, "et alia" ) ||
|
|
Packit |
89ede9 |
!strcasecmp( s2->data, "et al." ) ||
|
|
Packit |
89ede9 |
!strcasecmp( s2->data, "et al.," ) ||
|
|
Packit |
89ede9 |
!strcasecmp( s2->data, "et al" ) ||
|
|
Packit |
89ede9 |
!strcasecmp( s2->data, "etalia" ) ||
|
|
Packit |
89ede9 |
!strcasecmp( s2->data, "etal." ) ||
|
|
Packit |
89ede9 |
!strcasecmp( s2->data, "etal" ) ) {
|
|
Packit |
89ede9 |
return 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( tokens->n==1 ) return 0;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...check last two entries for full 'et' and 'al.' */
|
|
Packit |
89ede9 |
s1 = slist_str( tokens, tokens->n - 2 );
|
|
Packit |
89ede9 |
if ( !strcasecmp( s1->data, "et" ) ) {
|
|
Packit |
89ede9 |
if ( !strcasecmp( s2->data, "alia" ) ||
|
|
Packit |
89ede9 |
!strcasecmp( s2->data, "al." ) ||
|
|
Packit |
89ede9 |
!strcasecmp( s2->data, "al.," ) ||
|
|
Packit |
89ede9 |
!strcasecmp( s2->data, "al" ) ) {
|
|
Packit |
89ede9 |
return 2;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return 0;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* Bit flags returned by identify_suffix() / has_suffix().
 * WITHCOMMA is or'ed in when the suffix token itself ends in a comma.
 */
#define WITHCOMMA (1)
#define JUNIOR    (2)
#define SENIOR    (4)
#define THIRD     (8)
#define FOURTH    (16)

/* one recognized spelling of a suffix and the flags it maps to */
typedef struct {
	const char *s;
	unsigned short value;
} suffix_value_t;

/* identify_suffix()
 *
 * Compare 'p' (exact, case-sensitive match) against the known
 * generational suffixes.
 *
 * Returns the matching flag combination (JUNIOR, SENIOR, THIRD or
 * FOURTH, optionally or'ed with WITHCOMMA), or 0 if 'p' is NULL or
 * is not a recognized suffix.
 */
static int
identify_suffix( char *p )
{
	static const suffix_value_t suffixes[] = {
		{ "Jr."   , JUNIOR             },
		{ "Jr"    , JUNIOR             },
		{ "Jr.,"  , JUNIOR | WITHCOMMA },
		{ "Jr,"   , JUNIOR | WITHCOMMA },
		{ "Sr."   , SENIOR             },
		{ "Sr"    , SENIOR             },
		{ "Sr.,"  , SENIOR | WITHCOMMA },
		{ "Sr,"   , SENIOR | WITHCOMMA },
		{ "III"   , THIRD              },
		{ "III,"  , THIRD  | WITHCOMMA },
		{ "IV"    , FOURTH             },
		{ "IV,"   , FOURTH | WITHCOMMA },
	};
	int i, nsuffixes = sizeof( suffixes ) / sizeof( suffixes[0] );

	if ( !p ) return 0;

	for ( i=0; i<nsuffixes; ++i ) {
		if ( !strcmp( p, suffixes[i].s ) )
			return suffixes[i].value;
	}
	return 0;
}

static int
|
|
Packit |
89ede9 |
has_suffix( slist *tokens, int begin, int end, int *suffixpos )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int i, ret;
|
|
Packit |
89ede9 |
str *s;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...check last element, e.g. "H. F. Author, Sr." */
|
|
Packit |
89ede9 |
s = slist_str( tokens, end - 1 );
|
|
Packit |
89ede9 |
ret = identify_suffix( s->data );
|
|
Packit |
89ede9 |
if ( ret ) {
|
|
Packit |
89ede9 |
*suffixpos = end - 1;
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...try to find one after a comma, e.g. "Author, Sr., H. F." */
|
|
Packit |
89ede9 |
for ( i=begin; i
|
|
Packit |
89ede9 |
s = slist_str( tokens, i );
|
|
Packit |
89ede9 |
if ( s->len && s->data[ s->len - 1 ]==',' ) {
|
|
Packit |
89ede9 |
s = slist_str( tokens, i+1 );
|
|
Packit |
89ede9 |
ret = identify_suffix( s->data );
|
|
Packit |
89ede9 |
if ( ret ) {
|
|
Packit |
89ede9 |
*suffixpos = i+1;
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return 0;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
add_given_split( str *name, str *s )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
unsigned int unicode_char;
|
|
Packit |
89ede9 |
unsigned int pos = 0;
|
|
Packit |
89ede9 |
char utf8s[7];
|
|
Packit |
89ede9 |
while ( pos < s->len ) {
|
|
Packit |
89ede9 |
unicode_char = utf8_decode( s->data, &pos );
|
|
Packit |
89ede9 |
if ( is_ws( (char) unicode_char ) ) continue;
|
|
Packit |
89ede9 |
else if ( unicode_char==(unsigned int)'.' ) {
|
|
Packit |
89ede9 |
if ( s->data[pos]=='-' ) {
|
|
Packit |
89ede9 |
str_strcatc( name, ".-" );
|
|
Packit |
89ede9 |
pos += 1;
|
|
Packit |
89ede9 |
unicode_char = utf8_decode( s->data, &pos );
|
|
Packit |
89ede9 |
utf8_encode_str( unicode_char, utf8s );
|
|
Packit |
89ede9 |
str_strcatc( name, utf8s );
|
|
Packit |
89ede9 |
str_addchar( name, '.' );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
} else if ( unicode_char==(unsigned int)'-' ) {
|
|
Packit |
89ede9 |
str_strcatc( name, ".-" );
|
|
Packit |
89ede9 |
unicode_char = utf8_decode( s->data, &pos );
|
|
Packit |
89ede9 |
utf8_encode_str( unicode_char, utf8s );
|
|
Packit |
89ede9 |
str_strcatc( name, utf8s );
|
|
Packit |
89ede9 |
str_addchar( name, '.' );
|
|
Packit |
89ede9 |
} else if ( unicode_char==(unsigned int)',' ) { /* nothing */
|
|
Packit |
89ede9 |
} else {
|
|
Packit |
89ede9 |
str_addchar( name, '|' );
|
|
Packit |
89ede9 |
utf8_encode_str( unicode_char, utf8s );
|
|
Packit |
89ede9 |
str_strcatc( name, utf8s );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static unsigned char
|
|
Packit |
89ede9 |
token_has_no_upper( slist *tokens, int n )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
unsigned short m;
|
|
Packit |
89ede9 |
str *s;
|
|
Packit |
89ede9 |
s = slist_str( tokens, n );
|
|
Packit |
89ede9 |
m = unicode_utf8_classify_str( s );
|
|
Packit |
89ede9 |
if ( m & UNICODE_UPPER ) return 0;
|
|
Packit |
89ede9 |
else return 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static unsigned char
|
|
Packit |
89ede9 |
token_has_upper( slist *tokens, int n )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
if ( token_has_no_upper( tokens, n ) ) return 0;
|
|
Packit |
89ede9 |
else return 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
name_multielement_nocomma( intlist *given, intlist *family, slist *tokens, int begin, int end, int suffixpos )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int family_start, family_end;
|
|
Packit |
89ede9 |
int i, n;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...family name(s) */
|
|
Packit |
89ede9 |
family_start = family_end = end - 1;
|
|
Packit |
89ede9 |
if ( family_start == suffixpos ) family_start = family_end = end - 2;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...if family name is capitalized, then look for first non-capitalized
|
|
Packit |
89ede9 |
* ...token and combine range to family name, e.g. single quoted parts of
|
|
Packit |
89ede9 |
* ..."Ludwig 'von Beethoven'"
|
|
Packit |
89ede9 |
* ..."Johannes Diderik 'van der Waals'"
|
|
Packit |
89ede9 |
* ..."Charles Louis Xavier Joseph 'de la Valla Poussin' */
|
|
Packit |
89ede9 |
if ( token_has_upper( tokens, family_start ) ) {
|
|
Packit |
89ede9 |
i = family_start - 1;
|
|
Packit |
89ede9 |
n = -1;
|
|
Packit |
89ede9 |
while ( i >= begin && ( n==-1 || token_has_no_upper( tokens, i ) ) ) {
|
|
Packit |
89ede9 |
if ( token_has_no_upper( tokens, i ) ) n = i;
|
|
Packit |
89ede9 |
i--;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( n != -1 ) family_start = n;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
for ( i=family_start; i
|
|
Packit |
89ede9 |
intlist_add( family, i );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...given names */
|
|
Packit |
89ede9 |
for ( i=begin; i
|
|
Packit |
89ede9 |
if ( i>=family_start && i<=family_end ) continue;
|
|
Packit |
89ede9 |
if ( i==suffixpos ) continue;
|
|
Packit |
89ede9 |
intlist_add( given, i );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
name_multielement_comma( intlist *given, intlist *family, slist *tokens, int begin, int end, int comma, int suffixpos )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
str *s;
|
|
Packit |
89ede9 |
int i;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...family names */
|
|
Packit |
89ede9 |
for ( i=begin; i
|
|
Packit |
89ede9 |
if ( i==suffixpos ) continue;
|
|
Packit |
89ede9 |
intlist_add( family, i );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
s = slist_str( tokens, comma );
|
|
Packit |
89ede9 |
str_trimend( s, 1 ); /* remove comma */
|
|
Packit |
89ede9 |
intlist_add( family, comma );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...given names */
|
|
Packit |
89ede9 |
for ( i=comma+1; i
|
|
Packit |
89ede9 |
if ( i==suffixpos ) continue;
|
|
Packit |
89ede9 |
intlist_add( given, i );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
name_mutlielement_build( str *name, intlist *given, intlist *family, slist *tokens )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
unsigned short case_given = 0, case_family = 0, should_split = 0;
|
|
Packit |
89ede9 |
str *s;
|
|
Packit |
89ede9 |
int i, m;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...copy and analyze family name */
|
|
Packit |
89ede9 |
for ( i=0; i<family->n; ++i ) {
|
|
Packit |
89ede9 |
m = intlist_get( family, i );
|
|
Packit |
89ede9 |
s = slist_str( tokens, m );
|
|
Packit |
89ede9 |
if ( i ) str_addchar( name, ' ' );
|
|
Packit |
89ede9 |
str_strcat( name, s );
|
|
Packit |
89ede9 |
case_family |= unicode_utf8_classify_str( s );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* ...check given name case */
|
|
Packit |
89ede9 |
for ( i=0; i<given->n; ++i ) {
|
|
Packit |
89ede9 |
m = intlist_get( given, i );
|
|
Packit |
89ede9 |
s = slist_str( tokens, m );
|
|
Packit |
89ede9 |
case_given |= unicode_utf8_classify_str( s );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( ( ( case_family & UNICODE_MIXEDCASE ) == UNICODE_MIXEDCASE ) &&
|
|
Packit |
89ede9 |
( ( case_given & UNICODE_MIXEDCASE ) == UNICODE_UPPER ) ) {
|
|
Packit |
89ede9 |
should_split = 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
for ( i=0; i<given->n; ++i ) {
|
|
Packit |
89ede9 |
m = intlist_get( given, i );
|
|
Packit |
89ede9 |
s = slist_str( tokens, m );
|
|
Packit |
89ede9 |
if ( !should_split ) {
|
|
Packit |
89ede9 |
str_addchar( name, '|' );
|
|
Packit |
89ede9 |
str_strcat( name, s );
|
|
Packit |
89ede9 |
} else add_given_split( name, s );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
name_construct_multi( str *outname, slist *tokens, int begin, int end )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int i, suffix, suffixpos=-1, comma=-1;
|
|
Packit |
89ede9 |
intlist given, family;
|
|
Packit |
89ede9 |
str *s;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
intlist_init( &family );
|
|
Packit |
89ede9 |
intlist_init( &given );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_empty( outname );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
suffix = has_suffix( tokens, begin, end, &suffixpos );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
for ( i=begin; i
|
|
Packit |
89ede9 |
if ( i==suffixpos ) continue;
|
|
Packit |
89ede9 |
s = slist_str( tokens, i );
|
|
Packit |
89ede9 |
if ( s->data[ s->len -1 ] == ',' ) {
|
|
Packit |
89ede9 |
if ( suffix && i==suffixpos-1 && !(suffix&WITHCOMMA) )
|
|
Packit |
89ede9 |
str_trimend( s, 1 );
|
|
Packit |
89ede9 |
else
|
|
Packit |
89ede9 |
comma = i;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( comma != -1 )
|
|
Packit |
89ede9 |
name_multielement_comma( &given, &family, tokens, begin, end, comma, suffixpos );
|
|
Packit |
89ede9 |
else
|
|
Packit |
89ede9 |
name_multielement_nocomma( &given, &family, tokens, begin, end, suffixpos );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
name_mutlielement_build( outname, &given, &family, tokens );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( suffix ) {
|
|
Packit |
89ede9 |
if ( suffix & JUNIOR ) str_strcatc( outname, "||Jr." );
|
|
Packit |
89ede9 |
if ( suffix & SENIOR ) str_strcatc( outname, "||Sr." );
|
|
Packit |
89ede9 |
if ( suffix & THIRD ) str_strcatc( outname, "||III" );
|
|
Packit |
89ede9 |
if ( suffix & FOURTH ) str_strcatc( outname, "||IV" );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
intlist_free( &given );
|
|
Packit |
89ede9 |
intlist_free( &family );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
int
|
|
Packit |
89ede9 |
name_addmultielement( fields *info, char *tag, slist *tokens, int begin, int end, int level )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status, ok = 1;
|
|
Packit |
89ede9 |
str name;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_init( &name );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
name_construct_multi( &name, tokens, begin, end );
|
|
Packit |
89ede9 |
status = fields_add_can_dup( info, tag, name.data, level );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) ok = 0;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_free( &name );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return ok;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* name_addsingleelement()
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Treat names that are single tokens, e.g. {Random Corporation, Inc.} in bibtex
|
|
Packit |
89ede9 |
* as a name that should not be mangled (e.g. AUTHOR:ASIS or AUTHOR:CORP, if corp
|
|
Packit |
89ede9 |
* is set).
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
int
|
|
Packit |
89ede9 |
name_addsingleelement( fields *info, char *tag, char *name, int level, int corp )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status, ok = 1;
|
|
Packit |
89ede9 |
str outtag;
|
|
Packit |
89ede9 |
str_init( &outtag );
|
|
Packit |
89ede9 |
str_strcpyc( &outtag, tag );
|
|
Packit |
89ede9 |
if ( !corp ) str_strcatc( &outtag, ":ASIS" );
|
|
Packit |
89ede9 |
else str_strcatc( &outtag, ":CORP" );
|
|
Packit |
89ede9 |
status = fields_add_can_dup( info, outtag.data, name, level );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) ok = 0;
|
|
Packit |
89ede9 |
str_free( &outtag );
|
|
Packit |
89ede9 |
return ok;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*
|
|
Packit |
89ede9 |
* Takes a single name in a string and parses it.
|
|
Packit |
89ede9 |
* Skipped by bibtex/biblatex that come pre-parsed.
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Returns 0 on error.
|
|
Packit |
89ede9 |
* Returns 1 on ok.
|
|
Packit |
89ede9 |
* Returns 2 on ok and name in asis list
|
|
Packit |
89ede9 |
* Returns 3 on ok and name in corps list
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
int
|
|
Packit |
89ede9 |
name_parse( str *outname, str *inname, slist *asis, slist *corps )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status, ret = 1;
|
|
Packit |
89ede9 |
slist tokens;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_empty( outname );
|
|
Packit |
89ede9 |
if ( !inname || !inname->len ) return ret;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
slist_init( &tokens );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( asis && slist_find( asis, inname ) !=-1 ) {
|
|
Packit |
89ede9 |
str_strcpy( outname, inname );
|
|
Packit |
89ede9 |
ret = 2;
|
|
Packit |
89ede9 |
goto out;
|
|
Packit |
89ede9 |
} else if ( corps && slist_find( corps, inname ) != -1 ) {
|
|
Packit |
89ede9 |
str_strcpy( outname, inname );
|
|
Packit |
89ede9 |
ret = 3;
|
|
Packit |
89ede9 |
goto out;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_findreplace( inname, ",", ", " );
|
|
Packit |
89ede9 |
status = slist_tokenize( &tokens, inname, " ", 1 );
|
|
Packit |
89ede9 |
if ( status!=SLIST_OK ) {
|
|
Packit |
89ede9 |
str_strcpy( outname, inname );
|
|
Packit |
89ede9 |
ret = 2;
|
|
Packit |
89ede9 |
goto out;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( tokens.n==1 ) {
|
|
Packit |
89ede9 |
str_strcpy( outname, inname );
|
|
Packit |
89ede9 |
ret = 2;
|
|
Packit |
89ede9 |
} else {
|
|
Packit |
89ede9 |
name_construct_multi( outname, &tokens, 0, tokens.n );
|
|
Packit |
89ede9 |
ret = 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
out:
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
slist_free( &tokens );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static char *
|
|
Packit |
89ede9 |
name_copy( str *name, char *p )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
char *start, *end, *q;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_empty( name );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
start = p = skip_ws( p );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* strip tailing whitespace and commas */
|
|
Packit |
89ede9 |
while ( *p && *p!='|' ) p++;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
end = p;
|
|
Packit |
89ede9 |
while ( is_ws( *end ) || *end==',' || *end=='|' || *end=='\0' )
|
|
Packit |
89ede9 |
end--;
|
|
Packit |
89ede9 |
if ( *p=='|' ) p++;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
for ( q=start; q<=end; q++ )
|
|
Packit |
89ede9 |
str_addchar( name, *q );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return p;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*
|
|
Packit |
89ede9 |
* name_add( info, newtag, data, level )
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* take name(s) in data, multiple names should be separated by
|
|
Packit |
89ede9 |
* '|' characters and divide into individual name, e.g.
|
|
Packit |
89ede9 |
* "H. F. Author|W. G. Author|Q. X. Author"
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* for each name, compare to names in the "as is" or "corporation"
|
|
Packit |
89ede9 |
* lists...these are not personal names and should be added to the
|
|
Packit |
89ede9 |
* bibliography fields directly and should not be mangled
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* for each personal name, send to appropriate algorithm depending
|
|
Packit |
89ede9 |
* on if the author name is in the format "H. F. Author" or
|
|
Packit |
89ede9 |
* "Author, H. F."
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
int
|
|
Packit |
89ede9 |
name_add( fields *info, char *tag, char *q, int level, slist *asis, slist *corps )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int ok, status, nametype, ret = 1;
|
|
Packit |
89ede9 |
str inname, outname;
|
|
Packit |
89ede9 |
slist tokens;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( !q ) return 0;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
slist_init( &tokens );
|
|
Packit |
89ede9 |
strs_init( &inname, &outname, NULL );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
while ( *q ) {
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
q = name_copy( &inname, q );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
nametype = name_parse( &outname, &inname, asis, corps );
|
|
Packit |
89ede9 |
if ( !nametype ) { ret = 0; goto out; }
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( nametype==1 ) {
|
|
Packit |
89ede9 |
status = fields_add_can_dup( info, tag, outname.data, level );
|
|
Packit |
89ede9 |
ok = ( status==FIELDS_OK ) ? 1 : 0;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
else if ( nametype==2 )
|
|
Packit |
89ede9 |
ok = name_addsingleelement( info, tag, outname.data, level, 0 );
|
|
Packit |
89ede9 |
else
|
|
Packit |
89ede9 |
ok = name_addsingleelement( info, tag, outname.data, level, 1 );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( !ok ) { ret = 0; goto out; }
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
out:
|
|
Packit |
89ede9 |
strs_free( &inname, &outname, NULL );
|
|
Packit |
89ede9 |
slist_free( &tokens );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return ret;
|
|
Packit |
89ede9 |
}
|