|
Packit |
89ede9 |
/*
|
|
Packit |
89ede9 |
* medin.c
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Copyright (c) Chris Putnam 2004-2018
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* Source code released under the GPL version 2
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
#include <stdio.h>
|
|
Packit |
89ede9 |
#include <stdlib.h>
|
|
Packit |
89ede9 |
#include "is_ws.h"
|
|
Packit |
89ede9 |
#include "str.h"
|
|
Packit |
89ede9 |
#include "str_conv.h"
|
|
Packit |
89ede9 |
#include "fields.h"
|
|
Packit |
89ede9 |
#include "xml.h"
|
|
Packit |
89ede9 |
#include "xml_encoding.h"
|
|
Packit |
89ede9 |
#include "iso639_2.h"
|
|
Packit |
89ede9 |
#include "bibutils.h"
|
|
Packit |
89ede9 |
#include "bibformats.h"
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int medin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
|
|
Packit |
89ede9 |
static int medin_processf( fields *medin, char *data, char *filename, long nref, param *p );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*****************************************************
|
|
Packit |
89ede9 |
PUBLIC: void medin_initparams()
|
|
Packit |
89ede9 |
*****************************************************/
|
|
Packit |
89ede9 |
void
|
|
Packit |
89ede9 |
medin_initparams( param *p, const char *progname )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
p->readformat = BIBL_MEDLINEIN;
|
|
Packit |
89ede9 |
p->charsetin = BIBL_CHARSET_UNICODE;
|
|
Packit |
89ede9 |
p->charsetin_src = BIBL_SRC_DEFAULT;
|
|
Packit |
89ede9 |
p->latexin = 0;
|
|
Packit |
89ede9 |
p->xmlin = 1;
|
|
Packit |
89ede9 |
p->utf8in = 1;
|
|
Packit |
89ede9 |
p->nosplittitle = 0;
|
|
Packit |
89ede9 |
p->verbose = 0;
|
|
Packit |
89ede9 |
p->addcount = 0;
|
|
Packit |
89ede9 |
p->output_raw = BIBL_RAW_WITHMAKEREFID |
|
|
Packit |
89ede9 |
BIBL_RAW_WITHCHARCONVERT;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
p->readf = medin_readf;
|
|
Packit |
89ede9 |
p->processf = medin_processf;
|
|
Packit |
89ede9 |
p->cleanf = NULL;
|
|
Packit |
89ede9 |
p->typef = NULL;
|
|
Packit |
89ede9 |
p->convertf = NULL;
|
|
Packit |
89ede9 |
p->all = NULL;
|
|
Packit |
89ede9 |
p->nall = 0;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
slist_init( &(p->asis) );
|
|
Packit |
89ede9 |
slist_init( &(p->corps) );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( !progname ) p->progname = NULL;
|
|
Packit |
89ede9 |
else p->progname = strdup( progname );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*****************************************************
|
|
Packit |
89ede9 |
PUBLIC: int medin_readf()
|
|
Packit |
89ede9 |
*****************************************************/
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*
|
|
Packit |
89ede9 |
* The only difference between MEDLINE and PUBMED in format is
|
|
Packit |
89ede9 |
* that the entire library is wrapped in <MedlineCitationSet>
|
|
Packit |
89ede9 |
* or <PubmedArticle> tags...
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
/* Tag names that may enclose one reference.  The position of a name
 * in this table is the wrapper "type" index handed between
 * medin_findstartwrapper() and medin_findendwrapper(). */
static char *wrapper[] = {
	"PubmedArticle",
	"MedlineCitation",
};
static int nwrapper = (int) ( sizeof( wrapper ) / sizeof( *wrapper ) );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static char *
|
|
Packit |
89ede9 |
medin_findstartwrapper( char *buf, int *ntype )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
char *startptr=NULL;
|
|
Packit |
89ede9 |
int i;
|
|
Packit |
89ede9 |
for ( i=0; i
|
|
Packit |
89ede9 |
startptr = xml_find_start( buf, wrapper[ i ] );
|
|
Packit |
89ede9 |
if ( startptr && *ntype==-1 ) *ntype = i;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return startptr;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static char *
|
|
Packit |
89ede9 |
medin_findendwrapper( char *buf, int ntype )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
char *endptr = xml_find_end( buf, wrapper[ ntype ] );
|
|
Packit |
89ede9 |
return endptr;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
str tmp;
|
|
Packit |
89ede9 |
char *startptr = NULL, *endptr;
|
|
Packit |
89ede9 |
int haveref = 0, inref = 0, file_charset = CHARSET_UNKNOWN, m, type = -1;
|
|
Packit |
89ede9 |
str_init( &tmp );
|
|
Packit |
89ede9 |
while ( !haveref && str_fget( fp, buf, bufsize, bufpos, line ) ) {
|
|
Packit |
89ede9 |
if ( line->data ) {
|
|
Packit |
89ede9 |
m = xml_getencoding( line );
|
|
Packit |
89ede9 |
if ( m!=CHARSET_UNKNOWN ) file_charset = m;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( line->data ) {
|
|
Packit |
89ede9 |
startptr = medin_findstartwrapper( line->data, &type );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( startptr || inref ) {
|
|
Packit |
89ede9 |
if ( inref ) str_strcat( &tmp, line );
|
|
Packit |
89ede9 |
else {
|
|
Packit |
89ede9 |
str_strcatc( &tmp, startptr );
|
|
Packit |
89ede9 |
inref = 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
endptr = medin_findendwrapper( str_cstr( &tmp ), type );
|
|
Packit |
89ede9 |
if ( endptr ) {
|
|
Packit |
89ede9 |
str_segcpy( reference, str_cstr( &tmp ), endptr );
|
|
Packit |
89ede9 |
haveref = 1;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
str_free( &tmp );
|
|
Packit |
89ede9 |
*fcharset = file_charset;
|
|
Packit |
89ede9 |
return haveref;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*****************************************************
|
|
Packit |
89ede9 |
PUBLIC: int medin_processf()
|
|
Packit |
89ede9 |
*****************************************************/
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* One row of a tag-conversion table used by medin_doconvert().
 * Field order matters: the tables in this file use positional
 * initializers. */
typedef struct xml_convert {
	char *in;     /* input XML tag to match */
	char *a;      /* attribute name, or NULL to match on tag alone */
	char *aval;   /* required attribute value (used only when a is set) */
	char *out;    /* output field tag */
	int   level;  /* level passed to fields_add() */
} xml_convert;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_doconvert( xml *node, fields *info, xml_convert *c, int nc, int *found )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int i, fstatus;
|
|
Packit |
89ede9 |
char *d;
|
|
Packit |
89ede9 |
*found = 0;
|
|
Packit |
89ede9 |
if ( !xml_has_value( node ) ) return BIBL_OK;
|
|
Packit |
89ede9 |
d = xml_value_cstr( node );
|
|
Packit |
89ede9 |
for ( i=0; i
|
|
Packit |
89ede9 |
if ( c[i].a==NULL ) {
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, c[i].in ) ) {
|
|
Packit |
89ede9 |
*found = 1;
|
|
Packit |
89ede9 |
fstatus = fields_add( info, c[i].out, d, c[i].level );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
} else {
|
|
Packit |
89ede9 |
if ( xml_tag_has_attribute( node, c[i].in, c[i].a, c[i].aval ) ) {
|
|
Packit |
89ede9 |
*found = 1;
|
|
Packit |
89ede9 |
fstatus = fields_add( info, c[i].out, d, c[i].level );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <ArticleTitle>Mechanism and.....</ArticleTitle>
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_articletitle( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int fstatus, status = BIBL_OK;
|
|
Packit |
89ede9 |
if ( xml_has_value( node ) ) {
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "TITLE", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <MedlineDate>2003 Jan-Feb</MedlineDate> */
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_medlinedate( fields *info, char *p, int level )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int fstatus;
|
|
Packit |
89ede9 |
str tmp;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_init( &tmp );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
p = str_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
|
|
Packit |
89ede9 |
if ( str_memerr( &tmp ) ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( str_has_value( &tmp ) ) {
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "PARTDATE:YEAR", str_cstr( &tmp ), level );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
p = str_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
|
|
Packit |
89ede9 |
if ( str_memerr( &tmp ) ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( str_has_value( &tmp ) ) {
|
|
Packit |
89ede9 |
str_findreplace( &tmp, "-", "/" );
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "PARTDATE:MONTH", str_cstr( &tmp ), level );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
(void) str_cpytodelim( &tmp, skip_ws( p ), " \t\n\r", 0 );
|
|
Packit |
89ede9 |
if ( str_memerr( &tmp ) ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( str_has_value( &tmp ) ) {
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "PARTDATE:DAY", str_cstr( &tmp ), level );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
str_free( &tmp );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <Language>eng</Language>
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_language( xml *node, fields *info, int level )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
char *code, *language;
|
|
Packit |
89ede9 |
int fstatus;
|
|
Packit |
89ede9 |
code = xml_value_cstr( node );
|
|
Packit |
89ede9 |
if ( !code ) return BIBL_OK;
|
|
Packit |
89ede9 |
language = iso639_2_from_code( code );
|
|
Packit |
89ede9 |
if ( language )
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "LANGUAGE", language, level );
|
|
Packit |
89ede9 |
else
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "LANGUAGE", code, level );
|
|
Packit |
89ede9 |
if ( fstatus==FIELDS_OK ) return BIBL_OK;
|
|
Packit |
89ede9 |
else return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <Journal>
|
|
Packit |
89ede9 |
* <ISSN>0027-8424</ISSN>
|
|
Packit |
89ede9 |
* <JournalIssue PrintYN="Y">
|
|
Packit |
89ede9 |
* <Volume>100</Volume>
|
|
Packit |
89ede9 |
* <Issue>21</Issue>
|
|
Packit |
89ede9 |
* <PubDate>
|
|
Packit |
89ede9 |
* <Year>2003</Year>
|
|
Packit |
89ede9 |
* <Month>Oct</Month>
|
|
Packit |
89ede9 |
* <Day>14</Day>
|
|
Packit |
89ede9 |
* </PubDate>
|
|
Packit |
89ede9 |
* </JournalIssue>
|
|
Packit |
89ede9 |
* </Journal>
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* or....
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* <Journal>
|
|
Packit |
89ede9 |
* <ISSN IssnType="Print">0735-0414</ISSN>
|
|
Packit |
89ede9 |
* <JournalIssue CitedMedium="Print">
|
|
Packit |
89ede9 |
* <Volume>38</Volume>
|
|
Packit |
89ede9 |
* <Issue>1</Issue>
|
|
Packit |
89ede9 |
* <PubDate>
|
|
Packit |
89ede9 |
* <MedlineDate>2003 Jan-Feb</MedlineDate>
|
|
Packit |
89ede9 |
* </PubDate>
|
|
Packit |
89ede9 |
* </JournalIssue>
|
|
Packit |
89ede9 |
* <Title>Alcohol and alcoholism (Oxford, Oxfordshire) </Title>
|
|
Packit |
89ede9 |
* <ISOAbbreviation>Alcohol Alcohol.</ISOAbbreviation>
|
|
Packit |
89ede9 |
* </Journal>
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_journal1( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
xml_convert c[] = {
|
|
Packit |
89ede9 |
{ "Title", NULL, NULL, "TITLE", 1 },
|
|
Packit |
89ede9 |
{ "ISOAbbreviation", NULL, NULL, "SHORTTITLE", 1 },
|
|
Packit |
89ede9 |
{ "ISSN", NULL, NULL, "ISSN", 1 },
|
|
Packit |
89ede9 |
{ "Volume", NULL, NULL, "VOLUME", 1 },
|
|
Packit |
89ede9 |
{ "Issue", NULL, NULL, "ISSUE", 1 },
|
|
Packit |
89ede9 |
{ "Year", NULL, NULL, "PARTDATE:YEAR", 1 },
|
|
Packit |
89ede9 |
{ "Month", NULL, NULL, "PARTDATE:MONTH", 1 },
|
|
Packit |
89ede9 |
{ "Day", NULL, NULL, "PARTDATE:DAY", 1 },
|
|
Packit |
89ede9 |
};
|
|
Packit |
89ede9 |
int nc = sizeof( c ) / sizeof( c[0] ), status, found;
|
|
Packit |
89ede9 |
if ( xml_has_value( node ) ) {
|
|
Packit |
89ede9 |
status = medin_doconvert( node, info, c, nc, &found );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
if ( !found ) {
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "MedlineDate" ) ) {
|
|
Packit |
89ede9 |
status = medin_medlinedate( info, xml_value_cstr( node ), 1 );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "Language" ) ) {
|
|
Packit |
89ede9 |
status = medin_language( node, info, 1 );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->down ) {
|
|
Packit |
89ede9 |
status = medin_journal1( node->down, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->next ) {
|
|
Packit |
89ede9 |
status = medin_journal1( node->next, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <Pagination>
|
|
Packit |
89ede9 |
* <MedlinePgn>12111-6</MedlinePgn>
|
|
Packit |
89ede9 |
* </Pagination>
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_pagination( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int i, fstatus, status;
|
|
Packit |
89ede9 |
str sp, ep;
|
|
Packit |
89ede9 |
char *p, *pp;
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "MedlinePgn" ) && node->value.len ) {
|
|
Packit |
89ede9 |
strs_init( &sp, &ep, NULL );
|
|
Packit |
89ede9 |
p = str_cpytodelim( &sp, xml_value_cstr( node ), "-", 1 );
|
|
Packit |
89ede9 |
if ( str_memerr( &sp ) ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
if ( str_has_value( &sp ) ) {
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "PAGES:START", str_cstr( &sp ), 1 );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
(void) str_cpytodelim( &ep, p, "", 0 );
|
|
Packit |
89ede9 |
if ( str_memerr( &ep ) ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
if ( str_has_value( &ep ) ) {
|
|
Packit |
89ede9 |
if ( sp.len > ep.len ) {
|
|
Packit |
89ede9 |
for ( i=sp.len-ep.len; i
|
|
Packit |
89ede9 |
sp.data[i] = ep.data[i-sp.len+ep.len];
|
|
Packit |
89ede9 |
pp = sp.data;
|
|
Packit |
89ede9 |
} else pp = ep.data;
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "PAGES:STOP", pp, 1 );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
strs_free( &sp, &ep, NULL );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->down ) {
|
|
Packit |
89ede9 |
status = medin_pagination( node->down, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->next ) {
|
|
Packit |
89ede9 |
status = medin_pagination( node->next, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <Abstract>
|
|
Packit |
89ede9 |
* <AbstractText>ljwejrelr</AbstractText>
|
|
Packit |
89ede9 |
* </Abstract>
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_abstract( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int fstatus;
|
|
Packit |
89ede9 |
if ( xml_tag_matches_has_value( node, "AbstractText" ) ) {
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "ABSTRACT", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
} else if ( node->next ) return medin_abstract( node->next, info );
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <AuthorList CompleteYN="Y">
|
|
Packit |
89ede9 |
* <Author>
|
|
Packit |
89ede9 |
* <LastName>Barondeau</LastName>
|
|
Packit |
89ede9 |
* <ForeName>David P</ForeName>
|
|
Packit |
89ede9 |
* ( or <FirstName>David P</FirstName> )
|
|
Packit |
89ede9 |
* <Initials>DP</Initials>
|
|
Packit |
89ede9 |
* </Author>
|
|
Packit |
89ede9 |
* <Author>
|
|
Packit |
89ede9 |
* <CollectiveName>Organization</CollectiveName>
|
|
Packit |
89ede9 |
* </Author>
|
|
Packit |
89ede9 |
* </AuthorList>
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_author( xml *node, str *name )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
char *p;
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "LastName" ) ) {
|
|
Packit |
89ede9 |
if ( str_has_value( name ) ) {
|
|
Packit |
89ede9 |
str_prepend( name, "|" );
|
|
Packit |
89ede9 |
str_prepend( name, xml_value_cstr( node ) );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
else str_strcat( name, xml_value( node ) );
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "ForeName" ) ||
|
|
Packit |
89ede9 |
xml_tag_matches( node, "FirstName" ) ) {
|
|
Packit |
89ede9 |
p = xml_value_cstr( node );
|
|
Packit |
89ede9 |
while ( p && *p ) {
|
|
Packit |
89ede9 |
if ( str_has_value( name ) ) str_addchar( name, '|' );
|
|
Packit |
89ede9 |
while ( *p==' ' ) p++;
|
|
Packit |
89ede9 |
while ( *p && *p!=' ' ) str_addchar( name, *p++ );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "Initials" ) && !strchr( name->data, '|' )) {
|
|
Packit |
89ede9 |
p = xml_value_cstr( node );
|
|
Packit |
89ede9 |
while ( p && *p ) {
|
|
Packit |
89ede9 |
if ( str_has_value( name ) ) str_addchar( name, '|' );
|
|
Packit |
89ede9 |
if ( !is_ws(*p) ) str_addchar( name, *p++ );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->next ) medin_author( node->next, name );
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_corpauthor( xml *node, str *name )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "CollectiveName" ) ) {
|
|
Packit |
89ede9 |
str_strcpy( name, xml_value( node ) );
|
|
Packit |
89ede9 |
} else if ( node->next ) medin_corpauthor( node->next, name );
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_authorlist( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int fstatus, status;
|
|
Packit |
89ede9 |
str name;
|
|
Packit |
89ede9 |
char *tag;
|
|
Packit |
89ede9 |
str_init( &name );
|
|
Packit |
89ede9 |
node = node->down;
|
|
Packit |
89ede9 |
while ( node ) {
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "Author" ) && node->down ) {
|
|
Packit |
89ede9 |
status = medin_author( node->down, &name );
|
|
Packit |
89ede9 |
tag = "AUTHOR";
|
|
Packit |
89ede9 |
if ( str_is_empty( &name ) ) {
|
|
Packit |
89ede9 |
status = medin_corpauthor( node->down, &name );
|
|
Packit |
89ede9 |
tag = "AUTHOR:CORP";
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( str_memerr( &name ) || status!=BIBL_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
if ( str_has_value( &name ) ) {
|
|
Packit |
89ede9 |
fstatus = fields_add(info,tag,name.data,0);
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
str_empty( &name );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
node = node->next;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
str_free( &name );
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <PublicationTypeList>
|
|
Packit |
89ede9 |
* <PublicationType>Journal Article</PublicationType>
|
|
Packit |
89ede9 |
* </PublicationTypeList>
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <MedlineJournalInfo>
|
|
Packit |
89ede9 |
* <Country>United States</Country>
|
|
Packit |
89ede9 |
* <MedlineTA>Proc Natl Acad Sci U S A</MedlineTA>
|
|
Packit |
89ede9 |
* <NlmUniqueID>7507876</NlmUniqueID>
|
|
Packit |
89ede9 |
* </MedlineJournalInfo>
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_journal2( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int fstatus, status = BIBL_OK;
|
|
Packit |
89ede9 |
if ( xml_tag_matches_has_value( node, "MedlineTA" ) && fields_find( info, "TITLE", LEVEL_HOST )==FIELDS_NOTFOUND ) {
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "TITLE", xml_value_cstr( node ), 1 );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->down ) {
|
|
Packit |
89ede9 |
status = medin_journal2( node->down, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->next ) status = medin_journal2( node->next, info );
|
|
Packit |
89ede9 |
return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/*
|
|
Packit |
89ede9 |
<MeshHeadingList>
|
|
Packit |
89ede9 |
<MeshHeading>
|
|
Packit |
89ede9 |
<DescriptorName MajorTopicYN="N">Biophysics</DescriptorName>
|
|
Packit |
89ede9 |
</MeshHeading>
|
|
Packit |
89ede9 |
<MeshHeading>
|
|
Packit |
89ede9 |
<DescriptorName MajorTopicYN="N">Crystallography, X-Ray</DescriptorName>
|
|
Packit |
89ede9 |
</MeshHeading>
|
|
Packit |
89ede9 |
</MeshHeadingList>
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_meshheading( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int fstatus, status = BIBL_OK;
|
|
Packit |
89ede9 |
if ( xml_tag_matches_has_value( node, "DescriptorName" ) ) {
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "KEYWORD", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->next ) status = medin_meshheading( node->next, info );
|
|
Packit |
89ede9 |
return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_meshheadinglist( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status = BIBL_OK;
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "MeshHeading" ) && node->down ) {
|
|
Packit |
89ede9 |
status = medin_meshheading( node->down, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->next ) status = medin_meshheadinglist( node->next, info );
|
|
Packit |
89ede9 |
return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* <PubmedData>
|
|
Packit |
89ede9 |
* ....
|
|
Packit |
89ede9 |
* <ArticleIdList>
|
|
Packit |
89ede9 |
* <ArticleId IdType="pubmed">14523232</ArticleId>
|
|
Packit |
89ede9 |
* <ArticleId IdType="doi">10.1073/pnas.2133463100</ArticleId>
|
|
Packit |
89ede9 |
* <ArticleId IdType="pii">2133463100</ArticleId>
|
|
Packit |
89ede9 |
* <ArticleId IdType="pmc">PMC4833866</ArticleId>
|
|
Packit |
89ede9 |
* </ArticleIdList>
|
|
Packit |
89ede9 |
* </PubmedData>
|
|
Packit |
89ede9 |
*
|
|
Packit |
89ede9 |
* I think "pii" is "Publisher Item Identifier"
|
|
Packit |
89ede9 |
*/
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_pubmeddata( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
xml_convert c[] = {
|
|
Packit |
89ede9 |
{ "ArticleId", "IdType", "doi", "DOI", 0 },
|
|
Packit |
89ede9 |
{ "ArticleId", "IdType", "pubmed", "PMID", 0 },
|
|
Packit |
89ede9 |
{ "ArticleId", "IdType", "medline", "MEDLINE", 0 },
|
|
Packit |
89ede9 |
{ "ArticleId", "IdType", "pmc", "PMC", 0 },
|
|
Packit |
89ede9 |
{ "ArticleId", "IdType", "pii", "PII", 0 },
|
|
Packit |
89ede9 |
};
|
|
Packit |
89ede9 |
int nc = sizeof( c ) / sizeof( c[0] ), found, status;
|
|
Packit |
89ede9 |
status = medin_doconvert( node, info, c, nc, &found );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
if ( node->next ) {
|
|
Packit |
89ede9 |
status = medin_pubmeddata( node->next, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->down ) {
|
|
Packit |
89ede9 |
medin_pubmeddata( node->down, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
return BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_article( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int fstatus, status = BIBL_OK;
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "Journal" ) )
|
|
Packit |
89ede9 |
status = medin_journal1( node, info );
|
|
Packit |
89ede9 |
else if ( xml_tag_matches( node, "ArticleTitle" ) )
|
|
Packit |
89ede9 |
status = medin_articletitle( node, info );
|
|
Packit |
89ede9 |
else if ( xml_tag_matches( node, "Pagination" ) && node->down )
|
|
Packit |
89ede9 |
status = medin_pagination( node->down, info );
|
|
Packit |
89ede9 |
else if ( xml_tag_matches( node, "Abstract" ) && node->down )
|
|
Packit |
89ede9 |
status = medin_abstract( node->down, info );
|
|
Packit |
89ede9 |
else if ( xml_tag_matches( node, "AuthorList" ) )
|
|
Packit |
89ede9 |
status = medin_authorlist( node, info );
|
|
Packit |
89ede9 |
else if ( xml_tag_matches( node, "Language" ) )
|
|
Packit |
89ede9 |
status = medin_language( node, info, 0 );
|
|
Packit |
89ede9 |
else if ( xml_tag_matches( node, "Affiliation" ) ) {
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "ADDRESS", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
if ( node->next ) status = medin_article( node->next, info );
|
|
Packit |
89ede9 |
return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_medlinecitation( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int fstatus, status = BIBL_OK;
|
|
Packit |
89ede9 |
if ( xml_tag_matches_has_value( node, "PMID" ) ) {
|
|
Packit |
89ede9 |
fstatus = fields_add( info, "PMID", xml_value_cstr( node ), 0 );
|
|
Packit |
89ede9 |
if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->down ) {
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "Article" ) ) {
|
|
Packit |
89ede9 |
status = medin_article( node->down, info );
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "MedlineJournalInfo" ) ) {
|
|
Packit |
89ede9 |
status = medin_journal2( node->down, info );
|
|
Packit |
89ede9 |
} else if ( xml_tag_matches( node, "MeshHeadingList" ) )
|
|
Packit |
89ede9 |
status = medin_meshheadinglist( node->down, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->next ) status = medin_medlinecitation( node->next, info );
|
|
Packit |
89ede9 |
return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_pubmedarticle( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status = BIBL_OK;
|
|
Packit |
89ede9 |
if ( node->down ) {
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "MedlineCitation" ) )
|
|
Packit |
89ede9 |
status = medin_medlinecitation( node->down, info );
|
|
Packit |
89ede9 |
else if ( xml_tag_matches( node, "PubmedData" ) )
|
|
Packit |
89ede9 |
status = medin_pubmeddata( node->down, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( node->next ) status = medin_pubmedarticle( node->next, info );
|
|
Packit |
89ede9 |
return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_assembleref( xml *node, fields *info )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status = BIBL_OK;
|
|
Packit |
89ede9 |
if ( node->down ) {
|
|
Packit |
89ede9 |
if ( xml_tag_matches( node, "PubmedArticle" ) )
|
|
Packit |
89ede9 |
status = medin_pubmedarticle( node->down, info );
|
|
Packit |
89ede9 |
else if ( xml_tag_matches( node, "MedlineCitation" ) )
|
|
Packit |
89ede9 |
status = medin_medlinecitation( node->down, info );
|
|
Packit |
89ede9 |
else
|
|
Packit |
89ede9 |
status = medin_assembleref( node->down, info );
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( node->next ) {
|
|
Packit |
89ede9 |
status = medin_assembleref( node->next, info );
|
|
Packit |
89ede9 |
if ( status!=BIBL_OK ) return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
/* assume everything is a journal article */
|
|
Packit |
89ede9 |
if ( fields_num( info ) ) {
|
|
Packit |
89ede9 |
status = fields_add( info, "RESOURCE", "text", 0 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
status = fields_add( info, "ISSUANCE", "continuing", 1 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
status = fields_add( info, "GENRE:MARC", "periodical", 1 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
status = fields_add( info, "GENRE:BIBUTILS", "academic journal", 1 );
|
|
Packit |
89ede9 |
if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
|
|
Packit |
89ede9 |
status = BIBL_OK;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
return status;
|
|
Packit |
89ede9 |
}
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
static int
|
|
Packit |
89ede9 |
medin_processf( fields *medin, char *data, char *filename, long nref, param *p )
|
|
Packit |
89ede9 |
{
|
|
Packit |
89ede9 |
int status;
|
|
Packit |
89ede9 |
xml top;
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
xml_init( &top );
|
|
Packit |
89ede9 |
xml_parse( data, &top );
|
|
Packit |
89ede9 |
status = medin_assembleref( &top, medin );
|
|
Packit |
89ede9 |
xml_free( &top );
|
|
Packit |
89ede9 |
|
|
Packit |
89ede9 |
if ( status==BIBL_OK ) return 1;
|
|
Packit |
89ede9 |
return 0;
|
|
Packit |
89ede9 |
}
|