/* $Id: iptcutil.c,v 1.4.2.2 2010-06-08 18:50:41 bfriesen Exp $ */

#include "tif_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <memory.h>
#include <ctype.h>
#include <limits.h>

#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif

#ifdef HAVE_IO_H
# include <io.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

/* Case-insensitive, length-bounded string comparison for the host platform. */
#ifdef WIN32
#define STRNICMP strnicmp
#else
#define STRNICMP strncasecmp
#endif

/* Pairs an IPTC record number with the human-readable name printed for it. */
typedef struct _tag_spec
{
  short
    id;      /* record number, compared against the byte read from the stream */

  char
    *name;   /* label written between the record number and the '=' sign */
} tag_spec;

/* Record numbers that formatIPTC() prints with a descriptive name. */
static tag_spec tags[] = {
    { 5,"Image Name" },
    { 7,"Edit Status" },
    { 10,"Priority" },
    { 15,"Category" },
    { 20,"Supplemental Category" },
    { 22,"Fixture Identifier" },
    { 25,"Keyword" },
    { 30,"Release Date" },
    { 35,"Release Time" },
    { 40,"Special Instructions" },
    { 45,"Reference Service" },
    { 47,"Reference Date" },
    { 50,"Reference Number" },
    { 55,"Created Date" },
    { 60,"Created Time" },
    { 65,"Originating Program" },
    { 70,"Program Version" },
    { 75,"Object Cycle" },
    { 80,"Byline" },
    { 85,"Byline Title" },
    { 90,"City" },
    { 95,"Province State" },
    { 100,"Country Code" },
    { 101,"Country" },
    { 103,"Original Transmission Reference" },
    { 105,"Headline" },
    { 110,"Credit" },
    { 115,"Source" },
    { 116,"Copyright String" },
    { 120,"Caption" },
    { 121,"Local Caption" },
    { 122,"Caption Writer" },
    { 200,"Custom Field 1" },
    { 201,"Custom Field 2" },
    { 202,"Custom Field 3" },
    { 203,"Custom Field 4" },
    { 204,"Custom Field 5" },
    { 205,"Custom Field 6" },
    { 206,"Custom Field 7" },
    { 207,"Custom Field 8" },
    { 208,"Custom Field 9" },
    { 209,"Custom Field 10" },
    { 210,"Custom Field 11" },
    { 211,"Custom Field 12" },
    { 212,"Custom Field 13" },
    { 213,"Custom Field 14" },
    { 214,"Custom Field 15" },
    { 215,"Custom Field 16" },
    { 216,"Custom Field 17" },
    { 217,"Custom Field 18" },
    { 218,"Custom Field 19" },
    { 219,"Custom Field 20" }
};

/*
 * Write `len` bytes starting at `s` to `ofile` as one double-quoted line.
 * We format the output using HTML conventions to preserve control
 * characters and such:
 *
 *   '&'                -> "&amp;"
 *   '"'                -> "&quot;"
 *   '<' and '>'        -> "&lt;" and "&gt;" (only if HANDLE_GT_LT is defined)
 *   control characters -> "&#N;", N being the decimal byte value
 *
 * Every other byte is copied unchanged.  The line is closed with a double
 * quote and a newline.  `s` need not be NUL-terminated; exactly `len` bytes
 * are read (none when len <= 0).
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /* Convert through unsigned char: plain char may be signed, and handing
       a negative value to iscntrl() is undefined behaviour. */
    int c = (unsigned char) *s;
    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}

/* One named HTML entity and the single character it stands for. */
typedef struct _html_code
{
  short
    len;     /* number of characters of `code` that are compared and replaced */
  const char
    *code,   /* entity text */
    val;     /* character stored in place of the entity */
} html_code;

/*
 * Named entities recognised when converting text back to binary.  Each
 * `len` must equal the length of its entity text: the decoder compares
 * exactly `len` characters and then skips that many.
 */
static html_code html_codes[] = {
#ifdef HANDLE_GT_LT
    { 4,"&lt;",'<' },
    { 4,"&gt;",'>' },
#endif
    { 5,"&amp;",'&' },
    { 6,"&quot;",'"' }
};

/*
Packit 994f1a
 * This routine converts HTML escape sequence
Packit 994f1a
 * back to the original ASCII representation.
Packit 994f1a
 * - returns the number of characters dropped.
Packit 994f1a
 */
Packit 994f1a
int convertHTMLcodes(char *s, int len)
Packit 994f1a
{
Packit 994f1a
  if (len <=0 || s==(char*)NULL || *s=='\0')
Packit 994f1a
    return 0;
Packit 994f1a
Packit 994f1a
  if (s[1] == '#')
Packit 994f1a
    {
Packit 994f1a
      int val, o;
Packit 994f1a
Packit 994f1a
      if (sscanf(s,"&#%d;",&val) == 1)
Packit 994f1a
      {
Packit 994f1a
        o = 3;
Packit 994f1a
        while (s[o] != ';')
Packit 994f1a
        {
Packit 994f1a
          o++;
Packit 994f1a
          if (o > 5)
Packit 994f1a
            break;
Packit 994f1a
        }
Packit 994f1a
        if (o < 5)
Packit 994f1a
          strcpy(s+1, s+1+o);
Packit 994f1a
        *s = val;
Packit 994f1a
        return o;
Packit 994f1a
      }
Packit 994f1a
    }
Packit 994f1a
  else
Packit 994f1a
    {
Packit 994f1a
      int
Packit 994f1a
        i,
Packit 994f1a
        codes = sizeof(html_codes) / sizeof(html_code);
Packit 994f1a
Packit 994f1a
      for (i=0; i < codes; i++)
Packit 994f1a
      {
Packit 994f1a
        if (html_codes[i].len <= len)
Packit 994f1a
          if (STRNICMP(s, html_codes[i].code, html_codes[i].len) == 0)
Packit 994f1a
            {
Packit 994f1a
              strcpy(s+1, s+html_codes[i].len);
Packit 994f1a
              *s = html_codes[i].val;
Packit 994f1a
              return html_codes[i].len-1;
Packit 994f1a
            }
Packit 994f1a
      }
Packit 994f1a
    }
Packit 994f1a
Packit 994f1a
  return 0;
Packit 994f1a
}
Packit 994f1a
Packit 994f1a
int formatIPTC(FILE *ifile, FILE *ofile)
Packit 994f1a
{
Packit 994f1a
  unsigned int
Packit 994f1a
    foundiptc,
Packit 994f1a
    tagsfound;
Packit 994f1a
Packit 994f1a
  unsigned char
Packit 994f1a
    recnum,
Packit 994f1a
    dataset;
Packit 994f1a
Packit 994f1a
  char
Packit 994f1a
    *readable,
Packit 994f1a
    *str;
Packit 994f1a
Packit 994f1a
  long
Packit 994f1a
    tagindx,
Packit 994f1a
    taglen;
Packit 994f1a
Packit 994f1a
  int
Packit 994f1a
    i,
Packit 994f1a
    tagcount = sizeof(tags) / sizeof(tag_spec);
Packit 994f1a
Packit 994f1a
  char
Packit 994f1a
    c;
Packit 994f1a
Packit 994f1a
  foundiptc = 0; /* found the IPTC-Header */
Packit 994f1a
  tagsfound = 0; /* number of tags found */
Packit 994f1a
Packit 994f1a
  c = getc(ifile);
Packit 994f1a
  while (c != EOF)
Packit 994f1a
  {
Packit 994f1a
	  if (c == 0x1c)
Packit 994f1a
	    foundiptc = 1;
Packit 994f1a
	  else
Packit 994f1a
      {
Packit 994f1a
        if (foundiptc)
Packit 994f1a
	        return -1;
Packit 994f1a
        else
Packit 994f1a
	        continue;
Packit 994f1a
	    }
Packit 994f1a
Packit 994f1a
    /* we found the 0x1c tag and now grab the dataset and record number tags */
Packit 994f1a
    dataset = getc(ifile);
Packit 994f1a
	  if ((char) dataset == EOF)
Packit 994f1a
	    return -1;
Packit 994f1a
    recnum = getc(ifile);
Packit 994f1a
	  if ((char) recnum == EOF)
Packit 994f1a
	    return -1;
Packit 994f1a
    /* try to match this record to one of the ones in our named table */
Packit 994f1a
    for (i=0; i< tagcount; i++)
Packit 994f1a
    {
Packit 994f1a
      if (tags[i].id == recnum)
Packit 994f1a
          break;
Packit 994f1a
    }
Packit 994f1a
    if (i < tagcount)
Packit 994f1a
      readable = tags[i].name;
Packit 994f1a
    else
Packit 994f1a
      readable = "";
Packit 994f1a
Packit 994f1a
    /* then we decode the length of the block that follows - long or short fmt */
Packit 994f1a
    c = getc(ifile);
Packit 994f1a
	  if (c == EOF)
Packit 994f1a
	    return 0;
Packit 994f1a
	  if (c & (unsigned char) 0x80)
Packit 994f1a
      {
Packit 994f1a
        unsigned char
Packit 994f1a
          buffer[4];
Packit 994f1a
Packit 994f1a
        for (i=0; i<4; i++)
Packit 994f1a
        {
Packit 994f1a
          c = buffer[i] = getc(ifile);
Packit 994f1a
          if (c == EOF)
Packit 994f1a
            return -1;
Packit 994f1a
        }
Packit 994f1a
        taglen = (((long) buffer[ 0 ]) << 24) |
Packit 994f1a
                 (((long) buffer[ 1 ]) << 16) | 
Packit 994f1a
	               (((long) buffer[ 2 ]) <<  8) |
Packit 994f1a
                 (((long) buffer[ 3 ]));
Packit 994f1a
	    }
Packit 994f1a
    else
Packit 994f1a
      {
Packit 994f1a
        unsigned char
Packit 994f1a
          x = c;
Packit 994f1a
Packit 994f1a
        taglen = ((long) x) << 8;
Packit 994f1a
        x = getc(ifile);
Packit 994f1a
        if ((char)x == EOF)
Packit 994f1a
          return -1;
Packit 994f1a
        taglen |= (long) x;
Packit 994f1a
	    }
Packit 994f1a
    /* make a buffer to hold the tag data and snag it from the input stream */
Packit 994f1a
    str = (char *) malloc((unsigned int) (taglen+1));
Packit 994f1a
    if (str == (char *) NULL)
Packit 994f1a
      {
Packit 994f1a
        printf("Memory allocation failed");
Packit 994f1a
        return 0;
Packit 994f1a
      }
Packit 994f1a
    for (tagindx=0; tagindx
Packit 994f1a
    {
Packit 994f1a
      c = str[tagindx] = getc(ifile);
Packit Service 9e085d
      if (c == EOF) {
Packit Service 9e085d
        free(str);
Packit 994f1a
        return -1;
Packit Service 9e085d
      }
Packit 994f1a
    }
Packit 994f1a
    str[ taglen ] = 0;
Packit 994f1a
Packit 994f1a
    /* now finish up by formatting this binary data into ASCII equivalent */
Packit 994f1a
    if (strlen(readable) > 0)
Packit 994f1a
	    fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
Packit 994f1a
    else
Packit 994f1a
	    fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum);
Packit 994f1a
    formatString( ofile, str, taglen );
Packit 994f1a
    free(str);
Packit 994f1a
Packit 994f1a
	  tagsfound++;
Packit 994f1a
Packit 994f1a
    c = getc(ifile);
Packit 994f1a
  }
Packit 994f1a
  return tagsfound;
Packit 994f1a
}
Packit 994f1a
Packit 994f1a
/* Forward declaration: main() uses tokenizer() before its definition, which
   appears further down in this file together with a description of every
   parameter. */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
  char *white,char *brkchar,char *quote,char eschar,char *brkused,
  int *next,char *quoted);

/*
 * Read one line of arbitrary length from `file`.
 *
 *   b     heap buffer obtained from malloc()/realloc().  It is enlarged
 *         with realloc() as needed, so the caller must carry on with the
 *         returned pointer, never with the one passed in.
 *   blen  in:  usable size of `b`;
 *         out: length of the line including the terminating NUL, or 0 when
 *              NULL is returned.
 *   file  stream to read from.
 *
 * Reading stops at '\n' (which is not stored) or at end of file.
 *
 * Returns the NUL-terminated line, or NULL when no character was read
 * before the newline / end of file, or when memory ran out.  Whenever NULL
 * is returned the buffer has already been freed, so the caller has nothing
 * left to release.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  int
    c,
    len;

  char
    *q;

  len=*blen;
  if (len < 1)
    len=1;   /* a zero or negative size would never grow when doubled */
  for (q=b; ; q++)
  {
    c=fgetc(file);
    if (c == EOF || c == '\n')
      break;
    if ((q - b + 1) >= len)
      {
        long
          tlen;

        char
          *grown;

        tlen=(long) (q - b);
        /* keep the doubled size (plus 2) representable as an int */
        grown=(char *) NULL;
        if (len <= (INT_MAX - 2) / 2)
          {
            len<<=1;
            grown=(char *) realloc(b,(size_t) len + 2);
          }
        if (grown == (char *) NULL)
          {
            /* realloc() leaves the old block allocated on failure */
            free(b);
            b=(char *) NULL;
            break;
          }
        b=grown;
        q=b+tlen;
      }
    *q=(char) c;
  }
  *blen=0;
  if (b != (char *) NULL)
    {
      int
        tlen;

      tlen=(int) (q - b);
      if (tlen == 0)
        {
          free(b);
          return (char *) NULL;
        }
      b[tlen] = '\0';
      *blen=++tlen;
    }
  return b;
}

/* initial size of the line buffer used in text mode */
#define BUFFER_SZ 4096

/*
 * Command-line entry point.
 *
 *   -b        binary mode (the default): read binary IPTC, write text
 *   -t        text mode: read the text form, write binary IPTC
 *   -i file   read from `file` instead of stdin
 *   -o file   write to `file` instead of stdout
 *
 * '/' is accepted in place of '-'.  -i and -o pick the fopen() mode from
 * the conversion mode selected so far, so -t / -b should precede them.
 *
 * Returns 0 after processing, 1 on a usage error, when a file cannot be
 * opened, or when memory cannot be allocated.
 */
int main(int argc, char *argv[])
{
  int
    i,
    mode; /* iptc binary, or iptc text */

  FILE
    *ifile = stdin,
    *ofile = stdout;

  char
    c;

  const char
    *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";

  if( argc < 2 )
    {
      printf("%s\n", usage);
      return 1;
    }

  mode = 0;

  for (i=1; i<argc; i++)
  {
    c = argv[i][0];
    if (c == '-' || c == '/')
      {
        c = argv[i][1];
        switch( c )
        {
        case 't':
          mode = 1;
#ifdef WIN32
          /* Set "stdout" to binary mode: */
          _setmode( _fileno( ofile ), _O_BINARY );
#endif
          break;
        case 'b':
          mode = 0;
#ifdef WIN32
          /* Set "stdin" to binary mode: */
          _setmode( _fileno( ifile ), _O_BINARY );
#endif
          break;
        case 'i':
          /* the file name is the next argument; make sure there is one */
          if (i + 1 >= argc)
            {
              printf("%s\n", usage);
              return 1;
            }
          if (mode == 0)
            ifile = fopen(argv[++i], "rb");
          else
            ifile = fopen(argv[++i], "rt");
          if (ifile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        case 'o':
          if (i + 1 >= argc)
            {
              printf("%s\n", usage);
              return 1;
            }
          if (mode == 0)
            ofile = fopen(argv[++i], "wt");
          else
            ofile = fopen(argv[++i], "wb");
          if (ofile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        default:
          printf("Unknown option: %s\n", argv[i]);
          return 1;
        }
      }
    else
      {
        printf("%s\n", usage);
        return 1;
      }
  }

  if (mode == 0) /* handle binary iptc info */
    formatIPTC(ifile, ofile);

  if (mode == 1) /* handle text form of iptc info */
    {
      char
        brkused,
        quoted,
        *line,
        *token,
        *newstr;

      int
        state,
        next;

      unsigned char
        recnum = 0,
        dataset = 0;

      int
        inputlen = BUFFER_SZ;

      line = (char *) malloc(inputlen);
      if (line == (char *) NULL)
        {
          printf("Memory allocation failed\n");
          return 1;
        }
      /* each line reads  dataset#recnum[#name]="value"  */
      while((line = super_fgets(line,&inputlen,ifile))!=NULL)
      {
        state=0;
        next=0;

        token = (char *) malloc(inputlen);
        newstr = (char *) malloc(inputlen);
        if (token == (char *) NULL || newstr == (char *) NULL)
          {
            free(token);
            free(newstr);
            free(line);
            printf("Memory allocation failed\n");
            return 1;
          }
        /* split at '=': token 0 is the key, token 1 the quoted value */
        while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
          &brkused,&next,&quoted)==0)
        {
          if (state == 0)
            {
              /* key: '#'-separated fields, dataset first, then record number */
              int
                field,
                keynext;

              char
                keybrk,
                keyquoted;

              field=0;
              keynext=0;
              while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
                &keybrk, &keynext, &keyquoted)==0)
              {
                if (field == 0)
                  dataset = (unsigned char) atoi(newstr);
                else
                   if (field == 1)
                     recnum = (unsigned char) atoi(newstr);
                field++;
              }
            }
          else
            if (state == 1)
              {
                int
                  valnext;

                unsigned long
                  len;

                char
                  valbrk,
                  valquoted;

                valnext=0;
                len = strlen(token);
                /* Step through the value from '&' to '&'.  Each escape is
                   collapsed in place, which leaves the text after it
                   starting exactly at token[valnext]. */
                while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
                  &valbrk, &valnext, &valquoted)==0)
                {
                  if (valbrk && valnext > 0)
                    {
                      char
                        *s = &token[valnext-1];

                      len -= convertHTMLcodes(s, (int) strlen(s));
                    }
                }

                fputc(0x1c, ofile);
                fputc(dataset, ofile);
                fputc(recnum, ofile);
                if (len < 0x10000)
                  {
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                else
                  {
                    fputc(((len >> 24) & 255) | 0x80, ofile);
                    fputc((len >> 16) & 255, ofile);
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                /* write by count, not as a string: the value may hold NULs */
                valnext=0;
                while (len--)
                  fputc(token[valnext++], ofile);
              }
          state++;
        }
        free(token);
        token = (char *)NULL;
        free(newstr);
        newstr = (char *)NULL;
      }
      free(line);
    }

  fclose( ifile );
  fclose( ofile );

  return 0;
}

/*
Packit 994f1a
	This routine is a generalized, finite state token parser. It allows
Packit 994f1a
    you to extract tokens one at a time from a string of characters.  The
Packit 994f1a
    characters used for white space, for break characters, and for quotes
Packit 994f1a
    can be specified. Also, characters in the string can be preceded by
Packit 994f1a
    a specifiable escape character which removes any special meaning the
Packit 994f1a
    character may have.
Packit 994f1a
Packit 994f1a
	There are a lot of formal parameters in this subroutine call, but
Packit 994f1a
	once you get familiar with them, this routine is fairly easy to use.
Packit 994f1a
	"#define" macros can be used to generate simpler looking calls for
Packit 994f1a
	commonly used applications of this routine.
Packit 994f1a
Packit 994f1a
	First, some terminology:
Packit 994f1a
Packit 994f1a
	token:		used here, a single unit of information in
Packit 994f1a
				the form of a group of characters.
Packit 994f1a
Packit 994f1a
	white space:	space that gets ignored (except within quotes
Packit 994f1a
				or when escaped), like blanks and tabs.  in
Packit 994f1a
				addition, white space terminates a non-quoted
Packit 994f1a
				token.
Packit 994f1a
Packit 994f1a
	break character: a character that separates non-quoted tokens.
Packit 994f1a
				commas are a common break character.  the
Packit 994f1a
				usage of break characters to signal the end
Packit 994f1a
				of a token is the same as that of white space,
Packit 994f1a
				except multiple break characters with nothing
Packit 994f1a
				or only white space between generate a null
Packit 994f1a
				token for each two break characters together.
Packit 994f1a
Packit 994f1a
				for example, if blank is set to be the white
Packit 994f1a
				space and comma is set to be the break
Packit 994f1a
				character, the line ...
Packit 994f1a
Packit 994f1a
				A, B, C ,  , DEF
Packit 994f1a
Packit 994f1a
				... consists of 5 tokens:
Packit 994f1a
Packit 994f1a
				1)	"A"
Packit 994f1a
				2)	"B"
Packit 994f1a
				3)	"C"
Packit 994f1a
				4)	""      (the null string)
Packit 994f1a
				5)	"DEF"
Packit 994f1a
Packit 994f1a
	quote character: 	a character that, when surrounding a group
Packit 994f1a
				of other characters, causes the group of
Packit 994f1a
				characters to be treated as a single token,
Packit 994f1a
				no matter how many white spaces or break
Packit 994f1a
				characters exist in the group.	also, a
Packit 994f1a
				token always terminates after the closing
Packit 994f1a
				quote.	for example, if ' is the quote
Packit 994f1a
				character, blank is white space, and comma
Packit 994f1a
				is the break character, the following
Packit 994f1a
				string ...
Packit 994f1a
Packit 994f1a
				A, ' B, CD'EF GHI
Packit 994f1a
Packit 994f1a
				... consists of 4 tokens:
Packit 994f1a
Packit 994f1a
				1)	"A"
Packit 994f1a
				2)	" B, CD" (note the blanks & comma)
Packit 994f1a
				3)	"EF"
Packit 994f1a
				4)	"GHI"
Packit 994f1a
Packit 994f1a
				the quote characters themselves do
Packit 994f1a
				not appear in the resultant tokens.  the
Packit 994f1a
				double quotes are delimiters i use here for
Packit 994f1a
				documentation purposes only.
Packit 994f1a
Packit 994f1a
	escape character:	a character which itself is ignored but
Packit 994f1a
				which causes the next character to be
Packit 994f1a
				used as is.  ^ and \ are often used as
Packit 994f1a
				escape characters.  an escape in the last
Packit 994f1a
				position of the string gets treated as a
Packit 994f1a
				"normal" (i.e., non-quote, non-white,
Packit 994f1a
				non-break, and non-escape) character.
Packit 994f1a
				for example, assume white space, break
Packit 994f1a
				character, and quote are the same as in the
Packit 994f1a
				above examples, and further, assume that
Packit 994f1a
				^ is the escape character.  then, in the
Packit 994f1a
				string ...
Packit 994f1a
Packit 994f1a
				ABC, ' DEF ^' GH' I ^ J K^ L ^
Packit 994f1a
Packit 994f1a
				... there are 7 tokens:
Packit 994f1a
Packit 994f1a
				1)	"ABC"
Packit 994f1a
				2)	" DEF ' GH"
Packit 994f1a
				3)	"I"
Packit 994f1a
				4)	" "     (a lone blank)
Packit 994f1a
				5)	"J"
Packit 994f1a
				6)	"K L"
Packit 994f1a
				7)	"^"     (passed as is at end of line)
Packit 994f1a
Packit 994f1a
Packit 994f1a
	OK, now that you have this background, here's how to call "tokenizer":
Packit 994f1a
Packit 994f1a
	result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
Packit 994f1a
		      brkused,next,quoted)
Packit 994f1a
Packit 994f1a
	result: 	0 if we haven't reached EOS (end of string), and
Packit 994f1a
			1 if we have (this is an "int").
Packit 994f1a
Packit 994f1a
	flag:		right now, only the low order 2 bits are used.
Packit 994f1a
			1 => convert non-quoted tokens to upper case
Packit 994f1a
			2 => convert non-quoted tokens to lower case
Packit 994f1a
			0 => do not convert non-quoted tokens
Packit 994f1a
			(this is an "unsigned").
Packit 994f1a
Packit 994f1a
	token:		a character string containing the returned next token
Packit 994f1a
			(this is a "char[]").
Packit 994f1a
Packit 994f1a
	maxtok: 	the maximum size of "token".  characters beyond
Packit 994f1a
			"maxtok" are truncated (this is an "int").
Packit 994f1a
Packit 994f1a
	string: 	the string to be parsed (this is a "char[]").
Packit 994f1a
Packit 994f1a
	white:		a string of the valid white spaces.  example:
Packit 994f1a
Packit 994f1a
			char whitesp[]={" \t"};
Packit 994f1a
Packit 994f1a
			blank and tab will be valid white space (this is
Packit 994f1a
			a "char[]").
Packit 994f1a
Packit 994f1a
	break:		a string of the valid break characters.  example:
Packit 994f1a
Packit 994f1a
			char breakch[]={";,"};
Packit 994f1a
Packit 994f1a
			semicolon and comma will be valid break characters
Packit 994f1a
			(this is a "char[]").
Packit 994f1a
Packit 994f1a
			IMPORTANT:  do not use the name "break" as a C
Packit 994f1a
			variable, as this is a reserved word in C.
Packit 994f1a
Packit 994f1a
	quote:		a string of the valid quote characters.  an example
Packit 994f1a
			would be
Packit 994f1a
Packit 994f1a
			char quotech[]={"'\""};
Packit 994f1a
Packit 994f1a
			(this causes single and double quotes to be valid)
Packit 994f1a
			note that a token starting with one of these characters
Packit 994f1a
			needs the same quote character to terminate it.
Packit 994f1a
Packit 994f1a
			for example,
Packit 994f1a
Packit 994f1a
			"ABC '
Packit 994f1a
Packit 994f1a
			is unterminated, but
Packit 994f1a
Packit 994f1a
			"DEF" and 'GHI'
Packit 994f1a
Packit 994f1a
			are properly terminated.  note that different quote
Packit 994f1a
			characters can appear on the same line; only for
Packit 994f1a
			a given token do the quote characters have to be
Packit 994f1a
			the same (this is a "char[]").
Packit 994f1a
Packit 994f1a
	escape: 	the escape character (NOT a string ... only one
Packit 994f1a
			allowed).  use zero if none is desired (this is
Packit 994f1a
			a "char").
Packit 994f1a
Packit 994f1a
	brkused:	the break character used to terminate the current
Packit 994f1a
			token.	if the token was quoted, this will be the
Packit 994f1a
			quote used.  if the token is the last one on the
Packit 994f1a
			line, this will be zero (this is a pointer to a
Packit 994f1a
			"char").
Packit 994f1a
Packit 994f1a
	next:		this variable points to the first character of the
Packit 994f1a
			next token.  it gets reset by "tokenizer" as it steps
Packit 994f1a
			through the string.  set it to 0 upon initialization,
Packit 994f1a
			and leave it alone after that.	you can change it
Packit 994f1a
			if you want to jump around in the string or re-parse
Packit 994f1a
			from the beginning, but be careful (this is a
Packit 994f1a
			pointer to an "int").
Packit 994f1a
Packit 994f1a
	quoted: 	set to 1 (true) if the token was quoted and 0 (false)
Packit 994f1a
			if not.  you may need this information (for example:
Packit 994f1a
			in C, a string with quotes around it is a character
Packit 994f1a
			string, while one without is an identifier).
Packit 994f1a
Packit 994f1a
			(this is a pointer to a "char").
Packit 994f1a
*/

/* states of the tokenizer state machine */

#define IN_WHITE 0   /* nothing of the token seen yet */
#define IN_TOKEN 1   /* inside an unquoted token */
#define IN_QUOTE 2   /* inside a quoted token */
#define IN_OZONE 3   /* token finished; waiting for a break or the next token */

/* working state shared by tokenizer() and chstore() */
int _p_state;	   /* current state	 */
unsigned _p_flag;  /* option flag	 */
char _p_curquote;  /* current quote char */
int _p_tokpos;	   /* current token pos  */

/*
 * routine to find character in string ... used only by "tokenizer"
 *
 * Returns the index of the first occurrence of `ch` in the NUL-terminated
 * `string`, or -1 if there is none.  The terminator itself is never
 * matched, so ch == 0 always yields -1.
 */

int sindex(char ch,char *string)
{
  int pos;
  for(pos=0;string[pos];++pos)
    if(string[pos]==ch)
      return pos;		/* return position of character */
  return -1;			/* eol ... no match found */
}

/* routine to store a character in a string ... used only by "tokenizer" */
Packit 994f1a
Packit 994f1a
void chstore(char *string,int max,char ch)
Packit 994f1a
{
Packit 994f1a
  char c;
Packit 994f1a
  if(_p_tokpos>=0&&_p_tokpos
Packit 994f1a
  {
Packit 994f1a
    if(_p_state==IN_QUOTE)
Packit 994f1a
      c=ch;
Packit 994f1a
    else
Packit 994f1a
      switch(_p_flag&3)
Packit 994f1a
      {
Packit 994f1a
	    case 1: 	    /* convert to upper */
Packit 994f1a
	      c=toupper(ch);
Packit 994f1a
	      break;
Packit 994f1a
Packit 994f1a
	    case 2: 	    /* convert to lower */
Packit 994f1a
	      c=tolower(ch);
Packit 994f1a
	      break;
Packit 994f1a
Packit 994f1a
	    default:	    /* use as is */
Packit 994f1a
	      c=ch;
Packit 994f1a
	      break;
Packit 994f1a
      }
Packit 994f1a
    string[_p_tokpos++]=c;
Packit 994f1a
  }
Packit 994f1a
  return;
Packit 994f1a
}
Packit 994f1a
Packit 994f1a
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
Packit 994f1a
  char *white,char *brkchar,char *quote,char eschar,char *brkused,
Packit 994f1a
    int *next,char *quoted)
Packit 994f1a
{
Packit 994f1a
  int qp;
Packit 994f1a
  char c,nc;
Packit 994f1a
Packit 994f1a
  *brkused=0;		/* initialize to null */
Packit 994f1a
  *quoted=0;		/* assume not quoted  */
Packit 994f1a
Packit 994f1a
  if(!line[*next])	/* if we're at end of line, indicate such */
Packit 994f1a
    return 1;
Packit 994f1a
Packit 994f1a
  _p_state=IN_WHITE;   /* initialize state */
Packit 994f1a
  _p_curquote=0;	   /* initialize previous quote char */
Packit 994f1a
  _p_flag=inflag;	   /* set option flag */
Packit 994f1a
Packit 994f1a
  for(_p_tokpos=0;(c=line[*next]);++(*next))	/* main loop */
Packit 994f1a
  {
Packit 994f1a
    if((qp=sindex(c,brkchar))>=0)  /* break */
Packit 994f1a
    {
Packit 994f1a
      switch(_p_state)
Packit 994f1a
      {
Packit 994f1a
	    case IN_WHITE:		/* these are the same here ...	*/
Packit 994f1a
	    case IN_TOKEN:		/* ... just get out		*/
Packit 994f1a
	    case IN_OZONE:		/* ditto			*/
Packit 994f1a
	      ++(*next);
Packit 994f1a
	      *brkused=brkchar[qp];
Packit 994f1a
	      goto byebye;
Packit 994f1a
Packit 994f1a
	    case IN_QUOTE:		 /* just keep going */
Packit 994f1a
	      chstore(token,tokmax,c);
Packit 994f1a
	      break;
Packit 994f1a
      }
Packit 994f1a
    }
Packit 994f1a
    else if((qp=sindex(c,quote))>=0)  /* quote */
Packit 994f1a
    {
Packit 994f1a
      switch(_p_state)
Packit 994f1a
      {
Packit 994f1a
	    case IN_WHITE:	 /* these are identical, */
Packit 994f1a
	      _p_state=IN_QUOTE; /* change states   */
Packit 994f1a
	      _p_curquote=quote[qp]; /* save quote char */
Packit 994f1a
	      *quoted=1;	/* set to true as long as something is in quotes */
Packit 994f1a
	      break;
Packit 994f1a
Packit 994f1a
	    case IN_QUOTE:
Packit 994f1a
	      if(quote[qp]==_p_curquote) /* same as the beginning quote? */
Packit 994f1a
	      {
Packit 994f1a
	        _p_state=IN_OZONE;
Packit 994f1a
	        _p_curquote=0;
Packit 994f1a
	      }
Packit 994f1a
	      else
Packit 994f1a
	        chstore(token,tokmax,c); /* treat as regular char */
Packit 994f1a
	      break;
Packit 994f1a
Packit 994f1a
	    case IN_TOKEN:
Packit 994f1a
	    case IN_OZONE:
Packit 994f1a
	      *brkused=c; /* uses quote as break char */
Packit 994f1a
	      goto byebye;
Packit 994f1a
      }
Packit 994f1a
    }
Packit 994f1a
    else if((qp=sindex(c,white))>=0) /* white */
Packit 994f1a
    {
Packit 994f1a
      switch(_p_state)
Packit 994f1a
      {
Packit 994f1a
	    case IN_WHITE:
Packit 994f1a
	    case IN_OZONE:
Packit 994f1a
	      break;		/* keep going */
Packit 994f1a
Packit 994f1a
	    case IN_TOKEN:
Packit 994f1a
	      _p_state=IN_OZONE;
Packit 994f1a
	      break;
Packit 994f1a
Packit 994f1a
	    case IN_QUOTE:
Packit 994f1a
	      chstore(token,tokmax,c); /* it's valid here */
Packit 994f1a
	      break;
Packit 994f1a
      }
Packit 994f1a
    }
Packit 994f1a
    else if(c==eschar)  /* escape */
Packit 994f1a
    {
Packit 994f1a
      nc=line[(*next)+1];
Packit 994f1a
      if(nc==0) 		/* end of line */
Packit 994f1a
      {
Packit 994f1a
	    *brkused=0;
Packit 994f1a
	    chstore(token,tokmax,c);
Packit 994f1a
	    ++(*next);
Packit 994f1a
	    goto byebye;
Packit 994f1a
      }
Packit 994f1a
      switch(_p_state)
Packit 994f1a
      {
Packit 994f1a
	    case IN_WHITE:
Packit 994f1a
	      --(*next);
Packit 994f1a
	      _p_state=IN_TOKEN;
Packit 994f1a
	      break;
Packit 994f1a
Packit 994f1a
	    case IN_TOKEN:
Packit 994f1a
	    case IN_QUOTE:
Packit 994f1a
	      ++(*next);
Packit 994f1a
	      chstore(token,tokmax,nc);
Packit 994f1a
	      break;
Packit 994f1a
Packit 994f1a
	    case IN_OZONE:
Packit 994f1a
	      goto byebye;
Packit 994f1a
      }
Packit 994f1a
    }
Packit 994f1a
    else	/* anything else is just a real character */
Packit 994f1a
    {
Packit 994f1a
      switch(_p_state)
Packit 994f1a
      {
Packit 994f1a
	    case IN_WHITE:
Packit 994f1a
	      _p_state=IN_TOKEN; /* switch states */
Packit 994f1a
Packit 994f1a
	    case IN_TOKEN:		 /* these 2 are     */
Packit 994f1a
	    case IN_QUOTE:		 /*  identical here */
Packit 994f1a
	      chstore(token,tokmax,c);
Packit 994f1a
	      break;
Packit 994f1a
Packit 994f1a
	    case IN_OZONE:
Packit 994f1a
	      goto byebye;
Packit 994f1a
      }
Packit 994f1a
    }
Packit 994f1a
  }		/* end of main loop */
Packit 994f1a
Packit 994f1a
byebye:
Packit 994f1a
  token[_p_tokpos]=0;	/* make sure token ends with EOS */
Packit 994f1a
Packit 994f1a
  return 0;
Packit 994f1a
}
Packit 994f1a
/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 8
 * fill-column: 78
 * End:
 */