Blame contrib/iptcutil/iptcutil.c

Packit 7838c8
/* $Id: iptcutil.c,v 1.11 2015-06-21 01:09:09 bfriesen Exp $ */
Packit 7838c8
Packit 7838c8
#include "tif_config.h"
Packit 7838c8
Packit 7838c8
#include <stdio.h>
Packit 7838c8
#include <stdlib.h>
Packit 7838c8
#include <string.h>
Packit 7838c8
#include <ctype.h>
Packit 7838c8
Packit 7838c8
#ifdef HAVE_STRINGS_H
Packit 7838c8
# include <strings.h>
Packit 7838c8
#endif
Packit 7838c8
Packit 7838c8
#ifdef HAVE_IO_H
Packit 7838c8
# include <io.h>
Packit 7838c8
#endif
Packit 7838c8
Packit 7838c8
#ifdef HAVE_FCNTL_H
Packit 7838c8
# include <fcntl.h>
Packit 7838c8
#endif
Packit 7838c8
Packit 7838c8
#ifdef WIN32
Packit 7838c8
#define STRNICMP strnicmp
Packit 7838c8
#else 
Packit 7838c8
#define STRNICMP strncasecmp
Packit 7838c8
#endif 
Packit 7838c8
Packit 7838c8
/*
 * Associates an IPTC record number with the human readable name that
 * formatIPTC() prints after the "dataset#record#" prefix.
 */
typedef struct _tag_spec
{
  short
    id;     /* IPTC record number */

  char
    *name;  /* readable name for that record */
} tag_spec;

/*
 * Record numbers that have a readable name.  Records that are not listed
 * here are written by formatIPTC() without a name.
 */
static const tag_spec tags[] = {
  { 5,"Image Name" },
  { 7,"Edit Status" },
  { 10,"Priority" },
  { 15,"Category" },
  { 20,"Supplemental Category" },
  { 22,"Fixture Identifier" },
  { 25,"Keyword" },
  { 30,"Release Date" },
  { 35,"Release Time" },
  { 40,"Special Instructions" },
  { 45,"Reference Service" },
  { 47,"Reference Date" },
  { 50,"Reference Number" },
  { 55,"Created Date" },
  { 60,"Created Time" },
  { 65,"Originating Program" },
  { 70,"Program Version" },
  { 75,"Object Cycle" },
  { 80,"Byline" },
  { 85,"Byline Title" },
  { 90,"City" },
  { 95,"Province State" },
  { 100,"Country Code" },
  { 101,"Country" },
  { 103,"Original Transmission Reference" },
  { 105,"Headline" },
  { 110,"Credit" },
  { 115,"Source" },
  { 116,"Copyright String" },
  { 120,"Caption" },
  { 121,"Local Caption" },
  { 122,"Caption Writer" },
  { 200,"Custom Field 1" },
  { 201,"Custom Field 2" },
  { 202,"Custom Field 3" },
  { 203,"Custom Field 4" },
  { 204,"Custom Field 5" },
  { 205,"Custom Field 6" },
  { 206,"Custom Field 7" },
  { 207,"Custom Field 8" },
  { 208,"Custom Field 9" },
  { 209,"Custom Field 10" },
  { 210,"Custom Field 11" },
  { 211,"Custom Field 12" },
  { 212,"Custom Field 13" },
  { 213,"Custom Field 14" },
  { 214,"Custom Field 15" },
  { 215,"Custom Field 16" },
  { 216,"Custom Field 17" },
  { 217,"Custom Field 18" },
  { 218,"Custom Field 19" },
  { 219,"Custom Field 20" }
};
Packit 7838c8
Packit 7838c8
/*
 * Write the first len bytes of s to ofile as one double-quoted line,
 * using HTML conventions to preserve control characters and such:
 *   '&'  is written as "&amp;"
 *   '"'  is written as "&quot;"
 *   '<' and '>' are written as "&lt;" / "&gt;" when HANDLE_GT_LT is defined
 *   control characters are written as "&#NNN;" (decimal character code)
 * Every other byte is copied unchanged.  The closing quote is followed by
 * a newline.
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /* go through unsigned char: iscntrl() is undefined for negative values */
    int c = (unsigned char) *s;
    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}
Packit 7838c8
Packit 7838c8
/* One named HTML entity and the character it stands for. */
typedef struct _html_code
{
  short
    len;    /* number of characters in code, '&' and ';' included */
  const char
    *code;  /* the entity as it appears in the text form */
  const char
    val;    /* the character the entity is replaced by */
} html_code;

/* Named entities understood by convertHTMLcodes(). */
static const html_code html_codes[] = {
#ifdef HANDLE_GT_LT
    { 4,"&lt;",'<' },
    { 4,"&gt;",'>' },
#endif
    { 5,"&amp;",'&' },
    { 6,"&quot;",'"' }
};

/*
 * This routine converts the HTML escape sequence that starts at s back to
 * the original character, in place: the character is stored in s[0] and
 * the rest of the string is moved up behind it.
 *
 *   s    NUL terminated text, expected to start at the '&' of an escape
 *   len  number of characters available at s
 *
 * Recognized are "&#N;" with a decimal value whose ';' is found at
 * s[3] .. s[5], and the named entities of html_codes[] (compared without
 * regard to case).
 *
 * - returns the number of characters dropped, or 0 when nothing was
 *   recognized, in which case s is left untouched.
 */
int convertHTMLcodes(char *s, int len)
{
  if (len <= 0 || s == (char *) NULL || *s == '\0')
    return 0;

  if (s[1] == '#')
    {
      int
        val,
        o;

      if (sscanf(s,"&#%d;",&val) != 1)
        return 0;

      /*
       * Look for the terminating ';'.  sscanf() succeeded, so s[2] is not
       * the terminator; from there on stop at the end of the string.
       */
      for (o=3; o <= 5 && o < len && s[o] != '\0'; o++)
        if (s[o] == ';')
          break;
      if (o > 5 || o >= len || s[o] != ';')
        return 0;

      /* the regions overlap, so strcpy() must not be used here */
      memmove(s+1, s+1+o, strlen(s+1+o)+1);
      *s = (char) val;
      return o;
    }
  else
    {
      int
        i,
        codes = sizeof(html_codes) / sizeof(html_code);

      for (i=0; i < codes; i++)
        {
          if (html_codes[i].len > len)
            continue;
          if (STRNICMP(s, html_codes[i].code, html_codes[i].len) == 0)
            {
              memmove(s+1, s+html_codes[i].len,
                      strlen(s+html_codes[i].len)+1);
              *s = html_codes[i].val;
              return html_codes[i].len-1;
            }
        }
    }

  return 0;
}
Packit 7838c8
Packit 7838c8
int formatIPTC(FILE *ifile, FILE *ofile)
Packit 7838c8
{
Packit 7838c8
  unsigned int
Packit 7838c8
    foundiptc,
Packit 7838c8
    tagsfound;
Packit 7838c8
Packit 7838c8
  char
Packit 7838c8
    *readable,
Packit 7838c8
    *str;
Packit 7838c8
Packit 7838c8
  long
Packit 7838c8
    tagindx,
Packit 7838c8
    taglen;
Packit 7838c8
Packit 7838c8
  int
Packit 7838c8
    i,
Packit 7838c8
    tagcount = sizeof(tags) / sizeof(tag_spec);
Packit 7838c8
Packit 7838c8
  int
Packit 7838c8
    c,
Packit 7838c8
    dataset,
Packit 7838c8
    recnum;
Packit 7838c8
Packit 7838c8
  foundiptc = 0; /* found the IPTC-Header */
Packit 7838c8
  tagsfound = 0; /* number of tags found */
Packit 7838c8
Packit 7838c8
  c = getc(ifile);
Packit 7838c8
  while (c != EOF)
Packit 7838c8
    {
Packit 7838c8
      if (c == 0x1c)
Packit 7838c8
        foundiptc = 1;
Packit 7838c8
      else
Packit 7838c8
        {
Packit 7838c8
          if (foundiptc)
Packit 7838c8
            {
Packit 7838c8
              return -1;
Packit 7838c8
            }
Packit 7838c8
          else
Packit 7838c8
            {
Packit 7838c8
              c = getc(ifile);
Packit 7838c8
              continue;
Packit 7838c8
            }
Packit 7838c8
        }
Packit 7838c8
Packit 7838c8
      /* we found the 0x1c tag and now grab the dataset and record number tags */
Packit 7838c8
      dataset = getc(ifile);
Packit 7838c8
      if ((char) dataset == EOF)
Packit 7838c8
        return -1;
Packit 7838c8
      recnum = getc(ifile);
Packit 7838c8
      if ((char) recnum == EOF)
Packit 7838c8
        return -1;
Packit 7838c8
      /* try to match this record to one of the ones in our named table */
Packit 7838c8
      for (i=0; i< tagcount; i++)
Packit 7838c8
        {
Packit 7838c8
          if (tags[i].id == recnum)
Packit 7838c8
            break;
Packit 7838c8
        }
Packit 7838c8
      if (i < tagcount)
Packit 7838c8
        readable = tags[i].name;
Packit 7838c8
      else
Packit 7838c8
        readable = "";
Packit 7838c8
Packit 7838c8
      /* then we decode the length of the block that follows - long or short fmt */
Packit 7838c8
      c = getc(ifile);
Packit 7838c8
      if (c == EOF)
Packit 7838c8
        return 0;
Packit 7838c8
      if (c & (unsigned char) 0x80)
Packit 7838c8
        {
Packit 7838c8
          unsigned char
Packit 7838c8
            buffer[4];
Packit 7838c8
Packit 7838c8
          for (i=0; i<4; i++)
Packit 7838c8
            {
Packit 7838c8
              c = getc(ifile);
Packit 7838c8
              if (c == EOF)
Packit 7838c8
                return -1;
Packit 7838c8
              buffer[i] = c;
Packit 7838c8
            }
Packit 7838c8
          taglen = (((long) buffer[ 0 ]) << 24) |
Packit 7838c8
            (((long) buffer[ 1 ]) << 16) | 
Packit 7838c8
            (((long) buffer[ 2 ]) <<  8) |
Packit 7838c8
            (((long) buffer[ 3 ]));
Packit 7838c8
        }
Packit 7838c8
      else
Packit 7838c8
        {
Packit 7838c8
          int
Packit 7838c8
            x = c;
Packit 7838c8
Packit 7838c8
          taglen = x << 8;
Packit 7838c8
          x = getc(ifile);
Packit 7838c8
          if (x == EOF)
Packit 7838c8
            return -1;
Packit 7838c8
          taglen |= (long) x;
Packit 7838c8
        }
Packit 7838c8
      /* Place limits on tag length */
Packit 7838c8
      if ((taglen <= 0) || (taglen > 1048576))
Packit 7838c8
        {
Packit 7838c8
          printf("Inappropriate IPTC tag length %ld\n",taglen);
Packit 7838c8
          return -1;
Packit 7838c8
        }
Packit 7838c8
      /* make a buffer to hold the tag data and snag it from the input stream */
Packit 7838c8
      str = (char *) malloc((unsigned int) (taglen+1));
Packit 7838c8
      if (str == (char *) NULL)
Packit 7838c8
        {
Packit 7838c8
          printf("Memory allocation failed");
Packit 7838c8
          return 0;
Packit 7838c8
        }
Packit 7838c8
      for (tagindx=0; tagindx
Packit 7838c8
        {
Packit 7838c8
          c = getc(ifile);
Packit 7838c8
          if (c == EOF)
Packit 7838c8
            {
Packit 7838c8
              free(str);
Packit 7838c8
              return -1;
Packit 7838c8
            }
Packit 7838c8
          str[tagindx] = c;
Packit 7838c8
        }
Packit 7838c8
      str[ taglen ] = 0;
Packit 7838c8
Packit 7838c8
      /* now finish up by formatting this binary data into ASCII equivalent */
Packit 7838c8
      if (strlen(readable) > 0)
Packit 7838c8
        fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
Packit 7838c8
      else
Packit 7838c8
        fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum);
Packit 7838c8
      formatString( ofile, str, taglen );
Packit 7838c8
      free(str);
Packit 7838c8
Packit 7838c8
      tagsfound++;
Packit 7838c8
Packit 7838c8
      c = getc(ifile);
Packit 7838c8
    }
Packit 7838c8
  return tagsfound;
Packit 7838c8
}
Packit 7838c8
Packit 7838c8
/* Forward declaration; tokenizer() is defined and documented further down. */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
              char *white,char *brkchar,char *quote,char eschar,char *brkused,
              int *next,char *quoted);
Packit 7838c8
Packit 7838c8
/*
 * Read one line of any length from file into the buffer b, growing the
 * buffer with realloc() as needed.  The newline is not stored.
 *
 *   b     buffer to read into; it is passed to realloc() and free(), so it
 *         has to come from malloc() and hold at least *blen bytes
 *   blen  in:  size to assume for b
 *         out: length of the line plus one for the terminating NUL,
 *              or 0 when NULL is returned
 *
 * Returns the (possibly moved) buffer holding the NUL terminated line.
 * Returns NULL when no character was read before the newline or the end
 * of the input (so an empty line ends the input as well), when b is NULL,
 * or when the buffer could not be grown.  Whenever NULL is returned the
 * buffer has been released; the caller must not use or free the pointer
 * it passed in.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  int
    c,
    len;

  long
    used;   /* number of characters stored in b so far */

  len=*blen;
  *blen=0;
  if (b == (char *) NULL)
    return (char *) NULL;

  for (used=0; ; used++)
    {
      c=fgetc(file);
      if (c == EOF || c == '\n')
        break;
      if ((used + 1) >= (long) len)
        {
          char
            *grown;

          /* double the assumed size; a size below 1 cannot be doubled */
          len=(len > 0) ? (len << 1) : 2;
          grown=(char *) realloc(b,(size_t) len+2);
          if (grown == (char *) NULL)
            {
              free(b);  /* realloc() left the old block allocated */
              return (char *) NULL;
            }
          b=grown;
        }
      b[used]=(char) c;
    }

  if (used == 0)
    {
      free(b);
      return (char *) NULL;
    }
  b[used]='\0';
  *blen=(int) (used+1);
  return b;
}
Packit 7838c8
Packit 7838c8
#define BUFFER_SZ 4096
Packit 7838c8
Packit 7838c8
int main(int argc, char *argv[])
Packit 7838c8
{            
Packit 7838c8
  /* unsigned int */
Packit 7838c8
  /*   length; */
Packit 7838c8
Packit 7838c8
  /*unsigned char
Packit 7838c8
   *buffer;*/
Packit 7838c8
Packit 7838c8
  int
Packit 7838c8
    i,
Packit 7838c8
    mode; /* iptc binary, or iptc text */
Packit 7838c8
Packit 7838c8
  FILE
Packit 7838c8
    *ifile = stdin,
Packit 7838c8
    *ofile = stdout;
Packit 7838c8
Packit 7838c8
  char
Packit 7838c8
    c,
Packit 7838c8
    *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";
Packit 7838c8
Packit 7838c8
  if( argc < 2 )
Packit 7838c8
    {
Packit 7838c8
      puts(usage);
Packit 7838c8
      return 1;
Packit 7838c8
    }
Packit 7838c8
Packit 7838c8
  mode = 0;
Packit 7838c8
  /* length = -1; */
Packit 7838c8
  /* buffer = (unsigned char *)NULL; */
Packit 7838c8
Packit 7838c8
  for (i=1; i
Packit 7838c8
    {
Packit 7838c8
      c = argv[i][0];
Packit 7838c8
      if (c == '-' || c == '/')
Packit 7838c8
        {
Packit 7838c8
          c = argv[i][1];
Packit 7838c8
          switch( c )
Packit 7838c8
            {
Packit 7838c8
            case 't':
Packit 7838c8
              mode = 1;
Packit 7838c8
#ifdef WIN32
Packit 7838c8
              /* Set "stdout" to binary mode: */
Packit 7838c8
              _setmode( _fileno( ofile ), _O_BINARY );
Packit 7838c8
#endif
Packit 7838c8
              break;
Packit 7838c8
            case 'b':
Packit 7838c8
              mode = 0;
Packit 7838c8
#ifdef WIN32
Packit 7838c8
              /* Set "stdin" to binary mode: */
Packit 7838c8
              _setmode( _fileno( ifile ), _O_BINARY );
Packit 7838c8
#endif
Packit 7838c8
              break;
Packit 7838c8
            case 'i':
Packit 7838c8
              if (mode == 0)
Packit 7838c8
                ifile = fopen(argv[++i], "rb");
Packit 7838c8
              else
Packit 7838c8
                ifile = fopen(argv[++i], "rt");
Packit 7838c8
              if (ifile == (FILE *)NULL)
Packit 7838c8
                {
Packit 7838c8
                  printf("Unable to open: %s\n", argv[i]);
Packit 7838c8
                  return 1;
Packit 7838c8
                }
Packit 7838c8
              break;
Packit 7838c8
            case 'o':
Packit 7838c8
              if (mode == 0)
Packit 7838c8
                ofile = fopen(argv[++i], "wt");
Packit 7838c8
              else
Packit 7838c8
                ofile = fopen(argv[++i], "wb");
Packit 7838c8
              if (ofile == (FILE *)NULL)
Packit 7838c8
                {
Packit 7838c8
                  printf("Unable to open: %s\n", argv[i]);
Packit 7838c8
                  return 1;
Packit 7838c8
                }
Packit 7838c8
              break;
Packit 7838c8
            default:
Packit 7838c8
              printf("Unknown option: %s\n", argv[i]);
Packit 7838c8
              return 1;
Packit 7838c8
            }
Packit 7838c8
        }
Packit 7838c8
      else
Packit 7838c8
        {
Packit 7838c8
          puts(usage);
Packit 7838c8
          return 1;
Packit 7838c8
        }
Packit 7838c8
    }
Packit 7838c8
Packit 7838c8
  if (mode == 0) /* handle binary iptc info */
Packit 7838c8
    formatIPTC(ifile, ofile);
Packit 7838c8
Packit 7838c8
  if (mode == 1) /* handle text form of iptc info */
Packit 7838c8
    {
Packit 7838c8
      char
Packit 7838c8
        brkused,
Packit 7838c8
        quoted,
Packit 7838c8
        *line,
Packit 7838c8
        *token,
Packit 7838c8
        *newstr;
Packit 7838c8
Packit 7838c8
      int
Packit 7838c8
        state,
Packit 7838c8
        next;
Packit 7838c8
Packit 7838c8
      unsigned char
Packit 7838c8
        recnum = 0,
Packit 7838c8
        dataset = 0;
Packit 7838c8
Packit 7838c8
      int
Packit 7838c8
        inputlen = BUFFER_SZ;
Packit 7838c8
Packit 7838c8
      line = (char *) malloc(inputlen);     
Packit 7838c8
      token = (char *)NULL;
Packit 7838c8
      while((line = super_fgets(line,&inputlen,ifile))!=NULL)
Packit 7838c8
        {
Packit 7838c8
          state=0;
Packit 7838c8
          next=0;
Packit 7838c8
Packit 7838c8
          token = (char *) malloc(inputlen);     
Packit 7838c8
          newstr = (char *) malloc(inputlen);     
Packit 7838c8
          while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
Packit 7838c8
                          &brkused,&next,&quoted)==0)
Packit 7838c8
            {
Packit 7838c8
              if (state == 0)
Packit 7838c8
                {                  
Packit 7838c8
                  int
Packit 7838c8
                    state,
Packit 7838c8
                    next;
Packit 7838c8
Packit 7838c8
                  char
Packit 7838c8
                    brkused,
Packit 7838c8
                    quoted;
Packit 7838c8
Packit 7838c8
                  state=0;
Packit 7838c8
                  next=0;
Packit 7838c8
                  while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
Packit 7838c8
                                  &brkused, &next, &quoted)==0)
Packit 7838c8
                    {
Packit 7838c8
                      if (state == 0)
Packit 7838c8
                        dataset = (unsigned char) atoi(newstr);
Packit 7838c8
                      else
Packit 7838c8
                        if (state == 1)
Packit 7838c8
                          recnum = (unsigned char) atoi(newstr);
Packit 7838c8
                      state++;
Packit 7838c8
                    }
Packit 7838c8
                }
Packit 7838c8
              else
Packit 7838c8
                if (state == 1)
Packit 7838c8
                  {
Packit 7838c8
                    int
Packit 7838c8
                      next;
Packit 7838c8
Packit 7838c8
                    unsigned long
Packit 7838c8
                      len;
Packit 7838c8
Packit 7838c8
                    char
Packit 7838c8
                      brkused,
Packit 7838c8
                      quoted;
Packit 7838c8
Packit 7838c8
                    next=0;
Packit 7838c8
                    len = strlen(token);
Packit 7838c8
                    while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
Packit 7838c8
                                    &brkused, &next, &quoted)==0)
Packit 7838c8
                      {
Packit 7838c8
                        if (brkused && next > 0)
Packit 7838c8
                          {
Packit 7838c8
                            char
Packit 7838c8
                              *s = &token[next-1];
Packit 7838c8
Packit 7838c8
                            len -= convertHTMLcodes(s, strlen(s));
Packit 7838c8
                          }
Packit 7838c8
                      }
Packit 7838c8
Packit 7838c8
                    fputc(0x1c, ofile);
Packit 7838c8
                    fputc(dataset, ofile);
Packit 7838c8
                    fputc(recnum, ofile);
Packit 7838c8
                    if (len < 0x10000)
Packit 7838c8
                      {
Packit 7838c8
                        fputc((len >> 8) & 255, ofile);
Packit 7838c8
                        fputc(len & 255, ofile);
Packit 7838c8
                      }
Packit 7838c8
                    else
Packit 7838c8
                      {
Packit 7838c8
                        fputc(((len >> 24) & 255) | 0x80, ofile);
Packit 7838c8
                        fputc((len >> 16) & 255, ofile);
Packit 7838c8
                        fputc((len >> 8) & 255, ofile);
Packit 7838c8
                        fputc(len & 255, ofile);
Packit 7838c8
                      }
Packit 7838c8
                    next=0;
Packit 7838c8
                    while (len--)
Packit 7838c8
                      fputc(token[next++], ofile);
Packit 7838c8
                  }
Packit 7838c8
              state++;
Packit 7838c8
            }
Packit 7838c8
          free(token);
Packit 7838c8
          token = (char *)NULL;
Packit 7838c8
          free(newstr);
Packit 7838c8
          newstr = (char *)NULL;
Packit 7838c8
        }
Packit 7838c8
      free(line);
Packit 7838c8
Packit 7838c8
      fclose( ifile );
Packit 7838c8
      fclose( ofile );
Packit 7838c8
    }
Packit 7838c8
Packit 7838c8
  return 0;
Packit 7838c8
}
Packit 7838c8
Packit 7838c8
/*
Packit 7838c8
	This routine is a generalized, finite state token parser. It allows
Packit 7838c8
    you to extract tokens one at a time from a string of characters.  The
Packit 7838c8
    characters used for white space, for break characters, and for quotes
Packit 7838c8
    can be specified. Also, characters in the string can be preceded by
Packit 7838c8
    a specifiable escape character which removes any special meaning the
Packit 7838c8
    character may have.
Packit 7838c8
Packit 7838c8
	There are a lot of formal parameters in this subroutine call, but
Packit 7838c8
	once you get familiar with them, this routine is fairly easy to use.
Packit 7838c8
	"#define" macros can be used to generate simpler looking calls for
Packit 7838c8
	commonly used applications of this routine.
Packit 7838c8
Packit 7838c8
	First, some terminology:
Packit 7838c8
Packit 7838c8
	token:		used here, a single unit of information in
Packit 7838c8
				the form of a group of characters.
Packit 7838c8
Packit 7838c8
	white space:	space that gets ignored (except within quotes
Packit 7838c8
				or when escaped), like blanks and tabs.  in
Packit 7838c8
				addition, white space terminates a non-quoted
Packit 7838c8
				token.
Packit 7838c8
Packit 7838c8
	break character: a character that separates non-quoted tokens.
Packit 7838c8
				commas are a common break character.  the
Packit 7838c8
				usage of break characters to signal the end
Packit 7838c8
				of a token is the same as that of white space,
Packit 7838c8
				except multiple break characters with nothing
Packit 7838c8
				or only white space between generate a null
Packit 7838c8
				token for each two break characters together.
Packit 7838c8
Packit 7838c8
				for example, if blank is set to be the white
Packit 7838c8
				space and comma is set to be the break
Packit 7838c8
				character, the line ...
Packit 7838c8
Packit 7838c8
				A, B, C ,  , DEF
Packit 7838c8
Packit 7838c8
				... consists of 5 tokens:
Packit 7838c8
Packit 7838c8
				1)	"A"
Packit 7838c8
				2)	"B"
Packit 7838c8
				3)	"C"
Packit 7838c8
				4)	""      (the null string)
Packit 7838c8
				5)	"DEF"
Packit 7838c8
Packit 7838c8
	quote character: 	a character that, when surrounding a group
Packit 7838c8
				of other characters, causes the group of
Packit 7838c8
				characters to be treated as a single token,
Packit 7838c8
				no matter how many white spaces or break
Packit 7838c8
				characters exist in the group.	also, a
Packit 7838c8
				token always terminates after the closing
Packit 7838c8
				quote.	for example, if ' is the quote
Packit 7838c8
				character, blank is white space, and comma
Packit 7838c8
				is the break character, the following
Packit 7838c8
				string ...
Packit 7838c8
Packit 7838c8
				A, ' B, CD'EF GHI
Packit 7838c8
Packit 7838c8
				... consists of 4 tokens:
Packit 7838c8
Packit 7838c8
				1)	"A"
Packit 7838c8
				2)	" B, CD" (note the blanks & comma)
Packit 7838c8
				3)	"EF"
Packit 7838c8
				4)	"GHI"
Packit 7838c8
Packit 7838c8
				the quote characters themselves do
Packit 7838c8
				not appear in the resultant tokens.  the
Packit 7838c8
				double quotes are delimiters i use here for
Packit 7838c8
				documentation purposes only.
Packit 7838c8
Packit 7838c8
	escape character:	a character which itself is ignored but
Packit 7838c8
				which causes the next character to be
Packit 7838c8
				used as is.  ^ and \ are often used as
Packit 7838c8
				escape characters.  an escape in the last
Packit 7838c8
				position of the string gets treated as a
Packit 7838c8
				"normal" (i.e., non-quote, non-white,
Packit 7838c8
				non-break, and non-escape) character.
Packit 7838c8
				for example, assume white space, break
Packit 7838c8
				character, and quote are the same as in the
Packit 7838c8
				above examples, and further, assume that
Packit 7838c8
				^ is the escape character.  then, in the
Packit 7838c8
				string ...
Packit 7838c8
Packit 7838c8
				ABC, ' DEF ^' GH' I ^ J K^ L ^
Packit 7838c8
Packit 7838c8
				... there are 7 tokens:
Packit 7838c8
Packit 7838c8
				1)	"ABC"
Packit 7838c8
				2)	" DEF ' GH"
Packit 7838c8
				3)	"I"
Packit 7838c8
				4)	" "     (a lone blank)
Packit 7838c8
				5)	"J"
Packit 7838c8
				6)	"K L"
Packit 7838c8
				7)	"^"     (passed as is at end of line)
Packit 7838c8
Packit 7838c8
Packit 7838c8
	OK, now that you have this background, here's how to call "tokenizer":
Packit 7838c8
Packit 7838c8
	result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
Packit 7838c8
		      brkused,next,quoted)
Packit 7838c8
Packit 7838c8
	result: 	0 if we haven't reached EOS (end of string), and
Packit 7838c8
			1 if we have (this is an "int").
Packit 7838c8
Packit 7838c8
	flag:		right now, only the low order 2 bits are used.
Packit 7838c8
			1 => convert non-quoted tokens to upper case
Packit 7838c8
			2 => convert non-quoted tokens to lower case
Packit 7838c8
			0 => do not convert non-quoted tokens
Packit 7838c8
			(this is a "char").
Packit 7838c8
Packit 7838c8
	token:		a character string containing the returned next token
Packit 7838c8
			(this is a "char[]").
Packit 7838c8
Packit 7838c8
	maxtok: 	the maximum size of "token".  characters beyond
Packit 7838c8
			"maxtok" are truncated (this is an "int").
Packit 7838c8
Packit 7838c8
	string: 	the string to be parsed (this is a "char[]").
Packit 7838c8
Packit 7838c8
	white:		a string of the valid white spaces.  example:
Packit 7838c8
Packit 7838c8
			char whitesp[]={" \t"};
Packit 7838c8
Packit 7838c8
			blank and tab will be valid white space (this is
Packit 7838c8
			a "char[]").
Packit 7838c8
Packit 7838c8
	break:		a string of the valid break characters.  example:
Packit 7838c8
Packit 7838c8
			char breakch[]={";,"};
Packit 7838c8
Packit 7838c8
			semicolon and comma will be valid break characters
Packit 7838c8
			(this is a "char[]").
Packit 7838c8
Packit 7838c8
			IMPORTANT:  do not use the name "break" as a C
Packit 7838c8
			variable, as this is a reserved word in C.
Packit 7838c8
Packit 7838c8
	quote:		a string of the valid quote characters.  an example
Packit 7838c8
			would be
Packit 7838c8
Packit 7838c8
			char quotech[]={"'\""};
Packit 7838c8
Packit 7838c8
			(this causes single and double quotes to be valid)
Packit 7838c8
			note that a token starting with one of these characters
Packit 7838c8
			needs the same quote character to terminate it.
Packit 7838c8
Packit 7838c8
			for example,
Packit 7838c8
Packit 7838c8
			"ABC '
Packit 7838c8
Packit 7838c8
			is unterminated, but
Packit 7838c8
Packit 7838c8
			"DEF" and 'GHI'
Packit 7838c8
Packit 7838c8
			are properly terminated.  note that different quote
Packit 7838c8
			characters can appear on the same line; only for
Packit 7838c8
			a given token do the quote characters have to be
Packit 7838c8
			the same (this is a "char[]").
Packit 7838c8
Packit 7838c8
	escape: 	the escape character (NOT a string ... only one
Packit 7838c8
			allowed).  use zero if none is desired (this is
Packit 7838c8
			a "char").
Packit 7838c8
Packit 7838c8
	brkused:	the break character used to terminate the current
Packit 7838c8
			token.	if the token was quoted, this will be the
Packit 7838c8
			quote used.  if the token is the last one on the
Packit 7838c8
			line, this will be zero (this is a pointer to a
Packit 7838c8
			"char").
Packit 7838c8
Packit 7838c8
	next:		this variable points to the first character of the
Packit 7838c8
			next token.  it gets reset by "tokenizer" as it steps
Packit 7838c8
			through the string.  set it to 0 upon initialization,
Packit 7838c8
			and leave it alone after that.	you can change it
Packit 7838c8
			if you want to jump around in the string or re-parse
Packit 7838c8
			from the beginning, but be careful (this is a
Packit 7838c8
			pointer to an "int").
Packit 7838c8
Packit 7838c8
	quoted: 	set to 1 (true) if the token was quoted and 0 (false)
Packit 7838c8
			if not.  you may need this information (for example:
Packit 7838c8
			in C, a string with quotes around it is a character
Packit 7838c8
			string, while one without is an identifier).
Packit 7838c8
Packit 7838c8
			(this is a pointer to a "char").
Packit 7838c8
*/
Packit 7838c8
Packit 7838c8
/* states of the tokenizer; every call starts out in IN_WHITE */

#define IN_WHITE 0
#define IN_TOKEN 1
#define IN_QUOTE 2
#define IN_OZONE 3

/* working variables of tokenizer(), shared with its helper chstore() */

int _p_state;	   /* current state	 */
unsigned _p_flag;  /* option flag	 */
char _p_curquote;  /* current quote char */
int _p_tokpos;	   /* current token pos  */
Packit 7838c8
Packit 7838c8
/*
 * routine to find character in string ... used only by "tokenizer"
 *
 * Returns the position of the first occurrence of ch in string, or -1
 * when ch does not occur.  The terminating NUL is never matched.
 */

int sindex(char ch,char *string)
{
  char *cp;

  for(cp=string;*cp;++cp)
    if(ch==*cp)
      return (int)(cp-string);	/* return position of character */
  return -1;			/* eol ... no match found */
}
Packit 7838c8
Packit 7838c8
/* routine to store a character in a string ... used only by "tokenizer" */
Packit 7838c8
Packit 7838c8
void chstore(char *string,int max,char ch)
Packit 7838c8
{
Packit 7838c8
  char c;
Packit 7838c8
  if(_p_tokpos>=0&&_p_tokpos
Packit 7838c8
    {
Packit 7838c8
      if(_p_state==IN_QUOTE)
Packit 7838c8
        c=ch;
Packit 7838c8
      else
Packit 7838c8
        switch(_p_flag&3)
Packit 7838c8
          {
Packit 7838c8
          case 1: 	    /* convert to upper */
Packit 7838c8
            c=toupper((int) ch);
Packit 7838c8
            break;
Packit 7838c8
Packit 7838c8
          case 2: 	    /* convert to lower */
Packit 7838c8
            c=tolower((int) ch);
Packit 7838c8
            break;
Packit 7838c8
Packit 7838c8
          default:	    /* use as is */
Packit 7838c8
            c=ch;
Packit 7838c8
            break;
Packit 7838c8
          }
Packit 7838c8
      string[_p_tokpos++]=c;
Packit 7838c8
    }
Packit 7838c8
  return;
Packit 7838c8
}
Packit 7838c8
Packit 7838c8
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
Packit 7838c8
              char *white,char *brkchar,char *quote,char eschar,char *brkused,
Packit 7838c8
              int *next,char *quoted)
Packit 7838c8
{
Packit 7838c8
  int qp;
Packit 7838c8
  char c,nc;
Packit 7838c8
Packit 7838c8
  *brkused=0;		/* initialize to null */
Packit 7838c8
  *quoted=0;		/* assume not quoted  */
Packit 7838c8
Packit 7838c8
  if(!line[*next])	/* if we're at end of line, indicate such */
Packit 7838c8
    return 1;
Packit 7838c8
Packit 7838c8
  _p_state=IN_WHITE;   /* initialize state */
Packit 7838c8
  _p_curquote=0;	   /* initialize previous quote char */
Packit 7838c8
  _p_flag=inflag;	   /* set option flag */
Packit 7838c8
Packit 7838c8
  for(_p_tokpos=0;(c=line[*next]);++(*next))	/* main loop */
Packit 7838c8
    {
Packit 7838c8
      if((qp=sindex(c,brkchar))>=0)  /* break */
Packit 7838c8
        {
Packit 7838c8
          switch(_p_state)
Packit 7838c8
            {
Packit 7838c8
	    case IN_WHITE:		/* these are the same here ...	*/
Packit 7838c8
	    case IN_TOKEN:		/* ... just get out		*/
Packit 7838c8
	    case IN_OZONE:		/* ditto			*/
Packit 7838c8
	      ++(*next);
Packit 7838c8
	      *brkused=brkchar[qp];
Packit 7838c8
	      goto byebye;
Packit 7838c8
Packit 7838c8
	    case IN_QUOTE:		 /* just keep going */
Packit 7838c8
	      chstore(token,tokmax,c);
Packit 7838c8
	      break;
Packit 7838c8
            }
Packit 7838c8
        }
Packit 7838c8
      else if((qp=sindex(c,quote))>=0)  /* quote */
Packit 7838c8
        {
Packit 7838c8
          switch(_p_state)
Packit 7838c8
            {
Packit 7838c8
	    case IN_WHITE:	 /* these are identical, */
Packit 7838c8
	      _p_state=IN_QUOTE; /* change states   */
Packit 7838c8
	      _p_curquote=quote[qp]; /* save quote char */
Packit 7838c8
	      *quoted=1;	/* set to true as long as something is in quotes */
Packit 7838c8
	      break;
Packit 7838c8
Packit 7838c8
	    case IN_QUOTE:
Packit 7838c8
	      if(quote[qp]==_p_curquote) /* same as the beginning quote? */
Packit 7838c8
                {
Packit 7838c8
                  _p_state=IN_OZONE;
Packit 7838c8
                  _p_curquote=0;
Packit 7838c8
                }
Packit 7838c8
	      else
Packit 7838c8
	        chstore(token,tokmax,c); /* treat as regular char */
Packit 7838c8
	      break;
Packit 7838c8
Packit 7838c8
	    case IN_TOKEN:
Packit 7838c8
	    case IN_OZONE:
Packit 7838c8
	      *brkused=c; /* uses quote as break char */
Packit 7838c8
	      goto byebye;
Packit 7838c8
            }
Packit 7838c8
        }
Packit 7838c8
      else if((qp=sindex(c,white))>=0) /* white */
Packit 7838c8
        {
Packit 7838c8
          switch(_p_state)
Packit 7838c8
            {
Packit 7838c8
	    case IN_WHITE:
Packit 7838c8
	    case IN_OZONE:
Packit 7838c8
	      break;		/* keep going */
Packit 7838c8
Packit 7838c8
	    case IN_TOKEN:
Packit 7838c8
	      _p_state=IN_OZONE;
Packit 7838c8
	      break;
Packit 7838c8
Packit 7838c8
	    case IN_QUOTE:
Packit 7838c8
	      chstore(token,tokmax,c); /* it's valid here */
Packit 7838c8
	      break;
Packit 7838c8
            }
Packit 7838c8
        }
Packit 7838c8
      else if(c==eschar)  /* escape */
Packit 7838c8
        {
Packit 7838c8
          nc=line[(*next)+1];
Packit 7838c8
          if(nc==0) 		/* end of line */
Packit 7838c8
            {
Packit 7838c8
              *brkused=0;
Packit 7838c8
              chstore(token,tokmax,c);
Packit 7838c8
              ++(*next);
Packit 7838c8
              goto byebye;
Packit 7838c8
            }
Packit 7838c8
          switch(_p_state)
Packit 7838c8
            {
Packit 7838c8
	    case IN_WHITE:
Packit 7838c8
	      --(*next);
Packit 7838c8
	      _p_state=IN_TOKEN;
Packit 7838c8
	      break;
Packit 7838c8
Packit 7838c8
	    case IN_TOKEN:
Packit 7838c8
	    case IN_QUOTE:
Packit 7838c8
	      ++(*next);
Packit 7838c8
	      chstore(token,tokmax,nc);
Packit 7838c8
	      break;
Packit 7838c8
Packit 7838c8
	    case IN_OZONE:
Packit 7838c8
	      goto byebye;
Packit 7838c8
            }
Packit 7838c8
        }
Packit 7838c8
      else	/* anything else is just a real character */
Packit 7838c8
        {
Packit 7838c8
          switch(_p_state)
Packit 7838c8
            {
Packit 7838c8
	    case IN_WHITE:
Packit 7838c8
	      _p_state=IN_TOKEN; /* switch states */
Packit 7838c8
Packit 7838c8
	    case IN_TOKEN:		 /* these 2 are     */
Packit 7838c8
	    case IN_QUOTE:		 /*  identical here */
Packit 7838c8
	      chstore(token,tokmax,c);
Packit 7838c8
	      break;
Packit 7838c8
Packit 7838c8
	    case IN_OZONE:
Packit 7838c8
	      goto byebye;
Packit 7838c8
            }
Packit 7838c8
        }
Packit 7838c8
    }		/* end of main loop */
Packit 7838c8
Packit 7838c8
 byebye:
Packit 7838c8
  token[_p_tokpos]=0;	/* make sure token ends with EOS */
Packit 7838c8
Packit 7838c8
  return 0;
Packit 7838c8
}
Packit 7838c8
/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 2
 * fill-column: 78
 * End:
 */