Blame libdjvu/XMLTags.cpp

Packit df99a1
//C-  -*- C++ -*-
Packit df99a1
//C- -------------------------------------------------------------------
Packit df99a1
//C- DjVuLibre-3.5
Packit df99a1
//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
Packit df99a1
//C- Copyright (c) 2001  AT&T
Packit df99a1
//C-
Packit df99a1
//C- This software is subject to, and may be distributed under, the
Packit df99a1
//C- GNU General Public License, either Version 2 of the license,
Packit df99a1
//C- or (at your option) any later version. The license should have
Packit df99a1
//C- accompanied the software or you may obtain a copy of the license
Packit df99a1
//C- from the Free Software Foundation at http://www.fsf.org .
Packit df99a1
//C-
Packit df99a1
//C- This program is distributed in the hope that it will be useful,
Packit df99a1
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit df99a1
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit df99a1
//C- GNU General Public License for more details.
Packit df99a1
//C- 
Packit df99a1
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
Packit df99a1
//C- Lizardtech Software.  Lizardtech Software has authorized us to
Packit df99a1
//C- replace the original DjVu(r) Reference Library notice by the following
Packit df99a1
//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
Packit df99a1
//C-
Packit df99a1
//C-  ------------------------------------------------------------------
Packit df99a1
//C- | DjVu (r) Reference Library (v. 3.5)
Packit df99a1
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
Packit df99a1
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
Packit df99a1
//C- | 6,058,214 and patents pending.
Packit df99a1
//C- |
Packit df99a1
//C- | This software is subject to, and may be distributed under, the
Packit df99a1
//C- | GNU General Public License, either Version 2 of the license,
Packit df99a1
//C- | or (at your option) any later version. The license should have
Packit df99a1
//C- | accompanied the software or you may obtain a copy of the license
Packit df99a1
//C- | from the Free Software Foundation at http://www.fsf.org .
Packit df99a1
//C- |
Packit df99a1
//C- | The computer code originally released by LizardTech under this
Packit df99a1
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
Packit df99a1
//C- | ORIGINAL CODE."  Subject to any third party intellectual property
Packit df99a1
//C- | claims, LizardTech grants recipient a worldwide, royalty-free, 
Packit df99a1
//C- | non-exclusive license to make, use, sell, or otherwise dispose of 
Packit df99a1
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the 
Packit df99a1
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU 
Packit df99a1
//C- | General Public License.   This grant only confers the right to 
Packit df99a1
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to 
Packit df99a1
//C- | the extent such infringement is reasonably necessary to enable 
Packit df99a1
//C- | recipient to make, have made, practice, sell, or otherwise dispose 
Packit df99a1
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to 
Packit df99a1
//C- | any greater extent that may be necessary to utilize further 
Packit df99a1
//C- | modifications or combinations.
Packit df99a1
//C- |
Packit df99a1
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
Packit df99a1
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
Packit df99a1
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
Packit df99a1
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
Packit df99a1
//C- +------------------------------------------------------------------
Packit df99a1
Packit df99a1
#ifdef HAVE_CONFIG_H
Packit df99a1
# include "config.h"
Packit df99a1
#endif
Packit df99a1
#if NEED_GNUG_PRAGMAS
Packit df99a1
# pragma implementation
Packit df99a1
#endif
Packit df99a1
Packit df99a1
// From: Leon Bottou, 1/31/2002
Packit df99a1
// This is purely Lizardtech stuff.
Packit df99a1
Packit df99a1
#include "XMLTags.h"
Packit df99a1
#include "UnicodeByteStream.h"
Packit df99a1
#include <ctype.h>
Packit df99a1
#if HAS_WCTYPE
Packit df99a1
#include <wctype.h>
Packit df99a1
#endif
Packit df99a1
Packit df99a1
Packit df99a1
#ifdef HAVE_NAMESPACES
Packit df99a1
namespace DJVU {
Packit df99a1
# ifdef NOT_DEFINED // Just to fool emacs c++ mode
Packit df99a1
}
Packit df99a1
#endif
Packit df99a1
#endif
Packit df99a1
Packit df99a1
// Default constructor: performs no explicit initialisation, so every
// member is left default-constructed.
lt_XMLContents::lt_XMLContents()
{
}
Packit df99a1
Packit df99a1
// Construct a content item that refers to the element `t`.
lt_XMLContents::lt_XMLContents(GP<lt_XMLTags> t)
  : tag(t)
{
}
Packit df99a1
Packit df99a1
// Extract the next attribute name from a tag fragment.
//
//   tag  NUL-terminated text positioned at (or before) an attribute name.
//   t    [out] set to the first character following the name.
//
// Leading whitespace is skipped; the name then runs up to (not including)
// the first '/', '>', '=', whitespace character or the terminating NUL.
// Returns the name, which is empty when no name characters were found.
static GUTF8String
getargn(char const tag[], char const *&t)
{
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF).  Plain char may be signed, so bytes >= 0x80 --
  // common in UTF-8 input -- must be converted before being classified.
  char const *s=tag;
  while(isspace(static_cast<unsigned char>(*s)))
  {
    ++s;
  }
  for(t=s;
      (*t)&&((*t)!='/')&&((*t)!='>')&&((*t)!='=')
        &&!isspace(static_cast<unsigned char>(*t));
      ++t)
  {
  }
  return GUTF8String(s,t-s);
}
Packit df99a1
Packit df99a1
// Extract an attribute value from a tag fragment.
//
//   tag  text expected to start with '=' (may be a null pointer).
//   t    [out] set to the first character after the consumed value, or to
//        `tag` itself when `tag` is null or does not start with '='.
//
// After the '=', a value opening with a double or single quote extends to
// the matching quote, a '>' or the terminating NUL, whichever comes first;
// a matching closing quote is consumed.  An unquoted value extends to the
// first '/', '>', whitespace character or NUL.
// Returns the value without quotes; empty when there is no '='.
static GUTF8String
getargv(char const tag[], char const *&t)
{
  GUTF8String retval;
  if(tag && tag[0] == '=')
  {
    char const *s=t=tag+1;
    if((*t == '"')||(*t == '\47'))
    {
      // Quoted value: remember which quote opened it and step past it.
      char const q=*(t++);
      for(s++;(*t)&&((*t)!=q)&&((*t)!='>');++t)
      {
      }
      retval=GUTF8String(s,t-s);
      if (t[0] == q)
      {
        ++t;
      }
    }else
    {
      // Unquoted value.  The <ctype.h> classifiers are only defined for
      // values representable as unsigned char (or EOF), so convert first:
      // plain char may be negative for bytes >= 0x80.
      for(t=s;
          (*t)&&((*t)!='/')&&((*t)!='>')
            &&!isspace(static_cast<unsigned char>(*t));
          ++t)
      {
      }
      retval=GUTF8String(s,t-s);
    }
  }else
  {
    t=tag;
  }
  return retval;
}
Packit df99a1
Packit df99a1
// Extract the element name from the text of a tag.
//
//   tag  NUL-terminated text positioned at (or before) the element name.
//   t    [out] set to the first character following the name.
//
// Leading whitespace is skipped; the name then runs up to (not including)
// the first '>', '/', whitespace character or the terminating NUL.
static GUTF8String
tagtoname(char const tag[],char const *&t)
{
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF).  Plain char may be signed, so bytes >= 0x80 --
  // common in UTF-8 input -- must be converted before being classified.
  char const *s=tag;
  while(isspace(static_cast<unsigned char>(*s)))
  {
    ++s;
  }
  for(t=s;
      (*t)&&((*t)!='>')&&((*t)!='/')
        &&!isspace(static_cast<unsigned char>(*t));
      ++t)
  {
  }
  return GUTF8String(s,t-s);
}
Packit df99a1
Packit df99a1
// Convenience overload for callers that only need the element name and do
// not care where the name ends.
static inline GUTF8String
tagtoname(char const tag[])
{
  char const *name_end;  // required by the two-argument form; not used here
  return tagtoname(tag,name_end);
}
Packit df99a1
Packit df99a1
static inline bool
Packit df99a1
isspaces(const GUTF8String &raw)
Packit df99a1
{
Packit df99a1
  return (raw.nextNonSpace() == (int)raw.length());
Packit df99a1
}
Packit df99a1
Packit df99a1
void
Packit df99a1
lt_XMLTags::ParseValues(char const *t, GMap<GUTF8String,GUTF8String> &args,bool downcase)
Packit df99a1
{
Packit df99a1
  GUTF8String argn;
Packit df99a1
  char const *tt;
Packit df99a1
  while((argn=getargn(t,tt)).length())
Packit df99a1
  {
Packit df99a1
    if(downcase)
Packit df99a1
      argn=argn.downcase();
Packit df99a1
    args[argn]=getargv(tt,t).fromEscaped();
Packit df99a1
  }
Packit df99a1
}
Packit df99a1
Packit df99a1
// Destructor: no explicit cleanup is performed here.
lt_XMLTags::~lt_XMLTags()
{
}
Packit df99a1
Packit df99a1
// Construct an unnamed tag whose start line is 0.
lt_XMLTags::lt_XMLTags()
  : startline(0)
{
}
Packit df99a1
Packit df99a1
// Construct a tag from the text `n` of an element: the leading word becomes
// the tag name and whatever follows is parsed as attributes into `args`.
// The start line is initialised to 0.
lt_XMLTags::lt_XMLTags(const char n[])
  : startline(0)
{
  char const *attrs_begin;
  name=tagtoname(n,attrs_begin);
  ParseValues(attrs_begin,args);
}
Packit df99a1
Packit df99a1
void
Packit df99a1
lt_XMLTags::init(const GP<ByteStream> &bs)
Packit df99a1
{
Packit df99a1
  GP<XMLByteStream> gxmlbs=XMLByteStream::create(bs);
Packit df99a1
  init(*gxmlbs);
Packit df99a1
}
Packit df99a1
Packit df99a1
void
Packit df99a1
lt_XMLTags::init(const GURL &url)
Packit df99a1
{
Packit df99a1
  const GP<ByteStream> bs=ByteStream::create(url,"rb");
Packit df99a1
  init(bs);
Packit df99a1
}
Packit df99a1
Packit df99a1
void
Packit df99a1
lt_XMLTags::init(XMLByteStream &xmlbs)
Packit df99a1
{
Packit df99a1
  if(!get_count())
Packit df99a1
  {
Packit df99a1
    G_THROW( ERR_MSG("XMLTags.no_GP") );
Packit df99a1
  }
Packit df99a1
  GPList<lt_XMLTags> level;
Packit df99a1
  GUTF8String tag,raw(xmlbs.gets(0,'<',false));
Packit df99a1
  int linesread=xmlbs.get_lines_read();
Packit df99a1
  if(!isspaces(raw))
Packit df99a1
  {
Packit df99a1
    G_THROW( (ERR_MSG("XMLTags.raw_string") "\t")+raw);
Packit df99a1
  }
Packit df99a1
  GUTF8String encoding;
Packit df99a1
  for(int len;(len=(tag=xmlbs.gets(0,'>',true)).length());)
Packit df99a1
  {
Packit df99a1
    if(tag[len-1] != '>')
Packit df99a1
    {
Packit df99a1
      G_THROW((ERR_MSG("XMLTags.bad_tag") "\t")+tag);
Packit df99a1
    }
Packit df99a1
    switch(tag[1])
Packit df99a1
    {
Packit df99a1
      case '?':
Packit df99a1
      {
Packit df99a1
        while(len < 4 || tag.substr(len-2,len) != "?>")
Packit df99a1
        {
Packit df99a1
          GUTF8String cont(xmlbs.gets(0,'>',true));
Packit df99a1
          if(!cont.length())
Packit df99a1
          { 
Packit df99a1
            G_THROW( (ERR_MSG("XMLTags.bad_PI") "\t")+tag);
Packit df99a1
          }
Packit df99a1
          len=((tag+=cont).length());
Packit df99a1
        }
Packit df99a1
        char const *n;
Packit df99a1
        GUTF8String xtag = tag.substr(2,-1);
Packit df99a1
        GUTF8String xname = tagtoname(xtag,n);
Packit df99a1
        if(xname.downcase() == "xml")
Packit df99a1
        {
Packit df99a1
          ParseValues(n,args);
Packit df99a1
          for(GPosition pos=args;pos;++pos)
Packit df99a1
          {
Packit df99a1
            if(args.key(pos) == "encoding")
Packit df99a1
            {
Packit df99a1
              const GUTF8String e=args[pos].upcase();
Packit df99a1
              if(e != encoding)
Packit df99a1
              {
Packit df99a1
                xmlbs.set_encoding((encoding=e));
Packit df99a1
              }
Packit df99a1
            }
Packit df99a1
          }
Packit df99a1
        }
Packit df99a1
        break;
Packit df99a1
      }
Packit df99a1
      case '!':
Packit df99a1
      {
Packit df99a1
        if(tag[2] == '-' && tag[3] == '-')
Packit df99a1
        {
Packit df99a1
          while((len < 7) ||
Packit df99a1
            (tag.substr(len-3,-1) != "-->"))
Packit df99a1
          {
Packit df99a1
            GUTF8String cont(xmlbs.gets(0,'>',true));
Packit df99a1
            if(!cont.length())
Packit df99a1
            { 
Packit df99a1
              GUTF8String mesg;
Packit df99a1
              mesg.format( ERR_MSG("XMLTags.bad_comment") "\t%s",(const char *)tag);
Packit df99a1
              G_THROW(mesg);
Packit df99a1
            }
Packit df99a1
            len=((tag+=cont).length());
Packit df99a1
          }
Packit df99a1
        }
Packit df99a1
        break;
Packit df99a1
      }
Packit df99a1
      case '/':
Packit df99a1
      {
Packit df99a1
        GUTF8String xname=tagtoname(tag.substr(2,-1));
Packit df99a1
        GPosition last=level.lastpos();
Packit df99a1
        if(last)
Packit df99a1
        {
Packit df99a1
          if(level[last]->name != xname)
Packit df99a1
          {
Packit df99a1
            G_THROW( (ERR_MSG("XMLTags.unmatched_end") "\t")
Packit df99a1
              +level[last]->name+("\t"+GUTF8String(level[last]->get_Line()))
Packit df99a1
              +("\t"+xname)+("\t"+GUTF8String(linesread+1)));
Packit df99a1
          }
Packit df99a1
          level.del(last);
Packit df99a1
        }else
Packit df99a1
        {
Packit df99a1
          G_THROW( ERR_MSG("XMLTags.bad_form") );
Packit df99a1
        }
Packit df99a1
        break;
Packit df99a1
      }
Packit df99a1
      default:
Packit df99a1
      {
Packit df99a1
        GPosition last=level.lastpos();
Packit df99a1
        GP<lt_XMLTags> t;
Packit df99a1
        if(last)
Packit df99a1
        {
Packit df99a1
          t=new lt_XMLTags(tag.substr(1,len-1));
Packit df99a1
          level[last]->addtag(t);
Packit df99a1
          if(tag[len-2] != '/')
Packit df99a1
          {
Packit df99a1
            level.append(t);
Packit df99a1
          }
Packit df99a1
        }else if(tag[len-2] != '/')
Packit df99a1
        {
Packit df99a1
          char const *n;
Packit df99a1
          GUTF8String xtag = tag.substr(1,-1); 
Packit df99a1
          name=tagtoname(xtag, n);
Packit df99a1
          ParseValues(n,args);
Packit df99a1
          t=this;
Packit df99a1
          level.append(t);
Packit df99a1
        }else
Packit df99a1
        {
Packit df99a1
          G_THROW( ERR_MSG("XMLTags.no_body") );
Packit df99a1
        }
Packit df99a1
        t->set_Line(linesread+1);
Packit df99a1
        break;
Packit df99a1
      }
Packit df99a1
    }
Packit df99a1
    if((raw=xmlbs.gets(0,'<',false))[0])
Packit df99a1
    { 
Packit df99a1
      linesread=xmlbs.get_lines_read();
Packit df99a1
      GPosition last=level.lastpos();
Packit df99a1
      if(last)
Packit df99a1
      {
Packit df99a1
        level[last]->addraw(raw);
Packit df99a1
      }else if(!isspaces(raw))
Packit df99a1
      {
Packit df99a1
        G_THROW(( ERR_MSG("XMLTags.raw_string") "\t")+raw);
Packit df99a1
      }
Packit df99a1
    }
Packit df99a1
  }
Packit df99a1
}
Packit df99a1
Packit df99a1
// Return a copy of the list stored in `allTags` under `tagname`, or an
// empty list when `allTags` has no such key.
GPList<lt_XMLTags>
lt_XMLTags::get_Tags(char const tagname[]) const
{
  GPosition found=allTags.contains(tagname);
  if(found)
  {
    return allTags[found];
  }
  return GPList<lt_XMLTags>();
}
Packit df99a1
Packit df99a1
void
Packit df99a1
lt_XMLTags::get_Maps(char const tagname[],
Packit df99a1
                     char const argn[],
Packit df99a1
                     GPList<lt_XMLTags> list,
Packit df99a1
                     GMap<GUTF8String, GP<lt_XMLTags> > &map)
Packit df99a1
{
Packit df99a1
  for(GPosition pos=list;pos;++pos)
Packit df99a1
  {
Packit df99a1
    GP<lt_XMLTags> &tag=list[pos];
Packit df99a1
    if(tag)
Packit df99a1
    {
Packit df99a1
      GPosition loc;
Packit df99a1
      if((loc=tag->contains(tagname)))
Packit df99a1
      {
Packit df99a1
        GPList<lt_XMLTags> maps=(GPList<lt_XMLTags> &)((*tag)[loc]);
Packit df99a1
        for(GPosition mloc=maps;mloc;++mloc)
Packit df99a1
        {
Packit df99a1
          GP<lt_XMLTags> gtag=maps[mloc];
Packit df99a1
          if(gtag)
Packit df99a1
          {
Packit df99a1
            GMap<GUTF8String,GUTF8String> &args=gtag->args;
Packit df99a1
            GPosition gpos;
Packit df99a1
            if((gpos=args.contains(argn)))
Packit df99a1
            {
Packit df99a1
              map[args[gpos]]=gtag;
Packit df99a1
            }
Packit df99a1
          }
Packit df99a1
        }
Packit df99a1
      }
Packit df99a1
    }
Packit df99a1
  }
Packit df99a1
}
Packit df99a1
Packit df99a1
void
Packit df99a1
lt_XMLTags::write(ByteStream &bs,bool const top) const
Packit df99a1
{
Packit df99a1
  if(name.length())
Packit df99a1
  {
Packit df99a1
    GUTF8String tag="<"+name;
Packit df99a1
    for(GPosition pos=args;pos;++pos)
Packit df99a1
    {
Packit df99a1
      tag+=GUTF8String(' ')+args.key(pos)+GUTF8String("=\42")+args[pos].toEscaped()+GUTF8String("\42");
Packit df99a1
    }
Packit df99a1
    GPosition tags=content;
Packit df99a1
    if(tags||raw.length()) 
Packit df99a1
    {
Packit df99a1
      tag+=">";
Packit df99a1
      bs.writall((const char *)tag,tag.length());
Packit df99a1
      tag="</"+name+">";
Packit df99a1
      if(raw.length())
Packit df99a1
      {
Packit df99a1
        bs.writestring(raw);
Packit df99a1
      }
Packit df99a1
      for(;tags;++tags)
Packit df99a1
      {
Packit df99a1
        content[tags].write(bs);
Packit df99a1
      }
Packit df99a1
    }else if(!raw.length())
Packit df99a1
    {
Packit df99a1
      tag+="/>";
Packit df99a1
    }
Packit df99a1
    bs.writall((const char *)tag,tag.length());
Packit df99a1
  }
Packit df99a1
  if(top)
Packit df99a1
  {
Packit df99a1
     bs.writall("\n",1);
Packit df99a1
  }
Packit df99a1
}
Packit df99a1
Packit df99a1
void
Packit df99a1
lt_XMLContents::write(ByteStream &bs) const
Packit df99a1
{
Packit df99a1
  if(tag)
Packit df99a1
  {
Packit df99a1
    tag->write(bs,false);
Packit df99a1
  }
Packit df99a1
  if(raw.length())
Packit df99a1
  {
Packit df99a1
    bs.writestring(raw);
Packit df99a1
  } 
Packit df99a1
}
Packit df99a1
Packit df99a1
Packit df99a1
#ifdef HAVE_NAMESPACES
Packit df99a1
}
Packit df99a1
# ifndef NOT_USING_DJVU_NAMESPACE
Packit df99a1
using namespace DJVU;
Packit df99a1
# endif
Packit df99a1
#endif