Blob Blame History Raw
//C-  -*- C++ -*-
//C- -------------------------------------------------------------------
//C- DjVuLibre-3.5
//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
//C- Copyright (c) 2001  AT&T
//C-
//C- This software is subject to, and may be distributed under, the
//C- GNU General Public License, either Version 2 of the license,
//C- or (at your option) any later version. The license should have
//C- accompanied the software or you may obtain a copy of the license
//C- from the Free Software Foundation at http://www.fsf.org .
//C-
//C- This program is distributed in the hope that it will be useful,
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//C- GNU General Public License for more details.
//C- 
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
//C- Lizardtech Software.  Lizardtech Software has authorized us to
//C- replace the original DjVu(r) Reference Library notice by the following
//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
//C-
//C-  ------------------------------------------------------------------
//C- | DjVu (r) Reference Library (v. 3.5)
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
//C- | 6,058,214 and patents pending.
//C- |
//C- | This software is subject to, and may be distributed under, the
//C- | GNU General Public License, either Version 2 of the license,
//C- | or (at your option) any later version. The license should have
//C- | accompanied the software or you may obtain a copy of the license
//C- | from the Free Software Foundation at http://www.fsf.org .
//C- |
//C- | The computer code originally released by LizardTech under this
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
//C- | ORIGINAL CODE."  Subject to any third party intellectual property
//C- | claims, LizardTech grants recipient a worldwide, royalty-free, 
//C- | non-exclusive license to make, use, sell, or otherwise dispose of 
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the 
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU 
//C- | General Public License.   This grant only confers the right to 
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to 
//C- | the extent such infringement is reasonably necessary to enable 
//C- | recipient to make, have made, practice, sell, or otherwise dispose 
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to 
//C- | any greater extent that may be necessary to utilize further 
//C- | modifications or combinations.
//C- |
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
//C- +------------------------------------------------------------------

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#if NEED_GNUG_PRAGMAS
# pragma implementation
#endif

// From: Leon Bottou, 1/31/2002
// This is purely Lizardtech stuff.

#include "XMLTags.h"
#include "UnicodeByteStream.h"
#include <ctype.h>
#if HAS_WCTYPE
#include <wctype.h>
#endif


#ifdef HAVE_NAMESPACES
namespace DJVU {
# ifdef NOT_DEFINED // Just to fool emacs c++ mode
}
#endif
#endif

lt_XMLContents::lt_XMLContents(void) {}

lt_XMLContents::lt_XMLContents(GP<lt_XMLTags> t)
{
  tag=t;
}

static GUTF8String
getargn(char const tag[], char const *&t)
{
  char const *s;
  for(s=tag;isspace(*s);s++);
  for(t=s;(*t)&&((*t)!='/')&&((*t)!='>')&&((*t)!='=')&&!isspace(*t);++t);
  return GUTF8String(s,t-s);
}

static GUTF8String
getargv(char const tag[], char const *&t)
{
  GUTF8String retval;
  if(tag && tag[0] == '=')
  {
    char const *s=t=tag+1;
    if((*t == '"')||(*t == '\47'))
    {
      char const q=*(t++);
      for(s++;(*t)&&((*t)!=q)&&((*t)!='>');++t);
      retval=GUTF8String(s,t-s);
      if (t[0] == q)
      {
        ++t;
      }
    }else
    {
      for(t=s;(*t)&&((*t)!='/')&&((*t)!='>')&&!isspace(*t);++t);
      retval=GUTF8String(s,t-s);
    }
  }else
  {
    t=tag;
  }
  return retval;
}

static GUTF8String
tagtoname(char const tag[],char const *&t)
{
  char const *s;
  for(s=tag;isspace(*s);s++);
  for(t=s;(*t)&&((*t)!='>')&&((*t)!='/')&&!isspace(*t);++t);
  return GUTF8String(s,t-s);
}

static inline GUTF8String
tagtoname(char const tag[])
{
  char const *t;
  return tagtoname(tag,t);
}

static inline bool
isspaces(const GUTF8String &raw)
{
  return (raw.nextNonSpace() == (int)raw.length());
}

void
lt_XMLTags::ParseValues(char const *t, GMap<GUTF8String,GUTF8String> &args,bool downcase)
{
  GUTF8String argn;
  char const *tt;
  while((argn=getargn(t,tt)).length())
  {
    if(downcase)
      argn=argn.downcase();
    args[argn]=getargv(tt,t).fromEscaped();
  }
}

lt_XMLTags::~lt_XMLTags() {}

lt_XMLTags::lt_XMLTags(void) : startline(0) {}

lt_XMLTags::lt_XMLTags(const char n[]) : startline(0)
{
  char const *t;
  name=tagtoname(n,t);
  ParseValues(t,args);
}

void
lt_XMLTags::init(const GP<ByteStream> &bs)
{
  GP<XMLByteStream> gxmlbs=XMLByteStream::create(bs);
  init(*gxmlbs);
}

void
lt_XMLTags::init(const GURL &url)
{
  const GP<ByteStream> bs=ByteStream::create(url,"rb");
  init(bs);
}

void
lt_XMLTags::init(XMLByteStream &xmlbs)
{
  if(!get_count())
  {
    G_THROW( ERR_MSG("XMLTags.no_GP") );
  }
  GPList<lt_XMLTags> level;
  GUTF8String tag,raw(xmlbs.gets(0,'<',false));
  int linesread=xmlbs.get_lines_read();
  if(!isspaces(raw))
  {
    G_THROW( (ERR_MSG("XMLTags.raw_string") "\t")+raw);
  }
  GUTF8String encoding;
  for(int len;(len=(tag=xmlbs.gets(0,'>',true)).length());)
  {
    if(tag[len-1] != '>')
    {
      G_THROW((ERR_MSG("XMLTags.bad_tag") "\t")+tag);
    }
    switch(tag[1])
    {
      case '?':
      {
        while(len < 4 || tag.substr(len-2,len) != "?>")
        {
          GUTF8String cont(xmlbs.gets(0,'>',true));
          if(!cont.length())
          { 
            G_THROW( (ERR_MSG("XMLTags.bad_PI") "\t")+tag);
          }
          len=((tag+=cont).length());
        }
        char const *n;
        GUTF8String xtag = tag.substr(2,-1);
        GUTF8String xname = tagtoname(xtag,n);
        if(xname.downcase() == "xml")
        {
          ParseValues(n,args);
          for(GPosition pos=args;pos;++pos)
          {
            if(args.key(pos) == "encoding")
            {
              const GUTF8String e=args[pos].upcase();
              if(e != encoding)
              {
                xmlbs.set_encoding((encoding=e));
              }
            }
          }
        }
        break;
      }
      case '!':
      {
        if(tag[2] == '-' && tag[3] == '-')
        {
          while((len < 7) ||
            (tag.substr(len-3,-1) != "-->"))
          {
            GUTF8String cont(xmlbs.gets(0,'>',true));
            if(!cont.length())
            { 
              GUTF8String mesg;
              mesg.format( ERR_MSG("XMLTags.bad_comment") "\t%s",(const char *)tag);
              G_THROW(mesg);
            }
            len=((tag+=cont).length());
          }
        }
        break;
      }
      case '/':
      {
        GUTF8String xname=tagtoname(tag.substr(2,-1));
        GPosition last=level.lastpos();
        if(last)
        {
          if(level[last]->name != xname)
          {
            G_THROW( (ERR_MSG("XMLTags.unmatched_end") "\t")
              +level[last]->name+("\t"+GUTF8String(level[last]->get_Line()))
              +("\t"+xname)+("\t"+GUTF8String(linesread+1)));
          }
          level.del(last);
        }else
        {
          G_THROW( ERR_MSG("XMLTags.bad_form") );
        }
        break;
      }
      default:
      {
        GPosition last=level.lastpos();
        GP<lt_XMLTags> t;
        if(last)
        {
          t=new lt_XMLTags(tag.substr(1,len-1));
          level[last]->addtag(t);
          if(tag[len-2] != '/')
          {
            level.append(t);
          }
        }else if(tag[len-2] != '/')
        {
          char const *n;
          GUTF8String xtag = tag.substr(1,-1); 
          name=tagtoname(xtag, n);
          ParseValues(n,args);
          t=this;
          level.append(t);
        }else
        {
          G_THROW( ERR_MSG("XMLTags.no_body") );
        }
        t->set_Line(linesread+1);
        break;
      }
    }
    if((raw=xmlbs.gets(0,'<',false))[0])
    { 
      linesread=xmlbs.get_lines_read();
      GPosition last=level.lastpos();
      if(last)
      {
        level[last]->addraw(raw);
      }else if(!isspaces(raw))
      {
        G_THROW(( ERR_MSG("XMLTags.raw_string") "\t")+raw);
      }
    }
  }
}

GPList<lt_XMLTags>
lt_XMLTags::get_Tags(char const tagname[]) const
{
  GPosition pos=allTags.contains(tagname);
  GPList<lt_XMLTags> retval;
  return (pos?allTags[pos]:retval);
}

void
lt_XMLTags::get_Maps(char const tagname[],
                     char const argn[],
                     GPList<lt_XMLTags> list,
                     GMap<GUTF8String, GP<lt_XMLTags> > &map)
{
  for(GPosition pos=list;pos;++pos)
  {
    GP<lt_XMLTags> &tag=list[pos];
    if(tag)
    {
      GPosition loc;
      if((loc=tag->contains(tagname)))
      {
        GPList<lt_XMLTags> maps=(GPList<lt_XMLTags> &)((*tag)[loc]);
        for(GPosition mloc=maps;mloc;++mloc)
        {
          GP<lt_XMLTags> gtag=maps[mloc];
          if(gtag)
          {
            GMap<GUTF8String,GUTF8String> &args=gtag->args;
            GPosition gpos;
            if((gpos=args.contains(argn)))
            {
              map[args[gpos]]=gtag;
            }
          }
        }
      }
    }
  }
}

void
lt_XMLTags::write(ByteStream &bs,bool const top) const
{
  if(name.length())
  {
    GUTF8String tag="<"+name;
    for(GPosition pos=args;pos;++pos)
    {
      tag+=GUTF8String(' ')+args.key(pos)+GUTF8String("=\42")+args[pos].toEscaped()+GUTF8String("\42");
    }
    GPosition tags=content;
    if(tags||raw.length()) 
    {
      tag+=">";
      bs.writall((const char *)tag,tag.length());
      tag="</"+name+">";
      if(raw.length())
      {
        bs.writestring(raw);
      }
      for(;tags;++tags)
      {
        content[tags].write(bs);
      }
    }else if(!raw.length())
    {
      tag+="/>";
    }
    bs.writall((const char *)tag,tag.length());
  }
  if(top)
  {
     bs.writall("\n",1);
  }
}

void
lt_XMLContents::write(ByteStream &bs) const
{
  if(tag)
  {
    tag->write(bs,false);
  }
  if(raw.length())
  {
    bs.writestring(raw);
  } 
}


#ifdef HAVE_NAMESPACES
}
# ifndef NOT_USING_DJVU_NAMESPACE
using namespace DJVU;
# endif
#endif