Blob Blame History Raw
//C-  -*- C++ -*-
//C- -------------------------------------------------------------------
//C- DjVuLibre-3.5
//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
//C- Copyright (c) 2001  AT&T
//C-
//C- This software is subject to, and may be distributed under, the
//C- GNU General Public License, either Version 2 of the license,
//C- or (at your option) any later version. The license should have
//C- accompanied the software or you may obtain a copy of the license
//C- from the Free Software Foundation at http://www.fsf.org .
//C-
//C- This program is distributed in the hope that it will be useful,
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//C- GNU General Public License for more details.
//C- 
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
//C- Lizardtech Software.  Lizardtech Software has authorized us to
//C- replace the original DjVu(r) Reference Library notice by the following
//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
//C-
//C-  ------------------------------------------------------------------
//C- | DjVu (r) Reference Library (v. 3.5)
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
//C- | 6,058,214 and patents pending.
//C- |
//C- | This software is subject to, and may be distributed under, the
//C- | GNU General Public License, either Version 2 of the license,
//C- | or (at your option) any later version. The license should have
//C- | accompanied the software or you may obtain a copy of the license
//C- | from the Free Software Foundation at http://www.fsf.org .
//C- |
//C- | The computer code originally released by LizardTech under this
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
//C- | ORIGINAL CODE."  Subject to any third party intellectual property
//C- | claims, LizardTech grants recipient a worldwide, royalty-free, 
//C- | non-exclusive license to make, use, sell, or otherwise dispose of 
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the 
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU 
//C- | General Public License.   This grant only confers the right to 
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to 
//C- | the extent such infringement is reasonably necessary to enable 
//C- | recipient to make, have made, practice, sell, or otherwise dispose 
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to 
//C- | any greater extent that may be necessary to utilize further 
//C- | modifications or combinations.
//C- |
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
//C- +------------------------------------------------------------------

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#if NEED_GNUG_PRAGMAS
# pragma implementation
#endif

#include "DjVuDocument.h"
#include "DjVmDir.h"
#include "ByteStream.h"
#include "IFFByteStream.h"
#include "DjVuText.h"
#include "DjVuImage.h"
#include "GString.h"
#include "GOS.h"
#include "GURL.h"
#include "DjVuMessage.h"

#include "common.h"

#include <sys/stat.h>
#include <time.h>


static bool cgi = false;
static bool head = false;


struct DJVUSERVEGlobal 
{
  // Globals that need static initialization
  // are grouped here to work around broken compilers.
  GUTF8String pathinfo;
  GUTF8String pathtranslated;
  GUTF8String requestmethod;
  GUTF8String querystring;
};

static DJVUSERVEGlobal& g(void)
{
  static DJVUSERVEGlobal g;
  return g;
}


static void
usage(void)
{
   DjVuPrintErrorUTF8(
#ifdef DJVULIBRE_VERSION
          "DJVUSERVE --- DjVuLibre-" DJVULIBRE_VERSION "\n"
#endif
          "Extracts hidden text from Djvu files\n"
          "\n"
          "Usage: djvuserve [<djvufile>[/<djvmid>]\n"
          "Outputs the specified <djvufile> with valid Content-Type,\n"
          "Content-Length, and Expire HTTP headers.  Bundled multipage DjVu\n"
          "documents are accessed as indirect document using the <djvmid>\n"
          "syntax. Specifying a <djvmid> of <index> generates an indirect page\n"
          "directory pointing to the other component files.\n"
          "This program is designed to be used as a CGI executable.\n"
          "It uses environment variable PATH_TRANSLATED when executed\n"
          "without arguments.\n\n" );
   exit(10);
}

static const char * 
day_name(int d)
{
   static const char *n[] = {
     "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" 
   };
   if (d>=0 && d<7)
     return n[d];
   return "???";
}

static const char* 
month_name(int d)
{
   static const char *n[] = {
     "Jan", "Feb", "Mar", "Apr", "May", "Jun",
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 
   };
   if (d>=0 && d<12)
     return n[d];
   return "???";
}

void
fprintdate(FILE *f, const char *fmt, const time_t *tim)
{
  char ctim[128];
  struct tm *ttim = gmtime(tim);
  /* strftime(ctim, sizeof(ctim)-1, "%a, %d %b %Y %H:%M:%S GMT", ttim); */
  sprintf(ctim,"%3s, %02d %3s %04d %02d:%02d:%02d GMT",
	  day_name(ttim->tm_wday), ttim->tm_mday,
	  month_name(ttim->tm_mon), 1900+ttim->tm_year,
	  ttim->tm_hour, ttim->tm_min, ttim->tm_sec);
  fprintf(stdout, fmt, ctim);
}

void
headers(const struct stat *statbuf, const char *fname = 0)
{
  fprintf(stdout,"Content-Type: image/x.djvu\n");
  if (fname)
    fprintf(stdout,"Content-Disposition: attachment; filename=\"%s\"\n", fname);
  fprintf(stdout,"Content-Length: %ld\n", (long)statbuf->st_size);
  time_t tim = time(0) + 360 * 24 * 3600;
  fprintdate(stdout, "Last-Modified: %s\n", &statbuf->st_mtime);
  fprintdate(stdout, "Expires: %s\n", &tim);
}

bool
is_djvu_file_bundled(GURL &pathurl)
{
  GP<ByteStream> in = ByteStream::create(pathurl,"rb");
  GP<IFFByteStream> iff = IFFByteStream::create(in);
  GUTF8String chkid;
  iff->get_chunk(chkid);
  // Make sure that this is a DjVu file.
  if (chkid != "FORM:DJVU" &&
      chkid != "FORM:DJVM" &&
      chkid != "FORM:PM44" &&
      chkid != "FORM:BM44"   )
    G_THROW("Corrupted DjVu file");
  // Test if it is bundled
  if (chkid == "FORM:DJVM")
    {
      while (iff->get_chunk(chkid) && chkid!="DIRM")
        iff->close_chunk();
      if (chkid == "DIRM")
        {
          GP<ByteStream> dirm = iff->get_bytestream();
          if (dirm->read8() & 0x80)
            return true;
        }
    }
  return false;
}

void 
djvuserver_file(GURL pathurl, bool bundled, bool download)
{
  GNativeString fname = pathurl.NativeFilename();
  struct stat statbuf;
  if (stat((const char *)fname, &statbuf) < 0)
    G_THROW(strerror(errno));
  
  // Is this a bundled file?
  if (is_djvu_file_bundled(pathurl) && !bundled)
    {
      // It is bundled
      GUTF8String id = pathurl.name();
      fprintf(stdout,"Location: %s/index.djvu", (const char*)id);
      if (g().querystring.length())
        fprintf(stdout,"?%s", (const char*)g().querystring);
      fprintf(stdout,"\n\n");
      return;
    }
  // Push the file
  if (download)
    headers(&statbuf, pathurl.fname());
  else
    headers(&statbuf);
  if (head) 
    return;
  fprintf(stdout,"\n");
  fflush(stdout);
  GP<ByteStream> in = ByteStream::create(pathurl,"rb");
  GP<ByteStream> out = ByteStream::get_stdout("ab");
  out->copy(*in);
}

void 
djvuserver_directory(GURL pathurl)
{
  GNativeString fname = pathurl.NativeFilename();
  struct stat statbuf;
  if (stat((const char *)fname, &statbuf) < 0)
    G_THROW(strerror(errno));
  // Find the DIRM chunk directly (save time)
  GP<ByteStream> temp;
  GP<ByteStream> bsin = ByteStream::create(pathurl,"rb");
  GP<DjVmDir> dir = DjVmDir::create();
  {
    GP<IFFByteStream> iffin = IFFByteStream::create(bsin);
    GUTF8String chkid;
    iffin->get_chunk(chkid);
    if (chkid != "FORM:DJVM")
      G_THROW( "This is not a multipage DjVu document" );
    while (iffin->get_chunk(chkid) && chkid!="DIRM")
      iffin->close_chunk();
    if (chkid != "DIRM")
      G_THROW( "This is not a new style bundled DjVu document" );
    temp = iffin->get_bytestream();
    dir->decode(temp);
    if (! dir->is_bundled())
      G_THROW( "This is not a bundled DjVu document" );
  }
  // Assemble index of indirect multipage file
  GP<ByteStream> bsdir = ByteStream::create();
  {
    GP<IFFByteStream> iff = IFFByteStream::create(bsdir);
    iff->put_chunk("FORM:DJVM",1);
    iff->put_chunk("DIRM");
    temp = iff->get_bytestream();
    dir->encode(temp, false, false);
    iff->close_chunk();
    iff->close_chunk();
  }
  // HTTP output
  statbuf.st_size = bsdir->tell();
  headers(&statbuf);
  if (head) 
    return;
  bsdir->seek(0);
  fprintf(stdout,"\n");
  fflush(stdout);
  GP<ByteStream> out = ByteStream::get_stdout("ab");
  out->copy(*bsdir);
}

void 
djvuserver_component(GURL pathurl, GUTF8String id)
{
  GNativeString fname = pathurl.NativeFilename();
  struct stat statbuf;
  if (stat((const char *)fname, &statbuf) < 0)
    G_THROW(strerror(errno));
  // Find the DIRM chunk directly (save time)
  GP<ByteStream> temp;
  GP<ByteStream> bsin = ByteStream::create(pathurl,"rb");
  GP<DjVmDir> dir = DjVmDir::create();
  {
    GP<IFFByteStream> iffin = IFFByteStream::create(bsin);
    GUTF8String chkid;
    iffin->get_chunk(chkid);
    if (chkid != "FORM:DJVM")
      G_THROW( "This is not a multipage DjVu document" );
    while (iffin->get_chunk(chkid) && chkid!="DIRM")
      iffin->close_chunk();
    if (chkid != "DIRM")
      G_THROW( "This is not a new style bundled DjVu document" );
    temp = iffin->get_bytestream();
    dir->decode(temp);
    if (! dir->is_bundled())
      G_THROW( "This is not a bundled DjVu document" );
  }
  // Find the file record
  GP<DjVmDir::File> frec = dir->id_to_file(id);
  if (!frec)
    G_THROW( "Cannot locate requested component file" );
  if (!frec->size || !frec->offset)
    G_THROW( "Corrupted DjVu directory" );

  // HTTP output
  statbuf.st_size = frec->size + 4;
  headers(&statbuf);
  if (head) 
    return;
  fprintf(stdout,"\n");
  fflush(stdout);
  GP<ByteStream> out = ByteStream::get_stdout("ab");
  out->writall("AT&T", 4);
  bsin->seek(frec->offset);
  out->copy(*bsin, frec->size);
}


bool
search_cgi_arg(const char *name)
{
  const char *s = g().querystring;
  int l = strlen(name);
  if (*s == '?')
    s += 1;
  while (*s)
    {
      if (! strncmp(s, name, l))
        if (s[l]=='&' || s[l]=='=' || s[l]==0)
          return true;
      while (*s && *s != '&')
        s += 1;
      if (*s == '&')
        s += 1;
    }
  return false;
}


int
main(int argc, char ** argv)
{
  DJVU_LOCALE;
  G_TRY 
    {
      // Obtain path
      bool bundled = false;
      bool download = false;
      if (argc == 1)
        {
          cgi = true;
          g().pathinfo = GNativeString(getenv("PATH_INFO"));
          g().pathtranslated = GNativeString(getenv("PATH_TRANSLATED"));
          if (! g().pathinfo)
            usage();
          if (! g().pathtranslated)
            G_THROW("No path information");
          g().requestmethod = GNativeString(getenv("REQUEST_METHOD"));
          g().querystring = GUTF8String(getenv("QUERY_STRING"));
          if (search_cgi_arg("bundled"))
            bundled = true;
          if (search_cgi_arg("download") || search_cgi_arg("bundle"))
            bundled = download = true;
        }
      else if (argc == 2)
        {
          cgi = false;
          g().pathtranslated = GNativeString(argv[1]);
          g().requestmethod = "GET";
        }
      if (! g().pathtranslated)
        usage();
      head = false;
      if (g().requestmethod == "HEAD")
        head = true;
      else if (g().requestmethod != "GET")
        G_THROW("Only serve HEAD and GET requests");
      // Do it.
      GURL pathurl = GURL::Filename::UTF8(g().pathtranslated);
      if (pathurl.is_file())
        {
          djvuserver_file(pathurl, bundled, download);
        }
      else
        {
          GUTF8String id = pathurl.name();
          pathurl = pathurl.base();
          if (! pathurl.is_file())
            G_THROW("File not found");
          if (id != "index" && id != "index.djvu")
            djvuserver_component(pathurl, id);
          else if (bundled)
            djvuserver_file(pathurl, bundled, download);
          else
            djvuserver_directory(pathurl);
        }
    }
  G_CATCH(ex)
    {
      if (cgi)
        {
          GUTF8String cause = DjVuMessageLite::LookUpUTF8(ex.get_cause());
          fprintf(stdout,"Status: 400 %s\n", (const char*)cause);
          fprintf(stdout,"Content-Type: text/html\n\n");
          fprintf(stdout,
                  "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"
                  "<HTML><HEAD><TITLE>400 Error</TITLE></HEAD><BODY>\n"
                  "<H1>%s</H1>The requested URL '%s' cannot be processed.<P>\n"
#ifdef DJVULIBRE_VERSION
                  "<HR><ADDRESS>djvuserve/DjVuLibre-" DJVULIBRE_VERSION "</ADDRESS>\n"
#endif
                  "</BODY></HTML>\n",
                  (const char *) cause,
                  (const char *) g().pathinfo );
        }
      else
        {
          ex.perror();
        }
      exit(10);
    }
  G_ENDCATCH;
  // return code zero
  return 0;
}