Blame tools/jb2tune.cpp

Packit df99a1
//C-  -*- C++ -*-
Packit df99a1
//C- -------------------------------------------------------------------
Packit df99a1
//C- DjVuLibre-3.5
Packit df99a1
//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
Packit df99a1
//C- Copyright (c) 2001  AT&T
Packit df99a1
//C-
Packit df99a1
//C- This software is subject to, and may be distributed under, the
Packit df99a1
//C- GNU General Public License, either Version 2 of the license,
Packit df99a1
//C- or (at your option) any later version. The license should have
Packit df99a1
//C- accompanied the software or you may obtain a copy of the license
Packit df99a1
//C- from the Free Software Foundation at http://www.fsf.org .
Packit df99a1
//C-
Packit df99a1
//C- This program is distributed in the hope that it will be useful,
Packit df99a1
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit df99a1
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit df99a1
//C- GNU General Public License for more details.
Packit df99a1
//C- 
Packit df99a1
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
Packit df99a1
//C- Lizardtech Software.  Lizardtech Software has authorized us to
Packit df99a1
//C- replace the original DjVu(r) Reference Library notice by the following
Packit df99a1
//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
Packit df99a1
//C-
Packit df99a1
//C-  ------------------------------------------------------------------
Packit df99a1
//C- | DjVu (r) Reference Library (v. 3.5)
Packit df99a1
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
Packit df99a1
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
Packit df99a1
//C- | 6,058,214 and patents pending.
Packit df99a1
//C- |
Packit df99a1
//C- | This software is subject to, and may be distributed under, the
Packit df99a1
//C- | GNU General Public License, either Version 2 of the license,
Packit df99a1
//C- | or (at your option) any later version. The license should have
Packit df99a1
//C- | accompanied the software or you may obtain a copy of the license
Packit df99a1
//C- | from the Free Software Foundation at http://www.fsf.org .
Packit df99a1
//C- |
Packit df99a1
//C- | The computer code originally released by LizardTech under this
Packit df99a1
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
Packit df99a1
//C- | ORIGINAL CODE."  Subject to any third party intellectual property
Packit df99a1
//C- | claims, LizardTech grants recipient a worldwide, royalty-free, 
Packit df99a1
//C- | non-exclusive license to make, use, sell, or otherwise dispose of 
Packit df99a1
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the 
Packit df99a1
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU 
Packit df99a1
//C- | General Public License.   This grant only confers the right to 
Packit df99a1
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to 
Packit df99a1
//C- | the extent such infringement is reasonably necessary to enable 
Packit df99a1
//C- | recipient to make, have made, practice, sell, or otherwise dispose 
Packit df99a1
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to 
Packit df99a1
//C- | any greater extent that may be necessary to utilize further 
Packit df99a1
//C- | modifications or combinations.
Packit df99a1
//C- |
Packit df99a1
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
Packit df99a1
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
Packit df99a1
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
Packit df99a1
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
Packit df99a1
//C- +------------------------------------------------------------------
Packit df99a1
Packit df99a1
#ifdef HAVE_CONFIG_H
Packit df99a1
# include "config.h"
Packit df99a1
#endif
Packit df99a1
#if NEED_GNUG_PRAGMAS
Packit df99a1
# pragma implementation
Packit df99a1
#endif
Packit df99a1
Packit df99a1
#include "DjVuGlobal.h"
Packit df99a1
#include "GException.h"
Packit df99a1
#include "GSmartPointer.h"
Packit df99a1
#include "GContainer.h"
Packit df99a1
#include "GRect.h"
Packit df99a1
#include "GBitmap.h"
Packit df99a1
#include "JB2Image.h"
Packit df99a1
Packit df99a1
#include "jb2tune.h"
Packit df99a1
Packit df99a1
#include "jb2cmp/minidjvu.h"
Packit df99a1
#include "jb2cmp/patterns.h"
Packit df99a1
#include "jb2cmp/classify.h"
Packit df99a1
Packit df99a1
#include <math.h>
Packit df99a1
Packit df99a1
#define REFINE_THRESHOLD 21
Packit df99a1
Packit df99a1
Packit df99a1
Packit df99a1
// ----------------------------------------
Packit df99a1
// UTILITIES
Packit df99a1
Packit df99a1
Packit df99a1
// Keep informations for pattern matching
Packit df99a1
struct MatchData 
Packit df99a1
{
Packit df99a1
  GP<GBitmap> bits;    // bitmap pointer
Packit df99a1
  int area;            // number of black pixels
Packit df99a1
  int match;           // jb2cmp pattern match
Packit df99a1
};
Packit df99a1
Packit df99a1
Packit df99a1
// Compute the number of black pixels.
Packit df99a1
static int 
Packit df99a1
compute_area(GBitmap *bits)
Packit df99a1
{
Packit df99a1
  GBitmap &bitmap = *bits;
Packit df99a1
  int w = bitmap.columns();
Packit df99a1
  int h = bitmap.rows();
Packit df99a1
  int black_pixels = 0;
Packit df99a1
  for (int i=0; i
Packit df99a1
    {
Packit df99a1
      unsigned char *row = bitmap[i];
Packit df99a1
      for (int j=0; j
Packit df99a1
        if (row[j])
Packit df99a1
          black_pixels++;
Packit df99a1
    }
Packit df99a1
  return black_pixels;
Packit df99a1
}
Packit df99a1
Packit df99a1
Packit df99a1
// Estimate position of baseline.
Packit df99a1
// The baseline position is measured in quarter pixels.
Packit df99a1
// Using fractional pixels makes a big improvement.
Packit df99a1
static int
Packit df99a1
compute_baseline(GBitmap *bits)
Packit df99a1
{
Packit df99a1
   int h = bits->rows();
Packit df99a1
   int w = bits->columns();
Packit df99a1
   GTArray<int> mass(h);
Packit df99a1
   int i, j, m;
Packit df99a1
   int tm = 0;
Packit df99a1
   for (i=0; i
Packit df99a1
     {
Packit df99a1
       unsigned char *row = (*bits)[i];
Packit df99a1
       for (j=0; j
Packit df99a1
         if (row[j])
Packit df99a1
           break;
Packit df99a1
       for (m = w-j; m>0; m--)
Packit df99a1
         if (row[j+m-1])
Packit df99a1
           break;
Packit df99a1
       mass[i] = m;
Packit df99a1
       tm += m;
Packit df99a1
     }
Packit df99a1
   m = 0;
Packit df99a1
   i = 0;
Packit df99a1
   while (m * 6 < tm * 4)
Packit df99a1
     {
Packit df99a1
       m += mass[i/4];
Packit df99a1
       i += 1;
Packit df99a1
     }
Packit df99a1
   return i;
Packit df99a1
}
Packit df99a1
Packit df99a1
Packit df99a1
// Fill the MatchData array for lossless compression
Packit df99a1
static void
Packit df99a1
compute_matchdata_lossless(JB2Image *jimg, MatchData *lib)
Packit df99a1
{
Packit df99a1
  int i;
Packit df99a1
  int nshapes = jimg->get_shape_count();
Packit df99a1
  for (i=0; i
Packit df99a1
    {
Packit df99a1
      JB2Shape &jshp = jimg->get_shape(i);
Packit df99a1
      lib[i].bits = 0;
Packit df99a1
      lib[i].area = 0;
Packit df99a1
      lib[i].match = -1;
Packit df99a1
      if (! jshp.bits) continue;
Packit df99a1
      if (jshp.userdata & JB2SHAPE_SPECIAL) continue;
Packit df99a1
      lib[i].bits = jshp.bits;
Packit df99a1
      lib[i].area = compute_area(jshp.bits);
Packit df99a1
    }
Packit df99a1
}
Packit df99a1
Packit df99a1
Packit df99a1
// Interface with Ilya's data structures.
Packit df99a1
static mdjvu_pattern_t 
Packit df99a1
compute_comparable_image(GBitmap *bits)
Packit df99a1
{
Packit df99a1
  int w = bits->columns();
Packit df99a1
  int h = bits->rows();
Packit df99a1
  GTArray<unsigned char*> p(h);
Packit df99a1
  for (int i=0; i
Packit df99a1
  return mdjvu_pattern_create_from_array(p, w, h);  
Packit df99a1
}
Packit df99a1
Packit df99a1
Packit df99a1
// Compute MatchData array for lossy compression.
Packit df99a1
static void
Packit df99a1
compute_matchdata_lossy(JB2Image *jimg, MatchData *lib,
Packit df99a1
                        int dpi, mdjvu_matcher_options_t options)
Packit df99a1
{
Packit df99a1
  int i;
Packit df99a1
  int nshapes = jimg->get_shape_count();
Packit df99a1
  // Prepare MatchData
Packit df99a1
  GTArray<mdjvu_pattern_t> handles(nshapes);
Packit df99a1
  for (i=0; i
Packit df99a1
    {
Packit df99a1
      JB2Shape &jshp = jimg->get_shape(i);
Packit df99a1
      lib[i].bits = 0;
Packit df99a1
      lib[i].area = 0;
Packit df99a1
      lib[i].match = -1;
Packit df99a1
      handles[i] = 0;
Packit df99a1
      if (! jshp.bits) continue;
Packit df99a1
      if (jshp.userdata & JB2SHAPE_SPECIAL) continue;
Packit df99a1
      lib[i].bits = jshp.bits;
Packit df99a1
      lib[i].area = compute_area(jshp.bits);
Packit df99a1
      handles[i] = compute_comparable_image(jshp.bits);
Packit df99a1
    }
Packit df99a1
  // Run Ilya's pattern matcher.
Packit df99a1
  GTArray<int> tags(nshapes);  
Packit df99a1
  int maxtag = mdjvu_classify_patterns(handles, tags, nshapes, dpi, options);
Packit df99a1
  // Extract substitutions
Packit df99a1
  GTArray<int> reps(maxtag);
Packit df99a1
  for (i=0; i<=maxtag; i++)
Packit df99a1
    reps[i] = -1;
Packit df99a1
  for (i=0; i
Packit df99a1
    if (handles[i])
Packit df99a1
      {
Packit df99a1
        int r = reps[tags[i]];
Packit df99a1
        lib[i].match = r;
Packit df99a1
        if (r < 0) 
Packit df99a1
          reps[tags[i]] = i;
Packit df99a1
      }
Packit df99a1
  // Free Ilya's data structures.
Packit df99a1
  for (i=0; i
Packit df99a1
    if (handles[i])
Packit df99a1
      mdjvu_pattern_destroy(handles[i]);
Packit df99a1
}
Packit df99a1
Packit df99a1
Packit df99a1
// Reorganize jb2image on the basis of matchdata.
Packit df99a1
// Also locate cross-coding buddys.
Packit df99a1
// Flag lossy is not strictly necessary
Packit df99a1
// but speeds up things when it is false.
Packit df99a1
static void 
Packit df99a1
tune_jb2image(JB2Image *jimg, MatchData *lib, bool lossy)
Packit df99a1
{
Packit df99a1
  int nshapes = jimg->get_shape_count();
Packit df99a1
  // Loop on all shapes
Packit df99a1
  for (int current=0; current
Packit df99a1
    {
Packit df99a1
      JB2Shape &jshp = jimg->get_shape(current);
Packit df99a1
      // Process substitutions.
Packit df99a1
      if (lossy && !(jshp.userdata & JB2SHAPE_LOSSLESS))
Packit df99a1
        {
Packit df99a1
          int substitute = lib[current].match;
Packit df99a1
          if (substitute >= 0)
Packit df99a1
            {
Packit df99a1
              jshp.parent = substitute;
Packit df99a1
              lib[current].bits = 0;
Packit df99a1
              continue;
Packit df99a1
            }
Packit df99a1
        }
Packit df99a1
      // Leave special shapes alone.
Packit df99a1
      if (! jshp.bits) continue;
Packit df99a1
      if (jshp.userdata & JB2SHAPE_SPECIAL) continue;
Packit df99a1
      // Compute matchdata info
Packit df99a1
      GBitmap &bitmap = *(jshp.bits);
Packit df99a1
      int rows = bitmap.rows();
Packit df99a1
      int cols = bitmap.columns();
Packit df99a1
      int best_score = (REFINE_THRESHOLD * rows * cols + 50) / 100;
Packit df99a1
      int black_pixels = lib[current].area;
Packit df99a1
      int closest = -1;
Packit df99a1
      // Search cross-coding buddy
Packit df99a1
      bitmap.minborder(2);
Packit df99a1
      if (best_score < 2) 
Packit df99a1
        best_score = 2;
Packit df99a1
      for (int candidate = 0; candidate < current; candidate++) 
Packit df99a1
        {
Packit df99a1
          int row, column;
Packit df99a1
          // Access candidate bitmap
Packit df99a1
          if (! lib[candidate].bits) 
Packit df99a1
            continue;
Packit df99a1
          GBitmap &cross_bitmap = *lib[candidate].bits;
Packit df99a1
          int cross_cols = cross_bitmap.columns();
Packit df99a1
          int cross_rows = cross_bitmap.rows();
Packit df99a1
          // Prune
Packit df99a1
          if (abs (lib[candidate].area - black_pixels) > best_score) 
Packit df99a1
            continue;
Packit df99a1
          if (abs (cross_rows - rows) > 2) 
Packit df99a1
            continue;
Packit df99a1
          if (abs (cross_cols - cols) > 2)
Packit df99a1
            continue;
Packit df99a1
          // Compute alignment (these are always +1, 0 or -1)
Packit df99a1
          int cross_col_adjust = (cross_cols-cross_cols/2)-(cols-cols/2);
Packit df99a1
          int cross_row_adjust = (cross_rows-cross_rows/2)-(rows-rows/2);
Packit df99a1
          // Ensure adequate borders
Packit df99a1
          cross_bitmap.minborder (2-cross_col_adjust);
Packit df99a1
          cross_bitmap.minborder (2+cols-cross_cols+cross_col_adjust);
Packit df99a1
          // Count pixel differences (including borders)
Packit df99a1
          int score = 0;
Packit df99a1
          unsigned char *p_row;
Packit df99a1
          unsigned char *p_cross_row;
Packit df99a1
          for (row = -1; row <= rows; row++) 
Packit df99a1
            {
Packit df99a1
              p_row = bitmap[row];
Packit df99a1
              p_cross_row = cross_bitmap[row+cross_row_adjust];
Packit df99a1
              p_cross_row += cross_col_adjust;
Packit df99a1
              for (column = -1; column <= cols; column++) 
Packit df99a1
                if (p_row[column] != p_cross_row[column])
Packit df99a1
                  score ++;
Packit df99a1
              if (score >= best_score)  // prune
Packit df99a1
                break;
Packit df99a1
            }
Packit df99a1
          if (score < best_score) 
Packit df99a1
            {
Packit df99a1
              best_score = score;
Packit df99a1
              closest = candidate;
Packit df99a1
            }
Packit df99a1
        }
Packit df99a1
      // Decide what to do with the match.
Packit df99a1
      if (closest >= 0)
Packit df99a1
        {
Packit df99a1
          // Mark the shape for cross-coding (``soft pattern matching'')
Packit df99a1
          jshp.parent = closest;
Packit df99a1
          // Exact match ==> Substitution
Packit df99a1
          if (best_score == 0)
Packit df99a1
            {
Packit df99a1
              lib[current].match = closest;
Packit df99a1
              lib[current].bits = 0;
Packit df99a1
            }
Packit df99a1
          // ISSUE: CROSS-IMPROVING.  When we decide not to do a substitution,
Packit df99a1
          // we can slightly modify the current shape in order to make it
Packit df99a1
          // closer to the matching shape, therefore improving the file size.
Packit df99a1
          // In fact there is a continuity between pure cross-coding and pure
Packit df99a1
          // substitution...
Packit df99a1
        }
Packit df99a1
    }
Packit df99a1
  
Packit df99a1
  // Process shape substitutions
Packit df99a1
  for (int blitno=0; blitno<jimg->get_blit_count(); blitno++)
Packit df99a1
    {
Packit df99a1
      JB2Blit *jblt = jimg->get_blit(blitno);
Packit df99a1
      JB2Shape &jshp = jimg->get_shape(jblt->shapeno);
Packit df99a1
      if (lib[jblt->shapeno].bits==0 && jshp.parent>=0)
Packit df99a1
        {
Packit df99a1
          // Locate parent
Packit df99a1
          int parent = jshp.parent;
Packit df99a1
          while (! lib[parent].bits)
Packit df99a1
            parent = lib[parent].match;
Packit df99a1
          // Compute coordinate adjustment.
Packit df99a1
          int cols = jshp.bits->columns();
Packit df99a1
          int rows = jshp.bits->rows();
Packit df99a1
          int cross_cols = lib[parent].bits->columns();
Packit df99a1
          int cross_rows = lib[parent].bits->rows();
Packit df99a1
          int cross_col_adjust = (cross_cols-cross_cols/2)-(cols-cols/2);
Packit df99a1
          int cross_row_adjust = (cross_rows-cross_rows/2)-(rows-rows/2);
Packit df99a1
           // Refine vertical adjustment
Packit df99a1
          if (lossy)
Packit df99a1
            {
Packit df99a1
              int adjust = compute_baseline(lib[parent].bits) 
Packit df99a1
                         - compute_baseline(jshp.bits);
Packit df99a1
              if (adjust < 0)
Packit df99a1
                adjust = - (2 - adjust) / 4;
Packit df99a1
              else
Packit df99a1
                adjust =   (2 + adjust) / 4;
Packit df99a1
              if (abs(adjust - cross_row_adjust) <= 1 + cols/16 )
Packit df99a1
                cross_row_adjust = adjust;
Packit df99a1
            }
Packit df99a1
          // Update blit record.
Packit df99a1
          jblt->bottom -= cross_row_adjust;
Packit df99a1
          jblt->left   -= cross_col_adjust;
Packit df99a1
          jblt->shapeno = parent;
Packit df99a1
          // Update shape record.
Packit df99a1
          jshp.bits = 0;
Packit df99a1
        }
Packit df99a1
    }
Packit df99a1
}
Packit df99a1
Packit df99a1
Packit df99a1
Packit df99a1
Packit df99a1
// ----------------------------------------
Packit df99a1
// LOSSLESS COMPRESSION
Packit df99a1
Packit df99a1
Packit df99a1
void 
Packit df99a1
tune_jb2image_lossless(JB2Image *jimg)
Packit df99a1
{
Packit df99a1
  int nshapes = jimg->get_shape_count();
Packit df99a1
  GArray<MatchData> lib(nshapes);
Packit df99a1
  compute_matchdata_lossless(jimg, lib);
Packit df99a1
  tune_jb2image(jimg, lib, false);
Packit df99a1
}
Packit df99a1
Packit df99a1
Packit df99a1
// ----------------------------------------
Packit df99a1
// LOSSY COMPRESSION
Packit df99a1
// Thanks to Ilya Mezhirov.
Packit df99a1
Packit df99a1
void 
Packit df99a1
tune_jb2image_lossy(JB2Image *jimg, int dpi, int aggression)
Packit df99a1
{
Packit df99a1
  int nshapes = jimg->get_shape_count();
Packit df99a1
  GArray<MatchData> lib(nshapes);
Packit df99a1
Packit df99a1
  mdjvu_matcher_options_t options = mdjvu_matcher_options_create();
Packit df99a1
  mdjvu_set_aggression(options, aggression);
Packit df99a1
  compute_matchdata_lossy(jimg, lib, dpi, options);
Packit df99a1
  mdjvu_matcher_options_destroy(options);
Packit df99a1
Packit df99a1
  tune_jb2image(jimg, lib, true);
Packit df99a1
}