//C- -*- C++ -*- //C- ------------------------------------------------------------------- //C- DjVuLibre-3.5 //C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. //C- Copyright (c) 2001 AT&T //C- //C- This software is subject to, and may be distributed under, the //C- GNU General Public License, either Version 2 of the license, //C- or (at your option) any later version. The license should have //C- accompanied the software or you may obtain a copy of the license //C- from the Free Software Foundation at http://www.fsf.org . //C- //C- This program is distributed in the hope that it will be useful, //C- but WITHOUT ANY WARRANTY; without even the implied warranty of //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //C- GNU General Public License for more details. //C- //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from //C- Lizardtech Software. Lizardtech Software has authorized us to //C- replace the original DjVu(r) Reference Library notice by the following //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu): //C- //C- ------------------------------------------------------------------ //C- | DjVu (r) Reference Library (v. 3.5) //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. //C- | The DjVu Reference Library is protected by U.S. Pat. No. //C- | 6,058,214 and patents pending. //C- | //C- | This software is subject to, and may be distributed under, the //C- | GNU General Public License, either Version 2 of the license, //C- | or (at your option) any later version. The license should have //C- | accompanied the software or you may obtain a copy of the license //C- | from the Free Software Foundation at http://www.fsf.org . //C- | //C- | The computer code originally released by LizardTech under this //C- | license and unmodified by other parties is deemed "the LIZARDTECH //C- | ORIGINAL CODE." Subject to any third party intellectual property //C- | claims, LizardTech grants recipient a worldwide, royalty-free, //C- | non-exclusive license to make, use, sell, or otherwise dispose of //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU //C- | General Public License. This grant only confers the right to //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to //C- | the extent such infringement is reasonably necessary to enable //C- | recipient to make, have made, practice, sell, or otherwise dispose //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to //C- | any greater extent that may be necessary to utilize further //C- | modifications or combinations. //C- | //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. //C- +------------------------------------------------------------------ #ifdef HAVE_CONFIG_H # include "config.h" #endif #if NEED_GNUG_PRAGMAS # pragma implementation #endif #include "DjVuGlobal.h" #include "GException.h" #include "GSmartPointer.h" #include "GContainer.h" #include "GRect.h" #include "GBitmap.h" #include "JB2Image.h" #include "jb2tune.h" #include "jb2cmp/minidjvu.h" #include "jb2cmp/patterns.h" #include "jb2cmp/classify.h" #include #define REFINE_THRESHOLD 21 // ---------------------------------------- // UTILITIES // Keep informations for pattern matching struct MatchData { GP bits; // bitmap pointer int area; // number of black pixels int match; // jb2cmp pattern match }; // Compute the number of black pixels. static int compute_area(GBitmap *bits) { GBitmap &bitmap = *bits; int w = bitmap.columns(); int h = bitmap.rows(); int black_pixels = 0; for (int i=0; irows(); int w = bits->columns(); GTArray mass(h); int i, j, m; int tm = 0; for (i=0; i0; m--) if (row[j+m-1]) break; mass[i] = m; tm += m; } m = 0; i = 0; while (m * 6 < tm * 4) { m += mass[i/4]; i += 1; } return i; } // Fill the MatchData array for lossless compression static void compute_matchdata_lossless(JB2Image *jimg, MatchData *lib) { int i; int nshapes = jimg->get_shape_count(); for (i=0; iget_shape(i); lib[i].bits = 0; lib[i].area = 0; lib[i].match = -1; if (! jshp.bits) continue; if (jshp.userdata & JB2SHAPE_SPECIAL) continue; lib[i].bits = jshp.bits; lib[i].area = compute_area(jshp.bits); } } // Interface with Ilya's data structures. static mdjvu_pattern_t compute_comparable_image(GBitmap *bits) { int w = bits->columns(); int h = bits->rows(); GTArray p(h); for (int i=0; iget_shape_count(); // Prepare MatchData GTArray handles(nshapes); for (i=0; iget_shape(i); lib[i].bits = 0; lib[i].area = 0; lib[i].match = -1; handles[i] = 0; if (! jshp.bits) continue; if (jshp.userdata & JB2SHAPE_SPECIAL) continue; lib[i].bits = jshp.bits; lib[i].area = compute_area(jshp.bits); handles[i] = compute_comparable_image(jshp.bits); } // Run Ilya's pattern matcher. GTArray tags(nshapes); int maxtag = mdjvu_classify_patterns(handles, tags, nshapes, dpi, options); // Extract substitutions GTArray reps(maxtag); for (i=0; i<=maxtag; i++) reps[i] = -1; for (i=0; iget_shape_count(); // Loop on all shapes for (int current=0; currentget_shape(current); // Process substitutions. if (lossy && !(jshp.userdata & JB2SHAPE_LOSSLESS)) { int substitute = lib[current].match; if (substitute >= 0) { jshp.parent = substitute; lib[current].bits = 0; continue; } } // Leave special shapes alone. if (! jshp.bits) continue; if (jshp.userdata & JB2SHAPE_SPECIAL) continue; // Compute matchdata info GBitmap &bitmap = *(jshp.bits); int rows = bitmap.rows(); int cols = bitmap.columns(); int best_score = (REFINE_THRESHOLD * rows * cols + 50) / 100; int black_pixels = lib[current].area; int closest = -1; // Search cross-coding buddy bitmap.minborder(2); if (best_score < 2) best_score = 2; for (int candidate = 0; candidate < current; candidate++) { int row, column; // Access candidate bitmap if (! lib[candidate].bits) continue; GBitmap &cross_bitmap = *lib[candidate].bits; int cross_cols = cross_bitmap.columns(); int cross_rows = cross_bitmap.rows(); // Prune if (abs (lib[candidate].area - black_pixels) > best_score) continue; if (abs (cross_rows - rows) > 2) continue; if (abs (cross_cols - cols) > 2) continue; // Compute alignment (these are always +1, 0 or -1) int cross_col_adjust = (cross_cols-cross_cols/2)-(cols-cols/2); int cross_row_adjust = (cross_rows-cross_rows/2)-(rows-rows/2); // Ensure adequate borders cross_bitmap.minborder (2-cross_col_adjust); cross_bitmap.minborder (2+cols-cross_cols+cross_col_adjust); // Count pixel differences (including borders) int score = 0; unsigned char *p_row; unsigned char *p_cross_row; for (row = -1; row <= rows; row++) { p_row = bitmap[row]; p_cross_row = cross_bitmap[row+cross_row_adjust]; p_cross_row += cross_col_adjust; for (column = -1; column <= cols; column++) if (p_row[column] != p_cross_row[column]) score ++; if (score >= best_score) // prune break; } if (score < best_score) { best_score = score; closest = candidate; } } // Decide what to do with the match. if (closest >= 0) { // Mark the shape for cross-coding (``soft pattern matching'') jshp.parent = closest; // Exact match ==> Substitution if (best_score == 0) { lib[current].match = closest; lib[current].bits = 0; } // ISSUE: CROSS-IMPROVING. When we decide not to do a substitution, // we can slightly modify the current shape in order to make it // closer to the matching shape, therefore improving the file size. // In fact there is a continuity between pure cross-coding and pure // substitution... } } // Process shape substitutions for (int blitno=0; blitnoget_blit_count(); blitno++) { JB2Blit *jblt = jimg->get_blit(blitno); JB2Shape &jshp = jimg->get_shape(jblt->shapeno); if (lib[jblt->shapeno].bits==0 && jshp.parent>=0) { // Locate parent int parent = jshp.parent; while (! lib[parent].bits) parent = lib[parent].match; // Compute coordinate adjustment. int cols = jshp.bits->columns(); int rows = jshp.bits->rows(); int cross_cols = lib[parent].bits->columns(); int cross_rows = lib[parent].bits->rows(); int cross_col_adjust = (cross_cols-cross_cols/2)-(cols-cols/2); int cross_row_adjust = (cross_rows-cross_rows/2)-(rows-rows/2); // Refine vertical adjustment if (lossy) { int adjust = compute_baseline(lib[parent].bits) - compute_baseline(jshp.bits); if (adjust < 0) adjust = - (2 - adjust) / 4; else adjust = (2 + adjust) / 4; if (abs(adjust - cross_row_adjust) <= 1 + cols/16 ) cross_row_adjust = adjust; } // Update blit record. jblt->bottom -= cross_row_adjust; jblt->left -= cross_col_adjust; jblt->shapeno = parent; // Update shape record. jshp.bits = 0; } } } // ---------------------------------------- // LOSSLESS COMPRESSION void tune_jb2image_lossless(JB2Image *jimg) { int nshapes = jimg->get_shape_count(); GArray lib(nshapes); compute_matchdata_lossless(jimg, lib); tune_jb2image(jimg, lib, false); } // ---------------------------------------- // LOSSY COMPRESSION // Thanks to Ilya Mezhirov. void tune_jb2image_lossy(JB2Image *jimg, int dpi, int aggression) { int nshapes = jimg->get_shape_count(); GArray lib(nshapes); mdjvu_matcher_options_t options = mdjvu_matcher_options_create(); mdjvu_set_aggression(options, aggression); compute_matchdata_lossy(jimg, lib, dpi, options); mdjvu_matcher_options_destroy(options); tune_jb2image(jimg, lib, true); }