/* pbmtog3.c - read a PBM image and produce a Group 3 FAX file
For specifications for Group 3 (G3) fax MH coding see ITU-T T.4:
Standardization of Group 3 facsimile terminals for document transmission
https://www.itu.int/rec/T-REC-T.4/en
This program generates only MH.
*/
#include <assert.h>
#include "pm_c_util.h"
#include "shhopt.h"
#include "mallocvar.h"
#include "bitreverse.h"
#include "intcode.h"
#include "g3.h"
#include "pbm.h"
enum G3eol {EOL, ALIGN8, ALIGN16, NO_EOL, NO_RTC, NO_EOLRTC};
struct OutStream;
struct OutStream {
FILE * fp;
struct BitString buffer;
bool reverseBits; /* Reverse bit order */
enum G3eol eolAlign; /* Omit EOL and/or RTC; align EOL to 8/16 bits */
void * data; /* Reserved for future expansion */
};
struct CmdlineInfo {
/* All the information the user supplied in the command line,
in a form easy for the program to use.
*/
const char * inputFileName;
unsigned int reversebits;
enum G3eol align;
unsigned int desiredWidth;
unsigned int verbose;
};
static void
parseCommandLine(int argc, const char ** const argv,
struct CmdlineInfo * const cmdlineP) {
/*----------------------------------------------------------------------------
Note that the file spec array we return is stored in the storage that
was passed to us as the argv array.
-----------------------------------------------------------------------------*/
optEntry * option_def;
/* Instructions to OptParseOptions2 on how to parse our options. */
optStruct3 opt;
unsigned int nofixedwidth;
unsigned int align8, align16;
unsigned int option_def_index;
MALLOCARRAY_NOFAIL(option_def, 100);
option_def_index = 0; /* incremented by OPTENTRY */
OPTENT3(0, "reversebits", OPT_FLAG, NULL, &cmdlineP->reversebits,
0);
OPTENT3(0, "nofixedwidth", OPT_FLAG, NULL, &nofixedwidth,
0);
OPTENT3(0, "align8", OPT_FLAG, NULL, &align8,
0);
OPTENT3(0, "align16", OPT_FLAG, NULL, &align16,
0);
OPTENT3(0, "verbose", OPT_FLAG, NULL, &cmdlineP->verbose,
0);
/* TODO
Explicit fixed widths: -A4 -B4 -A3
*/
opt.opt_table = option_def;
opt.short_allowed = false; /* We have no short (old-fashioned) options */
opt.allowNegNum = true; /* We may have parms that are negative numbers */
pm_optParseOptions3(&argc, (char **)argv, opt, sizeof(opt), 0);
/* Uses and sets argc, argv, and some of *cmdlineP and others. */
free(option_def);
if (align8) {
if (align16)
pm_error("You can't specify both -align8 and -align16");
else
cmdlineP->align = ALIGN8;
} else if (align16)
cmdlineP->align = ALIGN16;
else
cmdlineP->align = EOL;
if (nofixedwidth)
cmdlineP->desiredWidth = 0;
else
cmdlineP->desiredWidth = 1728;
if (argc-1 == 0)
cmdlineP->inputFileName = "-";
else if (argc-1 != 1)
pm_error("Program takes zero or one argument (filename). You "
"specified %d", argc-1);
else
cmdlineP->inputFileName = argv[1];
}
static void
reversebuffer(unsigned char * const p,
unsigned int const n) {
unsigned int i;
for (i = 0; i < n; ++i)
p[i] = bitreverse[p[i]];
}
static void
flushBuffer(struct OutStream * const outP) {
/*----------------------------------------------------------------------------
Flush the contents of the bit buffer
-----------------------------------------------------------------------------*/
struct BitString const buffer = outP->buffer;
assert (buffer.bitCount <= 32);
if (buffer.bitCount > 0) {
unsigned int const fullBuffer = sizeof(buffer.intBuffer) * 8;
unsigned int const bytesToWrite = (buffer.bitCount+7)/8;
bigend32 outbytes;
size_t rc;
outbytes = pm_bigendFromUint32(
buffer.intBuffer << (fullBuffer - buffer.bitCount));
if (outP->reverseBits)
reversebuffer((unsigned char *)&outbytes, bytesToWrite);
rc = fwrite((unsigned char *)&outbytes, 1, bytesToWrite, outP->fp);
if (rc != bytesToWrite)
pm_error("Output error");
}
}
#if 1==0
static void
putbitsDump(struct OutStream * const outP,
struct BitString const newBits) {
/*----------------------------------------------------------------------------
Print the content of the bit put request, in human readable text form
For debugging. Also good for studying how the coding scheme works.
By default the compiler ignores this function.
To turn on, remove the "#if" - "#endif" lines enclosing the function and
edit the output function name in putbits().
-----------------------------------------------------------------------------*/
unsigned int const bitCount = newBits.bitCount;
unsigned int i;
char charBuff[128];
assert (bitCount >= 0 && bitCount < 32);
assert (sizeof(newBits.intBuffer) + 2 < 128);
for (i = 0; i < bitCount; ++i) {
unsigned int const n = bitCount - i - 1;
charBuff[i] = ((newBits.intBuffer >> n) & 0x01) + '0';
}
charBuff[bitCount] = '\n';
charBuff[bitCount+1] = '\0';
fwrite(charBuff, 1, bitCount+1, outP->fp);
}
#endif
static void
putbitsBinary(struct OutStream * const outP,
struct BitString const newBits) {
/*----------------------------------------------------------------------------
Push the bits 'newBits' onto the right end of output buffer
out.buffer (moving the bits already in the buffer left).
Flush the buffer to stdout as necessary to make room.
'newBits' must be shorter than a whole word.
N.B. the definition of struct BitString requires upper bits to be zero.
-----------------------------------------------------------------------------*/
unsigned int const fullBuffer = sizeof(outP->buffer.intBuffer) * 8;
unsigned int const spaceLeft = fullBuffer - outP->buffer.bitCount;
/* Number of bits of unused space (at the high end) in buffer */
assert(newBits.bitCount < fullBuffer);
assert(newBits.intBuffer >> newBits.bitCount == 0);
if (spaceLeft > newBits.bitCount) {
/* New bits fit with bits to spare */
outP->buffer.intBuffer =
outP->buffer.intBuffer << newBits.bitCount | newBits.intBuffer;
outP->buffer.bitCount += newBits.bitCount;
} else {
/* New bits fill buffer. We'll have to flush the buffer to stdout
and put the rest of the bits in the new buffer.
*/
unsigned int const nextBufBitCount = newBits.bitCount - spaceLeft;
unsigned int const bitMask = ((1<<nextBufBitCount) - 1);
outP->buffer.intBuffer = ( (outP->buffer.intBuffer << spaceLeft)
| (newBits.intBuffer >> nextBufBitCount));
outP->buffer.bitCount = fullBuffer;
flushBuffer(outP);
outP->buffer.intBuffer = newBits.intBuffer & bitMask;
outP->buffer.bitCount = nextBufBitCount;
}
}
static void
initOutStream(struct OutStream * const outP,
bool const reverseBits,
enum G3eol const eolAlign) {
outP->buffer.intBuffer = 0;
outP->buffer.bitCount = 0;
outP->reverseBits = reverseBits;
outP->fp = stdout;
outP->eolAlign = eolAlign;
}
static struct BitString
tableEntryToBitString(G3TableEntry const tableEntry) {
struct BitString retval;
retval.intBuffer = tableEntry.code;
retval.bitCount = tableEntry.length;
return retval;
}
static void
putbits(struct OutStream * const outP,
struct BitString const newBits) {
putbitsBinary(outP, newBits);
/* Change to putbitsDump() for human readable output */
}
static void
putcodeShort(struct OutStream * const outP,
bit const color,
unsigned int const runLength) {
/* Note that this requires ttable to be aligned white entry, black
entry, white, black, etc.
*/
unsigned int index = runLength * 2 + color;
putbits(outP, tableEntryToBitString(ttable[index]));
}
static void
putcodeLong(struct OutStream * const outP,
bit const color,
unsigned int const runLength) {
/*----------------------------------------------------------------------------
Output Make-up code and Terminating code at once.
For run lengths which require both: length 64 and above
The codes are combined here to avoid calculations in putbits()
Terminating code is max 12 bits, Make-up code is max 13 bits.
(See ttable, mtable entries in pbmtog3.h)
Also reduces object code size when putcode is compiled inline.
-----------------------------------------------------------------------------*/
unsigned int const loIndex = runLength % 64 * 2 + color;
unsigned int const hiIndex = runLength / 64 * 2 + color;
unsigned int const loLength = ttable[loIndex].length;
unsigned int const hiLength = mtable[hiIndex].length;
struct BitString combinedCode;
combinedCode.intBuffer = mtable[hiIndex].code << loLength |
ttable[loIndex].code;
combinedCode.bitCount = hiLength + loLength;
putbits(outP, combinedCode);
}
static void
putcodeExtra(struct OutStream * const outP,
int const color,
int const runLength) {
/*----------------------------------------------------------------------------
Lengths over 2560. This is rare.
According to the standard, the mark-up code for 2560 can be issued as
many times as necessary without terminal codes.
--------------------------------------------------------------------------*/
G3TableEntry const markUp2560 = mtable[2560/64*2];
/* Same code for black and white */
unsigned int remainingLen;
for (remainingLen = runLength; remainingLen > 2560; remainingLen -= 2560)
putbits(outP, tableEntryToBitString(markUp2560));
/* after the above: 0 < remainingLen <= 2560 */
if (remainingLen >= 64)
putcodeLong(outP, color, remainingLen);
else
putcodeShort(outP, color, remainingLen);
}
static void
putspan(struct OutStream * const outP,
bit const color,
unsigned int const runLength) {
if (runLength < 64)
putcodeShort(outP, color, runLength);
else if (runLength < 2560)
putcodeLong (outP, color, runLength);
else /* runLength > 2560 : rare */
putcodeExtra(outP, color, runLength);
}
static void
puteol(struct OutStream * const outP) {
switch (outP->eolAlign) {
case EOL: {
struct BitString const eol = { 1, 12 };
putbits(outP, eol);
} break;
case ALIGN8: case ALIGN16: {
unsigned int const bitCount = outP->buffer.bitCount;
unsigned int const fillbits =
(outP->eolAlign == ALIGN8) ? (44 - bitCount) % 8
: (52 - bitCount) % 16;
struct BitString eol;
eol.bitCount = 12 + fillbits; eol.intBuffer = 1;
putbits(outP, eol);
} break;
case NO_EOL: case NO_EOLRTC:
break;
case NO_RTC:
pm_error("INTERNAL ERROR: no-RTC EOL treatment not implemented");
break;
}
}
static void
putrtc(struct OutStream * const outP) {
switch (outP->eolAlign) {
case NO_RTC: case NO_EOLRTC:
break;
default:
puteol(outP); puteol(outP); puteol(outP);
puteol(outP); puteol(outP); puteol(outP);
}
}
static void
readOffSideMargins(unsigned char * const bitrow,
unsigned int const colChars,
unsigned int * const firstNonWhiteCharP,
unsigned int * const lastNonWhiteCharP,
bool * const blankRowP) {
/*----------------------------------------------------------------------------
Determine the white margins on the left and right side of a row.
This is an enhancement: convertRowToG3() works without this.
-----------------------------------------------------------------------------*/
unsigned int charCnt;
unsigned int firstChar;
unsigned int lastChar;
bool blankRow;
assert(colChars > 0);
for (charCnt = 0; charCnt < colChars && bitrow[charCnt] == 0; ++charCnt);
if (charCnt >= colChars) {
/* Reached end of bitrow with no black pixels encountered */
firstChar = lastChar = 0;
blankRow = true;
} else {
/* There is at least one black pixel in the row */
firstChar = charCnt;
blankRow = false;
charCnt = colChars - 1;
while (bitrow[charCnt--] == 0x00)
;
lastChar = charCnt + 1;
}
*firstNonWhiteCharP = firstChar;
*lastNonWhiteCharP = lastChar;
*blankRowP = blankRow;
}
static void
setBlockBitsInFinalChar(unsigned char * const finalByteP,
unsigned int const cols) {
/*----------------------------------------------------------------------------
If the char in the row is fractional, set it up so that the don't care
bits are the opposite color of the last valid pixel.
----------------------------------------------------------------------------*/
unsigned char const finalByte = *finalByteP;
unsigned int const silentBitCnt = 8 - cols % 8;
bit const rowEndColor = (finalByte >> silentBitCnt) & 0x01;
if (rowEndColor == PBM_WHITE) {
unsigned char const blackMask = (0x01 << silentBitCnt) - 1;
*finalByteP = finalByte | blackMask;
}
/* No adjustment required if the row ends with a black pixel.
pbm_cleanrowend_packed() takes care of this.
*/
}
static void
trimFinalChar(struct OutStream * const outP,
bit const color,
int const carryLength,
int const existingCols,
int const desiredWidth) {
/*---------------------------------------------------------------------------
If the carry value from the last char in the row represents a valid
sequence, output it.
(1) If input row width is not a whole multiple of 8 and -nofixwidth
was specified, the final carry value represents inactive bits
at the row end. Emit no code. See setBlockBitsInFinalChar().
(2) If there is white margin on the right side, the final carry value
is valid. We add to it the margin width. Right-side margin may
be added in main() to a narrow input image, detected in the
input row by readOffSideMargins() or both. The same treatment
applies regardless of the nature of the right-side margin.
----------------------------------------------------------------------------*/
if (existingCols == desiredWidth) {
if (existingCols % 8 == 0)
putspan(outP, color, carryLength); /* Code up to byte boundary */
/* Emit nothing if existingCols is not a whole multiple of 8 */
} else if (existingCols < desiredWidth) {
if (color == 0) { /* Last bit sequence in final char: white */
unsigned int const totalLength =
carryLength + (desiredWidth - existingCols);
putspan(outP, 0, totalLength);
} else { /* Black */
unsigned int const padLength = desiredWidth - existingCols;
putspan(outP, 1, carryLength);
putspan(outP, 0, padLength);
}
}
}
static void
convertRowToG3(struct OutStream * const outP,
unsigned char * const bitrow,
unsigned int const existingCols,
unsigned int const desiredWidth) {
/*----------------------------------------------------------------------------
Table based Huffman coding
Normally Huffman code encoders count sequences of ones and zeros
and convert them to binary codes as they terminate. This program
recognizes chains of pixels and converts them directly, reading
prefabricated code chains from an indexed table.
For example the 8-bit sequence 01100110 translates to
Huffman code: 000111 11 0111 11 000111.
In reality things are more complicated. The leftmost 0 (MSB) may be
part of a longer sequence starting in the adjacent byte or perhaps
spanning several bytes. Likewise for the rightmost 0.
So we first remove the sequence on the left side and compare its
color with the leftmost pixel of the adjacent byte and emit either
one code for a single sequence if they agree or two if they disagree.
Next the composite code for the central part (in the above example
110011 -> 11 0111 11) is emitted. Finally we save the length and
color of the sequence on the right end as carry-over for the next
byte cycle. Some 8-bit input sequences (00000000, 01111111,
00111111, etc.) have no central part: these are special cases.
---------------------------------------------------------------------------*/
unsigned int const colChars = pbm_packed_bytes(existingCols);
unsigned int charCnt;
unsigned int firstActiveChar;
unsigned int lastActiveChar;
bool blankRow;
bit borderColor;
borderColor = PBM_WHITE; /* initial value */
if (existingCols == desiredWidth && (desiredWidth % 8) > 0)
setBlockBitsInFinalChar(&bitrow[colChars-1], desiredWidth);
readOffSideMargins(bitrow, colChars,
&firstActiveChar, &lastActiveChar, &blankRow);
if (blankRow)
putspan(outP, PBM_WHITE, desiredWidth);
else {
unsigned int carryLength;
for (charCnt = firstActiveChar, carryLength = firstActiveChar * 8;
charCnt <=lastActiveChar;
++charCnt) {
unsigned char const byte = bitrow[charCnt];
bit const rColor = !borderColor;
if (byte == borderColor * 0xFF) {
carryLength += 8;
} else if (byte == (unsigned char) ~(borderColor * 0xFF)) {
putspan(outP, borderColor, carryLength);
carryLength = 8;
borderColor = rColor;
} else {
struct PrefabCode const code = prefabCode[byte];
unsigned int const activeLength =
8 - code.leadBits - code.trailBits;
if (borderColor == (byte >> 7)) {
putspan(outP, borderColor, carryLength + code.leadBits);
} else {
putspan(outP, borderColor, carryLength);
putcodeShort(outP, rColor, code.leadBits);
}
if (activeLength > 0)
putbits(outP, code.activeBits);
borderColor = byte & 0x01;
carryLength = code.trailBits;
}
}
trimFinalChar(outP, borderColor, carryLength,
(lastActiveChar + 1) * 8, desiredWidth);
}
puteol(outP);
}
int
main(int argc,
const char * argv[]) {
struct CmdlineInfo cmdline;
FILE * ifP;
unsigned char * bitrow;
/* This is the bits of the current row, as read from the input and
modified various ways at various points in the program. It has
a word of zero padding on the high (right) end for the convenience
of code that accesses this buffer in word-size bites.
*/
int rows;
int cols;
int format;
unsigned int existingCols;
unsigned int desiredWidth;
unsigned int row;
struct OutStream out;
pm_proginit(&argc, argv);
parseCommandLine(argc, argv, &cmdline);
ifP = pm_openr(cmdline.inputFileName);
pbm_readpbminit(ifP, &cols, &rows, &format);
if (cmdline.desiredWidth == 0)
desiredWidth = existingCols = cols;
else {
if (cmdline.desiredWidth < cols)
existingCols = desiredWidth = cmdline.desiredWidth;
else {
existingCols = pbm_packed_bytes(cols) * 8;
desiredWidth = cmdline.desiredWidth;
}
}
MALLOCARRAY(bitrow, pbm_packed_bytes(cols) + sizeof(uint32_t));
if (!bitrow)
pm_error("Failed to allocate a row buffer for %u columns", cols);
initOutStream(&out, cmdline.reversebits, cmdline.align);
puteol(&out);
for (row = 0; row < rows; ++row) {
pbm_readpbmrow_packed(ifP, bitrow, cols, format);
pbm_cleanrowend_packed(bitrow, cols);
convertRowToG3(&out, bitrow, existingCols, desiredWidth);
}
pbm_freerow_packed(bitrow);
putrtc(&out);
flushBuffer(&out);
pm_close(ifP);
return 0;
}