|
Packit |
664db3 |
#ifdef HAVE_CONFIG_H
|
|
Packit |
664db3 |
#include "config.h"
|
|
Packit |
664db3 |
#endif
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
#include "arch.h"
|
|
Packit |
664db3 |
#include "entdec.h"
|
|
Packit |
664db3 |
#include "mfrngcod.h"
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
/*A range decoder.
|
|
Packit |
664db3 |
This is an entropy decoder based upon \cite{Mar79}, which is itself a
|
|
Packit |
664db3 |
rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
|
|
Packit |
664db3 |
It is very similar to arithmetic encoding, except that encoding is done with
|
|
Packit |
664db3 |
digits in any base, instead of with bits, and so it is faster when using
|
|
Packit |
664db3 |
larger bases (i.e.: a byte).
|
|
Packit |
664db3 |
The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
|
|
Packit |
664db3 |
is the base, longer than the theoretical optimum, but to my knowledge there
|
|
Packit |
664db3 |
is no published justification for this claim.
|
|
Packit |
664db3 |
This only seems true when using near-infinite precision arithmetic so that
|
|
Packit |
664db3 |
the process is carried out with no rounding errors.
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
IBM (the author's employer) never sought to patent the idea, and to my
|
|
Packit |
664db3 |
knowledge the algorithm is unencumbered by any patents, though its
|
|
Packit |
664db3 |
performance is very competitive with proprietary arithmetic coding.
|
|
Packit |
664db3 |
The two are based on very similar ideas, however.
|
|
Packit |
664db3 |
An excellent description of implementation details is available at
|
|
Packit |
664db3 |
http://www.arturocampos.com/ac_range.html
|
|
Packit |
664db3 |
A recent work \cite{MNW98} which proposes several changes to arithmetic
|
|
Packit |
664db3 |
encoding for efficiency actually re-discovers many of the principles
|
|
Packit |
664db3 |
behind range encoding, and presents a good theoretical analysis of them.
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
This coder handles the end of the stream in a slightly more graceful fashion
|
|
Packit |
664db3 |
than most arithmetic or range coders.
|
|
Packit |
664db3 |
Once the final symbol has been encoded, the coder selects the code word with
|
|
Packit |
664db3 |
the shortest number of bits that still falls within the final interval.
|
|
Packit |
664db3 |
This method is not novel.
|
|
Packit |
664db3 |
Here, by the length of the code word, we refer to the number of bits until
|
|
Packit |
664db3 |
its final 1.
|
|
Packit |
664db3 |
Any trailing zeros may be discarded, since the encoder, once it runs out of
|
|
Packit |
664db3 |
input, will pad its buffer with zeros.
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
But this means that no encoded stream would ever have any zero bytes at the
|
|
Packit |
664db3 |
end.
|
|
Packit |
664db3 |
Since there are some coded representations we cannot produce, it implies that
|
|
Packit |
664db3 |
there is still some redundancy in the stream.
|
|
Packit |
664db3 |
In this case, we can pick a special byte value, RSV1, and should the stream
|
|
Packit |
664db3 |
end in a sequence of zeros, followed by the RSV1 byte, we can code the
|
|
Packit |
664db3 |
zeros, and discard the RSV1 byte.
|
|
Packit |
664db3 |
The decoder, knowing that the encoder would never produce a sequence of zeros
|
|
Packit |
664db3 |
at the end, would then know to add in the RSV1 byte if it observed it.
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
Now, the encoder would never produce a stream that ended in a sequence of
|
|
Packit |
664db3 |
zeros followed by a RSV1 byte.
|
|
Packit |
664db3 |
So, if the stream ends in a non-empty sequence of zeros, followed by any
|
|
Packit |
664db3 |
positive number of RSV1 bytes, the last RSV1 byte is discarded.
|
|
Packit |
664db3 |
The decoder, if it encounters a stream that ends in non-empty sequence of
|
|
Packit |
664db3 |
zeros followed by any non-negative number of RSV1 bytes, adds an additional
|
|
Packit |
664db3 |
RSV1 byte to the stream.
|
|
Packit |
664db3 |
With this strategy, every possible sequence of input bytes is transformed to
|
|
Packit |
664db3 |
one that could actually be produced by the encoder.
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
The only question is what non-zero value to use for RSV1.
|
|
Packit |
664db3 |
We select 0x80, since it has the nice property of producing the shortest
|
|
Packit |
664db3 |
possible byte streams when using our strategy for selecting a number within
|
|
Packit |
664db3 |
the final interval to encode.
|
|
Packit |
664db3 |
Clearly if the shortest possible code word that falls within the interval has
|
|
Packit |
664db3 |
its last one bit as the most significant bit of the final byte, and the
|
|
Packit |
664db3 |
previous bytes were a non-empty sequence of zeros followed by a non-negative
|
|
Packit |
664db3 |
number of 0x80 bytes, then the last byte would be discarded.
|
|
Packit |
664db3 |
If the shortest code word is not so formed, then no other code word in the
|
|
Packit |
664db3 |
interval would result in any more bytes being discarded.
|
|
Packit |
664db3 |
Any longer code word would have an additional one bit somewhere, and so would
|
|
Packit |
664db3 |
require at a minimum that that byte would be coded.
|
|
Packit |
664db3 |
If the shortest code word has a 1 before the final one that is preventing the
|
|
Packit |
664db3 |
stream from ending in a non-empty sequence of zeros followed by a
|
|
Packit |
664db3 |
non-negative number of 0x80's, then there is no code word of the same length
|
|
Packit |
664db3 |
which contains that bit as a zero.
|
|
Packit |
664db3 |
If there were, then we could simply leave that bit a 1, and drop all the bits
|
|
Packit |
664db3 |
after it without leaving the interval, thus producing a shorter code word.
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
In this case, RSV1 can only drop 1 bit off the final stream.
|
|
Packit |
664db3 |
Other choices could lead to savings of up to 8 bits for particular streams,
|
|
Packit |
664db3 |
but this would produce the odd situation that a stream with more non-zero
|
|
Packit |
664db3 |
bits is actually encoded in fewer bytes.
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
@PHDTHESIS{Pas76,
|
|
Packit |
664db3 |
author="Richard Clark Pasco",
|
|
Packit |
664db3 |
title="Source coding algorithms for fast data compression",
|
|
Packit |
664db3 |
school="Dept. of Electrical Engineering, Stanford University",
|
|
Packit |
664db3 |
address="Stanford, CA",
|
|
Packit |
664db3 |
month=May,
|
|
Packit |
664db3 |
year=1976
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
@INPROCEEDINGS{Mar79,
|
|
Packit |
664db3 |
author="Martin, G.N.N.",
|
|
Packit |
664db3 |
title="Range encoding: an algorithm for removing redundancy from a digitised
|
|
Packit |
664db3 |
message",
|
|
Packit |
664db3 |
booktitle="Video & Data Recording Conference",
|
|
Packit |
664db3 |
year=1979,
|
|
Packit |
664db3 |
address="Southampton",
|
|
Packit |
664db3 |
month=Jul
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
@ARTICLE{MNW98,
|
|
Packit |
664db3 |
author="Alistair Moffat and Radford Neal and Ian H. Witten",
|
|
Packit |
664db3 |
title="Arithmetic Coding Revisited",
|
|
Packit |
664db3 |
journal="{ACM} Transactions on Information Systems",
|
|
Packit |
664db3 |
year=1998,
|
|
Packit |
664db3 |
volume=16,
|
|
Packit |
664db3 |
number=3,
|
|
Packit |
664db3 |
pages="256--294",
|
|
Packit |
664db3 |
month=Jul,
|
|
Packit |
664db3 |
URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
|
|
Packit |
664db3 |
}*/
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
/*Gets the next byte of input.
|
|
Packit |
664db3 |
After all the bytes in the current packet have been consumed, and the extra
|
|
Packit |
664db3 |
end code returned if needed, this function will continue to return zero each
|
|
Packit |
664db3 |
time it is called.
|
|
Packit |
664db3 |
Return: The next byte of input.*/
|
|
Packit |
664db3 |
static int ec_dec_in(ec_dec *_this){
|
|
Packit |
664db3 |
int ret;
|
|
Packit |
664db3 |
ret=ec_byte_read1(_this->buf);
|
|
Packit |
664db3 |
if(ret<0){
|
|
Packit |
664db3 |
ret=0;
|
|
Packit |
664db3 |
/*Needed to keep oc_dec_tell() operating correctly.*/
|
|
Packit |
664db3 |
ec_byte_adv1(_this->buf);
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
return ret;
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
/*Normalizes the contents of dif and rng so that rng lies entirely in the
|
|
Packit |
664db3 |
high-order symbol.*/
|
|
Packit |
664db3 |
static inline void ec_dec_normalize(ec_dec *_this){
|
|
Packit |
664db3 |
/*If the range is too small, rescale it and input some bits.*/
|
|
Packit |
664db3 |
while(_this->rng<=EC_CODE_BOT){
|
|
Packit |
664db3 |
int sym;
|
|
Packit |
664db3 |
_this->rng<<=EC_SYM_BITS;
|
|
Packit |
664db3 |
/*Use up the remaining bits from our last symbol.*/
|
|
Packit |
664db3 |
sym=_this->rem<
|
|
Packit |
664db3 |
/*Read the next value from the input.*/
|
|
Packit |
664db3 |
_this->rem=ec_dec_in(_this);
|
|
Packit |
664db3 |
/*Take the rest of the bits we need from this new symbol.*/
|
|
Packit |
664db3 |
sym|=_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA;
|
|
Packit |
664db3 |
_this->dif=(_this->dif<
|
|
Packit |
664db3 |
/*dif can never be larger than EC_CODE_TOP.
|
|
Packit |
664db3 |
This is equivalent to the slightly more readable:
|
|
Packit |
664db3 |
if(_this->dif>EC_CODE_TOP)_this->dif-=EC_CODE_TOP;*/
|
|
Packit |
664db3 |
_this->dif^=_this->dif&_this->dif-1&EC_CODE_TOP;
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
void ec_dec_init(ec_dec *_this,ec_byte_buffer *_buf){
|
|
Packit |
664db3 |
_this->buf=_buf;
|
|
Packit |
664db3 |
_this->rem=ec_dec_in(_this);
|
|
Packit |
664db3 |
_this->rng=1U<
|
|
Packit |
664db3 |
_this->dif=_this->rng-(_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA);
|
|
Packit |
664db3 |
/*Normalize the interval.*/
|
|
Packit |
664db3 |
ec_dec_normalize(_this);
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
unsigned ec_decode(ec_dec *_this,unsigned _ft){
|
|
Packit |
664db3 |
unsigned s;
|
|
Packit |
664db3 |
_this->nrm=_this->rng/_ft;
|
|
Packit |
664db3 |
s=(unsigned)((_this->dif-1)/_this->nrm);
|
|
Packit |
664db3 |
return _ft-EC_MINI(s+1,_ft);
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
unsigned ec_decode_bin(ec_dec *_this,unsigned bits){
|
|
Packit |
664db3 |
unsigned s;
|
|
Packit |
664db3 |
ec_uint32 ft;
|
|
Packit |
664db3 |
ft = (ec_uint32)1<
|
|
Packit |
664db3 |
_this->nrm=_this->rng>>bits;
|
|
Packit |
664db3 |
s=(unsigned)((_this->dif-1)/_this->nrm);
|
|
Packit |
664db3 |
return ft-EC_MINI(s+1,ft);
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){
|
|
Packit |
664db3 |
ec_uint32 s;
|
|
Packit |
664db3 |
s=IMUL32(_this->nrm,(_ft-_fh));
|
|
Packit |
664db3 |
_this->dif-=s;
|
|
Packit |
664db3 |
_this->rng=_fl>0?IMUL32(_this->nrm,(_fh-_fl)):_this->rng-s;
|
|
Packit |
664db3 |
ec_dec_normalize(_this);
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
long ec_dec_tell(ec_dec *_this,int _b){
|
|
Packit |
664db3 |
ec_uint32 r;
|
|
Packit |
664db3 |
int l;
|
|
Packit |
664db3 |
long nbits;
|
|
Packit |
664db3 |
nbits=(ec_byte_bytes(_this->buf)-(EC_CODE_BITS+EC_SYM_BITS-1)/EC_SYM_BITS)*
|
|
Packit |
664db3 |
EC_SYM_BITS;
|
|
Packit |
664db3 |
/*To handle the non-integral number of bits still left in the encoder state,
|
|
Packit |
664db3 |
we compute the number of bits of low that must be encoded to ensure that
|
|
Packit |
664db3 |
the value is inside the range for any possible subsequent bits.
|
|
Packit |
664db3 |
Note that this is subtly different than the actual value we would end the
|
|
Packit |
664db3 |
stream with, which tries to make as many of the trailing bits zeros as
|
|
Packit |
664db3 |
possible.*/
|
|
Packit |
664db3 |
nbits+=EC_CODE_BITS;
|
|
Packit |
664db3 |
nbits<<=_b;
|
|
Packit |
664db3 |
l=EC_ILOG(_this->rng);
|
|
Packit |
664db3 |
r=_this->rng>>l-16;
|
|
Packit |
664db3 |
while(_b-->0){
|
|
Packit |
664db3 |
int b;
|
|
Packit |
664db3 |
r=r*r>>15;
|
|
Packit |
664db3 |
b=(int)(r>>16);
|
|
Packit |
664db3 |
l=l<<1|b;
|
|
Packit |
664db3 |
r>>=b;
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
return nbits-l;
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
|
|
Packit |
664db3 |
#if 0
|
|
Packit |
664db3 |
int ec_dec_done(ec_dec *_this){
|
|
Packit |
664db3 |
unsigned low;
|
|
Packit |
664db3 |
int ret;
|
|
Packit |
664db3 |
/*Check to make sure we've used all the input bytes.
|
|
Packit |
664db3 |
This ensures that no more ones would ever be inserted into the decoder.*/
|
|
Packit |
664db3 |
if(_this->buf->ptr-ec_byte_get_buffer(_this->buf)<=
|
|
Packit |
664db3 |
ec_byte_bytes(_this->buf)){
|
|
Packit |
664db3 |
return 0;
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
/*We compute the smallest finitely odd fraction that fits inside the current
|
|
Packit |
664db3 |
range, and write that to the stream.
|
|
Packit |
664db3 |
This is guaranteed to yield the smallest possible encoding.*/
|
|
Packit |
664db3 |
/*TODO: Fix this line, as it is wrong.
|
|
Packit |
664db3 |
It doesn't seem worth being able to make this check to do an extra
|
|
Packit |
664db3 |
subtraction for every symbol decoded.*/
|
|
Packit |
664db3 |
low=/*What we want: _this->top-_this->rng; What we have:*/_this->dif
|
|
Packit |
664db3 |
if(low){
|
|
Packit |
664db3 |
unsigned end;
|
|
Packit |
664db3 |
end=EC_CODE_TOP;
|
|
Packit |
664db3 |
/*Ensure that the next free end is in the range.*/
|
|
Packit |
664db3 |
if(end-low>=_this->rng){
|
|
Packit |
664db3 |
unsigned msk;
|
|
Packit |
664db3 |
msk=EC_CODE_TOP-1;
|
|
Packit |
664db3 |
do{
|
|
Packit |
664db3 |
msk>>=1;
|
|
Packit |
664db3 |
end=(low+msk)&~msk|msk+1;
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
while(end-low>=_this->rng);
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
/*The remaining input should have been the next free end.*/
|
|
Packit |
664db3 |
return end-low!=_this->dif;
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
return 1;
|
|
Packit |
664db3 |
}
|
|
Packit |
664db3 |
#endif
|