Blame libcelt/rangedec.c

Packit 664db3
#ifdef HAVE_CONFIG_H
Packit 664db3
#include "config.h"
Packit 664db3
#endif
Packit 664db3
Packit 664db3
#include "arch.h"
Packit 664db3
#include "entdec.h"
Packit 664db3
#include "mfrngcod.h"
Packit 664db3
Packit 664db3
Packit 664db3
Packit 664db3
/*A range decoder.
Packit 664db3
  This is an entropy decoder based upon \cite{Mar79}, which is itself a
Packit 664db3
   rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
Packit 664db3
  It is very similar to arithmetic encoding, except that encoding is done with
Packit 664db3
   digits in any base, instead of with bits, and so it is faster when using
Packit 664db3
   larger bases (i.e.: a byte).
Packit 664db3
  The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
Packit 664db3
   is the base, longer than the theoretical optimum, but to my knowledge there
Packit 664db3
   is no published justification for this claim.
Packit 664db3
  This only seems true when using near-infinite precision arithmetic so that
Packit 664db3
   the process is carried out with no rounding errors.
Packit 664db3
Packit 664db3
  IBM (the author's employer) never sought to patent the idea, and to my
Packit 664db3
   knowledge the algorithm is unencumbered by any patents, though its
Packit 664db3
   performance is very competitive with proprietary arithmetic coding.
Packit 664db3
  The two are based on very similar ideas, however.
Packit 664db3
  An excellent description of implementation details is available at
Packit 664db3
   http://www.arturocampos.com/ac_range.html
Packit 664db3
  A recent work \cite{MNW98} which proposes several changes to arithmetic
Packit 664db3
   encoding for efficiency actually re-discovers many of the principles
Packit 664db3
   behind range encoding, and presents a good theoretical analysis of them.
Packit 664db3
Packit 664db3
  This coder handles the end of the stream in a slightly more graceful fashion
Packit 664db3
   than most arithmetic or range coders.
Packit 664db3
  Once the final symbol has been encoded, the coder selects the code word with
Packit 664db3
   the shortest number of bits that still falls within the final interval.
Packit 664db3
  This method is not novel.
Packit 664db3
  Here, by the length of the code word, we refer to the number of bits until
Packit 664db3
   its final 1.
Packit 664db3
  Any trailing zeros may be discarded, since the encoder, once it runs out of
Packit 664db3
   input, will pad its buffer with zeros.
Packit 664db3
Packit 664db3
  But this means that no encoded stream would ever have any zero bytes at the
Packit 664db3
   end.
Packit 664db3
  Since there are some coded representations we cannot produce, it implies that
Packit 664db3
   there is still some redundancy in the stream.
Packit 664db3
  In this case, we can pick a special byte value, RSV1, and should the stream
Packit 664db3
   end in a sequence of zeros, followed by the RSV1 byte, we can code the
Packit 664db3
   zeros, and discard the RSV1 byte.
Packit 664db3
  The decoder, knowing that the encoder would never produce a sequence of zeros
Packit 664db3
   at the end, would then know to add in the RSV1 byte if it observed it.
Packit 664db3
Packit 664db3
  Now, the encoder would never produce a stream that ended in a sequence of
Packit 664db3
   zeros followed by a RSV1 byte.
Packit 664db3
  So, if the stream ends in a non-empty sequence of zeros, followed by any
Packit 664db3
   positive number of RSV1 bytes, the last RSV1 byte is discarded.
Packit 664db3
  The decoder, if it encounters a stream that ends in non-empty sequence of
Packit 664db3
   zeros followed by any non-negative number of RSV1 bytes, adds an additional
Packit 664db3
   RSV1 byte to the stream.
Packit 664db3
  With this strategy, every possible sequence of input bytes is transformed to
Packit 664db3
   one that could actually be produced by the encoder.
Packit 664db3
Packit 664db3
  The only question is what non-zero value to use for RSV1.
Packit 664db3
  We select 0x80, since it has the nice property of producing the shortest
Packit 664db3
   possible byte streams when using our strategy for selecting a number within
Packit 664db3
   the final interval to encode.
Packit 664db3
  Clearly if the shortest possible code word that falls within the interval has
Packit 664db3
   its last one bit as the most significant bit of the final byte, and the
Packit 664db3
   previous bytes were a non-empty sequence of zeros followed by a non-negative
Packit 664db3
   number of 0x80 bytes, then the last byte would be discarded.
Packit 664db3
  If the shortest code word is not so formed, then no other code word in the
Packit 664db3
   interval would result in any more bytes being discarded.
Packit 664db3
  Any longer code word would have an additional one bit somewhere, and so would
Packit 664db3
   require at a minimum that that byte would be coded.
Packit 664db3
  If the shortest code word has a 1 before the final one that is preventing the
Packit 664db3
   stream from ending in a non-empty sequence of zeros followed by a
Packit 664db3
   non-negative number of 0x80's, then there is no code word of the same length
Packit 664db3
   which contains that bit as a zero.
Packit 664db3
  If there were, then we could simply leave that bit a 1, and drop all the bits
Packit 664db3
   after it without leaving the interval, thus producing a shorter code word.
Packit 664db3
Packit 664db3
  In this case, RSV1 can only drop 1 bit off the final stream.
Packit 664db3
  Other choices could lead to savings of up to 8 bits for particular streams,
Packit 664db3
   but this would produce the odd situation that a stream with more non-zero
Packit 664db3
   bits is actually encoded in fewer bytes.
Packit 664db3
Packit 664db3
  @PHDTHESIS{Pas76,
Packit 664db3
    author="Richard Clark Pasco",
Packit 664db3
    title="Source coding algorithms for fast data compression",
Packit 664db3
    school="Dept. of Electrical Engineering, Stanford University",
Packit 664db3
    address="Stanford, CA",
Packit 664db3
    month=May,
Packit 664db3
    year=1976
Packit 664db3
  }
Packit 664db3
  @INPROCEEDINGS{Mar79,
Packit 664db3
   author="Martin, G.N.N.",
Packit 664db3
   title="Range encoding: an algorithm for removing redundancy from a digitised
Packit 664db3
    message",
Packit 664db3
   booktitle="Video & Data Recording Conference",
Packit 664db3
   year=1979,
Packit 664db3
   address="Southampton",
Packit 664db3
   month=Jul
Packit 664db3
  }
Packit 664db3
  @ARTICLE{MNW98,
Packit 664db3
   author="Alistair Moffat and Radford Neal and Ian H. Witten",
Packit 664db3
   title="Arithmetic Coding Revisited",
Packit 664db3
   journal="{ACM} Transactions on Information Systems",
Packit 664db3
   year=1998,
Packit 664db3
   volume=16,
Packit 664db3
   number=3,
Packit 664db3
   pages="256--294",
Packit 664db3
   month=Jul,
Packit 664db3
   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
Packit 664db3
  }*/
Packit 664db3
Packit 664db3
Packit 664db3
/*Gets the next byte of input.
Packit 664db3
  After all the bytes in the current packet have been consumed, and the extra
Packit 664db3
   end code returned if needed, this function will continue to return zero each
Packit 664db3
   time it is called.
Packit 664db3
  Return: The next byte of input.*/
Packit 664db3
static int ec_dec_in(ec_dec *_this){
Packit 664db3
  int ret;
Packit 664db3
  ret=ec_byte_read1(_this->buf);
Packit 664db3
  if(ret<0){
Packit 664db3
    ret=0;
Packit 664db3
    /*Needed to keep oc_dec_tell() operating correctly.*/
Packit 664db3
    ec_byte_adv1(_this->buf);
Packit 664db3
  }
Packit 664db3
  return ret;
Packit 664db3
}
Packit 664db3
Packit 664db3
/*Normalizes the contents of dif and rng so that rng lies entirely in the
Packit 664db3
   high-order symbol.*/
Packit 664db3
static inline void ec_dec_normalize(ec_dec *_this){
Packit 664db3
  /*If the range is too small, rescale it and input some bits.*/
Packit 664db3
  while(_this->rng<=EC_CODE_BOT){
Packit 664db3
    int sym;
Packit 664db3
    _this->rng<<=EC_SYM_BITS;
Packit 664db3
    /*Use up the remaining bits from our last symbol.*/
Packit 664db3
    sym=_this->rem<
Packit 664db3
    /*Read the next value from the input.*/
Packit 664db3
    _this->rem=ec_dec_in(_this);
Packit 664db3
    /*Take the rest of the bits we need from this new symbol.*/
Packit 664db3
    sym|=_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA;
Packit 664db3
    _this->dif=(_this->dif<
Packit 664db3
    /*dif can never be larger than EC_CODE_TOP.
Packit 664db3
      This is equivalent to the slightly more readable:
Packit 664db3
      if(_this->dif>EC_CODE_TOP)_this->dif-=EC_CODE_TOP;*/
Packit 664db3
    _this->dif^=_this->dif&_this->dif-1&EC_CODE_TOP;
Packit 664db3
  }
Packit 664db3
}
Packit 664db3
Packit 664db3
void ec_dec_init(ec_dec *_this,ec_byte_buffer *_buf){
Packit 664db3
  _this->buf=_buf;
Packit 664db3
  _this->rem=ec_dec_in(_this);
Packit 664db3
  _this->rng=1U<
Packit 664db3
  _this->dif=_this->rng-(_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA);
Packit 664db3
  /*Normalize the interval.*/
Packit 664db3
  ec_dec_normalize(_this);
Packit 664db3
}
Packit 664db3
Packit 664db3
Packit 664db3
unsigned ec_decode(ec_dec *_this,unsigned _ft){
Packit 664db3
  unsigned s;
Packit 664db3
  _this->nrm=_this->rng/_ft;
Packit 664db3
  s=(unsigned)((_this->dif-1)/_this->nrm);
Packit 664db3
  return _ft-EC_MINI(s+1,_ft);
Packit 664db3
}
Packit 664db3
Packit 664db3
unsigned ec_decode_bin(ec_dec *_this,unsigned bits){
Packit 664db3
   unsigned s;
Packit 664db3
   ec_uint32 ft;
Packit 664db3
   ft = (ec_uint32)1<
Packit 664db3
   _this->nrm=_this->rng>>bits;
Packit 664db3
   s=(unsigned)((_this->dif-1)/_this->nrm);
Packit 664db3
   return ft-EC_MINI(s+1,ft);
Packit 664db3
}
Packit 664db3
Packit 664db3
void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){
Packit 664db3
  ec_uint32 s;
Packit 664db3
  s=IMUL32(_this->nrm,(_ft-_fh));
Packit 664db3
  _this->dif-=s;
Packit 664db3
  _this->rng=_fl>0?IMUL32(_this->nrm,(_fh-_fl)):_this->rng-s;
Packit 664db3
  ec_dec_normalize(_this);
Packit 664db3
}
Packit 664db3
Packit 664db3
long ec_dec_tell(ec_dec *_this,int _b){
Packit 664db3
  ec_uint32 r;
Packit 664db3
  int       l;
Packit 664db3
  long      nbits;
Packit 664db3
  nbits=(ec_byte_bytes(_this->buf)-(EC_CODE_BITS+EC_SYM_BITS-1)/EC_SYM_BITS)*
Packit 664db3
   EC_SYM_BITS;
Packit 664db3
  /*To handle the non-integral number of bits still left in the encoder state,
Packit 664db3
     we compute the number of bits of low that must be encoded to ensure that
Packit 664db3
     the value is inside the range for any possible subsequent bits.
Packit 664db3
    Note that this is subtly different than the actual value we would end the
Packit 664db3
     stream with, which tries to make as many of the trailing bits zeros as
Packit 664db3
     possible.*/
Packit 664db3
  nbits+=EC_CODE_BITS;
Packit 664db3
  nbits<<=_b;
Packit 664db3
  l=EC_ILOG(_this->rng);
Packit 664db3
  r=_this->rng>>l-16;
Packit 664db3
  while(_b-->0){
Packit 664db3
    int b;
Packit 664db3
    r=r*r>>15;
Packit 664db3
    b=(int)(r>>16);
Packit 664db3
    l=l<<1|b;
Packit 664db3
    r>>=b;
Packit 664db3
  }
Packit 664db3
  return nbits-l;
Packit 664db3
}
Packit 664db3
Packit 664db3
#if 0
Packit 664db3
int ec_dec_done(ec_dec *_this){
Packit 664db3
  unsigned low;
Packit 664db3
  int      ret;
Packit 664db3
  /*Check to make sure we've used all the input bytes.
Packit 664db3
    This ensures that no more ones would ever be inserted into the decoder.*/
Packit 664db3
  if(_this->buf->ptr-ec_byte_get_buffer(_this->buf)<=
Packit 664db3
   ec_byte_bytes(_this->buf)){
Packit 664db3
    return 0;
Packit 664db3
  }
Packit 664db3
  /*We compute the smallest finitely odd fraction that fits inside the current
Packit 664db3
     range, and write that to the stream.
Packit 664db3
    This is guaranteed to yield the smallest possible encoding.*/
Packit 664db3
  /*TODO: Fix this line, as it is wrong.
Packit 664db3
    It doesn't seem worth being able to make this check to do an extra
Packit 664db3
     subtraction for every symbol decoded.*/
Packit 664db3
  low=/*What we want: _this->top-_this->rng; What we have:*/_this->dif
Packit 664db3
  if(low){
Packit 664db3
    unsigned end;
Packit 664db3
    end=EC_CODE_TOP;
Packit 664db3
    /*Ensure that the next free end is in the range.*/
Packit 664db3
    if(end-low>=_this->rng){
Packit 664db3
      unsigned msk;
Packit 664db3
      msk=EC_CODE_TOP-1;
Packit 664db3
      do{
Packit 664db3
        msk>>=1;
Packit 664db3
        end=(low+msk)&~msk|msk+1;
Packit 664db3
      }
Packit 664db3
      while(end-low>=_this->rng);
Packit 664db3
    }
Packit 664db3
    /*The remaining input should have been the next free end.*/
Packit 664db3
    return end-low!=_this->dif;
Packit 664db3
  }
Packit 664db3
  return 1;
Packit 664db3
}
Packit 664db3
#endif