Blame src/enc/quant_enc.c

// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
//   Quantization
//
// Author: Skal (pascal.massimino@gmail.com)
Packit 9c6abc
Packit 9c6abc
#include <assert.h>
Packit 9c6abc
#include <math.h>
Packit 9c6abc
#include <stdlib.h>  // for abs()
Packit 9c6abc
Packit 9c6abc
#include "src/enc/vp8i_enc.h"
Packit 9c6abc
#include "src/enc/cost_enc.h"
Packit 9c6abc
Packit 9c6abc
#define DO_TRELLIS_I4  1
#define DO_TRELLIS_I16 1   // not a huge gain, but ok at low bitrate.
#define DO_TRELLIS_UV  0   // disable trellis for UV. Risky. Not worth.
#define USE_TDISTO 1

#define MID_ALPHA 64      // neutral value for susceptibility
#define MIN_ALPHA 30      // lowest usable value for susceptibility
#define MAX_ALPHA 100     // highest meaningful value for susceptibility

#define SNS_TO_DQ 0.9     // Scaling constant between the sns value and the QP
                          // power-law modulation. Must be strictly less than 1.

// number of non-zero coeffs below which we consider the block very flat
// (and apply a penalty to complex predictions)
#define FLATNESS_LIMIT_I16 10      // I16 mode
#define FLATNESS_LIMIT_I4  3       // I4 mode
#define FLATNESS_LIMIT_UV  2       // UV mode
#define FLATNESS_PENALTY   140     // roughly ~1bit per block

// Fixed-point product of two values with 8 fractional bits, rounded to
// nearest: (a * b + 128) >> 8.
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)

#define RD_DISTO_MULT      256  // distortion multiplier (equivalent of lambda)
Packit 9c6abc
Packit 9c6abc
// #define DEBUG_BLOCK
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
Packit 9c6abc
#if defined(DEBUG_BLOCK)
Packit 9c6abc
Packit 9c6abc
#include <stdio.h>
Packit 9c6abc
#include <stdlib.h>
Packit 9c6abc
Packit 9c6abc
static void PrintBlockInfo(const VP8EncIterator* const it,
Packit 9c6abc
                           const VP8ModeScore* const rd) {
Packit 9c6abc
  int i, j;
Packit 9c6abc
  const int is_i16 = (it->mb_->type_ == 1);
Packit 9c6abc
  const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;
Packit 9c6abc
  const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;
Packit 9c6abc
  const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;
Packit 9c6abc
  const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;
Packit 9c6abc
  printf("SOURCE / OUTPUT / ABS DELTA\n");
Packit 9c6abc
  for (j = 0; j < 16; ++j) {
Packit 9c6abc
    for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
Packit 9c6abc
    printf("     ");
Packit 9c6abc
    for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]);
Packit 9c6abc
    printf("     ");
Packit 9c6abc
    for (i = 0; i < 16; ++i) {
Packit 9c6abc
      printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS]));
Packit 9c6abc
    }
Packit 9c6abc
    printf("\n");
Packit 9c6abc
  }
Packit 9c6abc
  printf("\n");   // newline before the U/V block
Packit 9c6abc
  for (j = 0; j < 8; ++j) {
Packit 9c6abc
    for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]);
Packit 9c6abc
    printf(" ");
Packit 9c6abc
    for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]);
Packit 9c6abc
    printf("    ");
Packit 9c6abc
    for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]);
Packit 9c6abc
    printf(" ");
Packit 9c6abc
    for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]);
Packit 9c6abc
    printf("   ");
Packit 9c6abc
    for (i = 0; i < 8; ++i) {
Packit 9c6abc
      printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
Packit 9c6abc
    }
Packit 9c6abc
    printf(" ");
Packit 9c6abc
    for (i = 8; i < 16; ++i) {
Packit 9c6abc
      printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
Packit 9c6abc
    }
Packit 9c6abc
    printf("\n");
Packit 9c6abc
  }
Packit 9c6abc
  printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
Packit 9c6abc
    (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
Packit 9c6abc
    (int)rd->score);
Packit 9c6abc
  if (is_i16) {
Packit 9c6abc
    printf("Mode: %d\n", rd->mode_i16);
Packit 9c6abc
    printf("y_dc_levels:");
Packit 9c6abc
    for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
Packit 9c6abc
    printf("\n");
Packit 9c6abc
  } else {
Packit 9c6abc
    printf("Modes[16]: ");
Packit 9c6abc
    for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
Packit 9c6abc
    printf("\n");
Packit 9c6abc
  }
Packit 9c6abc
  printf("y_ac_levels:\n");
Packit 9c6abc
  for (j = 0; j < 16; ++j) {
Packit 9c6abc
    for (i = is_i16 ? 1 : 0; i < 16; ++i) {
Packit 9c6abc
      printf("%4d ", rd->y_ac_levels[j][i]);
Packit 9c6abc
    }
Packit 9c6abc
    printf("\n");
Packit 9c6abc
  }
Packit 9c6abc
  printf("\n");
Packit 9c6abc
  printf("uv_levels (mode=%d):\n", rd->mode_uv);
Packit 9c6abc
  for (j = 0; j < 8; ++j) {
Packit 9c6abc
    for (i = 0; i < 16; ++i) {
Packit 9c6abc
      printf("%4d ", rd->uv_levels[j][i]);
Packit 9c6abc
    }
Packit 9c6abc
    printf("\n");
Packit 9c6abc
  }
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
#endif   // DEBUG_BLOCK
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
Packit 9c6abc
// Clamps 'v' to the inclusive range [m, M]. The lower bound is tested first,
// so 'm' is returned whenever v < m.
static WEBP_INLINE int clip(int v, int m, int M) {
  if (v < m) return m;
  if (v > M) return M;
  return v;
}
Packit 9c6abc
Packit 9c6abc
// Zigzag scan order: kZigzag[n] is the index, within a 16-entry coefficient
// block, of the n-th coefficient in scan order.
static const uint8_t kZigzag[16] = {
  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
Packit 9c6abc
Packit 9c6abc
// Quantizer step for DC coefficients, indexed by a quantizer index in
// [0..127] (callers clip the index before the lookup).
static const uint8_t kDcTable[128] = {
  4,     5,   6,   7,   8,   9,  10,  10,
  11,   12,  13,  14,  15,  16,  17,  17,
  18,   19,  20,  20,  21,  21,  22,  22,
  23,   23,  24,  25,  25,  26,  27,  28,
  29,   30,  31,  32,  33,  34,  35,  36,
  37,   37,  38,  39,  40,  41,  42,  43,
  44,   45,  46,  46,  47,  48,  49,  50,
  51,   52,  53,  54,  55,  56,  57,  58,
  59,   60,  61,  62,  63,  64,  65,  66,
  67,   68,  69,  70,  71,  72,  73,  74,
  75,   76,  76,  77,  78,  79,  80,  81,
  82,   83,  84,  85,  86,  87,  88,  89,
  91,   93,  95,  96,  98, 100, 101, 102,
  104, 106, 108, 110, 112, 114, 116, 118,
  122, 124, 126, 128, 130, 132, 134, 136,
  138, 140, 143, 145, 148, 151, 154, 157
};
Packit 9c6abc
Packit 9c6abc
// Quantizer step for AC coefficients, indexed by a quantizer index in
// [0..127] (callers clip the index before the lookup).
static const uint16_t kAcTable[128] = {
  4,     5,   6,   7,   8,   9,  10,  11,
  12,   13,  14,  15,  16,  17,  18,  19,
  20,   21,  22,  23,  24,  25,  26,  27,
  28,   29,  30,  31,  32,  33,  34,  35,
  36,   37,  38,  39,  40,  41,  42,  43,
  44,   45,  46,  47,  48,  49,  50,  51,
  52,   53,  54,  55,  56,  57,  58,  60,
  62,   64,  66,  68,  70,  72,  74,  76,
  78,   80,  82,  84,  86,  88,  90,  92,
  94,   96,  98, 100, 102, 104, 106, 108,
  110, 112, 114, 116, 119, 122, 125, 128,
  131, 134, 137, 140, 143, 146, 149, 152,
  155, 158, 161, 164, 167, 170, 173, 177,
  181, 185, 189, 193, 197, 201, 205, 209,
  213, 217, 221, 225, 229, 234, 239, 245,
  249, 254, 259, 264, 269, 274, 279, 284
};
Packit 9c6abc
Packit 9c6abc
// Quantizer step for the AC coefficients of the y2_ matrix, indexed by a
// quantizer index in [0..127].
// NOTE(review): the entries look like kAcTable[i] * 155 / 100 with a floor
// of 8 -- confirm against the VP8 specification before editing values.
static const uint16_t kAcTable2[128] = {
  8,     8,   9,  10,  12,  13,  15,  17,
  18,   20,  21,  23,  24,  26,  27,  29,
  31,   32,  34,  35,  37,  38,  40,  41,
  43,   44,  46,  48,  49,  51,  52,  54,
  55,   57,  58,  60,  62,  63,  65,  66,
  68,   69,  71,  72,  74,  75,  77,  79,
  80,   82,  83,  85,  86,  88,  89,  93,
  96,   99, 102, 105, 108, 111, 114, 117,
  120, 124, 127, 130, 133, 136, 139, 142,
  145, 148, 151, 155, 158, 161, 164, 167,
  170, 173, 176, 179, 184, 189, 193, 198,
  203, 207, 212, 217, 221, 226, 230, 235,
  240, 244, 249, 254, 258, 263, 268, 274,
  280, 286, 292, 299, 305, 311, 317, 323,
  330, 336, 342, 348, 354, 362, 370, 379,
  385, 393, 401, 409, 416, 424, 432, 440
};
Packit 9c6abc
Packit 9c6abc
// Rounding-bias values used when expanding a quantization matrix.
// First index: matrix type (0, 1 or 2). Second index: 0 for the DC position,
// 1 for the AC positions.
static const uint8_t kBiasMatrices[3][2] = {  // [luma-ac,luma-dc,chroma][dc,ac]
  { 96, 110 }, { 96, 108 }, { 110, 115 }
};
Packit 9c6abc
Packit 9c6abc
// Sharpening by (slightly) raising the hi-frequency coeffs.
// Hack-ish but helpful for mid-bitrate range. Use with care.
#define SHARPEN_BITS 11  // number of descaling bits for sharpening bias
// Per-position sharpening weights for a 4x4 block (row-major); the DC
// position has weight 0.
static const uint8_t kFreqSharpening[16] = {
  0,  30, 60, 90,
  30, 60, 90, 90,
  60, 90, 90, 90,
  90, 90, 90, 90
};
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
// Initialize quantization parameters in VP8Matrix
Packit 9c6abc
Packit 9c6abc
// Returns the average quantizer
Packit 9c6abc
static int ExpandMatrix(VP8Matrix* const m, int type) {
Packit 9c6abc
  int i, sum;
Packit 9c6abc
  for (i = 0; i < 2; ++i) {
Packit 9c6abc
    const int is_ac_coeff = (i > 0);
Packit 9c6abc
    const int bias = kBiasMatrices[type][is_ac_coeff];
Packit 9c6abc
    m->iq_[i] = (1 << QFIX) / m->q_[i];
Packit 9c6abc
    m->bias_[i] = BIAS(bias);
Packit 9c6abc
    // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
Packit 9c6abc
    //   * zero if coeff <= zthresh
Packit 9c6abc
    //   * non-zero if coeff > zthresh
Packit 9c6abc
    m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
Packit 9c6abc
  }
Packit 9c6abc
  for (i = 2; i < 16; ++i) {
Packit 9c6abc
    m->q_[i] = m->q_[1];
Packit 9c6abc
    m->iq_[i] = m->iq_[1];
Packit 9c6abc
    m->bias_[i] = m->bias_[1];
Packit 9c6abc
    m->zthresh_[i] = m->zthresh_[1];
Packit 9c6abc
  }
Packit 9c6abc
  for (sum = 0, i = 0; i < 16; ++i) {
Packit 9c6abc
    if (type == 0) {  // we only use sharpening for AC luma coeffs
Packit 9c6abc
      m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
Packit 9c6abc
    } else {
Packit 9c6abc
      m->sharpen_[i] = 0;
Packit 9c6abc
    }
Packit 9c6abc
    sum += m->q_[i];
Packit 9c6abc
  }
Packit 9c6abc
  return (sum + 8) >> 4;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
// Forces '*v' to be at least 1; values already >= 1 are left untouched.
static void CheckLambdaValue(int* const v) {
  if (*v < 1) *v = 1;
}
Packit 9c6abc
Packit 9c6abc
// For each active segment of 'enc', looks up the DC/AC quantizer steps of the
// y1_, y2_ and uv_ matrices from the segment's quant_ index (plus the
// encoder-wide dq_* deltas), expands the matrices, and derives the
// rate-distortion lambdas and penalties from the resulting average
// quantizers.
static void SetupMatrices(VP8Encoder* enc) {
  int i;
  // tlambda_ is only enabled for method_ >= 4.
  const int tlambda_scale =
    (enc->method_ >= 4) ? enc->config_->sns_strength
                        : 0;
  const int num_segments = enc->segment_hdr_.num_segments_;
  for (i = 0; i < num_segments; ++i) {
    VP8SegmentInfo* const m = &enc->dqm_[i];
    const int q = m->quant_;
    int q_i4, q_i16, q_uv;
    m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
    m->y1_.q_[1] = kAcTable[clip(q,                  0, 127)];

    m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
    m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];

    // The chroma DC index is clipped at 117 rather than 127:
    // kDcTable[117] == 132, so the chroma DC step never exceeds 132.
    m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
    m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];

    q_i4  = ExpandMatrix(&m->y1_, 0);
    q_i16 = ExpandMatrix(&m->y2_, 1);
    q_uv  = ExpandMatrix(&m->uv_, 2);

    m->lambda_i4_          = (3 * q_i4 * q_i4) >> 7;
    m->lambda_i16_         = (3 * q_i16 * q_i16);
    m->lambda_uv_          = (3 * q_uv * q_uv) >> 6;
    m->lambda_mode_        = (1 * q_i4 * q_i4) >> 7;
    m->lambda_trellis_i4_  = (7 * q_i4 * q_i4) >> 3;
    m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2;
    m->lambda_trellis_uv_  = (q_uv * q_uv) << 1;
    m->tlambda_            = (tlambda_scale * q_i4) >> 5;

    // none of these constants should be < 1
    CheckLambdaValue(&m->lambda_i4_);
    CheckLambdaValue(&m->lambda_i16_);
    CheckLambdaValue(&m->lambda_uv_);
    CheckLambdaValue(&m->lambda_mode_);
    CheckLambdaValue(&m->lambda_trellis_i4_);
    CheckLambdaValue(&m->lambda_trellis_i16_);
    CheckLambdaValue(&m->lambda_trellis_uv_);
    CheckLambdaValue(&m->tlambda_);

    m->min_disto_ = 20 * m->y1_.q_[0];   // quantization-aware min disto
    m->max_edge_  = 0;

    m->i4_penalty_ = 1000 * q_i4 * q_i4;
  }
}
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
// Initialize filtering parameters
Packit 9c6abc
Packit 9c6abc
// Very small filter-strength values have close to no visual effect. So we can
Packit 9c6abc
// save a little decoding-CPU by turning filtering off for these.
Packit 9c6abc
#define FSTRENGTH_CUTOFF 2
Packit 9c6abc
Packit 9c6abc
static void SetupFilterStrength(VP8Encoder* const enc) {
Packit 9c6abc
  int i;
Packit 9c6abc
  // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
Packit 9c6abc
  const int level0 = 5 * enc->config_->filter_strength;
Packit 9c6abc
  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
Packit 9c6abc
    VP8SegmentInfo* const m = &enc->dqm_[i];
Packit 9c6abc
    // We focus on the quantization of AC coeffs.
Packit 9c6abc
    const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
Packit 9c6abc
    const int base_strength =
Packit 9c6abc
        VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
Packit 9c6abc
    // Segments with lower complexity ('beta') will be less filtered.
Packit 9c6abc
    const int f = base_strength * level0 / (256 + m->beta_);
Packit 9c6abc
    m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
Packit 9c6abc
  }
Packit 9c6abc
  // We record the initial strength (mainly for the case of 1-segment only).
Packit 9c6abc
  enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
Packit 9c6abc
  enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
Packit 9c6abc
  enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
Packit 9c6abc
// Note: if you change the values below, remember that the max range
// allowed by the syntax for DQ_UV is [-16,16].
#define MAX_DQ_UV (6)
#define MIN_DQ_UV (-4)
Packit 9c6abc
Packit 9c6abc
// We want to emulate jpeg-like behaviour where the expected "good" quality
Packit 9c6abc
// is around q=75. Internally, our "good" middle is around c=50. So we
Packit 9c6abc
// map accordingly using linear piece-wise function
Packit 9c6abc
static double QualityToCompression(double c) {
Packit 9c6abc
  const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
Packit 9c6abc
  // The file size roughly scales as pow(quantizer, 3.). Actually, the
Packit 9c6abc
  // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
Packit 9c6abc
  // in the mid-quant range. So we scale the compressibility inversely to
Packit 9c6abc
  // this power-law: quant ~= compression ^ 1/3. This law holds well for
Packit 9c6abc
  // low quant. Finer modeling for high-quant would make use of kAcTable[]
Packit 9c6abc
  // more explicitly.
Packit 9c6abc
  const double v = pow(linear_c, 1 / 3.);
Packit 9c6abc
  return v;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
static double QualityToJPEGCompression(double c, double alpha) {
Packit 9c6abc
  // We map the complexity 'alpha' and quality setting 'c' to a compression
Packit 9c6abc
  // exponent empirically matched to the compression curve of libjpeg6b.
Packit 9c6abc
  // On average, the WebP output size will be roughly similar to that of a
Packit 9c6abc
  // JPEG file compressed with same quality factor.
Packit 9c6abc
  const double amin = 0.30;
Packit 9c6abc
  const double amax = 0.85;
Packit 9c6abc
  const double exp_min = 0.4;
Packit 9c6abc
  const double exp_max = 0.9;
Packit 9c6abc
  const double slope = (exp_min - exp_max) / (amax - amin);
Packit 9c6abc
  // Linearly interpolate 'expn' from exp_min to exp_max
Packit 9c6abc
  // in the [amin, amax] range.
Packit 9c6abc
  const double expn = (alpha > amax) ? exp_min
Packit 9c6abc
                    : (alpha < amin) ? exp_max
Packit 9c6abc
                    : exp_max + slope * (alpha - amin);
Packit 9c6abc
  const double v = pow(c, expn);
Packit 9c6abc
  return v;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
Packit 9c6abc
                                 const VP8SegmentInfo* const S2) {
Packit 9c6abc
  return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
// Merges segments that are equivalent (same quant_ and fstrength_): the
// distinct segments are compacted at the front of enc->dqm_[], every
// macroblock's segment_ is remapped accordingly, and
// segment_hdr_.num_segments_ is lowered to the number of distinct segments.
// Nothing is modified beyond the in-place compaction when all segments are
// already distinct.
static void SimplifySegments(VP8Encoder* const enc) {
  // map[old_segment] -> new_segment
  int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
  // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
  // explicit check is needed to avoid a spurious warning about 'i' exceeding
  // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9).
  const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS)
                               ? enc->segment_hdr_.num_segments_
                               : NUM_MB_SEGMENTS;
  int num_final_segments = 1;
  int s1, s2;
  for (s1 = 1; s1 < num_segments; ++s1) {    // find similar segments
    const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
    int found = 0;
    // check if we already have similar segment
    for (s2 = 0; s2 < num_final_segments; ++s2) {
      const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
      if (SegmentsAreEquivalent(S1, S2)) {
        found = 1;
        break;
      }
    }
    // On a match 's2' is the matching segment; otherwise the loop ended with
    // s2 == num_final_segments, which is the slot the new segment gets below.
    map[s1] = s2;
    if (!found) {
      if (num_final_segments != s1) {
        enc->dqm_[num_final_segments] = enc->dqm_[s1];
      }
      ++num_final_segments;
    }
  }
  if (num_final_segments < num_segments) {  // Remap
    int i = enc->mb_w_ * enc->mb_h_;
    while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
    enc->segment_hdr_.num_segments_ = num_final_segments;
    // Replicate the trailing segment infos (it's mostly cosmetics)
    for (i = num_final_segments; i < num_segments; ++i) {
      enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
    }
  }
}
Packit 9c6abc
Packit 9c6abc
// Derives all per-segment quantization and filtering parameters of 'enc'
// from the user 'quality' (divided by 100 before use):
//   1. computes each active segment's quant_ index, modulated by the
//      segment's alpha_ and the configured sns_strength,
//   2. sets base_quant_ and fills the unused segments with it,
//   3. computes the chroma quantizer deltas dq_uv_ac_ / dq_uv_dc_,
//   4. sets up the filter strengths, merges equivalent segments and finally
//      builds the quantization matrices.
void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
  int i;
  int dq_uv_ac, dq_uv_dc;
  const int num_segments = enc->segment_hdr_.num_segments_;
  const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
  const double Q = quality / 100.;
  const double c_base = enc->config_->emulate_jpeg_size ?
      QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
      QualityToCompression(Q);
  for (i = 0; i < num_segments; ++i) {
    // We modulate the base coefficient to accommodate for the quantization
    // susceptibility and allow denser segments to be quantized more.
    const double expn = 1. - amp * enc->dqm_[i].alpha_;
    const double c = pow(c_base, expn);
    const int q = (int)(127. * (1. - c));
    assert(expn > 0.);
    enc->dqm_[i].quant_ = clip(q, 0, 127);
  }

  // purely indicative in the bitstream (except for the 1-segment case)
  enc->base_quant_ = enc->dqm_[0].quant_;

  // fill-in values for the unused segments (required by the syntax)
  for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
    enc->dqm_[i].quant_ = enc->base_quant_;
  }

  // uv_alpha_ is normally spread around ~60. The useful range is
  // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
  // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
  dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
                                          / (MAX_ALPHA - MIN_ALPHA);
  // we rescale by the user-defined strength of adaptation
  dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
  // and make it safe.
  dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
  // We also boost the dc-uv-quant a little, based on sns-strength, since
  // U/V channels are quite more reactive to high quants (flat DC-blocks
  // tend to appear, and are unpleasant).
  dq_uv_dc = -4 * enc->config_->sns_strength / 100;
  dq_uv_dc = clip(dq_uv_dc, -15, 15);   // 4bit-signed max allowed

  enc->dq_y1_dc_ = 0;       // TODO(skal): dq-lum
  enc->dq_y2_dc_ = 0;
  enc->dq_y2_ac_ = 0;
  enc->dq_uv_dc_ = dq_uv_dc;
  enc->dq_uv_ac_ = dq_uv_ac;

  SetupFilterStrength(enc);   // initialize segments' filtering, eventually

  if (num_segments > 1) SimplifySegments(enc);

  SetupMatrices(enc);         // finalize quantization matrices
}
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
// Form the predictions in cache
Packit 9c6abc
Packit 9c6abc
// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
Packit 9c6abc
const uint16_t VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
Packit 9c6abc
const uint16_t VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
Packit 9c6abc
Packit 9c6abc
// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
Packit 9c6abc
const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
Packit 9c6abc
  I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
Packit 9c6abc
};
Packit 9c6abc
Packit 9c6abc
void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
Packit 9c6abc
  const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
Packit 9c6abc
  const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
Packit 9c6abc
  VP8EncPredLuma16(it->yuv_p_, left, top);
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
Packit 9c6abc
  const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
Packit 9c6abc
  const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
Packit 9c6abc
  VP8EncPredChroma8(it->yuv_p_, left, top);
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
Packit 9c6abc
  VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
// Quantize
Packit 9c6abc
Packit 9c6abc
// Layout:
// +----+----+
// |YYYY|UUVV| 0
// |YYYY|UUVV| 4
// |YYYY|....| 8
// |YYYY|....| 12
// +----+----+

// Offset of the top-left sample of each of the sixteen 4x4 luma blocks, in
// raster order, inside a buffer whose row stride is BPS.
const uint16_t VP8Scan[16] = {  // Luma
  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
};
Packit 9c6abc
Packit 9c6abc
static const uint16_t VP8ScanUV[4 + 4] = {
Packit 9c6abc
  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
Packit 9c6abc
  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
Packit 9c6abc
};
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
// Distortion measurement
Packit 9c6abc
Packit 9c6abc
// Per-position weights for a 4x4 block (row-major, 16 entries), decreasing
// from the first position (38) to the last one (2).
static const uint16_t kWeightY[16] = {
  38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2
};
Packit 9c6abc
Packit 9c6abc
// Per-position distortion weights used by the trellis quantization
// (row-major 4x4). When USE_TDISTO is 0 all positions get the flat weight 16.
static const uint16_t kWeightTrellis[16] = {
#if USE_TDISTO == 0
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
#else
  30, 27, 19, 11,
  27, 24, 17, 10,
  19, 17, 12,  8,
  11, 10,  8,  6
#endif
};
Packit 9c6abc
Packit 9c6abc
// Init/Copy the common fields in score.
Packit 9c6abc
static void InitScore(VP8ModeScore* const rd) {
Packit 9c6abc
  rd->D  = 0;
Packit 9c6abc
  rd->SD = 0;
Packit 9c6abc
  rd->R  = 0;
Packit 9c6abc
  rd->H  = 0;
Packit 9c6abc
  rd->nz = 0;
Packit 9c6abc
  rd->score = MAX_COST;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
// Copies the common score fields (D, SD, R, H, nz, score) from 'src' to
// 'dst'. Other fields of 'dst' are left untouched.
static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
  dst->D  = src->D;
  dst->SD = src->SD;
  dst->R  = src->R;
  dst->H  = src->H;
  dst->nz = src->nz;      // note that nz is not accumulated, but just copied.
  dst->score = src->score;
}
Packit 9c6abc
Packit 9c6abc
// Accumulates the common score fields of 'src' into 'dst': D, SD, R, H and
// score are summed, while the nz bits are OR-ed together.
static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
  dst->D  += src->D;
  dst->SD += src->SD;
  dst->R  += src->R;
  dst->H  += src->H;
  dst->nz |= src->nz;     // here, new nz bits are accumulated.
  dst->score += src->score;
}
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
// Performs trellis-optimized quantization.
Packit 9c6abc
Packit 9c6abc
// Trellis node
typedef struct {
  int8_t prev;            // best previous node
  int8_t sign;            // sign of coeff_i
  int16_t level;          // level
} Node;
Packit 9c6abc
Packit 9c6abc
// Score state
Packit 9c6abc
typedef struct {
Packit 9c6abc
  score_t score;          // partial RD score
Packit 9c6abc
  const uint16_t* costs;  // shortcut to cost tables
Packit 9c6abc
} ScoreState;
Packit 9c6abc
Packit 9c6abc
// If a coefficient was quantized to a value Q (using a neutral bias),
// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
// We don't test negative values though.
#define MIN_DELTA 0   // how much lower level to try
#define MAX_DELTA 1   // how much higher
#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
// Accessors for the local 'nodes' / 'score_states' arrays: 'l' is the level
// delta, shifted by MIN_DELTA to form the array index.
#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])
#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
Packit 9c6abc
Packit 9c6abc
// Computes the rate-distortion score of 'rd' in place:
//   score = (R + H) * lambda + RD_DISTO_MULT * (D + SD)
static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
  rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD);
}
Packit 9c6abc
Packit 9c6abc
// Returns the rate-distortion score used by the trellis search:
//   rate * lambda + RD_DISTO_MULT * distortion
static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
                                          score_t distortion) {
  return rate * lambda + RD_DISTO_MULT * distortion;
}
Packit 9c6abc
Packit 9c6abc
static int TrellisQuantizeBlock(const VP8Encoder* const enc,
Packit 9c6abc
                                int16_t in[16], int16_t out[16],
Packit 9c6abc
                                int ctx0, int coeff_type,
Packit 9c6abc
                                const VP8Matrix* const mtx,
Packit 9c6abc
                                int lambda) {
Packit 9c6abc
  const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
Packit 9c6abc
  CostArrayPtr const costs =
Packit 9c6abc
      (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
Packit 9c6abc
  const int first = (coeff_type == 0) ? 1 : 0;
Packit 9c6abc
  Node nodes[16][NUM_NODES];
Packit 9c6abc
  ScoreState score_states[2][NUM_NODES];
Packit 9c6abc
  ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
Packit 9c6abc
  ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
Packit 9c6abc
  int best_path[3] = {-1, -1, -1};   // store best-last/best-level/best-previous
Packit 9c6abc
  score_t best_score;
Packit 9c6abc
  int n, m, p, last;
Packit 9c6abc
Packit 9c6abc
  {
Packit 9c6abc
    score_t cost;
Packit 9c6abc
    const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
Packit 9c6abc
    const int last_proba = probas[VP8EncBands[first]][ctx0][0];
Packit 9c6abc
Packit 9c6abc
    // compute the position of the last interesting coefficient
Packit 9c6abc
    last = first - 1;
Packit 9c6abc
    for (n = 15; n >= first; --n) {
Packit 9c6abc
      const int j = kZigzag[n];
Packit 9c6abc
      const int err = in[j] * in[j];
Packit 9c6abc
      if (err > thresh) {
Packit 9c6abc
        last = n;
Packit 9c6abc
        break;
Packit 9c6abc
      }
Packit 9c6abc
    }
Packit 9c6abc
    // we don't need to go inspect up to n = 16 coeffs. We can just go up
Packit 9c6abc
    // to last + 1 (inclusive) without losing much.
Packit 9c6abc
    if (last < 15) ++last;
Packit 9c6abc
Packit 9c6abc
    // compute 'skip' score. This is the max score one can do.
Packit 9c6abc
    cost = VP8BitCost(0, last_proba);
Packit 9c6abc
    best_score = RDScoreTrellis(lambda, cost, 0);
Packit 9c6abc
Packit 9c6abc
    // initialize source node.
Packit 9c6abc
    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
Packit 9c6abc
      const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
Packit 9c6abc
      ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
Packit 9c6abc
      ss_cur[m].costs = costs[first][ctx0];
Packit 9c6abc
    }
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  // traverse trellis.
Packit 9c6abc
  for (n = first; n <= last; ++n) {
Packit 9c6abc
    const int j = kZigzag[n];
Packit 9c6abc
    const uint32_t Q  = mtx->q_[j];
Packit 9c6abc
    const uint32_t iQ = mtx->iq_[j];
Packit 9c6abc
    const uint32_t B = BIAS(0x00);     // neutral bias
Packit 9c6abc
    // note: it's important to take sign of the _original_ coeff,
Packit 9c6abc
    // so we don't have to consider level < 0 afterward.
Packit 9c6abc
    const int sign = (in[j] < 0);
Packit 9c6abc
    const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
Packit 9c6abc
    int level0 = QUANTDIV(coeff0, iQ, B);
Packit 9c6abc
    int thresh_level = QUANTDIV(coeff0, iQ, BIAS(0x80));
Packit 9c6abc
    if (thresh_level > MAX_LEVEL) thresh_level = MAX_LEVEL;
Packit 9c6abc
    if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
Packit 9c6abc
Packit 9c6abc
    {   // Swap current and previous score states
Packit 9c6abc
      ScoreState* const tmp = ss_cur;
Packit 9c6abc
      ss_cur = ss_prev;
Packit 9c6abc
      ss_prev = tmp;
Packit 9c6abc
    }
Packit 9c6abc
Packit 9c6abc
    // test all alternate level values around level0.
Packit 9c6abc
    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
Packit 9c6abc
      Node* const cur = &NODE(n, m);
Packit 9c6abc
      int level = level0 + m;
Packit 9c6abc
      const int ctx = (level > 2) ? 2 : level;
Packit 9c6abc
      const int band = VP8EncBands[n + 1];
Packit 9c6abc
      score_t base_score;
Packit 9c6abc
      score_t best_cur_score = MAX_COST;
Packit 9c6abc
      int best_prev = 0;   // default, in case
Packit 9c6abc
Packit 9c6abc
      ss_cur[m].score = MAX_COST;
Packit 9c6abc
      ss_cur[m].costs = costs[n + 1][ctx];
Packit 9c6abc
      if (level < 0 || level > thresh_level) {
Packit 9c6abc
        // Node is dead.
Packit 9c6abc
        continue;
Packit 9c6abc
      }
Packit 9c6abc
Packit 9c6abc
      {
Packit 9c6abc
        // Compute delta_error = how much coding this level will
Packit 9c6abc
        // subtract to max_error as distortion.
Packit 9c6abc
        // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2
Packit 9c6abc
        const int new_error = coeff0 - level * Q;
Packit 9c6abc
        const int delta_error =
Packit 9c6abc
            kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);
Packit 9c6abc
        base_score = RDScoreTrellis(lambda, 0, delta_error);
Packit 9c6abc
      }
Packit 9c6abc
Packit 9c6abc
      // Inspect all possible non-dead predecessors. Retain only the best one.
Packit 9c6abc
      for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
Packit 9c6abc
        // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
Packit 9c6abc
        // eliminated since their score can't be better than the current best.
Packit 9c6abc
        const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
Packit 9c6abc
        // Examine node assuming it's a non-terminal one.
Packit 9c6abc
        const score_t score =
Packit 9c6abc
            base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
Packit 9c6abc
        if (score < best_cur_score) {
Packit 9c6abc
          best_cur_score = score;
Packit 9c6abc
          best_prev = p;
Packit 9c6abc
        }
Packit 9c6abc
      }
Packit 9c6abc
      // Store best finding in current node.
Packit 9c6abc
      cur->sign = sign;
Packit 9c6abc
      cur->level = level;
Packit 9c6abc
      cur->prev = best_prev;
Packit 9c6abc
      ss_cur[m].score = best_cur_score;
Packit 9c6abc
Packit 9c6abc
      // Now, record best terminal node (and thus best entry in the graph).
Packit 9c6abc
      if (level != 0) {
Packit 9c6abc
        const score_t last_pos_cost =
Packit 9c6abc
            (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
Packit 9c6abc
        const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
Packit 9c6abc
        const score_t score = best_cur_score + last_pos_score;
Packit 9c6abc
        if (score < best_score) {
Packit 9c6abc
          best_score = score;
Packit 9c6abc
          best_path[0] = n;                     // best eob position
Packit 9c6abc
          best_path[1] = m;                     // best node index
Packit 9c6abc
          best_path[2] = best_prev;             // best predecessor
Packit 9c6abc
        }
Packit 9c6abc
      }
Packit 9c6abc
    }
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  // Fresh start
Packit 9c6abc
  memset(in + first, 0, (16 - first) * sizeof(*in));
Packit 9c6abc
  memset(out + first, 0, (16 - first) * sizeof(*out));
Packit 9c6abc
  if (best_path[0] == -1) {
Packit 9c6abc
    return 0;   // skip!
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  {
Packit 9c6abc
    // Unwind the best path.
Packit 9c6abc
    // Note: best-prev on terminal node is not necessarily equal to the
Packit 9c6abc
    // best_prev for non-terminal. So we patch best_path[2] in.
Packit 9c6abc
    int nz = 0;
Packit 9c6abc
    int best_node = best_path[1];
Packit 9c6abc
    n = best_path[0];
Packit 9c6abc
    NODE(n, best_node).prev = best_path[2];   // force best-prev for terminal
Packit 9c6abc
Packit 9c6abc
    for (; n >= first; --n) {
Packit 9c6abc
      const Node* const node = &NODE(n, best_node);
Packit 9c6abc
      const int j = kZigzag[n];
Packit 9c6abc
      out[n] = node->sign ? -node->level : node->level;
Packit 9c6abc
      nz |= node->level;
Packit 9c6abc
      in[j] = out[n] * mtx->q_[j];
Packit 9c6abc
      best_node = node->prev;
Packit 9c6abc
    }
Packit 9c6abc
    return (nz != 0);
Packit 9c6abc
  }
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
#undef NODE
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
// Performs: difference, transform, quantize, back-transform, add
Packit 9c6abc
// all at once. Output is the reconstructed block in *yuv_out, and the
Packit 9c6abc
// quantized levels in *levels.
Packit 9c6abc
Packit 9c6abc
static int ReconstructIntra16(VP8EncIterator* const it,
Packit 9c6abc
                              VP8ModeScore* const rd,
Packit 9c6abc
                              uint8_t* const yuv_out,
Packit 9c6abc
                              int mode) {
Packit 9c6abc
  const VP8Encoder* const enc = it->enc_;
Packit 9c6abc
  const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
Packit 9c6abc
  const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
Packit 9c6abc
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
Packit 9c6abc
  int nz = 0;
Packit 9c6abc
  int n;
Packit 9c6abc
  int16_t tmp[16][16], dc_tmp[16];
Packit 9c6abc
Packit 9c6abc
  for (n = 0; n < 16; n += 2) {
Packit 9c6abc
    VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
Packit 9c6abc
  }
Packit 9c6abc
  VP8FTransformWHT(tmp[0], dc_tmp);
Packit 9c6abc
  nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
Packit 9c6abc
Packit 9c6abc
  if (DO_TRELLIS_I16 && it->do_trellis_) {
Packit 9c6abc
    int x, y;
Packit 9c6abc
    VP8IteratorNzToBytes(it);
Packit 9c6abc
    for (y = 0, n = 0; y < 4; ++y) {
Packit 9c6abc
      for (x = 0; x < 4; ++x, ++n) {
Packit 9c6abc
        const int ctx = it->top_nz_[x] + it->left_nz_[y];
Packit 9c6abc
        const int non_zero =
Packit 9c6abc
            TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
Packit 9c6abc
                                 &dqm->y1_, dqm->lambda_trellis_i16_);
Packit 9c6abc
        it->top_nz_[x] = it->left_nz_[y] = non_zero;
Packit 9c6abc
        rd->y_ac_levels[n][0] = 0;
Packit 9c6abc
        nz |= non_zero << n;
Packit 9c6abc
      }
Packit 9c6abc
    }
Packit 9c6abc
  } else {
Packit 9c6abc
    for (n = 0; n < 16; n += 2) {
Packit 9c6abc
      // Zero-out the first coeff, so that: a) nz is correct below, and
Packit 9c6abc
      // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
Packit 9c6abc
      tmp[n][0] = tmp[n + 1][0] = 0;
Packit 9c6abc
      nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
Packit 9c6abc
      assert(rd->y_ac_levels[n + 0][0] == 0);
Packit 9c6abc
      assert(rd->y_ac_levels[n + 1][0] == 0);
Packit 9c6abc
    }
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  // Transform back
Packit 9c6abc
  VP8TransformWHT(dc_tmp, tmp[0]);
Packit 9c6abc
  for (n = 0; n < 16; n += 2) {
Packit 9c6abc
    VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  return nz;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
// Reconstructs one 4x4 luma block for the intra4 prediction 'mode'.
// 'src' points to the source samples, the prediction is read from it->yuv_p_,
// the quantized levels are written to 'levels' and the reconstructed samples
// to 'yuv_out'. The trellis quantizer is used when enabled (DO_TRELLIS_I4 and
// it->do_trellis_); its context is derived from it->top_nz_[] and
// it->left_nz_[] at the position given by it->i4_.
// Returns the value reported by the quantizer ('nz').
static int ReconstructIntra4(VP8EncIterator* const it,
                             int16_t levels[16],
                             const uint8_t* const src,
                             uint8_t* const yuv_out,
                             int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  int nz = 0;
  int16_t tmp[16];

  VP8FTransform(src, ref, tmp);
  if (DO_TRELLIS_I4 && it->do_trellis_) {
    const int x = it->i4_ & 3, y = it->i4_ >> 2;
    const int ctx = it->top_nz_[x] + it->left_nz_[y];
    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
                              dqm->lambda_trellis_i4_);
  } else {
    nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
  }
  VP8ITransform(ref, tmp, yuv_out, 0);
  return nz;
}

//------------------------------------------------------------------------------
// DC-error diffusion

// Diffusion weights. We under-correct a bit (15/16th of the error is actually
// diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0.
#define C1 7    // fraction of error sent to the 4x4 block below
#define C2 8    // fraction of error sent to the 4x4 block on the right
#define DSHIFT 4
#define DSCALE 1   // storage descaling, needed to make the error fit int8_t

// Quantize as usual, but also compute and return the quantization error.
Packit 9c6abc
// Error is already divided by DSHIFT.
Packit 9c6abc
static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
Packit 9c6abc
  int V = *v;
Packit 9c6abc
  const int sign = (V < 0);
Packit 9c6abc
  if (sign) V = -V;
Packit 9c6abc
  if (V > (int)mtx->zthresh_[0]) {
Packit 9c6abc
    const int qV = QUANTDIV(V, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0];
Packit 9c6abc
    const int err = (V - qV);
Packit 9c6abc
    *v = sign ? -qV : qV;
Packit 9c6abc
    return (sign ? -err : err) >> DSCALE;
Packit 9c6abc
  }
Packit 9c6abc
  *v = 0;
Packit 9c6abc
  return (sign ? -V : V) >> DSCALE;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
static void CorrectDCValues(const VP8EncIterator* const it,
Packit 9c6abc
                            const VP8Matrix* const mtx,
Packit 9c6abc
                            int16_t tmp[][16], VP8ModeScore* const rd) {
Packit 9c6abc
  //         | top[0] | top[1]
Packit 9c6abc
  // --------+--------+---------
Packit 9c6abc
  // left[0] | tmp[0]   tmp[1]  <->   err0 err1
Packit 9c6abc
  // left[1] | tmp[2]   tmp[3]        err2 err3
Packit 9c6abc
  //
Packit 9c6abc
  // Final errors {err1,err2,err3} are preserved and later restored
Packit 9c6abc
  // as top[]/left[] on the next block.
Packit 9c6abc
  int ch;
Packit 9c6abc
  for (ch = 0; ch <= 1; ++ch) {
Packit 9c6abc
    const int8_t* const top = it->top_derr_[it->x_][ch];
Packit 9c6abc
    const int8_t* const left = it->left_derr_[ch];
Packit 9c6abc
    int16_t (* const c)[16] = &tmp[ch * 4];
Packit 9c6abc
    int err0, err1, err2, err3;
Packit 9c6abc
    c[0][0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE);
Packit 9c6abc
    err0 = QuantizeSingle(&c[0][0], mtx);
Packit 9c6abc
    c[1][0] += (C1 * top[1] + C2 * err0) >> (DSHIFT - DSCALE);
Packit 9c6abc
    err1 = QuantizeSingle(&c[1][0], mtx);
Packit 9c6abc
    c[2][0] += (C1 * err0 + C2 * left[1]) >> (DSHIFT - DSCALE);
Packit 9c6abc
    err2 = QuantizeSingle(&c[2][0], mtx);
Packit 9c6abc
    c[3][0] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE);
Packit 9c6abc
    err3 = QuantizeSingle(&c[3][0], mtx);
Packit 9c6abc
    // error 'err' is bounded by mtx->q_[0] which is 132 at max. Hence
Packit 9c6abc
    // err >> DSCALE will fit in an int8_t type if DSCALE>=1.
Packit 9c6abc
    assert(abs(err1) <= 127 && abs(err2) <= 127 && abs(err3) <= 127);
Packit 9c6abc
    rd->derr[ch][0] = (int8_t)err1;
Packit 9c6abc
    rd->derr[ch][1] = (int8_t)err2;
Packit 9c6abc
    rd->derr[ch][2] = (int8_t)err3;
Packit 9c6abc
  }
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
static void StoreDiffusionErrors(VP8EncIterator* const it,
Packit 9c6abc
                                 const VP8ModeScore* const rd) {
Packit 9c6abc
  int ch;
Packit 9c6abc
  for (ch = 0; ch <= 1; ++ch) {
Packit 9c6abc
    int8_t* const top = it->top_derr_[it->x_][ch];
Packit 9c6abc
    int8_t* const left = it->left_derr_[ch];
Packit 9c6abc
    left[0] = rd->derr[ch][0];            // restore err1
Packit 9c6abc
    left[1] = 3 * rd->derr[ch][2] >> 2;   //     ... 3/4th of err3
Packit 9c6abc
    top[0]  = rd->derr[ch][1];            //     ... err2
Packit 9c6abc
    top[1]  = rd->derr[ch][2] - left[1];  //     ... 1/4th of err3.
Packit 9c6abc
  }
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
#undef C1
Packit 9c6abc
#undef C2
Packit 9c6abc
#undef DSHIFT
Packit 9c6abc
#undef DSCALE
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
Packit 9c6abc
static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
Packit 9c6abc
                         uint8_t* const yuv_out, int mode) {
Packit 9c6abc
  const VP8Encoder* const enc = it->enc_;
Packit 9c6abc
  const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
Packit 9c6abc
  const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
Packit 9c6abc
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
Packit 9c6abc
  int nz = 0;
Packit 9c6abc
  int n;
Packit 9c6abc
  int16_t tmp[8][16];
Packit 9c6abc
Packit 9c6abc
  for (n = 0; n < 8; n += 2) {
Packit 9c6abc
    VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
Packit 9c6abc
  }
Packit 9c6abc
  if (it->top_derr_ != NULL) CorrectDCValues(it, &dqm->uv_, tmp, rd);
Packit 9c6abc
Packit 9c6abc
  if (DO_TRELLIS_UV && it->do_trellis_) {
Packit 9c6abc
    int ch, x, y;
Packit 9c6abc
    for (ch = 0, n = 0; ch <= 2; ch += 2) {
Packit 9c6abc
      for (y = 0; y < 2; ++y) {
Packit 9c6abc
        for (x = 0; x < 2; ++x, ++n) {
Packit 9c6abc
          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
Packit 9c6abc
          const int non_zero =
Packit 9c6abc
              TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
Packit 9c6abc
                                   &dqm->uv_, dqm->lambda_trellis_uv_);
Packit 9c6abc
          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
Packit 9c6abc
          nz |= non_zero << n;
Packit 9c6abc
        }
Packit 9c6abc
      }
Packit 9c6abc
    }
Packit 9c6abc
  } else {
Packit 9c6abc
    for (n = 0; n < 8; n += 2) {
Packit 9c6abc
      nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
Packit 9c6abc
    }
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  for (n = 0; n < 8; n += 2) {
Packit 9c6abc
    VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);
Packit 9c6abc
  }
Packit 9c6abc
  return (nz << 16);
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
// RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.
Packit 9c6abc
// Pick the mode is lower RD-cost = Rate + lambda * Distortion.
Packit 9c6abc
Packit 9c6abc
static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
Packit 9c6abc
  // We look at the first three AC coefficients to determine what is the average
Packit 9c6abc
  // delta between each sub-4x4 block.
Packit 9c6abc
  const int v0 = abs(DCs[1]);
Packit 9c6abc
  const int v1 = abs(DCs[2]);
Packit 9c6abc
  const int v2 = abs(DCs[4]);
Packit 9c6abc
  int max_v = (v1 > v0) ? v1 : v0;
Packit 9c6abc
  max_v = (v2 > max_v) ? v2 : max_v;
Packit 9c6abc
  if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
// Exchanges the two VP8ModeScore pointers *a and *b.
static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
  VP8ModeScore* const tmp = *a;
  *a = *b;
  *b = tmp;
}

// Exchanges the two byte pointers *a and *b.
static void SwapPtr(uint8_t** a, uint8_t** b) {
  uint8_t* const tmp = *a;
  *a = *b;
  *b = tmp;
}

// Exchanges the iterator's two output buffers, it->yuv_out_ and it->yuv_out2_.
static void SwapOut(VP8EncIterator* const it) {
  SwapPtr(&it->yuv_out_, &it->yuv_out2_);
}

// Counts the non-zero AC levels (indices 1..15 of each group of 16 levels)
// over 'num_blocks' consecutive blocks starting at 'levels'.
// Returns 0 as soon as the running count exceeds 'thresh', and 1 if all the
// blocks were scanned without exceeding it (the result is a boolean, although
// typed as score_t).
static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
  score_t score = 0;
  while (num_blocks-- > 0) {      // TODO(skal): refine positional scoring?
    int i;
    for (i = 1; i < 16; ++i) {    // omit DC, we're only interested in AC
      score += (levels[i] != 0);
      if (score > thresh) return 0;
    }
    levels += 16;
  }
  return 1;
}

// Evaluates all the intra16 prediction modes and keeps the one with the lowest
// RD score.
// Each candidate is reconstructed into the scratch buffer it->yuv_out2_; when
// it becomes the new best, the output buffers are swapped, so that
// it->yuv_out_ holds the best reconstruction at the end of the loop.
// On return, *rd contains the best candidate's scores and levels (with the
// score recomputed using dqm->lambda_mode_), and the selected mode has been
// passed to VP8SetIntra16Mode().
static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
  const int kNumBlocks = 16;
  VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_i16_;
  const int tlambda = dqm->tlambda_;
  const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
  VP8ModeScore rd_tmp;
  // 'rd_cur' and 'rd_best' alternate between 'rd_tmp' and '*rd'.
  VP8ModeScore* rd_cur = &rd_tmp;
  VP8ModeScore* rd_best = rd;
  int mode;

  rd->mode_i16 = -1;
  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
    uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC;  // scratch buffer
    rd_cur->mode_i16 = mode;

    // Reconstruct
    rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);

    // Measure RD-score
    rd_cur->D = VP8SSE16x16(src, tmp_dst);
    rd_cur->SD =
        tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
    rd_cur->H = VP8FixedCostsI16[mode];
    rd_cur->R = VP8GetCostLuma16(it, rd_cur);
    if (mode > 0 &&
        IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
      // penalty to avoid flat area to be mispredicted by complex mode
      rd_cur->R += FLATNESS_PENALTY * kNumBlocks;
    }

    // Since we always examine Intra16 first, we can overwrite *rd directly.
    SetRDScore(lambda, rd_cur);
    if (mode == 0 || rd_cur->score < rd_best->score) {
      SwapModeScore(&rd_cur, &rd_best);
      SwapOut(it);
    }
  }
  // The best candidate may have ended up in 'rd_tmp': copy it back if so.
  if (rd_best != rd) {
    memcpy(rd, rd_best, sizeof(*rd));
  }
  SetRDScore(dqm->lambda_mode_, rd);   // finalize score for mode decision.
  VP8SetIntra16Mode(it, rd->mode_i16);

  // we have a blocky macroblock (only DCs are non-zero) with fairly high
  // distortion, record max delta so we can later adjust the minimal filtering
  // strength needed to smooth these blocks out.
  if ((rd->nz & 0x100ffff) == 0x1000000 && rd->D > dqm->min_disto_) {
    StoreMaxDelta(dqm, rd->y_dc_levels);
  }
}

//------------------------------------------------------------------------------
Packit 9c6abc
Packit 9c6abc
// return the cost array corresponding to the surrounding prediction modes.
Packit 9c6abc
static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
Packit 9c6abc
                                     const uint8_t modes[16]) {
Packit 9c6abc
  const int preds_w = it->enc_->preds_w_;
Packit 9c6abc
  const int x = (it->i4_ & 3), y = it->i4_ >> 2;
Packit 9c6abc
  const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
Packit 9c6abc
  const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
Packit 9c6abc
  return VP8FixedCostsI4[top][left];
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
// Evaluates the intra4 prediction modes for each sub-block (iterating with
// VP8IteratorStartI4() / VP8IteratorRotateI4()) and decides whether intra4
// should replace the candidate already stored in *rd.
// The best reconstructed sub-blocks are gathered in it->yuv_out2_.
// Returns 0 (leaving the score in *rd untouched) if:
//   - enc->max_i4_header_bits_ is zero,
//   - the accumulated intra4 score reaches rd->score,
//   - or the accumulated mode bits exceed enc->max_i4_header_bits_.
// Note that rd->modes_i4[], it->top_nz_[] and it->left_nz_[] may have been
// partially updated in the last two cases.
// Returns 1 if intra4 is selected: *rd then receives the intra4 score and
// levels, the modes are passed to VP8SetIntra4Mode() and the output buffers
// are swapped.
static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
  const VP8Encoder* const enc = it->enc_;
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_i4_;
  const int tlambda = dqm->tlambda_;
  const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
  uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
  int total_header_bits = 0;
  VP8ModeScore rd_best;

  if (enc->max_i4_header_bits_ == 0) {
    return 0;
  }

  InitScore(&rd_best);
  rd_best.H = 211;  // '211' is the value of VP8BitCost(0, 145)
  SetRDScore(dqm->lambda_mode_, &rd_best);
  VP8IteratorStartI4(it);
  do {
    const int kNumBlocks = 1;
    VP8ModeScore rd_i4;
    int mode;
    int best_mode = -1;
    const uint8_t* const src = src0 + VP8Scan[it->i4_];
    const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
    // 'best_block' and 'tmp_dst' are swapped each time a better mode is found,
    // so that 'best_block' always points to the best reconstruction so far.
    uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
    uint8_t* tmp_dst = it->yuv_p_ + I4TMP;    // scratch buffer.

    InitScore(&rd_i4);
    VP8MakeIntra4Preds(it);
    for (mode = 0; mode < NUM_BMODES; ++mode) {
      VP8ModeScore rd_tmp;
      int16_t tmp_levels[16];

      // Reconstruct
      rd_tmp.nz =
          ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;

      // Compute RD-score
      rd_tmp.D = VP8SSE4x4(src, tmp_dst);
      rd_tmp.SD =
          tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
                  : 0;
      rd_tmp.H = mode_costs[mode];

      // Add flatness penalty
      if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
        rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
      } else {
        rd_tmp.R = 0;
      }

      // early-out check: the score can only grow once the coefficients' cost
      // is added, so skip the mode if it's already not better.
      SetRDScore(lambda, &rd_tmp);
      if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;

      // finish computing score
      rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);
      SetRDScore(lambda, &rd_tmp);

      if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
        CopyScore(&rd_i4, &rd_tmp);
        best_mode = mode;
        SwapPtr(&tmp_dst, &best_block);
        memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,
               sizeof(rd_best.y_ac_levels[it->i4_]));
      }
    }
    SetRDScore(dqm->lambda_mode_, &rd_i4);
    AddScore(&rd_best, &rd_i4);
    if (rd_best.score >= rd->score) {
      return 0;   // not better than the current candidate in *rd
    }
    total_header_bits += (int)rd_i4.H;   // <- equal to mode_costs[best_mode];
    if (total_header_bits > enc->max_i4_header_bits_) {
      return 0;
    }
    // Copy selected samples if not in the right place already.
    if (best_block != best_blocks + VP8Scan[it->i4_]) {
      VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
    }
    rd->modes_i4[it->i4_] = best_mode;
    it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
  } while (VP8IteratorRotateI4(it, best_blocks));

  // finalize state
  CopyScore(rd, &rd_best);
  VP8SetIntra4Mode(it, rd->modes_i4);
  SwapOut(it);
  memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
  return 1;   // select intra4x4 over intra16x16
}

//------------------------------------------------------------------------------
Packit 9c6abc
Packit 9c6abc
static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
Packit 9c6abc
  const int kNumBlocks = 8;
Packit 9c6abc
  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
Packit 9c6abc
  const int lambda = dqm->lambda_uv_;
Packit 9c6abc
  const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
Packit 9c6abc
  uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC;  // scratch buffer
Packit 9c6abc
  uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;
Packit 9c6abc
  uint8_t* dst = dst0;
Packit 9c6abc
  VP8ModeScore rd_best;
Packit 9c6abc
  int mode;
Packit 9c6abc
Packit 9c6abc
  rd->mode_uv = -1;
Packit 9c6abc
  InitScore(&rd_best);
Packit 9c6abc
  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
Packit 9c6abc
    VP8ModeScore rd_uv;
Packit 9c6abc
Packit 9c6abc
    // Reconstruct
Packit 9c6abc
    rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
Packit 9c6abc
Packit 9c6abc
    // Compute RD-score
Packit 9c6abc
    rd_uv.D  = VP8SSE16x8(src, tmp_dst);
Packit 9c6abc
    rd_uv.SD = 0;    // not calling TDisto here: it tends to flatten areas.
Packit 9c6abc
    rd_uv.H  = VP8FixedCostsUV[mode];
Packit 9c6abc
    rd_uv.R  = VP8GetCostUV(it, &rd_uv);
Packit 9c6abc
    if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
Packit 9c6abc
      rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
Packit 9c6abc
    }
Packit 9c6abc
Packit 9c6abc
    SetRDScore(lambda, &rd_uv);
Packit 9c6abc
    if (mode == 0 || rd_uv.score < rd_best.score) {
Packit 9c6abc
      CopyScore(&rd_best, &rd_uv);
Packit 9c6abc
      rd->mode_uv = mode;
Packit 9c6abc
      memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
Packit 9c6abc
      if (it->top_derr_ != NULL) {
Packit 9c6abc
        memcpy(rd->derr, rd_uv.derr, sizeof(rd_uv.derr));
Packit 9c6abc
      }
Packit 9c6abc
      SwapPtr(&dst, &tmp_dst);
Packit 9c6abc
    }
Packit 9c6abc
  }
Packit 9c6abc
  VP8SetIntraUVMode(it, rd->mode_uv);
Packit 9c6abc
  AddScore(rd, &rd_best);
Packit 9c6abc
  if (dst != dst0) {   // copy 16x8 block if needed
Packit 9c6abc
    VP8Copy16x8(dst, dst0);
Packit 9c6abc
  }
Packit 9c6abc
  if (it->top_derr_ != NULL) {  // store diffusion errors for next block
Packit 9c6abc
    StoreDiffusionErrors(it, rd);
Packit 9c6abc
  }
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
Packit 9c6abc
// Final reconstruction and quantization.
Packit 9c6abc
Packit 9c6abc
static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
Packit 9c6abc
  const VP8Encoder* const enc = it->enc_;
Packit 9c6abc
  const int is_i16 = (it->mb_->type_ == 1);
Packit 9c6abc
  int nz = 0;
Packit 9c6abc
Packit 9c6abc
  if (is_i16) {
Packit 9c6abc
    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
Packit 9c6abc
  } else {
Packit 9c6abc
    VP8IteratorStartI4(it);
Packit 9c6abc
    do {
Packit 9c6abc
      const int mode =
Packit 9c6abc
          it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
Packit 9c6abc
      const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
Packit 9c6abc
      uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
Packit 9c6abc
      VP8MakeIntra4Preds(it);
Packit 9c6abc
      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
Packit 9c6abc
                              src, dst, mode) << it->i4_;
Packit 9c6abc
    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
Packit 9c6abc
  rd->nz = nz;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
// Refine intra16/intra4 sub-modes based on distortion only (not rate).
Packit 9c6abc
static void RefineUsingDistortion(VP8EncIterator* const it,
Packit 9c6abc
                                  int try_both_modes, int refine_uv_mode,
Packit 9c6abc
                                  VP8ModeScore* const rd) {
Packit 9c6abc
  score_t best_score = MAX_COST;
Packit 9c6abc
  int nz = 0;
Packit 9c6abc
  int mode;
Packit 9c6abc
  int is_i16 = try_both_modes || (it->mb_->type_ == 1);
Packit 9c6abc
Packit 9c6abc
  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
Packit 9c6abc
  // Some empiric constants, of approximate order of magnitude.
Packit 9c6abc
  const int lambda_d_i16 = 106;
Packit 9c6abc
  const int lambda_d_i4 = 11;
Packit 9c6abc
  const int lambda_d_uv = 120;
Packit 9c6abc
  score_t score_i4 = dqm->i4_penalty_;
Packit 9c6abc
  score_t i4_bit_sum = 0;
Packit 9c6abc
  const score_t bit_limit = try_both_modes ? it->enc_->mb_header_limit_
Packit 9c6abc
                                           : MAX_COST;  // no early-out allowed
Packit 9c6abc
Packit 9c6abc
  if (is_i16) {   // First, evaluate Intra16 distortion
Packit 9c6abc
    int best_mode = -1;
Packit 9c6abc
    const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
Packit 9c6abc
    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
Packit 9c6abc
      const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
Packit 9c6abc
      const score_t score = (score_t)VP8SSE16x16(src, ref) * RD_DISTO_MULT
Packit 9c6abc
                          + VP8FixedCostsI16[mode] * lambda_d_i16;
Packit 9c6abc
      if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
Packit 9c6abc
        continue;
Packit 9c6abc
      }
Packit 9c6abc
      if (score < best_score) {
Packit 9c6abc
        best_mode = mode;
Packit 9c6abc
        best_score = score;
Packit 9c6abc
      }
Packit 9c6abc
    }
Packit 9c6abc
    VP8SetIntra16Mode(it, best_mode);
Packit 9c6abc
    // we'll reconstruct later, if i16 mode actually gets selected
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  // Next, evaluate Intra4
Packit 9c6abc
  if (try_both_modes || !is_i16) {
Packit 9c6abc
    // We don't evaluate the rate here, but just account for it through a
Packit 9c6abc
    // constant penalty (i4 mode usually needs more bits compared to i16).
Packit 9c6abc
    is_i16 = 0;
Packit 9c6abc
    VP8IteratorStartI4(it);
Packit 9c6abc
    do {
Packit 9c6abc
      int best_i4_mode = -1;
Packit 9c6abc
      score_t best_i4_score = MAX_COST;
Packit 9c6abc
      const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
Packit 9c6abc
      const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
Packit 9c6abc
Packit 9c6abc
      VP8MakeIntra4Preds(it);
Packit 9c6abc
      for (mode = 0; mode < NUM_BMODES; ++mode) {
Packit 9c6abc
        const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
Packit 9c6abc
        const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT
Packit 9c6abc
                            + mode_costs[mode] * lambda_d_i4;
Packit 9c6abc
        if (score < best_i4_score) {
Packit 9c6abc
          best_i4_mode = mode;
Packit 9c6abc
          best_i4_score = score;
Packit 9c6abc
        }
Packit 9c6abc
      }
Packit 9c6abc
      i4_bit_sum += mode_costs[best_i4_mode];
Packit 9c6abc
      rd->modes_i4[it->i4_] = best_i4_mode;
Packit 9c6abc
      score_i4 += best_i4_score;
Packit 9c6abc
      if (score_i4 >= best_score || i4_bit_sum > bit_limit) {
Packit 9c6abc
        // Intra4 won't be better than Intra16. Bail out and pick Intra16.
Packit 9c6abc
        is_i16 = 1;
Packit 9c6abc
        break;
Packit 9c6abc
      } else {  // reconstruct partial block inside yuv_out2_ buffer
Packit 9c6abc
        uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
Packit 9c6abc
        nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
Packit 9c6abc
                                src, tmp_dst, best_i4_mode) << it->i4_;
Packit 9c6abc
      }
Packit 9c6abc
    } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  // Final reconstruction, depending on which mode is selected.
Packit 9c6abc
  if (!is_i16) {
Packit 9c6abc
    VP8SetIntra4Mode(it, rd->modes_i4);
Packit 9c6abc
    SwapOut(it);
Packit 9c6abc
    best_score = score_i4;
Packit 9c6abc
  } else {
Packit 9c6abc
    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  // ... and UV!
Packit 9c6abc
  if (refine_uv_mode) {
Packit 9c6abc
    int best_mode = -1;
Packit 9c6abc
    score_t best_uv_score = MAX_COST;
Packit 9c6abc
    const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
Packit 9c6abc
    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
Packit 9c6abc
      const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
Packit 9c6abc
      const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT
Packit 9c6abc
                          + VP8FixedCostsUV[mode] * lambda_d_uv;
Packit 9c6abc
      if (score < best_uv_score) {
Packit 9c6abc
        best_mode = mode;
Packit 9c6abc
        best_uv_score = score;
Packit 9c6abc
      }
Packit 9c6abc
    }
Packit 9c6abc
    VP8SetIntraUVMode(it, best_mode);
Packit 9c6abc
  }
Packit 9c6abc
  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
Packit 9c6abc
Packit 9c6abc
  rd->nz = nz;
Packit 9c6abc
  rd->score = best_score;
Packit 9c6abc
}
Packit 9c6abc
Packit 9c6abc
//------------------------------------------------------------------------------
// Entry point
Packit 9c6abc
Packit 9c6abc
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
Packit 9c6abc
                VP8RDLevel rd_opt) {
Packit 9c6abc
  int is_skipped;
Packit 9c6abc
  const int method = it->enc_->method_;
Packit 9c6abc
Packit 9c6abc
  InitScore(rd);
Packit 9c6abc
Packit 9c6abc
  // We can perform predictions for Luma16x16 and Chroma8x8 already.
Packit 9c6abc
  // Luma4x4 predictions needs to be done as-we-go.
Packit 9c6abc
  VP8MakeLuma16Preds(it);
Packit 9c6abc
  VP8MakeChroma8Preds(it);
Packit 9c6abc
Packit 9c6abc
  if (rd_opt > RD_OPT_NONE) {
Packit 9c6abc
    it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
Packit 9c6abc
    PickBestIntra16(it, rd);
Packit 9c6abc
    if (method >= 2) {
Packit 9c6abc
      PickBestIntra4(it, rd);
Packit 9c6abc
    }
Packit 9c6abc
    PickBestUV(it, rd);
Packit 9c6abc
    if (rd_opt == RD_OPT_TRELLIS) {   // finish off with trellis-optim now
Packit 9c6abc
      it->do_trellis_ = 1;
Packit 9c6abc
      SimpleQuantize(it, rd);
Packit 9c6abc
    }
Packit 9c6abc
  } else {
Packit 9c6abc
    // At this point we have heuristically decided intra16 / intra4.
Packit 9c6abc
    // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).
Packit 9c6abc
    // For method <= 1, we don't re-examine the decision but just go ahead with
Packit 9c6abc
    // quantization/reconstruction.
Packit 9c6abc
    RefineUsingDistortion(it, (method >= 2), (method >= 1), rd);
Packit 9c6abc
  }
Packit 9c6abc
  is_skipped = (rd->nz == 0);
Packit 9c6abc
  VP8SetSkip(it, is_skipped);
Packit 9c6abc
  return is_skipped;
Packit 9c6abc
}