Blob Blame History Raw
/* Copyright 2013 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Block split point selection utilities. */

#include "./block_splitter.h"

#include <string.h>  /* memcpy, memset */

#include "../common/platform.h"
#include "./bit_cost.h"
#include "./cluster.h"
#include "./command.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./memory.h"
#include "./quality.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
static const size_t kLiteralStrideLength = 70;
static const size_t kCommandStrideLength = 40;
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kSymbolsPerDistanceHistogram = 544;
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;

static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
  /* Count how many we have. */
  size_t total_length = 0;
  size_t i;
  for (i = 0; i < num_commands; ++i) {
    total_length += cmds[i].insert_len_;
  }
  return total_length;
}

static void CopyLiteralsToByteArray(const Command* cmds,
                                    const size_t num_commands,
                                    const uint8_t* data,
                                    const size_t offset,
                                    const size_t mask,
                                    uint8_t* literals) {
  size_t pos = 0;
  size_t from_pos = offset & mask;
  size_t i;
  for (i = 0; i < num_commands; ++i) {
    size_t insert_len = cmds[i].insert_len_;
    if (from_pos + insert_len > mask) {
      size_t head_size = mask + 1 - from_pos;
      memcpy(literals + pos, data + from_pos, head_size);
      from_pos = 0;
      pos += head_size;
      insert_len -= head_size;
    }
    if (insert_len > 0) {
      memcpy(literals + pos, data + from_pos, insert_len);
      pos += insert_len;
    }
    from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
  }
}

static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
  /* Initial seed should be 7. In this case, loop length is (1 << 29). */
  *seed *= 16807U;
  return *seed;
}

static BROTLI_INLINE double BitCost(size_t count) {
  return count == 0 ? -2.0 : FastLog2(count);
}

#define HISTOGRAMS_PER_BATCH 64
#define CLUSTERS_PER_BATCH 16

#define FN(X) X ## Literal
#define DataType uint8_t
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef DataType
#undef FN

#define FN(X) X ## Command
#define DataType uint16_t
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef FN

#define FN(X) X ## Distance
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef DataType
#undef FN

void BrotliInitBlockSplit(BlockSplit* self) {
  self->num_types = 0;
  self->num_blocks = 0;
  self->types = 0;
  self->lengths = 0;
  self->types_alloc_size = 0;
  self->lengths_alloc_size = 0;
}

void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
  BROTLI_FREE(m, self->types);
  BROTLI_FREE(m, self->lengths);
}

void BrotliSplitBlock(MemoryManager* m,
                      const Command* cmds,
                      const size_t num_commands,
                      const uint8_t* data,
                      const size_t pos,
                      const size_t mask,
                      const BrotliEncoderParams* params,
                      BlockSplit* literal_split,
                      BlockSplit* insert_and_copy_split,
                      BlockSplit* dist_split) {
  {
    size_t literals_count = CountLiterals(cmds, num_commands);
    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
    if (BROTLI_IS_OOM(m)) return;
    /* Create a continuous array of literals. */
    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
    /* Create the block split on the array of literals.
       Literal histograms have alphabet size 256. */
    SplitByteVectorLiteral(
        m, literals, literals_count,
        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
        literal_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, literals);
  }

  {
    /* Compute prefix codes for commands. */
    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t i;
    if (BROTLI_IS_OOM(m)) return;
    for (i = 0; i < num_commands; ++i) {
      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
    }
    /* Create the block split on the array of command prefixes. */
    SplitByteVectorCommand(
        m, insert_and_copy_codes, num_commands,
        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
        kCommandStrideLength, kCommandBlockSwitchCost, params,
        insert_and_copy_split);
    if (BROTLI_IS_OOM(m)) return;
    /* TODO: reuse for distances? */
    BROTLI_FREE(m, insert_and_copy_codes);
  }

  {
    /* Create a continuous array of distance prefixes. */
    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t j = 0;
    size_t i;
    if (BROTLI_IS_OOM(m)) return;
    for (i = 0; i < num_commands; ++i) {
      const Command* cmd = &cmds[i];
      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
        distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
      }
    }
    /* Create the block split on the array of distance prefixes. */
    SplitByteVectorDistance(
        m, distance_prefixes, j,
        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
        kCommandStrideLength, kDistanceBlockSwitchCost, params,
        dist_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, distance_prefixes);
  }
}


#if defined(__cplusplus) || defined(c_plusplus)
}  /* extern "C" */
#endif