brotli/metablock_distance.go

164 lines
6.0 KiB
Go

package brotli
/* NOLINT(build/header_guard) */
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Greedy block splitter for one block category (literal, command or distance).
*/
type blockSplitterDistance struct {
alphabet_size_ uint
min_block_size_ uint
split_threshold_ float64
num_blocks_ uint
split_ *blockSplit
histograms_ []histogramDistance
histograms_size_ *uint
target_block_size_ uint
block_size_ uint
curr_histogram_ix_ uint
last_histogram_ix_ [2]uint
last_entropy_ [2]float64
merge_last_count_ uint
}
func initBlockSplitterDistance(self *blockSplitterDistance, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramDistance, histograms_size *uint) {
var max_num_blocks uint = num_symbols/min_block_size + 1
var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
/* We have to allocate one more histogram than the maximum number of block
types for the current histogram when the meta-block is too big. */
self.alphabet_size_ = alphabet_size
self.min_block_size_ = min_block_size
self.split_threshold_ = split_threshold
self.num_blocks_ = 0
self.split_ = split
self.histograms_size_ = histograms_size
self.target_block_size_ = min_block_size
self.block_size_ = 0
self.curr_histogram_ix_ = 0
self.merge_last_count_ = 0
brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
self.split_.num_blocks = max_num_blocks
assert(*histograms == nil)
*histograms_size = max_num_types
*histograms = make([]histogramDistance, (*histograms_size))
self.histograms_ = *histograms
/* Clear only current histogram. */
histogramClearDistance(&self.histograms_[0])
self.last_histogram_ix_[1] = 0
self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
}
/* Does either of three things:
(1) emits the current block with a new block type;
(2) emits the current block with the type of the second last block;
(3) merges the current block with the last block. */
func blockSplitterFinishBlockDistance(self *blockSplitterDistance, is_final bool) {
var split *blockSplit = self.split_
var last_entropy []float64 = self.last_entropy_[:]
var histograms []histogramDistance = self.histograms_
self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
if self.num_blocks_ == 0 {
/* Create first block. */
split.lengths[0] = uint32(self.block_size_)
split.types[0] = 0
last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
last_entropy[1] = last_entropy[0]
self.num_blocks_++
split.num_types++
self.curr_histogram_ix_++
if self.curr_histogram_ix_ < *self.histograms_size_ {
histogramClearDistance(&histograms[self.curr_histogram_ix_])
}
self.block_size_ = 0
} else if self.block_size_ > 0 {
var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
var combined_histo [2]histogramDistance
var combined_entropy [2]float64
var diff [2]float64
var j uint
for j = 0; j < 2; j++ {
var last_histogram_ix uint = self.last_histogram_ix_[j]
combined_histo[j] = histograms[self.curr_histogram_ix_]
histogramAddHistogramDistance(&combined_histo[j], &histograms[last_histogram_ix])
combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
diff[j] = combined_entropy[j] - entropy - last_entropy[j]
}
if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
/* Create new block. */
split.lengths[self.num_blocks_] = uint32(self.block_size_)
split.types[self.num_blocks_] = byte(split.num_types)
self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
self.last_histogram_ix_[0] = uint(byte(split.num_types))
last_entropy[1] = last_entropy[0]
last_entropy[0] = entropy
self.num_blocks_++
split.num_types++
self.curr_histogram_ix_++
if self.curr_histogram_ix_ < *self.histograms_size_ {
histogramClearDistance(&histograms[self.curr_histogram_ix_])
}
self.block_size_ = 0
self.merge_last_count_ = 0
self.target_block_size_ = self.min_block_size_
} else if diff[1] < diff[0]-20.0 {
split.lengths[self.num_blocks_] = uint32(self.block_size_)
split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
/* Combine this block with second last block. */
var tmp uint = self.last_histogram_ix_[0]
self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
self.last_histogram_ix_[1] = tmp
histograms[self.last_histogram_ix_[0]] = combined_histo[1]
last_entropy[1] = last_entropy[0]
last_entropy[0] = combined_entropy[1]
self.num_blocks_++
self.block_size_ = 0
histogramClearDistance(&histograms[self.curr_histogram_ix_])
self.merge_last_count_ = 0
self.target_block_size_ = self.min_block_size_
} else {
/* Combine this block with last block. */
split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
histograms[self.last_histogram_ix_[0]] = combined_histo[0]
last_entropy[0] = combined_entropy[0]
if split.num_types == 1 {
last_entropy[1] = last_entropy[0]
}
self.block_size_ = 0
histogramClearDistance(&histograms[self.curr_histogram_ix_])
self.merge_last_count_++
if self.merge_last_count_ > 1 {
self.target_block_size_ += self.min_block_size_
}
}
}
if is_final {
*self.histograms_size_ = split.num_types
split.num_blocks = self.num_blocks_
}
}
/* Adds the next symbol to the current histogram. When the current histogram
reaches the target size, decides on merging the block. */
func blockSplitterAddSymbolDistance(self *blockSplitterDistance, symbol uint) {
histogramAddDistance(&self.histograms_[self.curr_histogram_ix_], symbol)
self.block_size_++
if self.block_size_ == self.target_block_size_ {
blockSplitterFinishBlockDistance(self, false) /* is_final = */
}
}