2019-03-07 01:55:38 +03:00
|
|
|
package brotli
|
|
|
|
|
|
|
|
/* Copyright 2014 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
|
|
|
|
|
|
|
|
/* Algorithms for distributing the literals and commands of a metablock between
   block types and contexts. */
|
2019-03-22 01:34:30 +03:00
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
type metaBlockSplit struct {
|
2019-03-15 22:05:31 +03:00
|
|
|
literal_split blockSplit
|
|
|
|
command_split blockSplit
|
|
|
|
distance_split blockSplit
|
2019-03-07 01:55:38 +03:00
|
|
|
literal_context_map []uint32
|
|
|
|
literal_context_map_size uint
|
|
|
|
distance_context_map []uint32
|
|
|
|
distance_context_map_size uint
|
2019-03-16 03:24:40 +03:00
|
|
|
literal_histograms []histogramLiteral
|
2019-03-07 01:55:38 +03:00
|
|
|
literal_histograms_size uint
|
2019-03-16 03:24:40 +03:00
|
|
|
command_histograms []histogramCommand
|
2019-03-07 01:55:38 +03:00
|
|
|
command_histograms_size uint
|
2019-03-16 03:24:40 +03:00
|
|
|
distance_histograms []histogramDistance
|
2019-03-07 01:55:38 +03:00
|
|
|
distance_histograms_size uint
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
func initMetaBlockSplit(mb *metaBlockSplit) {
|
2019-03-15 22:05:31 +03:00
|
|
|
initBlockSplit(&mb.literal_split)
|
|
|
|
initBlockSplit(&mb.command_split)
|
|
|
|
initBlockSplit(&mb.distance_split)
|
2019-03-07 01:55:38 +03:00
|
|
|
mb.literal_context_map = nil
|
|
|
|
mb.literal_context_map_size = 0
|
|
|
|
mb.distance_context_map = nil
|
|
|
|
mb.distance_context_map_size = 0
|
|
|
|
mb.literal_histograms = nil
|
|
|
|
mb.literal_histograms_size = 0
|
|
|
|
mb.command_histograms = nil
|
|
|
|
mb.command_histograms_size = 0
|
|
|
|
mb.distance_histograms = nil
|
|
|
|
mb.distance_histograms_size = 0
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
func destroyMetaBlockSplit(mb *metaBlockSplit) {
|
2019-03-15 22:05:31 +03:00
|
|
|
destroyBlockSplit(&mb.literal_split)
|
|
|
|
destroyBlockSplit(&mb.command_split)
|
|
|
|
destroyBlockSplit(&mb.distance_split)
|
2019-03-07 01:55:38 +03:00
|
|
|
mb.literal_context_map = nil
|
|
|
|
mb.distance_context_map = nil
|
|
|
|
mb.literal_histograms = nil
|
|
|
|
mb.command_histograms = nil
|
|
|
|
mb.distance_histograms = nil
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
func initDistanceParams(params *encoderParams, npostfix uint32, ndirect uint32) {
|
|
|
|
var dist_params *distanceParams = ¶ms.dist
|
2019-03-07 01:55:38 +03:00
|
|
|
var alphabet_size uint32
|
|
|
|
var max_distance uint32
|
|
|
|
|
|
|
|
dist_params.distance_postfix_bits = npostfix
|
|
|
|
dist_params.num_direct_distance_codes = ndirect
|
|
|
|
|
2019-03-15 22:05:31 +03:00
|
|
|
alphabet_size = uint32(distanceAlphabetSize(uint(npostfix), uint(ndirect), maxDistanceBits))
|
|
|
|
max_distance = ndirect + (1 << (maxDistanceBits + npostfix + 2)) - (1 << (npostfix + 2))
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
if params.large_window {
|
2019-03-15 22:05:31 +03:00
|
|
|
var bound = [maxNpostfix + 1]uint32{0, 4, 12, 28}
|
2019-03-07 01:55:38 +03:00
|
|
|
var postfix uint32 = 1 << npostfix
|
2019-03-15 22:05:31 +03:00
|
|
|
alphabet_size = uint32(distanceAlphabetSize(uint(npostfix), uint(ndirect), largeMaxDistanceBits))
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
/* The maximum distance is set so that no distance symbol used can encode
|
|
|
|
a distance larger than BROTLI_MAX_ALLOWED_DISTANCE with all
|
|
|
|
its extra bits set. */
|
|
|
|
if ndirect < bound[npostfix] {
|
2019-03-16 03:24:40 +03:00
|
|
|
max_distance = maxAllowedDistance - (bound[npostfix] - ndirect)
|
2019-03-07 01:55:38 +03:00
|
|
|
} else if ndirect >= bound[npostfix]+postfix {
|
|
|
|
max_distance = (3 << 29) - 4 + (ndirect - bound[npostfix])
|
|
|
|
} else {
|
2019-03-16 03:24:40 +03:00
|
|
|
max_distance = maxAllowedDistance
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
dist_params.alphabet_size = alphabet_size
|
|
|
|
dist_params.max_distance = uint(max_distance)
|
|
|
|
}
|
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
func recomputeDistancePrefixes(cmds []command, orig_params *distanceParams, new_params *distanceParams) {
|
2019-03-07 01:55:38 +03:00
|
|
|
if orig_params.distance_postfix_bits == new_params.distance_postfix_bits && orig_params.num_direct_distance_codes == new_params.num_direct_distance_codes {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
for i := range cmds {
|
2019-03-15 22:05:31 +03:00
|
|
|
var cmd *command = &cmds[i]
|
|
|
|
if commandCopyLen(cmd) != 0 && cmd.cmd_prefix_ >= 128 {
|
2019-03-16 04:00:20 +03:00
|
|
|
prefixEncodeCopyDistance(uint(commandRestoreDistanceCode(cmd, orig_params)), uint(new_params.num_direct_distance_codes), uint(new_params.distance_postfix_bits), &cmd.dist_prefix_, &cmd.dist_extra_)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
func computeDistanceCost(cmds []command, orig_params *distanceParams, new_params *distanceParams, cost *float64) bool {
|
2019-03-07 01:55:38 +03:00
|
|
|
var equal_params bool = false
|
|
|
|
var dist_prefix uint16
|
|
|
|
var dist_extra uint32
|
|
|
|
var extra_bits float64 = 0.0
|
2019-03-16 03:24:40 +03:00
|
|
|
var histo histogramDistance
|
|
|
|
histogramClearDistance(&histo)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
if orig_params.distance_postfix_bits == new_params.distance_postfix_bits && orig_params.num_direct_distance_codes == new_params.num_direct_distance_codes {
|
|
|
|
equal_params = true
|
|
|
|
}
|
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
for i := range cmds {
|
|
|
|
cmd := &cmds[i]
|
2019-03-15 22:05:31 +03:00
|
|
|
if commandCopyLen(cmd) != 0 && cmd.cmd_prefix_ >= 128 {
|
2019-03-07 01:55:38 +03:00
|
|
|
if equal_params {
|
|
|
|
dist_prefix = cmd.dist_prefix_
|
|
|
|
} else {
|
2019-03-15 22:05:31 +03:00
|
|
|
var distance uint32 = commandRestoreDistanceCode(cmd, orig_params)
|
2019-03-07 01:55:38 +03:00
|
|
|
if distance > uint32(new_params.max_distance) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2019-03-16 04:00:20 +03:00
|
|
|
prefixEncodeCopyDistance(uint(distance), uint(new_params.num_direct_distance_codes), uint(new_params.distance_postfix_bits), &dist_prefix, &dist_extra)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
histogramAddDistance(&histo, uint(dist_prefix)&0x3FF)
|
2019-03-07 01:55:38 +03:00
|
|
|
extra_bits += float64(dist_prefix >> 10)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-10 04:11:32 +03:00
|
|
|
*cost = populationCostDistance(&histo) + extra_bits
|
2019-03-07 01:55:38 +03:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
/* buildMetaBlock_kMaxNumberOfHistograms is the histogram-count limit that
   buildMetaBlock passes to the clustering routines. Histogram ids need to
   fit in one byte. */
var buildMetaBlock_kMaxNumberOfHistograms = uint(256)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
func buildMetaBlock(ringbuffer []byte, pos uint, mask uint, params *encoderParams, prev_byte byte, prev_byte2 byte, cmds []command, literal_context_mode int, mb *metaBlockSplit) {
|
2019-03-16 03:24:40 +03:00
|
|
|
var distance_histograms []histogramDistance
|
|
|
|
var literal_histograms []histogramLiteral
|
2019-03-07 01:55:38 +03:00
|
|
|
var literal_context_modes []int = nil
|
|
|
|
var literal_histograms_size uint
|
|
|
|
var distance_histograms_size uint
|
|
|
|
var i uint
|
|
|
|
var literal_context_multiplier uint = 1
|
|
|
|
var npostfix uint32
|
|
|
|
var ndirect_msb uint32 = 0
|
|
|
|
var check_orig bool = true
|
|
|
|
var best_dist_cost float64 = 1e99
|
2019-03-16 03:24:40 +03:00
|
|
|
var orig_params encoderParams = *params
|
2019-03-07 01:55:38 +03:00
|
|
|
/* Histogram ids need to fit in one byte. */
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
var new_params encoderParams = *params
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2019-03-15 22:05:31 +03:00
|
|
|
for npostfix = 0; npostfix <= maxNpostfix; npostfix++ {
|
2019-03-07 01:55:38 +03:00
|
|
|
for ; ndirect_msb < 16; ndirect_msb++ {
|
|
|
|
var ndirect uint32 = ndirect_msb << npostfix
|
|
|
|
var skip bool
|
|
|
|
var dist_cost float64
|
2019-03-16 03:24:40 +03:00
|
|
|
initDistanceParams(&new_params, npostfix, ndirect)
|
2019-03-07 01:55:38 +03:00
|
|
|
if npostfix == orig_params.dist.distance_postfix_bits && ndirect == orig_params.dist.num_direct_distance_codes {
|
|
|
|
check_orig = false
|
|
|
|
}
|
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
skip = !computeDistanceCost(cmds, &orig_params.dist, &new_params.dist, &dist_cost)
|
2019-03-07 01:55:38 +03:00
|
|
|
if skip || (dist_cost > best_dist_cost) {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
best_dist_cost = dist_cost
|
|
|
|
params.dist = new_params.dist
|
|
|
|
}
|
|
|
|
|
|
|
|
if ndirect_msb > 0 {
|
|
|
|
ndirect_msb--
|
|
|
|
}
|
|
|
|
ndirect_msb /= 2
|
|
|
|
}
|
|
|
|
|
|
|
|
if check_orig {
|
|
|
|
var dist_cost float64
|
2020-05-09 02:48:16 +03:00
|
|
|
computeDistanceCost(cmds, &orig_params.dist, &orig_params.dist, &dist_cost)
|
2019-03-07 01:55:38 +03:00
|
|
|
if dist_cost < best_dist_cost {
|
|
|
|
/* NB: currently unused; uncomment when more param tuning is added. */
|
|
|
|
/* best_dist_cost = dist_cost; */
|
|
|
|
params.dist = orig_params.dist
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
recomputeDistancePrefixes(cmds, &orig_params.dist, ¶ms.dist)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
splitBlock(cmds, ringbuffer, pos, mask, params, &mb.literal_split, &mb.command_split, &mb.distance_split)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
if !params.disable_literal_context_modeling {
|
2019-03-16 03:24:40 +03:00
|
|
|
literal_context_multiplier = 1 << literalContextBits
|
2019-03-07 01:55:38 +03:00
|
|
|
literal_context_modes = make([]int, (mb.literal_split.num_types))
|
|
|
|
for i = 0; i < mb.literal_split.num_types; i++ {
|
|
|
|
literal_context_modes[i] = literal_context_mode
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
literal_histograms_size = mb.literal_split.num_types * literal_context_multiplier
|
2019-03-16 03:24:40 +03:00
|
|
|
literal_histograms = make([]histogramLiteral, literal_histograms_size)
|
|
|
|
clearHistogramsLiteral(literal_histograms, literal_histograms_size)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
distance_histograms_size = mb.distance_split.num_types << distanceContextBits
|
|
|
|
distance_histograms = make([]histogramDistance, distance_histograms_size)
|
|
|
|
clearHistogramsDistance(distance_histograms, distance_histograms_size)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
assert(mb.command_histograms == nil)
|
|
|
|
mb.command_histograms_size = mb.command_split.num_types
|
2019-03-16 03:24:40 +03:00
|
|
|
mb.command_histograms = make([]histogramCommand, (mb.command_histograms_size))
|
|
|
|
clearHistogramsCommand(mb.command_histograms, mb.command_histograms_size)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
buildHistogramsWithContext(cmds, &mb.literal_split, &mb.command_split, &mb.distance_split, ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes, literal_histograms, mb.command_histograms, distance_histograms)
|
2019-03-07 01:55:38 +03:00
|
|
|
literal_context_modes = nil
|
|
|
|
|
|
|
|
assert(mb.literal_context_map == nil)
|
2019-03-16 03:24:40 +03:00
|
|
|
mb.literal_context_map_size = mb.literal_split.num_types << literalContextBits
|
2019-03-07 01:55:38 +03:00
|
|
|
mb.literal_context_map = make([]uint32, (mb.literal_context_map_size))
|
|
|
|
|
|
|
|
assert(mb.literal_histograms == nil)
|
|
|
|
mb.literal_histograms_size = mb.literal_context_map_size
|
2019-03-16 03:24:40 +03:00
|
|
|
mb.literal_histograms = make([]histogramLiteral, (mb.literal_histograms_size))
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
clusterHistogramsLiteral(literal_histograms, literal_histograms_size, buildMetaBlock_kMaxNumberOfHistograms, mb.literal_histograms, &mb.literal_histograms_size, mb.literal_context_map)
|
2019-03-07 01:55:38 +03:00
|
|
|
literal_histograms = nil
|
|
|
|
|
|
|
|
if params.disable_literal_context_modeling {
|
|
|
|
/* Distribute assignment to all contexts. */
|
|
|
|
for i = mb.literal_split.num_types; i != 0; {
|
|
|
|
var j uint = 0
|
|
|
|
i--
|
2019-03-16 03:24:40 +03:00
|
|
|
for ; j < 1<<literalContextBits; j++ {
|
|
|
|
mb.literal_context_map[(i<<literalContextBits)+j] = mb.literal_context_map[i]
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(mb.distance_context_map == nil)
|
2019-03-16 03:24:40 +03:00
|
|
|
mb.distance_context_map_size = mb.distance_split.num_types << distanceContextBits
|
2019-03-07 01:55:38 +03:00
|
|
|
mb.distance_context_map = make([]uint32, (mb.distance_context_map_size))
|
|
|
|
|
|
|
|
assert(mb.distance_histograms == nil)
|
|
|
|
mb.distance_histograms_size = mb.distance_context_map_size
|
2019-03-16 03:24:40 +03:00
|
|
|
mb.distance_histograms = make([]histogramDistance, (mb.distance_histograms_size))
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
clusterHistogramsDistance(distance_histograms, mb.distance_context_map_size, buildMetaBlock_kMaxNumberOfHistograms, mb.distance_histograms, &mb.distance_histograms_size, mb.distance_context_map)
|
2019-03-07 01:55:38 +03:00
|
|
|
distance_histograms = nil
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
/* maxStaticContexts is the upper bound on the number of contexts a
   contextBlockSplitter accepts; it also sizes the splitter's fixed entropy
   arrays. It is left untyped so it can be compared with uint values and used
   in array lengths. */
const (
	maxStaticContexts = 13
)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
/* Greedy block splitter for one block category (literal, command or distance).
|
|
|
|
Gathers histograms for all context buckets. */
|
2019-03-16 03:24:40 +03:00
|
|
|
type contextBlockSplitter struct {
|
2019-03-07 01:55:38 +03:00
|
|
|
alphabet_size_ uint
|
|
|
|
num_contexts_ uint
|
|
|
|
max_block_types_ uint
|
|
|
|
min_block_size_ uint
|
|
|
|
split_threshold_ float64
|
|
|
|
num_blocks_ uint
|
2019-03-15 22:05:31 +03:00
|
|
|
split_ *blockSplit
|
2019-03-16 03:24:40 +03:00
|
|
|
histograms_ []histogramLiteral
|
2019-03-07 01:55:38 +03:00
|
|
|
histograms_size_ *uint
|
|
|
|
target_block_size_ uint
|
|
|
|
block_size_ uint
|
|
|
|
curr_histogram_ix_ uint
|
|
|
|
last_histogram_ix_ [2]uint
|
2019-03-16 03:24:40 +03:00
|
|
|
last_entropy_ [2 * maxStaticContexts]float64
|
2019-03-07 01:55:38 +03:00
|
|
|
merge_last_count_ uint
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
func initContextBlockSplitter(self *contextBlockSplitter, alphabet_size uint, num_contexts uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramLiteral, histograms_size *uint) {
|
2019-03-07 01:55:38 +03:00
|
|
|
var max_num_blocks uint = num_symbols/min_block_size + 1
|
|
|
|
var max_num_types uint
|
2019-03-16 03:24:40 +03:00
|
|
|
assert(num_contexts <= maxStaticContexts)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
self.alphabet_size_ = alphabet_size
|
|
|
|
self.num_contexts_ = num_contexts
|
2019-03-15 22:05:31 +03:00
|
|
|
self.max_block_types_ = maxNumberOfBlockTypes / num_contexts
|
2019-03-07 01:55:38 +03:00
|
|
|
self.min_block_size_ = min_block_size
|
|
|
|
self.split_threshold_ = split_threshold
|
|
|
|
self.num_blocks_ = 0
|
|
|
|
self.split_ = split
|
|
|
|
self.histograms_size_ = histograms_size
|
|
|
|
self.target_block_size_ = min_block_size
|
|
|
|
self.block_size_ = 0
|
|
|
|
self.curr_histogram_ix_ = 0
|
|
|
|
self.merge_last_count_ = 0
|
|
|
|
|
|
|
|
/* We have to allocate one more histogram than the maximum number of block
|
|
|
|
types for the current histogram when the meta-block is too big. */
|
|
|
|
max_num_types = brotli_min_size_t(max_num_blocks, self.max_block_types_+1)
|
|
|
|
|
|
|
|
brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
|
|
|
|
brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
|
|
|
|
split.num_blocks = max_num_blocks
|
|
|
|
assert(*histograms == nil)
|
|
|
|
*histograms_size = max_num_types * num_contexts
|
2019-03-16 03:24:40 +03:00
|
|
|
*histograms = make([]histogramLiteral, (*histograms_size))
|
2019-03-07 01:55:38 +03:00
|
|
|
self.histograms_ = *histograms
|
|
|
|
|
|
|
|
/* Clear only current histogram. */
|
2019-03-16 03:24:40 +03:00
|
|
|
clearHistogramsLiteral(self.histograms_[0:], num_contexts)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
self.last_histogram_ix_[1] = 0
|
|
|
|
self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Does either of three things:
|
|
|
|
(1) emits the current block with a new block type;
|
|
|
|
(2) emits the current block with the type of the second last block;
|
|
|
|
(3) merges the current block with the last block. */
|
2019-03-16 03:24:40 +03:00
|
|
|
func contextBlockSplitterFinishBlock(self *contextBlockSplitter, is_final bool) {
|
2019-03-15 22:05:31 +03:00
|
|
|
var split *blockSplit = self.split_
|
2019-03-07 01:55:38 +03:00
|
|
|
var num_contexts uint = self.num_contexts_
|
|
|
|
var last_entropy []float64 = self.last_entropy_[:]
|
2019-03-16 03:24:40 +03:00
|
|
|
var histograms []histogramLiteral = self.histograms_
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
if self.block_size_ < self.min_block_size_ {
|
|
|
|
self.block_size_ = self.min_block_size_
|
|
|
|
}
|
|
|
|
|
|
|
|
if self.num_blocks_ == 0 {
|
|
|
|
var i uint
|
|
|
|
|
|
|
|
/* Create first block. */
|
|
|
|
split.lengths[0] = uint32(self.block_size_)
|
|
|
|
|
|
|
|
split.types[0] = 0
|
|
|
|
|
|
|
|
for i = 0; i < num_contexts; i++ {
|
2019-03-10 04:11:32 +03:00
|
|
|
last_entropy[i] = bitsEntropy(histograms[i].data_[:], self.alphabet_size_)
|
2019-03-07 01:55:38 +03:00
|
|
|
last_entropy[num_contexts+i] = last_entropy[i]
|
|
|
|
}
|
|
|
|
|
|
|
|
self.num_blocks_++
|
|
|
|
split.num_types++
|
|
|
|
self.curr_histogram_ix_ += num_contexts
|
|
|
|
if self.curr_histogram_ix_ < *self.histograms_size_ {
|
2019-03-16 03:24:40 +03:00
|
|
|
clearHistogramsLiteral(self.histograms_[self.curr_histogram_ix_:], self.num_contexts_)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
self.block_size_ = 0
|
|
|
|
} else if self.block_size_ > 0 {
|
2019-03-16 03:24:40 +03:00
|
|
|
var entropy [maxStaticContexts]float64
|
|
|
|
var combined_histo []histogramLiteral = make([]histogramLiteral, (2 * num_contexts))
|
|
|
|
var combined_entropy [2 * maxStaticContexts]float64
|
2019-03-07 01:55:38 +03:00
|
|
|
var diff = [2]float64{0.0}
|
|
|
|
/* Try merging the set of histograms for the current block type with the
|
|
|
|
respective set of histograms for the last and second last block types.
|
|
|
|
Decide over the split based on the total reduction of entropy across
|
|
|
|
all contexts. */
|
|
|
|
|
|
|
|
var i uint
|
|
|
|
for i = 0; i < num_contexts; i++ {
|
|
|
|
var curr_histo_ix uint = self.curr_histogram_ix_ + i
|
|
|
|
var j uint
|
2019-03-10 04:11:32 +03:00
|
|
|
entropy[i] = bitsEntropy(histograms[curr_histo_ix].data_[:], self.alphabet_size_)
|
2019-03-07 01:55:38 +03:00
|
|
|
for j = 0; j < 2; j++ {
|
|
|
|
var jx uint = j*num_contexts + i
|
|
|
|
var last_histogram_ix uint = self.last_histogram_ix_[j] + i
|
|
|
|
combined_histo[jx] = histograms[curr_histo_ix]
|
2019-03-16 03:24:40 +03:00
|
|
|
histogramAddHistogramLiteral(&combined_histo[jx], &histograms[last_histogram_ix])
|
2019-03-10 04:11:32 +03:00
|
|
|
combined_entropy[jx] = bitsEntropy(combined_histo[jx].data_[0:], self.alphabet_size_)
|
2019-03-07 01:55:38 +03:00
|
|
|
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if split.num_types < self.max_block_types_ && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
|
|
|
|
/* Create new block. */
|
|
|
|
split.lengths[self.num_blocks_] = uint32(self.block_size_)
|
|
|
|
|
|
|
|
split.types[self.num_blocks_] = byte(split.num_types)
|
|
|
|
self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
|
|
|
|
self.last_histogram_ix_[0] = split.num_types * num_contexts
|
|
|
|
for i = 0; i < num_contexts; i++ {
|
|
|
|
last_entropy[num_contexts+i] = last_entropy[i]
|
|
|
|
last_entropy[i] = entropy[i]
|
|
|
|
}
|
|
|
|
|
|
|
|
self.num_blocks_++
|
|
|
|
split.num_types++
|
|
|
|
self.curr_histogram_ix_ += num_contexts
|
|
|
|
if self.curr_histogram_ix_ < *self.histograms_size_ {
|
2019-03-16 03:24:40 +03:00
|
|
|
clearHistogramsLiteral(self.histograms_[self.curr_histogram_ix_:], self.num_contexts_)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
self.block_size_ = 0
|
|
|
|
self.merge_last_count_ = 0
|
|
|
|
self.target_block_size_ = self.min_block_size_
|
|
|
|
} else if diff[1] < diff[0]-20.0 {
|
|
|
|
split.lengths[self.num_blocks_] = uint32(self.block_size_)
|
|
|
|
split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
|
|
|
|
/* Combine this block with second last block. */
|
|
|
|
|
|
|
|
var tmp uint = self.last_histogram_ix_[0]
|
|
|
|
self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
|
|
|
|
self.last_histogram_ix_[1] = tmp
|
|
|
|
for i = 0; i < num_contexts; i++ {
|
|
|
|
histograms[self.last_histogram_ix_[0]+i] = combined_histo[num_contexts+i]
|
|
|
|
last_entropy[num_contexts+i] = last_entropy[i]
|
|
|
|
last_entropy[i] = combined_entropy[num_contexts+i]
|
2019-03-16 03:24:40 +03:00
|
|
|
histogramClearLiteral(&histograms[self.curr_histogram_ix_+i])
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
self.num_blocks_++
|
|
|
|
self.block_size_ = 0
|
|
|
|
self.merge_last_count_ = 0
|
|
|
|
self.target_block_size_ = self.min_block_size_
|
|
|
|
} else {
|
|
|
|
/* Combine this block with last block. */
|
|
|
|
split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
|
|
|
|
|
|
|
|
for i = 0; i < num_contexts; i++ {
|
|
|
|
histograms[self.last_histogram_ix_[0]+i] = combined_histo[i]
|
|
|
|
last_entropy[i] = combined_entropy[i]
|
|
|
|
if split.num_types == 1 {
|
|
|
|
last_entropy[num_contexts+i] = last_entropy[i]
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
histogramClearLiteral(&histograms[self.curr_histogram_ix_+i])
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
self.block_size_ = 0
|
|
|
|
self.merge_last_count_++
|
|
|
|
if self.merge_last_count_ > 1 {
|
|
|
|
self.target_block_size_ += self.min_block_size_
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
combined_histo = nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if is_final {
|
|
|
|
*self.histograms_size_ = split.num_types * num_contexts
|
|
|
|
split.num_blocks = self.num_blocks_
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Adds the next symbol to the current block type and context. When the
|
|
|
|
current block reaches the target size, decides on merging the block. */
|
2019-03-16 03:24:40 +03:00
|
|
|
func contextBlockSplitterAddSymbol(self *contextBlockSplitter, symbol uint, context uint) {
|
|
|
|
histogramAddLiteral(&self.histograms_[self.curr_histogram_ix_+context], symbol)
|
2019-03-07 01:55:38 +03:00
|
|
|
self.block_size_++
|
|
|
|
if self.block_size_ == self.target_block_size_ {
|
2019-03-16 03:24:40 +03:00
|
|
|
contextBlockSplitterFinishBlock(self, false) /* is_final = */
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
func mapStaticContexts(num_contexts uint, static_context_map []uint32, mb *metaBlockSplit) {
|
2019-03-07 01:55:38 +03:00
|
|
|
var i uint
|
|
|
|
assert(mb.literal_context_map == nil)
|
2019-03-16 03:24:40 +03:00
|
|
|
mb.literal_context_map_size = mb.literal_split.num_types << literalContextBits
|
2019-03-07 01:55:38 +03:00
|
|
|
mb.literal_context_map = make([]uint32, (mb.literal_context_map_size))
|
|
|
|
|
|
|
|
for i = 0; i < mb.literal_split.num_types; i++ {
|
|
|
|
var offset uint32 = uint32(i * num_contexts)
|
|
|
|
var j uint
|
2019-03-16 03:24:40 +03:00
|
|
|
for j = 0; j < 1<<literalContextBits; j++ {
|
|
|
|
mb.literal_context_map[(i<<literalContextBits)+j] = offset + static_context_map[j]
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
func buildMetaBlockGreedyInternal(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, mb *metaBlockSplit) {
|
2019-03-07 01:55:38 +03:00
|
|
|
var lit_blocks struct {
|
2019-03-16 03:24:40 +03:00
|
|
|
plain blockSplitterLiteral
|
|
|
|
ctx contextBlockSplitter
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
2019-03-16 03:24:40 +03:00
|
|
|
var cmd_blocks blockSplitterCommand
|
|
|
|
var dist_blocks blockSplitterDistance
|
2019-03-07 01:55:38 +03:00
|
|
|
var num_literals uint = 0
|
2020-05-09 02:48:16 +03:00
|
|
|
for i := range commands {
|
2019-03-07 01:55:38 +03:00
|
|
|
num_literals += uint(commands[i].insert_len_)
|
|
|
|
}
|
|
|
|
|
|
|
|
if num_contexts == 1 {
|
2019-03-16 03:24:40 +03:00
|
|
|
initBlockSplitterLiteral(&lit_blocks.plain, 256, 512, 400.0, num_literals, &mb.literal_split, &mb.literal_histograms, &mb.literal_histograms_size)
|
2019-03-07 01:55:38 +03:00
|
|
|
} else {
|
2019-03-16 03:24:40 +03:00
|
|
|
initContextBlockSplitter(&lit_blocks.ctx, 256, num_contexts, 512, 400.0, num_literals, &mb.literal_split, &mb.literal_histograms, &mb.literal_histograms_size)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
initBlockSplitterCommand(&cmd_blocks, numCommandSymbols, 1024, 500.0, uint(len(commands)), &mb.command_split, &mb.command_histograms, &mb.command_histograms_size)
|
|
|
|
initBlockSplitterDistance(&dist_blocks, 64, 512, 100.0, uint(len(commands)), &mb.distance_split, &mb.distance_histograms, &mb.distance_histograms_size)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
for _, cmd := range commands {
|
2019-03-07 01:55:38 +03:00
|
|
|
var j uint
|
2019-03-16 03:24:40 +03:00
|
|
|
blockSplitterAddSymbolCommand(&cmd_blocks, uint(cmd.cmd_prefix_))
|
2019-03-07 01:55:38 +03:00
|
|
|
for j = uint(cmd.insert_len_); j != 0; j-- {
|
|
|
|
var literal byte = ringbuffer[pos&mask]
|
|
|
|
if num_contexts == 1 {
|
2019-03-16 03:24:40 +03:00
|
|
|
blockSplitterAddSymbolLiteral(&lit_blocks.plain, uint(literal))
|
2019-03-07 01:55:38 +03:00
|
|
|
} else {
|
2019-03-16 03:24:40 +03:00
|
|
|
var context uint = uint(getContext(prev_byte, prev_byte2, literal_context_lut))
|
|
|
|
contextBlockSplitterAddSymbol(&lit_blocks.ctx, uint(literal), uint(static_context_map[context]))
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
prev_byte2 = prev_byte
|
|
|
|
prev_byte = literal
|
|
|
|
pos++
|
|
|
|
}
|
|
|
|
|
2019-03-15 22:05:31 +03:00
|
|
|
pos += uint(commandCopyLen(&cmd))
|
|
|
|
if commandCopyLen(&cmd) != 0 {
|
2019-03-07 01:55:38 +03:00
|
|
|
prev_byte2 = ringbuffer[(pos-2)&mask]
|
|
|
|
prev_byte = ringbuffer[(pos-1)&mask]
|
|
|
|
if cmd.cmd_prefix_ >= 128 {
|
2019-03-16 03:24:40 +03:00
|
|
|
blockSplitterAddSymbolDistance(&dist_blocks, uint(cmd.dist_prefix_)&0x3FF)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if num_contexts == 1 {
|
2019-03-16 03:24:40 +03:00
|
|
|
blockSplitterFinishBlockLiteral(&lit_blocks.plain, true) /* is_final = */
|
2019-03-07 01:55:38 +03:00
|
|
|
} else {
|
2019-03-16 03:24:40 +03:00
|
|
|
contextBlockSplitterFinishBlock(&lit_blocks.ctx, true) /* is_final = */
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
blockSplitterFinishBlockCommand(&cmd_blocks, true) /* is_final = */
|
|
|
|
blockSplitterFinishBlockDistance(&dist_blocks, true) /* is_final = */
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
if num_contexts > 1 {
|
2019-03-16 03:24:40 +03:00
|
|
|
mapStaticContexts(num_contexts, static_context_map, mb)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-09 02:48:16 +03:00
|
|
|
func buildMetaBlockGreedy(ringbuffer []byte, pos uint, mask uint, prev_byte byte, prev_byte2 byte, literal_context_lut contextLUT, num_contexts uint, static_context_map []uint32, commands []command, mb *metaBlockSplit) {
|
2019-03-07 01:55:38 +03:00
|
|
|
if num_contexts == 1 {
|
2020-05-09 02:48:16 +03:00
|
|
|
buildMetaBlockGreedyInternal(ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_lut, 1, nil, commands, mb)
|
2019-03-07 01:55:38 +03:00
|
|
|
} else {
|
2020-05-09 02:48:16 +03:00
|
|
|
buildMetaBlockGreedyInternal(ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_lut, num_contexts, static_context_map, commands, mb)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
func optimizeHistograms(num_distance_codes uint32, mb *metaBlockSplit) {
|
2019-03-15 22:05:31 +03:00
|
|
|
var good_for_rle [numCommandSymbols]byte
|
2019-03-07 01:55:38 +03:00
|
|
|
var i uint
|
|
|
|
for i = 0; i < mb.literal_histograms_size; i++ {
|
2019-03-16 03:24:40 +03:00
|
|
|
optimizeHuffmanCountsForRLE(256, mb.literal_histograms[i].data_[:], good_for_rle[:])
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
for i = 0; i < mb.command_histograms_size; i++ {
|
2019-03-16 03:24:40 +03:00
|
|
|
optimizeHuffmanCountsForRLE(numCommandSymbols, mb.command_histograms[i].data_[:], good_for_rle[:])
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
for i = 0; i < mb.distance_histograms_size; i++ {
|
2019-03-16 03:24:40 +03:00
|
|
|
optimizeHuffmanCountsForRLE(uint(num_distance_codes), mb.distance_histograms[i].data_[:], good_for_rle[:])
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|