2019-03-07 01:55:38 +03:00
|
|
|
package brotli
|
|
|
|
|
2019-03-09 06:45:16 +03:00
|
|
|
import "encoding/binary"
|
|
|
|
|
2019-03-07 01:55:38 +03:00
|
|
|
/* NOLINT(build/header_guard) */
|
|
|
|
/* Copyright 2010 Google Inc. All Rights Reserved.
|
|
|
|
|
|
|
|
Distributed under MIT license.
|
|
|
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
|
|
|
|
a little faster (0.5% - 1%) and it compresses 0.15% better on small text
|
|
|
|
and HTML inputs. */
|
2019-03-10 00:01:56 +03:00
|
|
|
func (*hashLongestMatchQuickly) HashTypeLength() uint {
|
2019-03-07 01:55:38 +03:00
|
|
|
return 8
|
|
|
|
}
|
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
func (*hashLongestMatchQuickly) StoreLookahead() uint {
|
2019-03-07 01:55:38 +03:00
|
|
|
return 8
|
|
|
|
}
|
|
|
|
|
|
|
|
/* HashBytes is the function that chooses the bucket to place
|
2019-03-10 00:01:56 +03:00
|
|
|
the address in. The HashLongestMatch and hashLongestMatchQuickly
|
2019-03-07 01:55:38 +03:00
|
|
|
classes have separate, different implementations of hashing. */
|
2019-03-10 00:01:56 +03:00
|
|
|
func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 {
|
|
|
|
var hash uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*h.hashLen)) * kHashMul64)
|
2019-03-07 01:55:38 +03:00
|
|
|
|
|
|
|
/* The higher bits contain more mixture from the multiplication,
|
|
|
|
so we take our results from there. */
|
2019-03-10 00:01:56 +03:00
|
|
|
return uint32(hash >> (64 - h.bucketBits))
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* A (forgetful) hash table to the data seen by the compressor, to
|
|
|
|
help create backward references to previous data.
|
|
|
|
|
|
|
|
This is a hash map of fixed size (1 << 16). Starting from the
|
|
|
|
given index, 1 buckets are used to store values of a key. */
|
2019-03-10 00:01:56 +03:00
|
|
|
type hashLongestMatchQuickly struct {
|
2019-03-16 03:24:40 +03:00
|
|
|
hasherCommon
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
bucketBits uint
|
|
|
|
bucketSweep int
|
|
|
|
hashLen uint
|
|
|
|
useDictionary bool
|
|
|
|
|
|
|
|
buckets []uint32
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
func (h *hashLongestMatchQuickly) Initialize(params *encoderParams) {
|
2019-03-10 00:01:56 +03:00
|
|
|
h.buckets = make([]uint32, 1<<h.bucketBits+h.bucketSweep)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) {
|
|
|
|
var partial_prepare_threshold uint = (4 << h.bucketBits) >> 7
|
2019-03-07 01:55:38 +03:00
|
|
|
/* Partial preparation is 100 times slower (per socket). */
|
|
|
|
if one_shot && input_size <= partial_prepare_threshold {
|
|
|
|
var i uint
|
|
|
|
for i = 0; i < input_size; i++ {
|
2019-03-10 00:01:56 +03:00
|
|
|
var key uint32 = h.HashBytes(data[i:])
|
|
|
|
for j := 0; j < h.bucketSweep; j++ {
|
|
|
|
h.buckets[key+uint32(j)] = 0
|
|
|
|
}
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* It is not strictly necessary to fill this buffer here, but
|
|
|
|
not filling will make the results of the compression stochastic
|
|
|
|
(but correct). This is because random data would cause the
|
|
|
|
system to find accidentally good backward references here and there. */
|
2019-03-10 00:01:56 +03:00
|
|
|
for i := range h.buckets {
|
|
|
|
h.buckets[i] = 0
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Look at 5 bytes at &data[ix & mask].
|
|
|
|
Compute a hash from these, and store the value somewhere within
|
|
|
|
[ix .. ix+3]. */
|
2019-03-10 00:01:56 +03:00
|
|
|
func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) {
|
|
|
|
var key uint32 = h.HashBytes(data[ix&mask:])
|
|
|
|
var off uint32 = uint32(ix>>3) % uint32(h.bucketSweep)
|
2019-03-07 01:55:38 +03:00
|
|
|
/* Wiggle the value with the bucket sweep range. */
|
2019-03-10 00:01:56 +03:00
|
|
|
h.buckets[key+off] = uint32(ix)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
2019-03-07 01:55:38 +03:00
|
|
|
var i uint
|
|
|
|
for i = ix_start; i < ix_end; i++ {
|
2019-03-09 03:43:15 +03:00
|
|
|
h.Store(data, mask, i)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
2019-03-09 03:43:15 +03:00
|
|
|
if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
|
2019-03-07 01:55:38 +03:00
|
|
|
/* Prepare the hashes for three last bytes of the last write.
|
|
|
|
These could not be calculated before, since they require knowledge
|
|
|
|
of both the previous and the current block. */
|
2019-03-09 03:43:15 +03:00
|
|
|
h.Store(ringbuffer, ringbuffer_mask, position-3)
|
|
|
|
h.Store(ringbuffer, ringbuffer_mask, position-2)
|
|
|
|
h.Store(ringbuffer, ringbuffer_mask, position-1)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) {
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
|
|
|
|
up to the length of max_length and stores the position cur_ix in the
|
|
|
|
hash table.
|
|
|
|
|
|
|
|
Does not look for matches longer than max_length.
|
|
|
|
Does not look for matches further away than max_backward.
|
|
|
|
Writes the best match into |out|.
|
|
|
|
|out|->score is updated only if a better match is found. */
|
2019-03-16 03:24:40 +03:00
|
|
|
func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
|
2019-03-07 01:55:38 +03:00
|
|
|
var best_len_in uint = out.len
|
|
|
|
var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
2019-03-10 00:01:56 +03:00
|
|
|
var key uint32 = h.HashBytes(data[cur_ix_masked:])
|
2019-03-07 01:55:38 +03:00
|
|
|
var compare_char int = int(data[cur_ix_masked+best_len_in])
|
|
|
|
var min_score uint = out.score
|
|
|
|
var best_score uint = out.score
|
|
|
|
var best_len uint = best_len_in
|
|
|
|
var cached_backward uint = uint(distance_cache[0])
|
|
|
|
var prev_ix uint = cur_ix - cached_backward
|
|
|
|
var bucket []uint32
|
|
|
|
out.len_code_delta = 0
|
|
|
|
if prev_ix < cur_ix {
|
|
|
|
prev_ix &= uint(uint32(ring_buffer_mask))
|
|
|
|
if compare_char == int(data[prev_ix+best_len]) {
|
2019-03-16 03:24:40 +03:00
|
|
|
var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
2019-03-07 01:55:38 +03:00
|
|
|
if len >= 4 {
|
2019-03-16 03:24:40 +03:00
|
|
|
var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
|
2019-03-07 01:55:38 +03:00
|
|
|
if best_score < score {
|
|
|
|
best_score = score
|
|
|
|
best_len = uint(len)
|
|
|
|
out.len = uint(len)
|
|
|
|
out.distance = cached_backward
|
|
|
|
out.score = best_score
|
|
|
|
compare_char = int(data[cur_ix_masked+best_len])
|
2019-03-10 00:01:56 +03:00
|
|
|
if h.bucketSweep == 1 {
|
|
|
|
h.buckets[key] = uint32(cur_ix)
|
2019-03-07 01:55:38 +03:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
if h.bucketSweep == 1 {
|
2019-03-07 01:55:38 +03:00
|
|
|
var backward uint
|
|
|
|
var len uint
|
|
|
|
|
|
|
|
/* Only one to look for, don't bother to prepare for a loop. */
|
2019-03-10 00:01:56 +03:00
|
|
|
prev_ix = uint(h.buckets[key])
|
2019-03-07 01:55:38 +03:00
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
h.buckets[key] = uint32(cur_ix)
|
2019-03-07 01:55:38 +03:00
|
|
|
backward = cur_ix - prev_ix
|
|
|
|
prev_ix &= uint(uint32(ring_buffer_mask))
|
|
|
|
if compare_char != int(data[prev_ix+best_len_in]) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if backward == 0 || backward > max_backward {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
2019-03-07 01:55:38 +03:00
|
|
|
if len >= 4 {
|
2019-03-16 03:24:40 +03:00
|
|
|
var score uint = backwardReferenceScore(uint(len), backward)
|
2019-03-07 01:55:38 +03:00
|
|
|
if best_score < score {
|
|
|
|
out.len = uint(len)
|
|
|
|
out.distance = backward
|
|
|
|
out.score = score
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
2019-03-10 00:01:56 +03:00
|
|
|
bucket = h.buckets[key:]
|
2019-03-07 01:55:38 +03:00
|
|
|
var i int
|
|
|
|
prev_ix = uint(bucket[0])
|
|
|
|
bucket = bucket[1:]
|
2019-03-10 00:01:56 +03:00
|
|
|
for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
|
2019-03-07 01:55:38 +03:00
|
|
|
var backward uint = cur_ix - prev_ix
|
|
|
|
var len uint
|
|
|
|
prev_ix &= uint(uint32(ring_buffer_mask))
|
|
|
|
if compare_char != int(data[prev_ix+best_len]) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if backward == 0 || backward > max_backward {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2019-03-16 03:24:40 +03:00
|
|
|
len = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
2019-03-07 01:55:38 +03:00
|
|
|
if len >= 4 {
|
2019-03-16 03:24:40 +03:00
|
|
|
var score uint = backwardReferenceScore(uint(len), backward)
|
2019-03-07 01:55:38 +03:00
|
|
|
if best_score < score {
|
|
|
|
best_score = score
|
|
|
|
best_len = uint(len)
|
|
|
|
out.len = best_len
|
|
|
|
out.distance = backward
|
|
|
|
out.score = score
|
|
|
|
compare_char = int(data[cur_ix_masked+best_len])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
if h.useDictionary && min_score == out.score {
|
2019-03-16 03:24:40 +03:00
|
|
|
searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|
|
|
|
|
2019-03-10 00:01:56 +03:00
|
|
|
h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix)
|
2019-03-07 01:55:38 +03:00
|
|
|
}
|