Start reducing duplication in hasher code.

The C version defined several related hasher types with preprocessor
tricks, but I split them up for the translation to Go. Now I'm
recombining them.
This commit is contained in:
Andy Balholm 2019-03-09 13:01:56 -08:00
parent 74ae18c776
commit c4f1bfa34f
7 changed files with 102 additions and 680 deletions

203
h3.go
View File

@ -1,203 +0,0 @@
package brotli
import "encoding/binary"
/* NOLINT(build/header_guard) */
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* HashTypeLength returns 8, which matches the 8-byte word that
   HashBytesH3 loads. StitchToPreviousBlock uses this value (minus one)
   as the minimum block size it acts on. */
func (*H3) HashTypeLength() uint {
	const wordBytes uint = 8
	return wordBytes
}
/* StoreLookahead returns 8.
   NOTE(review): presumably the number of bytes that must be readable at
   a position before Store may be called on it (HashBytesH3 loads 8
   bytes) -- confirm against the callers. */
func (*H3) StoreLookahead() uint {
	const lookahead uint = 8
	return lookahead
}
/* HashBytesH3 is the function that chooses the bucket to place the
   address in. It loads 8 bytes of data as a little-endian word (data
   must hold at least 8 bytes, otherwise the load panics), shifts the
   word left so that only the first 5 bytes remain, and multiplies the
   result by kHashMul64. The returned key is 16 bits wide.
   The HashLongestMatch and H3 classes have separate, different
   implementations of hashing. */
func HashBytesH3(data []byte) uint32 {
	const (
		hashLen    = 5  /* bytes that take part in the hash */
		bucketBits = 16 /* width of the returned key */
	)
	word := binary.LittleEndian.Uint64(data) << (64 - 8*hashLen)
	mixed := word * kHashMul64
	/* The higher bits contain more mixture from the multiplication,
	   so the key is taken from there. */
	return uint32(mixed >> (64 - bucketBits))
}
/* H3 is a (forgetful) hash table to the data seen by the compressor, to
   help create backward references to previous data.
   The table has a fixed size: 1<<16 key slots (HashBytesH3 returns a
   16-bit key) plus 2 trailing entries, so that indexing from any key
   through the bucket sweep stays in bounds. Starting from the given
   index, 2 buckets are used to store values of a key. */
type H3 struct {
	HasherCommon
	buckets_ [1<<16 + 2]uint32
}
/* SelfH3 returns handle as its concrete *H3. The type assertion is
   unchecked, so a handle that holds any other type panics. */
func SelfH3(handle HasherHandle) *H3 {
	hasher := handle.(*H3)
	return hasher
}
/* Initialize is a no-op for H3: the body is empty and params is not
   read. The bucket table is a fixed-size array field that Prepare
   clears. */
func (*H3) Initialize(params *BrotliEncoderParams) {
}
/* Prepare zeroes the bucket table before a block is hashed.

   For a one-shot input of at most (4<<16)>>7 bytes only the buckets that
   the input can hash to are cleared: each of the first input_size
   positions of data is hashed and the 2 buckets starting at that key
   are zeroed. Every hashed position needs 8 readable bytes in data,
   because HashBytesH3 loads a full word.

   Otherwise the whole table is cleared. */
func (h *H3) Prepare(one_shot bool, input_size uint, data []byte) {
	const (
		bucketSweep                  = 2
		partialPrepareThreshold uint = (4 << 16) >> 7
	)

	/* Partial preparation is 100 times slower (per socket), so it is
	   only worth doing for small inputs. */
	if one_shot && input_size <= partialPrepareThreshold {
		for pos := uint(0); pos < input_size; pos++ {
			key := HashBytesH3(data[pos:])
			for off := uint32(0); off < bucketSweep; off++ {
				h.buckets_[key+off] = 0
			}
		}
		return
	}

	/* It is not strictly necessary to fill this buffer here, but
	   not filling will make the results of the compression stochastic
	   (but correct). This is because random data would cause the
	   system to find accidentally good backward references here and there. */
	for i := range h.buckets_ {
		h.buckets_[i] = 0
	}
}
/* Store hashes the bytes at data[ix&mask:] (HashBytesH3 mixes the first
   5 of them) and records ix, truncated to 32 bits, in one of the 2
   buckets that start at the resulting key. */
func (h *H3) Store(data []byte, mask uint, ix uint) {
	const bucketSweep = 2
	/* Wiggle the value with the bucket sweep range: ix>>3 modulo the
	   sweep selects which bucket of the key is overwritten. */
	slot := HashBytesH3(data[ix&mask:]) + uint32(ix>>3)%bucketSweep
	h.buckets_[slot] = uint32(ix)
}
/* StoreRange calls Store for every position in [ix_start, ix_end), in
   increasing order. */
func (h *H3) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
	for pos := ix_start; pos < ix_end; pos++ {
		h.Store(data, mask, pos)
	}
}
/* StitchToPreviousBlock stores the hashes for the three positions just
   before position. It does nothing unless num_bytes is at least
   HashTypeLength()-1 and position is at least 3. */
func (h *H3) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
	if num_bytes < h.HashTypeLength()-1 || position < 3 {
		return
	}
	/* Prepare the hashes for three last bytes of the last write.
	   These could not be calculated before, since they require knowledge
	   of both the previous and the current block. */
	for back := uint(3); back > 0; back-- {
		h.Store(ringbuffer, ringbuffer_mask, position-back)
	}
}
/* PrepareDistanceCache is a no-op for H3: the body is empty and
   distance_cache is neither read nor modified. */
func (*H3) PrepareDistanceCache(distance_cache []int) {
}
/* FindLongestMatch finds a longest backward match of
   data[cur_ix & ring_buffer_mask:] up to the length of max_length and
   stores the position cur_ix in the hash table.

   Candidates are tried in this order: the last distance
   (distance_cache[0]), then the 2 buckets that start at the hash key of
   the current position. A candidate is accepted only if it matches at
   least 4 bytes and its score is strictly greater than the best score
   so far.

   Does not look for matches longer than max_length.
   Does not look for bucket matches further away than max_backward.
   Writes the best match into out; out.score is updated only if a better
   match is found. out.len_code_delta is always reset to 0.

   dictionary, gap and max_distance are not used by this hasher. */
func (h *H3) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
	const (
		bucketSweep = 2
		minMatchLen = 4
	)

	bestLen := out.len
	curMasked := cur_ix & ring_buffer_mask
	key := HashBytesH3(data[curMasked:])
	/* A candidate can only beat the current best if it also matches the
	   byte just past the best length, so that byte is compared first as
	   a cheap filter. */
	compareChar := data[curMasked+bestLen]
	bestScore := out.score
	cachedBackward := uint(distance_cache[0])
	out.len_code_delta = 0

	/* Try the last distance first. The subtraction wraps, so the
	   comparison rejects a zero distance and any distance that reaches
	   before position 0. */
	if prev := cur_ix - cachedBackward; prev < cur_ix {
		/* The mask is narrowed through uint32 before use; kept exactly
		   as in the original translation. */
		prev &= uint(uint32(ring_buffer_mask))
		if compareChar == data[prev+bestLen] {
			matchLen := FindMatchLengthWithLimit(data[prev:], data[curMasked:], max_length)
			if matchLen >= minMatchLen {
				score := BackwardReferenceScoreUsingLastDistance(matchLen)
				if bestScore < score {
					bestScore = score
					bestLen = matchLen
					out.len = matchLen
					out.distance = cachedBackward
					out.score = score
					compareChar = data[curMasked+bestLen]
				}
			}
		}
	}

	/* Then try every bucket of the sweep for this key. */
	for i := uint32(0); i < bucketSweep; i++ {
		prev := uint(h.buckets_[key+i])
		backward := cur_ix - prev
		prev &= uint(uint32(ring_buffer_mask))
		if compareChar != data[prev+bestLen] {
			continue
		}
		if backward == 0 || backward > max_backward {
			continue
		}
		matchLen := FindMatchLengthWithLimit(data[prev:], data[curMasked:], max_length)
		if matchLen < minMatchLen {
			continue
		}
		if score := BackwardReferenceScore(matchLen, backward); bestScore < score {
			bestScore = score
			bestLen = matchLen
			out.len = matchLen
			out.distance = backward
			out.score = score
			compareChar = data[curMasked+bestLen]
		}
	}

	/* Record the current position, wiggling within the bucket sweep the
	   same way Store does. */
	h.buckets_[key+uint32((cur_ix>>3)%bucketSweep)] = uint32(cur_ix)
}

2
h35.go
View File

@ -57,7 +57,7 @@ func (h *H35) Prepare(one_shot bool, input_size uint, data []byte) {
var common_a *HasherCommon var common_a *HasherCommon
var common_b *HasherCommon var common_b *HasherCommon
h.ha = new(H3) h.ha = newHasher(3)
common_a = h.ha.Common() common_a = h.ha.Common()
common_a.params = h.params.hasher common_a.params = h.params.hasher
common_a.is_prepared_ = false common_a.is_prepared_ = false

208
h4.go
View File

@ -1,208 +0,0 @@
package brotli
import "encoding/binary"
/* NOLINT(build/header_guard) */
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* HashTypeLength returns 8, which matches the 8-byte word that
   HashBytesH4 loads. StitchToPreviousBlock uses this value (minus one)
   as the minimum block size it acts on. */
func (*H4) HashTypeLength() uint {
	const wordBytes uint = 8
	return wordBytes
}
/* StoreLookahead returns 8.
   NOTE(review): presumably the number of bytes that must be readable at
   a position before Store may be called on it (HashBytesH4 loads 8
   bytes) -- confirm against the callers. */
func (*H4) StoreLookahead() uint {
	const lookahead uint = 8
	return lookahead
}
/* HashBytesH4 is the function that chooses the bucket to place the
   address in. It loads 8 bytes of data as a little-endian word (data
   must hold at least 8 bytes, otherwise the load panics), shifts the
   word left so that only the first 5 bytes remain, and multiplies the
   result by kHashMul64. The returned key is 17 bits wide.
   The HashLongestMatch and H4 classes have separate, different
   implementations of hashing. */
func HashBytesH4(data []byte) uint32 {
	const (
		hashLen    = 5  /* bytes that take part in the hash */
		bucketBits = 17 /* width of the returned key */
	)
	word := binary.LittleEndian.Uint64(data) << (64 - 8*hashLen)
	mixed := word * kHashMul64
	/* The higher bits contain more mixture from the multiplication,
	   so the key is taken from there. */
	return uint32(mixed >> (64 - bucketBits))
}
/* H4 is a (forgetful) hash table to the data seen by the compressor, to
   help create backward references to previous data.
   The table has a fixed size: 1<<17 key slots (HashBytesH4 returns a
   17-bit key) plus 4 trailing entries, so that indexing from any key
   through the bucket sweep stays in bounds. Starting from the given
   index, 4 buckets are used to store values of a key. */
type H4 struct {
	HasherCommon
	buckets_ [1<<17 + 4]uint32
}
/* SelfH4 returns handle as its concrete *H4. The type assertion is
   unchecked, so a handle that holds any other type panics. */
func SelfH4(handle HasherHandle) *H4 {
	hasher := handle.(*H4)
	return hasher
}
/* Initialize is a no-op for H4: the body is empty and params is not
   read. The bucket table is a fixed-size array field that Prepare
   clears. */
func (*H4) Initialize(params *BrotliEncoderParams) {
}
/* Prepare zeroes the bucket table before a block is hashed.

   For a one-shot input of at most (4<<17)>>7 bytes only the buckets that
   the input can hash to are cleared: each of the first input_size
   positions of data is hashed and the 4 buckets starting at that key
   are zeroed. Every hashed position needs 8 readable bytes in data,
   because HashBytesH4 loads a full word.

   Otherwise the whole table is cleared. */
func (h *H4) Prepare(one_shot bool, input_size uint, data []byte) {
	const (
		bucketSweep                  = 4
		partialPrepareThreshold uint = (4 << 17) >> 7
	)

	/* Partial preparation is 100 times slower (per socket), so it is
	   only worth doing for small inputs. */
	if one_shot && input_size <= partialPrepareThreshold {
		for pos := uint(0); pos < input_size; pos++ {
			key := HashBytesH4(data[pos:])
			for off := uint32(0); off < bucketSweep; off++ {
				h.buckets_[key+off] = 0
			}
		}
		return
	}

	/* It is not strictly necessary to fill this buffer here, but
	   not filling will make the results of the compression stochastic
	   (but correct). This is because random data would cause the
	   system to find accidentally good backward references here and there. */
	for i := range h.buckets_ {
		h.buckets_[i] = 0
	}
}
/* Store hashes the bytes at data[ix&mask:] (HashBytesH4 mixes the first
   5 of them) and records ix, truncated to 32 bits, in one of the 4
   buckets that start at the resulting key. */
func (h *H4) Store(data []byte, mask uint, ix uint) {
	const bucketSweep = 4
	/* Wiggle the value with the bucket sweep range: ix>>3 modulo the
	   sweep selects which bucket of the key is overwritten. */
	slot := HashBytesH4(data[ix&mask:]) + uint32(ix>>3)%bucketSweep
	h.buckets_[slot] = uint32(ix)
}
/* StoreRange calls Store for every position in [ix_start, ix_end), in
   increasing order. */
func (h *H4) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
	for pos := ix_start; pos < ix_end; pos++ {
		h.Store(data, mask, pos)
	}
}
/* StitchToPreviousBlock stores the hashes for the three positions just
   before position. It does nothing unless num_bytes is at least
   HashTypeLength()-1 and position is at least 3. */
func (h *H4) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
	if num_bytes < h.HashTypeLength()-1 || position < 3 {
		return
	}
	/* Prepare the hashes for three last bytes of the last write.
	   These could not be calculated before, since they require knowledge
	   of both the previous and the current block. */
	for back := uint(3); back > 0; back-- {
		h.Store(ringbuffer, ringbuffer_mask, position-back)
	}
}
/* PrepareDistanceCache is a no-op for H4: the body is empty and
   distance_cache is neither read nor modified. */
func (*H4) PrepareDistanceCache(distance_cache []int) {
}
/* FindLongestMatch finds a longest backward match of
   data[cur_ix & ring_buffer_mask:] up to the length of max_length and
   stores the position cur_ix in the hash table.

   Candidates are tried in this order: the last distance
   (distance_cache[0]), then the 4 buckets that start at the hash key of
   the current position. A candidate is accepted only if it matches at
   least 4 bytes and its score is strictly greater than the best score
   so far. If out.score still equals its value on entry afterwards, the
   static dictionary is searched with max_backward+gap and max_distance.

   Does not look for matches longer than max_length.
   Does not look for bucket matches further away than max_backward.
   Writes the best match into out; out.score is updated only if a better
   match is found. out.len_code_delta is always reset to 0 before the
   search. */
func (h *H4) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
	const (
		bucketSweep = 4
		minMatchLen = 4
	)

	bestLen := out.len
	curMasked := cur_ix & ring_buffer_mask
	key := HashBytesH4(data[curMasked:])
	/* A candidate can only beat the current best if it also matches the
	   byte just past the best length, so that byte is compared first as
	   a cheap filter. */
	compareChar := data[curMasked+bestLen]
	minScore := out.score
	bestScore := out.score
	cachedBackward := uint(distance_cache[0])
	out.len_code_delta = 0

	/* Try the last distance first. The subtraction wraps, so the
	   comparison rejects a zero distance and any distance that reaches
	   before position 0. */
	if prev := cur_ix - cachedBackward; prev < cur_ix {
		/* The mask is narrowed through uint32 before use; kept exactly
		   as in the original translation. */
		prev &= uint(uint32(ring_buffer_mask))
		if compareChar == data[prev+bestLen] {
			matchLen := FindMatchLengthWithLimit(data[prev:], data[curMasked:], max_length)
			if matchLen >= minMatchLen {
				score := BackwardReferenceScoreUsingLastDistance(matchLen)
				if bestScore < score {
					bestScore = score
					bestLen = matchLen
					out.len = matchLen
					out.distance = cachedBackward
					out.score = score
					compareChar = data[curMasked+bestLen]
				}
			}
		}
	}

	/* Then try every bucket of the sweep for this key. */
	for i := uint32(0); i < bucketSweep; i++ {
		prev := uint(h.buckets_[key+i])
		backward := cur_ix - prev
		prev &= uint(uint32(ring_buffer_mask))
		if compareChar != data[prev+bestLen] {
			continue
		}
		if backward == 0 || backward > max_backward {
			continue
		}
		matchLen := FindMatchLengthWithLimit(data[prev:], data[curMasked:], max_length)
		if matchLen < minMatchLen {
			continue
		}
		if score := BackwardReferenceScore(matchLen, backward); bestScore < score {
			bestScore = score
			bestLen = matchLen
			out.len = matchLen
			out.distance = backward
			out.score = score
			compareChar = data[curMasked+bestLen]
		}
	}

	/* Nothing above improved the score: fall back to the static
	   dictionary. */
	if minScore == out.score {
		SearchInStaticDictionary(dictionary, h, data[curMasked:], max_length, max_backward+gap, max_distance, out, true)
	}

	/* Record the current position, wiggling within the bucket sweep the
	   same way Store does. */
	h.buckets_[key+uint32((cur_ix>>3)%bucketSweep)] = uint32(cur_ix)
}

200
h54.go
View File

@ -1,200 +0,0 @@
package brotli
import "encoding/binary"
/* NOLINT(build/header_guard) */
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* HashTypeLength returns 8, which matches the 8-byte word that
   HashBytesH54 loads. StitchToPreviousBlock uses this value (minus one)
   as the minimum block size it acts on. */
func (*H54) HashTypeLength() uint {
	const wordBytes uint = 8
	return wordBytes
}
/* StoreLookahead returns 8.
   NOTE(review): presumably the number of bytes that must be readable at
   a position before Store may be called on it (HashBytesH54 loads 8
   bytes) -- confirm against the callers. */
func (*H54) StoreLookahead() uint {
	const lookahead uint = 8
	return lookahead
}
/* HashBytesH54 is the function that chooses the bucket to place the
   address in. It loads 8 bytes of data as a little-endian word (data
   must hold at least 8 bytes, otherwise the load panics), shifts the
   word left so that only the first 7 bytes remain, and multiplies the
   result by kHashMul64. The returned key is 20 bits wide.
   The HashLongestMatch and H54 classes have separate, different
   implementations of hashing. */
func HashBytesH54(data []byte) uint32 {
	const (
		hashLen    = 7  /* bytes that take part in the hash */
		bucketBits = 20 /* width of the returned key */
	)
	word := binary.LittleEndian.Uint64(data) << (64 - 8*hashLen)
	mixed := word * kHashMul64
	/* The higher bits contain more mixture from the multiplication,
	   so the key is taken from there. */
	return uint32(mixed >> (64 - bucketBits))
}
/* H54 is a (forgetful) hash table to the data seen by the compressor, to
   help create backward references to previous data.
   The table has a fixed size: 1<<20 key slots (HashBytesH54 returns a
   20-bit key) plus 4 trailing entries, so that indexing from any key
   through the bucket sweep stays in bounds. Starting from the given
   index, 4 buckets are used to store values of a key. */
type H54 struct {
	HasherCommon
	buckets_ [1<<20 + 4]uint32
}
/* SelfH54 returns handle as its concrete *H54. The type assertion is
   unchecked, so a handle that holds any other type panics. */
func SelfH54(handle HasherHandle) *H54 {
	hasher := handle.(*H54)
	return hasher
}
/* Initialize is a no-op for H54: the body is empty and params is not
   read. The bucket table is a fixed-size array field that Prepare
   clears. */
func (*H54) Initialize(params *BrotliEncoderParams) {
}
/* Prepare zeroes the bucket table before a block is hashed.

   For a one-shot input of at most (4<<20)>>7 bytes only the buckets that
   the input can hash to are cleared: each of the first input_size
   positions of data is hashed and the 4 buckets starting at that key
   are zeroed. Every hashed position needs 8 readable bytes in data,
   because HashBytesH54 loads a full word.

   Otherwise the whole table is reset to its zero value. */
func (h *H54) Prepare(one_shot bool, input_size uint, data []byte) {
	const (
		bucketSweep                  = 4
		partialPrepareThreshold uint = (4 << 20) >> 7
	)

	/* Partial preparation is 100 times slower (per socket), so it is
	   only worth doing for small inputs. */
	if one_shot && input_size <= partialPrepareThreshold {
		for pos := uint(0); pos < input_size; pos++ {
			key := HashBytesH54(data[pos:])
			for off := uint32(0); off < bucketSweep; off++ {
				h.buckets_[key+off] = 0
			}
		}
		return
	}

	/* It is not strictly necessary to fill this buffer here, but
	   not filling will make the results of the compression stochastic
	   (but correct). This is because random data would cause the
	   system to find accidentally good backward references here and there. */
	h.buckets_ = [1<<20 + 4]uint32{}
}
/* Store hashes the bytes at data[ix&mask:] (HashBytesH54 mixes the first
   7 of them) and records ix, truncated to 32 bits, in one of the 4
   buckets that start at the resulting key. */
func (h *H54) Store(data []byte, mask uint, ix uint) {
	const bucketSweep = 4
	/* Wiggle the value with the bucket sweep range: ix>>3 modulo the
	   sweep selects which bucket of the key is overwritten. */
	slot := HashBytesH54(data[ix&mask:]) + uint32(ix>>3)%bucketSweep
	h.buckets_[slot] = uint32(ix)
}
/* StoreRange calls Store for every position in [ix_start, ix_end), in
   increasing order. */
func (h *H54) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
	for pos := ix_start; pos < ix_end; pos++ {
		h.Store(data, mask, pos)
	}
}
/* StitchToPreviousBlock stores the hashes for the three positions just
   before position. It does nothing unless num_bytes is at least
   HashTypeLength()-1 and position is at least 3. */
func (h *H54) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
	if num_bytes < h.HashTypeLength()-1 || position < 3 {
		return
	}
	/* Prepare the hashes for three last bytes of the last write.
	   These could not be calculated before, since they require knowledge
	   of both the previous and the current block. */
	for back := uint(3); back > 0; back-- {
		h.Store(ringbuffer, ringbuffer_mask, position-back)
	}
}
/* PrepareDistanceCache is a no-op for H54: the body is empty and
   distance_cache is neither read nor modified. */
func (*H54) PrepareDistanceCache(distance_cache []int) {
}
/* FindLongestMatch finds a longest backward match of
   data[cur_ix & ring_buffer_mask:] up to the length of max_length and
   stores the position cur_ix in the hash table.

   Candidates are tried in this order: the last distance
   (distance_cache[0]), then the 4 buckets that start at the hash key of
   the current position. A candidate is accepted only if it matches at
   least 4 bytes and its score is strictly greater than the best score
   so far.

   Does not look for matches longer than max_length.
   Does not look for bucket matches further away than max_backward.
   Writes the best match into out; out.score is updated only if a better
   match is found. out.len_code_delta is always reset to 0.

   dictionary, gap and max_distance are not used by this hasher. */
func (h *H54) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
	const (
		bucketSweep = 4
		minMatchLen = 4
	)

	bestLen := out.len
	curMasked := cur_ix & ring_buffer_mask
	key := HashBytesH54(data[curMasked:])
	/* A candidate can only beat the current best if it also matches the
	   byte just past the best length, so that byte is compared first as
	   a cheap filter. */
	compareChar := data[curMasked+bestLen]
	bestScore := out.score
	cachedBackward := uint(distance_cache[0])
	out.len_code_delta = 0

	/* Try the last distance first. The subtraction wraps, so the
	   comparison rejects a zero distance and any distance that reaches
	   before position 0. */
	if prev := cur_ix - cachedBackward; prev < cur_ix {
		/* The mask is narrowed through uint32 before use; kept exactly
		   as in the original translation. */
		prev &= uint(uint32(ring_buffer_mask))
		if compareChar == data[prev+bestLen] {
			matchLen := FindMatchLengthWithLimit(data[prev:], data[curMasked:], max_length)
			if matchLen >= minMatchLen {
				score := BackwardReferenceScoreUsingLastDistance(matchLen)
				if bestScore < score {
					bestScore = score
					bestLen = matchLen
					out.len = matchLen
					out.distance = cachedBackward
					out.score = score
					compareChar = data[curMasked+bestLen]
				}
			}
		}
	}

	/* Then try every bucket of the sweep for this key. */
	for i := uint32(0); i < bucketSweep; i++ {
		prev := uint(h.buckets_[key+i])
		backward := cur_ix - prev
		prev &= uint(uint32(ring_buffer_mask))
		if compareChar != data[prev+bestLen] {
			continue
		}
		if backward == 0 || backward > max_backward {
			continue
		}
		matchLen := FindMatchLengthWithLimit(data[prev:], data[curMasked:], max_length)
		if matchLen < minMatchLen {
			continue
		}
		if score := BackwardReferenceScore(matchLen, backward); bestScore < score {
			bestScore = score
			bestLen = matchLen
			out.len = matchLen
			out.distance = backward
			out.score = score
			compareChar = data[curMasked+bestLen]
		}
	}

	/* Record the current position, wiggling within the bucket sweep the
	   same way Store does. */
	h.buckets_[key+uint32((cur_ix>>3)%bucketSweep)] = uint32(cur_ix)
}

2
h55.go
View File

@ -55,7 +55,7 @@ func (h *H55) Prepare(one_shot bool, input_size uint, data []byte) {
var common_a *HasherCommon var common_a *HasherCommon
var common_b *HasherCommon var common_b *HasherCommon
h.ha = new(H54) h.ha = newHasher(54)
common_a = h.ha.Common() common_a = h.ha.Common()
common_a.params = h.params.hasher common_a.params = h.params.hasher
common_a.is_prepared_ = false common_a.is_prepared_ = false

87
hash.go
View File

@ -1,6 +1,9 @@
package brotli package brotli
import "encoding/binary" import (
"encoding/binary"
"fmt"
)
/* Matches data against static dictionary words, and for each length l, /* Matches data against static dictionary words, and for each length l,
for which a match is found, updates matches[l] to be the minimum possible for which a match is found, updates matches[l] to be the minimum possible
@ -253,40 +256,66 @@ func HasherReset(handle HasherHandle) {
handle.Common().is_prepared_ = false handle.Common().is_prepared_ = false
} }
/* newHasher allocates the hasher that corresponds to the numeric hasher
   type typ. Types 2, 3, 4 and 54 all share the hashLongestMatchQuickly
   implementation and differ only in its four configuration fields; the
   remaining types each have a dedicated struct. An unrecognised type
   panics. */
func newHasher(typ int) HasherHandle {
	/* quickly builds one configuration of the shared quick hasher. */
	quickly := func(bucketBits uint, bucketSweep int, hashLen uint, useDictionary bool) HasherHandle {
		return &hashLongestMatchQuickly{
			bucketBits:    bucketBits,
			bucketSweep:   bucketSweep,
			hashLen:       hashLen,
			useDictionary: useDictionary,
		}
	}

	switch typ {
	/* Configurations of hashLongestMatchQuickly. */
	case 2:
		return quickly(16, 1, 5, true)
	case 3:
		return quickly(16, 2, 5, false)
	case 4:
		return quickly(17, 4, 5, true)
	case 54:
		return quickly(20, 4, 7, false)

	/* Hashers with their own types. */
	case 5:
		return new(H5)
	case 6:
		return new(H6)
	case 10:
		return new(H10)
	case 35:
		return new(H35)
	case 40:
		return new(H40)
	case 41:
		return new(H41)
	case 42:
		return new(H42)
	case 55:
		return new(H55)
	case 65:
		return new(H65)
	default:
		panic(fmt.Sprintf("unknown hasher type: %d", typ))
	}
}
func HasherSetup(handle *HasherHandle, params *BrotliEncoderParams, data []byte, position uint, input_size uint, is_last bool) { func HasherSetup(handle *HasherHandle, params *BrotliEncoderParams, data []byte, position uint, input_size uint, is_last bool) {
var self HasherHandle = nil var self HasherHandle = nil
var common *HasherCommon = nil var common *HasherCommon = nil
var one_shot bool = (position == 0 && is_last) var one_shot bool = (position == 0 && is_last)
if *handle == nil { if *handle == nil {
ChooseHasher(params, &params.hasher) ChooseHasher(params, &params.hasher)
switch params.hasher.type_ { self = newHasher(params.hasher.type_)
case 2:
self = new(H2)
case 3:
self = new(H3)
case 4:
self = new(H4)
case 5:
self = new(H5)
case 6:
self = new(H6)
case 40:
self = new(H40)
case 41:
self = new(H41)
case 42:
self = new(H42)
case 54:
self = new(H54)
case 35:
self = new(H35)
case 55:
self = new(H55)
case 65:
self = new(H65)
case 10:
self = new(H10)
}
*handle = self *handle = self
common = self.Common() common = self.Common()

View File

@ -12,23 +12,23 @@ import "encoding/binary"
/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
a little faster (0.5% - 1%) and it compresses 0.15% better on small text a little faster (0.5% - 1%) and it compresses 0.15% better on small text
and HTML inputs. */ and HTML inputs. */
func (*H2) HashTypeLength() uint { func (*hashLongestMatchQuickly) HashTypeLength() uint {
return 8 return 8
} }
func (*H2) StoreLookahead() uint { func (*hashLongestMatchQuickly) StoreLookahead() uint {
return 8 return 8
} }
/* HashBytes is the function that chooses the bucket to place /* HashBytes is the function that chooses the bucket to place
the address in. The HashLongestMatch and H2 the address in. The HashLongestMatch and hashLongestMatchQuickly
classes have separate, different implementations of hashing. */ classes have separate, different implementations of hashing. */
func HashBytesH2(data []byte) uint32 { func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 {
var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64) var hash uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*h.hashLen)) * kHashMul64)
/* The higher bits contain more mixture from the multiplication, /* The higher bits contain more mixture from the multiplication,
so we take our results from there. */ so we take our results from there. */
return uint32(h >> (64 - 16)) return uint32(hash >> (64 - h.bucketBits))
} }
/* A (forgetful) hash table to the data seen by the compressor, to /* A (forgetful) hash table to the data seen by the compressor, to
@ -36,35 +36,39 @@ func HashBytesH2(data []byte) uint32 {
This is a hash map of fixed size (1 << 16). Starting from the This is a hash map of fixed size (1 << 16). Starting from the
given index, 1 buckets are used to store values of a key. */ given index, 1 buckets are used to store values of a key. */
type H2 struct { type hashLongestMatchQuickly struct {
HasherCommon HasherCommon
buckets_ [(1 << 16) + 1]uint32
bucketBits uint
bucketSweep int
hashLen uint
useDictionary bool
buckets []uint32
} }
func SelfH2(handle HasherHandle) *H2 { func (h *hashLongestMatchQuickly) Initialize(params *BrotliEncoderParams) {
return handle.(*H2) h.buckets = make([]uint32, 1<<h.bucketBits+h.bucketSweep)
} }
func (*H2) Initialize(params *BrotliEncoderParams) { func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) {
} var partial_prepare_threshold uint = (4 << h.bucketBits) >> 7
func (h *H2) Prepare(one_shot bool, input_size uint, data []byte) {
var partial_prepare_threshold uint = (4 << 16) >> 7
/* Partial preparation is 100 times slower (per socket). */ /* Partial preparation is 100 times slower (per socket). */
if one_shot && input_size <= partial_prepare_threshold { if one_shot && input_size <= partial_prepare_threshold {
var i uint var i uint
for i = 0; i < input_size; i++ { for i = 0; i < input_size; i++ {
var key uint32 = HashBytesH2(data[i:]) var key uint32 = h.HashBytes(data[i:])
h.buckets_[key] = 0 for j := 0; j < h.bucketSweep; j++ {
h.buckets[key+uint32(j)] = 0
}
} }
} else { } else {
/* It is not strictly necessary to fill this buffer here, but /* It is not strictly necessary to fill this buffer here, but
not filling will make the results of the compression stochastic not filling will make the results of the compression stochastic
(but correct). This is because random data would cause the (but correct). This is because random data would cause the
system to find accidentally good backward references here and there. */ system to find accidentally good backward references here and there. */
var i int for i := range h.buckets {
for i = 0; i < len(h.buckets_); i++ { h.buckets[i] = 0
h.buckets_[i] = 0
} }
} }
} }
@ -72,21 +76,21 @@ func (h *H2) Prepare(one_shot bool, input_size uint, data []byte) {
/* Look at 5 bytes at &data[ix & mask]. /* Look at 5 bytes at &data[ix & mask].
Compute a hash from these, and store the value somewhere within Compute a hash from these, and store the value somewhere within
[ix .. ix+3]. */ [ix .. ix+3]. */
func (h *H2) Store(data []byte, mask uint, ix uint) { func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) {
var key uint32 = HashBytesH2(data[ix&mask:]) var key uint32 = h.HashBytes(data[ix&mask:])
var off uint32 = uint32(ix>>3) % 1 var off uint32 = uint32(ix>>3) % uint32(h.bucketSweep)
/* Wiggle the value with the bucket sweep range. */ /* Wiggle the value with the bucket sweep range. */
h.buckets_[key+off] = uint32(ix) h.buckets[key+off] = uint32(ix)
} }
func (h *H2) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
var i uint var i uint
for i = ix_start; i < ix_end; i++ { for i = ix_start; i < ix_end; i++ {
h.Store(data, mask, i) h.Store(data, mask, i)
} }
} }
func (h *H2) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) { func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
if num_bytes >= h.HashTypeLength()-1 && position >= 3 { if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
/* Prepare the hashes for three last bytes of the last write. /* Prepare the hashes for three last bytes of the last write.
These could not be calculated before, since they require knowledge These could not be calculated before, since they require knowledge
@ -97,7 +101,7 @@ func (h *H2) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []b
} }
} }
func (*H2) PrepareDistanceCache(distance_cache []int) { func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) {
} }
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask] /* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
@ -108,10 +112,10 @@ func (*H2) PrepareDistanceCache(distance_cache []int) {
Does not look for matches further away than max_backward. Does not look for matches further away than max_backward.
Writes the best match into |out|. Writes the best match into |out|.
|out|->score is updated only if a better match is found. */ |out|->score is updated only if a better match is found. */
func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) { func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
var best_len_in uint = out.len var best_len_in uint = out.len
var cur_ix_masked uint = cur_ix & ring_buffer_mask var cur_ix_masked uint = cur_ix & ring_buffer_mask
var key uint32 = HashBytesH2(data[cur_ix_masked:]) var key uint32 = h.HashBytes(data[cur_ix_masked:])
var compare_char int = int(data[cur_ix_masked+best_len_in]) var compare_char int = int(data[cur_ix_masked+best_len_in])
var min_score uint = out.score var min_score uint = out.score
var best_score uint = out.score var best_score uint = out.score
@ -133,8 +137,8 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
out.distance = cached_backward out.distance = cached_backward
out.score = best_score out.score = best_score
compare_char = int(data[cur_ix_masked+best_len]) compare_char = int(data[cur_ix_masked+best_len])
if 1 == 1 { if h.bucketSweep == 1 {
h.buckets_[key] = uint32(cur_ix) h.buckets[key] = uint32(cur_ix)
return return
} }
} }
@ -142,14 +146,14 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
} }
} }
if 1 == 1 { if h.bucketSweep == 1 {
var backward uint var backward uint
var len uint var len uint
/* Only one to look for, don't bother to prepare for a loop. */ /* Only one to look for, don't bother to prepare for a loop. */
prev_ix = uint(h.buckets_[key]) prev_ix = uint(h.buckets[key])
h.buckets_[key] = uint32(cur_ix) h.buckets[key] = uint32(cur_ix)
backward = cur_ix - prev_ix backward = cur_ix - prev_ix
prev_ix &= uint(uint32(ring_buffer_mask)) prev_ix &= uint(uint32(ring_buffer_mask))
if compare_char != int(data[prev_ix+best_len_in]) { if compare_char != int(data[prev_ix+best_len_in]) {
@ -171,11 +175,11 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
} }
} }
} else { } else {
bucket = h.buckets_[key:] bucket = h.buckets[key:]
var i int var i int
prev_ix = uint(bucket[0]) prev_ix = uint(bucket[0])
bucket = bucket[1:] bucket = bucket[1:]
for i = 0; i < 1; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() { for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
var backward uint = cur_ix - prev_ix var backward uint = cur_ix - prev_ix
var len uint var len uint
prev_ix &= uint(uint32(ring_buffer_mask)) prev_ix &= uint(uint32(ring_buffer_mask))
@ -202,9 +206,9 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
} }
} }
if min_score == out.score { if h.useDictionary && min_score == out.score {
SearchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true) SearchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
} }
h.buckets_[key+uint32((cur_ix>>3)%1)] = uint32(cur_ix) h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix)
} }