forked from mirror/brotli
Start reducing duplication in hasher code.
The C version defined several related hasher types with preprocessor tricks, but I split them up for the translation to Go. Now I'm recombining them.
This commit is contained in:
parent
74ae18c776
commit
c4f1bfa34f
203
h3.go
203
h3.go
|
@ -1,203 +0,0 @@
|
|||
package brotli
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
func (*H3) HashTypeLength() uint {
|
||||
return 8
|
||||
}
|
||||
|
||||
func (*H3) StoreLookahead() uint {
|
||||
return 8
|
||||
}
|
||||
|
||||
/* HashBytes is the function that chooses the bucket to place
|
||||
the address in. The HashLongestMatch and H3
|
||||
classes have separate, different implementations of hashing. */
|
||||
func HashBytesH3(data []byte) uint32 {
|
||||
var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64)
|
||||
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return uint32(h >> (64 - 16))
|
||||
}
|
||||
|
||||
/* A (forgetful) hash table to the data seen by the compressor, to
|
||||
help create backward references to previous data.
|
||||
|
||||
This is a hash map of fixed size (BUCKET_SIZE). Starting from the
|
||||
given index, 2 buckets are used to store values of a key. */
|
||||
type H3 struct {
|
||||
HasherCommon
|
||||
buckets_ [(1 << 16) + 2]uint32
|
||||
}
|
||||
|
||||
func SelfH3(handle HasherHandle) *H3 {
|
||||
return handle.(*H3)
|
||||
}
|
||||
|
||||
func (*H3) Initialize(params *BrotliEncoderParams) {
|
||||
}
|
||||
|
||||
func (h *H3) Prepare(one_shot bool, input_size uint, data []byte) {
|
||||
var partial_prepare_threshold uint = (4 << 16) >> 7
|
||||
/* Partial preparation is 100 times slower (per socket). */
|
||||
if one_shot && input_size <= partial_prepare_threshold {
|
||||
var i uint
|
||||
for i = 0; i < input_size; i++ {
|
||||
var key uint32 = HashBytesH3(data[i:])
|
||||
for i := 0; i < int(2); i++ {
|
||||
h.buckets_[key:][i] = 0
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* It is not strictly necessary to fill this buffer here, but
|
||||
not filling will make the results of the compression stochastic
|
||||
(but correct). This is because random data would cause the
|
||||
system to find accidentally good backward references here and there. */
|
||||
var i int
|
||||
for i = 0; i < len(h.buckets_); i++ {
|
||||
h.buckets_[i] = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Look at 5 bytes at &data[ix & mask].
|
||||
Compute a hash from these, and store the value somewhere within
|
||||
[ix .. ix+3]. */
|
||||
func (h *H3) Store(data []byte, mask uint, ix uint) {
|
||||
var key uint32 = HashBytesH3(data[ix&mask:])
|
||||
var off uint32 = uint32(ix>>3) % 2
|
||||
/* Wiggle the value with the bucket sweep range. */
|
||||
h.buckets_[key+off] = uint32(ix)
|
||||
}
|
||||
|
||||
func (h *H3) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
||||
var i uint
|
||||
for i = ix_start; i < ix_end; i++ {
|
||||
h.Store(data, mask, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *H3) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
||||
if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
|
||||
/* Prepare the hashes for three last bytes of the last write.
|
||||
These could not be calculated before, since they require knowledge
|
||||
of both the previous and the current block. */
|
||||
h.Store(ringbuffer, ringbuffer_mask, position-3)
|
||||
h.Store(ringbuffer, ringbuffer_mask, position-2)
|
||||
h.Store(ringbuffer, ringbuffer_mask, position-1)
|
||||
}
|
||||
}
|
||||
|
||||
func (*H3) PrepareDistanceCache(distance_cache []int) {
|
||||
}
|
||||
|
||||
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
|
||||
up to the length of max_length and stores the position cur_ix in the
|
||||
hash table.
|
||||
|
||||
Does not look for matches longer than max_length.
|
||||
Does not look for matches further away than max_backward.
|
||||
Writes the best match into |out|.
|
||||
|out|->score is updated only if a better match is found. */
|
||||
func (h *H3) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
|
||||
var best_len_in uint = out.len
|
||||
var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
||||
var key uint32 = HashBytesH3(data[cur_ix_masked:])
|
||||
var compare_char int = int(data[cur_ix_masked+best_len_in])
|
||||
var best_score uint = out.score
|
||||
var best_len uint = best_len_in
|
||||
var cached_backward uint = uint(distance_cache[0])
|
||||
var prev_ix uint = cur_ix - cached_backward
|
||||
var bucket []uint32
|
||||
out.len_code_delta = 0
|
||||
if prev_ix < cur_ix {
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char == int(data[prev_ix+best_len]) {
|
||||
var len uint = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||||
if len >= 4 {
|
||||
var score uint = BackwardReferenceScoreUsingLastDistance(uint(len))
|
||||
if best_score < score {
|
||||
best_score = score
|
||||
best_len = uint(len)
|
||||
out.len = uint(len)
|
||||
out.distance = cached_backward
|
||||
out.score = best_score
|
||||
compare_char = int(data[cur_ix_masked+best_len])
|
||||
if 2 == 1 {
|
||||
h.buckets_[key] = uint32(cur_ix)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if 2 == 1 {
|
||||
var backward uint
|
||||
var len uint
|
||||
|
||||
/* Only one to look for, don't bother to prepare for a loop. */
|
||||
prev_ix = uint(h.buckets_[key])
|
||||
|
||||
h.buckets_[key] = uint32(cur_ix)
|
||||
backward = cur_ix - prev_ix
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char != int(data[prev_ix+best_len_in]) {
|
||||
return
|
||||
}
|
||||
|
||||
if backward == 0 || backward > max_backward {
|
||||
return
|
||||
}
|
||||
|
||||
len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||||
if len >= 4 {
|
||||
var score uint = BackwardReferenceScore(uint(len), backward)
|
||||
if best_score < score {
|
||||
out.len = uint(len)
|
||||
out.distance = backward
|
||||
out.score = score
|
||||
return
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bucket = h.buckets_[key:]
|
||||
var i int
|
||||
prev_ix = uint(bucket[0])
|
||||
bucket = bucket[1:]
|
||||
for i = 0; i < 2; (func() { i++; tmp4 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp4[0]) })() {
|
||||
var backward uint = cur_ix - prev_ix
|
||||
var len uint
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char != int(data[prev_ix+best_len]) {
|
||||
continue
|
||||
}
|
||||
|
||||
if backward == 0 || backward > max_backward {
|
||||
continue
|
||||
}
|
||||
|
||||
len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||||
if len >= 4 {
|
||||
var score uint = BackwardReferenceScore(uint(len), backward)
|
||||
if best_score < score {
|
||||
best_score = score
|
||||
best_len = uint(len)
|
||||
out.len = best_len
|
||||
out.distance = backward
|
||||
out.score = score
|
||||
compare_char = int(data[cur_ix_masked+best_len])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
h.buckets_[key+uint32((cur_ix>>3)%2)] = uint32(cur_ix)
|
||||
}
|
2
h35.go
2
h35.go
|
@ -57,7 +57,7 @@ func (h *H35) Prepare(one_shot bool, input_size uint, data []byte) {
|
|||
var common_a *HasherCommon
|
||||
var common_b *HasherCommon
|
||||
|
||||
h.ha = new(H3)
|
||||
h.ha = newHasher(3)
|
||||
common_a = h.ha.Common()
|
||||
common_a.params = h.params.hasher
|
||||
common_a.is_prepared_ = false
|
||||
|
|
208
h4.go
208
h4.go
|
@ -1,208 +0,0 @@
|
|||
package brotli
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
func (*H4) HashTypeLength() uint {
|
||||
return 8
|
||||
}
|
||||
|
||||
func (*H4) StoreLookahead() uint {
|
||||
return 8
|
||||
}
|
||||
|
||||
/* HashBytes is the function that chooses the bucket to place
|
||||
the address in. The HashLongestMatch and H4
|
||||
classes have separate, different implementations of hashing. */
|
||||
func HashBytesH4(data []byte) uint32 {
|
||||
var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64)
|
||||
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return uint32(h >> (64 - 17))
|
||||
}
|
||||
|
||||
/* A (forgetful) hash table to the data seen by the compressor, to
|
||||
help create backward references to previous data.
|
||||
|
||||
This is a hash map of fixed size (BUCKET_SIZE). Starting from the
|
||||
given index, 4 buckets are used to store values of a key. */
|
||||
type H4 struct {
|
||||
HasherCommon
|
||||
buckets_ [(1 << 17) + 4]uint32
|
||||
}
|
||||
|
||||
func SelfH4(handle HasherHandle) *H4 {
|
||||
return handle.(*H4)
|
||||
}
|
||||
|
||||
func (*H4) Initialize(params *BrotliEncoderParams) {
|
||||
}
|
||||
|
||||
func (h *H4) Prepare(one_shot bool, input_size uint, data []byte) {
|
||||
var partial_prepare_threshold uint = (4 << 17) >> 7
|
||||
/* Partial preparation is 100 times slower (per socket). */
|
||||
if one_shot && input_size <= partial_prepare_threshold {
|
||||
var i uint
|
||||
for i = 0; i < input_size; i++ {
|
||||
var key uint32 = HashBytesH4(data[i:])
|
||||
for i := 0; i < int(4); i++ {
|
||||
h.buckets_[key:][i] = 0
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* It is not strictly necessary to fill this buffer here, but
|
||||
not filling will make the results of the compression stochastic
|
||||
(but correct). This is because random data would cause the
|
||||
system to find accidentally good backward references here and there. */
|
||||
var i int
|
||||
for i = 0; i < len(h.buckets_); i++ {
|
||||
h.buckets_[i] = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Look at 5 bytes at &data[ix & mask].
|
||||
Compute a hash from these, and store the value somewhere within
|
||||
[ix .. ix+3]. */
|
||||
func (h *H4) Store(data []byte, mask uint, ix uint) {
|
||||
var key uint32 = HashBytesH4(data[ix&mask:])
|
||||
var off uint32 = uint32(ix>>3) % 4
|
||||
/* Wiggle the value with the bucket sweep range. */
|
||||
h.buckets_[key+off] = uint32(ix)
|
||||
}
|
||||
|
||||
func (h *H4) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
||||
var i uint
|
||||
for i = ix_start; i < ix_end; i++ {
|
||||
h.Store(data, mask, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *H4) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
||||
if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
|
||||
/* Prepare the hashes for three last bytes of the last write.
|
||||
These could not be calculated before, since they require knowledge
|
||||
of both the previous and the current block. */
|
||||
h.Store(ringbuffer, ringbuffer_mask, position-3)
|
||||
h.Store(ringbuffer, ringbuffer_mask, position-2)
|
||||
h.Store(ringbuffer, ringbuffer_mask, position-1)
|
||||
}
|
||||
}
|
||||
|
||||
func (*H4) PrepareDistanceCache(distance_cache []int) {
|
||||
}
|
||||
|
||||
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
|
||||
up to the length of max_length and stores the position cur_ix in the
|
||||
hash table.
|
||||
|
||||
Does not look for matches longer than max_length.
|
||||
Does not look for matches further away than max_backward.
|
||||
Writes the best match into |out|.
|
||||
|out|->score is updated only if a better match is found. */
|
||||
func (h *H4) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
|
||||
var best_len_in uint = out.len
|
||||
var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
||||
var key uint32 = HashBytesH4(data[cur_ix_masked:])
|
||||
var compare_char int = int(data[cur_ix_masked+best_len_in])
|
||||
var min_score uint = out.score
|
||||
var best_score uint = out.score
|
||||
var best_len uint = best_len_in
|
||||
var cached_backward uint = uint(distance_cache[0])
|
||||
var prev_ix uint = cur_ix - cached_backward
|
||||
var bucket []uint32
|
||||
out.len_code_delta = 0
|
||||
if prev_ix < cur_ix {
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char == int(data[prev_ix+best_len]) {
|
||||
var len uint = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||||
if len >= 4 {
|
||||
var score uint = BackwardReferenceScoreUsingLastDistance(uint(len))
|
||||
if best_score < score {
|
||||
best_score = score
|
||||
best_len = uint(len)
|
||||
out.len = uint(len)
|
||||
out.distance = cached_backward
|
||||
out.score = best_score
|
||||
compare_char = int(data[cur_ix_masked+best_len])
|
||||
if 4 == 1 {
|
||||
h.buckets_[key] = uint32(cur_ix)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if 4 == 1 {
|
||||
var backward uint
|
||||
var len uint
|
||||
|
||||
/* Only one to look for, don't bother to prepare for a loop. */
|
||||
prev_ix = uint(h.buckets_[key])
|
||||
|
||||
h.buckets_[key] = uint32(cur_ix)
|
||||
backward = cur_ix - prev_ix
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char != int(data[prev_ix+best_len_in]) {
|
||||
return
|
||||
}
|
||||
|
||||
if backward == 0 || backward > max_backward {
|
||||
return
|
||||
}
|
||||
|
||||
len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||||
if len >= 4 {
|
||||
var score uint = BackwardReferenceScore(uint(len), backward)
|
||||
if best_score < score {
|
||||
out.len = uint(len)
|
||||
out.distance = backward
|
||||
out.score = score
|
||||
return
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bucket = h.buckets_[key:]
|
||||
var i int
|
||||
prev_ix = uint(bucket[0])
|
||||
bucket = bucket[1:]
|
||||
for i = 0; i < 4; (func() { i++; tmp5 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp5[0]) })() {
|
||||
var backward uint = cur_ix - prev_ix
|
||||
var len uint
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char != int(data[prev_ix+best_len]) {
|
||||
continue
|
||||
}
|
||||
|
||||
if backward == 0 || backward > max_backward {
|
||||
continue
|
||||
}
|
||||
|
||||
len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||||
if len >= 4 {
|
||||
var score uint = BackwardReferenceScore(uint(len), backward)
|
||||
if best_score < score {
|
||||
best_score = score
|
||||
best_len = uint(len)
|
||||
out.len = best_len
|
||||
out.distance = backward
|
||||
out.score = score
|
||||
compare_char = int(data[cur_ix_masked+best_len])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if min_score == out.score {
|
||||
SearchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
|
||||
}
|
||||
|
||||
h.buckets_[key+uint32((cur_ix>>3)%4)] = uint32(cur_ix)
|
||||
}
|
200
h54.go
200
h54.go
|
@ -1,200 +0,0 @@
|
|||
package brotli
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
func (*H54) HashTypeLength() uint {
|
||||
return 8
|
||||
}
|
||||
|
||||
func (*H54) StoreLookahead() uint {
|
||||
return 8
|
||||
}
|
||||
|
||||
/* HashBytes is the function that chooses the bucket to place
|
||||
the address in. The HashLongestMatch and H54
|
||||
classes have separate, different implementations of hashing. */
|
||||
func HashBytesH54(data []byte) uint32 {
|
||||
var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*7)) * kHashMul64)
|
||||
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return uint32(h >> (64 - 20))
|
||||
}
|
||||
|
||||
/* A (forgetful) hash table to the data seen by the compressor, to
|
||||
help create backward references to previous data.
|
||||
|
||||
This is a hash map of fixed size ((1 << 20)). Starting from the
|
||||
given index, 4 buckets are used to store values of a key. */
|
||||
type H54 struct {
|
||||
HasherCommon
|
||||
buckets_ [(1 << 20) + 4]uint32
|
||||
}
|
||||
|
||||
func SelfH54(handle HasherHandle) *H54 {
|
||||
return handle.(*H54)
|
||||
}
|
||||
|
||||
func (*H54) Initialize(params *BrotliEncoderParams) {
|
||||
}
|
||||
|
||||
func (h *H54) Prepare(one_shot bool, input_size uint, data []byte) {
|
||||
var partial_prepare_threshold uint = (4 << 20) >> 7
|
||||
/* Partial preparation is 100 times slower (per socket). */
|
||||
if one_shot && input_size <= partial_prepare_threshold {
|
||||
var i uint
|
||||
for i = 0; i < input_size; i++ {
|
||||
var key uint32 = HashBytesH54(data[i:])
|
||||
for i := 0; i < int(4); i++ {
|
||||
h.buckets_[key:][i] = 0
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* It is not strictly necessary to fill this buffer here, but
|
||||
not filling will make the results of the compression stochastic
|
||||
(but correct). This is because random data would cause the
|
||||
system to find accidentally good backward references here and there. */
|
||||
h.buckets_ = [(1 << 20) + 4]uint32{}
|
||||
}
|
||||
}
|
||||
|
||||
/* Look at 5 bytes at &data[ix & mask].
|
||||
Compute a hash from these, and store the value somewhere within
|
||||
[ix .. ix+3]. */
|
||||
func (h *H54) Store(data []byte, mask uint, ix uint) {
|
||||
var key uint32 = HashBytesH54(data[ix&mask:])
|
||||
var off uint32 = uint32(ix>>3) % 4
|
||||
/* Wiggle the value with the bucket sweep range. */
|
||||
h.buckets_[key+off] = uint32(ix)
|
||||
}
|
||||
|
||||
func (h *H54) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
||||
var i uint
|
||||
for i = ix_start; i < ix_end; i++ {
|
||||
h.Store(data, mask, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *H54) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
||||
if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
|
||||
/* Prepare the hashes for three last bytes of the last write.
|
||||
These could not be calculated before, since they require knowledge
|
||||
of both the previous and the current block. */
|
||||
h.Store(ringbuffer, ringbuffer_mask, position-3)
|
||||
h.Store(ringbuffer, ringbuffer_mask, position-2)
|
||||
h.Store(ringbuffer, ringbuffer_mask, position-1)
|
||||
}
|
||||
}
|
||||
|
||||
func (*H54) PrepareDistanceCache(distance_cache []int) {
|
||||
}
|
||||
|
||||
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
|
||||
up to the length of max_length and stores the position cur_ix in the
|
||||
hash table.
|
||||
|
||||
Does not look for matches longer than max_length.
|
||||
Does not look for matches further away than max_backward.
|
||||
Writes the best match into |out|.
|
||||
|out|->score is updated only if a better match is found. */
|
||||
func (h *H54) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
|
||||
var best_len_in uint = out.len
|
||||
var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
||||
var key uint32 = HashBytesH54(data[cur_ix_masked:])
|
||||
var compare_char int = int(data[cur_ix_masked+best_len_in])
|
||||
var best_score uint = out.score
|
||||
var best_len uint = best_len_in
|
||||
var cached_backward uint = uint(distance_cache[0])
|
||||
var prev_ix uint = cur_ix - cached_backward
|
||||
var bucket []uint32
|
||||
out.len_code_delta = 0
|
||||
if prev_ix < cur_ix {
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char == int(data[prev_ix+best_len]) {
|
||||
var len uint = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||||
if len >= 4 {
|
||||
var score uint = BackwardReferenceScoreUsingLastDistance(uint(len))
|
||||
if best_score < score {
|
||||
best_score = score
|
||||
best_len = uint(len)
|
||||
out.len = uint(len)
|
||||
out.distance = cached_backward
|
||||
out.score = best_score
|
||||
compare_char = int(data[cur_ix_masked+best_len])
|
||||
if 4 == 1 {
|
||||
h.buckets_[key] = uint32(cur_ix)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if 4 == 1 {
|
||||
var backward uint
|
||||
var len uint
|
||||
|
||||
/* Only one to look for, don't bother to prepare for a loop. */
|
||||
prev_ix = uint(h.buckets_[key])
|
||||
|
||||
h.buckets_[key] = uint32(cur_ix)
|
||||
backward = cur_ix - prev_ix
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char != int(data[prev_ix+best_len_in]) {
|
||||
return
|
||||
}
|
||||
|
||||
if backward == 0 || backward > max_backward {
|
||||
return
|
||||
}
|
||||
|
||||
len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||||
if len >= 4 {
|
||||
var score uint = BackwardReferenceScore(uint(len), backward)
|
||||
if best_score < score {
|
||||
out.len = uint(len)
|
||||
out.distance = backward
|
||||
out.score = score
|
||||
return
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bucket = h.buckets_[key:]
|
||||
var i int
|
||||
prev_ix = uint(bucket[0])
|
||||
bucket = bucket[1:]
|
||||
for i = 0; i < 4; (func() { i++; tmp9 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp9[0]) })() {
|
||||
var backward uint = cur_ix - prev_ix
|
||||
var len uint
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char != int(data[prev_ix+best_len]) {
|
||||
continue
|
||||
}
|
||||
|
||||
if backward == 0 || backward > max_backward {
|
||||
continue
|
||||
}
|
||||
|
||||
len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
||||
if len >= 4 {
|
||||
var score uint = BackwardReferenceScore(uint(len), backward)
|
||||
if best_score < score {
|
||||
best_score = score
|
||||
best_len = uint(len)
|
||||
out.len = best_len
|
||||
out.distance = backward
|
||||
out.score = score
|
||||
compare_char = int(data[cur_ix_masked+best_len])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
h.buckets_[key+uint32((cur_ix>>3)%4)] = uint32(cur_ix)
|
||||
}
|
2
h55.go
2
h55.go
|
@ -55,7 +55,7 @@ func (h *H55) Prepare(one_shot bool, input_size uint, data []byte) {
|
|||
var common_a *HasherCommon
|
||||
var common_b *HasherCommon
|
||||
|
||||
h.ha = new(H54)
|
||||
h.ha = newHasher(54)
|
||||
common_a = h.ha.Common()
|
||||
common_a.params = h.params.hasher
|
||||
common_a.is_prepared_ = false
|
||||
|
|
87
hash.go
87
hash.go
|
@ -1,6 +1,9 @@
|
|||
package brotli
|
||||
|
||||
import "encoding/binary"
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
/* Matches data against static dictionary words, and for each length l,
|
||||
for which a match is found, updates matches[l] to be the minimum possible
|
||||
|
@ -253,40 +256,66 @@ func HasherReset(handle HasherHandle) {
|
|||
handle.Common().is_prepared_ = false
|
||||
}
|
||||
|
||||
func newHasher(typ int) HasherHandle {
|
||||
switch typ {
|
||||
case 2:
|
||||
return &hashLongestMatchQuickly{
|
||||
bucketBits: 16,
|
||||
bucketSweep: 1,
|
||||
hashLen: 5,
|
||||
useDictionary: true,
|
||||
}
|
||||
case 3:
|
||||
return &hashLongestMatchQuickly{
|
||||
bucketBits: 16,
|
||||
bucketSweep: 2,
|
||||
hashLen: 5,
|
||||
useDictionary: false,
|
||||
}
|
||||
case 4:
|
||||
return &hashLongestMatchQuickly{
|
||||
bucketBits: 17,
|
||||
bucketSweep: 4,
|
||||
hashLen: 5,
|
||||
useDictionary: true,
|
||||
}
|
||||
case 5:
|
||||
return new(H5)
|
||||
case 6:
|
||||
return new(H6)
|
||||
case 40:
|
||||
return new(H40)
|
||||
case 41:
|
||||
return new(H41)
|
||||
case 42:
|
||||
return new(H42)
|
||||
case 54:
|
||||
return &hashLongestMatchQuickly{
|
||||
bucketBits: 20,
|
||||
bucketSweep: 4,
|
||||
hashLen: 7,
|
||||
useDictionary: false,
|
||||
}
|
||||
case 35:
|
||||
return new(H35)
|
||||
case 55:
|
||||
return new(H55)
|
||||
case 65:
|
||||
return new(H65)
|
||||
case 10:
|
||||
return new(H10)
|
||||
}
|
||||
|
||||
panic(fmt.Sprintf("unknown hasher type: %d", typ))
|
||||
}
|
||||
|
||||
func HasherSetup(handle *HasherHandle, params *BrotliEncoderParams, data []byte, position uint, input_size uint, is_last bool) {
|
||||
var self HasherHandle = nil
|
||||
var common *HasherCommon = nil
|
||||
var one_shot bool = (position == 0 && is_last)
|
||||
if *handle == nil {
|
||||
ChooseHasher(params, ¶ms.hasher)
|
||||
switch params.hasher.type_ {
|
||||
case 2:
|
||||
self = new(H2)
|
||||
case 3:
|
||||
self = new(H3)
|
||||
case 4:
|
||||
self = new(H4)
|
||||
case 5:
|
||||
self = new(H5)
|
||||
case 6:
|
||||
self = new(H6)
|
||||
case 40:
|
||||
self = new(H40)
|
||||
case 41:
|
||||
self = new(H41)
|
||||
case 42:
|
||||
self = new(H42)
|
||||
case 54:
|
||||
self = new(H54)
|
||||
case 35:
|
||||
self = new(H35)
|
||||
case 55:
|
||||
self = new(H55)
|
||||
case 65:
|
||||
self = new(H65)
|
||||
case 10:
|
||||
self = new(H10)
|
||||
}
|
||||
self = newHasher(params.hasher.type_)
|
||||
|
||||
*handle = self
|
||||
common = self.Common()
|
||||
|
|
|
@ -12,23 +12,23 @@ import "encoding/binary"
|
|||
/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
|
||||
a little faster (0.5% - 1%) and it compresses 0.15% better on small text
|
||||
and HTML inputs. */
|
||||
func (*H2) HashTypeLength() uint {
|
||||
func (*hashLongestMatchQuickly) HashTypeLength() uint {
|
||||
return 8
|
||||
}
|
||||
|
||||
func (*H2) StoreLookahead() uint {
|
||||
func (*hashLongestMatchQuickly) StoreLookahead() uint {
|
||||
return 8
|
||||
}
|
||||
|
||||
/* HashBytes is the function that chooses the bucket to place
|
||||
the address in. The HashLongestMatch and H2
|
||||
the address in. The HashLongestMatch and hashLongestMatchQuickly
|
||||
classes have separate, different implementations of hashing. */
|
||||
func HashBytesH2(data []byte) uint32 {
|
||||
var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64)
|
||||
func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 {
|
||||
var hash uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*h.hashLen)) * kHashMul64)
|
||||
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return uint32(h >> (64 - 16))
|
||||
return uint32(hash >> (64 - h.bucketBits))
|
||||
}
|
||||
|
||||
/* A (forgetful) hash table to the data seen by the compressor, to
|
||||
|
@ -36,35 +36,39 @@ func HashBytesH2(data []byte) uint32 {
|
|||
|
||||
This is a hash map of fixed size (1 << 16). Starting from the
|
||||
given index, 1 buckets are used to store values of a key. */
|
||||
type H2 struct {
|
||||
type hashLongestMatchQuickly struct {
|
||||
HasherCommon
|
||||
buckets_ [(1 << 16) + 1]uint32
|
||||
|
||||
bucketBits uint
|
||||
bucketSweep int
|
||||
hashLen uint
|
||||
useDictionary bool
|
||||
|
||||
buckets []uint32
|
||||
}
|
||||
|
||||
func SelfH2(handle HasherHandle) *H2 {
|
||||
return handle.(*H2)
|
||||
func (h *hashLongestMatchQuickly) Initialize(params *BrotliEncoderParams) {
|
||||
h.buckets = make([]uint32, 1<<h.bucketBits+h.bucketSweep)
|
||||
}
|
||||
|
||||
func (*H2) Initialize(params *BrotliEncoderParams) {
|
||||
}
|
||||
|
||||
func (h *H2) Prepare(one_shot bool, input_size uint, data []byte) {
|
||||
var partial_prepare_threshold uint = (4 << 16) >> 7
|
||||
func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) {
|
||||
var partial_prepare_threshold uint = (4 << h.bucketBits) >> 7
|
||||
/* Partial preparation is 100 times slower (per socket). */
|
||||
if one_shot && input_size <= partial_prepare_threshold {
|
||||
var i uint
|
||||
for i = 0; i < input_size; i++ {
|
||||
var key uint32 = HashBytesH2(data[i:])
|
||||
h.buckets_[key] = 0
|
||||
var key uint32 = h.HashBytes(data[i:])
|
||||
for j := 0; j < h.bucketSweep; j++ {
|
||||
h.buckets[key+uint32(j)] = 0
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* It is not strictly necessary to fill this buffer here, but
|
||||
not filling will make the results of the compression stochastic
|
||||
(but correct). This is because random data would cause the
|
||||
system to find accidentally good backward references here and there. */
|
||||
var i int
|
||||
for i = 0; i < len(h.buckets_); i++ {
|
||||
h.buckets_[i] = 0
|
||||
for i := range h.buckets {
|
||||
h.buckets[i] = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -72,21 +76,21 @@ func (h *H2) Prepare(one_shot bool, input_size uint, data []byte) {
|
|||
/* Look at 5 bytes at &data[ix & mask].
|
||||
Compute a hash from these, and store the value somewhere within
|
||||
[ix .. ix+3]. */
|
||||
func (h *H2) Store(data []byte, mask uint, ix uint) {
|
||||
var key uint32 = HashBytesH2(data[ix&mask:])
|
||||
var off uint32 = uint32(ix>>3) % 1
|
||||
func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) {
|
||||
var key uint32 = h.HashBytes(data[ix&mask:])
|
||||
var off uint32 = uint32(ix>>3) % uint32(h.bucketSweep)
|
||||
/* Wiggle the value with the bucket sweep range. */
|
||||
h.buckets_[key+off] = uint32(ix)
|
||||
h.buckets[key+off] = uint32(ix)
|
||||
}
|
||||
|
||||
func (h *H2) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
||||
func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
||||
var i uint
|
||||
for i = ix_start; i < ix_end; i++ {
|
||||
h.Store(data, mask, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *H2) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
||||
func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
||||
if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
|
||||
/* Prepare the hashes for three last bytes of the last write.
|
||||
These could not be calculated before, since they require knowledge
|
||||
|
@ -97,7 +101,7 @@ func (h *H2) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []b
|
|||
}
|
||||
}
|
||||
|
||||
func (*H2) PrepareDistanceCache(distance_cache []int) {
|
||||
func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) {
|
||||
}
|
||||
|
||||
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
|
||||
|
@ -108,10 +112,10 @@ func (*H2) PrepareDistanceCache(distance_cache []int) {
|
|||
Does not look for matches further away than max_backward.
|
||||
Writes the best match into |out|.
|
||||
|out|->score is updated only if a better match is found. */
|
||||
func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
|
||||
func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
|
||||
var best_len_in uint = out.len
|
||||
var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
||||
var key uint32 = HashBytesH2(data[cur_ix_masked:])
|
||||
var key uint32 = h.HashBytes(data[cur_ix_masked:])
|
||||
var compare_char int = int(data[cur_ix_masked+best_len_in])
|
||||
var min_score uint = out.score
|
||||
var best_score uint = out.score
|
||||
|
@ -133,8 +137,8 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
|
|||
out.distance = cached_backward
|
||||
out.score = best_score
|
||||
compare_char = int(data[cur_ix_masked+best_len])
|
||||
if 1 == 1 {
|
||||
h.buckets_[key] = uint32(cur_ix)
|
||||
if h.bucketSweep == 1 {
|
||||
h.buckets[key] = uint32(cur_ix)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
@ -142,14 +146,14 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
|
|||
}
|
||||
}
|
||||
|
||||
if 1 == 1 {
|
||||
if h.bucketSweep == 1 {
|
||||
var backward uint
|
||||
var len uint
|
||||
|
||||
/* Only one to look for, don't bother to prepare for a loop. */
|
||||
prev_ix = uint(h.buckets_[key])
|
||||
prev_ix = uint(h.buckets[key])
|
||||
|
||||
h.buckets_[key] = uint32(cur_ix)
|
||||
h.buckets[key] = uint32(cur_ix)
|
||||
backward = cur_ix - prev_ix
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
if compare_char != int(data[prev_ix+best_len_in]) {
|
||||
|
@ -171,11 +175,11 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
bucket = h.buckets_[key:]
|
||||
bucket = h.buckets[key:]
|
||||
var i int
|
||||
prev_ix = uint(bucket[0])
|
||||
bucket = bucket[1:]
|
||||
for i = 0; i < 1; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
|
||||
for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
|
||||
var backward uint = cur_ix - prev_ix
|
||||
var len uint
|
||||
prev_ix &= uint(uint32(ring_buffer_mask))
|
||||
|
@ -202,9 +206,9 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
|
|||
}
|
||||
}
|
||||
|
||||
if min_score == out.score {
|
||||
if h.useDictionary && min_score == out.score {
|
||||
SearchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
|
||||
}
|
||||
|
||||
h.buckets_[key+uint32((cur_ix>>3)%1)] = uint32(cur_ix)
|
||||
h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix)
|
||||
}
|
Loading…
Reference in New Issue