From c4f1bfa34ff2a928395c0a11fffa69f88b961812 Mon Sep 17 00:00:00 2001 From: Andy Balholm Date: Sat, 9 Mar 2019 13:01:56 -0800 Subject: [PATCH] Start reducing duplication in hasher code. The C version defined several related hasher types with preprocessor tricks, but I split them up for the translation to Go. Now I'm recombining them. --- h3.go | 203 ------------------------ h35.go | 2 +- h4.go | 208 ------------------------- h54.go | 200 ------------------------ h55.go | 2 +- hash.go | 87 +++++++---- h2.go => hash_longest_match_quickly.go | 80 +++++----- 7 files changed, 102 insertions(+), 680 deletions(-) delete mode 100644 h3.go delete mode 100644 h4.go delete mode 100644 h54.go rename h2.go => hash_longest_match_quickly.go (68%) diff --git a/h3.go b/h3.go deleted file mode 100644 index 5bb0dfa..0000000 --- a/h3.go +++ /dev/null @@ -1,203 +0,0 @@ -package brotli - -import "encoding/binary" - -/* NOLINT(build/header_guard) */ -/* Copyright 2010 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ -func (*H3) HashTypeLength() uint { - return 8 -} - -func (*H3) StoreLookahead() uint { - return 8 -} - -/* HashBytes is the function that chooses the bucket to place - the address in. The HashLongestMatch and H3 - classes have separate, different implementations of hashing. */ -func HashBytesH3(data []byte) uint32 { - var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64) - - /* The higher bits contain more mixture from the multiplication, - so we take our results from there. */ - return uint32(h >> (64 - 16)) -} - -/* A (forgetful) hash table to the data seen by the compressor, to - help create backward references to previous data. - - This is a hash map of fixed size (BUCKET_SIZE). Starting from the - given index, 2 buckets are used to store values of a key. */ -type H3 struct { - HasherCommon - buckets_ [(1 << 16) + 2]uint32 -} - -func SelfH3(handle HasherHandle) *H3 { - return handle.(*H3) -} - -func (*H3) Initialize(params *BrotliEncoderParams) { -} - -func (h *H3) Prepare(one_shot bool, input_size uint, data []byte) { - var partial_prepare_threshold uint = (4 << 16) >> 7 - /* Partial preparation is 100 times slower (per socket). */ - if one_shot && input_size <= partial_prepare_threshold { - var i uint - for i = 0; i < input_size; i++ { - var key uint32 = HashBytesH3(data[i:]) - for i := 0; i < int(2); i++ { - h.buckets_[key:][i] = 0 - } - } - } else { - /* It is not strictly necessary to fill this buffer here, but - not filling will make the results of the compression stochastic - (but correct). This is because random data would cause the - system to find accidentally good backward references here and there. */ - var i int - for i = 0; i < len(h.buckets_); i++ { - h.buckets_[i] = 0 - } - } -} - -/* Look at 5 bytes at &data[ix & mask]. - Compute a hash from these, and store the value somewhere within - [ix .. ix+3]. */ -func (h *H3) Store(data []byte, mask uint, ix uint) { - var key uint32 = HashBytesH3(data[ix&mask:]) - var off uint32 = uint32(ix>>3) % 2 - /* Wiggle the value with the bucket sweep range. 
*/ - h.buckets_[key+off] = uint32(ix) -} - -func (h *H3) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { - var i uint - for i = ix_start; i < ix_end; i++ { - h.Store(data, mask, i) - } -} - -func (h *H3) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) { - if num_bytes >= h.HashTypeLength()-1 && position >= 3 { - /* Prepare the hashes for three last bytes of the last write. - These could not be calculated before, since they require knowledge - of both the previous and the current block. */ - h.Store(ringbuffer, ringbuffer_mask, position-3) - h.Store(ringbuffer, ringbuffer_mask, position-2) - h.Store(ringbuffer, ringbuffer_mask, position-1) - } -} - -func (*H3) PrepareDistanceCache(distance_cache []int) { -} - -/* Find a longest backward match of &data[cur_ix & ring_buffer_mask] - up to the length of max_length and stores the position cur_ix in the - hash table. - - Does not look for matches longer than max_length. - Does not look for matches further away than max_backward. - Writes the best match into |out|. - |out|->score is updated only if a better match is found. */ -func (h *H3) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) { - var best_len_in uint = out.len - var cur_ix_masked uint = cur_ix & ring_buffer_mask - var key uint32 = HashBytesH3(data[cur_ix_masked:]) - var compare_char int = int(data[cur_ix_masked+best_len_in]) - var best_score uint = out.score - var best_len uint = best_len_in - var cached_backward uint = uint(distance_cache[0]) - var prev_ix uint = cur_ix - cached_backward - var bucket []uint32 - out.len_code_delta = 0 - if prev_ix < cur_ix { - prev_ix &= uint(uint32(ring_buffer_mask)) - if compare_char == int(data[prev_ix+best_len]) { - var len uint = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) - if len >= 4 { - var score uint = BackwardReferenceScoreUsingLastDistance(uint(len)) - if best_score < score { - best_score = score - best_len = uint(len) - out.len = uint(len) - out.distance = cached_backward - out.score = best_score - compare_char = int(data[cur_ix_masked+best_len]) - if 2 == 1 { - h.buckets_[key] = uint32(cur_ix) - return - } - } - } - } - } - - if 2 == 1 { - var backward uint - var len uint - - /* Only one to look for, don't bother to prepare for a loop. 
*/ - prev_ix = uint(h.buckets_[key]) - - h.buckets_[key] = uint32(cur_ix) - backward = cur_ix - prev_ix - prev_ix &= uint(uint32(ring_buffer_mask)) - if compare_char != int(data[prev_ix+best_len_in]) { - return - } - - if backward == 0 || backward > max_backward { - return - } - - len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) - if len >= 4 { - var score uint = BackwardReferenceScore(uint(len), backward) - if best_score < score { - out.len = uint(len) - out.distance = backward - out.score = score - return - } - } - } else { - bucket = h.buckets_[key:] - var i int - prev_ix = uint(bucket[0]) - bucket = bucket[1:] - for i = 0; i < 2; (func() { i++; tmp4 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp4[0]) })() { - var backward uint = cur_ix - prev_ix - var len uint - prev_ix &= uint(uint32(ring_buffer_mask)) - if compare_char != int(data[prev_ix+best_len]) { - continue - } - - if backward == 0 || backward > max_backward { - continue - } - - len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) - if len >= 4 { - var score uint = BackwardReferenceScore(uint(len), backward) - if best_score < score { - best_score = score - best_len = uint(len) - out.len = best_len - out.distance = backward - out.score = score - compare_char = int(data[cur_ix_masked+best_len]) - } - } - } - } - - h.buckets_[key+uint32((cur_ix>>3)%2)] = uint32(cur_ix) -} diff --git a/h35.go b/h35.go index 3d8cbd2..fb00ff7 100644 --- a/h35.go +++ b/h35.go @@ -57,7 +57,7 @@ func (h *H35) Prepare(one_shot bool, input_size uint, data []byte) { var common_a *HasherCommon var common_b *HasherCommon - h.ha = new(H3) + h.ha = newHasher(3) common_a = h.ha.Common() common_a.params = h.params.hasher common_a.is_prepared_ = false diff --git a/h4.go b/h4.go deleted file mode 100644 index 5169a42..0000000 --- a/h4.go +++ /dev/null @@ -1,208 +0,0 @@ -package brotli - -import "encoding/binary" - -/* NOLINT(build/header_guard) */ -/* Copyright 2010 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ -func (*H4) HashTypeLength() uint { - return 8 -} - -func (*H4) StoreLookahead() uint { - return 8 -} - -/* HashBytes is the function that chooses the bucket to place - the address in. The HashLongestMatch and H4 - classes have separate, different implementations of hashing. */ -func HashBytesH4(data []byte) uint32 { - var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64) - - /* The higher bits contain more mixture from the multiplication, - so we take our results from there. */ - return uint32(h >> (64 - 17)) -} - -/* A (forgetful) hash table to the data seen by the compressor, to - help create backward references to previous data. - - This is a hash map of fixed size (BUCKET_SIZE). Starting from the - given index, 4 buckets are used to store values of a key. */ -type H4 struct { - HasherCommon - buckets_ [(1 << 17) + 4]uint32 -} - -func SelfH4(handle HasherHandle) *H4 { - return handle.(*H4) -} - -func (*H4) Initialize(params *BrotliEncoderParams) { -} - -func (h *H4) Prepare(one_shot bool, input_size uint, data []byte) { - var partial_prepare_threshold uint = (4 << 17) >> 7 - /* Partial preparation is 100 times slower (per socket). 
*/ - if one_shot && input_size <= partial_prepare_threshold { - var i uint - for i = 0; i < input_size; i++ { - var key uint32 = HashBytesH4(data[i:]) - for i := 0; i < int(4); i++ { - h.buckets_[key:][i] = 0 - } - } - } else { - /* It is not strictly necessary to fill this buffer here, but - not filling will make the results of the compression stochastic - (but correct). This is because random data would cause the - system to find accidentally good backward references here and there. */ - var i int - for i = 0; i < len(h.buckets_); i++ { - h.buckets_[i] = 0 - } - } -} - -/* Look at 5 bytes at &data[ix & mask]. - Compute a hash from these, and store the value somewhere within - [ix .. ix+3]. */ -func (h *H4) Store(data []byte, mask uint, ix uint) { - var key uint32 = HashBytesH4(data[ix&mask:]) - var off uint32 = uint32(ix>>3) % 4 - /* Wiggle the value with the bucket sweep range. */ - h.buckets_[key+off] = uint32(ix) -} - -func (h *H4) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { - var i uint - for i = ix_start; i < ix_end; i++ { - h.Store(data, mask, i) - } -} - -func (h *H4) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) { - if num_bytes >= h.HashTypeLength()-1 && position >= 3 { - /* Prepare the hashes for three last bytes of the last write. - These could not be calculated before, since they require knowledge - of both the previous and the current block. */ - h.Store(ringbuffer, ringbuffer_mask, position-3) - h.Store(ringbuffer, ringbuffer_mask, position-2) - h.Store(ringbuffer, ringbuffer_mask, position-1) - } -} - -func (*H4) PrepareDistanceCache(distance_cache []int) { -} - -/* Find a longest backward match of &data[cur_ix & ring_buffer_mask] - up to the length of max_length and stores the position cur_ix in the - hash table. - - Does not look for matches longer than max_length. - Does not look for matches further away than max_backward. - Writes the best match into |out|. - |out|->score is updated only if a better match is found. */ -func (h *H4) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) { - var best_len_in uint = out.len - var cur_ix_masked uint = cur_ix & ring_buffer_mask - var key uint32 = HashBytesH4(data[cur_ix_masked:]) - var compare_char int = int(data[cur_ix_masked+best_len_in]) - var min_score uint = out.score - var best_score uint = out.score - var best_len uint = best_len_in - var cached_backward uint = uint(distance_cache[0]) - var prev_ix uint = cur_ix - cached_backward - var bucket []uint32 - out.len_code_delta = 0 - if prev_ix < cur_ix { - prev_ix &= uint(uint32(ring_buffer_mask)) - if compare_char == int(data[prev_ix+best_len]) { - var len uint = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) - if len >= 4 { - var score uint = BackwardReferenceScoreUsingLastDistance(uint(len)) - if best_score < score { - best_score = score - best_len = uint(len) - out.len = uint(len) - out.distance = cached_backward - out.score = best_score - compare_char = int(data[cur_ix_masked+best_len]) - if 4 == 1 { - h.buckets_[key] = uint32(cur_ix) - return - } - } - } - } - } - - if 4 == 1 { - var backward uint - var len uint - - /* Only one to look for, don't bother to prepare for a loop. 
*/ - prev_ix = uint(h.buckets_[key]) - - h.buckets_[key] = uint32(cur_ix) - backward = cur_ix - prev_ix - prev_ix &= uint(uint32(ring_buffer_mask)) - if compare_char != int(data[prev_ix+best_len_in]) { - return - } - - if backward == 0 || backward > max_backward { - return - } - - len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) - if len >= 4 { - var score uint = BackwardReferenceScore(uint(len), backward) - if best_score < score { - out.len = uint(len) - out.distance = backward - out.score = score - return - } - } - } else { - bucket = h.buckets_[key:] - var i int - prev_ix = uint(bucket[0]) - bucket = bucket[1:] - for i = 0; i < 4; (func() { i++; tmp5 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp5[0]) })() { - var backward uint = cur_ix - prev_ix - var len uint - prev_ix &= uint(uint32(ring_buffer_mask)) - if compare_char != int(data[prev_ix+best_len]) { - continue - } - - if backward == 0 || backward > max_backward { - continue - } - - len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) - if len >= 4 { - var score uint = BackwardReferenceScore(uint(len), backward) - if best_score < score { - best_score = score - best_len = uint(len) - out.len = best_len - out.distance = backward - out.score = score - compare_char = int(data[cur_ix_masked+best_len]) - } - } - } - } - - if min_score == out.score { - SearchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true) - } - - h.buckets_[key+uint32((cur_ix>>3)%4)] = uint32(cur_ix) -} diff --git a/h54.go b/h54.go deleted file mode 100644 index 54154e4..0000000 --- a/h54.go +++ /dev/null @@ -1,200 +0,0 @@ -package brotli - -import "encoding/binary" - -/* NOLINT(build/header_guard) */ -/* Copyright 2010 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ -func (*H54) HashTypeLength() uint { - return 8 -} - -func (*H54) StoreLookahead() uint { - return 8 -} - -/* HashBytes is the function that chooses the bucket to place - the address in. The HashLongestMatch and H54 - classes have separate, different implementations of hashing. */ -func HashBytesH54(data []byte) uint32 { - var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*7)) * kHashMul64) - - /* The higher bits contain more mixture from the multiplication, - so we take our results from there. */ - return uint32(h >> (64 - 20)) -} - -/* A (forgetful) hash table to the data seen by the compressor, to - help create backward references to previous data. - - This is a hash map of fixed size ((1 << 20)). Starting from the - given index, 4 buckets are used to store values of a key. */ -type H54 struct { - HasherCommon - buckets_ [(1 << 20) + 4]uint32 -} - -func SelfH54(handle HasherHandle) *H54 { - return handle.(*H54) -} - -func (*H54) Initialize(params *BrotliEncoderParams) { -} - -func (h *H54) Prepare(one_shot bool, input_size uint, data []byte) { - var partial_prepare_threshold uint = (4 << 20) >> 7 - /* Partial preparation is 100 times slower (per socket). */ - if one_shot && input_size <= partial_prepare_threshold { - var i uint - for i = 0; i < input_size; i++ { - var key uint32 = HashBytesH54(data[i:]) - for i := 0; i < int(4); i++ { - h.buckets_[key:][i] = 0 - } - } - } else { - /* It is not strictly necessary to fill this buffer here, but - not filling will make the results of the compression stochastic - (but correct). 
This is because random data would cause the - system to find accidentally good backward references here and there. */ - h.buckets_ = [(1 << 20) + 4]uint32{} - } -} - -/* Look at 5 bytes at &data[ix & mask]. - Compute a hash from these, and store the value somewhere within - [ix .. ix+3]. */ -func (h *H54) Store(data []byte, mask uint, ix uint) { - var key uint32 = HashBytesH54(data[ix&mask:]) - var off uint32 = uint32(ix>>3) % 4 - /* Wiggle the value with the bucket sweep range. */ - h.buckets_[key+off] = uint32(ix) -} - -func (h *H54) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { - var i uint - for i = ix_start; i < ix_end; i++ { - h.Store(data, mask, i) - } -} - -func (h *H54) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) { - if num_bytes >= h.HashTypeLength()-1 && position >= 3 { - /* Prepare the hashes for three last bytes of the last write. - These could not be calculated before, since they require knowledge - of both the previous and the current block. */ - h.Store(ringbuffer, ringbuffer_mask, position-3) - h.Store(ringbuffer, ringbuffer_mask, position-2) - h.Store(ringbuffer, ringbuffer_mask, position-1) - } -} - -func (*H54) PrepareDistanceCache(distance_cache []int) { -} - -/* Find a longest backward match of &data[cur_ix & ring_buffer_mask] - up to the length of max_length and stores the position cur_ix in the - hash table. - - Does not look for matches longer than max_length. - Does not look for matches further away than max_backward. - Writes the best match into |out|. - |out|->score is updated only if a better match is found. */ -func (h *H54) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) { - var best_len_in uint = out.len - var cur_ix_masked uint = cur_ix & ring_buffer_mask - var key uint32 = HashBytesH54(data[cur_ix_masked:]) - var compare_char int = int(data[cur_ix_masked+best_len_in]) - var best_score uint = out.score - var best_len uint = best_len_in - var cached_backward uint = uint(distance_cache[0]) - var prev_ix uint = cur_ix - cached_backward - var bucket []uint32 - out.len_code_delta = 0 - if prev_ix < cur_ix { - prev_ix &= uint(uint32(ring_buffer_mask)) - if compare_char == int(data[prev_ix+best_len]) { - var len uint = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) - if len >= 4 { - var score uint = BackwardReferenceScoreUsingLastDistance(uint(len)) - if best_score < score { - best_score = score - best_len = uint(len) - out.len = uint(len) - out.distance = cached_backward - out.score = best_score - compare_char = int(data[cur_ix_masked+best_len]) - if 4 == 1 { - h.buckets_[key] = uint32(cur_ix) - return - } - } - } - } - } - - if 4 == 1 { - var backward uint - var len uint - - /* Only one to look for, don't bother to prepare for a loop. 
*/ - prev_ix = uint(h.buckets_[key]) - - h.buckets_[key] = uint32(cur_ix) - backward = cur_ix - prev_ix - prev_ix &= uint(uint32(ring_buffer_mask)) - if compare_char != int(data[prev_ix+best_len_in]) { - return - } - - if backward == 0 || backward > max_backward { - return - } - - len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) - if len >= 4 { - var score uint = BackwardReferenceScore(uint(len), backward) - if best_score < score { - out.len = uint(len) - out.distance = backward - out.score = score - return - } - } - } else { - bucket = h.buckets_[key:] - var i int - prev_ix = uint(bucket[0]) - bucket = bucket[1:] - for i = 0; i < 4; (func() { i++; tmp9 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp9[0]) })() { - var backward uint = cur_ix - prev_ix - var len uint - prev_ix &= uint(uint32(ring_buffer_mask)) - if compare_char != int(data[prev_ix+best_len]) { - continue - } - - if backward == 0 || backward > max_backward { - continue - } - - len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length) - if len >= 4 { - var score uint = BackwardReferenceScore(uint(len), backward) - if best_score < score { - best_score = score - best_len = uint(len) - out.len = best_len - out.distance = backward - out.score = score - compare_char = int(data[cur_ix_masked+best_len]) - } - } - } - } - - h.buckets_[key+uint32((cur_ix>>3)%4)] = uint32(cur_ix) -} diff --git a/h55.go b/h55.go index babd933..459eb37 100644 --- a/h55.go +++ b/h55.go @@ -55,7 +55,7 @@ func (h *H55) Prepare(one_shot bool, input_size uint, data []byte) { var common_a *HasherCommon var common_b *HasherCommon - h.ha = new(H54) + h.ha = newHasher(54) common_a = h.ha.Common() common_a.params = h.params.hasher common_a.is_prepared_ = false diff --git a/hash.go b/hash.go index f81d98c..1615761 100644 --- a/hash.go +++ b/hash.go @@ -1,6 +1,9 @@ package brotli -import "encoding/binary" +import ( + "encoding/binary" + "fmt" +) /* Matches data against static dictionary words, and for each length l, for which a match is found, updates matches[l] to be the minimum possible @@ -253,40 +256,66 @@ func HasherReset(handle HasherHandle) { handle.Common().is_prepared_ = false } +func newHasher(typ int) HasherHandle { + switch typ { + case 2: + return &hashLongestMatchQuickly{ + bucketBits: 16, + bucketSweep: 1, + hashLen: 5, + useDictionary: true, + } + case 3: + return &hashLongestMatchQuickly{ + bucketBits: 16, + bucketSweep: 2, + hashLen: 5, + useDictionary: false, + } + case 4: + return &hashLongestMatchQuickly{ + bucketBits: 17, + bucketSweep: 4, + hashLen: 5, + useDictionary: true, + } + case 5: + return new(H5) + case 6: + return new(H6) + case 40: + return new(H40) + case 41: + return new(H41) + case 42: + return new(H42) + case 54: + return &hashLongestMatchQuickly{ + bucketBits: 20, + bucketSweep: 4, + hashLen: 7, + useDictionary: false, + } + case 35: + return new(H35) + case 55: + return new(H55) + case 65: + return new(H65) + case 10: + return new(H10) + } + + panic(fmt.Sprintf("unknown hasher type: %d", typ)) +} + func HasherSetup(handle *HasherHandle, params *BrotliEncoderParams, data []byte, position uint, input_size uint, is_last bool) { var self HasherHandle = nil var common *HasherCommon = nil var one_shot bool = (position == 0 && is_last) if *handle == nil { ChooseHasher(params, ¶ms.hasher) - switch params.hasher.type_ { - case 2: - self = new(H2) - case 3: - self = new(H3) - case 4: - self = new(H4) - case 5: - self = new(H5) - case 6: - self = new(H6) - case 40: - self = 
new(H40) - case 41: - self = new(H41) - case 42: - self = new(H42) - case 54: - self = new(H54) - case 35: - self = new(H35) - case 55: - self = new(H55) - case 65: - self = new(H65) - case 10: - self = new(H10) - } + self = newHasher(params.hasher.type_) *handle = self common = self.Common() diff --git a/h2.go b/hash_longest_match_quickly.go similarity index 68% rename from h2.go rename to hash_longest_match_quickly.go index b0f529b..d66c61e 100644 --- a/h2.go +++ b/hash_longest_match_quickly.go @@ -12,23 +12,23 @@ import "encoding/binary" /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression a little faster (0.5% - 1%) and it compresses 0.15% better on small text and HTML inputs. */ -func (*H2) HashTypeLength() uint { +func (*hashLongestMatchQuickly) HashTypeLength() uint { return 8 } -func (*H2) StoreLookahead() uint { +func (*hashLongestMatchQuickly) StoreLookahead() uint { return 8 } /* HashBytes is the function that chooses the bucket to place - the address in. The HashLongestMatch and H2 + the address in. The HashLongestMatch and hashLongestMatchQuickly classes have separate, different implementations of hashing. */ -func HashBytesH2(data []byte) uint32 { - var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64) +func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 { + var hash uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*h.hashLen)) * kHashMul64) /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ - return uint32(h >> (64 - 16)) + return uint32(hash >> (64 - h.bucketBits)) } /* A (forgetful) hash table to the data seen by the compressor, to @@ -36,35 +36,39 @@ func HashBytesH2(data []byte) uint32 { This is a hash map of fixed size (1 << 16). Starting from the given index, 1 buckets are used to store values of a key. */ -type H2 struct { +type hashLongestMatchQuickly struct { HasherCommon - buckets_ [(1 << 16) + 1]uint32 + + bucketBits uint + bucketSweep int + hashLen uint + useDictionary bool + + buckets []uint32 } -func SelfH2(handle HasherHandle) *H2 { - return handle.(*H2) +func (h *hashLongestMatchQuickly) Initialize(params *BrotliEncoderParams) { + h.buckets = make([]uint32, 1<> 7 +func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) { + var partial_prepare_threshold uint = (4 << h.bucketBits) >> 7 /* Partial preparation is 100 times slower (per socket). */ if one_shot && input_size <= partial_prepare_threshold { var i uint for i = 0; i < input_size; i++ { - var key uint32 = HashBytesH2(data[i:]) - h.buckets_[key] = 0 + var key uint32 = h.HashBytes(data[i:]) + for j := 0; j < h.bucketSweep; j++ { + h.buckets[key+uint32(j)] = 0 + } } } else { /* It is not strictly necessary to fill this buffer here, but not filling will make the results of the compression stochastic (but correct). This is because random data would cause the system to find accidentally good backward references here and there. */ - var i int - for i = 0; i < len(h.buckets_); i++ { - h.buckets_[i] = 0 + for i := range h.buckets { + h.buckets[i] = 0 } } } @@ -72,21 +76,21 @@ func (h *H2) Prepare(one_shot bool, input_size uint, data []byte) { /* Look at 5 bytes at &data[ix & mask]. Compute a hash from these, and store the value somewhere within [ix .. ix+3]. 
*/ -func (h *H2) Store(data []byte, mask uint, ix uint) { - var key uint32 = HashBytesH2(data[ix&mask:]) - var off uint32 = uint32(ix>>3) % 1 +func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) { + var key uint32 = h.HashBytes(data[ix&mask:]) + var off uint32 = uint32(ix>>3) % uint32(h.bucketSweep) /* Wiggle the value with the bucket sweep range. */ - h.buckets_[key+off] = uint32(ix) + h.buckets[key+off] = uint32(ix) } -func (h *H2) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { +func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { var i uint for i = ix_start; i < ix_end; i++ { h.Store(data, mask, i) } } -func (h *H2) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) { +func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) { if num_bytes >= h.HashTypeLength()-1 && position >= 3 { /* Prepare the hashes for three last bytes of the last write. These could not be calculated before, since they require knowledge @@ -97,7 +101,7 @@ func (h *H2) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []b } } -func (*H2) PrepareDistanceCache(distance_cache []int) { +func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) { } /* Find a longest backward match of &data[cur_ix & ring_buffer_mask] @@ -108,10 +112,10 @@ func (*H2) PrepareDistanceCache(distance_cache []int) { Does not look for matches further away than max_backward. Writes the best match into |out|. |out|->score is updated only if a better match is found. */ -func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) { +func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) { var best_len_in uint = out.len var cur_ix_masked uint = cur_ix & ring_buffer_mask - var key uint32 = HashBytesH2(data[cur_ix_masked:]) + var key uint32 = h.HashBytes(data[cur_ix_masked:]) var compare_char int = int(data[cur_ix_masked+best_len_in]) var min_score uint = out.score var best_score uint = out.score @@ -133,8 +137,8 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, out.distance = cached_backward out.score = best_score compare_char = int(data[cur_ix_masked+best_len]) - if 1 == 1 { - h.buckets_[key] = uint32(cur_ix) + if h.bucketSweep == 1 { + h.buckets[key] = uint32(cur_ix) return } } @@ -142,14 +146,14 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, } } - if 1 == 1 { + if h.bucketSweep == 1 { var backward uint var len uint /* Only one to look for, don't bother to prepare for a loop. 
*/ - prev_ix = uint(h.buckets_[key]) + prev_ix = uint(h.buckets[key]) - h.buckets_[key] = uint32(cur_ix) + h.buckets[key] = uint32(cur_ix) backward = cur_ix - prev_ix prev_ix &= uint(uint32(ring_buffer_mask)) if compare_char != int(data[prev_ix+best_len_in]) { @@ -171,11 +175,11 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, } } } else { - bucket = h.buckets_[key:] + bucket = h.buckets[key:] var i int prev_ix = uint(bucket[0]) bucket = bucket[1:] - for i = 0; i < 1; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() { + for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() { var backward uint = cur_ix - prev_ix var len uint prev_ix &= uint(uint32(ring_buffer_mask)) @@ -202,9 +206,9 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, } } - if min_score == out.score { + if h.useDictionary && min_score == out.score { SearchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true) } - h.buckets_[key+uint32((cur_ix>>3)%1)] = uint32(cur_ix) + h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix) }