From ba67d8c1de05c935bd501fd9c07ecb4d4e568b5d Mon Sep 17 00:00:00 2001 From: Andy Balholm Date: Sat, 9 Mar 2019 13:54:31 -0800 Subject: [PATCH] Consolidate rolling hashes. --- h35.go | 2 +- h55.go | 2 +- h65.go | 2 +- hash.go | 8 +- hrolling.go => hash_rolling.go | 74 +++++++------- hrolling_fast.go | 171 --------------------------------- 6 files changed, 42 insertions(+), 217 deletions(-) rename hrolling.go => hash_rolling.go (60%) delete mode 100644 hrolling_fast.go diff --git a/h35.go b/h35.go index fb00ff7..f851006 100644 --- a/h35.go +++ b/h35.go @@ -65,7 +65,7 @@ func (h *H35) Prepare(one_shot bool, input_size uint, data []byte) { common_a.dict_num_matches = 0 h.ha.Initialize(h.params) - h.hb = new(HROLLING_FAST) + h.hb = &hashRolling{jump: 1} common_b = h.hb.Common() common_b.params = h.params.hasher common_b.is_prepared_ = false diff --git a/h55.go b/h55.go index 459eb37..3915bd3 100644 --- a/h55.go +++ b/h55.go @@ -63,7 +63,7 @@ func (h *H55) Prepare(one_shot bool, input_size uint, data []byte) { common_a.dict_num_matches = 0 h.ha.Initialize(h.params) - h.hb = new(HROLLING_FAST) + h.hb = &hashRolling{jump: 4} common_b = h.hb.Common() common_b.params = h.params.hasher common_b.is_prepared_ = false diff --git a/h65.go b/h65.go index 26c9270..e757817 100644 --- a/h65.go +++ b/h65.go @@ -63,7 +63,7 @@ func (h *H65) Prepare(one_shot bool, input_size uint, data []byte) { common_a.dict_num_matches = 0 h.ha.Initialize(h.params) - h.hb = new(HROLLING) + h.hb = &hashRolling{jump: 1} common_b = h.hb.Common() common_b.params = h.params.hasher common_b.is_prepared_ = false diff --git a/hash.go b/hash.go index 6bffcb5..efffd67 100644 --- a/hash.go +++ b/hash.go @@ -283,6 +283,10 @@ func newHasher(typ int) HasherHandle { return new(H5) case 6: return new(H6) + case 10: + return new(H10) + case 35: + return new(H35) case 40: return &hashForgetfulChain{ bucketBits: 15, @@ -311,14 +315,10 @@ func newHasher(typ int) HasherHandle { hashLen: 7, useDictionary: false, } - case 35: - return new(H35) case 55: return new(H55) case 65: return new(H65) - case 10: - return new(H10) } panic(fmt.Sprintf("unknown hasher type: %d", typ)) diff --git a/hrolling.go b/hash_rolling.go similarity index 60% rename from hrolling.go rename to hash_rolling.go index b5dc476..712a7e9 100644 --- a/hrolling.go +++ b/hash_rolling.go @@ -11,37 +11,40 @@ package brotli /* Rolling hash for long distance long string matches. Stores one position per bucket, bucket key is computed over a long region. */ -var kRollingHashMul32HROLLING uint32 = 69069 +var kRollingHashMul32hashRolling uint32 = 69069 -var kInvalidPosHROLLING uint32 = 0xffffffff +var kInvalidPosHashRolling uint32 = 0xffffffff /* This hasher uses a longer forward length, but returning a higher value here will hurt compression by the main hasher when combined with a composite hasher. The hasher tests for forward itself instead. */ -func (*HROLLING) HashTypeLength() uint { +func (*hashRolling) HashTypeLength() uint { return 4 } -func (*HROLLING) StoreLookahead() uint { +func (*hashRolling) StoreLookahead() uint { return 4 } /* Computes a code from a single byte. A lookup table of 256 values could be used, but simply adding 1 works about as good. */ -func HashByteHROLLING(byte byte) uint32 { - return uint32(byte) + 1 +func (*hashRolling) HashByte(b byte) uint32 { + return uint32(b) + 1 } -func HashRollingFunctionInitialHROLLING(state uint32, add byte, factor uint32) uint32 { - return uint32(factor*state + HashByteHROLLING(add)) +func (h *hashRolling) HashRollingFunctionInitial(state uint32, add byte, factor uint32) uint32 { + return uint32(factor*state + h.HashByte(add)) } -func HashRollingFunctionHROLLING(state uint32, add byte, rem byte, factor uint32, factor_remove uint32) uint32 { - return uint32(factor*state + HashByteHROLLING(add) - factor_remove*HashByteHROLLING(rem)) +func (h *hashRolling) HashRollingFunction(state uint32, add byte, rem byte, factor uint32, factor_remove uint32) uint32 { + return uint32(factor*state + h.HashByte(add) - factor_remove*h.HashByte(rem)) } -type HROLLING struct { +type hashRolling struct { HasherCommon + + jump int + state uint32 table []uint32 next_ix uint @@ -50,58 +53,51 @@ type HROLLING struct { factor_remove uint32 } -func SelfHROLLING(handle HasherHandle) *HROLLING { - return handle.(*HROLLING) -} - -func (h *HROLLING) Initialize(params *BrotliEncoderParams) { - var i uint +func (h *hashRolling) Initialize(params *BrotliEncoderParams) { h.state = 0 h.next_ix = 0 - h.factor = kRollingHashMul32HROLLING + h.factor = kRollingHashMul32hashRolling /* Compute the factor of the oldest byte to remove: factor**steps modulo 0xffffffff (the multiplications rely on 32-bit overflow) */ h.factor_remove = 1 - for i = 0; i < 32; i += 1 { + for i := 0; i < 32; i += h.jump { h.factor_remove *= h.factor } h.table = make([]uint32, 16777216) - for i = 0; i < 16777216; i++ { - h.table[i] = kInvalidPosHROLLING + for i := 0; i < 16777216; i++ { + h.table[i] = kInvalidPosHashRolling } } -func (h *HROLLING) Prepare(one_shot bool, input_size uint, data []byte) { - var i uint - +func (h *hashRolling) Prepare(one_shot bool, input_size uint, data []byte) { /* Too small size, cannot use this hasher. */ if input_size < 32 { return } h.state = 0 - for i = 0; i < 32; i += 1 { - h.state = HashRollingFunctionInitialHROLLING(h.state, data[i], h.factor) + for i := 0; i < 32; i += h.jump { + h.state = h.HashRollingFunctionInitial(h.state, data[i], h.factor) } } -func (*HROLLING) Store(data []byte, mask uint, ix uint) { +func (*hashRolling) Store(data []byte, mask uint, ix uint) { } -func (*HROLLING) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { +func (*hashRolling) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { } -func (h *HROLLING) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) { +func (h *hashRolling) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) { var position_masked uint /* In this case we must re-initialize the hasher from scratch from the current position. */ var available uint = num_bytes - if position&(1-1) != 0 { - var diff uint = 1 - (position & (1 - 1)) + if position&uint(h.jump-1) != 0 { + var diff uint = uint(h.jump) - (position & uint(h.jump-1)) if diff > available { available = 0 } else { @@ -121,14 +117,14 @@ func (h *HROLLING) StitchToPreviousBlock(num_bytes uint, position uint, ringbuff h.next_ix = position } -func (*HROLLING) PrepareDistanceCache(distance_cache []int) { +func (*hashRolling) PrepareDistanceCache(distance_cache []int) { } -func (h *HROLLING) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) { +func (h *hashRolling) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) { var cur_ix_masked uint = cur_ix & ring_buffer_mask var pos uint = h.next_ix - if cur_ix&(1-1) != 0 { + if cur_ix&uint(h.jump-1) != 0 { return } @@ -137,18 +133,18 @@ func (h *HROLLING) FindLongestMatch(dictionary *BrotliEncoderDictionary, data [] return } - for pos = h.next_ix; pos <= cur_ix; pos += 1 { + for pos = h.next_ix; pos <= cur_ix; pos += uint(h.jump) { var code uint32 = h.state & ((16777216 * 64) - 1) var rem byte = data[pos&ring_buffer_mask] var add byte = data[(pos+32)&ring_buffer_mask] - var found_ix uint = uint(kInvalidPosHROLLING) + var found_ix uint = uint(kInvalidPosHashRolling) - h.state = HashRollingFunctionHROLLING(h.state, add, rem, h.factor, h.factor_remove) + h.state = h.HashRollingFunction(h.state, add, rem, h.factor, h.factor_remove) if code < 16777216 { found_ix = uint(h.table[code]) h.table[code] = uint32(pos) - if pos == cur_ix && uint32(found_ix) != kInvalidPosHROLLING { + if pos == cur_ix && uint32(found_ix) != kInvalidPosHashRolling { /* The cast to 32-bit makes backward distances up to 4GB work even if cur_ix is above 4GB, despite using 32-bit values in the table. */ var backward uint = uint(uint32(cur_ix - found_ix)) @@ -169,5 +165,5 @@ func (h *HROLLING) FindLongestMatch(dictionary *BrotliEncoderDictionary, data [] } } - h.next_ix = cur_ix + 1 + h.next_ix = cur_ix + uint(h.jump) } diff --git a/hrolling_fast.go b/hrolling_fast.go deleted file mode 100644 index 9c4d46c..0000000 --- a/hrolling_fast.go +++ /dev/null @@ -1,171 +0,0 @@ -package brotli - -/* NOLINT(build/header_guard) */ -/* Copyright 2018 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Rolling hash for long distance long string matches. Stores one position - per bucket, bucket key is computed over a long region. */ -var kRollingHashMul32HROLLING_FAST uint32 = 69069 - -var kInvalidPosHROLLING_FAST uint32 = 0xffffffff - -/* This hasher uses a longer forward length, but returning a higher value here - will hurt compression by the main hasher when combined with a composite - hasher. The hasher tests for forward itself instead. */ -func (*HROLLING_FAST) HashTypeLength() uint { - return 4 -} - -func (*HROLLING_FAST) StoreLookahead() uint { - return 4 -} - -/* Computes a code from a single byte. A lookup table of 256 values could be - used, but simply adding 1 works about as good. */ -func HashByteHROLLING_FAST(byte byte) uint32 { - return uint32(byte) + 1 -} - -func HashRollingFunctionInitialHROLLING_FAST(state uint32, add byte, factor uint32) uint32 { - return uint32(factor*state + HashByteHROLLING_FAST(add)) -} - -func HashRollingFunctionHROLLING_FAST(state uint32, add byte, rem byte, factor uint32, factor_remove uint32) uint32 { - return uint32(factor*state + HashByteHROLLING_FAST(add) - factor_remove*HashByteHROLLING_FAST(rem)) -} - -type HROLLING_FAST struct { - HasherCommon - state uint32 - table []uint32 - next_ix uint - chunk_len uint32 - factor uint32 - factor_remove uint32 -} - -func SelfHROLLING_FAST(handle HasherHandle) *HROLLING_FAST { - return handle.(*HROLLING_FAST) -} - -func (h *HROLLING_FAST) Initialize(params *BrotliEncoderParams) { - var i uint - h.state = 0 - h.next_ix = 0 - - h.factor = kRollingHashMul32HROLLING_FAST - - /* Compute the factor of the oldest byte to remove: factor**steps modulo - 0xffffffff (the multiplications rely on 32-bit overflow) */ - h.factor_remove = 1 - - for i = 0; i < 32; i += 4 { - h.factor_remove *= h.factor - } - - h.table = make([]uint32, 16777216) - for i = 0; i < 16777216; i++ { - h.table[i] = kInvalidPosHROLLING_FAST - } -} - -func (h *HROLLING_FAST) Prepare(one_shot bool, input_size uint, data []byte) { - var i uint - - /* Too small size, cannot use this hasher. */ - if input_size < 32 { - return - } - h.state = 0 - for i = 0; i < 32; i += 4 { - h.state = HashRollingFunctionInitialHROLLING_FAST(h.state, data[i], h.factor) - } -} - -func (*HROLLING_FAST) Store(data []byte, mask uint, ix uint) { -} - -func (*HROLLING_FAST) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) { -} - -func (h *HROLLING_FAST) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ring_buffer_mask uint) { - var position_masked uint - /* In this case we must re-initialize the hasher from scratch from the - current position. */ - - var available uint = num_bytes - if position&(4-1) != 0 { - var diff uint = 4 - (position & (4 - 1)) - if diff > available { - available = 0 - } else { - available = available - diff - } - position += diff - } - - position_masked = position & ring_buffer_mask - - /* wrapping around ringbuffer not handled. */ - if available > ring_buffer_mask-position_masked { - available = ring_buffer_mask - position_masked - } - - h.Prepare(false, available, ringbuffer[position&ring_buffer_mask:]) - h.next_ix = position -} - -func (*HROLLING_FAST) PrepareDistanceCache(distance_cache []int) { -} - -func (h *HROLLING_FAST) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) { - var cur_ix_masked uint = cur_ix & ring_buffer_mask - var pos uint = h.next_ix - - if cur_ix&(4-1) != 0 { - return - } - - /* Not enough lookahead */ - if max_length < 32 { - return - } - - for pos = h.next_ix; pos <= cur_ix; pos += 4 { - var code uint32 = h.state & ((16777216 * 64) - 1) - var rem byte = data[pos&ring_buffer_mask] - var add byte = data[(pos+32)&ring_buffer_mask] - var found_ix uint = uint(kInvalidPosHROLLING_FAST) - - h.state = HashRollingFunctionHROLLING_FAST(h.state, add, rem, h.factor, h.factor_remove) - - if code < 16777216 { - found_ix = uint(h.table[code]) - h.table[code] = uint32(pos) - if pos == cur_ix && uint32(found_ix) != kInvalidPosHROLLING_FAST { - /* The cast to 32-bit makes backward distances up to 4GB work even - if cur_ix is above 4GB, despite using 32-bit values in the table. */ - var backward uint = uint(uint32(cur_ix - found_ix)) - if backward <= max_backward { - var found_ix_masked uint = found_ix & ring_buffer_mask - var len uint = FindMatchLengthWithLimit(data[found_ix_masked:], data[cur_ix_masked:], max_length) - if len >= 4 && len > out.len { - var score uint = BackwardReferenceScore(uint(len), backward) - if score > out.score { - out.len = uint(len) - out.distance = backward - out.score = score - out.len_code_delta = 0 - } - } - } - } - } - } - - h.next_ix = cur_ix + 4 -}