Start reducing duplication in hasher code.

The C version defined several related hasher types with preprocessor tricks, but I split them up for the translation to Go. Now I'm recombining them.
2019-03-09 13:01:56 -08:00 · 2019-03-09 13:01:56 -08:00 · c4f1bfa34f
parent 74ae18c776
commit c4f1bfa34f
7 changed files with 102 additions and 680 deletions
--- a/h3.go
+++ b/h3.go
@@ -1,203 +0,0 @@
-package brotli
-
-import "encoding/binary"
-
-/* NOLINT(build/header_guard) */
-/* Copyright 2010 Google Inc. All Rights Reserved.
-
-   Distributed under MIT license.
-   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-func (*H3) HashTypeLength() uint {
-	return 8
-}
-
-func (*H3) StoreLookahead() uint {
-	return 8
-}
-
-/* HashBytes is the function that chooses the bucket to place
-   the address in. The HashLongestMatch and H3
-   classes have separate, different implementations of hashing. */
-func HashBytesH3(data []byte) uint32 {
-	var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64)
-
-	/* The higher bits contain more mixture from the multiplication,
-	   so we take our results from there. */
-	return uint32(h >> (64 - 16))
-}
-
-/* A (forgetful) hash table to the data seen by the compressor, to
-   help create backward references to previous data.
-
-   This is a hash map of fixed size (BUCKET_SIZE). Starting from the
-   given index, 2 buckets are used to store values of a key. */
-type H3 struct {
-	HasherCommon
-	buckets_ [(1 << 16) + 2]uint32
-}
-
-func SelfH3(handle HasherHandle) *H3 {
-	return handle.(*H3)
-}
-
-func (*H3) Initialize(params *BrotliEncoderParams) {
-}
-
-func (h *H3) Prepare(one_shot bool, input_size uint, data []byte) {
-	var partial_prepare_threshold uint = (4 << 16) >> 7
-	/* Partial preparation is 100 times slower (per socket). */
-	if one_shot && input_size <= partial_prepare_threshold {
-		var i uint
-		for i = 0; i < input_size; i++ {
-			var key uint32 = HashBytesH3(data[i:])
-			for i := 0; i < int(2); i++ {
-				h.buckets_[key:][i] = 0
-			}
-		}
-	} else {
-		/* It is not strictly necessary to fill this buffer here, but
-		   not filling will make the results of the compression stochastic
-		   (but correct). This is because random data would cause the
-		   system to find accidentally good backward references here and there. */
-		var i int
-		for i = 0; i < len(h.buckets_); i++ {
-			h.buckets_[i] = 0
-		}
-	}
-}
-
-/* Look at 5 bytes at &data[ix & mask].
-   Compute a hash from these, and store the value somewhere within
-   [ix .. ix+3]. */
-func (h *H3) Store(data []byte, mask uint, ix uint) {
-	var key uint32 = HashBytesH3(data[ix&mask:])
-	var off uint32 = uint32(ix>>3) % 2
-	/* Wiggle the value with the bucket sweep range. */
-	h.buckets_[key+off] = uint32(ix)
-}
-
-func (h *H3) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
-	var i uint
-	for i = ix_start; i < ix_end; i++ {
-		h.Store(data, mask, i)
-	}
-}
-
-func (h *H3) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
-	if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
-		/* Prepare the hashes for three last bytes of the last write.
-		   These could not be calculated before, since they require knowledge
-		   of both the previous and the current block. */
-		h.Store(ringbuffer, ringbuffer_mask, position-3)
-		h.Store(ringbuffer, ringbuffer_mask, position-2)
-		h.Store(ringbuffer, ringbuffer_mask, position-1)
-	}
-}
-
-func (*H3) PrepareDistanceCache(distance_cache []int) {
-}
-
-/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
-   up to the length of max_length and stores the position cur_ix in the
-   hash table.
-
-   Does not look for matches longer than max_length.
-   Does not look for matches further away than max_backward.
-   Writes the best match into |out|.
-   |out|->score is updated only if a better match is found. */
-func (h *H3) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
-	var best_len_in uint = out.len
-	var cur_ix_masked uint = cur_ix & ring_buffer_mask
-	var key uint32 = HashBytesH3(data[cur_ix_masked:])
-	var compare_char int = int(data[cur_ix_masked+best_len_in])
-	var best_score uint = out.score
-	var best_len uint = best_len_in
-	var cached_backward uint = uint(distance_cache[0])
-	var prev_ix uint = cur_ix - cached_backward
-	var bucket []uint32
-	out.len_code_delta = 0
-	if prev_ix < cur_ix {
-		prev_ix &= uint(uint32(ring_buffer_mask))
-		if compare_char == int(data[prev_ix+best_len]) {
-			var len uint = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
-			if len >= 4 {
-				var score uint = BackwardReferenceScoreUsingLastDistance(uint(len))
-				if best_score < score {
-					best_score = score
-					best_len = uint(len)
-					out.len = uint(len)
-					out.distance = cached_backward
-					out.score = best_score
-					compare_char = int(data[cur_ix_masked+best_len])
-					if 2 == 1 {
-						h.buckets_[key] = uint32(cur_ix)
-						return
-					}
-				}
-			}
-		}
-	}
-
-	if 2 == 1 {
-		var backward uint
-		var len uint
-
-		/* Only one to look for, don't bother to prepare for a loop. */
-		prev_ix = uint(h.buckets_[key])
-
-		h.buckets_[key] = uint32(cur_ix)
-		backward = cur_ix - prev_ix
-		prev_ix &= uint(uint32(ring_buffer_mask))
-		if compare_char != int(data[prev_ix+best_len_in]) {
-			return
-		}
-
-		if backward == 0 || backward > max_backward {
-			return
-		}
-
-		len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
-		if len >= 4 {
-			var score uint = BackwardReferenceScore(uint(len), backward)
-			if best_score < score {
-				out.len = uint(len)
-				out.distance = backward
-				out.score = score
-				return
-			}
-		}
-	} else {
-		bucket = h.buckets_[key:]
-		var i int
-		prev_ix = uint(bucket[0])
-		bucket = bucket[1:]
-		for i = 0; i < 2; (func() { i++; tmp4 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp4[0]) })() {
-			var backward uint = cur_ix - prev_ix
-			var len uint
-			prev_ix &= uint(uint32(ring_buffer_mask))
-			if compare_char != int(data[prev_ix+best_len]) {
-				continue
-			}
-
-			if backward == 0 || backward > max_backward {
-				continue
-			}
-
-			len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
-			if len >= 4 {
-				var score uint = BackwardReferenceScore(uint(len), backward)
-				if best_score < score {
-					best_score = score
-					best_len = uint(len)
-					out.len = best_len
-					out.distance = backward
-					out.score = score
-					compare_char = int(data[cur_ix_masked+best_len])
-				}
-			}
-		}
-	}
-
-	h.buckets_[key+uint32((cur_ix>>3)%2)] = uint32(cur_ix)
-}
--- a/h35.go
+++ b/h35.go
@@ -57,7 +57,7 @@ func (h *H35) Prepare(one_shot bool, input_size uint, data []byte) {
 		var common_a *HasherCommon
 		var common_b *HasherCommon

-		h.ha = new(H3)
+		h.ha = newHasher(3)
 		common_a = h.ha.Common()
 		common_a.params = h.params.hasher
 		common_a.is_prepared_ = false
--- a/h4.go
+++ b/h4.go
@@ -1,208 +0,0 @@
-package brotli
-
-import "encoding/binary"
-
-/* NOLINT(build/header_guard) */
-/* Copyright 2010 Google Inc. All Rights Reserved.
-
-   Distributed under MIT license.
-   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-func (*H4) HashTypeLength() uint {
-	return 8
-}
-
-func (*H4) StoreLookahead() uint {
-	return 8
-}
-
-/* HashBytes is the function that chooses the bucket to place
-   the address in. The HashLongestMatch and H4
-   classes have separate, different implementations of hashing. */
-func HashBytesH4(data []byte) uint32 {
-	var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64)
-
-	/* The higher bits contain more mixture from the multiplication,
-	   so we take our results from there. */
-	return uint32(h >> (64 - 17))
-}
-
-/* A (forgetful) hash table to the data seen by the compressor, to
-   help create backward references to previous data.
-
-   This is a hash map of fixed size (BUCKET_SIZE). Starting from the
-   given index, 4 buckets are used to store values of a key. */
-type H4 struct {
-	HasherCommon
-	buckets_ [(1 << 17) + 4]uint32
-}
-
-func SelfH4(handle HasherHandle) *H4 {
-	return handle.(*H4)
-}
-
-func (*H4) Initialize(params *BrotliEncoderParams) {
-}
-
-func (h *H4) Prepare(one_shot bool, input_size uint, data []byte) {
-	var partial_prepare_threshold uint = (4 << 17) >> 7
-	/* Partial preparation is 100 times slower (per socket). */
-	if one_shot && input_size <= partial_prepare_threshold {
-		var i uint
-		for i = 0; i < input_size; i++ {
-			var key uint32 = HashBytesH4(data[i:])
-			for i := 0; i < int(4); i++ {
-				h.buckets_[key:][i] = 0
-			}
-		}
-	} else {
-		/* It is not strictly necessary to fill this buffer here, but
-		   not filling will make the results of the compression stochastic
-		   (but correct). This is because random data would cause the
-		   system to find accidentally good backward references here and there. */
-		var i int
-		for i = 0; i < len(h.buckets_); i++ {
-			h.buckets_[i] = 0
-		}
-	}
-}
-
-/* Look at 5 bytes at &data[ix & mask].
-   Compute a hash from these, and store the value somewhere within
-   [ix .. ix+3]. */
-func (h *H4) Store(data []byte, mask uint, ix uint) {
-	var key uint32 = HashBytesH4(data[ix&mask:])
-	var off uint32 = uint32(ix>>3) % 4
-	/* Wiggle the value with the bucket sweep range. */
-	h.buckets_[key+off] = uint32(ix)
-}
-
-func (h *H4) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
-	var i uint
-	for i = ix_start; i < ix_end; i++ {
-		h.Store(data, mask, i)
-	}
-}
-
-func (h *H4) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
-	if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
-		/* Prepare the hashes for three last bytes of the last write.
-		   These could not be calculated before, since they require knowledge
-		   of both the previous and the current block. */
-		h.Store(ringbuffer, ringbuffer_mask, position-3)
-		h.Store(ringbuffer, ringbuffer_mask, position-2)
-		h.Store(ringbuffer, ringbuffer_mask, position-1)
-	}
-}
-
-func (*H4) PrepareDistanceCache(distance_cache []int) {
-}
-
-/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
-   up to the length of max_length and stores the position cur_ix in the
-   hash table.
-
-   Does not look for matches longer than max_length.
-   Does not look for matches further away than max_backward.
-   Writes the best match into |out|.
-   |out|->score is updated only if a better match is found. */
-func (h *H4) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
-	var best_len_in uint = out.len
-	var cur_ix_masked uint = cur_ix & ring_buffer_mask
-	var key uint32 = HashBytesH4(data[cur_ix_masked:])
-	var compare_char int = int(data[cur_ix_masked+best_len_in])
-	var min_score uint = out.score
-	var best_score uint = out.score
-	var best_len uint = best_len_in
-	var cached_backward uint = uint(distance_cache[0])
-	var prev_ix uint = cur_ix - cached_backward
-	var bucket []uint32
-	out.len_code_delta = 0
-	if prev_ix < cur_ix {
-		prev_ix &= uint(uint32(ring_buffer_mask))
-		if compare_char == int(data[prev_ix+best_len]) {
-			var len uint = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
-			if len >= 4 {
-				var score uint = BackwardReferenceScoreUsingLastDistance(uint(len))
-				if best_score < score {
-					best_score = score
-					best_len = uint(len)
-					out.len = uint(len)
-					out.distance = cached_backward
-					out.score = best_score
-					compare_char = int(data[cur_ix_masked+best_len])
-					if 4 == 1 {
-						h.buckets_[key] = uint32(cur_ix)
-						return
-					}
-				}
-			}
-		}
-	}
-
-	if 4 == 1 {
-		var backward uint
-		var len uint
-
-		/* Only one to look for, don't bother to prepare for a loop. */
-		prev_ix = uint(h.buckets_[key])
-
-		h.buckets_[key] = uint32(cur_ix)
-		backward = cur_ix - prev_ix
-		prev_ix &= uint(uint32(ring_buffer_mask))
-		if compare_char != int(data[prev_ix+best_len_in]) {
-			return
-		}
-
-		if backward == 0 || backward > max_backward {
-			return
-		}
-
-		len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
-		if len >= 4 {
-			var score uint = BackwardReferenceScore(uint(len), backward)
-			if best_score < score {
-				out.len = uint(len)
-				out.distance = backward
-				out.score = score
-				return
-			}
-		}
-	} else {
-		bucket = h.buckets_[key:]
-		var i int
-		prev_ix = uint(bucket[0])
-		bucket = bucket[1:]
-		for i = 0; i < 4; (func() { i++; tmp5 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp5[0]) })() {
-			var backward uint = cur_ix - prev_ix
-			var len uint
-			prev_ix &= uint(uint32(ring_buffer_mask))
-			if compare_char != int(data[prev_ix+best_len]) {
-				continue
-			}
-
-			if backward == 0 || backward > max_backward {
-				continue
-			}
-
-			len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
-			if len >= 4 {
-				var score uint = BackwardReferenceScore(uint(len), backward)
-				if best_score < score {
-					best_score = score
-					best_len = uint(len)
-					out.len = best_len
-					out.distance = backward
-					out.score = score
-					compare_char = int(data[cur_ix_masked+best_len])
-				}
-			}
-		}
-	}
-
-	if min_score == out.score {
-		SearchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
-	}
-
-	h.buckets_[key+uint32((cur_ix>>3)%4)] = uint32(cur_ix)
-}
--- a/h54.go
+++ b/h54.go
@@ -1,200 +0,0 @@
-package brotli
-
-import "encoding/binary"
-
-/* NOLINT(build/header_guard) */
-/* Copyright 2010 Google Inc. All Rights Reserved.
-
-   Distributed under MIT license.
-   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-func (*H54) HashTypeLength() uint {
-	return 8
-}
-
-func (*H54) StoreLookahead() uint {
-	return 8
-}
-
-/* HashBytes is the function that chooses the bucket to place
-   the address in. The HashLongestMatch and H54
-   classes have separate, different implementations of hashing. */
-func HashBytesH54(data []byte) uint32 {
-	var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*7)) * kHashMul64)
-
-	/* The higher bits contain more mixture from the multiplication,
-	   so we take our results from there. */
-	return uint32(h >> (64 - 20))
-}
-
-/* A (forgetful) hash table to the data seen by the compressor, to
-   help create backward references to previous data.
-
-   This is a hash map of fixed size ((1 << 20)). Starting from the
-   given index, 4 buckets are used to store values of a key. */
-type H54 struct {
-	HasherCommon
-	buckets_ [(1 << 20) + 4]uint32
-}
-
-func SelfH54(handle HasherHandle) *H54 {
-	return handle.(*H54)
-}
-
-func (*H54) Initialize(params *BrotliEncoderParams) {
-}
-
-func (h *H54) Prepare(one_shot bool, input_size uint, data []byte) {
-	var partial_prepare_threshold uint = (4 << 20) >> 7
-	/* Partial preparation is 100 times slower (per socket). */
-	if one_shot && input_size <= partial_prepare_threshold {
-		var i uint
-		for i = 0; i < input_size; i++ {
-			var key uint32 = HashBytesH54(data[i:])
-			for i := 0; i < int(4); i++ {
-				h.buckets_[key:][i] = 0
-			}
-		}
-	} else {
-		/* It is not strictly necessary to fill this buffer here, but
-		   not filling will make the results of the compression stochastic
-		   (but correct). This is because random data would cause the
-		   system to find accidentally good backward references here and there. */
-		h.buckets_ = [(1 << 20) + 4]uint32{}
-	}
-}
-
-/* Look at 5 bytes at &data[ix & mask].
-   Compute a hash from these, and store the value somewhere within
-   [ix .. ix+3]. */
-func (h *H54) Store(data []byte, mask uint, ix uint) {
-	var key uint32 = HashBytesH54(data[ix&mask:])
-	var off uint32 = uint32(ix>>3) % 4
-	/* Wiggle the value with the bucket sweep range. */
-	h.buckets_[key+off] = uint32(ix)
-}
-
-func (h *H54) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
-	var i uint
-	for i = ix_start; i < ix_end; i++ {
-		h.Store(data, mask, i)
-	}
-}
-
-func (h *H54) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
-	if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
-		/* Prepare the hashes for three last bytes of the last write.
-		   These could not be calculated before, since they require knowledge
-		   of both the previous and the current block. */
-		h.Store(ringbuffer, ringbuffer_mask, position-3)
-		h.Store(ringbuffer, ringbuffer_mask, position-2)
-		h.Store(ringbuffer, ringbuffer_mask, position-1)
-	}
-}
-
-func (*H54) PrepareDistanceCache(distance_cache []int) {
-}
-
-/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
-   up to the length of max_length and stores the position cur_ix in the
-   hash table.
-
-   Does not look for matches longer than max_length.
-   Does not look for matches further away than max_backward.
-   Writes the best match into |out|.
-   |out|->score is updated only if a better match is found. */
-func (h *H54) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
-	var best_len_in uint = out.len
-	var cur_ix_masked uint = cur_ix & ring_buffer_mask
-	var key uint32 = HashBytesH54(data[cur_ix_masked:])
-	var compare_char int = int(data[cur_ix_masked+best_len_in])
-	var best_score uint = out.score
-	var best_len uint = best_len_in
-	var cached_backward uint = uint(distance_cache[0])
-	var prev_ix uint = cur_ix - cached_backward
-	var bucket []uint32
-	out.len_code_delta = 0
-	if prev_ix < cur_ix {
-		prev_ix &= uint(uint32(ring_buffer_mask))
-		if compare_char == int(data[prev_ix+best_len]) {
-			var len uint = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
-			if len >= 4 {
-				var score uint = BackwardReferenceScoreUsingLastDistance(uint(len))
-				if best_score < score {
-					best_score = score
-					best_len = uint(len)
-					out.len = uint(len)
-					out.distance = cached_backward
-					out.score = best_score
-					compare_char = int(data[cur_ix_masked+best_len])
-					if 4 == 1 {
-						h.buckets_[key] = uint32(cur_ix)
-						return
-					}
-				}
-			}
-		}
-	}
-
-	if 4 == 1 {
-		var backward uint
-		var len uint
-
-		/* Only one to look for, don't bother to prepare for a loop. */
-		prev_ix = uint(h.buckets_[key])
-
-		h.buckets_[key] = uint32(cur_ix)
-		backward = cur_ix - prev_ix
-		prev_ix &= uint(uint32(ring_buffer_mask))
-		if compare_char != int(data[prev_ix+best_len_in]) {
-			return
-		}
-
-		if backward == 0 || backward > max_backward {
-			return
-		}
-
-		len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
-		if len >= 4 {
-			var score uint = BackwardReferenceScore(uint(len), backward)
-			if best_score < score {
-				out.len = uint(len)
-				out.distance = backward
-				out.score = score
-				return
-			}
-		}
-	} else {
-		bucket = h.buckets_[key:]
-		var i int
-		prev_ix = uint(bucket[0])
-		bucket = bucket[1:]
-		for i = 0; i < 4; (func() { i++; tmp9 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp9[0]) })() {
-			var backward uint = cur_ix - prev_ix
-			var len uint
-			prev_ix &= uint(uint32(ring_buffer_mask))
-			if compare_char != int(data[prev_ix+best_len]) {
-				continue
-			}
-
-			if backward == 0 || backward > max_backward {
-				continue
-			}
-
-			len = FindMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
-			if len >= 4 {
-				var score uint = BackwardReferenceScore(uint(len), backward)
-				if best_score < score {
-					best_score = score
-					best_len = uint(len)
-					out.len = best_len
-					out.distance = backward
-					out.score = score
-					compare_char = int(data[cur_ix_masked+best_len])
-				}
-			}
-		}
-	}
-
-	h.buckets_[key+uint32((cur_ix>>3)%4)] = uint32(cur_ix)
-}
--- a/h55.go
+++ b/h55.go
@@ -55,7 +55,7 @@ func (h *H55) Prepare(one_shot bool, input_size uint, data []byte) {
 		var common_a *HasherCommon
 		var common_b *HasherCommon

-		h.ha = new(H54)
+		h.ha = newHasher(54)
 		common_a = h.ha.Common()
 		common_a.params = h.params.hasher
 		common_a.is_prepared_ = false
--- a/hash.go
+++ b/hash.go
@@ -1,6 +1,9 @@
 package brotli

-import "encoding/binary"
+import (
+	"encoding/binary"
+	"fmt"
+)

 /* Matches data against static dictionary words, and for each length l,
   for which a match is found, updates matches[l] to be the minimum possible
@@ -253,40 +256,66 @@ func HasherReset(handle HasherHandle) {
 	handle.Common().is_prepared_ = false
 }

+func newHasher(typ int) HasherHandle {
+	switch typ {
+	case 2:
+		return &hashLongestMatchQuickly{
+			bucketBits:    16,
+			bucketSweep:   1,
+			hashLen:       5,
+			useDictionary: true,
+		}
+	case 3:
+		return &hashLongestMatchQuickly{
+			bucketBits:    16,
+			bucketSweep:   2,
+			hashLen:       5,
+			useDictionary: false,
+		}
+	case 4:
+		return &hashLongestMatchQuickly{
+			bucketBits:    17,
+			bucketSweep:   4,
+			hashLen:       5,
+			useDictionary: true,
+		}
+	case 5:
+		return new(H5)
+	case 6:
+		return new(H6)
+	case 40:
+		return new(H40)
+	case 41:
+		return new(H41)
+	case 42:
+		return new(H42)
+	case 54:
+		return &hashLongestMatchQuickly{
+			bucketBits:    20,
+			bucketSweep:   4,
+			hashLen:       7,
+			useDictionary: false,
+		}
+	case 35:
+		return new(H35)
+	case 55:
+		return new(H55)
+	case 65:
+		return new(H65)
+	case 10:
+		return new(H10)
+	}
+
+	panic(fmt.Sprintf("unknown hasher type: %d", typ))
+}
+
 func HasherSetup(handle *HasherHandle, params *BrotliEncoderParams, data []byte, position uint, input_size uint, is_last bool) {
 	var self HasherHandle = nil
 	var common *HasherCommon = nil
 	var one_shot bool = (position == 0 && is_last)
 	if *handle == nil {
 		ChooseHasher(params, &params.hasher)
-		switch params.hasher.type_ {
-		case 2:
-			self = new(H2)
-		case 3:
-			self = new(H3)
-		case 4:
-			self = new(H4)
-		case 5:
-			self = new(H5)
-		case 6:
-			self = new(H6)
-		case 40:
-			self = new(H40)
-		case 41:
-			self = new(H41)
-		case 42:
-			self = new(H42)
-		case 54:
-			self = new(H54)
-		case 35:
-			self = new(H35)
-		case 55:
-			self = new(H55)
-		case 65:
-			self = new(H65)
-		case 10:
-			self = new(H10)
-		}
+		self = newHasher(params.hasher.type_)

 		*handle = self
 		common = self.Common()
--- a/hash_longest_match_quickly.go
+++ b/hash_longest_match_quickly.go
@@ -12,23 +12,23 @@ import "encoding/binary"
 /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
   a little faster (0.5% - 1%) and it compresses 0.15% better on small text
   and HTML inputs. */
-func (*H2) HashTypeLength() uint {
+func (*hashLongestMatchQuickly) HashTypeLength() uint {
 	return 8
 }

-func (*H2) StoreLookahead() uint {
+func (*hashLongestMatchQuickly) StoreLookahead() uint {
 	return 8
 }

 /* HashBytes is the function that chooses the bucket to place
-   the address in. The HashLongestMatch and H2
+   the address in. The HashLongestMatch and hashLongestMatchQuickly
   classes have separate, different implementations of hashing. */
-func HashBytesH2(data []byte) uint32 {
-	var h uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*5)) * kHashMul64)
+func (h *hashLongestMatchQuickly) HashBytes(data []byte) uint32 {
+	var hash uint64 = ((binary.LittleEndian.Uint64(data) << (64 - 8*h.hashLen)) * kHashMul64)

 	/* The higher bits contain more mixture from the multiplication,
 	   so we take our results from there. */
-	return uint32(h >> (64 - 16))
+	return uint32(hash >> (64 - h.bucketBits))
 }

 /* A (forgetful) hash table to the data seen by the compressor, to
@@ -36,35 +36,39 @@ func HashBytesH2(data []byte) uint32 {

   This is a hash map of fixed size (1 << 16). Starting from the
   given index, 1 buckets are used to store values of a key. */
-type H2 struct {
+type hashLongestMatchQuickly struct {
 	HasherCommon
-	buckets_ [(1 << 16) + 1]uint32
+
+	bucketBits    uint
+	bucketSweep   int
+	hashLen       uint
+	useDictionary bool
+
+	buckets []uint32
 }

-func SelfH2(handle HasherHandle) *H2 {
-	return handle.(*H2)
+func (h *hashLongestMatchQuickly) Initialize(params *BrotliEncoderParams) {
+	h.buckets = make([]uint32, 1<<h.bucketBits+h.bucketSweep)
 }

-func (*H2) Initialize(params *BrotliEncoderParams) {
-}
-
-func (h *H2) Prepare(one_shot bool, input_size uint, data []byte) {
-	var partial_prepare_threshold uint = (4 << 16) >> 7
+func (h *hashLongestMatchQuickly) Prepare(one_shot bool, input_size uint, data []byte) {
+	var partial_prepare_threshold uint = (4 << h.bucketBits) >> 7
 	/* Partial preparation is 100 times slower (per socket). */
 	if one_shot && input_size <= partial_prepare_threshold {
 		var i uint
 		for i = 0; i < input_size; i++ {
-			var key uint32 = HashBytesH2(data[i:])
-			h.buckets_[key] = 0
+			var key uint32 = h.HashBytes(data[i:])
+			for j := 0; j < h.bucketSweep; j++ {
+				h.buckets[key+uint32(j)] = 0
+			}
 		}
 	} else {
 		/* It is not strictly necessary to fill this buffer here, but
 		   not filling will make the results of the compression stochastic
 		   (but correct). This is because random data would cause the
 		   system to find accidentally good backward references here and there. */
-		var i int
-		for i = 0; i < len(h.buckets_); i++ {
-			h.buckets_[i] = 0
+		for i := range h.buckets {
+			h.buckets[i] = 0
 		}
 	}
 }
@@ -72,21 +76,21 @@ func (h *H2) Prepare(one_shot bool, input_size uint, data []byte) {
 /* Look at 5 bytes at &data[ix & mask].
   Compute a hash from these, and store the value somewhere within
   [ix .. ix+3]. */
-func (h *H2) Store(data []byte, mask uint, ix uint) {
-	var key uint32 = HashBytesH2(data[ix&mask:])
-	var off uint32 = uint32(ix>>3) % 1
+func (h *hashLongestMatchQuickly) Store(data []byte, mask uint, ix uint) {
+	var key uint32 = h.HashBytes(data[ix&mask:])
+	var off uint32 = uint32(ix>>3) % uint32(h.bucketSweep)
 	/* Wiggle the value with the bucket sweep range. */
-	h.buckets_[key+off] = uint32(ix)
+	h.buckets[key+off] = uint32(ix)
 }

-func (h *H2) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
+func (h *hashLongestMatchQuickly) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
 	var i uint
 	for i = ix_start; i < ix_end; i++ {
 		h.Store(data, mask, i)
 	}
 }

-func (h *H2) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
+func (h *hashLongestMatchQuickly) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
 	if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
 		/* Prepare the hashes for three last bytes of the last write.
 		   These could not be calculated before, since they require knowledge
@@ -97,7 +101,7 @@ func (h *H2) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
 	}
 }

-func (*H2) PrepareDistanceCache(distance_cache []int) {
+func (*hashLongestMatchQuickly) PrepareDistanceCache(distance_cache []int) {
 }

 /* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
@@ -108,10 +112,10 @@ func (*H2) PrepareDistanceCache(distance_cache []int) {
   Does not look for matches further away than max_backward.
   Writes the best match into |out|.
   |out|->score is updated only if a better match is found. */
-func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
+func (h *hashLongestMatchQuickly) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *HasherSearchResult) {
 	var best_len_in uint = out.len
 	var cur_ix_masked uint = cur_ix & ring_buffer_mask
-	var key uint32 = HashBytesH2(data[cur_ix_masked:])
+	var key uint32 = h.HashBytes(data[cur_ix_masked:])
 	var compare_char int = int(data[cur_ix_masked+best_len_in])
 	var min_score uint = out.score
 	var best_score uint = out.score
@@ -133,8 +137,8 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
 					out.distance = cached_backward
 					out.score = best_score
 					compare_char = int(data[cur_ix_masked+best_len])
-					if 1 == 1 {
-						h.buckets_[key] = uint32(cur_ix)
+					if h.bucketSweep == 1 {
+						h.buckets[key] = uint32(cur_ix)
 						return
 					}
 				}
@@ -142,14 +146,14 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
 		}
 	}

-	if 1 == 1 {
+	if h.bucketSweep == 1 {
 		var backward uint
 		var len uint

 		/* Only one to look for, don't bother to prepare for a loop. */
-		prev_ix = uint(h.buckets_[key])
+		prev_ix = uint(h.buckets[key])

-		h.buckets_[key] = uint32(cur_ix)
+		h.buckets[key] = uint32(cur_ix)
 		backward = cur_ix - prev_ix
 		prev_ix &= uint(uint32(ring_buffer_mask))
 		if compare_char != int(data[prev_ix+best_len_in]) {
@@ -171,11 +175,11 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
 			}
 		}
 	} else {
-		bucket = h.buckets_[key:]
+		bucket = h.buckets[key:]
 		var i int
 		prev_ix = uint(bucket[0])
 		bucket = bucket[1:]
-		for i = 0; i < 1; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
+		for i = 0; i < h.bucketSweep; (func() { i++; tmp3 := bucket; bucket = bucket[1:]; prev_ix = uint(tmp3[0]) })() {
 			var backward uint = cur_ix - prev_ix
 			var len uint
 			prev_ix &= uint(uint32(ring_buffer_mask))
@@ -202,9 +206,9 @@ func (h *H2) FindLongestMatch(dictionary *BrotliEncoderDictionary, data []byte,
 		}
 	}

-	if min_score == out.score {
+	if h.useDictionary && min_score == out.score {
 		SearchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, true)
 	}

-	h.buckets_[key+uint32((cur_ix>>3)%1)] = uint32(cur_ix)
+	h.buckets[key+uint32((cur_ix>>3)%uint(h.bucketSweep))] = uint32(cur_ix)
 }