Make my matchfinder work more accessible.

This commit is contained in:
Andy Balholm 2024-01-11 17:31:05 -08:00
parent cf812c06f8
commit 17e5901d05
4 changed files with 118 additions and 41 deletions

View File

@ -2,6 +2,13 @@ This package is a brotli compressor and decompressor implemented in Go.
It was translated from the reference implementation (https://github.com/google/brotli)
with the `c2go` tool at https://github.com/andybalholm/c2go.
I have been working on new compression algorithms (not translated from C)
in the matchfinder package.
You can use them with the NewWriterV2 function.
Currently they give better results than the old implementation
(at least for compressing my test file, Newton's *Opticks*)
on levels 2 to 6.
I am using it in production with https://github.com/andybalholm/redwood.
API documentation is found at https://pkg.go.dev/github.com/andybalholm/brotli?tab=doc.

View File

@ -69,46 +69,6 @@ func TestEncoderEmptyWrite(t *testing.T) {
t.Errorf("Close()=%v, want nil", err)
}
}
// TestWriter compresses a small HTML document at every quality level from
// BestSpeed through BestCompression, validates the output with
// checkCompressedData, and then confirms that the same writer, after Reset,
// produces byte-identical compressed output.
func TestWriter(t *testing.T) {
	for quality := BestSpeed; quality <= BestCompression; quality++ {
		payload := []byte("<html><body><H1>Hello world</H1></body></html>")

		// First pass: stream the payload through a freshly created writer.
		var first bytes.Buffer
		w := NewWriterOptions(&first, WriterOptions{Quality: quality})
		copied, copyErr := io.Copy(w, bytes.NewReader(payload))
		if copyErr != nil {
			t.Errorf("Copy Error: %v", copyErr)
		}
		if int(copied) != len(payload) {
			t.Errorf("Copy() n=%v, want %v", copied, len(payload))
		}
		if closeErr := w.Close(); closeErr != nil {
			t.Errorf("Close Error after copied %d bytes: %v", copied, closeErr)
		}
		if checkErr := checkCompressedData(first.Bytes(), payload); checkErr != nil {
			t.Error(checkErr)
		}

		// Second pass: reuse the writer via Reset and feed it the same
		// payload with a single Write call.
		var second bytes.Buffer
		w.Reset(&second)
		written, writeErr := w.Write(payload)
		if writeErr != nil {
			t.Errorf("Write error after Reset: %v", writeErr)
		}
		if written != len(payload) {
			t.Errorf("Write() after Reset n=%d, want %d", written, len(payload))
		}
		if closeErr := w.Close(); closeErr != nil {
			t.Errorf("Close error after Reset (copied %d) bytes: %v", written, closeErr)
		}

		// Both passes must yield exactly the same compressed bytes.
		if !bytes.Equal(first.Bytes(), second.Bytes()) {
			t.Error("Compressed data after Reset doesn't equal first time")
		}
	}
}
func TestIssue22(t *testing.T) {
f, err := os.Open("testdata/issue22.gz")
if err != nil {
@ -149,6 +109,45 @@ func TestIssue22(t *testing.T) {
}
}
// TestWriterV2 exercises NewWriterV2 at every level from BestSpeed through
// BestCompression: it streams a small HTML document through the writer,
// validates the result with checkCompressedData, and then checks that the
// writer emits byte-identical output when reused after Reset.
func TestWriterV2(t *testing.T) {
	// The payload is never modified, so one copy serves every level.
	payload := []byte("<html><body><H1>Hello world</H1></body></html>")

	for level := BestSpeed; level <= BestCompression; level++ {
		// First pass: a brand-new writer fed through io.Copy.
		var first bytes.Buffer
		w := NewWriterV2(&first, level)
		copied, copyErr := io.Copy(w, bytes.NewReader(payload))
		if copyErr != nil {
			t.Errorf("Copy Error: %v", copyErr)
		}
		if int(copied) != len(payload) {
			t.Errorf("Copy() n=%v, want %v", copied, len(payload))
		}
		if closeErr := w.Close(); closeErr != nil {
			t.Errorf("Close Error after copied %d bytes: %v", copied, closeErr)
		}
		if checkErr := checkCompressedData(first.Bytes(), payload); checkErr != nil {
			t.Error(checkErr)
		}

		// Second pass: the same writer, Reset onto a new buffer and fed
		// with a single Write call.
		var second bytes.Buffer
		w.Reset(&second)
		written, writeErr := w.Write(payload)
		if writeErr != nil {
			t.Errorf("Write error after Reset: %v", writeErr)
		}
		if written != len(payload) {
			t.Errorf("Write() after Reset n=%d, want %d", written, len(payload))
		}
		if closeErr := w.Close(); closeErr != nil {
			t.Errorf("Close error after Reset (copied %d) bytes: %v", written, closeErr)
		}

		// Reset must not leave state behind that changes the output.
		if !bytes.Equal(first.Bytes(), second.Bytes()) {
			t.Error("Compressed data after Reset doesn't equal first time")
		}
	}
}
func TestEncoderStreams(t *testing.T) {
// Test that output is streamed.
// Adjust window size to ensure the encoder outputs at least enough bytes
@ -576,6 +575,30 @@ func BenchmarkEncodeLevelsReset(b *testing.B) {
}
}
// BenchmarkEncodeLevelsResetV2 measures NewWriterV2 compression throughput
// for levels BestSpeed through 7, reusing a single writer per level via Reset.
//
// Before each sub-benchmark the test file is compressed once into a buffer so
// that the compression ratio can be reported as a custom "ratio" metric.
// Any Write or Close error aborts the benchmark instead of being silently
// ignored, so a broken writer cannot produce misleading numbers.
func BenchmarkEncodeLevelsResetV2(b *testing.B) {
	opticks, err := ioutil.ReadFile("testdata/Isaac.Newton-Opticks.txt")
	if err != nil {
		b.Fatal(err)
	}

	for level := BestSpeed; level <= 7; level++ {
		// Warm-up pass: compress once to learn the output size for the
		// ratio metric. This happens outside b.Run, so it is not timed.
		buf := new(bytes.Buffer)
		w := NewWriterV2(buf, level)
		if _, err := w.Write(opticks); err != nil {
			b.Fatal(err)
		}
		if err := w.Close(); err != nil {
			b.Fatal(err)
		}

		b.Run(fmt.Sprintf("%d", level), func(b *testing.B) {
			b.ReportAllocs()
			b.ReportMetric(float64(len(opticks))/float64(buf.Len()), "ratio")
			b.SetBytes(int64(len(opticks)))
			for i := 0; i < b.N; i++ {
				// Reuse the writer rather than allocating a new one per
				// iteration; output is discarded.
				w.Reset(ioutil.Discard)
				if _, err := w.Write(opticks); err != nil {
					b.Fatal(err)
				}
				if err := w.Close(); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}
func BenchmarkDecodeLevels(b *testing.B) {
opticks, err := ioutil.ReadFile("testdata/Isaac.Newton-Opticks.txt")
if err != nil {
@ -694,6 +717,10 @@ func BenchmarkEncodeM4Chain128(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, DistanceBitCost: 57}, 1<<16)
}
func BenchmarkEncodeM4Chain256(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 256, HashLen: 5, DistanceBitCost: 57}, 1<<16)
}
func TestEncodeM0(t *testing.T) {
test(t, "testdata/Isaac.Newton-Opticks.txt", matchfinder.M0{}, 1<<16)
}

View File

@ -20,7 +20,7 @@ func HTTPCompressor(w http.ResponseWriter, r *http.Request) io.WriteCloser {
switch encoding {
case "br":
w.Header().Set("Content-Encoding", "br")
return NewWriter(w)
return NewWriterV2(w, DefaultCompression)
case "gzip":
w.Header().Set("Content-Encoding", "gzip")
return gzip.NewWriter(w)

View File

@ -3,6 +3,8 @@ package brotli
import (
"errors"
"io"
"github.com/andybalholm/brotli/matchfinder"
)
const (
@ -117,3 +119,44 @@ type nopCloser struct {
}
// Close does nothing and always reports success.
func (nopCloser) Close() error {
	return nil
}
// NewWriterV2 is like NewWriterLevel, but it uses the new implementation
// based on the matchfinder package. It currently supports up to level 7;
// if a higher level is specified, level 7 will be used.
//
// Levels below 2 use matchfinder.M0 (with Lazy enabled only for level 1).
// Levels 2 and up use matchfinder.M4, with the hash length and chain length
// chosen according to the level.
func NewWriterV2(dst io.Writer, level int) *matchfinder.Writer {
	w := &matchfinder.Writer{
		Dest:      dst,
		Encoder:   &Encoder{},
		BlockSize: 1 << 16,
	}

	if level < 2 {
		w.MatchFinder = matchfinder.M0{Lazy: level == 1}
		return w
	}

	// Chain lengths for levels 2 through 6, indexed by level. Entries 0 and
	// 1 are unused because those levels were handled above; every level
	// past the end of the table uses the maximum chain length of 64.
	chainLengths := [...]int{2: 0, 3: 1, 4: 2, 5: 4, 6: 8}
	chainLen := 64
	if level < len(chainLengths) {
		chainLen = chainLengths[level]
	}

	// Levels 6 and above hash 5 bytes; lower levels hash 6.
	hashLen := 6
	if level >= 6 {
		hashLen = 5
	}

	w.MatchFinder = &matchfinder.M4{
		MaxDistance:     1 << 20,
		ChainLength:     chainLen,
		HashLen:         hashLen,
		DistanceBitCost: 57,
	}
	return w
}