From 17e5901d050574f228e7d5a3f754a30a7cb55d55 Mon Sep 17 00:00:00 2001 From: Andy Balholm Date: Thu, 11 Jan 2024 17:31:05 -0800 Subject: [PATCH] Make my matchfinder work more accessible. --- README.md | 7 ++++ brotli_test.go | 107 +++++++++++++++++++++++++++++++------------------ http.go | 2 +- writer.go | 43 ++++++++++++++++++++ 4 files changed, 118 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 1ea7fdb..0062521 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,13 @@ This package is a brotli compressor and decompressor implemented in Go. It was translated from the reference implementation (https://github.com/google/brotli) with the `c2go` tool at https://github.com/andybalholm/c2go. +I have been working on new compression algorithms (not translated from C) +in the matchfinder package. +You can use them with the NewWriterV2 function. +Currently they give better results than the old implementation +(at least for compressing my test file, Newton’s *Opticks*) +on levels 2 to 6. + I am using it in production with https://github.com/andybalholm/redwood. API documentation is found at https://pkg.go.dev/github.com/andybalholm/brotli?tab=doc. diff --git a/brotli_test.go b/brotli_test.go index bfec80c..b9734f8 100644 --- a/brotli_test.go +++ b/brotli_test.go @@ -69,46 +69,6 @@ func TestEncoderEmptyWrite(t *testing.T) { t.Errorf("Close()=%v, want nil", err) } } - -func TestWriter(t *testing.T) { - for level := BestSpeed; level <= BestCompression; level++ { - // Test basic encoder usage. - input := []byte("

Hello world

") - out := bytes.Buffer{} - e := NewWriterOptions(&out, WriterOptions{Quality: level}) - in := bytes.NewReader([]byte(input)) - n, err := io.Copy(e, in) - if err != nil { - t.Errorf("Copy Error: %v", err) - } - if int(n) != len(input) { - t.Errorf("Copy() n=%v, want %v", n, len(input)) - } - if err := e.Close(); err != nil { - t.Errorf("Close Error after copied %d bytes: %v", n, err) - } - if err := checkCompressedData(out.Bytes(), input); err != nil { - t.Error(err) - } - - out2 := bytes.Buffer{} - e.Reset(&out2) - n2, err := e.Write(input) - if err != nil { - t.Errorf("Write error after Reset: %v", err) - } - if n2 != len(input) { - t.Errorf("Write() after Reset n=%d, want %d", n2, len(input)) - } - if err := e.Close(); err != nil { - t.Errorf("Close error after Reset (copied %d) bytes: %v", n2, err) - } - if !bytes.Equal(out.Bytes(), out2.Bytes()) { - t.Error("Compressed data after Reset doesn't equal first time") - } - } -} - func TestIssue22(t *testing.T) { f, err := os.Open("testdata/issue22.gz") if err != nil { @@ -149,6 +109,45 @@ func TestIssue22(t *testing.T) { } } +func TestWriterV2(t *testing.T) { + for level := BestSpeed; level <= BestCompression; level++ { + // Test basic encoder usage. + input := []byte("

Hello world

") + out := bytes.Buffer{} + e := NewWriterV2(&out, level) + in := bytes.NewReader([]byte(input)) + n, err := io.Copy(e, in) + if err != nil { + t.Errorf("Copy Error: %v", err) + } + if int(n) != len(input) { + t.Errorf("Copy() n=%v, want %v", n, len(input)) + } + if err := e.Close(); err != nil { + t.Errorf("Close Error after copied %d bytes: %v", n, err) + } + if err := checkCompressedData(out.Bytes(), input); err != nil { + t.Error(err) + } + + out2 := bytes.Buffer{} + e.Reset(&out2) + n2, err := e.Write(input) + if err != nil { + t.Errorf("Write error after Reset: %v", err) + } + if n2 != len(input) { + t.Errorf("Write() after Reset n=%d, want %d", n2, len(input)) + } + if err := e.Close(); err != nil { + t.Errorf("Close error after Reset (copied %d) bytes: %v", n2, err) + } + if !bytes.Equal(out.Bytes(), out2.Bytes()) { + t.Error("Compressed data after Reset doesn't equal first time") + } + } +} + func TestEncoderStreams(t *testing.T) { // Test that output is streamed. // Adjust window size to ensure the encoder outputs at least enough bytes @@ -576,6 +575,30 @@ func BenchmarkEncodeLevelsReset(b *testing.B) { } } +func BenchmarkEncodeLevelsResetV2(b *testing.B) { + opticks, err := ioutil.ReadFile("testdata/Isaac.Newton-Opticks.txt") + if err != nil { + b.Fatal(err) + } + + for level := BestSpeed; level <= 7; level++ { + buf := new(bytes.Buffer) + w := NewWriterV2(buf, level) + w.Write(opticks) + w.Close() + b.Run(fmt.Sprintf("%d", level), func(b *testing.B) { + b.ReportAllocs() + b.ReportMetric(float64(len(opticks))/float64(buf.Len()), "ratio") + b.SetBytes(int64(len(opticks))) + for i := 0; i < b.N; i++ { + w.Reset(ioutil.Discard) + w.Write(opticks) + w.Close() + } + }) + } +} + func BenchmarkDecodeLevels(b *testing.B) { opticks, err := ioutil.ReadFile("testdata/Isaac.Newton-Opticks.txt") if err != nil { @@ -694,6 +717,10 @@ func BenchmarkEncodeM4Chain128(b *testing.B) { benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, DistanceBitCost: 57}, 1<<16) } +func BenchmarkEncodeM4Chain256(b *testing.B) { + benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 256, HashLen: 5, DistanceBitCost: 57}, 1<<16) +} + func TestEncodeM0(t *testing.T) { test(t, "testdata/Isaac.Newton-Opticks.txt", matchfinder.M0{}, 1<<16) } diff --git a/http.go b/http.go index b5b8904..3d3a8a0 100644 --- a/http.go +++ b/http.go @@ -20,7 +20,7 @@ func HTTPCompressor(w http.ResponseWriter, r *http.Request) io.WriteCloser { switch encoding { case "br": w.Header().Set("Content-Encoding", "br") - return NewWriter(w) + return NewWriterV2(w, DefaultCompression) case "gzip": w.Header().Set("Content-Encoding", "gzip") return gzip.NewWriter(w) diff --git a/writer.go b/writer.go index 39feaef..8a68811 100644 --- a/writer.go +++ b/writer.go @@ -3,6 +3,8 @@ package brotli import ( "errors" "io" + + "github.com/andybalholm/brotli/matchfinder" ) const ( @@ -117,3 +119,44 @@ type nopCloser struct { } func (nopCloser) Close() error { return nil } + +// NewWriterV2 is like NewWriterLevel, but it uses the new implementation +// based on the matchfinder package. It currently supports up to level 7; +// if a higher level is specified, level 7 will be used. +func NewWriterV2(dst io.Writer, level int) *matchfinder.Writer { + var mf matchfinder.MatchFinder + if level < 2 { + mf = matchfinder.M0{Lazy: level == 1} + } else { + hashLen := 6 + if level >= 6 { + hashLen = 5 + } + chainLen := 64 + switch level { + case 2: + chainLen = 0 + case 3: + chainLen = 1 + case 4: + chainLen = 2 + case 5: + chainLen = 4 + case 6: + chainLen = 8 + } + mf = &matchfinder.M4{ + MaxDistance: 1 << 20, + ChainLength: chainLen, + HashLen: hashLen, + DistanceBitCost: 57, + } + } + + return &matchfinder.Writer{ + Dest: dst, + MatchFinder: mf, + Encoder: &Encoder{}, + BlockSize: 1 << 16, + } +}