// Copyright 2018 Klaus Post. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Based on work Copyright (c) 2013, Yann Collet, released under BSD License. package fse import ( "errors" "fmt" ) // Compress the input bytes. Input must be < 2GB. // Provide a Scratch buffer to avoid memory allocations. // Note that the output is also kept in the scratch buffer. // If input is too hard to compress, ErrIncompressible is returned. // If input is a single byte value repeated ErrUseRLE is returned. func Compress(in []byte, s *Scratch) ([]byte, error) { if len(in) <= 1 { return nil, ErrIncompressible } if len(in) > (2<<30)-1 { return nil, errors.New("input too big, must be < 2GB") } s, err := s.prepare(in) if err != nil { return nil, err } // Create histogram, if none was provided. maxCount := s.maxCount if maxCount == 0 { maxCount = s.countSimple(in) } // Reset for next run. s.clearCount = true s.maxCount = 0 if maxCount == len(in) { // One symbol, use RLE return nil, ErrUseRLE } if maxCount == 1 || maxCount < (len(in)>>7) { // Each symbol present maximum once or too well distributed. return nil, ErrIncompressible } s.optimalTableLog() err = s.normalizeCount() if err != nil { return nil, err } err = s.writeCount() if err != nil { return nil, err } if false { err = s.validateNorm() if err != nil { return nil, err } } err = s.buildCTable() if err != nil { return nil, err } err = s.compress(in) if err != nil { return nil, err } s.Out = s.bw.out // Check if we compressed. if len(s.Out) >= len(in) { return nil, ErrIncompressible } return s.Out, nil } // cState contains the compression state of a stream. type cState struct { bw *bitWriter stateTable []uint16 state uint16 } // init will initialize the compression state to the first symbol of the stream. 
func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) { c.bw = bw c.stateTable = ct.stateTable nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16 im := int32((nbBitsOut << 16) - first.deltaNbBits) lu := (im >> nbBitsOut) + first.deltaFindState c.state = c.stateTable[lu] return } // encode the output symbol provided and write it to the bitstream. func (c *cState) encode(symbolTT symbolTransform) { nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState c.bw.addBits16NC(c.state, uint8(nbBitsOut)) c.state = c.stateTable[dstState] } // encode the output symbol provided and write it to the bitstream. func (c *cState) encodeZero(symbolTT symbolTransform) { nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut)) c.state = c.stateTable[dstState] } // flush will write the tablelog to the output and flush the remaining full bytes. func (c *cState) flush(tableLog uint8) { c.bw.flush32() c.bw.addBits16NC(c.state, tableLog) c.bw.flush() } // compress is the main compression loop that will encode the input from the last byte to the first. func (s *Scratch) compress(src []byte) error { if len(src) <= 2 { return errors.New("compress: src too small") } tt := s.ct.symbolTT[:256] s.bw.reset(s.Out) // Our two states each encodes every second byte. // Last byte encoded (first byte decoded) will always be encoded by c1. var c1, c2 cState // Encode so remaining size is divisible by 4. 
ip := len(src) if ip&1 == 1 { c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) c1.encodeZero(tt[src[ip-3]]) ip -= 3 } else { c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) ip -= 2 } if ip&2 != 0 { c2.encodeZero(tt[src[ip-1]]) c1.encodeZero(tt[src[ip-2]]) ip -= 2 } // Main compression loop. switch { case !s.zeroBits && s.actualTableLog <= 8: // We can encode 4 symbols without requiring a flush. // We do not need to check if any output is 0 bits. for ip >= 4 { s.bw.flush32() v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] c2.encode(tt[v0]) c1.encode(tt[v1]) c2.encode(tt[v2]) c1.encode(tt[v3]) ip -= 4 } case !s.zeroBits: // We do not need to check if any output is 0 bits. for ip >= 4 { s.bw.flush32() v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] c2.encode(tt[v0]) c1.encode(tt[v1]) s.bw.flush32() c2.encode(tt[v2]) c1.encode(tt[v3]) ip -= 4 } case s.actualTableLog <= 8: // We can encode 4 symbols without requiring a flush for ip >= 4 { s.bw.flush32() v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] c2.encodeZero(tt[v0]) c1.encodeZero(tt[v1]) c2.encodeZero(tt[v2]) c1.encodeZero(tt[v3]) ip -= 4 } default: for ip >= 4 { s.bw.flush32() v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] c2.encodeZero(tt[v0]) c1.encodeZero(tt[v1]) s.bw.flush32() c2.encodeZero(tt[v2]) c1.encodeZero(tt[v3]) ip -= 4 } } // Flush final state. // Used to initialize state when decoding. c2.flush(s.actualTableLog) c1.flush(s.actualTableLog) return s.bw.close() } // writeCount will write the normalized histogram count to header. // This is read back by readNCount. 
//
// The header starts with 4 bits holding tableLog-minTablelog, followed by one
// variable-width field per symbol taken from s.norm. Bits are accumulated in
// bitStream and spilled to the output 16 bits at a time, low byte first.
// On success s.Out is set to the written header bytes. An error is returned
// if the running remainder drops below 1 or more symbols were consumed than
// s.symbolLen.
func (s *Scratch) writeCount() error {
	var (
		tableLog  = s.actualTableLog
		tableSize = 1 << tableLog
		// previous0 is set when the symbol just written had a norm of 0.
		previous0 bool
		// charnum is the index of the next symbol in s.norm.
		charnum uint16

		// Size of the output window used while writing.
		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3

		// Write Table Size
		bitStream = uint32(tableLog - minTablelog)
		bitCount  = uint(4)
		remaining = int16(tableSize + 1) /* +1 for extra accuracy */
		threshold = int16(tableSize)
		nbBits    = uint(tableLog + 1)
	)
	// Replace s.Out when its capacity cannot hold the header.
	if cap(s.Out) < maxHeaderSize {
		s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize)
	}
	outP := uint(0)
	out := s.Out[:maxHeaderSize]

	// stops at 1
	for remaining > 1 {
		if previous0 {
			// Encode the run of zero-norm symbols that follows a zero.
			// NOTE(review): this scan has no explicit bound; it relies on a
			// non-zero norm entry still existing while remaining > 1.
			start := charnum
			for s.norm[charnum] == 0 {
				charnum++
			}
			// Each full group of 24 zeros is written as 16 one-bits.
			for charnum >= start+24 {
				start += 24
				bitStream += uint32(0xFFFF) << bitCount
				out[outP] = byte(bitStream)
				out[outP+1] = byte(bitStream >> 8)
				outP += 2
				bitStream >>= 16
			}
			// Each further group of 3 zeros is written as the 2-bit value 3.
			for charnum >= start+3 {
				start += 3
				bitStream += 3 << bitCount
				bitCount += 2
			}
			// The leftover run length (0..2) terminates the run in 2 bits.
			bitStream += uint32(charnum-start) << bitCount
			bitCount += 2
			if bitCount > 16 {
				out[outP] = byte(bitStream)
				out[outP+1] = byte(bitStream >> 8)
				outP += 2
				bitStream >>= 16
				bitCount -= 16
			}
		}

		count := s.norm[charnum]
		charnum++
		max := (2*threshold - 1) - remaining
		// A negative count reduces remaining by its absolute value.
		if count < 0 {
			remaining += count
		} else {
			remaining -= count
		}
		count++ // +1 for extra accuracy
		if count >= threshold {
			count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
		}
		bitStream += uint32(count) << bitCount
		bitCount += nbBits
		// Values below max use one bit less.
		if count < max {
			bitCount--
		}

		// count was incremented above, so 1 means the norm value was 0.
		previous0 = count == 1
		if remaining < 1 {
			return errors.New("internal error: remaining<1")
		}
		// Narrow the field width as the remaining total shrinks.
		for remaining < threshold {
			nbBits--
			threshold >>= 1
		}

		if bitCount > 16 {
			out[outP] = byte(bitStream)
			out[outP+1] = byte(bitStream >> 8)
			outP += 2
			bitStream >>= 16
			bitCount -= 16
		}
	}

	// Write the pending bits; only whole bytes actually in use are counted.
	out[outP] = byte(bitStream)
	out[outP+1] = byte(bitStream >> 8)
	outP += (bitCount + 7) / 8

	if uint16(charnum) > s.symbolLen {
		return errors.New("internal error: charnum > s.symbolLen")
	}
	s.Out = out[:outP]
	return nil
}

// symbolTransform contains the state transform for a symbol.
type symbolTransform struct { deltaFindState int32 deltaNbBits uint32 } // String prints values as a human readable string. func (s symbolTransform) String() string { return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState) } // cTable contains tables used for compression. type cTable struct { tableSymbol []byte stateTable []uint16 symbolTT []symbolTransform } // allocCtable will allocate tables needed for compression. // If existing tables a re big enough, they are simply re-used. func (s *Scratch) allocCtable() { tableSize := 1 << s.actualTableLog // get tableSymbol that is big enough. if cap(s.ct.tableSymbol) < int(tableSize) { s.ct.tableSymbol = make([]byte, tableSize) } s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] ctSize := tableSize if cap(s.ct.stateTable) < ctSize { s.ct.stateTable = make([]uint16, ctSize) } s.ct.stateTable = s.ct.stateTable[:ctSize] if cap(s.ct.symbolTT) < 256 { s.ct.symbolTT = make([]symbolTransform, 256) } s.ct.symbolTT = s.ct.symbolTT[:256] } // buildCTable will populate the compression table so it is ready to be used. 
//
// It works from s.norm and s.actualTableLog in four steps: compute each
// symbol's start position (cumul), spread the symbols over tableSymbol,
// fill the state table, and fill the per-symbol transform table.
// s.zeroBits is updated as a side effect. An error is returned when an
// internal consistency check fails.
func (s *Scratch) buildCTable() error {
	tableSize := uint32(1 << s.actualTableLog)
	// Cells above highThreshold are handed to symbols whose norm is -1.
	highThreshold := tableSize - 1
	var cumul [maxSymbolValue + 2]int16

	s.allocCtable()
	tableSymbol := s.ct.tableSymbol[:tableSize]
	// symbol start positions
	{
		cumul[0] = 0
		for ui, v := range s.norm[:s.symbolLen-1] {
			u := byte(ui) // one less than reference
			if v == -1 {
				// Low proba symbol
				// It occupies one cell, taken from the top of the table.
				cumul[u+1] = cumul[u] + 1
				tableSymbol[highThreshold] = u
				highThreshold--
			} else {
				cumul[u+1] = cumul[u] + v
			}
		}
		// Encode last symbol separately to avoid overflowing u
		u := int(s.symbolLen - 1)
		v := s.norm[s.symbolLen-1]
		if v == -1 {
			// Low proba symbol
			cumul[u+1] = cumul[u] + 1
			tableSymbol[highThreshold] = byte(u)
			highThreshold--
		} else {
			cumul[u+1] = cumul[u] + v
		}
		// The cumulative total must cover the table exactly.
		if uint32(cumul[s.symbolLen]) != tableSize {
			return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
		}
		cumul[s.symbolLen] = int16(tableSize) + 1
	}
	// Spread symbols
	s.zeroBits = false
	{
		step := tableStep(tableSize)
		tableMask := tableSize - 1
		var position uint32
		// if any symbol > largeLimit, we may have 0 bits output.
		// largeLimit is half of the table size.
		largeLimit := int16(1 << (s.actualTableLog - 1))
		for ui, v := range s.norm[:s.symbolLen] {
			symbol := byte(ui)
			if v > largeLimit {
				s.zeroBits = true
			}
			// Place v occurrences, stepping through the table and skipping
			// the cells reserved above highThreshold. Entries with v <= 0
			// place nothing here.
			for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
				tableSymbol[position] = symbol
				position = (position + step) & tableMask
				for position > highThreshold {
					position = (position + step) & tableMask
				} /* Low proba area */
			}
		}

		// Check if we have gone through all positions
		if position != 0 {
			return errors.New("position!=0")
		}
	}

	// Build table
	table := s.ct.stateTable
	{
		tsi := int(tableSize)
		for u, v := range tableSymbol {
			// TableU16 : sorted by symbol order; gives next state value
			// cumul[v] is advanced so each symbol's cells fill consecutively.
			table[cumul[v]] = uint16(tsi + u)
			cumul[v]++
		}
	}

	// Build Symbol Transformation Table
	{
		// total is the running sum of cells assigned to earlier symbols.
		total := int16(0)
		symbolTT := s.ct.symbolTT[:s.symbolLen]
		tableLog := s.actualTableLog
		tl := (uint32(tableLog) << 16) - (1 << tableLog)
		for i, v := range s.norm[:s.symbolLen] {
			switch v {
			case 0:
				// Symbol not present; its transform is left untouched.
			case -1, 1:
				// Symbols occupying a single cell.
				symbolTT[i].deltaNbBits = tl
				symbolTT[i].deltaFindState = int32(total - 1)
				total++
			default:
				maxBitsOut := uint32(tableLog) - highBits(uint32(v-1))
				minStatePlus := uint32(v) << maxBitsOut
				symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
				symbolTT[i].deltaFindState = int32(total - v)
				total += v
			}
		}
		if total != int16(tableSize) {
			return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
		}
	}
	return nil
}

// countSimple will create a simple histogram in s.count.
// Returns the biggest count.
// Does not update s.clearCount.
//
// Counts are added on top of whatever s.count already holds. s.symbolLen is
// set to one past the highest symbol with a non-zero count; it is left
// unchanged when every count is zero.
func (s *Scratch) countSimple(in []byte) (max int) {
	for _, v := range in {
		s.count[v]++
	}
	m := uint32(0)
	for i, v := range s.count[:] {
		if v > m {
			m = v
		}
		if v > 0 {
			s.symbolLen = uint16(i) + 1
		}
	}
	return int(m)
}

// minTableLog provides the minimum logSize to safely represent a distribution.
// It returns the smaller of two values: one derived from the input size
// (s.br.remain()) and one derived from the number of symbols (s.symbolLen).
func (s *Scratch) minTableLog() uint8 {
	minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1
	minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2
	if minBitsSrc < minBitsSymbols {
		return uint8(minBitsSrc)
	}
	return uint8(minBitsSymbols)
}

// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
//
// It starts from the configured s.TableLog, lowers it when the input size
// does not justify that accuracy, raises it to at least minTableLog(), and
// finally clamps it to the range [minTablelog, maxTableLog].
func (s *Scratch) optimalTableLog() {
	tableLog := s.TableLog
	minBits := s.minTableLog()
	// NOTE(review): this is uint8 arithmetic, so the subtraction wraps
	// around if highBits returns less than 2 — confirm whether that can
	// happen for very small inputs.
	maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2
	if maxBitsSrc < tableLog {
		// Accuracy can be reduced
		tableLog = maxBitsSrc
	}
	if minBits > tableLog {
		tableLog = minBits
	}
	// Need a minimum to safely represent all symbol values
	if tableLog < minTablelog {
		tableLog = minTablelog
	}
	if tableLog > maxTableLog {
		tableLog = maxTableLog
	}
	s.actualTableLog = tableLog
}

// rtbTable holds the "rest to beat" thresholds used by normalizeCount when
// rounding small probabilities (proba < 8); it is indexed by the truncated
// probability.
var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}

// normalizeCount will normalize the count of the symbols so
// the total is equal to the table size.
//
// Results are written to s.norm: 0 for absent symbols, -1 for symbols whose
// count is at or below the low threshold, and a scaled probability otherwise.
// Any rounding surplus or deficit is applied to the symbol with the largest
// probability; if that correction would be too large relative to it,
// normalizeCount2 is used instead and its result returned.
func (s *Scratch) normalizeCount() error {
	var (
		tableLog = s.actualTableLog
		// Probabilities are computed in fixed point with 62 fractional bits.
		scale             = 62 - uint64(tableLog)
		step              = (1 << 62) / uint64(s.br.remain())
		vStep             = uint64(1) << (scale - 20)
		stillToDistribute = int16(1 << tableLog)
		// largest is the index of the symbol with the highest probability,
		// largestP that probability.
		largest      int
		largestP     int16
		lowThreshold = (uint32)(s.br.remain() >> tableLog)
	)

	for i, cnt := range s.count[:s.symbolLen] {
		// already handled
		// if (count[s] == s.length) return 0;   /* rle special case */

		if cnt == 0 {
			s.norm[i] = 0
			continue
		}
		if cnt <= lowThreshold {
			// Marked -1; it accounts for one unit of the total.
			s.norm[i] = -1
			stillToDistribute--
		} else {
			proba := (int16)((uint64(cnt) * step) >> scale)
			if proba < 8 {
				// Round up when the discarded fraction exceeds the
				// threshold from rtbTable.
				restToBeat := vStep * uint64(rtbTable[proba])
				v := uint64(cnt)*step - (uint64(proba) << scale)
				if v > restToBeat {
					proba++
				}
			}
			if proba > largestP {
				largestP = proba
				largest = i
			}
			s.norm[i] = proba
			stillToDistribute -= proba
		}
	}

	if -stillToDistribute >= (s.norm[largest] >> 1) {
		// corner case, need another normalization method
		return s.normalizeCount2()
	}
	// Give the remaining difference to the most probable symbol.
	s.norm[largest] += stillToDistribute
	return nil
}

// Secondary
// normalization method.
// To be used when primary method fails.
//
// Symbols with small counts are first fixed at -1 or 1; the table cells that
// are left are then shared among the remaining symbols in proportion to their
// counts. Results are written to s.norm. An error is returned if a remaining
// symbol would receive a weight below 1.
func (s *Scratch) normalizeCount2() error {
	// Marker for symbols whose norm is decided in the final pass.
	const notYetAssigned = -2
	var (
		// distributed counts the table cells already handed out.
		distributed uint32
		// total is the sum of counts of symbols not yet assigned.
		total        = uint32(s.br.remain())
		tableLog     = s.actualTableLog
		lowThreshold = uint32(total >> tableLog)
		lowOne       = uint32((total * 3) >> (tableLog + 1))
	)
	for i, cnt := range s.count[:s.symbolLen] {
		if cnt == 0 {
			s.norm[i] = 0
			continue
		}
		if cnt <= lowThreshold {
			s.norm[i] = -1
			distributed++
			total -= cnt
			continue
		}
		if cnt <= lowOne {
			s.norm[i] = 1
			distributed++
			total -= cnt
			continue
		}
		s.norm[i] = notYetAssigned
	}
	toDistribute := (1 << tableLog) - distributed

	if (total / toDistribute) > lowOne {
		// risk of rounding to zero
		// Recompute lowOne from what is left and assign 1 to every pending
		// symbol at or below it.
		lowOne = uint32((total * 3) / (toDistribute * 2))
		for i, cnt := range s.count[:s.symbolLen] {
			if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
				s.norm[i] = 1
				distributed++
				total -= cnt
				continue
			}
		}
		toDistribute = (1 << tableLog) - distributed
	}
	// NOTE(review): distributed is incremented at most once per symbol in
	// s.count[:s.symbolLen], so it cannot exceed s.symbolLen and this branch
	// looks unreachable as written — compare against the reference
	// implementation before changing it, since that would alter the output.
	if distributed == uint32(s.symbolLen)+1 {
		// all values are pretty poor;
		//   probably incompressible data (should have already been detected);
		//   find max, then give all remaining points to max
		var maxV int
		var maxC uint32
		for i, cnt := range s.count[:s.symbolLen] {
			if cnt > maxC {
				maxV = i
				maxC = cnt
			}
		}
		s.norm[maxV] += int16(toDistribute)
		return nil
	}

	if total == 0 {
		// all of the symbols were low enough for the lowOne or lowThreshold
		// Hand out the remaining cells one at a time, cycling over the
		// symbols that have a positive norm.
		for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
			if s.norm[i] > 0 {
				toDistribute--
				s.norm[i]++
			}
		}
		return nil
	}

	var (
		// Fixed-point scale for the proportional split below.
		vStepLog = 62 - uint64(tableLog)
		mid      = uint64((1 << (vStepLog - 1)) - 1)
		rStep    = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
		tmpTotal = mid
	)
	for i, cnt := range s.count[:s.symbolLen] {
		if s.norm[i] == notYetAssigned {
			// The weight is the number of whole cells spanned by this
			// symbol's interval [tmpTotal, end) on the scaled axis.
			var (
				end    = tmpTotal + uint64(cnt)*rStep
				sStart = uint32(tmpTotal >> vStepLog)
				sEnd   = uint32(end >> vStepLog)
				weight = sEnd - sStart
			)
			if weight < 1 {
				return errors.New("weight < 1")
			}
			s.norm[i] = int16(weight)
			tmpTotal = end
		}
	}
	return nil
}

// validateNorm validates the normalized histogram table.
//
// It checks that the absolute values in s.norm[:s.symbolLen] sum to the
// table size and that no count at or beyond s.symbolLen is non-zero. When a
// check fails, the selected table log, the symbol length, and every
// count/norm pair are printed to standard output before the error is returned.
func (s *Scratch) validateNorm() (err error) {
	var total int
	for _, v := range s.norm[:s.symbolLen] {
		if v >= 0 {
			total += int(v)
		} else {
			total -= int(v)
		}
	}
	// Dump the table on failure; err is the named result set by the returns below.
	defer func() {
		if err == nil {
			return
		}
		fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
		for i, v := range s.norm[:s.symbolLen] {
			fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v)
		}
	}()
	if total != (1 << s.actualTableLog) {
		return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog)
	}
	for i, v := range s.count[s.symbolLen:] {
		if v != 0 {
			return fmt.Errorf("warning: Found symbol out of range, %d after cut", i)
		}
	}
	return nil
}