Squashed 'extern/rleplus/' content from commit 59d0714
git-subtree-dir: extern/rleplus git-subtree-split: 59d0714e9be58cf96d82cdce18fe727041f9001d
This commit is contained in:
commit
c57c47ffb5
154
internal/bitvector.go
Normal file
154
internal/bitvector.go
Normal file
@ -0,0 +1,154 @@
|
|||||||
|
package bitvector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"log"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// ErrOutOfRange - the index passed is out of range for the BitVector
	ErrOutOfRange = errors.New("index out of range")
)

// BitNumbering indicates the ordering of bits, either
// least-significant bit in position 0, or most-significant bit
// in position 0.
//
// It is used in 3 ways with BitVector:
// 1. Ordering of bits within the Buf []byte structure
// 2. What order to add bits when using Extend()
// 3. What order to read bits when using Take()
//
// https://en.wikipedia.org/wiki/Bit_numbering
type BitNumbering int

const (
	// LSB0 - bit ordering starts with the low-order bit
	LSB0 BitNumbering = iota

	// MSB0 - bit ordering starts with the high-order bit
	MSB0
)

// BitVector is used to manipulate ordered collections of bits
type BitVector struct {
	// Buf holds the bits, eight per byte, packed according to BytePacking.
	Buf []byte

	// BytePacking is the bit ordering within bytes
	BytePacking BitNumbering

	// Len is the logical number of bits in the vector.
	// The last byte in Buf may have undefined bits if Len is not a multiple of 8
	Len uint
}

// bitShift returns how far a single bit has to be shifted to sit at position
// pos (0-7) of a byte under the given numbering. Any value other than LSB0 is
// treated as MSB0.
func bitShift(pos uint, order BitNumbering) uint {
	if order == LSB0 {
		return pos
	}
	return 7 - pos
}

// NewBitVector constructs a new BitVector from a slice of bytes.
//
// The bytePacking parameter is required to know how to interpret the bit ordering within the bytes.
// The vector's Len is set to cover every bit of buf, and buf itself (not a copy) becomes Buf.
func NewBitVector(buf []byte, bytePacking BitNumbering) *BitVector {
	return &BitVector{
		BytePacking: bytePacking,
		Buf:         buf,
		Len:         uint(len(buf) * 8),
	}
}

// Push adds a single bit to the BitVector.
//
// Although it takes a byte, only the low-order bit is used, so just use 0 or 1.
func (v *BitVector) Push(val byte) {
	byteIdx := v.Len / 8

	// Grow only when the byte that receives the new bit does not exist yet.
	if byteIdx >= uint(len(v.Buf)) {
		v.Buf = append(v.Buf, 0)
	}

	// Bits beyond Len are undefined, so the target bit is written explicitly
	// (set or cleared) instead of only being OR-ed in.
	mask := byte(1) << bitShift(v.Len%8, v.BytePacking)
	if val&1 == 1 {
		v.Buf[byteIdx] |= mask
	} else {
		v.Buf[byteIdx] &^= mask
	}

	v.Len++
}

// Get returns a single bit as a byte -- either 0 or 1
//
// ErrOutOfRange is returned when idx is not below Len, or when Buf is too
// short to actually hold that bit.
func (v *BitVector) Get(idx uint) (byte, error) {
	byteIdx := idx / 8
	if idx >= v.Len || byteIdx >= uint(len(v.Buf)) {
		return 0, ErrOutOfRange
	}

	return v.Buf[byteIdx] >> bitShift(idx%8, v.BytePacking) & 1, nil
}

// Extend adds up to 8 bits to the receiver
//
// Given a byte b == 0b11010101
// v.Extend(b, 4, LSB0) would add < 1, 0, 1, 0 >
// v.Extend(b, 4, MSB0) would add < 1, 1, 0, 1 >
//
// Panics if count is out of range
func (v *BitVector) Extend(val byte, count uint, order BitNumbering) {
	if count > 8 {
		log.Panic("invalid count")
	}

	for i := uint(0); i < count; i++ {
		v.Push(val >> bitShift(i, order) & 1)
	}
}

// Take reads up to 8 bits at the given index.
//
// Given a BitVector < 1, 1, 0, 1, 0, 1, 0, 1 >
// v.Take(0, 4, LSB0) would return 0b00001011
// v.Take(0, 4, MSB0) would return 0b11010000
//
// Bits requested past the end of the vector read as 0.
//
// Panics if count is out of range
func (v *BitVector) Take(index uint, count uint, order BitNumbering) (out byte) {
	if count > 8 {
		log.Panic("invalid count")
	}

	for i := uint(0); i < count; i++ {
		// The error is dropped on purpose: Get yields 0 for an out-of-range
		// index, which is exactly the "reads as zero" behavior Take promises.
		bit, _ := v.Get(index + i)
		out |= bit << bitShift(i, order)
	}
	return out
}

// Iterator returns a function, which when invoked, returns the number
// of bits requested, and increments an internal cursor.
//
// When the end of the BitVector is reached, it returns zeroes indefinitely
//
// Panics if count is out of range
func (v *BitVector) Iterator(order BitNumbering) func(uint) byte {
	cursor := uint(0)
	return func(count uint) byte {
		// Take validates count and panics before the cursor is advanced.
		out := v.Take(cursor, count, order)
		cursor += count
		return out
	}
}
|
138
internal/bitvector_test.go
Normal file
138
internal/bitvector_test.go
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
package bitvector_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/filecoin-project/go-filecoin/rleplus/internal"
|
||||||
|
tf "github.com/filecoin-project/go-filecoin/testhelpers/testflags"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBitVector(t *testing.T) {
|
||||||
|
tf.UnitTest(t)
|
||||||
|
|
||||||
|
t.Run("zero value", func(t *testing.T) {
|
||||||
|
var v bitvector.BitVector
|
||||||
|
|
||||||
|
assert.Equal(t, bitvector.LSB0, v.BytePacking)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Push", func(t *testing.T) {
|
||||||
|
// MSB0 bit numbering
|
||||||
|
v := bitvector.BitVector{BytePacking: bitvector.MSB0}
|
||||||
|
v.Push(1)
|
||||||
|
v.Push(0)
|
||||||
|
v.Push(1)
|
||||||
|
v.Push(1)
|
||||||
|
|
||||||
|
assert.Equal(t, byte(176), v.Buf[0])
|
||||||
|
|
||||||
|
// LSB0 bit numbering
|
||||||
|
v = bitvector.BitVector{BytePacking: bitvector.LSB0}
|
||||||
|
v.Push(1)
|
||||||
|
v.Push(0)
|
||||||
|
v.Push(1)
|
||||||
|
v.Push(1)
|
||||||
|
|
||||||
|
assert.Equal(t, byte(13), v.Buf[0])
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Get", func(t *testing.T) {
|
||||||
|
bits := []byte{1, 0, 1, 1, 0, 0, 1, 0}
|
||||||
|
|
||||||
|
for _, numbering := range []bitvector.BitNumbering{bitvector.MSB0, bitvector.LSB0} {
|
||||||
|
v := bitvector.BitVector{BytePacking: numbering}
|
||||||
|
|
||||||
|
for _, bit := range bits {
|
||||||
|
v.Push(bit)
|
||||||
|
}
|
||||||
|
|
||||||
|
for idx, expected := range bits {
|
||||||
|
actual, _ := v.Get(uint(idx))
|
||||||
|
assert.Equal(t, expected, actual)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Extend", func(t *testing.T) {
|
||||||
|
val := byte(171) // 0b10101011
|
||||||
|
|
||||||
|
var v bitvector.BitVector
|
||||||
|
|
||||||
|
// MSB0 bit numbering
|
||||||
|
v = bitvector.BitVector{}
|
||||||
|
v.Extend(val, 4, bitvector.MSB0)
|
||||||
|
assertBitVector(t, []byte{1, 0, 1, 0}, v)
|
||||||
|
v.Extend(val, 5, bitvector.MSB0)
|
||||||
|
assertBitVector(t, []byte{1, 0, 1, 0, 1, 0, 1, 0, 1}, v)
|
||||||
|
|
||||||
|
// LSB0 bit numbering
|
||||||
|
v = bitvector.BitVector{}
|
||||||
|
v.Extend(val, 4, bitvector.LSB0)
|
||||||
|
assertBitVector(t, []byte{1, 1, 0, 1}, v)
|
||||||
|
v.Extend(val, 5, bitvector.LSB0)
|
||||||
|
assertBitVector(t, []byte{1, 1, 0, 1, 1, 1, 0, 1, 0}, v)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid counts to Take/Extend/Iterator cause panics", func(t *testing.T) {
|
||||||
|
v := bitvector.BitVector{BytePacking: bitvector.LSB0}
|
||||||
|
|
||||||
|
assert.Panics(t, func() { v.Extend(0xff, 9, bitvector.LSB0) })
|
||||||
|
|
||||||
|
assert.Panics(t, func() { v.Take(0, 9, bitvector.LSB0) })
|
||||||
|
|
||||||
|
next := v.Iterator(bitvector.LSB0)
|
||||||
|
assert.Panics(t, func() { next(9) })
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Take", func(t *testing.T) {
|
||||||
|
var v bitvector.BitVector
|
||||||
|
|
||||||
|
bits := []byte{1, 0, 1, 0, 1, 0, 1, 1}
|
||||||
|
for _, bit := range bits {
|
||||||
|
v.Push(bit)
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.Equal(t, byte(176), v.Take(4, 4, bitvector.MSB0))
|
||||||
|
assert.Equal(t, byte(13), v.Take(4, 4, bitvector.LSB0))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Iterator", func(t *testing.T) {
|
||||||
|
var buf []byte
|
||||||
|
|
||||||
|
// make a bitvector of 256 sample bits
|
||||||
|
for i := 0; i < 32; i++ {
|
||||||
|
buf = append(buf, 128+32)
|
||||||
|
}
|
||||||
|
|
||||||
|
v := bitvector.NewBitVector(buf, bitvector.LSB0)
|
||||||
|
|
||||||
|
next := v.Iterator(bitvector.LSB0)
|
||||||
|
|
||||||
|
// compare to Get()
|
||||||
|
for i := uint(0); i < v.Len; i++ {
|
||||||
|
expected, _ := v.Get(i)
|
||||||
|
assert.Equal(t, expected, next(1))
|
||||||
|
}
|
||||||
|
|
||||||
|
// out of range should return zero
|
||||||
|
assert.Equal(t, byte(0), next(1))
|
||||||
|
assert.Equal(t, byte(0), next(8))
|
||||||
|
|
||||||
|
// compare to Take()
|
||||||
|
next = v.Iterator(bitvector.LSB0)
|
||||||
|
assert.Equal(t, next(5), v.Take(0, 5, bitvector.LSB0))
|
||||||
|
assert.Equal(t, next(8), v.Take(5, 8, bitvector.LSB0))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: When using this helper assertion, expectedBits should *only* be 0s and 1s.
|
||||||
|
func assertBitVector(t *testing.T, expectedBits []byte, actual bitvector.BitVector) {
|
||||||
|
assert.Equal(t, uint(len(expectedBits)), actual.Len)
|
||||||
|
|
||||||
|
for idx, bit := range expectedBits {
|
||||||
|
actualBit, err := actual.Get(uint(idx))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, bit, actualBit)
|
||||||
|
}
|
||||||
|
}
|
204
rleplus.go
Normal file
204
rleplus.go
Normal file
@ -0,0 +1,204 @@
|
|||||||
|
package rleplus
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"github.com/filecoin-project/go-filecoin/rleplus/internal"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Version is the RLE+ format version; only its 2 lowest bits are meaningful.
const Version = 0

// ErrRunLengthTooLarge - data implies a run-length which isn't supported
var ErrRunLengthTooLarge = fmt.Errorf("run length too large for RLE+ version %d", Version)

// ErrDecode - invalid encoding for this version
var ErrDecode = fmt.Errorf("invalid encoding for RLE+ version %d", Version)

// ErrWrongVersion - wrong version of RLE+
var ErrWrongVersion = errors.New("invalid RLE+ version")
|
||||||
|
|
||||||
|
// Encode returns the RLE+ representation of the provided integers.
|
||||||
|
// Also returned is the number of bits required by this encoding,
|
||||||
|
// which is not necessarily on a byte boundary.
|
||||||
|
//
|
||||||
|
// The RLE+ spec is here: https://github.com/filecoin-project/specs/blob/master/data-structures.md#rle-bitset-encoding
|
||||||
|
// and is described by the BNF Grammar:
|
||||||
|
//
|
||||||
|
// <encoding> ::= <header> <blocks>
|
||||||
|
// <header> ::= <version> <bit>
|
||||||
|
// <version> ::= "00"
|
||||||
|
// <blocks> ::= <block> <blocks> | ""
|
||||||
|
// <block> ::= <block_single> | <block_short> | <block_long>
|
||||||
|
// <block_single> ::= "1"
|
||||||
|
// <block_short> ::= "01" <bit> <bit> <bit> <bit>
|
||||||
|
// <block_long> ::= "00" <unsigned_varint>
|
||||||
|
// <bit> ::= "0" | "1"
|
||||||
|
//
|
||||||
|
// Filecoin specific:
|
||||||
|
// The encoding is returned as a []byte, each byte packed starting with the low-order bit (LSB0)
|
||||||
|
func Encode(ints []uint64) ([]byte, uint, error) {
|
||||||
|
v := bitvector.BitVector{BytePacking: bitvector.LSB0}
|
||||||
|
firstBit, runs := RunLengths(ints)
|
||||||
|
|
||||||
|
// Add version header
|
||||||
|
v.Extend(Version, 2, bitvector.LSB0)
|
||||||
|
|
||||||
|
v.Push(firstBit)
|
||||||
|
|
||||||
|
for _, run := range runs {
|
||||||
|
switch {
|
||||||
|
case run == 1:
|
||||||
|
v.Push(1)
|
||||||
|
case run < 16:
|
||||||
|
v.Push(0)
|
||||||
|
v.Push(1)
|
||||||
|
v.Extend(byte(run), 4, bitvector.LSB0)
|
||||||
|
case run >= 16:
|
||||||
|
v.Push(0)
|
||||||
|
v.Push(0)
|
||||||
|
// 10 bytes needed to encode MaxUint64
|
||||||
|
buf := make([]byte, 10)
|
||||||
|
numBytes := binary.PutUvarint(buf, run)
|
||||||
|
for i := 0; i < numBytes; i++ {
|
||||||
|
v.Extend(buf[i], 8, bitvector.LSB0)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return nil, 0, ErrRunLengthTooLarge
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return v.Buf, v.Len, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode returns integers represented by the given RLE+ encoding
|
||||||
|
//
|
||||||
|
// The length of the encoding is not specified. It is inferred by
|
||||||
|
// reading zeroes from the (possibly depleted) BitVector, by virtue
|
||||||
|
// of the behavior of BitVector.Take() returning 0 when the end of
|
||||||
|
// the BitVector has been reached. This has the downside of not
|
||||||
|
// being able to detect corrupt encodings.
|
||||||
|
//
|
||||||
|
// The passed []byte should be packed in LSB0 bit numbering
|
||||||
|
func Decode(buf []byte) (ints []uint64, err error) {
|
||||||
|
if len(buf) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
v := bitvector.NewBitVector(buf, bitvector.LSB0)
|
||||||
|
take := v.Iterator(bitvector.LSB0)
|
||||||
|
|
||||||
|
// Read version and check
|
||||||
|
// Version check
|
||||||
|
ver := take(2)
|
||||||
|
if ver != Version {
|
||||||
|
return nil, ErrWrongVersion
|
||||||
|
}
|
||||||
|
|
||||||
|
curIdx := uint64(0)
|
||||||
|
curBit := take(1)
|
||||||
|
var runLength int
|
||||||
|
done := false
|
||||||
|
|
||||||
|
for done == false {
|
||||||
|
y := take(1)
|
||||||
|
switch y {
|
||||||
|
case 1:
|
||||||
|
runLength = 1
|
||||||
|
case 0:
|
||||||
|
val := take(1)
|
||||||
|
|
||||||
|
if val == 1 {
|
||||||
|
// short block
|
||||||
|
runLength = int(take(4))
|
||||||
|
} else {
|
||||||
|
// long block
|
||||||
|
var buf []byte
|
||||||
|
for {
|
||||||
|
b := take(8)
|
||||||
|
buf = append(buf, b)
|
||||||
|
|
||||||
|
if b&0x80 == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// 10 bytes is required to store math.MaxUint64 in a uvarint
|
||||||
|
if len(buf) > 10 {
|
||||||
|
return nil, ErrDecode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
x, _ := binary.Uvarint(buf)
|
||||||
|
|
||||||
|
if x == 0 {
|
||||||
|
done = true
|
||||||
|
}
|
||||||
|
runLength = int(x)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if curBit == 1 {
|
||||||
|
for j := 0; j < runLength; j++ {
|
||||||
|
ints = append(ints, curIdx+uint64(j))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
curIdx += uint64(runLength)
|
||||||
|
curBit = 1 - curBit
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunLengths transforms integers into its bit-set-run-length representation.
//
// A set of unsigned integers { 0, 2, 4, 5, 6 } can be thought of as
// indices into a bitset { 1, 0, 1, 0, 1, 1, 1 } where bitset[index] == 1.
//
// The bit set run lengths of this set would then be { 1, 1, 1, 1, 3 },
// representing lengths of runs alternating between 1 and 0, starting
// with a first bit of 1.
//
// The input may be in any order and is left untouched; a sorted copy is used
// internally. Duplicated numbers are ignored. An empty input yields a
// firstBit of 0 and no runs.
//
// This is a helper function for Encode()
func RunLengths(ints []uint64) (firstBit byte, runs []uint64) {
	if len(ints) == 0 {
		return 0, nil
	}

	// Sort a private copy so the caller's slice is not reordered.
	sorted := make([]uint64, len(ints))
	copy(sorted, ints)
	sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })

	prev := sorted[0]
	if prev == 0 {
		// Index 0 is a member, so the bitset starts with a run of ones.
		firstBit = 1
	} else {
		// first run of zeroes, covering indices 0 .. prev-1
		runs = append(runs, prev)
	}
	runs = append(runs, 1)

	for _, cur := range sorted[1:] {
		switch delta := cur - prev; {
		case delta == 1:
			// Adjacent index: the current run of ones grows.
			runs[len(runs)-1]++
		case delta > 1:
			// A gap: a run of zeroes, then a new run of ones.
			runs = append(runs, delta-1, 1)
		}
		// delta == 0 is a repeated number and contributes nothing.
		prev = cur
	}
	return firstBit, runs
}
|
183
rleplus_test.go
Normal file
183
rleplus_test.go
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
package rleplus_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"sort"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/filecoin-project/go-filecoin/rleplus"
|
||||||
|
"github.com/filecoin-project/go-filecoin/rleplus/internal"
|
||||||
|
tf "github.com/filecoin-project/go-filecoin/testhelpers/testflags"
|
||||||
|
"gotest.tools/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRleplus(t *testing.T) {
|
||||||
|
tf.UnitTest(t)
|
||||||
|
|
||||||
|
t.Run("Encode", func(t *testing.T) {
|
||||||
|
// Encode an intset
|
||||||
|
ints := []uint64{
|
||||||
|
// run of 1
|
||||||
|
0,
|
||||||
|
// gap of 1
|
||||||
|
// run of 1
|
||||||
|
2,
|
||||||
|
// gap of 1
|
||||||
|
// run of 3
|
||||||
|
4, 5, 6,
|
||||||
|
// gap of 4
|
||||||
|
// run of 17
|
||||||
|
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedBits := []byte{
|
||||||
|
0, 0, // version
|
||||||
|
1, // first bit
|
||||||
|
1, // run of 1
|
||||||
|
1, // gap of 1
|
||||||
|
1, // run of 1
|
||||||
|
1, // gap of 1
|
||||||
|
0, 1, 1, 1, 0, 0, // run of 3
|
||||||
|
0, 1, 0, 0, 1, 0, // gap of 4
|
||||||
|
|
||||||
|
// run of 17 < 0 0 (varint) >
|
||||||
|
0, 0,
|
||||||
|
1, 0, 0, 0, 1, 0, 0, 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
v := bitvector.BitVector{}
|
||||||
|
for _, bit := range expectedBits {
|
||||||
|
v.Push(bit)
|
||||||
|
}
|
||||||
|
actualBytes, _, err := rleplus.Encode(ints)
|
||||||
|
assert.NilError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, len(v.Buf), len(actualBytes))
|
||||||
|
for idx, expected := range v.Buf {
|
||||||
|
assert.Equal(
|
||||||
|
t,
|
||||||
|
fmt.Sprintf("%08b", expected),
|
||||||
|
fmt.Sprintf("%08b", actualBytes[idx]),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Encode allows all runs sizes possible uint64", func(t *testing.T) {
|
||||||
|
// create a run of math.MaxUint64
|
||||||
|
ints := []uint64{math.MaxUint64}
|
||||||
|
_, _, err := rleplus.Encode(ints)
|
||||||
|
assert.NilError(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Decode", func(t *testing.T) {
|
||||||
|
testCases := [][]uint64{
|
||||||
|
{},
|
||||||
|
{1},
|
||||||
|
{0},
|
||||||
|
{0, 1, 2, 3},
|
||||||
|
{
|
||||||
|
// run of 1
|
||||||
|
0,
|
||||||
|
// gap of 1
|
||||||
|
// run of 1
|
||||||
|
2,
|
||||||
|
// gap of 1
|
||||||
|
// run of 3
|
||||||
|
4, 5, 6,
|
||||||
|
// gap of 4
|
||||||
|
// run of 17
|
||||||
|
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
encoded, _, err := rleplus.Encode(tc)
|
||||||
|
assert.NilError(t, err)
|
||||||
|
|
||||||
|
result, err := rleplus.Decode(encoded)
|
||||||
|
assert.NilError(t, err)
|
||||||
|
|
||||||
|
sort.Slice(tc, func(i, j int) bool { return tc[i] < tc[j] })
|
||||||
|
sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
|
||||||
|
|
||||||
|
assert.Equal(t, len(tc), len(result))
|
||||||
|
|
||||||
|
for idx, expected := range tc {
|
||||||
|
assert.Equal(t, expected, result[idx])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Decode version check", func(t *testing.T) {
|
||||||
|
_, err := rleplus.Decode([]byte{0xff})
|
||||||
|
assert.Error(t, err, "invalid RLE+ version")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Decode returns an error with a bad encoding", func(t *testing.T) {
|
||||||
|
// create an encoding with a buffer with a run which is too long
|
||||||
|
_, err := rleplus.Decode([]byte{0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff})
|
||||||
|
assert.Error(t, err, "invalid encoding for RLE+ version 0")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("outputs same as reference implementation", func(t *testing.T) {
|
||||||
|
// Encoding bitvec![LittleEndian; 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
||||||
|
// in the Rust reference implementation gives an encoding of [223, 145, 136, 0] (without version field)
|
||||||
|
// The bit vector is equivalent to the integer set { 0, 2, 4, 5, 6, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 }
|
||||||
|
|
||||||
|
// This is the above reference output with a version header "00" manually added
|
||||||
|
referenceEncoding := []byte{124, 71, 34, 2}
|
||||||
|
|
||||||
|
expectedNumbers := []uint64{0, 2, 4, 5, 6, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}
|
||||||
|
|
||||||
|
encoded, _, err := rleplus.Encode(expectedNumbers)
|
||||||
|
assert.NilError(t, err)
|
||||||
|
|
||||||
|
// Our encoded bytes are the same as the ref bytes
|
||||||
|
assert.Equal(t, len(referenceEncoding), len(encoded))
|
||||||
|
for idx, expected := range referenceEncoding {
|
||||||
|
assert.Equal(t, expected, encoded[idx])
|
||||||
|
}
|
||||||
|
|
||||||
|
decoded, err := rleplus.Decode(referenceEncoding)
|
||||||
|
assert.NilError(t, err)
|
||||||
|
|
||||||
|
// Our decoded integers are the same as expected
|
||||||
|
sort.Slice(decoded, func(i, j int) bool { return decoded[i] < decoded[j] })
|
||||||
|
assert.Equal(t, len(expectedNumbers), len(decoded))
|
||||||
|
for idx, expected := range expectedNumbers {
|
||||||
|
assert.Equal(t, expected, decoded[idx])
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("RunLengths", func(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
ints []uint64
|
||||||
|
first byte
|
||||||
|
runs []uint64
|
||||||
|
}{
|
||||||
|
// empty
|
||||||
|
{},
|
||||||
|
|
||||||
|
// leading with ones
|
||||||
|
{[]uint64{0}, 1, []uint64{1}},
|
||||||
|
{[]uint64{0, 1}, 1, []uint64{2}},
|
||||||
|
{[]uint64{0, 0xffffffff, 0xffffffff + 1}, 1, []uint64{1, 0xffffffff - 1, 2}},
|
||||||
|
|
||||||
|
// leading with zeroes
|
||||||
|
{[]uint64{1}, 0, []uint64{1, 1}},
|
||||||
|
{[]uint64{2}, 0, []uint64{2, 1}},
|
||||||
|
{[]uint64{10, 11, 13, 20}, 0, []uint64{10, 2, 1, 1, 6, 1}},
|
||||||
|
{[]uint64{10, 11, 11, 13, 20, 10, 11, 13, 20}, 0, []uint64{10, 2, 1, 1, 6, 1}},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
first, runs := rleplus.RunLengths(testCase.ints)
|
||||||
|
assert.Equal(t, testCase.first, first)
|
||||||
|
assert.Equal(t, len(testCase.runs), len(runs))
|
||||||
|
for idx, runLength := range testCase.runs {
|
||||||
|
assert.Equal(t, runLength, runs[idx])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user