Add lazy RLE+ decoding
License: MIT Signed-off-by: Jakub Sztandera <kubuxu@protonmail.ch>
This commit is contained in:
parent
bbc61a8f86
commit
c557aa206f
154
lib/rlepluslazy/internal/bitvector.go
Normal file
154
lib/rlepluslazy/internal/bitvector.go
Normal file
@ -0,0 +1,154 @@
|
||||
package bitvector
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
)
|
||||
|
||||
// ErrOutOfRange is reported when a bit index lies at or beyond the
// logical length of a BitVector.
var ErrOutOfRange = errors.New("index out of range")
|
||||
|
||||
// BitNumbering selects which end of a byte is treated as bit position 0:
// the least-significant bit (LSB0) or the most-significant bit (MSB0).
//
// BitVector uses it in three places:
//  1. The ordering of bits within the Buf []byte storage
//  2. The order in which Extend() adds bits
//  3. The order in which Take() reads bits
//
// See https://en.wikipedia.org/wiki/Bit_numbering
type BitNumbering int

// Supported bit numberings. LSB0 is the zero value of BitNumbering.
const (
	// LSB0 numbers bits starting from the low-order bit.
	LSB0 BitNumbering = iota

	// MSB0 numbers bits starting from the high-order bit.
	MSB0
)
|
||||
|
||||
// BitVector is used to manipulate ordered collections of bits
|
||||
type BitVector struct {
|
||||
Buf []byte
|
||||
|
||||
// BytePacking is the bit ordering within bytes
|
||||
BytePacking BitNumbering
|
||||
|
||||
// Len is the logical number of bits in the vector.
|
||||
// The last byte in Buf may have undefined bits if Len is not a multiple of 8
|
||||
Len uint
|
||||
}
|
||||
|
||||
// NewBitVector constructs a new BitVector from a slice of bytes.
|
||||
//
|
||||
// The bytePacking parameter is required to know how to interpret the bit ordering within the bytes.
|
||||
func NewBitVector(buf []byte, bytePacking BitNumbering) *BitVector {
|
||||
return &BitVector{
|
||||
BytePacking: bytePacking,
|
||||
Buf: buf,
|
||||
Len: uint(len(buf) * 8),
|
||||
}
|
||||
}
|
||||
|
||||
// Push adds a single bit to the BitVector.
|
||||
//
|
||||
// Although it takes a byte, only the low-order bit is used, so just use 0 or 1.
|
||||
func (v *BitVector) Push(val byte) {
|
||||
if v.Len%8 == 0 {
|
||||
v.Buf = append(v.Buf, 0)
|
||||
}
|
||||
lastIdx := v.Len / 8
|
||||
|
||||
switch v.BytePacking {
|
||||
case LSB0:
|
||||
v.Buf[lastIdx] |= (val & 1) << (v.Len % 8)
|
||||
default:
|
||||
v.Buf[lastIdx] |= (val & 1) << (7 - (v.Len % 8))
|
||||
}
|
||||
|
||||
v.Len++
|
||||
}
|
||||
|
||||
// Get returns a single bit as a byte -- either 0 or 1
|
||||
func (v *BitVector) Get(idx uint) (byte, error) {
|
||||
if idx >= v.Len {
|
||||
return 0, ErrOutOfRange
|
||||
}
|
||||
blockIdx := idx / 8
|
||||
|
||||
switch v.BytePacking {
|
||||
case LSB0:
|
||||
return v.Buf[blockIdx] >> (idx % 8) & 1, nil
|
||||
default:
|
||||
return v.Buf[blockIdx] >> (7 - idx%8) & 1, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Extend adds up to 8 bits to the receiver
|
||||
//
|
||||
// Given a byte b == 0b11010101
|
||||
// v.Extend(b, 4, LSB0) would add < 1, 0, 1, 0 >
|
||||
// v.Extend(b, 4, MSB0) would add < 1, 1, 0, 1 >
|
||||
//
|
||||
// Panics if count is out of range
|
||||
func (v *BitVector) Extend(val byte, count uint, order BitNumbering) {
|
||||
if count > 8 {
|
||||
log.Panicf("invalid count")
|
||||
}
|
||||
|
||||
for i := uint(0); i < count; i++ {
|
||||
switch order {
|
||||
case LSB0:
|
||||
v.Push((val >> i) & 1)
|
||||
default:
|
||||
v.Push((val >> (7 - i)) & 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Take reads up to 8 bits at the given index.
|
||||
//
|
||||
// Given a BitVector < 1, 1, 0, 1, 0, 1, 0, 1 >
|
||||
// v.Take(0, 4, LSB0) would return 0b00001011
|
||||
// v.Take(0, 4, MSB0) would return 0b11010000
|
||||
//
|
||||
// Panics if count is out of range
|
||||
func (v *BitVector) Take(index uint, count uint, order BitNumbering) (out byte) {
|
||||
if count > 8 {
|
||||
log.Panicf("invalid count")
|
||||
}
|
||||
|
||||
for i := uint(0); i < count; i++ {
|
||||
val, _ := v.Get(index + i)
|
||||
|
||||
switch order {
|
||||
case LSB0:
|
||||
out |= val << i
|
||||
default:
|
||||
out |= val << (7 - i)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Iterator returns a function, which when invoked, returns the number
|
||||
// of bits requested, and increments an internal cursor.
|
||||
//
|
||||
// When the end of the BitVector is reached, it returns zeroes indefinitely
|
||||
//
|
||||
// Panics if count is out of range
|
||||
func (v *BitVector) Iterator(order BitNumbering) func(uint) byte {
|
||||
cursor := uint(0)
|
||||
return func(count uint) (out byte) {
|
||||
if count > 8 {
|
||||
log.Panicf("invalid count")
|
||||
}
|
||||
|
||||
out = v.Take(cursor, count, order)
|
||||
cursor += count
|
||||
return
|
||||
}
|
||||
}
|
136
lib/rlepluslazy/internal/bitvector_test.go
Normal file
136
lib/rlepluslazy/internal/bitvector_test.go
Normal file
@ -0,0 +1,136 @@
|
||||
package bitvector_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
bitvector "github.com/filecoin-project/go-lotus/lib/rlepluslazy/internal"
|
||||
)
|
||||
|
||||
func TestBitVector(t *testing.T) {
|
||||
t.Run("zero value", func(t *testing.T) {
|
||||
var v bitvector.BitVector
|
||||
|
||||
assert.Equal(t, bitvector.LSB0, v.BytePacking)
|
||||
})
|
||||
|
||||
t.Run("Push", func(t *testing.T) {
|
||||
// MSB0 bit numbering
|
||||
v := bitvector.BitVector{BytePacking: bitvector.MSB0}
|
||||
v.Push(1)
|
||||
v.Push(0)
|
||||
v.Push(1)
|
||||
v.Push(1)
|
||||
|
||||
assert.Equal(t, byte(176), v.Buf[0])
|
||||
|
||||
// LSB0 bit numbering
|
||||
v = bitvector.BitVector{BytePacking: bitvector.LSB0}
|
||||
v.Push(1)
|
||||
v.Push(0)
|
||||
v.Push(1)
|
||||
v.Push(1)
|
||||
|
||||
assert.Equal(t, byte(13), v.Buf[0])
|
||||
})
|
||||
|
||||
t.Run("Get", func(t *testing.T) {
|
||||
bits := []byte{1, 0, 1, 1, 0, 0, 1, 0}
|
||||
|
||||
for _, numbering := range []bitvector.BitNumbering{bitvector.MSB0, bitvector.LSB0} {
|
||||
v := bitvector.BitVector{BytePacking: numbering}
|
||||
|
||||
for _, bit := range bits {
|
||||
v.Push(bit)
|
||||
}
|
||||
|
||||
for idx, expected := range bits {
|
||||
actual, _ := v.Get(uint(idx))
|
||||
assert.Equal(t, expected, actual)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Extend", func(t *testing.T) {
|
||||
val := byte(171) // 0b10101011
|
||||
|
||||
var v bitvector.BitVector
|
||||
|
||||
// MSB0 bit numbering
|
||||
v = bitvector.BitVector{}
|
||||
v.Extend(val, 4, bitvector.MSB0)
|
||||
assertBitVector(t, []byte{1, 0, 1, 0}, v)
|
||||
v.Extend(val, 5, bitvector.MSB0)
|
||||
assertBitVector(t, []byte{1, 0, 1, 0, 1, 0, 1, 0, 1}, v)
|
||||
|
||||
// LSB0 bit numbering
|
||||
v = bitvector.BitVector{}
|
||||
v.Extend(val, 4, bitvector.LSB0)
|
||||
assertBitVector(t, []byte{1, 1, 0, 1}, v)
|
||||
v.Extend(val, 5, bitvector.LSB0)
|
||||
assertBitVector(t, []byte{1, 1, 0, 1, 1, 1, 0, 1, 0}, v)
|
||||
})
|
||||
|
||||
t.Run("invalid counts to Take/Extend/Iterator cause panics", func(t *testing.T) {
|
||||
v := bitvector.BitVector{BytePacking: bitvector.LSB0}
|
||||
|
||||
assert.Panics(t, func() { v.Extend(0xff, 9, bitvector.LSB0) })
|
||||
|
||||
assert.Panics(t, func() { v.Take(0, 9, bitvector.LSB0) })
|
||||
|
||||
next := v.Iterator(bitvector.LSB0)
|
||||
assert.Panics(t, func() { next(9) })
|
||||
})
|
||||
|
||||
t.Run("Take", func(t *testing.T) {
|
||||
var v bitvector.BitVector
|
||||
|
||||
bits := []byte{1, 0, 1, 0, 1, 0, 1, 1}
|
||||
for _, bit := range bits {
|
||||
v.Push(bit)
|
||||
}
|
||||
|
||||
assert.Equal(t, byte(176), v.Take(4, 4, bitvector.MSB0))
|
||||
assert.Equal(t, byte(13), v.Take(4, 4, bitvector.LSB0))
|
||||
})
|
||||
|
||||
t.Run("Iterator", func(t *testing.T) {
|
||||
var buf []byte
|
||||
|
||||
// make a bitvector of 256 sample bits
|
||||
for i := 0; i < 32; i++ {
|
||||
buf = append(buf, 128+32)
|
||||
}
|
||||
|
||||
v := bitvector.NewBitVector(buf, bitvector.LSB0)
|
||||
|
||||
next := v.Iterator(bitvector.LSB0)
|
||||
|
||||
// compare to Get()
|
||||
for i := uint(0); i < v.Len; i++ {
|
||||
expected, _ := v.Get(i)
|
||||
assert.Equal(t, expected, next(1))
|
||||
}
|
||||
|
||||
// out of range should return zero
|
||||
assert.Equal(t, byte(0), next(1))
|
||||
assert.Equal(t, byte(0), next(8))
|
||||
|
||||
// compare to Take()
|
||||
next = v.Iterator(bitvector.LSB0)
|
||||
assert.Equal(t, next(5), v.Take(0, 5, bitvector.LSB0))
|
||||
assert.Equal(t, next(8), v.Take(5, 8, bitvector.LSB0))
|
||||
})
|
||||
}
|
||||
|
||||
// Note: When using this helper assertion, expectedBits should *only* be 0s and 1s.
|
||||
func assertBitVector(t *testing.T, expectedBits []byte, actual bitvector.BitVector) {
|
||||
assert.Equal(t, uint(len(expectedBits)), actual.Len)
|
||||
|
||||
for idx, bit := range expectedBits {
|
||||
actualBit, err := actual.Get(uint(idx))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, bit, actualBit)
|
||||
}
|
||||
}
|
110
lib/rlepluslazy/rleplus.go
Normal file
110
lib/rlepluslazy/rleplus.go
Normal file
@ -0,0 +1,110 @@
|
||||
package rlepluslazy
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
bitvector "github.com/filecoin-project/go-lotus/lib/rlepluslazy/internal"
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// Version is the RLE+ format version this package decodes. It is compared
// against the first two bits of an encoded buffer.
const Version = 0

// ErrWrongVersion is returned when a buffer's version header does not
// match Version.
var ErrWrongVersion = errors.New("invalid RLE+ version")

// ErrDecode is returned (possibly wrapped) when the bit stream is not a
// valid encoding for this RLE+ version.
var ErrDecode = fmt.Errorf("invalid encoding for RLE+ version %d", Version)
|
||||
|
||||
type RLE struct {
|
||||
vec *bitvector.BitVector
|
||||
}
|
||||
|
||||
func FromBuf(buf []byte) (*RLE, error) {
|
||||
rle := &RLE{vec: bitvector.NewBitVector(buf, bitvector.LSB0)}
|
||||
|
||||
if err := rle.check(); err != nil {
|
||||
return nil, xerrors.Errorf("could not create RLE+ for a buffer: %w", err)
|
||||
}
|
||||
return rle, nil
|
||||
}
|
||||
|
||||
func (rle *RLE) check() error {
|
||||
ver := rle.vec.Take(0, 2, bitvector.LSB0)
|
||||
if ver != Version {
|
||||
return ErrWrongVersion
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rle *RLE) Iterator() (*iterator, error) {
|
||||
vit := rle.vec.Iterator(bitvector.LSB0)
|
||||
vit(2) // Take version
|
||||
|
||||
it := &iterator{next: vit}
|
||||
if err := it.prep(vit(1)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return it, nil
|
||||
}
|
||||
|
||||
// iterator walks the set bits of an RLE+ stream one run at a time.
type iterator struct {
	// next reads the requested number of bits (at most 8) from the stream.
	next func(uint) byte

	// curIdx is the index that the next call to Next will return.
	curIdx uint64
	// rep is how many indices remain in the current run of set bits;
	// zero means the iterator is exhausted.
	rep uint64
}
|
||||
|
||||
func (it *iterator) HasNext() bool {
|
||||
return it.rep != 0
|
||||
}
|
||||
|
||||
func (it *iterator) prep(curBit byte) error {
|
||||
|
||||
loop:
|
||||
for it.rep == 0 {
|
||||
x := it.next(1)
|
||||
switch x {
|
||||
case 1:
|
||||
it.rep = 1
|
||||
case 0:
|
||||
y := it.next(1)
|
||||
switch y {
|
||||
case 1:
|
||||
it.rep = uint64(it.next(4))
|
||||
case 0:
|
||||
var buf = make([]byte, 0, 10)
|
||||
for {
|
||||
b := it.next(8)
|
||||
buf = append(buf, b)
|
||||
if b&0x80 == 0 {
|
||||
break
|
||||
}
|
||||
if len(buf) > 10 {
|
||||
return xerrors.Errorf("run too long: %w", ErrDecode)
|
||||
}
|
||||
}
|
||||
it.rep, _ = binary.Uvarint(buf)
|
||||
}
|
||||
|
||||
// run with 0 length means end
|
||||
if it.rep == 0 {
|
||||
break loop
|
||||
}
|
||||
}
|
||||
|
||||
if curBit == 0 {
|
||||
curBit = 1
|
||||
it.curIdx = it.curIdx + it.rep
|
||||
it.rep = 0
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (it *iterator) Next() (uint64, error) {
|
||||
it.rep--
|
||||
res := it.curIdx
|
||||
it.curIdx++
|
||||
return res, it.prep(0)
|
||||
}
|
44
lib/rlepluslazy/rleplus_test.go
Normal file
44
lib/rlepluslazy/rleplus_test.go
Normal file
@ -0,0 +1,44 @@
|
||||
package rlepluslazy
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/filecoin-project/go-lotus/extern/rleplus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestDecode(t *testing.T) {
|
||||
// Encoding bitvec![LittleEndian; 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
||||
// in the Rust reference implementation gives an encoding of [223, 145, 136, 0] (without version field)
|
||||
// The bit vector is equivalent to the integer set { 0, 2, 4, 5, 6, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 }
|
||||
|
||||
// This is the above reference output with a version header "00" manually added
|
||||
referenceEncoding := []byte{124, 71, 34, 2}
|
||||
|
||||
expectedNumbers := []uint64{0, 2, 4, 5, 6, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}
|
||||
|
||||
encoded, _, err := rleplus.Encode(expectedNumbers)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Our encoded bytes are the same as the ref bytes
|
||||
assert.Equal(t, len(referenceEncoding), len(encoded))
|
||||
for idx, expected := range referenceEncoding {
|
||||
assert.Equal(t, expected, encoded[idx])
|
||||
}
|
||||
|
||||
rle, err := FromBuf(referenceEncoding)
|
||||
assert.NoError(t, err)
|
||||
decoded := make([]uint64, 0, len(expectedNumbers))
|
||||
|
||||
it, err := rle.Iterator()
|
||||
assert.NoError(t, err)
|
||||
for it.HasNext() {
|
||||
bit, err := it.Next()
|
||||
assert.NoError(t, err)
|
||||
decoded = append(decoded, bit)
|
||||
}
|
||||
|
||||
// Our decoded integers are the same as expected
|
||||
assert.Equal(t, expectedNumbers, decoded)
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user