Optimize and start wrapping it up
License: MIT Signed-off-by: Jakub Sztandera <kubuxu@protonmail.ch>
This commit is contained in:
parent
d3dfd8a73b
commit
01c0a6ec9f
88
lib/rlepluslazy/bitvec.go
Normal file
88
lib/rlepluslazy/bitvec.go
Normal file
@ -0,0 +1,88 @@
|
||||
package rlepluslazy
|
||||
|
||||
// rbitvec reads bit fields of up to 8 bits from a byte slice, least
// significant bit first.
//
// Between calls to Get the accumulator always holds at least 8 unread bits
// (bitCap is in the range 8..15), so any legal Get can be answered from the
// accumulator alone before it is refilled.
type rbitvec struct {
	index int // position in vec of the next byte to merge into bits

	bits   uint16 // accumulator; bit 0 is the next bit Get will return
	bitCap byte   // number of bits in the accumulator that have been accounted for

	vec []byte // input buffer; only read, never written
}

// readBitvec returns a reader positioned at the first bit of vec.
//
// The accumulator is primed with vec[0] and marked as holding 8 bits, with
// index pointing at vec[1]. Starting from a zero bitCap instead would make the
// first Get underflow the byte counter and leave the accumulator short, which
// returns wrong bits whenever one of the first two reads needs more bits than
// remain of vec[0] (for example Get(8) followed by Get(8)).
//
// An empty or nil vec is valid: every Get then returns 0.
func readBitvec(vec []byte) *rbitvec {
	bv := &rbitvec{
		vec:    vec,
		index:  1,
		bitCap: 8,
	}
	if len(vec) > 0 {
		bv.bits = uint16(vec[0])
	}
	return bv
}

// NOTE(review): minCap and maxCap are not referenced by the code in this
// block; they appear to describe the range of the accumulator fill level.
const (
	minCap = 8
	maxCap = 16
)

// bitMasks[n] has the low n bits set, for n in 0..8.
var bitMasks = [9]byte{
	0x0,
	0x1,
	0x3,
	0x7,
	0xF,
	0x1F,
	0x3F,
	0x7F,
	0xFF,
}

// Get returns the next count bits of the stream in the low bits of the result
// and advances the reader by count bits. Bits past the end of the input read
// as zero. count must be in 0..8; a larger value panics on the bitMasks lookup.
func (bv *rbitvec) Get(count byte) byte {
	res := byte(bv.bits) & bitMasks[count]
	bv.bits = bv.bits >> count
	bv.bitCap = bv.bitCap - count

	// Always OR the next input byte in above the remaining bits. If it turns
	// out not to be consumed below (bitCap still >= 8), the same byte is ORed
	// in again at the matching position on a later call, which is harmless.
	if bv.index < len(bv.vec) {
		bv.bits = bv.bits | uint16(bv.vec[bv.index])<<bv.bitCap
	}

	// Branch-free refill bookkeeping. bitCap is a byte in 0..15 here, so
	// bitCap-8 wraps to 248..255 (top bit set) exactly when bitCap < 8.
	inc := (bv.bitCap - 8) >> 7 // inc == 1 iff bitCap < 8
	bv.index = bv.index + int(inc)
	bv.bitCap = bv.bitCap + inc<<3 // account for the 8 bits just merged

	return res
}
|
||||
|
||||
func writeBitvec(buf []byte) *wbitvec {
|
||||
return &wbitvec{buf: buf[:0]}
|
||||
}
|
||||
|
||||
type wbitvec struct {
|
||||
buf []byte
|
||||
index int
|
||||
|
||||
bits uint16
|
||||
bitCap byte
|
||||
}
|
||||
|
||||
func (bv *wbitvec) Out() []byte {
|
||||
if bv.bitCap != 0 {
|
||||
bv.buf = append(bv.buf, 0)[:bv.index+1]
|
||||
bv.buf[bv.index] = byte(bv.bits)
|
||||
}
|
||||
if bv.bitCap > 8 {
|
||||
bv.buf = append(bv.buf, byte(bv.bitCap>>8))
|
||||
}
|
||||
return bv.buf
|
||||
}
|
||||
|
||||
func (bv *wbitvec) Put(val byte, count byte) {
|
||||
bv.bits = bv.bits | uint16(val)<<bv.bitCap
|
||||
bv.bitCap = bv.bitCap + count
|
||||
|
||||
bv.buf = append(bv.buf, 0)[:bv.index+1] // increase cap, keep len
|
||||
bv.buf[bv.index] = byte(bv.bits)
|
||||
|
||||
// Warning, dragons again
|
||||
inc := (^(bv.bitCap - 8)) >> 7 // inc == 1 iff bitcap>=8
|
||||
bv.index = bv.index + int(inc)
|
||||
bv.bitCap = bv.bitCap - inc<<3
|
||||
bv.bits = bv.bits >> (inc << 3)
|
||||
}
|
@ -1,11 +1,9 @@
|
||||
package rlepluslazy
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
bitvector "github.com/filecoin-project/go-lotus/lib/rlepluslazy/internal"
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
@ -17,155 +15,34 @@ var (
|
||||
)
|
||||
|
||||
type RLE struct {
|
||||
vec *bitvector.BitVector
|
||||
buf []byte
|
||||
|
||||
changes []change
|
||||
}
|
||||
|
||||
type change struct {
|
||||
set bool
|
||||
index uint64
|
||||
}
|
||||
|
||||
func FromBuf(buf []byte) (*RLE, error) {
|
||||
rle := &RLE{vec: bitvector.NewBitVector(buf, bitvector.LSB0)}
|
||||
rle := &RLE{buf: buf}
|
||||
|
||||
if err := rle.check(); err != nil {
|
||||
return nil, xerrors.Errorf("could not create RLE+ for a buffer: %w", err)
|
||||
if len(buf) > 0 && buf[0]&3 != Version {
|
||||
return nil, xerrors.Errorf("could not create RLE+ for a buffer: %w", ErrWrongVersion)
|
||||
}
|
||||
|
||||
return rle, nil
|
||||
}
|
||||
|
||||
func (rle *RLE) check() error {
|
||||
ver := rle.vec.Take(0, 2, bitvector.LSB0)
|
||||
if ver != Version {
|
||||
return ErrWrongVersion
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rle *RLE) RunIterator() (RunIterator, error) {
|
||||
vit := rle.vec.Iterator(bitvector.LSB0)
|
||||
vit(2) // Take version
|
||||
|
||||
it := &rleIterator{next: vit}
|
||||
// next run is previous in relation to prep
|
||||
// so we invert the value
|
||||
it.nextRun.Val = vit(1) != 1
|
||||
if err := it.prep(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return it, nil
|
||||
return DecodeRLE(rle.buf)
|
||||
}
|
||||
|
||||
type rleIterator struct {
|
||||
next func(uint) byte
|
||||
|
||||
nextRun Run
|
||||
func (rle *RLE) Set(index uint64) {
|
||||
rle.changes = append(rle.changes, change{set: true, index: index})
|
||||
}
|
||||
|
||||
func (it *rleIterator) HasNext() bool {
|
||||
return it.nextRun.Valid()
|
||||
}
|
||||
|
||||
func (it *rleIterator) NextRun() (Run, error) {
|
||||
ret := it.nextRun
|
||||
return ret, it.prep()
|
||||
}
|
||||
|
||||
func (it *rleIterator) prep() error {
|
||||
x := it.next(1)
|
||||
|
||||
switch x {
|
||||
case 1:
|
||||
it.nextRun.Len = 1
|
||||
|
||||
case 0:
|
||||
y := it.next(1)
|
||||
switch y {
|
||||
case 1:
|
||||
it.nextRun.Len = uint64(it.next(4))
|
||||
case 0:
|
||||
var buf = make([]byte, 0, 10)
|
||||
for {
|
||||
b := it.next(8)
|
||||
buf = append(buf, b)
|
||||
if b&0x80 == 0 {
|
||||
break
|
||||
}
|
||||
if len(buf) > 10 {
|
||||
return xerrors.Errorf("run too long: %w", ErrDecode)
|
||||
}
|
||||
}
|
||||
it.nextRun.Len, _ = binary.Uvarint(buf)
|
||||
}
|
||||
}
|
||||
|
||||
it.nextRun.Val = !it.nextRun.Val
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rle *RLE) Iterator() (*iterator, error) {
|
||||
vit := rle.vec.Iterator(bitvector.LSB0)
|
||||
vit(2) // Take version
|
||||
|
||||
it := &iterator{next: vit}
|
||||
if err := it.prep(vit(1)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return it, nil
|
||||
}
|
||||
|
||||
type iterator struct {
|
||||
next func(uint) byte
|
||||
|
||||
curIdx uint64
|
||||
rep uint64
|
||||
}
|
||||
|
||||
func (it *iterator) HasNext() bool {
|
||||
return it.rep != 0
|
||||
}
|
||||
|
||||
func (it *iterator) prep(curBit byte) error {
|
||||
|
||||
loop:
|
||||
for it.rep == 0 {
|
||||
x := it.next(1)
|
||||
switch x {
|
||||
case 1:
|
||||
it.rep = 1
|
||||
case 0:
|
||||
y := it.next(1)
|
||||
switch y {
|
||||
case 1:
|
||||
it.rep = uint64(it.next(4))
|
||||
case 0:
|
||||
var buf = make([]byte, 0, 10)
|
||||
for {
|
||||
b := it.next(8)
|
||||
buf = append(buf, b)
|
||||
if b&0x80 == 0 {
|
||||
break
|
||||
}
|
||||
if len(buf) > 10 {
|
||||
return xerrors.Errorf("run too long: %w", ErrDecode)
|
||||
}
|
||||
}
|
||||
it.rep, _ = binary.Uvarint(buf)
|
||||
}
|
||||
|
||||
// run with 0 length means end
|
||||
if it.rep == 0 {
|
||||
break loop
|
||||
}
|
||||
}
|
||||
|
||||
if curBit == 0 {
|
||||
curBit = 1
|
||||
it.curIdx = it.curIdx + it.rep
|
||||
it.rep = 0
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (it *iterator) Next() (uint64, error) {
|
||||
it.rep--
|
||||
res := it.curIdx
|
||||
it.curIdx++
|
||||
return res, it.prep(0)
|
||||
func (rle *RLE) Clear(index uint64) {
|
||||
rle.changes = append(rle.changes, change{set: false, index: index})
|
||||
}
|
||||
|
73
lib/rlepluslazy/rleplus_reader.go
Normal file
73
lib/rlepluslazy/rleplus_reader.go
Normal file
@ -0,0 +1,73 @@
|
||||
package rlepluslazy
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
func DecodeRLE(buf []byte) (RunIterator, error) {
|
||||
bv := readBitvec(buf)
|
||||
|
||||
ver := bv.Get(2) // Read version
|
||||
if ver != Version {
|
||||
return nil, ErrWrongVersion
|
||||
}
|
||||
|
||||
it := &rleIterator{bv: bv}
|
||||
|
||||
// next run is previous in relation to prep
|
||||
// so we invert the value
|
||||
it.nextRun.Val = bv.Get(1) != 1
|
||||
if err := it.prep(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return it, nil
|
||||
}
|
||||
|
||||
type rleIterator struct {
|
||||
bv *rbitvec
|
||||
|
||||
nextRun Run
|
||||
}
|
||||
|
||||
func (it *rleIterator) HasNext() bool {
|
||||
return it.nextRun.Valid()
|
||||
}
|
||||
|
||||
func (it *rleIterator) NextRun() (Run, error) {
|
||||
ret := it.nextRun
|
||||
return ret, it.prep()
|
||||
}
|
||||
|
||||
func (it *rleIterator) prep() error {
|
||||
x := it.bv.Get(1)
|
||||
|
||||
switch x {
|
||||
case 1:
|
||||
it.nextRun.Len = 1
|
||||
|
||||
case 0:
|
||||
y := it.bv.Get(1)
|
||||
switch y {
|
||||
case 1:
|
||||
it.nextRun.Len = uint64(it.bv.Get(4))
|
||||
case 0:
|
||||
var buf = make([]byte, 0, 10)
|
||||
for {
|
||||
b := it.bv.Get(8)
|
||||
buf = append(buf, b)
|
||||
if b&0x80 == 0 {
|
||||
break
|
||||
}
|
||||
if len(buf) > 10 {
|
||||
return xerrors.Errorf("run too long: %w", ErrDecode)
|
||||
}
|
||||
}
|
||||
it.nextRun.Len, _ = binary.Uvarint(buf)
|
||||
}
|
||||
}
|
||||
|
||||
it.nextRun.Val = !it.nextRun.Val
|
||||
return nil
|
||||
}
|
@ -27,7 +27,7 @@ func TestDecode(t *testing.T) {
|
||||
assert.Equal(t, len(referenceEncoding), len(encoded))
|
||||
assert.Equal(t, referenceEncoding, encoded)
|
||||
|
||||
rle, err := FromBuf(referenceEncoding)
|
||||
rle, err := FromBuf(encoded)
|
||||
assert.NoError(t, err)
|
||||
decoded := make([]uint64, 0, len(expectedNumbers))
|
||||
|
||||
@ -105,22 +105,6 @@ func TestGoldenLoop(t *testing.T) {
|
||||
|
||||
var Res uint64 = 0
|
||||
|
||||
func BenchmarkIterator(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
var r uint64
|
||||
for i := 0; i < b.N; i++ {
|
||||
rle, _ := FromBuf(goldenRLE)
|
||||
it, _ := rle.Iterator()
|
||||
for it.HasNext() {
|
||||
bit, _ := it.Next()
|
||||
if bit < 1<<63 {
|
||||
r++
|
||||
}
|
||||
}
|
||||
}
|
||||
Res = Res + r
|
||||
}
|
||||
|
||||
func BenchmarkRunIterator(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
var r uint64
|
||||
@ -167,20 +151,20 @@ func BenchmarkOldRLE(b *testing.B) {
|
||||
func BenchmarkDecodeEncode(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
var r uint64
|
||||
out := make([]byte, 0, len(goldenRLE))
|
||||
for i := 0; i < b.N; i++ {
|
||||
rle, _ := FromBuf(goldenRLE)
|
||||
rit, _ := rle.RunIterator()
|
||||
out, _ = EncodeRuns(rit, out)
|
||||
r = r + uint64(len(out))
|
||||
}
|
||||
|
||||
/*
|
||||
out := make([]byte, 0, len(goldenRLE))
|
||||
for i := 0; i < b.N; i++ {
|
||||
rle, _ := rleplus.Decode(goldenRLE)
|
||||
out, _, _ := rleplus.Encode(rle)
|
||||
rle, _ := FromBuf(goldenRLE)
|
||||
rit, _ := rle.RunIterator()
|
||||
out, _ = EncodeRuns(rit, out)
|
||||
r = r + uint64(len(out))
|
||||
}
|
||||
*/
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
rle, _ := rleplus.Decode(goldenRLE)
|
||||
out, _, _ := rleplus.Encode(rle)
|
||||
r = r + uint64(len(out))
|
||||
}
|
||||
Res = Res + r
|
||||
}
|
||||
|
@ -2,13 +2,11 @@ package rlepluslazy
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
|
||||
bitvector "github.com/filecoin-project/go-lotus/lib/rlepluslazy/internal"
|
||||
)
|
||||
|
||||
func EncodeRuns(rit RunIterator, buf []byte) ([]byte, error) {
|
||||
v := bitvector.NewBitVector(buf[:0], bitvector.LSB0)
|
||||
v.Extend(0, 2, bitvector.LSB0) // Version
|
||||
bv := writeBitvec(buf)
|
||||
bv.Put(0, 2)
|
||||
|
||||
first := true
|
||||
varBuf := make([]byte, binary.MaxVarintLen64)
|
||||
@ -21,35 +19,33 @@ func EncodeRuns(rit RunIterator, buf []byte) ([]byte, error) {
|
||||
|
||||
if first {
|
||||
if run.Val {
|
||||
v.Push(1)
|
||||
bv.Put(1, 1)
|
||||
} else {
|
||||
v.Push(0)
|
||||
bv.Put(0, 1)
|
||||
}
|
||||
first = false
|
||||
}
|
||||
|
||||
switch {
|
||||
case run.Len == 1:
|
||||
v.Push(1)
|
||||
bv.Put(1, 1)
|
||||
case run.Len < 16:
|
||||
v.Push(0)
|
||||
v.Push(1)
|
||||
v.Extend(byte(run.Len), 4, bitvector.LSB0)
|
||||
bv.Put(2, 2)
|
||||
bv.Put(byte(run.Len), 4)
|
||||
case run.Len >= 16:
|
||||
v.Push(0)
|
||||
v.Push(0)
|
||||
bv.Put(0, 2)
|
||||
numBytes := binary.PutUvarint(varBuf, run.Len)
|
||||
for i := 0; i < numBytes; i++ {
|
||||
v.Extend(varBuf[i], 8, bitvector.LSB0)
|
||||
bv.Put(varBuf[i], 8)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if first {
|
||||
v.Push(0)
|
||||
bv.Put(0, 1)
|
||||
}
|
||||
|
||||
return v.Buf, nil
|
||||
return bv.Out(), nil
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user