Optimize and start wrapping it up

License: MIT
Signed-off-by: Jakub Sztandera <kubuxu@protonmail.ch>
This commit is contained in:
Jakub Sztandera 2019-09-23 19:58:58 +02:00 committed by Jakub Sztandera
parent d3dfd8a73b
commit 01c0a6ec9f
No known key found for this signature in database
GPG Key ID: 9A9AF56F8B3879BA
5 changed files with 200 additions and 182 deletions

88
lib/rlepluslazy/bitvec.go Normal file
View File

@ -0,0 +1,88 @@
package rlepluslazy
// rbitvec is a read cursor over a byte slice that hands out bits in
// least-significant-bit-first order, up to 8 at a time.
type rbitvec struct {
	index  int    // index in vec of the next byte to merge into bits
	bits   uint16 // staging window; the next unread bit is bit 0
	bitCap byte   // number of buffered bits accounted for in bits
	vec    []byte // data being read; never modified
}

// readBitvec returns a reader positioned at the first bit of vec.
//
// The first byte (if any) is preloaded into the staging window, so the reader
// starts out holding 8 buffered bits with index pointing at the second byte.
// Starting with index and bitCap at zero instead would make the first Get
// underflow bitCap and postpone merging the next byte by one call, which
// yields wrong bits whenever an early read crosses the first byte boundary.
func readBitvec(vec []byte) *rbitvec {
	bv := &rbitvec{
		vec:    vec,
		index:  1,
		bitCap: 8,
	}
	if len(vec) > 0 {
		bv.bits = uint16(vec[0])
	}
	return bv
}

// minCap and maxCap are not referenced by the code visible here; presumably
// they record the intended bounds on bitCap between calls (the reader keeps at
// least 8 bits buffered, and the uint16 window holds at most 16) — TODO confirm.
const (
	minCap = 8
	maxCap = 16
)

// bitMasks[n] has the n lowest bits set, for n in 0..8.
var bitMasks = [9]byte{
	0x0,
	0x1,
	0x3,
	0x7,
	0xF,
	0x1F,
	0x3F,
	0x7F,
	0xFF,
}

// Get consumes count bits and returns them in the low bits of the result.
//
// count must be at most 8; a larger value indexes past the end of bitMasks and
// panics. Once vec is exhausted the missing bits read as zero.
func (bv *rbitvec) Get(count byte) byte {
	res := byte(bv.bits) & bitMasks[count] // select the requested bits
	bv.bits >>= count                      // drop them from the window
	bv.bitCap -= count

	if bv.index < len(bv.vec) {
		// Merge the next byte just above the bits still buffered. When that
		// byte was already partially merged by an earlier call, this ORs the
		// same bits into the same positions and is harmless.
		bv.bits |= uint16(bv.vec[bv.index]) << bv.bitCap
	}

	// Here be dragons: branch-free form of
	//
	//	if bv.bitCap < 8 {
	//		bv.index++
	//		bv.bitCap += 8
	//	}
	//
	// bitCap is a byte, so bitCap-8 wraps around when bitCap < 8 and sets the
	// top bit; shifting right by 7 moves that bit into position 0.
	inc := (bv.bitCap - 8) >> 7 // inc == 1 iff bitCap < 8 (+10% perf)
	bv.index += int(inc)
	bv.bitCap += inc << 3
	return res
}
// writeBitvec returns a bit writer that reuses buf's backing array for its
// output. Any existing contents of buf are discarded (the slice is truncated
// to length zero) and may be overwritten.
func writeBitvec(buf []byte) *wbitvec {
	return &wbitvec{buf: buf[:0]}
}

// wbitvec accumulates bits in least-significant-bit-first order and spills
// them into buf one byte at a time.
type wbitvec struct {
	buf    []byte // output bytes; buf[index] mirrors the partially filled byte
	index  int    // index in buf of the byte currently being filled
	bits   uint16 // staging window holding the bits not yet finalized
	bitCap byte   // number of pending bits in the staging window
}

// Out flushes any pending bits and returns the encoded bytes.
//
// The returned slice aliases the writer's buffer.
func (bv *wbitvec) Out() []byte {
	if bv.bitCap != 0 {
		// Make sure the partially filled byte is part of the output.
		bv.buf = append(bv.buf, 0)[:bv.index+1]
		bv.buf[bv.index] = byte(bv.bits)
	}
	if bv.bitCap > 8 {
		// More than one byte is pending: emit the high byte of the window.
		// The value must come from bits; bitCap is a single byte, so shifting
		// it right by 8 always produced zero. Put leaves bitCap below 8 when
		// called with count <= 8, so this path is only a safety net.
		bv.buf = append(bv.buf, byte(bv.bits>>8))
	}
	return bv.buf
}

// Put appends the count lowest bits of val to the stream.
//
// count must be at most 8 and val must not have bits set at or above position
// count; val is not masked here, so stray high bits would leak into the
// output.
func (bv *wbitvec) Put(val byte, count byte) {
	// Place val directly above the bits that are already pending.
	bv.bits |= uint16(val) << bv.bitCap
	bv.bitCap += count

	// Grow the buffer if needed, but keep its length at index+1.
	bv.buf = append(bv.buf, 0)[:bv.index+1]
	// Always store the low byte; it is rewritten until the byte is complete.
	bv.buf[bv.index] = byte(bv.bits)

	// Warning, dragons again: branch-free form of
	//
	//	if bv.bitCap >= 8 {
	//		bv.index++
	//		bv.bitCap -= 8
	//		bv.bits >>= 8
	//	}
	//
	// bitCap-8 wraps around (top bit set) when bitCap < 8, so its complement
	// has the top bit set exactly when bitCap >= 8.
	inc := ^(bv.bitCap - 8) >> 7 // inc == 1 iff bitCap >= 8
	bv.index += int(inc)
	bv.bitCap -= inc << 3
	bv.bits >>= inc << 3
}

View File

@ -1,11 +1,9 @@
package rlepluslazy
import (
"encoding/binary"
"errors"
"fmt"
bitvector "github.com/filecoin-project/go-lotus/lib/rlepluslazy/internal"
"golang.org/x/xerrors"
)
@ -17,155 +15,34 @@ var (
)
type RLE struct {
vec *bitvector.BitVector
buf []byte
changes []change
}
type change struct {
set bool
index uint64
}
func FromBuf(buf []byte) (*RLE, error) {
rle := &RLE{vec: bitvector.NewBitVector(buf, bitvector.LSB0)}
rle := &RLE{buf: buf}
if err := rle.check(); err != nil {
return nil, xerrors.Errorf("could not create RLE+ for a buffer: %w", err)
if len(buf) > 0 && buf[0]&3 != Version {
return nil, xerrors.Errorf("could not create RLE+ for a buffer: %w", ErrWrongVersion)
}
return rle, nil
}
func (rle *RLE) check() error {
ver := rle.vec.Take(0, 2, bitvector.LSB0)
if ver != Version {
return ErrWrongVersion
}
return nil
}
func (rle *RLE) RunIterator() (RunIterator, error) {
vit := rle.vec.Iterator(bitvector.LSB0)
vit(2) // Take version
it := &rleIterator{next: vit}
// next run is previous in relation to prep
// so we invert the value
it.nextRun.Val = vit(1) != 1
if err := it.prep(); err != nil {
return nil, err
}
return it, nil
return DecodeRLE(rle.buf)
}
type rleIterator struct {
next func(uint) byte
nextRun Run
func (rle *RLE) Set(index uint64) {
rle.changes = append(rle.changes, change{set: true, index: index})
}
func (it *rleIterator) HasNext() bool {
return it.nextRun.Valid()
}
func (it *rleIterator) NextRun() (Run, error) {
ret := it.nextRun
return ret, it.prep()
}
func (it *rleIterator) prep() error {
x := it.next(1)
switch x {
case 1:
it.nextRun.Len = 1
case 0:
y := it.next(1)
switch y {
case 1:
it.nextRun.Len = uint64(it.next(4))
case 0:
var buf = make([]byte, 0, 10)
for {
b := it.next(8)
buf = append(buf, b)
if b&0x80 == 0 {
break
}
if len(buf) > 10 {
return xerrors.Errorf("run too long: %w", ErrDecode)
}
}
it.nextRun.Len, _ = binary.Uvarint(buf)
}
}
it.nextRun.Val = !it.nextRun.Val
return nil
}
func (rle *RLE) Iterator() (*iterator, error) {
vit := rle.vec.Iterator(bitvector.LSB0)
vit(2) // Take version
it := &iterator{next: vit}
if err := it.prep(vit(1)); err != nil {
return nil, err
}
return it, nil
}
type iterator struct {
next func(uint) byte
curIdx uint64
rep uint64
}
func (it *iterator) HasNext() bool {
return it.rep != 0
}
func (it *iterator) prep(curBit byte) error {
loop:
for it.rep == 0 {
x := it.next(1)
switch x {
case 1:
it.rep = 1
case 0:
y := it.next(1)
switch y {
case 1:
it.rep = uint64(it.next(4))
case 0:
var buf = make([]byte, 0, 10)
for {
b := it.next(8)
buf = append(buf, b)
if b&0x80 == 0 {
break
}
if len(buf) > 10 {
return xerrors.Errorf("run too long: %w", ErrDecode)
}
}
it.rep, _ = binary.Uvarint(buf)
}
// run with 0 length means end
if it.rep == 0 {
break loop
}
}
if curBit == 0 {
curBit = 1
it.curIdx = it.curIdx + it.rep
it.rep = 0
}
}
return nil
}
func (it *iterator) Next() (uint64, error) {
it.rep--
res := it.curIdx
it.curIdx++
return res, it.prep(0)
func (rle *RLE) Clear(index uint64) {
rle.changes = append(rle.changes, change{set: false, index: index})
}

View File

@ -0,0 +1,73 @@
package rlepluslazy
import (
"encoding/binary"
"golang.org/x/xerrors"
)
// DecodeRLE returns a RunIterator over the runs encoded in buf.
//
// The stream starts with a 2-bit version field; ErrWrongVersion is returned
// when it does not equal Version. The following bit gives the value of the
// first run. The first run is decoded eagerly, so a decoding error in it is
// reported here rather than by the iterator.
func DecodeRLE(buf []byte) (RunIterator, error) {
	reader := readBitvec(buf)
	if reader.Get(2) != Version {
		return nil, ErrWrongVersion
	}

	iter := &rleIterator{bv: reader}

	// prep flips Val before exposing each run, so seed it with the inverse of
	// the first-run bit.
	firstBit := reader.Get(1)
	iter.nextRun.Val = firstBit != 1

	err := iter.prep()
	if err != nil {
		return nil, err
	}
	return iter, nil
}
// rleIterator walks the runs of an encoded stream, keeping one decoded run
// ahead of the caller.
type rleIterator struct {
	// bv is the bit reader the runs are decoded from.
	bv *rbitvec
	// nextRun is the run that the next call to NextRun will return.
	nextRun Run
}
// HasNext reports whether the run decoded ahead of the caller is valid, as
// determined by Run.Valid.
func (it *rleIterator) HasNext() bool {
	return it.nextRun.Valid()
}
// NextRun returns the run that was decoded ahead of time and then decodes the
// one after it. The returned error comes from decoding that following run; the
// returned run itself was already decoded successfully.
func (it *rleIterator) NextRun() (Run, error) {
	current := it.nextRun
	err := it.prep()
	return current, err
}
// prep decodes the next run from the bit stream into it.nextRun.
//
// A run length is encoded in one of three ways, selected by its leading bits:
//
//   - a 1 bit: the run has length 1;
//   - a 0 bit then a 1 bit: the length is in the next 4 bits;
//   - two 0 bits: the length follows as an unsigned varint, one byte per
//     8 bits read.
//
// After the length is read the run value is flipped, since consecutive runs
// alternate. An error wrapping ErrDecode is returned when the varint is longer
// than binary.MaxVarintLen64 bytes or does not fit in a uint64; in that case
// it.nextRun is left unchanged.
func (it *rleIterator) prep() error {
	if it.bv.Get(1) == 1 {
		it.nextRun.Len = 1
	} else if it.bv.Get(1) == 1 {
		it.nextRun.Len = uint64(it.bv.Get(4))
	} else {
		var varBuf [binary.MaxVarintLen64]byte
		n := 0
		for {
			b := it.bv.Get(8)
			varBuf[n] = b
			n++
			if b&0x80 == 0 {
				break
			}
			// A continuation bit on the last permitted byte can never be
			// part of a valid uint64 varint; stop before overrunning varBuf.
			if n == len(varBuf) {
				return xerrors.Errorf("run too long: %w", ErrDecode)
			}
		}

		length, read := binary.Uvarint(varBuf[:n])
		if read <= 0 {
			// Uvarint signals overflow with a non-positive count and a zero
			// value; do not let that pass for a zero-length run.
			return xerrors.Errorf("invalid run length: %w", ErrDecode)
		}
		it.nextRun.Len = length
	}

	it.nextRun.Val = !it.nextRun.Val
	return nil
}

View File

@ -27,7 +27,7 @@ func TestDecode(t *testing.T) {
assert.Equal(t, len(referenceEncoding), len(encoded))
assert.Equal(t, referenceEncoding, encoded)
rle, err := FromBuf(referenceEncoding)
rle, err := FromBuf(encoded)
assert.NoError(t, err)
decoded := make([]uint64, 0, len(expectedNumbers))
@ -105,22 +105,6 @@ func TestGoldenLoop(t *testing.T) {
var Res uint64 = 0
func BenchmarkIterator(b *testing.B) {
b.ReportAllocs()
var r uint64
for i := 0; i < b.N; i++ {
rle, _ := FromBuf(goldenRLE)
it, _ := rle.Iterator()
for it.HasNext() {
bit, _ := it.Next()
if bit < 1<<63 {
r++
}
}
}
Res = Res + r
}
func BenchmarkRunIterator(b *testing.B) {
b.ReportAllocs()
var r uint64
@ -167,20 +151,20 @@ func BenchmarkOldRLE(b *testing.B) {
func BenchmarkDecodeEncode(b *testing.B) {
b.ReportAllocs()
var r uint64
out := make([]byte, 0, len(goldenRLE))
for i := 0; i < b.N; i++ {
rle, _ := FromBuf(goldenRLE)
rit, _ := rle.RunIterator()
out, _ = EncodeRuns(rit, out)
r = r + uint64(len(out))
}
/*
out := make([]byte, 0, len(goldenRLE))
for i := 0; i < b.N; i++ {
rle, _ := rleplus.Decode(goldenRLE)
out, _, _ := rleplus.Encode(rle)
rle, _ := FromBuf(goldenRLE)
rit, _ := rle.RunIterator()
out, _ = EncodeRuns(rit, out)
r = r + uint64(len(out))
}
*/
for i := 0; i < b.N; i++ {
rle, _ := rleplus.Decode(goldenRLE)
out, _, _ := rleplus.Encode(rle)
r = r + uint64(len(out))
}
Res = Res + r
}

View File

@ -2,13 +2,11 @@ package rlepluslazy
import (
"encoding/binary"
bitvector "github.com/filecoin-project/go-lotus/lib/rlepluslazy/internal"
)
func EncodeRuns(rit RunIterator, buf []byte) ([]byte, error) {
v := bitvector.NewBitVector(buf[:0], bitvector.LSB0)
v.Extend(0, 2, bitvector.LSB0) // Version
bv := writeBitvec(buf)
bv.Put(0, 2)
first := true
varBuf := make([]byte, binary.MaxVarintLen64)
@ -21,35 +19,33 @@ func EncodeRuns(rit RunIterator, buf []byte) ([]byte, error) {
if first {
if run.Val {
v.Push(1)
bv.Put(1, 1)
} else {
v.Push(0)
bv.Put(0, 1)
}
first = false
}
switch {
case run.Len == 1:
v.Push(1)
bv.Put(1, 1)
case run.Len < 16:
v.Push(0)
v.Push(1)
v.Extend(byte(run.Len), 4, bitvector.LSB0)
bv.Put(2, 2)
bv.Put(byte(run.Len), 4)
case run.Len >= 16:
v.Push(0)
v.Push(0)
bv.Put(0, 2)
numBytes := binary.PutUvarint(varBuf, run.Len)
for i := 0; i < numBytes; i++ {
v.Extend(varBuf[i], 8, bitvector.LSB0)
bv.Put(varBuf[i], 8)
}
}
}
if first {
v.Push(0)
bv.Put(0, 1)
}
return v.Buf, nil
return bv.Out(), nil
}