fixes for issues uncovered in integration

This commit is contained in:
Ian Norden 2019-05-17 10:23:39 -05:00
parent b83c0371d9
commit 34393ffb3f
290 changed files with 108690 additions and 893 deletions

Gopkg.lock (generated, 135 lines changed)

@@ -13,6 +13,14 @@
pruneopts = "UT"
revision = "65cc252bf6691cb3c7014bcb2c8dc29de91e3a7e"
[[projects]]
branch = "master"
digest = "1:6716c9fe6333591128e72848f246fc01dc72240e1e64185d8b4e124e7280b35d"
name = "github.com/AndreasBriese/bbloom"
packages = ["."]
pruneopts = "UT"
revision = "e2d15f34fcf99d5dbb871c820ec73f710fca9815"
[[projects]]
branch = "master"
digest = "1:482fe066e308f0058abdfa302b9b5ff0fa4e89f6c55f103a2ac5e0af329f72cd"
@@ -120,6 +128,37 @@
revision = "cbaa98ba5575e67703b32b4b19f73c91f3c4159e"
version = "v1.7.1"
[[projects]]
digest = "1:5f5090f05382959db941fa45acbeb7f4c5241aa8ac0f8f4393dec696e5953f53"
name = "github.com/dgraph-io/badger"
packages = [
".",
"options",
"protos",
"skl",
"table",
"y",
]
pruneopts = "UT"
revision = "99233d725dbdd26d156c61b2f42ae1671b794656"
version = "gx/v1.5.4"
[[projects]]
branch = "master"
digest = "1:6e8109ce247a59ab1eeb5330166c12735f6590de99c9647b6162d11518d32c9a"
name = "github.com/dgryski/go-farm"
packages = ["."]
pruneopts = "UT"
revision = "6a90982ecee230ff6cba02d5bd386acc030be9d3"
[[projects]]
digest = "1:6f9339c912bbdda81302633ad7e99a28dfa5a639c864061f1929510a9a64aa74"
name = "github.com/dustin/go-humanize"
packages = ["."]
pruneopts = "UT"
revision = "9f541cc9db5d55bce703bd99987c9d5cb8eea45e"
version = "v1.0.0"
[[projects]]
digest = "1:edb569dd02419a41ddd98768cc0e7aec922ef19dae139731e5ca750afcf6f4c5"
name = "github.com/edsrzf/mmap-go"
@@ -129,8 +168,8 @@
version = "v1.0.0"
[[projects]]
branch = "rpc_statediffs_at_head"
digest = "1:02b56bb807b0b602f7d64b786c3ad5277f0ee2dc841738904b0bd14576f4d9ed"
branch = "rpc_statediffing"
digest = "1:134065ee8e48c2543ac07aa4db259518cd125725c176ebf8262c3abc27daa227"
name = "github.com/ethereum/go-ethereum"
packages = [
".",
@@ -192,7 +231,7 @@
"trie",
]
pruneopts = "T"
revision = "edf001e1d2296951e7e592c55e66ce074bd62807"
revision = "3018a1b5a4e2e4153874087fa4d8f3597ee4a17c"
source = "github.com/vulcanize/go-ethereum"
[[projects]]
@@ -469,6 +508,30 @@
revision = "aa9190c18f1576be98e974359fd08c64ca0b5a94"
version = "v0.0.5"
[[projects]]
digest = "1:8270de0224f4c8ef01e23463a6c6f2a5026a2d3ccf3f2e3145ffcd67d7b9a62c"
name = "github.com/ipfs/go-ds-badger"
packages = ["."]
pruneopts = "UT"
revision = "7fe0af0808f565d460fa8d3851a5808d77f72628"
version = "v0.0.3"
[[projects]]
digest = "1:f896dc92ae70c70f57ac8d47c0aa3e9fe185afcc35ee807975a621766ee6028f"
name = "github.com/ipfs/go-ds-flatfs"
packages = ["."]
pruneopts = "UT"
revision = "d5e3c1fa14d2fcc187a4a996eea3f48de9d7a5cd"
version = "v0.0.2"
[[projects]]
digest = "1:ab70bd10c780d127a66393a14061ae69ae0145027e7207b7c43db68524f3f64a"
name = "github.com/ipfs/go-ds-leveldb"
packages = ["."]
pruneopts = "UT"
revision = "47a9627082eeb3e52570a75eb4fdfaff8b2f19a9"
version = "v0.0.2"
[[projects]]
digest = "1:afbc88b3730097cd76ea72695941270547a5b3ed00d870ee0612897ac9943d79"
name = "github.com/ipfs/go-ds-measure"
@@ -486,11 +549,12 @@
version = "v0.0.1"
[[projects]]
digest = "1:15f5e953da8605b4edc4e2d5bae64680fe31a8c8da066bcfea4fb87112c8187c"
digest = "1:7b3d464292c42bb5d6e816688aaeb29195ce99df4fbd207e44db63fc38af859d"
name = "github.com/ipfs/go-ipfs"
packages = [
".",
"core",
"core/coredag",
"dagutils",
"exchange/reprovide",
"filestore",
@@ -502,6 +566,12 @@
"p2p",
"pin",
"pin/internal/pb",
"plugin",
"plugin/loader",
"plugin/plugins/badgerds",
"plugin/plugins/flatfs",
"plugin/plugins/git",
"plugin/plugins/levelds",
"provider",
"repo",
"repo/common",
@@ -760,6 +830,14 @@
revision = "e6e9ea4d16a85d09cafb4dace15b978e984fa672"
version = "v0.0.1"
[[projects]]
digest = "1:fe257dab08c7455ab2afb1836d9933f09719d53e16c61df9a2e23316798ccfc2"
name = "github.com/ipfs/go-ipld-git"
packages = ["."]
pruneopts = "UT"
revision = "ee620e932c0cf00124e7c1c6f434eb89530caaf8"
version = "v0.0.2"
[[projects]]
digest = "1:4638b57014e4a204350087e3a2d5631f8aaa197bb6af688ca6e280457a7a46fa"
name = "github.com/ipfs/go-ipns"
@@ -1531,6 +1609,28 @@
revision = "fa473d140ef3c6adf42d6b391fe76707f1f243c8"
version = "v1.0.0"
[[projects]]
branch = "master"
digest = "1:ae08d850ba158ea3ba4a7bb90f8372608172d8920644e5a6693b940a1f4e5d01"
name = "github.com/mmcloughlin/avo"
packages = [
"attr",
"build",
"buildtags",
"gotypes",
"internal/prnt",
"internal/stack",
"ir",
"operand",
"pass",
"printer",
"reg",
"src",
"x86",
]
pruneopts = "UT"
revision = "83fbad1a6b3cba8ac7711170e57953fd12cdc40a"
[[projects]]
digest = "1:cf5b7fbff2c87cff6c0e11f87b30edc21abc6592e6a76f41003ca6d5a712cf48"
name = "github.com/mr-tron/base58"
@@ -1861,7 +1961,7 @@
"pkg/wrappers/rlp",
]
pruneopts = "UT"
revision = "b24f61a2b476a6ca31d1b182ca6c4838534b96ab"
revision = "97be848bcc7036b354e7d7b6f10a7a3ac6eed1b1"
[[projects]]
branch = "master"
@@ -2038,7 +2138,7 @@
[[projects]]
branch = "master"
digest = "1:e3fb02bc270f8fc06628d2a1dc6811d3753ccaef05ad060c9f6e7c2340ca0e1f"
digest = "1:3d7db3c6e27f9667f5f7c187e18972af1a0e1c6476e0d82c78c78bad398a1442"
name = "golang.org/x/net"
packages = [
"bpf",
@@ -2050,8 +2150,10 @@
]
"internal/iana",
"internal/socket",
"internal/timeseries",
"ipv4",
"ipv6",
"trace",
"websocket",
]
pruneopts = "UT"
@@ -2106,6 +2208,24 @@
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
version = "v0.3.0"
[[projects]]
branch = "master"
digest = "1:f5ce0b59aeb99ebb725de7b7a35483600e5f119b4daf081e88329322a2de95bc"
name = "golang.org/x/tools"
packages = [
"go/ast/astutil",
"go/gcexportdata",
"go/internal/gcimporter",
"go/internal/packagesdriver",
"go/packages",
"go/types/typeutil",
"internal/fastwalk",
"internal/gopathwalk",
"internal/semver",
]
pruneopts = "UT"
revision = "bffc5affc6df36a7c1fee87811e47b69912e721f"
[[projects]]
branch = "master"
digest = "1:d9d3a231c70b17470fc134900032da48d5c146f0f4a2ce12ceb92704dae8d67d"
@@ -2175,6 +2295,8 @@
"github.com/ethereum/go-ethereum/crypto",
"github.com/ethereum/go-ethereum/ethclient",
"github.com/ethereum/go-ethereum/ethdb",
"github.com/ethereum/go-ethereum/log",
"github.com/ethereum/go-ethereum/node",
"github.com/ethereum/go-ethereum/p2p",
"github.com/ethereum/go-ethereum/p2p/discv5",
"github.com/ethereum/go-ethereum/rlp",
@@ -2186,6 +2308,7 @@
"github.com/ipfs/go-blockservice",
"github.com/ipfs/go-cid",
"github.com/ipfs/go-ipfs/core",
"github.com/ipfs/go-ipfs/plugin/loader",
"github.com/ipfs/go-ipfs/repo/fsrepo",
"github.com/jmoiron/sqlx",
"github.com/lib/pq",


@@ -62,6 +62,10 @@
name = "github.com/ipfs/go-ipfs"
version = "0.4.20"
[[override]]
name = "github.com/ipfs/go-ds-badger"
version = "0.0.3"
[prune]
go-tests = true
unused-packages = true


@@ -66,7 +66,7 @@ func syncAndPublish() {
}
wg := &syn.WaitGroup{}
err = processor.Process(wg)
err = processor.SyncAndPublish(wg, nil, nil)
if err != nil {
log.Fatal(err)
}


@@ -29,5 +29,5 @@ type RPCClient interface {
BatchCall(batch []client.BatchElem) error
IpcPath() string
SupportedModules() (map[string]string, error)
Subscribe(namespace string, payloadChan interface{}, args ...interface{}) (*rpc.ClientSubscription, error)
Subscribe(namespace string, payloadChan interface{}, subName string, args ...interface{}) (*rpc.ClientSubscription, error)
}
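The added subName argument mirrors go-ethereum's rpc.Client.Subscribe, which treats the first variadic argument as the name of the subscription method to invoke. A minimal sketch of how a concrete client might forward it (the rpcClient wrapper below is a hypothetical illustration, not code from this commit):

```go
package client

import (
	"context"

	"github.com/ethereum/go-ethereum/rpc"
)

// rpcClient is a hypothetical concrete implementation of the RPCClient interface.
type rpcClient struct {
	client *rpc.Client
}

// Subscribe passes subName as the first variadic argument, which is where
// go-ethereum's rpc.Client expects the subscription method name.
func (c *rpcClient) Subscribe(namespace string, payloadChan interface{}, subName string, args ...interface{}) (*rpc.ClientSubscription, error) {
	allArgs := append([]interface{}{subName}, args...)
	return c.client.Subscribe(context.Background(), namespace, payloadChan, allArgs...)
}
```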


@@ -188,6 +188,6 @@ func (client *MockRPCClient) AssertBatchCalledWith(method string, lengthOfBatch
Expect(client.passedMethod).To(Equal(method))
}
func (client *MockRpcClient) Subscribe(namespace string, payloadChan interface{}, args ...interface{}) (*rpc.ClientSubscription, error) {
func (client *MockRpcClient) Subscribe(namespace string, payloadChan interface{}, subName string, args ...interface{}) (*rpc.ClientSubscription, error) {
panic("implement me")
}


@@ -42,7 +42,7 @@ func NewPublicSeedNodeAPI(snp SyncPublishAndServe) *PublicSeedNodeAPI {
}
// Subscribe is the public method to setup a subscription that fires off state-diff payloads as they are created
func (api *PublicSeedNodeAPI) Subscribe(ctx context.Context, params *Params) (*rpc.Subscription, error) {
func (api *PublicSeedNodeAPI) Subscribe(ctx context.Context, payloadChan chan ResponsePayload, params *Params) (*rpc.Subscription, error) {
// ensure that the RPC connection supports subscriptions
notifier, supported := rpc.NotifierFromContext(ctx)
if !supported {
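For context, the remainder of a geth-style subscription method typically creates the subscription from the notifier and streams payloads to it until the client disconnects. A sketch under that assumption (the channel hand-off to the SyncPublishAndServe backend is hypothetical wiring, not verbatim from this commit):

```go
// Standard go-ethereum notifier pattern: create the subscription, then
// forward payloads to the subscriber until it goes away.
rpcSub := notifier.CreateSubscription()
go func() {
	payloadChan := make(chan ResponsePayload)
	// hand payloadChan to the SyncPublishAndServe backend here (assumed)
	for {
		select {
		case payload := <-payloadChan:
			if err := notifier.Notify(rpcSub.ID, payload); err != nil {
				return
			}
		case <-rpcSub.Err():
			return
		}
	}
}()
return rpcSub, nil
```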


@@ -73,7 +73,7 @@ func (pc *Converter) Convert(payload statediff.Payload) (*IPLDPayload, error) {
return nil, err
}
txMeta := &TrxMetaData{
To: trx.To().Hex(),
To: handleNullAddr(trx.To()),
From: from.Hex(),
}
// txMeta will have same index as its corresponding trx in the convertedPayload.BlockBody
@@ -152,3 +152,10 @@
}
return convertedPayload, nil
}
func handleNullAddr(to *common.Address) string {
if to == nil {
return "0x0000000000000000000000000000000000000000000000000000000000000000"
}
return to.Hex()
}
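handleNullAddr guards the txMeta construction above: contract-creation transactions have no recipient, so trx.To() returns nil and calling .Hex() on it would panic. A test-style sketch of the behavior (hypothetical, assuming the usual testing, math/big, and go-ethereum core/types imports):

```go
func TestHandleNullAddr(t *testing.T) {
	// Contract-creation transactions carry a nil recipient address.
	creationTx := types.NewContractCreation(0, big.NewInt(0), 21000, big.NewInt(1), nil)
	if creationTx.To() != nil {
		t.Fatal("expected nil recipient for a contract-creation tx")
	}
	// handleNullAddr substitutes the zero-value placeholder instead of panicking.
	if got := handleNullAddr(creationTx.To()); got == "" {
		t.Fatal("expected a non-empty placeholder for a nil address")
	}
}
```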


@@ -22,6 +22,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
rlp2 "github.com/ethereum/go-ethereum/rlp"
"github.com/ipfs/go-ipfs/plugin/loader"
"github.com/vulcanize/eth-block-extractor/pkg/ipfs"
"github.com/vulcanize/eth-block-extractor/pkg/ipfs/eth_block_header"
@@ -49,6 +50,18 @@ type Publisher struct {
// NewIPLDPublisher creates a pointer to a new Publisher which satisfies the IPLDPublisher interface
func NewIPLDPublisher(ipfsPath string) (*Publisher, error) {
l, err := loader.NewPluginLoader("~/.ipfs/plugins")
if err != nil {
return nil, err
}
err = l.Initialize()
if err != nil {
return nil, err
}
err = l.Inject()
if err != nil {
return nil, err
}
node, err := ipfs.InitIPFSNode(ipfsPath)
if err != nil {
return nil, err
@@ -98,13 +111,13 @@ func (pub *Publisher) Publish(payload *IPLDPayload) (*CIDPayload, error) {
}
// Process and publish state leafs
stateLeafCids, err := pub.publishStateNodes(payload.StateNodes)
stateNodeCids, err := pub.publishStateNodes(payload.StateNodes)
if err != nil {
return nil, err
}
// Process and publish storage leafs
storageLeafCids, err := pub.publishStorageNodes(payload.StorageNodes)
storageNodeCids, err := pub.publishStorageNodes(payload.StorageNodes)
if err != nil {
return nil, err
}
@@ -117,8 +130,8 @@ func (pub *Publisher) Publish(payload *IPLDPayload) (*CIDPayload, error) {
UncleCIDS: uncleCids,
TransactionCIDs: transactionCids,
ReceiptCIDs: receiptsCids,
StateNodeCIDs: stateLeafCids,
StorageNodeCIDs: storageLeafCids,
StateNodeCIDs: stateNodeCids,
StorageNodeCIDs: storageNodeCids,
}, nil
}
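The three loader calls added to NewIPLDPublisher (NewPluginLoader, Initialize, Inject) are go-ipfs's required bootstrap sequence: they register the preloaded plugins this commit vendors in (the badgerds, flatfs, and levelds datastores plus the git IPLD format) before any repo is opened. A small helper wrapping the sequence might look like this (a sketch; the hardcoded "~/.ipfs/plugins" path is taken from the diff above):

```go
// loadPlugins wraps go-ipfs's three-step plugin bootstrap. It must run before
// the IPFS node is initialized so the injected datastore and IPLD plugins are
// registered with the node's plugin registries.
func loadPlugins(pluginDir string) error {
	l, err := loader.NewPluginLoader(pluginDir)
	if err != nil {
		return err
	}
	if err := l.Initialize(); err != nil {
		return err
	}
	return l.Inject()
}
```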


@@ -14,6 +14,7 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Still seeing some errors from tx and storage indexing processes... due to fk constraints being broken
package ipfs
import (
@@ -50,24 +51,27 @@ func (repo *Repository) Index(cidPayload *CIDPayload) error {
}
for uncleHash, cid := range cidPayload.UncleCIDS {
err = repo.indexUncleCID(tx, cid, cidPayload.BlockNumber, uncleHash.Hex())
if err != nil {
tx.Rollback()
return err
}
}
err = repo.indexTransactionAndReceiptCIDs(tx, cidPayload, headerID)
tx.Commit()
err = repo.indexTransactionAndReceiptCIDs(cidPayload, headerID)
if err != nil {
tx.Rollback()
return err
}
err = repo.indexStateAndStorageCIDs(tx, cidPayload, headerID)
err = repo.indexStateAndStorageCIDs(cidPayload, headerID)
if err != nil {
tx.Rollback()
return err
}
return tx.Commit()
return nil
}
func (repo *Repository) indexHeaderCID(tx *sqlx.Tx, cid, blockNumber, hash string) (int64, error) {
var headerID int64
err := tx.QueryRowx(`INSERT INTO public.header_cids (block_number, block_hash, cid, uncle) VALUES ($1, $2, $3, $4)
ON CONFLICT DO UPDATE SET (cid, uncle) = ($3, $4)
ON CONFLICT (block_number, block_hash) DO UPDATE SET (cid, uncle) = ($3, $4)
RETURNING id`,
blockNumber, hash, cid, false).Scan(&headerID)
return headerID, err
@@ -75,61 +79,67 @@ func (repo *Repository) indexHeaderCID(tx *sqlx.Tx, cid, blockNumber, hash strin
func (repo *Repository) indexUncleCID(tx *sqlx.Tx, cid, blockNumber, hash string) error {
_, err := tx.Queryx(`INSERT INTO public.header_cids (block_number, block_hash, cid, uncle) VALUES ($1, $2, $3, $4)
ON CONFLICT DO UPDATE SET (cid, uncle) = ($3, $4)`,
ON CONFLICT (block_number, block_hash) DO UPDATE SET (cid, uncle) = ($3, $4)`,
blockNumber, hash, cid, true)
return err
}
func (repo *Repository) indexTransactionAndReceiptCIDs(tx *sqlx.Tx, payload *CIDPayload, headerID int64) error {
func (repo *Repository) indexTransactionAndReceiptCIDs(payload *CIDPayload, headerID int64) error {
tx, _ := repo.db.Beginx()
for hash, trxCidMeta := range payload.TransactionCIDs {
var txID int64
err := tx.QueryRowx(`INSERT INTO public.transaction_cids (header_id, tx_hash, cid, dst, src) VALUES ($1, $2, $3, $4, $5)
ON CONFLICT DO UPDATE SET (cid, dst, src) = ($3, $4, $5)
ON CONFLICT (header_id, tx_hash) DO UPDATE SET (cid, dst, src) = ($3, $4, $5)
RETURNING id`,
headerID, hash.Hex(), trxCidMeta.CID, trxCidMeta.To, trxCidMeta.From).Scan(&txID)
if err != nil {
tx.Rollback()
return err
}
receiptCidMeta, ok := payload.ReceiptCIDs[hash]
if ok {
err = repo.indexReceiptCID(tx, receiptCidMeta, txID)
if err != nil {
tx.Rollback()
return err
}
}
}
return nil
return tx.Commit()
}
func (repo *Repository) indexReceiptCID(tx *sqlx.Tx, cidMeta *ReceiptMetaData, txID int64) error {
_, err := tx.Exec(`INSERT INTO public.receipt_cids (tx_id, cid, topic0s) VALUES ($1, $2, $3)
ON CONFLICT DO UPDATE SET (cid, topic0s) = ($2, $3)`, txID, cidMeta.CID, pq.Array(cidMeta.Topic0s))
_, err := tx.Exec(`INSERT INTO public.receipt_cids (tx_id, cid, topic0s) VALUES ($1, $2, $3)`,
txID, cidMeta.CID, pq.Array(cidMeta.Topic0s))
return err
}
func (repo *Repository) indexStateAndStorageCIDs(tx *sqlx.Tx, payload *CIDPayload, headerID int64) error {
func (repo *Repository) indexStateAndStorageCIDs(payload *CIDPayload, headerID int64) error {
tx, _ := repo.db.Beginx()
for accountKey, stateCID := range payload.StateNodeCIDs {
var stateID int64
err := tx.QueryRowx(`INSERT INTO public.state_cids (header_id, state_key, cid, leaf) VALUES ($1, $2, $3, $4)
ON CONFLICT DO UPDATE SET (cid, leaf) = ($3, $4)
ON CONFLICT (header_id, state_key) DO UPDATE SET (cid, leaf) = ($3, $4)
RETURNING id`,
headerID, accountKey.Hex(), stateCID.CID, stateCID.Leaf).Scan(&stateID)
if err != nil {
tx.Rollback()
return err
}
for _, storageCID := range payload.StorageNodeCIDs[accountKey] {
err = repo.indexStorageCID(tx, storageCID, stateID)
if err != nil {
tx.Rollback()
return err
}
}
}
return nil
return tx.Commit()
}
func (repo *Repository) indexStorageCID(tx *sqlx.Tx, storageCID StorageNodeCID, stateID int64) error {
_, err := repo.db.Exec(`INSERT INTO public.storage_cids (state_id, storage_key, cid, leaf) VALUES ($1, $2, $3, $4)
ON CONFLICT DO UPDATE SET (cid, leaf) = ($3, $4)`,
stateID, storageCID.Key, storageCID.CID, storageCID.Leaf)
ON CONFLICT (state_id, storage_key) DO UPDATE SET (cid, leaf) = ($3, $4)`,
stateID, storageCID.Key.Hex(), storageCID.CID, storageCID.Leaf)
return err
}
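The recurring SQL fix in this file deserves a note: Postgres rejects ON CONFLICT DO UPDATE without an explicit conflict target (only ON CONFLICT DO NOTHING may omit it), so each upsert now names the unique columns it can conflict on. A standalone illustration of the pattern (the statement is lifted from the diff; wrapping it in a constant is just for the sketch):

```go
// The conflict target (header_id, tx_hash) must correspond to a unique
// constraint on transaction_cids, or Postgres rejects the DO UPDATE clause.
const upsertTransactionCID = `INSERT INTO public.transaction_cids (header_id, tx_hash, cid, dst, src)
	VALUES ($1, $2, $3, $4, $5)
	ON CONFLICT (header_id, tx_hash) DO UPDATE SET (cid, dst, src) = ($3, $4, $5)
	RETURNING id`
```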


@@ -30,8 +30,7 @@ type StateDiffStreamer interface {
// Streamer is the underlying struct for the StateDiffStreamer interface
type Streamer struct {
Client core.RpcClient
PayloadChan chan statediff.Payload
Client core.RpcClient
}
// NewStateDiffStreamer creates a pointer to a new Streamer which satisfies the StateDiffStreamer interface
@@ -43,5 +42,5 @@ func NewStateDiffStreamer(client core.RpcClient) *Streamer {
// Stream is the main loop for subscribing to data from the Geth state diff process
func (sds *Streamer) Stream(payloadChan chan statediff.Payload) (*rpc.ClientSubscription, error) {
return sds.Client.Subscribe("statediff", sds.PayloadChan)
return sds.Client.Subscribe("statediff", payloadChan, "subscribe")
}
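A hedged usage sketch of the updated streamer, with the caller now owning the payload channel (the buffer size and error handling here are assumptions, not part of this commit):

```go
payloadChan := make(chan statediff.Payload, 800) // buffer size is an assumption
sub, err := streamer.Stream(payloadChan)
if err != nil {
	log.Fatal(err)
}
defer sub.Unsubscribe()
for {
	select {
	case payload := <-payloadChan:
		_ = payload // hand the payload off to the converter/publisher pipeline
	case err := <-sub.Err():
		log.Fatal(err)
	}
}
```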

vendor/github.com/AndreasBriese/bbloom/.travis.yml (generated, vendored, 1 line)

@@ -0,0 +1 @@
language: go

vendor/github.com/AndreasBriese/bbloom/LICENSE (generated, vendored, 35 lines)

@@ -0,0 +1,35 @@
bbloom.go
// The MIT License (MIT)
// Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
siphash.go
// https://github.com/dchest/siphash
//
// Written in 2012 by Dmitry Chestnykh.
//
// To the extent possible under law, the author have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// http://creativecommons.org/publicdomain/zero/1.0/
//
// Package siphash implements SipHash-2-4, a fast short-input PRF
// created by Jean-Philippe Aumasson and Daniel J. Bernstein.

vendor/github.com/AndreasBriese/bbloom/README.md (generated, vendored, 131 lines)

@@ -0,0 +1,131 @@
## bbloom: a bitset Bloom filter for go/golang
===
[![Build Status](https://travis-ci.org/AndreasBriese/bbloom.png?branch=master)](http://travis-ci.org/AndreasBriese/bbloom)
This package implements a fast Bloom filter with a real 'bitset' and JSONMarshal/JSONUnmarshal to store/reload the Bloom filter.
NOTE: the package uses unsafe.Pointer to set and read the bits from the bitset. If you're uncomfortable with using the unsafe package, please consider using my bloom filter package at github.com/AndreasBriese/bloom
===
changelog 11/2015: new thread safe methods AddTS(), HasTS(), AddIfNotHasTS() following a suggestion from Srdjan Marinovic (github @a-little-srdjan), who used this to code a bloomfilter cache.
This bloom filter was developed to strengthen a website-log database and was tested and optimized for this log-entry mask: "2014/%02i/%02i %02i:%02i:%02i /info.html".
Nonetheless bbloom should work with any other form of entries.
~~Hash function is a modified Berkeley DB sdbm hash (to optimize for smaller strings). sdbm http://www.cse.yorku.ca/~oz/hash.html~~
Found sipHash (SipHash-2-4, a fast short-input PRF created by Jean-Philippe Aumasson and Daniel J. Bernstein) to be about as fast. sipHash had been ported to Go by Dmitry Chestnykh (github.com/dchest/siphash).
Minimum hashset size is: 512 ([4]uint64; will be set automatically).
### install
```sh
go get github.com/AndreasBriese/bbloom
```
### test
+ change to folder ../bbloom
+ create wordlist in file "words.txt" (you might use `python permut.py`)
+ run 'go test -bench=.' within the folder
```sh
go test -bench=.
```
~~If you've installed the GOCONVEY TDD-framework http://goconvey.co/ you can run the tests automatically.~~
using go's testing framework now (bear in mind that the op timing relates to 65536 operations of Add, Has, and AddIfNotHas respectively)
### usage
after installation add
```go
import (
...
"github.com/AndreasBriese/bbloom"
...
)
```
at your header. In the program use
```go
// create a bloom filter for 65536 items and 1 % wrong-positive ratio
bf := bbloom.New(float64(1<<16), float64(0.01))
// or
// create a bloom filter with 650000 entries and 7 locs per hash explicitly
// bf = bbloom.New(float64(650000), float64(7))
// or
bf = bbloom.New(650000.0, 7.0)
// add one item
bf.Add([]byte("butter"))
// Number of elements added is exposed now
// Note: ElemNum will not be included in JSON export (for compatibility with older versions)
nOfElementsInFilter := bf.ElemNum
// check if item is in the filter
isIn := bf.Has([]byte("butter")) // should be true
isNotIn := bf.Has([]byte("Butter")) // should be false
// 'add only if item is new' to the bloomfilter
added := bf.AddIfNotHas([]byte("butter")) // should be false because 'butter' is already in the set
added = bf.AddIfNotHas([]byte("buTTer")) // should be true because 'buTTer' is new
// thread safe versions for concurrent use: AddTS, HasTS, AddIfNotHasTS
// add one item
bf.AddTS([]byte("peanutbutter"))
// check if item is in the filter
isIn = bf.HasTS([]byte("peanutbutter")) // should be true
isNotIn = bf.HasTS([]byte("peanutButter")) // should be false
// 'add only if item is new' to the bloomfilter
added = bf.AddIfNotHasTS([]byte("butter")) // should be false because 'butter' is already in the set
added = bf.AddIfNotHasTS([]byte("peanutbuTTer")) // should be true because 'peanutbuTTer' is new
// convert to JSON ([]byte)
Json := bf.JSONMarshal()
// bloomfilters Mutex is exposed for external un-/locking
// i.e. mutex lock while doing JSON conversion
bf.Mtx.Lock()
Json = bf.JSONMarshal()
bf.Mtx.Unlock()
// restore a bloom filter from storage
bfNew := bbloom.JSONUnmarshal(Json)
isInNew := bfNew.Has([]byte("butter")) // should be true
isNotInNew := bfNew.Has([]byte("Butter")) // should be false
```
to work with the bloom filter.
### why 'fast'?
It's about 3 times faster than William Fitzgerald's bitset bloom filter https://github.com/willf/bloom . And it is about as fast as my []bool set variant for Bloom filters (see https://github.com/AndreasBriese/bloom ) while having an 8 times smaller memory footprint:
Bloom filter (filter size 524288, 7 hashlocs)
github.com/AndreasBriese/bbloom 'Add' 65536 items (10 repetitions): 6595800 ns (100 ns/op)
github.com/AndreasBriese/bbloom 'Has' 65536 items (10 repetitions): 5986600 ns (91 ns/op)
github.com/AndreasBriese/bloom 'Add' 65536 items (10 repetitions): 6304684 ns (96 ns/op)
github.com/AndreasBriese/bloom 'Has' 65536 items (10 repetitions): 6568663 ns (100 ns/op)
github.com/willf/bloom 'Add' 65536 items (10 repetitions): 24367224 ns (371 ns/op)
github.com/willf/bloom 'Test' 65536 items (10 repetitions): 21881142 ns (333 ns/op)
github.com/dataence/bloom/standard 'Add' 65536 items (10 repetitions): 23041644 ns (351 ns/op)
github.com/dataence/bloom/standard 'Check' 65536 items (10 repetitions): 19153133 ns (292 ns/op)
github.com/cabello/bloom 'Add' 65536 items (10 repetitions): 131921507 ns (2012 ns/op)
github.com/cabello/bloom 'Contains' 65536 items (10 repetitions): 131108962 ns (2000 ns/op)
(on MBPro15 OSX10.8.5 i7 4Core 2.4Ghz)
With 32bit bloom filters (bloom32) using modified sdbm, bloom32 does hashing with only 2 bit shifts, one xor and one subtraction per byte. sdbm is about as fast as fnv64a but gives fewer collisions with the dataset (see mask above). bloom.New(float64(10 * 1<<16),float64(7)) populated with 1<<16 random items from the dataset (see above) and tested against the rest results in less than 0.05% collisions.

vendor/github.com/AndreasBriese/bbloom/bbloom.go (generated, vendored, 270 lines)

@@ -0,0 +1,270 @@
// The MIT License (MIT)
// Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package bbloom
import (
"bytes"
"encoding/json"
"log"
"math"
"sync"
"unsafe"
)
// helper
var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128}
func getSize(ui64 uint64) (size uint64, exponent uint64) {
if ui64 < uint64(512) {
ui64 = uint64(512)
}
size = uint64(1)
for size < ui64 {
size <<= 1
exponent++
}
return size, exponent
}
func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) {
size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2)
locs := math.Ceil(float64(0.69314718056) * size / numEntries)
return uint64(size), uint64(locs)
}
// New
// returns a new bloomfilter
func New(params ...float64) (bloomfilter Bloom) {
var entries, locs uint64
if len(params) == 2 {
if params[1] < 1 {
entries, locs = calcSizeByWrongPositives(params[0], params[1])
} else {
entries, locs = uint64(params[0]), uint64(params[1])
}
} else {
log.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations)) i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries), float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))")
}
size, exponent := getSize(uint64(entries))
bloomfilter = Bloom{
sizeExp: exponent,
size: size - 1,
setLocs: locs,
shift: 64 - exponent,
}
bloomfilter.Size(size)
return bloomfilter
}
// NewWithBoolset
// takes a []byte slice and number of locs per entry
// returns the bloomfilter with a bitset populated according to the input []byte
func NewWithBoolset(bs *[]byte, locs uint64) (bloomfilter Bloom) {
bloomfilter = New(float64(len(*bs)<<3), float64(locs))
ptr := uintptr(unsafe.Pointer(&bloomfilter.bitset[0]))
for _, b := range *bs {
*(*uint8)(unsafe.Pointer(ptr)) = b
ptr++
}
return bloomfilter
}
// bloomJSONImExport
// Im/Export structure used by JSONMarshal / JSONUnmarshal
type bloomJSONImExport struct {
FilterSet []byte
SetLocs uint64
}
// JSONUnmarshal
// takes JSON-Object (type bloomJSONImExport) as []bytes
// returns bloom32 / bloom64 object
func JSONUnmarshal(dbData []byte) Bloom {
bloomImEx := bloomJSONImExport{}
json.Unmarshal(dbData, &bloomImEx)
buf := bytes.NewBuffer(bloomImEx.FilterSet)
bs := buf.Bytes()
bf := NewWithBoolset(&bs, bloomImEx.SetLocs)
return bf
}
//
// Bloom filter
type Bloom struct {
Mtx sync.Mutex
ElemNum uint64
bitset []uint64
sizeExp uint64
size uint64
setLocs uint64
shift uint64
}
// <--- http://www.cse.yorku.ca/~oz/hash.html
// modified Berkeley DB Hash (32bit)
// hash is cast to l, h = 16bit fragments
// func (bl Bloom) absdbm(b *[]byte) (l, h uint64) {
// hash := uint64(len(*b))
// for _, c := range *b {
// hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash
// }
// h = hash >> bl.shift
// l = hash << bl.shift >> bl.shift
// return l, h
// }
// Update: found sipHash of Jean-Philippe Aumasson & Daniel J. Bernstein to be even faster than absdbm()
// https://131002.net/siphash/
// siphash was implemented for Go by Dmitry Chestnykh https://github.com/dchest/siphash
// Add
// set the bit(s) for entry; Adds an entry to the Bloom filter
func (bl *Bloom) Add(entry []byte) {
l, h := bl.sipHash(entry)
for i := uint64(0); i < (*bl).setLocs; i++ {
(*bl).Set((h + i*l) & (*bl).size)
(*bl).ElemNum++
}
}
// AddTS
// Thread safe: Mutex.Lock the bloomfilter for the time of processing the entry
func (bl *Bloom) AddTS(entry []byte) {
bl.Mtx.Lock()
defer bl.Mtx.Unlock()
bl.Add(entry[:])
}
// Has
// check if bit(s) for entry is/are set
// returns true if the entry was added to the Bloom Filter
func (bl Bloom) Has(entry []byte) bool {
l, h := bl.sipHash(entry)
for i := uint64(0); i < bl.setLocs; i++ {
switch bl.IsSet((h + i*l) & bl.size) {
case false:
return false
}
}
return true
}
// HasTS
// Thread safe: Mutex.Lock the bloomfilter for the time of processing the entry
func (bl *Bloom) HasTS(entry []byte) bool {
bl.Mtx.Lock()
defer bl.Mtx.Unlock()
return bl.Has(entry[:])
}
// AddIfNotHas
// Only Add entry if it's not present in the bloomfilter
// returns true if entry was added
// returns false if entry was already registered in the bloomfilter
func (bl Bloom) AddIfNotHas(entry []byte) (added bool) {
if bl.Has(entry[:]) {
return added
}
bl.Add(entry[:])
return true
}
// AddIfNotHasTS
// Thread safe: Only Add entry if it's not present in the bloomfilter
// returns true if entry was added
// returns false if entry was already registered in the bloomfilter
func (bl *Bloom) AddIfNotHasTS(entry []byte) (added bool) {
bl.Mtx.Lock()
defer bl.Mtx.Unlock()
return bl.AddIfNotHas(entry[:])
}
// Size
// allocate the Bloom filter's bitset with size sz
func (bl *Bloom) Size(sz uint64) {
(*bl).bitset = make([]uint64, sz>>6)
}
// Clear
// resets the Bloom filter
func (bl *Bloom) Clear() {
for i := range (*bl).bitset {
(*bl).bitset[i] = 0
}
}
// Set
// set the bit[idx] of bitset
func (bl *Bloom) Set(idx uint64) {
ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
*(*uint8)(ptr) |= mask[idx%8]
}
// IsSet
// check if bit[idx] of bitset is set
// returns true/false
func (bl *Bloom) IsSet(idx uint64) bool {
ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1
return r == 1
}
// JSONMarshal
// returns JSON-object (type bloomJSONImExport) as []byte
func (bl Bloom) JSONMarshal() []byte {
bloomImEx := bloomJSONImExport{}
bloomImEx.SetLocs = uint64(bl.setLocs)
bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
ptr := uintptr(unsafe.Pointer(&bl.bitset[0]))
for i := range bloomImEx.FilterSet {
bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(ptr))
ptr++
}
data, err := json.Marshal(bloomImEx)
if err != nil {
log.Fatal("json.Marshal failed: ", err)
}
return data
}
// // alternative hashFn
// func (bl Bloom) fnv64a(b *[]byte) (l, h uint64) {
// h64 := fnv.New64a()
// h64.Write(*b)
// hash := h64.Sum64()
// h = hash >> 32
// l = hash << 32 >> 32
// return l, h
// }
//
// // <-- http://partow.net/programming/hashfunctions/index.html
// // citation: An algorithm proposed by Donald E. Knuth in The Art Of Computer Programming Volume 3,
// // under the topic of sorting and search chapter 6.4.
// // modified to fit with boolset-length
// func (bl Bloom) DEKHash(b *[]byte) (l, h uint64) {
// hash := uint64(len(*b))
// for _, c := range *b {
// hash = ((hash << 5) ^ (hash >> bl.shift)) ^ uint64(c)
// }
// h = hash >> bl.shift
// l = hash << bl.sizeExp >> bl.sizeExp
// return l, h
// }

vendor/github.com/AndreasBriese/bbloom/sipHash.go (generated, vendored, 225 lines)

@@ -0,0 +1,225 @@
// Written in 2012 by Dmitry Chestnykh.
//
// To the extent possible under law, the author have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// http://creativecommons.org/publicdomain/zero/1.0/
//
// Package siphash implements SipHash-2-4, a fast short-input PRF
// created by Jean-Philippe Aumasson and Daniel J. Bernstein.
package bbloom
// Hash returns the 64-bit SipHash-2-4 of the given byte slice with two 64-bit
// parts of 128-bit key: k0 and k1.
func (bl Bloom) sipHash(p []byte) (l, h uint64) {
// Initialization.
v0 := uint64(8317987320269560794) // k0 ^ 0x736f6d6570736575
v1 := uint64(7237128889637516672) // k1 ^ 0x646f72616e646f6d
v2 := uint64(7816392314733513934) // k0 ^ 0x6c7967656e657261
v3 := uint64(8387220255325274014) // k1 ^ 0x7465646279746573
t := uint64(len(p)) << 56
// Compression.
for len(p) >= 8 {
m := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 |
uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
v3 ^= m
// Round 1.
v0 += v1
v1 = v1<<13 | v1>>51
v1 ^= v0
v0 = v0<<32 | v0>>32
v2 += v3
v3 = v3<<16 | v3>>48
v3 ^= v2
v0 += v3
v3 = v3<<21 | v3>>43
v3 ^= v0
v2 += v1
v1 = v1<<17 | v1>>47
v1 ^= v2
v2 = v2<<32 | v2>>32
// Round 2.
v0 += v1
v1 = v1<<13 | v1>>51
v1 ^= v0
v0 = v0<<32 | v0>>32
v2 += v3
v3 = v3<<16 | v3>>48
v3 ^= v2
v0 += v3
v3 = v3<<21 | v3>>43
v3 ^= v0
v2 += v1
v1 = v1<<17 | v1>>47
v1 ^= v2
v2 = v2<<32 | v2>>32
v0 ^= m
p = p[8:]
}
// Compress last block.
switch len(p) {
case 7:
t |= uint64(p[6]) << 48
fallthrough
case 6:
t |= uint64(p[5]) << 40
fallthrough
case 5:
t |= uint64(p[4]) << 32
fallthrough
case 4:
t |= uint64(p[3]) << 24
fallthrough
case 3:
t |= uint64(p[2]) << 16
fallthrough
case 2:
t |= uint64(p[1]) << 8
fallthrough
case 1:
t |= uint64(p[0])
}
v3 ^= t
// Round 1.
v0 += v1
v1 = v1<<13 | v1>>51
v1 ^= v0
v0 = v0<<32 | v0>>32
v2 += v3
v3 = v3<<16 | v3>>48
v3 ^= v2
v0 += v3
v3 = v3<<21 | v3>>43
v3 ^= v0
v2 += v1
v1 = v1<<17 | v1>>47
v1 ^= v2
v2 = v2<<32 | v2>>32
// Round 2.
v0 += v1
v1 = v1<<13 | v1>>51
v1 ^= v0
v0 = v0<<32 | v0>>32
v2 += v3
v3 = v3<<16 | v3>>48
v3 ^= v2
v0 += v3
v3 = v3<<21 | v3>>43
v3 ^= v0
v2 += v1
v1 = v1<<17 | v1>>47
v1 ^= v2
v2 = v2<<32 | v2>>32
v0 ^= t
// Finalization.
v2 ^= 0xff
// Round 1.
v0 += v1
v1 = v1<<13 | v1>>51
v1 ^= v0
v0 = v0<<32 | v0>>32
v2 += v3
v3 = v3<<16 | v3>>48
v3 ^= v2
v0 += v3
v3 = v3<<21 | v3>>43
v3 ^= v0
v2 += v1
v1 = v1<<17 | v1>>47
v1 ^= v2
v2 = v2<<32 | v2>>32
// Round 2.
v0 += v1
v1 = v1<<13 | v1>>51
v1 ^= v0
v0 = v0<<32 | v0>>32
v2 += v3
v3 = v3<<16 | v3>>48
v3 ^= v2
v0 += v3
v3 = v3<<21 | v3>>43
v3 ^= v0
v2 += v1
v1 = v1<<17 | v1>>47
v1 ^= v2
v2 = v2<<32 | v2>>32
// Round 3.
v0 += v1
v1 = v1<<13 | v1>>51
v1 ^= v0
v0 = v0<<32 | v0>>32
v2 += v3
v3 = v3<<16 | v3>>48
v3 ^= v2
v0 += v3
v3 = v3<<21 | v3>>43
v3 ^= v0
v2 += v1
v1 = v1<<17 | v1>>47
v1 ^= v2
v2 = v2<<32 | v2>>32
// Round 4.
v0 += v1
v1 = v1<<13 | v1>>51
v1 ^= v0
v0 = v0<<32 | v0>>32
v2 += v3
v3 = v3<<16 | v3>>48
v3 ^= v2
v0 += v3
v3 = v3<<21 | v3>>43
v3 ^= v0
v2 += v1
v1 = v1<<17 | v1>>47
v1 ^= v2
v2 = v2<<32 | v2>>32
// return v0 ^ v1 ^ v2 ^ v3
hash := v0 ^ v1 ^ v2 ^ v3
h = hash >> bl.shift
l = hash << bl.shift >> bl.shift
return l, h
}

vendor/github.com/AndreasBriese/bbloom/words.txt (generated, vendored, 140 lines)

@@ -0,0 +1,140 @@
2014/01/01 00:00:00 /info.html
2014/01/01 00:00:00 /info.html
2014/01/01 00:00:01 /info.html
2014/01/01 00:00:02 /info.html
2014/01/01 00:00:03 /info.html
2014/01/01 00:00:04 /info.html
2014/01/01 00:00:05 /info.html
2014/01/01 00:00:06 /info.html
2014/01/01 00:00:07 /info.html
2014/01/01 00:00:08 /info.html
2014/01/01 00:00:09 /info.html
2014/01/01 00:00:10 /info.html
2014/01/01 00:00:11 /info.html
2014/01/01 00:00:12 /info.html
2014/01/01 00:00:13 /info.html
2014/01/01 00:00:14 /info.html
2014/01/01 00:00:15 /info.html
2014/01/01 00:00:16 /info.html
2014/01/01 00:00:17 /info.html
2014/01/01 00:00:18 /info.html
2014/01/01 00:00:19 /info.html
2014/01/01 00:00:20 /info.html
2014/01/01 00:00:21 /info.html
2014/01/01 00:00:22 /info.html
2014/01/01 00:00:23 /info.html
2014/01/01 00:00:24 /info.html
2014/01/01 00:00:25 /info.html
2014/01/01 00:00:26 /info.html
2014/01/01 00:00:27 /info.html
2014/01/01 00:00:28 /info.html
2014/01/01 00:00:29 /info.html
2014/01/01 00:00:30 /info.html
2014/01/01 00:00:31 /info.html
2014/01/01 00:00:32 /info.html
2014/01/01 00:00:33 /info.html
2014/01/01 00:00:34 /info.html
2014/01/01 00:00:35 /info.html
2014/01/01 00:00:36 /info.html
2014/01/01 00:00:37 /info.html
2014/01/01 00:00:38 /info.html
2014/01/01 00:00:39 /info.html
2014/01/01 00:00:40 /info.html
2014/01/01 00:00:41 /info.html
2014/01/01 00:00:42 /info.html
2014/01/01 00:00:43 /info.html
2014/01/01 00:00:44 /info.html
2014/01/01 00:00:45 /info.html
2014/01/01 00:00:46 /info.html
2014/01/01 00:00:47 /info.html
2014/01/01 00:00:48 /info.html
2014/01/01 00:00:49 /info.html
2014/01/01 00:00:50 /info.html
2014/01/01 00:00:51 /info.html
2014/01/01 00:00:52 /info.html
2014/01/01 00:00:53 /info.html
2014/01/01 00:00:54 /info.html
2014/01/01 00:00:55 /info.html
2014/01/01 00:00:56 /info.html
2014/01/01 00:00:57 /info.html
2014/01/01 00:00:58 /info.html
2014/01/01 00:00:59 /info.html
2014/01/01 00:01:00 /info.html
2014/01/01 00:01:01 /info.html
2014/01/01 00:01:02 /info.html
2014/01/01 00:01:03 /info.html
2014/01/01 00:01:04 /info.html
2014/01/01 00:01:05 /info.html
2014/01/01 00:01:06 /info.html
2014/01/01 00:01:07 /info.html
2014/01/01 00:01:08 /info.html
2014/01/01 00:01:09 /info.html
2014/01/01 00:01:10 /info.html
2014/01/01 00:01:11 /info.html
2014/01/01 00:01:12 /info.html
2014/01/01 00:01:13 /info.html
2014/01/01 00:01:14 /info.html
2014/01/01 00:01:15 /info.html
2014/01/01 00:01:16 /info.html
2014/01/01 00:01:17 /info.html
2014/01/01 00:01:18 /info.html
2014/01/01 00:01:19 /info.html
2014/01/01 00:01:20 /info.html
2014/01/01 00:01:21 /info.html
2014/01/01 00:01:22 /info.html
2014/01/01 00:01:23 /info.html
2014/01/01 00:01:24 /info.html
2014/01/01 00:01:25 /info.html
2014/01/01 00:01:26 /info.html
2014/01/01 00:01:27 /info.html
2014/01/01 00:01:28 /info.html
2014/01/01 00:01:29 /info.html
2014/01/01 00:01:30 /info.html
2014/01/01 00:01:31 /info.html
2014/01/01 00:01:32 /info.html
2014/01/01 00:01:33 /info.html
2014/01/01 00:01:34 /info.html
2014/01/01 00:01:35 /info.html
2014/01/01 00:01:36 /info.html
2014/01/01 00:01:37 /info.html
2014/01/01 00:01:38 /info.html
2014/01/01 00:01:39 /info.html
2014/01/01 00:01:40 /info.html
2014/01/01 00:01:41 /info.html
2014/01/01 00:01:42 /info.html
2014/01/01 00:01:43 /info.html
2014/01/01 00:01:44 /info.html
2014/01/01 00:01:45 /info.html
2014/01/01 00:01:46 /info.html
2014/01/01 00:01:47 /info.html
2014/01/01 00:01:48 /info.html
2014/01/01 00:01:49 /info.html
2014/01/01 00:01:50 /info.html
2014/01/01 00:01:51 /info.html
2014/01/01 00:01:52 /info.html
2014/01/01 00:01:53 /info.html
2014/01/01 00:01:54 /info.html
2014/01/01 00:01:55 /info.html
2014/01/01 00:01:56 /info.html
2014/01/01 00:01:57 /info.html
2014/01/01 00:01:58 /info.html
2014/01/01 00:01:59 /info.html
2014/01/01 00:02:00 /info.html
2014/01/01 00:02:01 /info.html
2014/01/01 00:02:02 /info.html
2014/01/01 00:02:03 /info.html
2014/01/01 00:02:04 /info.html
2014/01/01 00:02:05 /info.html
2014/01/01 00:02:06 /info.html
2014/01/01 00:02:07 /info.html
2014/01/01 00:02:08 /info.html
2014/01/01 00:02:09 /info.html
2014/01/01 00:02:10 /info.html
2014/01/01 00:02:11 /info.html
2014/01/01 00:02:12 /info.html
2014/01/01 00:02:13 /info.html
2014/01/01 00:02:14 /info.html
2014/01/01 00:02:15 /info.html
2014/01/01 00:02:16 /info.html
2014/01/01 00:02:17 /info.html
2014/01/01 00:02:18 /info.html

vendor/github.com/dgraph-io/badger/CHANGELOG.md (generated, vendored, 100 lines)

@@ -0,0 +1,100 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [1.5.3] - 2018-07-11
Bug Fixes:
* Fix a panic caused by item.vptr not copying over vs.Value when looking
for a move key.
## [1.5.2] - 2018-06-19
Bug Fixes:
* Fix the way move key gets generated.
* If a transaction has unclosed iterators, or multiple iterators running
simultaneously, throw a panic. Every iterator must be properly closed. At any
point in time, only one iterator per transaction can be running. This is to
avoid bugs in the transaction data structure, which is not thread safe.
* *Warning: This change might cause panics in user code. Fix is to properly
close your iterators, and only have one running at a time per transaction.*
## [1.5.1] - 2018-06-04
Bug Fixes:
* Fix for infinite yieldItemValue recursion. #503
* Fix recursive addition of `badgerMove` prefix. https://github.com/dgraph-io/badger/commit/2e3a32f0ccac3066fb4206b28deb39c210c5266f
* Use file size based window size for sampling, instead of fixing it to 10MB. #501
Cleanup:
* Clarify comments and documentation.
* Move badger tool one directory level up.
## [1.5.0] - 2018-05-08
* Introduce `NumVersionsToKeep` option. This option is used to discard many
versions of the same key, which saves space.
* Add a new `SetWithDiscard` method, which would indicate that all the older
versions of the key are now invalid. Those versions would be discarded during
compactions.
* Value log GC moves are now bound to another keyspace to ensure latest versions
of data are always at the top in LSM tree.
* Introduce `ValueLogMaxEntries` to restrict the number of key-value pairs per
value log file. This helps bound the time it takes to garbage collect one
file.
## [1.4.0] - 2018-05-04
* Make mmap-ing of value log optional.
* Run GC multiple times, based on recorded discard statistics.
* Add MergeOperator.
* Force compact L0 on close (#439).
* Add truncate option to warn about data loss (#452).
* Discard key versions during compaction (#464).
* Introduce new `LSMOnlyOptions`, to make Badger act like a typical LSM based DB.
Bug fix:
* (Temporary) Check max version across all tables in Get (removed in next
release).
* Update commit and read ts while loading from backup.
* Ensure all transaction entries are part of the same value log file.
* On commit, run unlock callbacks before doing writes (#413).
* Wait for goroutines to finish before closing iterators (#421).
## [1.3.0] - 2017-12-12
* Add `DB.NextSequence()` method to generate monotonically increasing integer
sequences.
* Add `DB.Size()` method to return the size of LSM and value log files.
* Tweaked mmap code to make Windows 32-bit builds work.
* Tweaked build tags on some files to make iOS builds work.
* Fix `DB.PurgeOlderVersions()` to not violate some constraints.
## [1.2.0] - 2017-11-30
* Expose a `Txn.SetEntry()` method to allow setting the key-value pair
and all the metadata at the same time.
## [1.1.1] - 2017-11-28
* Fix bug where txn.Get was returning a key deleted in the same transaction.
* Fix race condition while decrementing reference in oracle.
* Update doneCommit in the callback for CommitAsync.
* Iterators see writes of the current txn.
## [1.1.0] - 2017-11-13
* Create Badger directory if it does not exist when `badger.Open` is called.
* Added `Item.ValueCopy()` to avoid deadlocks in long-running iterations
* Fixed 64-bit alignment issues to make Badger run on Arm v7
## [1.0.1] - 2017-11-06
* Fix an uint16 overflow when resizing key slice
[Unreleased]: https://github.com/dgraph-io/badger/compare/v1.5.3...HEAD
[1.5.3]: https://github.com/dgraph-io/badger/compare/v1.5.2...v1.5.3
[1.5.2]: https://github.com/dgraph-io/badger/compare/v1.5.1...v1.5.2
[1.5.1]: https://github.com/dgraph-io/badger/compare/v1.5.0...v1.5.1
[1.5.0]: https://github.com/dgraph-io/badger/compare/v1.4.0...v1.5.0
[1.4.0]: https://github.com/dgraph-io/badger/compare/v1.3.0...v1.4.0
[1.3.0]: https://github.com/dgraph-io/badger/compare/v1.2.0...v1.3.0
[1.2.0]: https://github.com/dgraph-io/badger/compare/v1.1.1...v1.2.0
[1.1.1]: https://github.com/dgraph-io/badger/compare/v1.1.0...v1.1.1
[1.1.0]: https://github.com/dgraph-io/badger/compare/v1.0.1...v1.1.0
[1.0.1]: https://github.com/dgraph-io/badger/compare/v1.0.0...v1.0.1


@@ -0,0 +1,5 @@
# Code of Conduct
Our Code of Conduct can be found here:
https://dgraph.io/conduct

vendor/github.com/dgraph-io/badger/LICENSE (generated, vendored, 176 lines)

@@ -0,0 +1,176 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

vendor/github.com/dgraph-io/badger/README.md (generated, vendored, 773 lines)

@@ -0,0 +1,773 @@
# BadgerDB [![GoDoc](https://godoc.org/github.com/dgraph-io/badger?status.svg)](https://godoc.org/github.com/dgraph-io/badger) [![Go Report Card](https://goreportcard.com/badge/github.com/dgraph-io/badger)](https://goreportcard.com/report/github.com/dgraph-io/badger) [![Sourcegraph](https://sourcegraph.com/github.com/dgraph-io/badger/-/badge.svg)](https://sourcegraph.com/github.com/dgraph-io/badger?badge) [![Build Status](https://teamcity.dgraph.io/guestAuth/app/rest/builds/buildType:(id:Badger_UnitTests)/statusIcon.svg)](https://teamcity.dgraph.io/viewLog.html?buildTypeId=Badger_UnitTests&buildId=lastFinished&guest=1) ![Appveyor](https://ci.appveyor.com/api/projects/status/github/dgraph-io/badger?branch=master&svg=true) [![Coverage Status](https://coveralls.io/repos/github/dgraph-io/badger/badge.svg?branch=master)](https://coveralls.io/github/dgraph-io/badger?branch=master)
![Badger mascot](images/diggy-shadow.png)
BadgerDB is an embeddable, persistent and fast key-value (KV) database
written in pure Go. It's meant to be a performant alternative to non-Go-based
key-value stores like [RocksDB](https://github.com/facebook/rocksdb).
## Project Status [Oct 27, 2018]
Badger is stable and is being used to serve data sets worth hundreds of
terabytes. Badger supports concurrent ACID transactions with serializable
snapshot isolation (SSI) guarantees. A Jepsen-style bank test runs nightly for
8h, with the `--race` flag, and ensures maintenance of transactional guarantees.
Badger has also been tested to work with filesystem level anomalies, to ensure
persistence and consistency.
Badger v1.0 was released in Nov 2017, with a Badger v2.0 release coming up in a
few months. The [Changelog] is kept fairly up-to-date.
[Changelog]:https://github.com/dgraph-io/badger/blob/master/CHANGELOG.md
## Table of Contents
* [Getting Started](#getting-started)
+ [Installing](#installing)
+ [Opening a database](#opening-a-database)
+ [Transactions](#transactions)
- [Read-only transactions](#read-only-transactions)
- [Read-write transactions](#read-write-transactions)
- [Managing transactions manually](#managing-transactions-manually)
+ [Using key/value pairs](#using-keyvalue-pairs)
+ [Monotonically increasing integers](#monotonically-increasing-integers)
* [Merge Operations](#merge-operations)
+ [Setting Time To Live(TTL) and User Metadata on Keys](#setting-time-to-livettl-and-user-metadata-on-keys)
+ [Iterating over keys](#iterating-over-keys)
- [Prefix scans](#prefix-scans)
- [Key-only iteration](#key-only-iteration)
+ [Stream](#stream)
+ [Garbage Collection](#garbage-collection)
+ [Database backup](#database-backup)
+ [Memory usage](#memory-usage)
+ [Statistics](#statistics)
* [Resources](#resources)
+ [Blog Posts](#blog-posts)
* [Contact](#contact)
* [Design](#design)
+ [Comparisons](#comparisons)
+ [Benchmarks](#benchmarks)
* [Other Projects Using Badger](#other-projects-using-badger)
* [Frequently Asked Questions](#frequently-asked-questions)
## Getting Started
### Installing
To start using Badger, install Go 1.8 or above and run `go get`:
```sh
$ go get github.com/dgraph-io/badger/...
```
This will retrieve the library and install the `badger_info` command line
utility into your `$GOBIN` path.
### Opening a database
The top-level object in Badger is a `DB`. It represents multiple files on disk
in specific directories, which contain the data for a single database.
To open your database, use the `badger.Open()` function, with the appropriate
options. The `Dir` and `ValueDir` options are mandatory and must be
specified by the client. They can be set to the same value to simplify things.
```go
package main
import (
"log"
"github.com/dgraph-io/badger"
)
func main() {
// Open the Badger database located in the /tmp/badger directory.
// It will be created if it doesn't exist.
opts := badger.DefaultOptions
opts.Dir = "/tmp/badger"
opts.ValueDir = "/tmp/badger"
db, err := badger.Open(opts)
if err != nil {
log.Fatal(err)
}
defer db.Close()
 // Your code here…
}
```
Please note that Badger obtains a lock on the directories so multiple processes
cannot open the same database at the same time.
### Transactions
#### Read-only transactions
To start a read-only transaction, you can use the `DB.View()` method:
```go
err := db.View(func(txn *badger.Txn) error {
  // Your code here…
  return nil
})
```
You cannot perform any writes or deletes within this transaction. Badger
ensures that you get a consistent view of the database within this closure. Any
writes that happen elsewhere after the transaction has started will not be
seen by calls made within the closure.
#### Read-write transactions
To start a read-write transaction, you can use the `DB.Update()` method:
```go
err := db.Update(func(txn *badger.Txn) error {
  // Your code here…
  return nil
})
```
All database operations are allowed inside a read-write transaction.
Always check the returned error value. If you return an error
within your closure, it will be passed through.
An `ErrConflict` error will be reported in case of a conflict. Depending on the state
of your application, you have the option to retry the operation if you receive
this error.
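For example, a minimal retry loop could look like this (how many times to
retry, and whether to back off between attempts, is up to your application):
```go
for {
  err := db.Update(func(txn *badger.Txn) error {
    // Read-modify-write logic goes here.
    return nil
  })
  if err != badger.ErrConflict {
    return err // nil on success, or a non-retryable error
  }
  // A conflicting concurrent commit was detected; re-run the whole closure.
}
```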
An `ErrTxnTooBig` will be reported in case the number of pending writes/deletes in
the transaction exceeds a certain limit. In that case, it is best to commit the
transaction and start a new transaction immediately. Here is an example (we are
not checking for errors in some places for simplicity):
```go
updates := make(map[string]string)
txn := db.NewTransaction(true)
for k, v := range updates {
  if err := txn.Set([]byte(k), []byte(v)); err == badger.ErrTxnTooBig {
    _ = txn.Commit()
    txn = db.NewTransaction(true)
    _ = txn.Set([]byte(k), []byte(v))
  }
}
_ = txn.Commit()
```
#### Managing transactions manually
The `DB.View()` and `DB.Update()` methods are wrappers around the
`DB.NewTransaction()` and `Txn.Commit()` methods (or `Txn.Discard()` in case of
read-only transactions). These helper methods will start the transaction,
execute a function, and then safely discard your transaction if an error is
returned. This is the recommended way to use Badger transactions.
However, sometimes you may want to manually create and commit your
transactions. You can use the `DB.NewTransaction()` function directly, which
takes in a boolean argument to specify whether a read-write transaction is
required. For read-write transactions, it is necessary to call `Txn.Commit()`
to ensure the transaction is committed. For read-only transactions, calling
`Txn.Discard()` is sufficient. `Txn.Commit()` also calls `Txn.Discard()`
internally to clean up the transaction, so just calling `Txn.Commit()` is
sufficient for read-write transactions. However, if your code doesn't call
`Txn.Commit()` for some reason (e.g., it returns prematurely with an error),
make sure you call `Txn.Discard()` in a `defer` block. Refer to the
code below.
```go
// Start a writable transaction.
txn := db.NewTransaction(true)
defer txn.Discard()
// Use the transaction...
err := txn.Set([]byte("answer"), []byte("42"))
if err != nil {
return err
}
// Commit the transaction and check for error.
if err := txn.Commit(); err != nil {
return err
}
```
The first argument to `DB.NewTransaction()` is a boolean stating if the transaction
should be writable.
Badger allows an optional callback to the `Txn.Commit()` method. Normally, the
callback can be set to `nil`, and the method will return after all the writes
have succeeded. However, if this callback is provided, the `Txn.Commit()`
method returns as soon as it has checked for any conflicts. The actual writing
to the disk happens asynchronously, and the callback is invoked once the
writing has finished, or an error has occurred. This can improve the throughput
of the application in some cases. But it also means that a transaction is not
durable until the callback has been invoked with a `nil` error value.
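In newer versions of Badger this callback form is exposed as the
`Txn.CommitWith()` method; here is a quick sketch, assuming that API is
available in the version you are using:
```go
txn := db.NewTransaction(true)
if err := txn.Set([]byte("answer"), []byte("42")); err != nil {
  txn.Discard()
  return err
}
// CommitWith returns once conflict detection has run; the callback fires
// after the asynchronous disk write completes, with nil on success.
txn.CommitWith(func(err error) {
  if err != nil {
    log.Printf("commit failed: %v", err)
  }
})
```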
### Using key/value pairs
To save a key/value pair, use the `Txn.Set()` method:
```go
err := db.Update(func(txn *badger.Txn) error {
err := txn.Set([]byte("answer"), []byte("42"))
return err
})
```
This will set the value of the `"answer"` key to `"42"`. To retrieve this
value, we can use the `Txn.Get()` method:
```go
err := db.View(func(txn *badger.Txn) error {
item, err := txn.Get([]byte("answer"))
handle(err)
var valNot, valCopy []byte
err = item.Value(func(val []byte) error {
// This func with val would only be called if item.Value encounters no error.
// Accessing val here is valid.
fmt.Printf("The answer is: %s\n", val)
// Copying or parsing val is valid.
valCopy = append([]byte{}, val...)
// Assigning val slice to another variable is NOT OK.
valNot = val // Do not do this.
return nil
})
handle(err)
// DO NOT access val here. It is the most common cause of bugs.
fmt.Printf("NEVER do this. %s\n", valNot)
// You must copy it to use it outside item.Value(...).
fmt.Printf("The answer is: %s\n", valCopy)
// Alternatively, you could also use item.ValueCopy().
valCopy, err = item.ValueCopy(nil)
handle(err)
fmt.Printf("The answer is: %s\n", valCopy)
return nil
})
```
`Txn.Get()` returns `ErrKeyNotFound` if the value is not found.
Please note that values returned from `Get()` are only valid while the
transaction is open. If you need to use a value outside of the transaction
then you must use `copy()` to copy it to another byte slice.
Use the `Txn.Delete()` method to delete a key.
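For example, removing the key set above:
```go
err := db.Update(func(txn *badger.Txn) error {
  return txn.Delete([]byte("answer"))
})
```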
### Monotonically increasing integers
To get unique monotonically increasing integers with strong durability, you can
use the `DB.GetSequence` method. This method returns a `Sequence` object, which
is thread-safe and can be used concurrently via various goroutines.
Badger leases a range of integers from memory to hand out, sized by the
bandwidth provided to `DB.GetSequence`. The frequency of disk writes is
determined by this lease bandwidth and the frequency of `Next`
invocations. Setting the bandwidth too low causes more disk writes; setting it
too high risks wasted integers if Badger is closed or crashes.
To avoid wasted integers, call `Release` before closing Badger.
```go
seq, err := db.GetSequence(key, 1000)
handle(err)
defer seq.Release()
for {
  num, err := seq.Next()
  handle(err)
  fmt.Println(num)
}
```
### Merge Operations
Badger provides support for unordered merge operations. You can define a func
of type `MergeFunc` which takes in an existing value, and a value to be
_merged_ with it. It returns a new value which is the result of the _merge_
operation. All values are byte slices. For example, here is a merge
function (`add`) which adds a `uint64` value to an existing `uint64` value.
```go
func uint64ToBytes(i uint64) []byte {
var buf [8]byte
binary.BigEndian.PutUint64(buf[:], i)
return buf[:]
}
func bytesToUint64(b []byte) uint64 {
return binary.BigEndian.Uint64(b)
}
// Merge function to add two uint64 numbers
func add(existing, new []byte) []byte {
return uint64ToBytes(bytesToUint64(existing) + bytesToUint64(new))
}
```
This function can then be passed to the `DB.GetMergeOperator()` method, along
with a key, and a duration value. The duration specifies how often the merge
function is run on values that have been added using the `MergeOperator.Add()`
method.
The `MergeOperator.Get()` method can be used to retrieve the cumulative value of the key
associated with the merge operation.
```go
key := []byte("merge")
m := db.GetMergeOperator(key, add, 200*time.Millisecond)
defer m.Stop()
m.Add(uint64ToBytes(1))
m.Add(uint64ToBytes(2))
m.Add(uint64ToBytes(3))
res, err := m.Get() // res should have value 6 encoded
handle(err)
fmt.Println(bytesToUint64(res))
```
### Setting Time To Live (TTL) and User Metadata on Keys
Badger allows setting an optional Time to Live (TTL) value on keys. Once the TTL has
elapsed, the key will no longer be retrievable and will be eligible for garbage
collection. A TTL can be set as a `time.Duration` value using the `Txn.SetWithTTL()`
API method.
An optional user metadata value can be set on each key. A user metadata value
is represented by a single byte. It can be used to set certain bits along
with the key to aid in interpreting or decoding the key-value pair. User
metadata can be set using the `Txn.SetWithMeta()` API method.
`Txn.SetEntry()` can be used to set the key, value, user metadata and TTL,
all at once.
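As a quick sketch of these calls together (the key names, TTL and metadata
byte here are arbitrary):
```go
err := db.Update(func(txn *badger.Txn) error {
  // This key stops being readable (and becomes eligible for GC) in an hour.
  if err := txn.SetWithTTL([]byte("session"), []byte("data"), time.Hour); err != nil {
    return err
  }
  // Attach a single user-metadata byte to another key.
  return txn.SetWithMeta([]byte("answer"), []byte("42"), byte(1))
})
```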
### Iterating over keys
To iterate over keys, we can use an `Iterator`, which can be obtained using the
`Txn.NewIterator()` method. Iteration happens in byte-wise lexicographical sorting
order.
```go
err := db.View(func(txn *badger.Txn) error {
opts := badger.DefaultIteratorOptions
opts.PrefetchSize = 10
it := txn.NewIterator(opts)
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
k := item.Key()
err := item.Value(func(v []byte) error {
fmt.Printf("key=%s, value=%s\n", k, v)
return nil
})
if err != nil {
return err
}
}
return nil
})
```
The iterator allows you to move to a specific point in the list of keys and move
forward or backward through the keys one at a time.
By default, Badger prefetches the values of the next 100 items. You can adjust
that with the `IteratorOptions.PrefetchSize` field. However, setting it to
a value higher than `GOMAXPROCS` (which we recommend setting to 128 or higher)
shouldn't give any additional benefits. You can also turn off the fetching of
values altogether; see the section below on key-only iteration.
#### Prefix scans
To iterate over a key prefix, you can combine `Seek()` and `ValidForPrefix()`:
```go
db.View(func(txn *badger.Txn) error {
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
prefix := []byte("1234")
for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
item := it.Item()
k := item.Key()
err := item.Value(func(v []byte) error {
fmt.Printf("key=%s, value=%s\n", k, v)
return nil
})
if err != nil {
return err
}
}
return nil
})
```
#### Key-only iteration
Badger supports a unique mode of iteration called _key-only_ iteration. It is
several orders of magnitude faster than regular iteration, because it involves
access to the LSM-tree only, which is usually resident entirely in RAM. To
enable key-only iteration, you need to set the `IteratorOptions.PrefetchValues`
field to `false`. This can also be used to do sparse reads for selected keys
during an iteration, by calling `item.Value()` only when required.
```go
err := db.View(func(txn *badger.Txn) error {
opts := badger.DefaultIteratorOptions
opts.PrefetchValues = false
it := txn.NewIterator(opts)
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
k := item.Key()
fmt.Printf("key=%s\n", k)
}
return nil
})
```
### Stream
Badger provides a Stream framework, which concurrently iterates over all or a
portion of the DB, converts data into custom key-values, and streams them out
serially to be sent over the network, written to disk, or even written back to
Badger. This is a much faster way to iterate over Badger than using a single
Iterator. Stream supports Badger in both managed and normal modes.
Stream uses the natural boundaries created by SSTables within the LSM tree, to
quickly generate key ranges. Each goroutine then picks a range and runs an
iterator to iterate over it. Each iterator iterates over all versions of values
and is created from the same transaction, thus working over a snapshot of the
DB. Every time a new key is encountered, it calls `ChooseKey(item)`, followed
by `KeyToList(key, itr)`. This allows a user to select or reject that key, and
if selected, convert the value versions into custom key-values. The goroutine
batches up 4MB worth of key-values before sending them over to a channel.
Another goroutine further batches up data from this channel using a *smart
batching* algorithm and calls `Send` serially.
This framework is designed for high throughput key-value iteration, spreading
the work of iteration across many goroutines. `DB.Backup` uses this framework to
provide full and incremental backups quickly. Dgraph is a heavy user of this
framework. In fact, this framework was developed and used within Dgraph, before
getting ported over to Badger.
```go
stream := db.NewStream()
// db.NewStreamAt(readTs) for managed mode.
// -- Optional settings
stream.NumGo = 16 // Set number of goroutines to use for iteration.
stream.Prefix = []byte("some-prefix") // Leave nil for iteration over the whole DB.
stream.LogPrefix = "Badger.Streaming" // For identifying stream logs. Outputs to Logger.
// ChooseKey is called concurrently for every key. If left nil, assumes true by default.
stream.ChooseKey = func(item *badger.Item) bool {
return bytes.HasSuffix(item.Key(), []byte("er"))
}
// KeyToList is called concurrently for chosen keys. This can be used to convert
// Badger data into custom key-values. If nil, uses stream.ToList, a default
// implementation, which picks all valid key-values.
stream.KeyToList = nil
// -- End of optional settings.
// Send is called serially, while Stream.Orchestrate is running.
stream.Send = func(list *pb.KVList) error {
return proto.MarshalText(w, list) // Write to w.
}
// Run the stream
if err := stream.Orchestrate(context.Background()); err != nil {
return err
}
// Done.
```
### Garbage Collection
Badger values need to be garbage collected for two reasons:
* Badger keeps values separately from the LSM tree. This means that the compaction operations
that clean up the LSM tree do not touch the values at all. Values need to be cleaned up
separately.
* Concurrent read/write transactions could leave behind multiple values for a single key, because they
are stored with different versions. These could accumulate, and take up unneeded space beyond the
time these older versions are needed.
Badger relies on the client to perform garbage collection at a time of their choosing. It provides
the following method, which can be invoked at an appropriate time:
* `DB.RunValueLogGC()`: This method is designed to do garbage collection while
Badger is online. Along with randomly picking a file, it uses statistics generated by the
LSM-tree compactions to pick files that are likely to lead to maximum space
reclamation. It is recommended that you call it during periods of low activity in
your system, or periodically. One call results in the removal of at most
one log file. As an optimization, you could also immediately re-run it whenever
it returns a nil error (indicating a successful value log GC), as shown below.
```go
ticker := time.NewTicker(5 * time.Minute)
defer ticker.Stop()
for range ticker.C {
again:
err := db.RunValueLogGC(0.7)
if err == nil {
goto again
}
}
```
* `DB.PurgeOlderVersions()`: This method is **DEPRECATED** since v1.5.0. Now, Badger's LSM tree automatically discards older/invalid versions of keys.
**Note: The RunValueLogGC method would not garbage collect the latest value log.**
### Database backup
There are two public API methods `DB.Backup()` and `DB.Load()` which can be
used to do online backups and restores. Badger v0.9 provides a CLI tool
`badger`, which can do offline backup/restore. Make sure you have `$GOPATH/bin`
in your PATH to use this tool.
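For the API route, here is a minimal sketch of a full online backup to a file
(the file path is illustrative):
```go
f, err := os.Create("badger.bak")
if err != nil {
  return err
}
defer f.Close()
// since == 0 requests a full backup. Keep the returned version and pass it
// as `since` in a later call to produce an incremental backup.
since, err := db.Backup(f, 0)
if err != nil {
  return err
}
fmt.Println("backed up through version", since)
```
Restoring is the mirror image: open the backup file and pass the reader to
`db.Load()`.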
The command below will create a version-agnostic backup of the database, to a
file `badger.bak` in the current working directory:
```
badger backup --dir <path/to/badgerdb>
```
To restore `badger.bak` in the current working directory to a new database:
```
badger restore --dir <path/to/badgerdb>
```
See `badger --help` for more details.
If you have a Badger database that was created using v0.8 (or below), you can
use the `badger_backup` tool provided in v0.8.1, and then restore it using the
command above to upgrade your database to work with the latest version.
```
badger_backup --dir <path/to/badgerdb> --backup-file badger.bak
```
We recommend that all users use the `Backup` and `Restore` APIs and tools. However,
Badger is also rsync-friendly because all files are immutable, barring the
latest value log, which is append-only. So rsync can be used as a rudimentary way
to perform a backup. In the following script, we repeat rsync to ensure that the
LSM tree remains consistent with the MANIFEST file while doing a full backup.
```
#!/bin/bash
set -o history
set -o histexpand
# Makes a complete copy of a Badger database directory.
# Repeat rsync if the MANIFEST and SSTables are updated.
rsync -avz --delete db/ dst
while !! | grep -q "(MANIFEST\|\.sst)$"; do :; done
```
### Memory usage
Badger's memory usage can be managed by tweaking several options available in
the `Options` struct that is passed in when opening the database using
`badger.Open()`.
- `Options.ValueLogLoadingMode` can be set to `options.FileIO` (instead of the
default `options.MemoryMap`) to avoid memory-mapping log files. This can be
useful in environments with low RAM.
- Number of memtables (`Options.NumMemtables`)
- If you modify `Options.NumMemtables`, also adjust `Options.NumLevelZeroTables` and
`Options.NumLevelZeroTablesStall` accordingly.
- Number of concurrent compactions (`Options.NumCompactors`)
- Mode in which LSM tree is loaded (`Options.TableLoadingMode`)
- Size of table (`Options.MaxTableSize`)
- Size of value log file (`Options.ValueLogFileSize`)
If you want to decrease the memory usage of Badger instance, tweak these
options (ideally one at a time) until you achieve the desired
memory usage.
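As an illustration, here is a sketch of a low-memory configuration (the
specific values are arbitrary starting points, not recommendations; `options`
is the `github.com/dgraph-io/badger/options` package):
```go
opts := badger.DefaultOptions
opts.Dir = "/tmp/badger"
opts.ValueDir = "/tmp/badger"
opts.ValueLogLoadingMode = options.FileIO // don't memory-map value log files
opts.TableLoadingMode = options.FileIO    // don't memory-map LSM tables either
opts.NumMemtables = 2                     // fewer memtables held in RAM
opts.NumLevelZeroTables = 2               // adjust along with NumMemtables
opts.NumLevelZeroTablesStall = 4
opts.NumCompactors = 1                    // fewer concurrent compactions
opts.MaxTableSize = 16 << 20              // smaller tables (16 MB)
opts.ValueLogFileSize = 256 << 20         // smaller value log files (256 MB)
db, err := badger.Open(opts)
if err != nil {
  return err
}
defer db.Close()
```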
### Statistics
Badger records metrics using the [expvar] package, which is included in the Go
standard library. All the metrics are documented in the [y/metrics.go][metrics]
file.
The `expvar` package adds a handler to the default HTTP server (which has to be
started explicitly), and serves up the metrics at the `/debug/vars` endpoint.
These metrics can then be collected by a system like [Prometheus], to get
better visibility into what Badger is doing.
[expvar]: https://golang.org/pkg/expvar/
[metrics]: https://github.com/dgraph-io/badger/blob/master/y/metrics.go
[Prometheus]: https://prometheus.io/
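Since the `expvar` package (imported by Badger) registers its handler on
`http.DefaultServeMux` at import time, exposing the metrics only requires
starting the default HTTP server somewhere in your program; a sketch (the
listen address is arbitrary):
```go
import (
  "log"
  "net/http"
)

func serveMetrics() {
  // The /debug/vars handler is registered by the expvar package, which
  // Badger imports; starting the default server makes it reachable.
  go func() {
    log.Println(http.ListenAndServe("localhost:8080", nil))
  }()
}
```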
## Resources
### Blog Posts
1. [Introducing Badger: A fast key-value store written natively in
Go](https://open.dgraph.io/post/badger/)
2. [Make Badger crash resilient with ALICE](https://blog.dgraph.io/post/alice/)
3. [Badger vs LMDB vs BoltDB: Benchmarking key-value databases in Go](https://blog.dgraph.io/post/badger-lmdb-boltdb/)
4. [Concurrent ACID Transactions in Badger](https://blog.dgraph.io/post/badger-txn/)
## Design
Badger was written with these design goals in mind:
- Write a key-value database in pure Go.
- Use the latest research to build the fastest KV database for data sets spanning terabytes.
- Optimize for SSDs.
Badger's design is based on a paper titled _[WiscKey: Separating Keys from
Values in SSD-conscious Storage][wisckey]_.
[wisckey]: https://www.usenix.org/system/files/conference/fast16/fast16-papers-lu.pdf
### Comparisons
| Feature | Badger | RocksDB | BoltDB |
| ------- | ------ | ------- | ------ |
| Design | LSM tree with value log | LSM tree only | B+ tree |
| High Read throughput | Yes | No | Yes |
| High Write throughput | Yes | Yes | No |
| Designed for SSDs | Yes (with latest research <sup>1</sup>) | Not specifically <sup>2</sup> | No |
| Embeddable | Yes | Yes | Yes |
| Sorted KV access | Yes | Yes | Yes |
| Pure Go (no Cgo) | Yes | No | Yes |
| Transactions | Yes, ACID, concurrent with SSI<sup>3</sup> | Yes (but non-ACID) | Yes, ACID |
| Snapshots | Yes | Yes | Yes |
| TTL support | Yes | Yes | No |
| 3D access (key-value-version) | Yes<sup>4</sup> | No | No |
<sup>1</sup> The [WISCKEY paper][wisckey] (on which Badger is based) saw big
wins with separating values from keys, significantly reducing the write
amplification compared to a typical LSM tree.
<sup>2</sup> RocksDB is an SSD-optimized version of LevelDB, which was designed specifically for rotating disks.
As such, RocksDB's design isn't aimed at SSDs.
<sup>3</sup> SSI: Serializable Snapshot Isolation. For more details, see the blog post [Concurrent ACID Transactions in Badger](https://blog.dgraph.io/post/badger-txn/)
<sup>4</sup> Badger provides direct access to value versions via its Iterator API.
Users can also specify how many versions to keep per key via Options.
### Benchmarks
We have run comprehensive benchmarks against RocksDB, Bolt and LMDB. The
benchmarking code and the detailed logs for the benchmarks can be found in the
[badger-bench] repo. More explanation, including graphs, can be found in the blog posts (linked
above).
[badger-bench]: https://github.com/dgraph-io/badger-bench
## Other Projects Using Badger
Below is a list of known projects that use Badger:
* [0-stor](https://github.com/zero-os/0-stor) - Single device object store.
* [Dgraph](https://github.com/dgraph-io/dgraph) - Distributed graph database.
* [Dispatch Protocol](https://github.com/dispatchlabs/disgo) - Blockchain protocol for distributed application data analytics.
* [Sandglass](https://github.com/celrenheit/sandglass) - distributed, horizontally scalable, persistent, time sorted message queue.
* [Usenet Express](https://usenetexpress.com/) - Serving over 300TB of data with Badger.
* [go-ipfs](https://github.com/ipfs/go-ipfs) - Go client for the InterPlanetary File System (IPFS), a new hypermedia distribution protocol.
* [gorush](https://github.com/appleboy/gorush) - A push notification server written in Go.
* [emitter](https://github.com/emitter-io/emitter) - Scalable, low latency, distributed pub/sub broker with message storage, uses MQTT, gossip and badger.
* [GarageMQ](https://github.com/valinurovam/garagemq) - AMQP server written in Go.
* [RedixDB](https://alash3al.github.io/redix/) - A real-time persistent key-value store with the same redis protocol.
* [BBVA](https://github.com/BBVA/raft-badger) - Raft backend implementation using BadgerDB for Hashicorp raft.
* [Riot](https://github.com/go-ego/riot) - An open-source, distributed search engine.
* [Fantom](https://github.com/Fantom-foundation/go-lachesis) - aBFT Consensus platform for distributed applications.
* [decred](https://github.com/decred/dcrdata) - An open, progressive, and self-funding cryptocurrency with a system of community-based governance integrated into its blockchain.
* [OpenNetSys](https://github.com/opennetsys/c3-go) - Create useful dApps in any software language.
* [HoneyTrap](https://github.com/honeytrap/honeytrap) - An extensible and opensource system for running, monitoring and managing honeypots.
* [Insolar](https://github.com/insolar/insolar) - Enterprise-ready blockchain platform.
* [IoTeX](https://github.com/iotexproject/iotex-core) - The next generation of the decentralized network for IoT powered by scalability- and privacy-centric blockchains.
* [go-sessions](https://github.com/kataras/go-sessions) - The sessions manager for Go net/http and fasthttp.
* [Babble](https://github.com/mosaicnetworks/babble) - BFT Consensus platform for distributed applications.
* [Tormenta](https://github.com/jpincas/tormenta) - Embedded object-persistence layer / simple JSON database for Go projects.
* [BadgerHold](https://github.com/timshannon/badgerhold) - An embeddable NoSQL store for querying Go types built on Badger.
If you are using Badger in a project please send a pull request to add it to the list.
## Frequently Asked Questions
- **My writes are getting stuck. Why?**
**Update: With the new `Value(func(v []byte))` API, this deadlock can no longer
happen.**
The following is true for users on Badger v1.x.
This can happen if a long-running iteration with `Prefetch` set to false makes
`Item::Value` calls internally in the loop. That causes Badger to
acquire read locks over the value log files to avoid value log GC removing the
file from underneath. As a side effect, this also blocks a new value log GC
file from being created when the value log file boundary is hit.
Please see Github issues [#293](https://github.com/dgraph-io/badger/issues/293)
and [#315](https://github.com/dgraph-io/badger/issues/315).
There are multiple workarounds during iteration:
1. Use `Item::ValueCopy` instead of `Item::Value` when retrieving a value.
1. Set `Prefetch` to true. Badger would then copy over the value and release the
file lock immediately.
1. When `Prefetch` is false, don't call `Item::Value` and do a pure key-only
iteration. This might be useful if you just want to delete a lot of keys.
1. Do the writes in a separate transaction after the reads.
- **My writes are really slow. Why?**
Are you creating a new transaction for every single key update, and waiting for
it to `Commit` fully before creating a new one? This will lead to very low
throughput.
We have created the `WriteBatch` API, which provides a way to batch up
many updates into a single transaction and `Commit` that transaction using
callbacks to avoid blocking. This amortizes the cost of a transaction really
well, and provides the most efficient way to do bulk writes.
```go
wb := db.NewWriteBatch()
defer wb.Cancel()
for i := 0; i < N; i++ {
err := wb.Set(key(i), value(i), 0) // Will create txns as needed.
handle(err)
}
handle(wb.Flush()) // Wait for all txns to finish.
```
Note that the `WriteBatch` API does not allow any reads. For read-modify-write
workloads, you should be using the `Transaction` API.
- **I don't see any disk write. Why?**
If you're using Badger with `SyncWrites=false`, then your writes might not be written to the value log
and won't get synced to disk immediately. Writes to the LSM tree are done in memory first, before they
get compacted to disk. The compaction would only happen once `MaxTableSize` has been reached. So, if
you're doing a few writes and then checking, you might not see anything on disk. Once you `Close`
the database, you'll see these writes on disk.
- **Reverse iteration doesn't give me the right results.**
Just like forward iteration goes to the first key which is equal to or greater than the SEEK key, reverse iteration goes to the first key which is equal to or less than the SEEK key. Therefore, the SEEK key would not be part of the results. You can typically add a `0xff` byte as a suffix to the SEEK key to include it in the results, as sketched below. See the following issues: [#436](https://github.com/dgraph-io/badger/issues/436) and [#347](https://github.com/dgraph-io/badger/issues/347).
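A minimal sketch of that workaround (assuming no real key in your data ends
with `0xff`):
```go
err := db.View(func(txn *badger.Txn) error {
  opts := badger.DefaultIteratorOptions
  opts.Reverse = true
  it := txn.NewIterator(opts)
  defer it.Close()
  // Seek to key+0xff so that "key" itself is included in the results.
  seek := append([]byte("key"), 0xff)
  for it.Seek(seek); it.Valid(); it.Next() {
    fmt.Printf("key=%s\n", it.Item().Key())
  }
  return nil
})
```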
- **Which instances should I use for Badger?**
We recommend using instances which provide local SSD storage, without any limit
on the maximum IOPS. In AWS, these are storage optimized instances like i3. They
provide local SSDs which clock 100K IOPS over 4KB blocks easily.
- **I'm getting a closed channel error. Why?**
```
panic: close of closed channel
panic: send on closed channel
```
If you're seeing panics like the above, it is because you're operating on a closed DB. This can happen if you call `Close()` before sending a write, or call it multiple times. You should ensure that you only call `Close()` once, and that all your read/write operations finish before closing.
- **Are there any Go specific settings that I should use?**
We *highly* recommend setting a high number for GOMAXPROCS, which allows Go to
observe the full IOPS throughput provided by modern SSDs. In Dgraph, we have set
it to 128. For more details, [see this
thread](https://groups.google.com/d/topic/golang-nuts/jPb_h3TvlKE/discussion).
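For instance, it can be set from code at startup (or via the `GOMAXPROCS`
environment variable):
```go
import "runtime"

func init() {
  // 128 is the value we use in Dgraph; tune for your hardware.
  runtime.GOMAXPROCS(128)
}
```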
- **Are there any linux specific settings that I should use?**
We recommend setting the maximum number of file descriptors to a high number, depending upon the expected size of your data.
## Contact
- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions, feature requests and discussions.
- Please use [Github issue tracker](https://github.com/dgraph-io/badger/issues) for filing bugs or feature requests.
- Join [![Slack Status](http://slack.dgraph.io/badge.svg)](http://slack.dgraph.io).
- Follow us on Twitter [@dgraphlabs](https://twitter.com/dgraphlabs).
vendor/github.com/dgraph-io/badger/appveyor.yml generated vendored Normal file
@@ -0,0 +1,48 @@
# version format
version: "{build}"
# Operating system (build VM template)
os: Windows Server 2012 R2
# Platform.
platform: x64
clone_folder: c:\gopath\src\github.com\dgraph-io\badger
# Environment variables
environment:
GOVERSION: 1.8.3
GOPATH: c:\gopath
# scripts that run after cloning repository
install:
- set PATH=%GOPATH%\bin;c:\go\bin;%PATH%
- go version
- go env
- python --version
# To run your custom scripts instead of automatic MSBuild
build_script:
# We need to disable firewall - https://github.com/appveyor/ci/issues/1579#issuecomment-309830648
- ps: Disable-NetFirewallRule -DisplayName 'File and Printer Sharing (SMB-Out)'
- cd c:\gopath\src\github.com\dgraph-io\badger
- git branch
- go get -t ./...
# To run your custom scripts instead of automatic tests
test_script:
# Unit tests
- ps: Add-AppveyorTest "Unit Tests" -Outcome Running
- go test -v github.com/dgraph-io/badger/...
- go test -v -vlog_mmap=false github.com/dgraph-io/badger/...
- ps: Update-AppveyorTest "Unit Tests" -Outcome Passed
notifications:
- provider: Email
to:
- pawan@dgraph.io
on_build_failure: true
on_build_status_changed: true
# to disable deployment
deploy: off
vendor/github.com/dgraph-io/badger/backup.go generated vendored Normal file
@@ -0,0 +1,226 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bufio"
"bytes"
"context"
"encoding/binary"
"io"
"sync"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/pb"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
// Backup is a wrapper function over Stream.Backup to generate full and incremental backups of the
// DB. For more control over how many goroutines are used to generate the backup, or if you wish to
// backup only a certain range of keys, use Stream.Backup directly.
func (db *DB) Backup(w io.Writer, since uint64) (uint64, error) {
stream := db.NewStream()
stream.LogPrefix = "DB.Backup"
return stream.Backup(w, since)
}
// Backup dumps a protobuf-encoded list of all entries in the database into the
// given writer, that are newer than the specified version. It returns a
// timestamp indicating when the entries were dumped which can be passed into a
// later invocation to generate an incremental dump, of entries that have been
// added/modified since the last invocation of Stream.Backup().
//
// This can be used to backup the data in a database at a given point in time.
func (stream *Stream) Backup(w io.Writer, since uint64) (uint64, error) {
stream.KeyToList = func(key []byte, itr *Iterator) (*pb.KVList, error) {
list := &pb.KVList{}
for ; itr.Valid(); itr.Next() {
item := itr.Item()
if !bytes.Equal(item.Key(), key) {
return list, nil
}
if item.Version() < since {
// Ignore versions less than given timestamp, or skip older
// versions of the given key.
return list, nil
}
var valCopy []byte
if !item.IsDeletedOrExpired() {
// No need to copy value, if item is deleted or expired.
var err error
valCopy, err = item.ValueCopy(nil)
if err != nil {
stream.db.opt.Errorf("Key [%x, %d]. Error while fetching value [%v]\n",
item.Key(), item.Version(), err)
return nil, err
}
}
// clear txn bits
meta := item.meta &^ (bitTxn | bitFinTxn)
kv := &pb.KV{
Key: item.KeyCopy(nil),
Value: valCopy,
UserMeta: []byte{item.UserMeta()},
Version: item.Version(),
ExpiresAt: item.ExpiresAt(),
Meta: []byte{meta},
}
list.Kv = append(list.Kv, kv)
switch {
case item.DiscardEarlierVersions():
// If we need to discard earlier versions of this item, add a delete
// marker just below the current version.
list.Kv = append(list.Kv, &pb.KV{
Key: item.KeyCopy(nil),
Version: item.Version() - 1,
Meta: []byte{bitDelete},
})
return list, nil
case item.IsDeletedOrExpired():
return list, nil
}
}
return list, nil
}
var maxVersion uint64
stream.Send = func(list *pb.KVList) error {
for _, kv := range list.Kv {
if maxVersion < kv.Version {
maxVersion = kv.Version
}
if err := writeTo(kv, w); err != nil {
return err
}
}
return nil
}
if err := stream.Orchestrate(context.Background()); err != nil {
return 0, err
}
return maxVersion, nil
}
func writeTo(entry *pb.KV, w io.Writer) error {
if err := binary.Write(w, binary.LittleEndian, uint64(entry.Size())); err != nil {
return err
}
buf, err := entry.Marshal()
if err != nil {
return err
}
_, err = w.Write(buf)
return err
}
// Load reads a protobuf-encoded list of all entries from a reader and writes
// them to the database. This can be used to restore the database from a backup
// made by calling DB.Backup().
//
// DB.Load() should be called on a database that is not running any other
// concurrent transactions while it is running.
func (db *DB) Load(r io.Reader) error {
br := bufio.NewReaderSize(r, 16<<10)
unmarshalBuf := make([]byte, 1<<10)
var entries []*Entry
var wg sync.WaitGroup
errChan := make(chan error, 1)
// func to check for pending error before sending off a batch for writing
batchSetAsyncIfNoErr := func(entries []*Entry) error {
select {
case err := <-errChan:
return err
default:
wg.Add(1)
return db.batchSetAsync(entries, func(err error) {
defer wg.Done()
if err != nil {
select {
case errChan <- err:
default:
}
}
})
}
}
for {
var sz uint64
err := binary.Read(br, binary.LittleEndian, &sz)
if err == io.EOF {
break
} else if err != nil {
return err
}
if cap(unmarshalBuf) < int(sz) {
unmarshalBuf = make([]byte, sz)
}
e := &pb.KV{}
if _, err = io.ReadFull(br, unmarshalBuf[:sz]); err != nil {
return err
}
if err = e.Unmarshal(unmarshalBuf[:sz]); err != nil {
return err
}
var userMeta byte
if len(e.UserMeta) > 0 {
userMeta = e.UserMeta[0]
}
entries = append(entries, &Entry{
Key: y.KeyWithTs(e.Key, e.Version),
Value: e.Value,
UserMeta: userMeta,
ExpiresAt: e.ExpiresAt,
meta: e.Meta[0],
})
// Update nextTxnTs, memtable stores this timestamp in badger head
// when flushed.
if e.Version >= db.orc.nextTxnTs {
db.orc.nextTxnTs = e.Version + 1
}
if len(entries) == 1000 {
if err := batchSetAsyncIfNoErr(entries); err != nil {
return err
}
entries = make([]*Entry, 0, 1000)
}
}
if len(entries) > 0 {
if err := batchSetAsyncIfNoErr(entries); err != nil {
return err
}
}
wg.Wait()
select {
case err := <-errChan:
return err
default:
// Mark all versions done up until nextTxnTs.
db.orc.txnMark.Done(db.orc.nextTxnTs - 1)
return nil
}
}
vendor/github.com/dgraph-io/badger/backup_test.go generated vendored Normal file
@@ -0,0 +1,519 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"reflect"
"strconv"
"testing"
"time"
"github.com/stretchr/testify/require"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/pb"
)
func TestBackupRestore1(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
db, err := Open(getTestOptions(dir))
require.NoError(t, err)
// Write some stuff
entries := []struct {
key []byte
val []byte
userMeta byte
version uint64
}{
{key: []byte("answer1"), val: []byte("42"), version: 1},
{key: []byte("answer2"), val: []byte("43"), userMeta: 1, version: 2},
}
err = db.Update(func(txn *Txn) error {
e := entries[0]
err := txn.SetWithMeta(e.key, e.val, e.userMeta)
if err != nil {
return err
}
return nil
})
require.NoError(t, err)
err = db.Update(func(txn *Txn) error {
e := entries[1]
err := txn.SetWithMeta(e.key, e.val, e.userMeta)
if err != nil {
return err
}
return nil
})
require.NoError(t, err)
// Use different directory.
dir, err = ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
bak, err := ioutil.TempFile(dir, "badgerbak")
require.NoError(t, err)
ts, err := db.Backup(bak, 0)
t.Logf("New ts: %d\n", ts)
require.NoError(t, err)
require.NoError(t, bak.Close())
require.NoError(t, db.Close())
db, err = Open(getTestOptions(dir))
require.NoError(t, err)
defer db.Close()
bak, err = os.Open(bak.Name())
require.NoError(t, err)
defer bak.Close()
require.NoError(t, db.Load(bak))
err = db.View(func(txn *Txn) error {
opts := DefaultIteratorOptions
opts.AllVersions = true
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
val, err := item.ValueCopy(nil)
if err != nil {
return err
}
require.Equal(t, entries[count].key, item.Key())
require.Equal(t, entries[count].val, val)
require.Equal(t, entries[count].version, item.Version())
require.Equal(t, entries[count].userMeta, item.UserMeta())
count++
}
require.Equal(t, count, 2)
return nil
})
require.NoError(t, err)
}
func TestBackupRestore2(t *testing.T) {
tmpdir, err := ioutil.TempDir("", "badger-test")
if err != nil {
t.Fatal(err)
}
defer func() {
os.RemoveAll(tmpdir)
}()
s1Path := filepath.Join(tmpdir, "test1")
s2Path := filepath.Join(tmpdir, "test2")
s3Path := filepath.Join(tmpdir, "test3")
opts := DefaultOptions
opts.Dir = s1Path
opts.ValueDir = s1Path
db1, err := Open(opts)
if err != nil {
t.Fatal(err)
}
key1 := []byte("key1")
key2 := []byte("key2")
rawValue := []byte("NotLongValue")
N := byte(251)
err = db1.Update(func(tx *Txn) error {
if err := tx.Set(key1, rawValue); err != nil {
return err
}
return tx.Set(key2, rawValue)
})
if err != nil {
t.Fatal(err)
}
for i := byte(1); i < N; i++ {
err = db1.Update(func(tx *Txn) error {
if err := tx.Set(append(key1, i), rawValue); err != nil {
return err
}
return tx.Set(append(key2, i), rawValue)
})
if err != nil {
t.Fatal(err)
}
}
var backup bytes.Buffer
_, err = db1.Backup(&backup, 0)
if err != nil {
t.Fatal(err)
}
fmt.Println("backup1 length:", backup.Len())
opts = DefaultOptions
opts.Dir = s2Path
opts.ValueDir = s2Path
db2, err := Open(opts)
if err != nil {
t.Fatal(err)
}
err = db2.Load(&backup)
if err != nil {
t.Fatal(err)
}
for i := byte(1); i < N; i++ {
err = db2.View(func(tx *Txn) error {
k := append(key1, i)
item, err := tx.Get(k)
if err != nil {
if err == ErrKeyNotFound {
return fmt.Errorf("Key %q has been not found, but was set\n", k)
}
return err
}
v, err := item.ValueCopy(nil)
if err != nil {
return err
}
if !reflect.DeepEqual(v, rawValue) {
return fmt.Errorf("Values not match, got %v, expected %v", v, rawValue)
}
return nil
})
if err != nil {
t.Fatal(err)
}
}
for i := byte(1); i < N; i++ {
err = db2.Update(func(tx *Txn) error {
if err := tx.Set(append(key1, i), rawValue); err != nil {
return err
}
return tx.Set(append(key2, i), rawValue)
})
if err != nil {
t.Fatal(err)
}
}
backup.Reset()
_, err = db2.Backup(&backup, 0)
if err != nil {
t.Fatal(err)
}
fmt.Println("backup2 length:", backup.Len())
opts = DefaultOptions
opts.Dir = s3Path
opts.ValueDir = s3Path
db3, err := Open(opts)
if err != nil {
t.Fatal(err)
}
err = db3.Load(&backup)
if err != nil {
t.Fatal(err)
}
for i := byte(1); i < N; i++ {
err = db3.View(func(tx *Txn) error {
k := append(key1, i)
item, err := tx.Get(k)
if err != nil {
if err == ErrKeyNotFound {
return fmt.Errorf("Key %q has been not found, but was set\n", k)
}
return err
}
v, err := item.ValueCopy(nil)
if err != nil {
return err
}
if !reflect.DeepEqual(v, rawValue) {
return fmt.Errorf("Values not match, got %v, expected %v", v, rawValue)
}
return nil
})
if err != nil {
t.Fatal(err)
}
}
}
var randSrc = rand.NewSource(time.Now().UnixNano())
func createEntries(n int) []*pb.KV {
entries := make([]*pb.KV, n)
for i := 0; i < n; i++ {
entries[i] = &pb.KV{
Key: []byte(fmt.Sprint("key", i)),
Value: []byte{1},
UserMeta: []byte{0},
Meta: []byte{0},
}
}
return entries
}
func populateEntries(db *DB, entries []*pb.KV) error {
return db.Update(func(txn *Txn) error {
var err error
for i, e := range entries {
if err = txn.Set(e.Key, e.Value); err != nil {
return err
}
entries[i].Version = 1
}
return nil
})
}
func TestBackup(t *testing.T) {
var bb bytes.Buffer
tmpdir, err := ioutil.TempDir("", "badger-test")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(tmpdir)
opts := DefaultOptions
opts.Dir = filepath.Join(tmpdir, "backup0")
opts.ValueDir = opts.Dir
db1, err := Open(opts)
if err != nil {
t.Fatal(err)
}
N := 1000
entries := createEntries(N)
require.NoError(t, populateEntries(db1, entries))
_, err = db1.Backup(&bb, 0)
require.NoError(t, err)
err = db1.View(func(txn *Txn) error {
opts := DefaultIteratorOptions
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
idx, err := strconv.Atoi(string(item.Key())[3:])
if err != nil {
return err
}
if idx > N || !bytes.Equal(entries[idx].Key, item.Key()) {
return fmt.Errorf("%s: %s", string(item.Key()), ErrKeyNotFound)
}
count++
}
if N != count {
return fmt.Errorf("wrong number of items: %d expected, %d actual", N, count)
}
return nil
})
require.NoError(t, err)
}
func TestBackupRestore3(t *testing.T) {
var bb bytes.Buffer
tmpdir, err := ioutil.TempDir("", "badger-test")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(tmpdir)
opts := DefaultOptions
N := 1000
entries := createEntries(N)
// backup
{
opts.Dir = filepath.Join(tmpdir, "backup1")
opts.ValueDir = opts.Dir
db1, err := Open(opts)
if err != nil {
t.Fatal(err)
}
require.NoError(t, populateEntries(db1, entries))
_, err = db1.Backup(&bb, 0)
require.NoError(t, err)
require.NoError(t, db1.Close())
}
require.True(t, len(entries) == N)
require.True(t, bb.Len() > 0)
// restore
opts.Dir = filepath.Join(tmpdir, "restore1")
opts.ValueDir = opts.Dir
db2, err := Open(opts)
if err != nil {
t.Fatal(err)
}
require.NoError(t, db2.Load(&bb))
// verify
err = db2.View(func(txn *Txn) error {
opts := DefaultIteratorOptions
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
idx, err := strconv.Atoi(string(item.Key())[3:])
if err != nil {
return err
}
if idx > N || !bytes.Equal(entries[idx].Key, item.Key()) {
return fmt.Errorf("%s: %s", string(item.Key()), ErrKeyNotFound)
}
count++
}
if N != count {
return fmt.Errorf("wrong number of items: %d expected, %d actual", N, count)
}
return nil
})
require.NoError(t, err)
}
func TestBackupLoadIncremental(t *testing.T) {
tmpdir, err := ioutil.TempDir("", "badger-test")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(tmpdir)
opts := DefaultOptions
N := 100
entries := createEntries(N)
updates := make(map[int]byte)
var bb bytes.Buffer
// backup
{
opts.Dir = filepath.Join(tmpdir, "backup2")
opts.ValueDir = opts.Dir
db1, err := Open(opts)
if err != nil {
t.Fatal(err)
}
require.NoError(t, populateEntries(db1, entries))
since, err := db1.Backup(&bb, 0)
require.NoError(t, err)
ints := rand.New(randSrc).Perm(N)
// pick 10 items to mark as deleted.
err = db1.Update(func(txn *Txn) error {
for _, i := range ints[:10] {
if err := txn.Delete(entries[i].Key); err != nil {
return err
}
updates[i] = bitDelete
}
return nil
})
require.NoError(t, err)
since, err = db1.Backup(&bb, since)
require.NoError(t, err)
// pick 5 items to mark as expired.
err = db1.Update(func(txn *Txn) error {
for _, i := range (ints)[10:15] {
if err := txn.SetWithTTL(
entries[i].Key, entries[i].Value, -time.Hour); err != nil {
return err
}
updates[i] = bitDelete // expired
}
return nil
})
require.NoError(t, err)
since, err = db1.Backup(&bb, since)
require.NoError(t, err)
// pick 5 items to mark as discard.
err = db1.Update(func(txn *Txn) error {
for _, i := range ints[15:20] {
if err := txn.SetWithDiscard(entries[i].Key, entries[i].Value, 0); err != nil {
return err
}
updates[i] = bitDiscardEarlierVersions
}
return nil
})
require.NoError(t, err)
_, err = db1.Backup(&bb, since)
require.NoError(t, err)
require.NoError(t, db1.Close())
}
require.True(t, len(entries) == N)
require.True(t, bb.Len() > 0)
// restore
opts.Dir = filepath.Join(tmpdir, "restore2")
opts.ValueDir = opts.Dir
db2, err := Open(opts)
if err != nil {
t.Fatal(err)
}
require.NoError(t, db2.Load(&bb))
// verify
actual := make(map[int]byte)
err = db2.View(func(txn *Txn) error {
opts := DefaultIteratorOptions
opts.AllVersions = true
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
idx, err := strconv.Atoi(string(item.Key())[3:])
if err != nil {
return err
}
if item.IsDeletedOrExpired() {
_, ok := updates[idx]
if !ok {
return fmt.Errorf("%s: not expected to be updated but it is",
string(item.Key()))
}
actual[idx] = item.meta
count++
continue
}
}
if len(updates) != count {
return fmt.Errorf("mismatched updated items: %d expected, %d actual",
len(updates), count)
}
return nil
})
require.NoError(t, err, "%v %v", updates, actual)
}
vendor/github.com/dgraph-io/badger/badger/.gitignore generated vendored Normal file
@@ -0,0 +1 @@
/badger
vendor/github.com/dgraph-io/badger/badger/cmd/backup.go generated vendored Normal file
@@ -0,0 +1,72 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"os"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger"
"gx/ipfs/QmXj63M2w2Pq7mnBpcrs7Va8prmfhvfMUNqVhJ9TgjiMbT/cobra"
)
var backupFile string
var truncate bool
// backupCmd represents the backup command
var backupCmd = &cobra.Command{
Use: "backup",
Short: "Backup Badger database.",
Long: `Backup Badger database to a file in a version-agnostic manner.
Iterates over each key-value pair, encodes it along with its metadata and
version in protocol buffers and writes them to a file. This file can later be
used by the restore command to create an identical copy of the
database.`,
RunE: doBackup,
}
func init() {
RootCmd.AddCommand(backupCmd)
backupCmd.Flags().StringVarP(&backupFile, "backup-file", "f",
"badger.bak", "File to backup to")
backupCmd.Flags().BoolVarP(&truncate, "truncate", "t",
false, "Allow value log truncation if required.")
}
func doBackup(cmd *cobra.Command, args []string) error {
// Open DB
opts := badger.DefaultOptions
opts.Dir = sstDir
opts.ValueDir = vlogDir
opts.Truncate = truncate
db, err := badger.Open(opts)
if err != nil {
return err
}
defer db.Close()
// Create File
f, err := os.Create(backupFile)
if err != nil {
return err
}
defer f.Close()
// Run Backup
_, err = db.Backup(f, 0)
return err
}
vendor/github.com/dgraph-io/badger/badger/cmd/bank.go generated vendored Normal file
@@ -0,0 +1,451 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"errors"
"fmt"
"log"
"math"
"math/rand"
"strconv"
"sync"
"sync/atomic"
"time"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmXj63M2w2Pq7mnBpcrs7Va8prmfhvfMUNqVhJ9TgjiMbT/cobra"
)
var testCmd = &cobra.Command{
Use: "bank",
Short: "Run bank test on Badger.",
Long: `
This command runs bank test on Badger, inspired by Jepsen. It creates many
accounts and moves money among them transactionally. It also reads the sum total
of all the accounts, to ensure that the total never changes.
`,
}
var bankTest = &cobra.Command{
Use: "test",
Short: "Execute bank test on Badger.",
RunE: runTest,
}
var bankDisect = &cobra.Command{
Use: "disect",
Short: "Disect the bank output.",
Long: `
Disect the bank output BadgerDB to find the first transaction which causes
failure of the total invariant.
`,
RunE: runDisect,
}
var numGoroutines, numAccounts, numPrevious int
var duration string
var stopAll int32
var mmap bool
const keyPrefix = "account:"
const initialBal uint64 = 100
func init() {
RootCmd.AddCommand(testCmd)
testCmd.AddCommand(bankTest)
testCmd.AddCommand(bankDisect)
testCmd.Flags().IntVarP(
&numAccounts, "accounts", "a", 10000, "Number of accounts in the bank.")
bankTest.Flags().IntVarP(
&numGoroutines, "conc", "c", 16, "Number of concurrent transactions to run.")
bankTest.Flags().StringVarP(&duration, "duration", "d", "3m", "How long to run the test.")
bankTest.Flags().BoolVarP(&mmap, "mmap", "m", false, "If true, mmap LSM tree. Default is RAM.")
bankDisect.Flags().IntVarP(&numPrevious, "previous", "p", 12,
"Starting from the violation txn, how many previous versions to retrieve.")
}
func key(account int) []byte {
return []byte(fmt.Sprintf("%s%s", keyPrefix, strconv.Itoa(account)))
}
func toAccount(key []byte) int {
i, err := strconv.Atoi(string(key[len(keyPrefix):]))
y.Check(err)
return i
}
func toUint64(val []byte) uint64 {
u, err := strconv.ParseUint(string(val), 10, 64)
y.Check(err)
return uint64(u)
}
func toSlice(bal uint64) []byte {
return []byte(strconv.FormatUint(bal, 10))
}
func getBalance(txn *badger.Txn, account int) (uint64, error) {
item, err := txn.Get(key(account))
if err != nil {
return 0, err
}
var bal uint64
err = item.Value(func(v []byte) error {
bal = toUint64(v)
return nil
})
return bal, err
}
func putBalance(txn *badger.Txn, account int, bal uint64) error {
return txn.Set(key(account), toSlice(bal))
}
func min(a, b uint64) uint64 {
if a < b {
return a
}
return b
}
var errAbandoned = errors.New("Transaction abandoned due to insufficient balance")
func moveMoney(db *badger.DB, from, to int) error {
return db.Update(func(txn *badger.Txn) error {
balf, err := getBalance(txn, from)
if err != nil {
return err
}
balt, err := getBalance(txn, to)
if err != nil {
return err
}
floor := min(balf, balt)
if floor < 5 {
return errAbandoned
}
// Move the money.
balf -= 5
balt += 5
if err = putBalance(txn, from, balf); err != nil {
return err
}
return putBalance(txn, to, balt)
})
}
type account struct {
Id int
Bal uint64
}
func diff(a, b []account) string {
var buf bytes.Buffer
y.AssertTruef(len(a) == len(b), "len(a)=%d. len(b)=%d\n", len(a), len(b))
for i := range a {
ai := a[i]
bi := b[i]
if ai.Id != bi.Id || ai.Bal != bi.Bal {
buf.WriteString(fmt.Sprintf("Index: %d. Account [%+v] -> [%+v]\n", i, ai, bi))
}
}
return buf.String()
}
var errFailure = errors.New("Found a balance mismatch. Test failed.")
// seekTotal retrieves the total of all accounts by seeking for each account key.
func seekTotal(txn *badger.Txn) ([]account, error) {
expected := uint64(numAccounts) * uint64(initialBal)
var accounts []account
var total uint64
for i := 0; i < numAccounts; i++ {
item, err := txn.Get(key(i))
if err != nil {
log.Printf("Error for account: %d. err=%v. key=%q\n", i, err, key(i))
return accounts, err
}
val, err := item.ValueCopy(nil)
if err != nil {
return accounts, err
}
acc := account{
Id: i,
Bal: toUint64(val),
}
accounts = append(accounts, acc)
total += acc.Bal
}
if total != expected {
log.Printf("Balance did NOT match up. Expected: %d. Received: %d",
expected, total)
atomic.AddInt32(&stopAll, 1)
return accounts, errFailure
}
return accounts, nil
}
// Range is [lowTs, highTs).
func findFirstInvalidTxn(db *badger.DB, lowTs, highTs uint64) uint64 {
checkAt := func(ts uint64) error {
txn := db.NewTransactionAt(ts, false)
_, err := seekTotal(txn)
txn.Discard()
return err
}
if highTs-lowTs < 1 {
log.Printf("Checking at lowTs: %d\n", lowTs)
err := checkAt(lowTs)
if err == errFailure {
fmt.Printf("Violation at ts: %d\n", lowTs)
return lowTs
} else if err != nil {
log.Printf("Error at lowTs: %d. Err=%v\n", lowTs, err)
return 0
}
fmt.Printf("No violation found at ts: %d\n", lowTs)
return 0
}
midTs := (lowTs + highTs) / 2
log.Println()
log.Printf("Checking. low=%d. high=%d. mid=%d\n", lowTs, highTs, midTs)
err := checkAt(midTs)
if err == badger.ErrKeyNotFound || err == nil {
// If no failure, move to higher ts.
return findFirstInvalidTxn(db, midTs+1, highTs)
}
// Found an error.
return findFirstInvalidTxn(db, lowTs, midTs)
}
func compareTwo(db *badger.DB, before, after uint64) {
fmt.Printf("Comparing @ts=%d with @ts=%d\n", before, after)
txn := db.NewTransactionAt(before, false)
prev, err := seekTotal(txn)
if err == errFailure {
// pass
} else {
y.Check(err)
}
txn.Discard()
txn = db.NewTransactionAt(after, false)
now, err := seekTotal(txn)
if err == errFailure {
// pass
} else {
y.Check(err)
}
txn.Discard()
fmt.Println(diff(prev, now))
}
func runDisect(cmd *cobra.Command, args []string) error {
opts := badger.DefaultOptions
opts.Dir = sstDir
opts.ValueDir = vlogDir
opts.ReadOnly = true
// The total did not match up. So, let's dissect the DB to find the
// transaction which caused the total mismatch.
db, err := badger.OpenManaged(opts)
if err != nil {
return err
}
fmt.Println("opened db")
var min, max uint64 = math.MaxUint64, 0
{
txn := db.NewTransactionAt(uint64(math.MaxUint32), false)
iopt := badger.DefaultIteratorOptions
iopt.AllVersions = true
itr := txn.NewIterator(iopt)
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
if min > item.Version() {
min = item.Version()
}
if max < item.Version() {
max = item.Version()
}
}
itr.Close()
txn.Discard()
}
log.Printf("min=%d. max=%d\n", min, max)
ts := findFirstInvalidTxn(db, min, max)
fmt.Println()
if ts == 0 {
fmt.Println("Nothing found. Exiting.")
return nil
}
for i := 0; i < numPrevious; i++ {
compareTwo(db, ts-1-uint64(i), ts-uint64(i))
}
return nil
}
func runTest(cmd *cobra.Command, args []string) error {
rand.Seed(time.Now().UnixNano())
// Open DB
opts := badger.DefaultOptions
opts.Dir = sstDir
opts.ValueDir = vlogDir
opts.MaxTableSize = 4 << 20 // Force more compactions.
opts.NumLevelZeroTables = 2
opts.NumMemtables = 2
// Do not GC any versions, because we need them for the disect.
opts.NumVersionsToKeep = int(math.MaxInt32)
opts.ValueThreshold = 1 // Make all values go to value log.
if mmap {
opts.TableLoadingMode = options.MemoryMap
}
log.Printf("Opening DB with options: %+v\n", opts)
db, err := badger.Open(opts)
if err != nil {
return err
}
defer db.Close()
wb := db.NewWriteBatch()
for i := 0; i < numAccounts; i++ {
y.Check(wb.Set(key(i), toSlice(initialBal), 0))
}
log.Println("Waiting for writes to be done...")
y.Check(wb.Flush())
log.Println("Bank initialization OK. Commencing test.")
log.Printf("Running with %d accounts, and %d goroutines.\n", numAccounts, numGoroutines)
log.Printf("Using keyPrefix: %s\n", keyPrefix)
dur, err := time.ParseDuration(duration)
y.Check(err)
// startTs := time.Now()
endTs := time.Now().Add(dur)
var total, errors, reads uint64
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for range ticker.C {
if atomic.LoadInt32(&stopAll) > 0 {
// Do not proceed.
return
}
// log.Printf("[%6s] Total: %d. Errors: %d Reads: %d.\n",
// time.Since(startTs).Round(time.Second).String(),
// atomic.LoadUint64(&total),
// atomic.LoadUint64(&errors),
// atomic.LoadUint64(&reads))
if time.Now().After(endTs) {
return
}
}
}()
// RW goroutines.
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func() {
defer wg.Done()
ticker := time.NewTicker(10 * time.Microsecond)
defer ticker.Stop()
for range ticker.C {
if atomic.LoadInt32(&stopAll) > 0 {
// Do not proceed.
return
}
if time.Now().After(endTs) {
return
}
from := rand.Intn(numAccounts)
to := rand.Intn(numAccounts)
if from == to {
continue
}
err := moveMoney(db, from, to)
atomic.AddUint64(&total, 1)
if err == nil {
log.Printf("Moved $5. %d -> %d\n", from, to)
} else {
atomic.AddUint64(&errors, 1)
}
}
}()
}
// RO goroutine.
wg.Add(1)
go func() {
defer wg.Done()
ticker := time.NewTicker(10 * time.Microsecond)
defer ticker.Stop()
for range ticker.C {
if atomic.LoadInt32(&stopAll) > 0 {
// Do not proceed.
return
}
if time.Now().After(endTs) {
return
}
y.Check(db.View(func(txn *badger.Txn) error {
_, err := seekTotal(txn)
if err != nil {
log.Printf("Error while calculating total: %v", err)
} else {
atomic.AddUint64(&reads, 1)
}
return nil
}))
}
}()
wg.Wait()
if atomic.LoadInt32(&stopAll) == 0 {
log.Println("Test OK")
return nil
}
log.Println("Test FAILED")
return fmt.Errorf("Test FAILED")
}
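
The RW and RO goroutines in runTest above share one shutdown idiom: a ticker-driven loop that exits when either the atomic stopAll flag is raised or the wall-clock deadline passes. A minimal, self-contained sketch of that idiom (all names here are illustrative, not taken from the vendored file):

    package main

    import (
        "fmt"
        "sync"
        "sync/atomic"
        "time"
    )

    func main() {
        var stop int32 // a failure detector elsewhere would raise this flag
        endTs := time.Now().Add(2 * time.Second)
        var wg sync.WaitGroup
        wg.Add(1)
        go func() {
            defer wg.Done()
            ticker := time.NewTicker(100 * time.Millisecond)
            defer ticker.Stop()
            for range ticker.C {
                // Exit on either the stop flag or the deadline, as runTest does.
                if atomic.LoadInt32(&stop) > 0 || time.Now().After(endTs) {
                    return
                }
                fmt.Println("one unit of work")
            }
        }()
        wg.Wait()
    }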

vendor/github.com/dgraph-io/badger/badger/cmd/fill.go generated vendored Normal file

@@ -0,0 +1,93 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"crypto/rand"
"time"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmXj63M2w2Pq7mnBpcrs7Va8prmfhvfMUNqVhJ9TgjiMbT/cobra"
)
var fillCmd = &cobra.Command{
Use: "fill",
Short: "Fill Badger with random data.",
Long: `
This command fills Badger with random data. It is useful for testing and performance analysis.
`,
RunE: fill,
}
var keySz, valSz int
var numKeys float64
var force bool
const mil float64 = 1e6
func init() {
RootCmd.AddCommand(fillCmd)
fillCmd.Flags().IntVarP(&keySz, "key-size", "k", 32, "Size of key")
fillCmd.Flags().IntVarP(&valSz, "val-size", "v", 128, "Size of value")
fillCmd.Flags().Float64VarP(&numKeys, "keys-mil", "m", 10.0,
"Number of keys to add in millions")
fillCmd.Flags().BoolVarP(&force, "force-compact", "f", true, "Force compact level 0 on close.")
}
func fill(cmd *cobra.Command, args []string) error {
opts := badger.DefaultOptions
opts.Dir = sstDir
opts.ValueDir = vlogDir
opts.Truncate = truncate
opts.SyncWrites = false
opts.CompactL0OnClose = force
opts.TableLoadingMode = options.FileIO
opts.ValueLogLoadingMode = options.FileIO
db, err := badger.Open(opts)
if err != nil {
return err
}
defer func() {
start := time.Now()
err := db.Close()
opts.Infof("DB.Close. Error: %v. Time taken: %s", err, time.Since(start))
}()
start := time.Now()
batch := db.NewWriteBatch()
num := int64(numKeys * mil)
for i := int64(1); i <= num; i++ {
k := make([]byte, keySz)
v := make([]byte, valSz)
y.Check2(rand.Read(k))
y.Check2(rand.Read(v))
if err := batch.Set(k, v, 0); err != nil {
return err
}
if i%1e5 == 0 {
opts.Infof("Written keys: %d\n", i)
}
}
if err := batch.Flush(); err != nil {
return err
}
opts.Infof("%d keys written. Time taken: %s\n", num, time.Since(start))
return nil
}
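
Given the flags registered in init above, plus the root command's --dir flag, a typical invocation would look something like this (the path is a placeholder):

    badger fill --dir /path/to/badger -m 1 -k 32 -v 128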

@@ -0,0 +1,56 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger"
"gx/ipfs/QmXj63M2w2Pq7mnBpcrs7Va8prmfhvfMUNqVhJ9TgjiMbT/cobra"
)
var flattenCmd = &cobra.Command{
Use: "flatten",
Short: "Flatten the LSM tree.",
Long: `
This command compacts all the LSM tables into one level.
`,
RunE: flatten,
}
var numWorkers int
func init() {
RootCmd.AddCommand(flattenCmd)
flattenCmd.Flags().IntVarP(&numWorkers, "num-workers", "w", 1,
"Number of concurrent compactors to run. More compactors would use more"+
" server resources to potentially achieve faster compactions.")
}
func flatten(cmd *cobra.Command, args []string) error {
opts := badger.DefaultOptions
opts.Dir = sstDir
opts.ValueDir = vlogDir
opts.Truncate = truncate
opts.NumCompactors = 0
db, err := badger.Open(opts)
if err != nil {
return err
}
defer db.Close()
return db.Flatten(numWorkers)
}
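
With the --num-workers flag above and the root --dir flag, an invocation might look like this (placeholder path). Note that flatten opens the DB with NumCompactors set to 0, so regular background compactions cannot race with Flatten:

    badger flatten --dir /path/to/badger --num-workers 2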

vendor/github.com/dgraph-io/badger/badger/cmd/info.go generated vendored Normal file

@@ -0,0 +1,294 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"sort"
"strings"
"time"
humanize "gx/ipfs/QmQMxG9D52TirZd9eLA37nxiNspnMRkKbyPWrVAa1gvtSy/go-humanize"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/table"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmXj63M2w2Pq7mnBpcrs7Va8prmfhvfMUNqVhJ9TgjiMbT/cobra"
)
var infoCmd = &cobra.Command{
Use: "info",
Short: "Health info about Badger database.",
Long: `
This command prints information about the badger key-value store. It reads the MANIFEST and prints
its contents, reports missing or extra files, and gives general information about the value log
files (which are not referenced by the manifest). Use this tool when reporting any issues about
Badger to the Dgraph team.
`,
Run: func(cmd *cobra.Command, args []string) {
err := printInfo(sstDir, vlogDir)
if err != nil {
fmt.Println("Error:", err.Error())
os.Exit(1)
}
if !showTables {
return
}
err = tableInfo(sstDir, vlogDir)
if err != nil {
fmt.Println("Error:", err.Error())
os.Exit(1)
}
},
}
var showTables bool
func init() {
RootCmd.AddCommand(infoCmd)
infoCmd.Flags().BoolVarP(&showTables, "show-tables", "s", false,
"If set to true, show tables as well.")
}
func hbytes(sz int64) string {
return humanize.Bytes(uint64(sz))
}
func dur(src, dst time.Time) string {
return humanize.RelTime(dst, src, "earlier", "later")
}
func tableInfo(dir, valueDir string) error {
// Open DB
opts := badger.DefaultOptions
opts.TableLoadingMode = options.MemoryMap
opts.Dir = sstDir
opts.ValueDir = vlogDir
opts.ReadOnly = true
db, err := badger.Open(opts)
if err != nil {
return err
}
defer db.Close()
tables := db.Tables()
for _, t := range tables {
lk, lv := y.ParseKey(t.Left), y.ParseTs(t.Left)
rk, rv := y.ParseKey(t.Right), y.ParseTs(t.Right)
fmt.Printf("SSTable [L%d, %03d] [%20X, v%-10d -> %20X, v%-10d]\n",
t.Level, t.ID, lk, lv, rk, rv)
}
return nil
}
func printInfo(dir, valueDir string) error {
if dir == "" {
return fmt.Errorf("--dir not supplied")
}
if valueDir == "" {
valueDir = dir
}
fp, err := os.Open(filepath.Join(dir, badger.ManifestFilename))
if err != nil {
return err
}
defer func() {
if fp != nil {
fp.Close()
}
}()
manifest, truncOffset, err := badger.ReplayManifestFile(fp)
if err != nil {
return err
}
fp.Close()
fp = nil
fileinfos, err := ioutil.ReadDir(dir)
if err != nil {
return err
}
fileinfoByName := make(map[string]os.FileInfo)
fileinfoMarked := make(map[string]bool)
for _, info := range fileinfos {
fileinfoByName[info.Name()] = info
fileinfoMarked[info.Name()] = false
}
fmt.Println()
var baseTime time.Time
// fmt.Print("\n[Manifest]\n")
manifestTruncated := false
manifestInfo, ok := fileinfoByName[badger.ManifestFilename]
if ok {
fileinfoMarked[badger.ManifestFilename] = true
truncatedString := ""
if truncOffset != manifestInfo.Size() {
truncatedString = fmt.Sprintf(" [TRUNCATED to %d]", truncOffset)
manifestTruncated = true
}
baseTime = manifestInfo.ModTime()
fmt.Printf("[%25s] %-12s %6s MA%s\n", manifestInfo.ModTime().Format(time.RFC3339),
manifestInfo.Name(), hbytes(manifestInfo.Size()), truncatedString)
} else {
fmt.Printf("%s [MISSING]\n", manifestInfo.Name())
}
numMissing := 0
numEmpty := 0
levelSizes := make([]int64, len(manifest.Levels))
for level, lm := range manifest.Levels {
// fmt.Printf("\n[Level %d]\n", level)
// We create a sorted list of table ID's so that output is in consistent order.
tableIDs := make([]uint64, 0, len(lm.Tables))
for id := range lm.Tables {
tableIDs = append(tableIDs, id)
}
sort.Slice(tableIDs, func(i, j int) bool {
return tableIDs[i] < tableIDs[j]
})
for _, tableID := range tableIDs {
tableFile := table.IDToFilename(tableID)
tm, ok1 := manifest.Tables[tableID]
file, ok2 := fileinfoByName[tableFile]
if ok1 && ok2 {
fileinfoMarked[tableFile] = true
emptyString := ""
fileSize := file.Size()
if fileSize == 0 {
emptyString = " [EMPTY]"
numEmpty++
}
levelSizes[level] += fileSize
// (Put level on every line to make easier to process with sed/perl.)
fmt.Printf("[%25s] %-12s %6s L%d %x%s\n", dur(baseTime, file.ModTime()),
tableFile, hbytes(fileSize), level, tm.Checksum, emptyString)
} else {
fmt.Printf("%s [MISSING]\n", tableFile)
numMissing++
}
}
}
valueDirFileinfos := fileinfos
if valueDir != dir {
valueDirFileinfos, err = ioutil.ReadDir(valueDir)
if err != nil {
return err
}
}
// If valueDir is different from dir, holds extra files in the value dir.
valueDirExtras := []os.FileInfo{}
valueLogSize := int64(0)
// fmt.Print("\n[Value Log]\n")
for _, file := range valueDirFileinfos {
if !strings.HasSuffix(file.Name(), ".vlog") {
if valueDir != dir {
valueDirExtras = append(valueDirExtras, file)
}
continue
}
fileSize := file.Size()
emptyString := ""
if fileSize == 0 {
emptyString = " [EMPTY]"
numEmpty++
}
valueLogSize += fileSize
fmt.Printf("[%25s] %-12s %6s VL%s\n", dur(baseTime, file.ModTime()), file.Name(),
hbytes(fileSize), emptyString)
fileinfoMarked[file.Name()] = true
}
numExtra := 0
for _, file := range fileinfos {
if fileinfoMarked[file.Name()] {
continue
}
if numExtra == 0 {
fmt.Print("\n[EXTRA]\n")
}
fmt.Printf("[%s] %-12s %6s\n", file.ModTime().Format(time.RFC3339),
file.Name(), hbytes(file.Size()))
numExtra++
}
numValueDirExtra := 0
for _, file := range valueDirExtras {
if numValueDirExtra == 0 {
fmt.Print("\n[ValueDir EXTRA]\n")
}
fmt.Printf("[%s] %-12s %6s\n", file.ModTime().Format(time.RFC3339),
file.Name(), hbytes(file.Size()))
numValueDirExtra++
}
fmt.Print("\n[Summary]\n")
totalIndexSize := int64(0)
for i, sz := range levelSizes {
fmt.Printf("Level %d size: %12s\n", i, hbytes(sz))
totalIndexSize += sz
}
fmt.Printf("Total index size: %8s\n", hbytes(totalIndexSize))
fmt.Printf("Value log size: %10s\n", hbytes(valueLogSize))
fmt.Println()
totalExtra := numExtra + numValueDirExtra
if totalExtra == 0 && numMissing == 0 && numEmpty == 0 && !manifestTruncated {
fmt.Println("Abnormalities: None.")
} else {
fmt.Println("Abnormalities:")
}
fmt.Printf("%d extra %s.\n", totalExtra, pluralFiles(totalExtra))
fmt.Printf("%d missing %s.\n", numMissing, pluralFiles(numMissing))
fmt.Printf("%d empty %s.\n", numEmpty, pluralFiles(numEmpty))
fmt.Printf("%d truncated %s.\n", boolToNum(manifestTruncated),
pluralManifest(manifestTruncated))
return nil
}
func boolToNum(x bool) int {
if x {
return 1
}
return 0
}
func pluralManifest(manifestTruncated bool) string {
if manifestTruncated {
return "manifest"
}
return "manifests"
}
func pluralFiles(count int) string {
if count == 1 {
return "file"
}
return "files"
}
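
Combining the root --dir flag with the --show-tables flag registered above, an invocation might look like this (placeholder path):

    badger info --dir /path/to/badger --show-tables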

@@ -0,0 +1,81 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"errors"
"os"
"path"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger"
"gx/ipfs/QmXj63M2w2Pq7mnBpcrs7Va8prmfhvfMUNqVhJ9TgjiMbT/cobra"
)
var restoreFile string
// restoreCmd represents the restore command
var restoreCmd = &cobra.Command{
Use: "restore",
Short: "Restore Badger database.",
Long: `Restore Badger database from a file.
It reads a file generated using the backup command (or by calling the
DB.Backup() API method) and writes each key-value pair found in the file to
the Badger database.
Restore creates a new database, and currently does not work on an already
existing database.`,
RunE: doRestore,
}
func init() {
RootCmd.AddCommand(restoreCmd)
restoreCmd.Flags().StringVarP(&restoreFile, "backup-file", "f",
"badger.bak", "File to restore from")
}
func doRestore(cmd *cobra.Command, args []string) error {
// Check if the DB already exists
manifestFile := path.Join(sstDir, badger.ManifestFilename)
if _, err := os.Stat(manifestFile); err == nil { // No error. File already exists.
return errors.New("Cannot restore to an already existing database")
} else if os.IsNotExist(err) {
// pass
} else { // Return an error if anything other than the error above
return err
}
// Open DB
opts := badger.DefaultOptions
opts.Dir = sstDir
opts.ValueDir = vlogDir
db, err := badger.Open(opts)
if err != nil {
return err
}
defer db.Close()
// Open File
f, err := os.Open(restoreFile)
if err != nil {
return err
}
defer f.Close()
// Run restore
return db.Load(f)
}
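
Per the flags above, a restore into a fresh directory might look like this (paths are placeholders); doRestore refuses to run if a MANIFEST file already exists under --dir:

    badger restore --dir /path/to/new-db --backup-file badger.bak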

vendor/github.com/dgraph-io/badger/badger/cmd/root.go generated vendored Normal file

@@ -0,0 +1,65 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"errors"
"fmt"
"os"
"strings"
"gx/ipfs/QmXj63M2w2Pq7mnBpcrs7Va8prmfhvfMUNqVhJ9TgjiMbT/cobra"
)
var sstDir, vlogDir string
// RootCmd represents the base command when called without any subcommands
var RootCmd = &cobra.Command{
Use: "badger",
Short: "Tools to manage Badger database.",
PersistentPreRunE: validateRootCmdArgs,
}
// Execute adds all child commands to the root command and sets flags appropriately.
// This is called by main.main(). It only needs to happen once to the rootCmd.
func Execute() {
if err := RootCmd.Execute(); err != nil {
fmt.Println(err)
os.Exit(1)
}
}
func init() {
RootCmd.PersistentFlags().StringVar(&sstDir, "dir", "",
"Directory where the LSM tree files are located. (required)")
RootCmd.PersistentFlags().StringVar(&vlogDir, "vlog-dir", "",
"Directory where the value log files are located, if different from --dir")
}
func validateRootCmdArgs(cmd *cobra.Command, args []string) error {
if strings.HasPrefix(cmd.Use, "help ") { // No need to validate if it is help
return nil
}
if sstDir == "" {
return errors.New("--dir not specified")
}
if vlogDir == "" {
vlogDir = sstDir
}
return nil
}

vendor/github.com/dgraph-io/badger/badger/main.go generated vendored Normal file

@@ -0,0 +1,42 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"fmt"
"net/http"
_ "net/http/pprof"
"runtime"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/badger/cmd"
)
func main() {
go func() {
for i := 8080; i < 9080; i++ {
fmt.Printf("Listening for /debug HTTP requests at port: %d\n", i)
if err := http.ListenAndServe(fmt.Sprintf("localhost:%d", i), nil); err != nil {
fmt.Println("Port busy. Trying another one...")
continue
}
}
}()
runtime.SetBlockProfileRate(100)
runtime.GOMAXPROCS(128)
cmd.Execute()
}

vendor/github.com/dgraph-io/badger/batch.go generated vendored Normal file

@@ -0,0 +1,153 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"sync"
"time"
)
// WriteBatch holds the necessary info to perform batched writes.
type WriteBatch struct {
sync.Mutex
txn *Txn
db *DB
wg sync.WaitGroup
err error
}
// NewWriteBatch creates a new WriteBatch. This provides a way to conveniently do a lot of writes,
// batching them up as tightly as possible in a single transaction and using callbacks to avoid
// waiting for them to commit, thus achieving good performance. This API hides away the logic of
// creating and committing transactions. Due to the nature of SSI guarantees provided by Badger,
// blind writes can never encounter transaction conflicts (ErrConflict).
func (db *DB) NewWriteBatch() *WriteBatch {
return &WriteBatch{db: db, txn: db.newTransaction(true, true)}
}
// The Cancel function must be called if there's a chance that Flush might not get
// called. If neither Flush nor Cancel is called, the transaction oracle would
// never get a chance to clear out the row commit timestamp map, causing
// unbounded memory consumption. Typically, you can call Cancel as a defer
// statement right after NewWriteBatch is called.
//
// Note that any committed writes would still go through despite calling Cancel.
func (wb *WriteBatch) Cancel() {
wb.wg.Wait()
wb.txn.Discard()
}
func (wb *WriteBatch) callback(err error) {
// sync.WaitGroup is thread-safe, so it doesn't need to be run inside wb.Lock.
defer wb.wg.Done()
if err == nil {
return
}
wb.Lock()
defer wb.Unlock()
if wb.err != nil {
return
}
wb.err = err
}
// SetEntry is the equivalent of Txn.SetEntry.
func (wb *WriteBatch) SetEntry(e *Entry) error {
wb.Lock()
defer wb.Unlock()
if err := wb.txn.SetEntry(e); err != ErrTxnTooBig {
return err
}
	// Txn has reached its zenith. Commit now.
if cerr := wb.commit(); cerr != nil {
return cerr
}
// This time the error must not be ErrTxnTooBig, otherwise, we make the
// error permanent.
if err := wb.txn.SetEntry(e); err != nil {
wb.err = err
return err
}
return nil
}
// Set is the equivalent of Txn.SetWithMeta.
func (wb *WriteBatch) Set(k, v []byte, meta byte) error {
e := &Entry{Key: k, Value: v, UserMeta: meta}
return wb.SetEntry(e)
}
// SetWithTTL is the equivalent of Txn.SetWithTTL.
func (wb *WriteBatch) SetWithTTL(key, val []byte, dur time.Duration) error {
expire := time.Now().Add(dur).Unix()
e := &Entry{Key: key, Value: val, ExpiresAt: uint64(expire)}
return wb.SetEntry(e)
}
// Delete is the equivalent of Txn.Delete.
func (wb *WriteBatch) Delete(k []byte) error {
wb.Lock()
defer wb.Unlock()
if err := wb.txn.Delete(k); err != ErrTxnTooBig {
return err
}
if err := wb.commit(); err != nil {
return err
}
if err := wb.txn.Delete(k); err != nil {
wb.err = err
return err
}
return nil
}
// Caller to commit must hold a write lock.
func (wb *WriteBatch) commit() error {
if wb.err != nil {
return wb.err
}
// Get a new txn before we commit this one. So, the new txn doesn't need
// to wait for this one to commit.
wb.wg.Add(1)
wb.txn.CommitWith(wb.callback)
wb.txn = wb.db.newTransaction(true, true)
wb.txn.readTs = 0 // We're not reading anything.
return wb.err
}
// Flush must be called at the end to ensure that any pending writes get committed to Badger. Flush
// returns any error stored by WriteBatch.
func (wb *WriteBatch) Flush() error {
wb.Lock()
_ = wb.commit()
wb.txn.Discard()
wb.Unlock()
wb.wg.Wait()
// Safe to access error without any synchronization here.
return wb.err
}
// Error returns any errors encountered so far. No commits would be run once an error is detected.
func (wb *WriteBatch) Error() error {
wb.Lock()
defer wb.Unlock()
return wb.err
}
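
Putting the pieces above together, the intended call pattern is NewWriteBatch, a deferred Cancel, a series of Set/Delete calls, and a final Flush. A minimal sketch, assuming the upstream import path rather than this vendored copy's gx-rewritten one, with a placeholder directory:

    package main

    import (
        "fmt"
        "log"

        "github.com/dgraph-io/badger"
    )

    func main() {
        opts := badger.DefaultOptions
        opts.Dir = "/tmp/badger-wb-example" // placeholder
        opts.ValueDir = opts.Dir
        db, err := badger.Open(opts)
        if err != nil {
            log.Fatal(err)
        }
        defer db.Close()

        wb := db.NewWriteBatch()
        defer wb.Cancel() // clears the oracle's commit-timestamp map even on an early exit

        for i := 0; i < 1000; i++ {
            if err := wb.Set([]byte(fmt.Sprintf("key-%04d", i)), []byte("value"), 0); err != nil {
                log.Fatal(err)
            }
        }
        if err := wb.Flush(); err != nil { // commits whatever is still pending
            log.Fatal(err)
        }
    }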

vendor/github.com/dgraph-io/badger/batch_test.go generated vendored Normal file

@@ -0,0 +1,69 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"testing"
"time"
"github.com/stretchr/testify/require"
)
func TestWriteBatch(t *testing.T) {
key := func(i int) []byte {
return []byte(fmt.Sprintf("%10d", i))
}
val := func(i int) []byte {
return []byte(fmt.Sprintf("%128d", i))
}
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
wb := db.NewWriteBatch()
defer wb.Cancel()
N, M := 50000, 1000
start := time.Now()
for i := 0; i < N; i++ {
require.NoError(t, wb.Set(key(i), val(i), 0))
}
for i := 0; i < M; i++ {
require.NoError(t, wb.Delete(key(i)))
}
require.NoError(t, wb.Flush())
t.Logf("Time taken for %d writes (w/ test options): %s\n", N+M, time.Since(start))
err := db.View(func(txn *Txn) error {
itr := txn.NewIterator(DefaultIteratorOptions)
defer itr.Close()
i := M
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
require.Equal(t, string(key(i)), string(item.Key()))
valcopy, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, val(i), valcopy)
i++
}
require.Equal(t, N, i)
return nil
})
require.NoError(t, err)
})
}

vendor/github.com/dgraph-io/badger/compaction.go generated vendored Normal file

@@ -0,0 +1,208 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"fmt"
"log"
"math"
"sync"
"gx/ipfs/QmRvYNctevGUW52urgmoFZscT6buMKqhHezLUS64WepGWn/go-net/trace"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/table"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
type keyRange struct {
left []byte
right []byte
inf bool
}
var infRange = keyRange{inf: true}
func (r keyRange) String() string {
return fmt.Sprintf("[left=%x, right=%x, inf=%v]", r.left, r.right, r.inf)
}
func (r keyRange) equals(dst keyRange) bool {
return bytes.Equal(r.left, dst.left) &&
bytes.Equal(r.right, dst.right) &&
r.inf == dst.inf
}
func (r keyRange) overlapsWith(dst keyRange) bool {
if r.inf || dst.inf {
return true
}
// If my left is greater than dst right, we have no overlap.
if y.CompareKeys(r.left, dst.right) > 0 {
return false
}
// If my right is less than dst left, we have no overlap.
if y.CompareKeys(r.right, dst.left) < 0 {
return false
}
// We have overlap.
return true
}
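// Example for overlapsWith above: with single-byte keys, [b, d] and [c, e]
// overlap (c <= d and b <= e), while [a, b] and [c, d] do not (b < c); a
// keyRange with inf set overlaps everything.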
func getKeyRange(tables []*table.Table) keyRange {
y.AssertTrue(len(tables) > 0)
smallest := tables[0].Smallest()
biggest := tables[0].Biggest()
for i := 1; i < len(tables); i++ {
if y.CompareKeys(tables[i].Smallest(), smallest) < 0 {
smallest = tables[i].Smallest()
}
if y.CompareKeys(tables[i].Biggest(), biggest) > 0 {
biggest = tables[i].Biggest()
}
}
return keyRange{
left: y.KeyWithTs(y.ParseKey(smallest), math.MaxUint64),
right: y.KeyWithTs(y.ParseKey(biggest), 0),
}
}
type levelCompactStatus struct {
ranges []keyRange
delSize int64
}
func (lcs *levelCompactStatus) debug() string {
var b bytes.Buffer
for _, r := range lcs.ranges {
b.WriteString(r.String())
}
return b.String()
}
func (lcs *levelCompactStatus) overlapsWith(dst keyRange) bool {
for _, r := range lcs.ranges {
if r.overlapsWith(dst) {
return true
}
}
return false
}
func (lcs *levelCompactStatus) remove(dst keyRange) bool {
final := lcs.ranges[:0]
var found bool
for _, r := range lcs.ranges {
if !r.equals(dst) {
final = append(final, r)
} else {
found = true
}
}
lcs.ranges = final
return found
}
type compactStatus struct {
sync.RWMutex
levels []*levelCompactStatus
}
func (cs *compactStatus) toLog(tr trace.Trace) {
cs.RLock()
defer cs.RUnlock()
tr.LazyPrintf("Compaction status:")
for i, l := range cs.levels {
if len(l.debug()) == 0 {
continue
}
tr.LazyPrintf("[%d] %s", i, l.debug())
}
}
func (cs *compactStatus) overlapsWith(level int, this keyRange) bool {
cs.RLock()
defer cs.RUnlock()
thisLevel := cs.levels[level]
return thisLevel.overlapsWith(this)
}
func (cs *compactStatus) delSize(l int) int64 {
cs.RLock()
defer cs.RUnlock()
return cs.levels[l].delSize
}
type thisAndNextLevelRLocked struct{}
// compareAndAdd checks whether we can run this compactDef, i.e. that it doesn't overlap with any
// other running compaction. If it can be run, it stores this run in the compactStatus state.
func (cs *compactStatus) compareAndAdd(_ thisAndNextLevelRLocked, cd compactDef) bool {
cs.Lock()
defer cs.Unlock()
level := cd.thisLevel.level
y.AssertTruef(level < len(cs.levels)-1, "Got level %d. Max levels: %d", level, len(cs.levels))
thisLevel := cs.levels[level]
nextLevel := cs.levels[level+1]
if thisLevel.overlapsWith(cd.thisRange) {
return false
}
if nextLevel.overlapsWith(cd.nextRange) {
return false
}
// Check whether this level really needs compaction or not. Otherwise, we'll end up
// running parallel compactions for the same level.
// Update: We should not be checking size here. Compaction priority already did the size checks.
// Here we should just be executing the wish of others.
thisLevel.ranges = append(thisLevel.ranges, cd.thisRange)
nextLevel.ranges = append(nextLevel.ranges, cd.nextRange)
thisLevel.delSize += cd.thisSize
return true
}
func (cs *compactStatus) delete(cd compactDef) {
cs.Lock()
defer cs.Unlock()
level := cd.thisLevel.level
y.AssertTruef(level < len(cs.levels)-1, "Got level %d. Max levels: %d", level, len(cs.levels))
thisLevel := cs.levels[level]
nextLevel := cs.levels[level+1]
thisLevel.delSize -= cd.thisSize
found := thisLevel.remove(cd.thisRange)
found = nextLevel.remove(cd.nextRange) && found
if !found {
this := cd.thisRange
next := cd.nextRange
fmt.Printf("Looking for: [%q, %q, %v] in this level.\n", this.left, this.right, this.inf)
fmt.Printf("This Level:\n%s\n", thisLevel.debug())
fmt.Println()
fmt.Printf("Looking for: [%q, %q, %v] in next level.\n", next.left, next.right, next.inf)
fmt.Printf("Next Level:\n%s\n", nextLevel.debug())
log.Fatal("keyRange not found")
}
}

vendor/github.com/dgraph-io/badger/contrib/cover.sh generated vendored Normal file

@@ -0,0 +1,23 @@
#!/bin/bash
SRC="$( cd -P "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.."
TMP=$(mktemp /tmp/badger-coverage-XXXXX.txt)
BUILD=$1
OUT=$2
set -e
pushd $SRC &> /dev/null
# create coverage output
echo 'mode: atomic' > $OUT
for PKG in $(go list ./...|grep -v -E 'vendor'); do
go test -covermode=atomic -coverprofile=$TMP $PKG
tail -n +2 $TMP >> $OUT
done
# Another round of tests after turning off mmap
go test -v -vlog_mmap=false github.com/dgraph-io/badger
popd &> /dev/null

vendor/github.com/dgraph-io/badger/db.go generated vendored Normal file

File diff suppressed because it is too large.

vendor/github.com/dgraph-io/badger/db2_test.go generated vendored Normal file

@@ -0,0 +1,325 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"flag"
"fmt"
"io/ioutil"
"log"
"math/rand"
"os"
"path"
"regexp"
"testing"
"github.com/stretchr/testify/require"
)
func TestTruncateVlogWithClose(t *testing.T) {
key := func(i int) []byte {
return []byte(fmt.Sprintf("%d%10d", i, i))
}
data := func(l int) []byte {
m := make([]byte, l)
_, err := rand.Read(m)
require.NoError(t, err)
return m
}
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
opt.SyncWrites = true
opt.Truncate = true
opt.ValueThreshold = 1 // Force all reads from value log.
db, err := Open(opt)
require.NoError(t, err)
err = db.Update(func(txn *Txn) error {
return txn.Set(key(0), data(4055))
})
require.NoError(t, err)
// Close the DB.
require.NoError(t, db.Close())
require.NoError(t, os.Truncate(path.Join(dir, "000000.vlog"), 4096))
// Reopen and write some new data.
db, err = Open(opt)
require.NoError(t, err)
for i := 0; i < 32; i++ {
err := db.Update(func(txn *Txn) error {
return txn.Set(key(i), data(10))
})
require.NoError(t, err)
}
// Read it back to ensure that we can read it now.
for i := 0; i < 32; i++ {
err := db.View(func(txn *Txn) error {
item, err := txn.Get(key(i))
require.NoError(t, err)
val := getItemValue(t, item)
require.Equal(t, 10, len(val))
return nil
})
require.NoError(t, err)
}
require.NoError(t, db.Close())
// Reopen and read the data again.
db, err = Open(opt)
require.NoError(t, err)
for i := 0; i < 32; i++ {
err := db.View(func(txn *Txn) error {
item, err := txn.Get(key(i))
require.NoError(t, err)
val := getItemValue(t, item)
require.Equal(t, 10, len(val))
return nil
})
require.NoError(t, err)
}
require.NoError(t, db.Close())
}
var manual = flag.Bool("manual", false, "Set when manually running some tests.")
// The following 3 TruncateVlogNoClose tests should be run one after another.
// None of these close the DB, simulating a crash. They should be run with a
// script that truncates the value log to 4096 bytes, lining up with the end of
// the first entry in the txn. Truncating to <4096 cuts that entry off
// immediately; truncating to >4096 has the same effect.
func TestTruncateVlogNoClose(t *testing.T) {
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
fmt.Println("running")
dir := "p"
opts := getTestOptions(dir)
opts.SyncWrites = true
opts.Truncate = true
kv, err := Open(opts)
require.NoError(t, err)
key := func(i int) string {
return fmt.Sprintf("%d%10d", i, i)
}
data := fmt.Sprintf("%4055d", 1)
err = kv.Update(func(txn *Txn) error {
return txn.Set([]byte(key(0)), []byte(data))
})
require.NoError(t, err)
}
func TestTruncateVlogNoClose2(t *testing.T) {
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
dir := "p"
opts := getTestOptions(dir)
opts.SyncWrites = true
opts.Truncate = true
kv, err := Open(opts)
require.NoError(t, err)
key := func(i int) string {
return fmt.Sprintf("%d%10d", i, i)
}
data := fmt.Sprintf("%10d", 1)
for i := 32; i < 64; i++ {
err := kv.Update(func(txn *Txn) error {
return txn.Set([]byte(key(i)), []byte(data))
})
require.NoError(t, err)
}
for i := 32; i < 64; i++ {
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key(i)))
require.NoError(t, err)
val := getItemValue(t, item)
require.NotNil(t, val)
require.True(t, len(val) > 0)
return nil
}))
}
}
func TestTruncateVlogNoClose3(t *testing.T) {
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
fmt.Print("Running")
dir := "p"
opts := getTestOptions(dir)
opts.SyncWrites = true
opts.Truncate = true
kv, err := Open(opts)
require.NoError(t, err)
key := func(i int) string {
return fmt.Sprintf("%d%10d", i, i)
}
for i := 32; i < 64; i++ {
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key(i)))
require.NoError(t, err)
val := getItemValue(t, item)
require.NotNil(t, val)
require.True(t, len(val) > 0)
return nil
}))
}
}
func TestBigKeyValuePairs(t *testing.T) {
// This test takes too much memory. So, run separately.
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
opts := DefaultOptions
opts.MaxTableSize = 1 << 20
opts.ValueLogMaxEntries = 64
runBadgerTest(t, &opts, func(t *testing.T, db *DB) {
bigK := make([]byte, 65001)
bigV := make([]byte, db.opt.ValueLogFileSize+1)
small := make([]byte, 65000)
txn := db.NewTransaction(true)
require.Regexp(t, regexp.MustCompile("Key.*exceeded"), txn.Set(bigK, small))
require.Regexp(t, regexp.MustCompile("Value.*exceeded"), txn.Set(small, bigV))
require.NoError(t, txn.Set(small, small))
require.Regexp(t, regexp.MustCompile("Key.*exceeded"), txn.Set(bigK, bigV))
require.NoError(t, db.View(func(txn *Txn) error {
_, err := txn.Get(small)
require.Equal(t, ErrKeyNotFound, err)
return nil
}))
// Now run a longer test, which involves value log GC.
data := fmt.Sprintf("%100d", 1)
key := func(i int) string {
return fmt.Sprintf("%65000d", i)
}
saveByKey := func(key string, value []byte) error {
return db.Update(func(txn *Txn) error {
return txn.Set([]byte(key), value)
})
}
getByKey := func(key string) error {
return db.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key))
if err != nil {
return err
}
return item.Value(func(val []byte) error {
if len(val) == 0 {
log.Fatalf("key not found %q", len(key))
}
return nil
})
})
}
for i := 0; i < 32; i++ {
if i < 30 {
require.NoError(t, saveByKey(key(i), []byte(data)))
} else {
require.NoError(t, saveByKey(key(i), []byte(fmt.Sprintf("%100d", i))))
}
}
for j := 0; j < 5; j++ {
for i := 0; i < 32; i++ {
if i < 30 {
require.NoError(t, saveByKey(key(i), []byte(data)))
} else {
require.NoError(t, saveByKey(key(i), []byte(fmt.Sprintf("%100d", i))))
}
}
}
for i := 0; i < 32; i++ {
require.NoError(t, getByKey(key(i)))
}
var loops int
var err error
for err == nil {
err = db.RunValueLogGC(0.5)
require.NotRegexp(t, regexp.MustCompile("truncate"), err)
loops++
}
t.Logf("Ran value log GC %d times. Last error: %v\n", loops, err)
})
}
// The following test checks for issue #585.
func TestPushValueLogLimit(t *testing.T) {
// This test takes too much memory. So, run separately.
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
opt := DefaultOptions
opt.ValueLogMaxEntries = 64
opt.ValueLogFileSize = 2 << 30
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
data := []byte(fmt.Sprintf("%30d", 1))
key := func(i int) string {
return fmt.Sprintf("%100d", i)
}
for i := 0; i < 32; i++ {
if i == 4 {
v := make([]byte, 2<<30)
err := db.Update(func(txn *Txn) error {
return txn.Set([]byte(key(i)), v)
})
require.NoError(t, err)
} else {
err := db.Update(func(txn *Txn) error {
return txn.Set([]byte(key(i)), data)
})
require.NoError(t, err)
}
}
for i := 0; i < 32; i++ {
err := db.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key(i)))
require.NoError(t, err, "Getting key: %s", key(i))
err = item.Value(func(v []byte) error {
_ = v
return nil
})
require.NoError(t, err, "Getting value: %s", key(i))
return nil
})
require.NoError(t, err)
}
})
}

vendor/github.com/dgraph-io/badger/db_test.go generated vendored Normal file

File diff suppressed because it is too large.

vendor/github.com/dgraph-io/badger/dir_unix.go generated vendored Normal file

@@ -0,0 +1,100 @@
// +build !windows
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"gx/ipfs/QmVGjyM9i2msKvLXwh9VosCTgP4mL91kC7hDmqnwTTx6Hu/sys/unix"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
// directoryLockGuard holds a lock on a directory and a pid file inside. The pid file isn't part
// of the locking mechanism; it's just advisory.
type directoryLockGuard struct {
// File handle on the directory, which we've flocked.
f *os.File
// The absolute path to our pid file.
path string
// Was this a shared lock for a read-only database?
readOnly bool
}
// acquireDirectoryLock gets a lock on the directory (using flock). If
// this is not read-only, it will also write our pid to
// dirPath/pidFileName for convenience.
func acquireDirectoryLock(dirPath string, pidFileName string, readOnly bool) (*directoryLockGuard, error) {
// Convert to absolute path so that Release still works even if we do an unbalanced
// chdir in the meantime.
absPidFilePath, err := filepath.Abs(filepath.Join(dirPath, pidFileName))
if err != nil {
return nil, errors.Wrap(err, "cannot get absolute path for pid lock file")
}
f, err := os.Open(dirPath)
if err != nil {
return nil, errors.Wrapf(err, "cannot open directory %q", dirPath)
}
opts := unix.LOCK_EX | unix.LOCK_NB
if readOnly {
opts = unix.LOCK_SH | unix.LOCK_NB
}
err = unix.Flock(int(f.Fd()), opts)
if err != nil {
f.Close()
return nil, errors.Wrapf(err,
"Cannot acquire directory lock on %q. Another process is using this Badger database.",
dirPath)
}
if !readOnly {
// Yes, we happily overwrite a pre-existing pid file. We're the
// only read-write badger process using this directory.
err = ioutil.WriteFile(absPidFilePath, []byte(fmt.Sprintf("%d\n", os.Getpid())), 0666)
if err != nil {
f.Close()
return nil, errors.Wrapf(err,
"Cannot write pid file %q", absPidFilePath)
}
}
return &directoryLockGuard{f, absPidFilePath, readOnly}, nil
}
// Release deletes the pid file and releases our lock on the directory.
func (guard *directoryLockGuard) release() error {
var err error
if !guard.readOnly {
// It's important that we remove the pid file first.
err = os.Remove(guard.path)
}
if closeErr := guard.f.Close(); err == nil {
err = closeErr
}
guard.path = ""
guard.f = nil
return err
}
// openDir opens a directory for syncing.
func openDir(path string) (*os.File, error) { return os.Open(path) }

vendor/github.com/dgraph-io/badger/dir_windows.go generated vendored Normal file

@@ -0,0 +1,106 @@
// +build windows
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
// OpenDir opens a directory in windows with write access for syncing.
import (
"os"
"path/filepath"
"syscall"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
// FILE_ATTRIBUTE_TEMPORARY - A file that is being used for temporary storage.
// FILE_FLAG_DELETE_ON_CLOSE - The file is to be deleted immediately after all of its handles are
// closed, which includes the specified handle and any other open or duplicated handles.
// See: https://docs.microsoft.com/en-us/windows/desktop/FileIO/file-attribute-constants
// NOTE: Added here to avoid importing golang.org/x/sys/windows
const (
FILE_ATTRIBUTE_TEMPORARY = 0x00000100
FILE_FLAG_DELETE_ON_CLOSE = 0x04000000
)
func openDir(path string) (*os.File, error) {
fd, err := openDirWin(path)
if err != nil {
return nil, err
}
return os.NewFile(uintptr(fd), path), nil
}
func openDirWin(path string) (fd syscall.Handle, err error) {
if len(path) == 0 {
return syscall.InvalidHandle, syscall.ERROR_FILE_NOT_FOUND
}
pathp, err := syscall.UTF16PtrFromString(path)
if err != nil {
return syscall.InvalidHandle, err
}
access := uint32(syscall.GENERIC_READ | syscall.GENERIC_WRITE)
sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE)
createmode := uint32(syscall.OPEN_EXISTING)
fl := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS)
return syscall.CreateFile(pathp, access, sharemode, nil, createmode, fl, 0)
}
// DirectoryLockGuard holds a lock on the directory.
type directoryLockGuard struct {
h syscall.Handle
path string
}
// AcquireDirectoryLock acquires exclusive access to a directory.
func acquireDirectoryLock(dirPath string, pidFileName string, readOnly bool) (*directoryLockGuard, error) {
if readOnly {
return nil, ErrWindowsNotSupported
}
// Convert to absolute path so that Release still works even if we do an unbalanced
// chdir in the meantime.
absLockFilePath, err := filepath.Abs(filepath.Join(dirPath, pidFileName))
if err != nil {
return nil, errors.Wrap(err, "Cannot get absolute path for pid lock file")
}
// This call creates a file handler in memory that only one process can use at a time. When
// that process ends, the file is deleted by the system.
// FILE_ATTRIBUTE_TEMPORARY is used to tell Windows to try to create the handle in memory.
// FILE_FLAG_DELETE_ON_CLOSE is not specified in syscall_windows.go but tells Windows to delete
// the file when all processes holding the handler are closed.
	// XXX: this works but it's a bit clunky. I'd prefer to use LockFileEx but it needs the unsafe pkg.
h, err := syscall.CreateFile(
syscall.StringToUTF16Ptr(absLockFilePath), 0, 0, nil,
syscall.OPEN_ALWAYS,
uint32(FILE_ATTRIBUTE_TEMPORARY|FILE_FLAG_DELETE_ON_CLOSE),
0)
if err != nil {
return nil, errors.Wrapf(err,
"Cannot create lock file %q. Another process is using this Badger database",
absLockFilePath)
}
return &directoryLockGuard{h: h, path: absLockFilePath}, nil
}
// Release removes the directory lock.
func (g *directoryLockGuard) release() error {
g.path = ""
return syscall.CloseHandle(g.h)
}

vendor/github.com/dgraph-io/badger/doc.go generated vendored Normal file

@@ -0,0 +1,28 @@
/*
Package badger implements an embeddable, simple and fast key-value database,
written in pure Go. It is designed to be highly performant for both reads and
writes simultaneously. Badger uses Multi-Version Concurrency Control (MVCC), and
supports transactions. It runs transactions concurrently, with serializable
snapshot isolation guarantees.
Badger uses an LSM tree along with a value log to separate keys from values,
hence reducing both write amplification and the size of the LSM tree. This
allows the LSM tree to be served entirely from RAM, while the values are served
from SSD.
Usage
Badger has the following main types: DB, Txn, Item and Iterator. DB contains
keys that are associated with values. It must be opened with the appropriate
options before it can be accessed.
All operations happen inside a Txn. Txn represents a transaction, which can
be read-only or read-write. Read-only transactions can read values for a
given key (which are returned inside an Item), or iterate over a set of
key-value pairs using an Iterator (which are returned as Item type values as
well). Read-write transactions can also update and delete keys from the DB.
See the examples for more usage details.
*/
package badger
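
A minimal end-to-end sketch of the types described above: open a DB, write through a read-write transaction (Update), then read through a read-only transaction (View). The import path and directory are placeholders, assuming the upstream package path rather than this vendored copy's gx path:

    package main

    import (
        "fmt"
        "log"

        "github.com/dgraph-io/badger"
    )

    func main() {
        opts := badger.DefaultOptions
        opts.Dir = "/tmp/badger-doc-example" // placeholder
        opts.ValueDir = opts.Dir
        db, err := badger.Open(opts)
        if err != nil {
            log.Fatal(err)
        }
        defer db.Close()

        // Read-write transaction: set a key.
        if err := db.Update(func(txn *badger.Txn) error {
            return txn.Set([]byte("answer"), []byte("42"))
        }); err != nil {
            log.Fatal(err)
        }

        // Read-only transaction: read it back; val is only valid inside the
        // closure (use ValueCopy to keep it longer).
        if err := db.View(func(txn *badger.Txn) error {
            item, err := txn.Get([]byte("answer"))
            if err != nil {
                return err
            }
            return item.Value(func(val []byte) error {
                fmt.Printf("answer=%s\n", val)
                return nil
            })
        }); err != nil {
            log.Fatal(err)
        }
    }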

vendor/github.com/dgraph-io/badger/errors.go generated vendored Normal file

@@ -0,0 +1,105 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
var (
// ErrValueLogSize is returned when opt.ValueLogFileSize option is not within the valid
// range.
ErrValueLogSize = errors.New("Invalid ValueLogFileSize, must be between 1MB and 2GB")
// ErrValueThreshold is returned when ValueThreshold is set to a value close to or greater than
// uint16.
ErrValueThreshold = errors.New("Invalid ValueThreshold, must be lower than uint16")
// ErrKeyNotFound is returned when key isn't found on a txn.Get.
ErrKeyNotFound = errors.New("Key not found")
// ErrTxnTooBig is returned if too many writes are fit into a single transaction.
ErrTxnTooBig = errors.New("Txn is too big to fit into one request")
// ErrConflict is returned when a transaction conflicts with another transaction. This can happen if
// the read rows had been updated concurrently by another transaction.
ErrConflict = errors.New("Transaction Conflict. Please retry")
// ErrReadOnlyTxn is returned if an update function is called on a read-only transaction.
ErrReadOnlyTxn = errors.New("No sets or deletes are allowed in a read-only transaction")
// ErrDiscardedTxn is returned if a previously discarded transaction is re-used.
ErrDiscardedTxn = errors.New("This transaction has been discarded. Create a new one")
// ErrEmptyKey is returned if an empty key is passed on an update function.
ErrEmptyKey = errors.New("Key cannot be empty")
// ErrInvalidKey is returned if the key has a special !badger! prefix,
// reserved for internal usage.
ErrInvalidKey = errors.New("Key is using a reserved !badger! prefix")
// ErrRetry is returned when a log file containing the value is not found.
// This usually indicates that it may have been garbage collected, and the
// operation needs to be retried.
ErrRetry = errors.New("Unable to find log file. Please retry")
// ErrThresholdZero is returned if threshold is set to zero, and value log GC is called.
// In such a case, GC can't be run.
ErrThresholdZero = errors.New(
"Value log GC can't run because threshold is set to zero")
// ErrNoRewrite is returned if a call for value log GC doesn't result in a log file rewrite.
ErrNoRewrite = errors.New(
"Value log GC attempt didn't result in any cleanup")
// ErrRejected is returned if a value log GC is called either while another GC is running, or
// after DB::Close has been called.
ErrRejected = errors.New("Value log GC request rejected")
// ErrInvalidRequest is returned if the user request is invalid.
ErrInvalidRequest = errors.New("Invalid request")
// ErrManagedTxn is returned if the user tries to use an API which isn't
// allowed due to external management of transactions, when using ManagedDB.
ErrManagedTxn = errors.New(
"Invalid API request. Not allowed to perform this action using ManagedDB")
	// ErrInvalidDump is returned if a data dump made previously cannot be loaded into the database.
ErrInvalidDump = errors.New("Data dump cannot be read")
// ErrZeroBandwidth is returned if the user passes in zero bandwidth for sequence.
ErrZeroBandwidth = errors.New("Bandwidth must be greater than zero")
// ErrInvalidLoadingMode is returned when opt.ValueLogLoadingMode option is not
// within the valid range
ErrInvalidLoadingMode = errors.New("Invalid ValueLogLoadingMode, must be FileIO or MemoryMap")
// ErrReplayNeeded is returned when opt.ReadOnly is set but the
// database requires a value log replay.
ErrReplayNeeded = errors.New("Database was not properly closed, cannot open read-only")
// ErrWindowsNotSupported is returned when opt.ReadOnly is used on Windows
ErrWindowsNotSupported = errors.New("Read-only mode is not supported on Windows")
// ErrTruncateNeeded is returned when the value log gets corrupt, and requires truncation of
// corrupt data to allow Badger to run properly.
ErrTruncateNeeded = errors.New("Value log truncate required to run DB. This might result in data loss")
// ErrBlockedWrites is returned if the user called DropAll. During the process of dropping all
// data from Badger, we stop accepting new writes, by returning this error.
ErrBlockedWrites = errors.New("Writes are blocked, possibly due to DropAll or Close")
)

Two binary image files added (65 KiB and 32 KiB); contents not shown.

@@ -0,0 +1 @@
/testgc

@@ -0,0 +1,218 @@
package main
import (
"encoding/binary"
"fmt"
"log"
"math/rand"
"net/http"
_ "net/http/pprof"
"os"
"sync"
"sync/atomic"
"time"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
var maxValue int64 = 10000000
var suffix = make([]byte, 128)
type testSuite struct {
sync.Mutex
vals map[uint64]uint64
count uint64 // Not under mutex lock.
}
func encoded(i uint64) []byte {
out := make([]byte, 8)
binary.BigEndian.PutUint64(out, i)
return out
}
func (s *testSuite) write(db *badger.DB) error {
return db.Update(func(txn *badger.Txn) error {
for i := 0; i < 10; i++ {
// These keys would be overwritten.
keyi := uint64(rand.Int63n(maxValue))
key := encoded(keyi)
vali := atomic.AddUint64(&s.count, 1)
val := encoded(vali)
val = append(val, suffix...)
if err := txn.Set(key, val); err != nil {
return err
}
}
for i := 0; i < 20; i++ {
// These keys would be new and never overwritten.
keyi := atomic.AddUint64(&s.count, 1)
if keyi%1000000 == 0 {
log.Printf("Count: %d\n", keyi)
}
key := encoded(keyi)
val := append(key, suffix...)
if err := txn.Set(key, val); err != nil {
return err
}
}
return nil
})
}
func (s *testSuite) read(db *badger.DB) error {
max := int64(atomic.LoadUint64(&s.count))
keyi := uint64(rand.Int63n(max))
key := encoded(keyi)
err := db.View(func(txn *badger.Txn) error {
item, err := txn.Get(key)
if err != nil {
return err
}
val, err := item.ValueCopy(nil)
if err != nil {
return err
}
y.AssertTruef(len(val) == len(suffix)+8, "Found val of len: %d\n", len(val))
vali := binary.BigEndian.Uint64(val[0:8])
s.Lock()
expected := s.vals[keyi]
if vali < expected {
log.Fatalf("Expected: %d. Found: %d. Key: %d\n", expected, vali, keyi)
} else if vali == expected {
// pass
} else {
s.vals[keyi] = vali
}
s.Unlock()
return nil
})
if err == badger.ErrKeyNotFound {
return nil
}
return err
}
func main() {
fmt.Println("Badger Integration test for value log GC.")
dir := "/mnt/drive/badgertest"
os.RemoveAll(dir)
opts := badger.DefaultOptions
opts.Dir = dir
opts.ValueDir = dir
opts.TableLoadingMode = options.MemoryMap
opts.ValueLogLoadingMode = options.FileIO
opts.SyncWrites = false
db, err := badger.Open(opts)
if err != nil {
log.Fatal(err)
}
defer db.Close()
go http.ListenAndServe("localhost:8080", nil)
closer := y.NewCloser(11)
go func() {
// Run value log GC.
defer closer.Done()
var count int
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
for range ticker.C {
again:
select {
case <-closer.HasBeenClosed():
log.Printf("Num times value log GC was successful: %d\n", count)
return
default:
}
log.Printf("Starting a value log GC")
err := db.RunValueLogGC(0.1)
log.Printf("Result of value log GC: %v\n", err)
if err == nil {
count++
goto again
}
}
}()
s := testSuite{
count: uint64(maxValue),
vals: make(map[uint64]uint64),
}
var numLoops uint64
ticker := time.NewTicker(5 * time.Second)
for i := 0; i < 10; i++ {
go func() {
defer closer.Done()
for {
if err := s.write(db); err != nil {
log.Fatal(err)
}
for j := 0; j < 10; j++ {
if err := s.read(db); err != nil {
log.Fatal(err)
}
}
nl := atomic.AddUint64(&numLoops, 1)
select {
case <-closer.HasBeenClosed():
return
case <-ticker.C:
log.Printf("Num loops: %d\n", nl)
default:
}
}
}()
}
time.Sleep(5 * time.Minute)
log.Println("Signaling...")
closer.SignalAndWait()
log.Println("Wait done. Now iterating over everything.")
err = db.View(func(txn *badger.Txn) error {
iopts := badger.DefaultIteratorOptions
itr := txn.NewIterator(iopts)
defer itr.Close()
var total, tested int
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
key := item.Key()
keyi := binary.BigEndian.Uint64(key)
total++
val, err := item.ValueCopy(nil)
if err != nil {
return err
}
if len(val) < 8 {
log.Printf("Unexpected value: %x\n", val)
continue
}
vali := binary.BigEndian.Uint64(val[0:8])
expected, ok := s.vals[keyi] // Not all keys must be in vals map.
if ok {
tested++
if vali < expected {
// vali must be equal or greater than what's in the map.
log.Fatalf("Expected: %d. Got: %d. Key: %d\n", expected, vali, keyi)
}
}
}
log.Printf("Total iterated: %d. Tested values: %d\n", total, tested)
return nil
})
if err != nil {
log.Fatalf("Error while iterating: %v", err)
}
log.Println("Iteration done. Test successful.")
time.Sleep(time.Minute) // Time to do some poking around.
}

vendor/github.com/dgraph-io/badger/iterator.go generated vendored Normal file

@@ -0,0 +1,678 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"fmt"
"hash/crc32"
"sync"
"sync/atomic"
"time"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/table"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
type prefetchStatus uint8
const (
prefetched prefetchStatus = iota + 1
)
// Item is returned during iteration. Both the Key() and Value() outputs are only valid until
// iterator.Next() is called.
type Item struct {
status prefetchStatus
err error
wg sync.WaitGroup
db *DB
key []byte
vptr []byte
meta byte // We need to store meta to know about bitValuePointer.
userMeta byte
expiresAt uint64
val []byte
slice *y.Slice // Used only during prefetching.
next *Item
version uint64
txn *Txn
}
// String returns a string representation of Item
func (item *Item) String() string {
return fmt.Sprintf("key=%q, version=%d, meta=%x", item.Key(), item.Version(), item.meta)
}
// Key returns the key.
//
// Key is only valid as long as item is valid, or transaction is valid. If you need to use it
// outside its validity, please use KeyCopy.
func (item *Item) Key() []byte {
return item.key
}
// KeyCopy returns a copy of the key of the item, writing it to dst slice.
// If nil is passed, or capacity of dst isn't sufficient, a new slice would be allocated and
// returned.
func (item *Item) KeyCopy(dst []byte) []byte {
return y.SafeCopy(dst, item.key)
}
// Version returns the commit timestamp of the item.
func (item *Item) Version() uint64 {
return item.version
}
// Value retrieves the value of the item from the value log.
//
// This method must be called within a transaction. Calling it outside a
// transaction is considered undefined behavior. If an iterator is being used,
// then Item.Value() is defined in the current iteration only, because items are
// reused.
//
// If you need to use a value outside a transaction, please use Item.ValueCopy
// instead, or copy it yourself. Value might change once discard or commit is called.
// Use ValueCopy if you want to do a Set after Get.
func (item *Item) Value(fn func(val []byte) error) error {
item.wg.Wait()
if item.status == prefetched {
if item.err == nil && fn != nil {
if err := fn(item.val); err != nil {
return err
}
}
return item.err
}
buf, cb, err := item.yieldItemValue()
defer runCallback(cb)
if err != nil {
return err
}
if fn != nil {
return fn(buf)
}
return nil
}
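// A minimal usage sketch (not from the upstream sources; assumes db is an
// open *DB and the key "answer" exists): read with Value inside a read-only
// transaction, and use ValueCopy when the bytes must outlive it.
//
//    var out []byte
//    err := db.View(func(txn *Txn) error {
//        item, err := txn.Get([]byte("answer"))
//        if err != nil {
//            return err
//        }
//        // val is only valid inside this closure.
//        if err := item.Value(func(val []byte) error {
//            fmt.Printf("inside txn: %s\n", val)
//            return nil
//        }); err != nil {
//            return err
//        }
//        out, err = item.ValueCopy(nil) // safe to use after the txn ends
//        return err
//    })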
// ValueCopy returns a copy of the value of the item from the value log, writing it to dst slice.
// If nil is passed, or capacity of dst isn't sufficient, a new slice would be allocated and
// returned. Tip: It might make sense to reuse the returned slice as dst argument for the next call.
//
// This function is useful in long running iterate/update transactions to avoid a write deadlock.
// See Github issue: https://github.com/dgraph-io/badger/issues/315
func (item *Item) ValueCopy(dst []byte) ([]byte, error) {
item.wg.Wait()
if item.status == prefetched {
return y.SafeCopy(dst, item.val), item.err
}
buf, cb, err := item.yieldItemValue()
defer runCallback(cb)
return y.SafeCopy(dst, buf), err
}
func (item *Item) hasValue() bool {
if item.meta == 0 && item.vptr == nil {
// key not found
return false
}
return true
}
// IsDeletedOrExpired returns true if item contains deleted or expired value.
func (item *Item) IsDeletedOrExpired() bool {
return isDeletedOrExpired(item.meta, item.expiresAt)
}
// DiscardEarlierVersions returns whether the iterator was created with the
// option to discard earlier versions of a key when multiple are available.
func (item *Item) DiscardEarlierVersions() bool {
return item.meta&bitDiscardEarlierVersions > 0
}
func (item *Item) yieldItemValue() ([]byte, func(), error) {
key := item.Key() // No need to copy.
for {
if !item.hasValue() {
return nil, nil, nil
}
if item.slice == nil {
item.slice = new(y.Slice)
}
if (item.meta & bitValuePointer) == 0 {
val := item.slice.Resize(len(item.vptr))
copy(val, item.vptr)
return val, nil, nil
}
var vp valuePointer
vp.Decode(item.vptr)
result, cb, err := item.db.vlog.Read(vp, item.slice)
if err != ErrRetry {
return result, cb, err
}
if bytes.HasPrefix(key, badgerMove) {
// err == ErrRetry
// Error is retry even after checking the move keyspace. So, let's
// just assume that value is not present.
return nil, cb, nil
}
// The value pointer is pointing to a deleted value log. Look for the
// move key and read that instead.
runCallback(cb)
// Do not put badgerMove on the left of an append; appending to it can mutate its shared backing array.
keyTs := y.KeyWithTs(item.Key(), item.Version())
key = make([]byte, len(badgerMove)+len(keyTs))
n := copy(key, badgerMove)
copy(key[n:], keyTs)
// Note that we can't set item.key to move key, because that would
// change the key user sees before and after this call. Also, this move
// logic is internal logic and should not impact the external behavior
// of the retrieval.
vs, err := item.db.get(key)
if err != nil {
return nil, nil, err
}
if vs.Version != item.Version() {
return nil, nil, nil
}
// Bug fix: Always copy the vs.Value into vptr here. Otherwise, when item is reused this
// slice gets overwritten.
item.vptr = y.SafeCopy(item.vptr, vs.Value)
item.meta &^= bitValuePointer // Clear the value pointer bit.
if vs.Meta&bitValuePointer > 0 {
item.meta |= bitValuePointer // This meta would only be about value pointer.
}
}
}
func runCallback(cb func()) {
if cb != nil {
cb()
}
}
func (item *Item) prefetchValue() {
val, cb, err := item.yieldItemValue()
defer runCallback(cb)
item.err = err
item.status = prefetched
if val == nil {
return
}
if item.db.opt.ValueLogLoadingMode == options.MemoryMap {
buf := item.slice.Resize(len(val))
copy(buf, val)
item.val = buf
} else {
item.val = val
}
}
// EstimatedSize returns the approximate size of the key-value pair.
//
// This can be called while iterating through a store to quickly estimate the
// size of a range of key-value pairs (without fetching the corresponding
// values).
func (item *Item) EstimatedSize() int64 {
if !item.hasValue() {
return 0
}
if (item.meta & bitValuePointer) == 0 {
return int64(len(item.key) + len(item.vptr))
}
var vp valuePointer
vp.Decode(item.vptr)
return int64(vp.Len) // includes key length.
}
// ValueSize returns the exact size of the value.
//
// This can be called to quickly estimate the size of a value without fetching
// it.
func (item *Item) ValueSize() int64 {
if !item.hasValue() {
return 0
}
if (item.meta & bitValuePointer) == 0 {
return int64(len(item.vptr))
}
var vp valuePointer
vp.Decode(item.vptr)
klen := int64(len(item.key) + 8) // 8 bytes for timestamp.
return int64(vp.Len) - klen - headerBufSize - crc32.Size
}
// UserMeta returns the userMeta set by the user. Typically, this byte, optionally set by the
// user, is used to interpret the value.
func (item *Item) UserMeta() byte {
return item.userMeta
}
// ExpiresAt returns a Unix time value indicating when the item will be
// considered expired. 0 indicates that the item will never expire.
func (item *Item) ExpiresAt() uint64 {
return item.expiresAt
}
// TODO: Switch this to use linked list container in Go.
type list struct {
head *Item
tail *Item
}
func (l *list) push(i *Item) {
i.next = nil
if l.tail == nil {
l.head = i
l.tail = i
return
}
l.tail.next = i
l.tail = i
}
func (l *list) pop() *Item {
if l.head == nil {
return nil
}
i := l.head
if l.head == l.tail {
l.tail = nil
l.head = nil
} else {
l.head = i.next
}
i.next = nil
return i
}
// IteratorOptions is used to set options when iterating over Badger key-value
// stores.
//
// This package provides DefaultIteratorOptions which contains options that
// should work for most applications. Consider using that as a starting point
// before customizing it for your own needs.
type IteratorOptions struct {
// Indicates whether we should prefetch values during iteration and store them.
PrefetchValues bool
// How many KV pairs to prefetch while iterating. Valid only if PrefetchValues is true.
PrefetchSize int
Reverse bool // Direction of iteration. False is forward, true is backward.
AllVersions bool // Fetch all valid versions of the same key.
// The following option is used to narrow down the SSTables that iterator picks up. If
// Prefix is specified, only tables which could have this prefix are picked based on their range
// of keys.
Prefix []byte // Only iterate over this given prefix.
prefixIsKey bool // If set, use the prefix for bloom filter lookup.
internalAccess bool // Used to allow internal access to badger keys.
}
func (opt *IteratorOptions) pickTable(t table.TableInterface) bool {
if len(opt.Prefix) == 0 {
return true
}
trim := func(key []byte) []byte {
if len(key) > len(opt.Prefix) {
return key[:len(opt.Prefix)]
}
return key
}
if bytes.Compare(trim(t.Smallest()), opt.Prefix) > 0 {
return false
}
if bytes.Compare(trim(t.Biggest()), opt.Prefix) < 0 {
return false
}
// Bloom filter lookup would only work if opt.Prefix does NOT have the read
// timestamp as part of the key.
if opt.prefixIsKey && t.DoesNotHave(opt.Prefix) {
return false
}
return true
}
// DefaultIteratorOptions contains default options when iterating over Badger key-value stores.
var DefaultIteratorOptions = IteratorOptions{
PrefetchValues: true,
PrefetchSize: 100,
Reverse: false,
AllVersions: false,
}
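// A usage sketch (assuming db is an open *DB): key-only iteration is much
// cheaper than full iteration because values are never touched; disable
// prefetching and avoid calling Item.Value. Setting Prefix additionally
// narrows which SSTables get picked up.
//
//    err := db.View(func(txn *Txn) error {
//        opt := DefaultIteratorOptions
//        opt.PrefetchValues = false   // keys only
//        opt.Prefix = []byte("user/") // restrict iteration to this prefix
//        it := txn.NewIterator(opt)
//        defer it.Close()
//        for it.Rewind(); it.Valid(); it.Next() {
//            fmt.Printf("key=%s\n", it.Item().Key())
//        }
//        return nil
//    })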
// Iterator helps iterating over the KV pairs in a lexicographically sorted order.
type Iterator struct {
iitr *y.MergeIterator
txn *Txn
readTs uint64
opt IteratorOptions
item *Item
data list
waste list
lastKey []byte // Used to skip over multiple versions of the same key.
closed bool
}
// NewIterator returns a new iterator. Depending upon the options, either only keys, or both
// key-value pairs would be fetched. The keys are returned in lexicographically sorted order.
// Using prefetch is recommended if you're doing a long running iteration, for performance.
//
// Multiple Iterators:
// For a read-only txn, multiple iterators can be running simultaneously. However, for a read-write
// txn, only one can be running at one time to avoid race conditions, because Txn is thread-unsafe.
func (txn *Txn) NewIterator(opt IteratorOptions) *Iterator {
if txn.discarded {
panic("Transaction has already been discarded")
}
// Do not change the order of the next if. We must track the number of running iterators.
if atomic.AddInt32(&txn.numIterators, 1) > 1 && txn.update {
atomic.AddInt32(&txn.numIterators, -1)
panic("Only one iterator can be active at one time, for a RW txn.")
}
// TODO: If Prefix is set, only pick those memtables which have keys with
// the prefix.
tables, decr := txn.db.getMemTables()
defer decr()
txn.db.vlog.incrIteratorCount()
var iters []y.Iterator
if itr := txn.newPendingWritesIterator(opt.Reverse); itr != nil {
iters = append(iters, itr)
}
for i := 0; i < len(tables); i++ {
iters = append(iters, tables[i].NewUniIterator(opt.Reverse))
}
iters = txn.db.lc.appendIterators(iters, &opt) // This will increment references.
res := &Iterator{
txn: txn,
iitr: y.NewMergeIterator(iters, opt.Reverse),
opt: opt,
readTs: txn.readTs,
}
return res
}
// NewKeyIterator is just like NewIterator, but allows the user to iterate over all versions of a
// single key. Internally, it sets the Prefix option in provided opt, and uses that prefix to
// additionally run bloom filter lookups before picking tables from the LSM tree.
func (txn *Txn) NewKeyIterator(key []byte, opt IteratorOptions) *Iterator {
if len(opt.Prefix) > 0 {
panic("opt.Prefix should be nil for NewKeyIterator.")
}
opt.Prefix = key // This key must be without the timestamp.
opt.prefixIsKey = true
return txn.NewIterator(opt)
}
func (it *Iterator) newItem() *Item {
item := it.waste.pop()
if item == nil {
item = &Item{slice: new(y.Slice), db: it.txn.db, txn: it.txn}
}
return item
}
// Item returns pointer to the current key-value pair.
// This item is only valid until it.Next() gets called.
func (it *Iterator) Item() *Item {
tx := it.txn
tx.addReadKey(it.item.Key())
return it.item
}
// Valid returns false when iteration is done.
func (it *Iterator) Valid() bool {
if it.item == nil {
return false
}
return bytes.HasPrefix(it.item.key, it.opt.Prefix)
}
// ValidForPrefix returns false when iteration is done
// or when the current key is not prefixed by the specified prefix.
func (it *Iterator) ValidForPrefix(prefix []byte) bool {
return it.Valid() && bytes.HasPrefix(it.item.key, prefix)
}
// Close would close the iterator. It is important to call this when you're done with iteration.
func (it *Iterator) Close() {
if it.closed {
return
}
it.closed = true
it.iitr.Close()
// It is important to wait for the fill goroutines to finish. Otherwise, we might leave zombie
// goroutines behind, which are waiting to acquire file read locks after DB has been closed.
waitFor := func(l list) {
item := l.pop()
for item != nil {
item.wg.Wait()
item = l.pop()
}
}
waitFor(it.waste)
waitFor(it.data)
// TODO: We could handle this error.
_ = it.txn.db.vlog.decrIteratorCount()
atomic.AddInt32(&it.txn.numIterators, -1)
}
// Next would advance the iterator by one. Always check it.Valid() after a Next()
// to ensure you have access to a valid it.Item().
func (it *Iterator) Next() {
// Reuse current item
it.item.wg.Wait() // Just cleaner to wait before pushing to avoid doing ref counting.
it.waste.push(it.item)
// Set next item to current
it.item = it.data.pop()
for it.iitr.Valid() {
if it.parseItem() {
// parseItem calls one extra next.
// This is used to deal with the complexity of reverse iteration.
break
}
}
}
func isDeletedOrExpired(meta byte, expiresAt uint64) bool {
if meta&bitDelete > 0 {
return true
}
if expiresAt == 0 {
return false
}
return expiresAt <= uint64(time.Now().Unix())
}
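// Worked example: an entry written with a 10-second TTL at Unix time
// 1000000 stores expiresAt = 1000010 and reads as expired once
// time.Now().Unix() reaches 1000010; expiresAt == 0 means no TTL, and a
// tombstone is detected purely from the bitDelete flag in meta.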
// parseItem is a complex function because it needs to handle both forward and reverse iteration
// implementation. We store keys such that their versions are sorted in descending order. This makes
// forward iteration efficient, but reverse iteration complicated. This tradeoff is better because
// forward iteration is more common than reverse.
//
// This function advances the iterator.
func (it *Iterator) parseItem() bool {
mi := it.iitr
key := mi.Key()
setItem := func(item *Item) {
if it.item == nil {
it.item = item
} else {
it.data.push(item)
}
}
// Skip badger keys.
if !it.opt.internalAccess && bytes.HasPrefix(key, badgerPrefix) {
mi.Next()
return false
}
// Skip any versions which are beyond the readTs.
version := y.ParseTs(key)
if version > it.readTs {
mi.Next()
return false
}
if it.opt.AllVersions {
// Return deleted or expired values also, otherwise user can't figure out
// whether the key was deleted.
item := it.newItem()
it.fill(item)
setItem(item)
mi.Next()
return true
}
// If iterating in forward direction, then just checking the last key against current key would
// be sufficient.
if !it.opt.Reverse {
if y.SameKey(it.lastKey, key) {
mi.Next()
return false
}
// Only track in forward direction.
// We should update lastKey as soon as we find a different key in our snapshot.
// Consider keys: a 5, b 7 (del), b 5. When iterating, lastKey = a.
// Then we see b 7, which is deleted. If we don't store lastKey = b, we'll then return b 5,
// which is wrong. Therefore, update lastKey here.
it.lastKey = y.SafeCopy(it.lastKey, mi.Key())
}
FILL:
// If deleted, advance and return.
vs := mi.Value()
if isDeletedOrExpired(vs.Meta, vs.ExpiresAt) {
mi.Next()
return false
}
item := it.newItem()
it.fill(item)
// fill item based on current cursor position. All Next calls have returned, so reaching here
// means no Next was called.
mi.Next() // Advance but no fill item yet.
if !it.opt.Reverse || !mi.Valid() { // Forward direction, or invalid.
setItem(item)
return true
}
// Reverse direction.
nextTs := y.ParseTs(mi.Key())
mik := y.ParseKey(mi.Key())
if nextTs <= it.readTs && bytes.Equal(mik, item.key) {
// This is a valid potential candidate.
goto FILL
}
// Ignore the next candidate. Return the current one.
setItem(item)
return true
}
func (it *Iterator) fill(item *Item) {
vs := it.iitr.Value()
item.meta = vs.Meta
item.userMeta = vs.UserMeta
item.expiresAt = vs.ExpiresAt
item.version = y.ParseTs(it.iitr.Key())
item.key = y.SafeCopy(item.key, y.ParseKey(it.iitr.Key()))
item.vptr = y.SafeCopy(item.vptr, vs.Value)
item.val = nil
if it.opt.PrefetchValues {
item.wg.Add(1)
go func() {
// FIXME we are not handling errors here.
item.prefetchValue()
item.wg.Done()
}()
}
}
func (it *Iterator) prefetch() {
prefetchSize := 2
if it.opt.PrefetchValues && it.opt.PrefetchSize > 1 {
prefetchSize = it.opt.PrefetchSize
}
i := it.iitr
var count int
it.item = nil
for i.Valid() {
if !it.parseItem() {
continue
}
count++
if count == prefetchSize {
break
}
}
}
// Seek would seek to the provided key if present. If absent, it would seek to the next smallest key
// greater than the provided key if iterating in the forward direction. Behavior would be reversed if
// iterating backwards.
func (it *Iterator) Seek(key []byte) {
for i := it.data.pop(); i != nil; i = it.data.pop() {
i.wg.Wait()
it.waste.push(i)
}
it.lastKey = it.lastKey[:0]
if len(key) == 0 {
key = it.opt.Prefix
}
if len(key) == 0 {
it.iitr.Rewind()
it.prefetch()
return
}
if !it.opt.Reverse {
key = y.KeyWithTs(key, it.txn.readTs)
} else {
key = y.KeyWithTs(key, 0)
}
it.iitr.Seek(key)
it.prefetch()
}
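// A reverse-iteration sketch (assuming txn is an open transaction): with
// Reverse set, Seek lands on the provided key, or on the next smaller key if
// it is absent, and Next then walks toward smaller keys.
//
//    opt := DefaultIteratorOptions
//    opt.Reverse = true
//    it := txn.NewIterator(opt)
//    defer it.Close()
//    for it.Seek([]byte("0500")); it.Valid(); it.Next() {
//        // visits "0500", then "0499", "0498", ...
//    }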
// Rewind would rewind the iterator cursor all the way to zero-th position, which would be the
// smallest key if iterating forward, and largest if iterating backward. It does not keep track of
// whether the cursor started with a Seek().
func (it *Iterator) Rewind() {
it.Seek(nil)
}

vendor/github.com/dgraph-io/badger/iterator_test.go generated vendored Normal file

@@ -0,0 +1,244 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/require"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
type tableMock struct {
left, right []byte
}
func (tm *tableMock) Smallest() []byte { return tm.left }
func (tm *tableMock) Biggest() []byte { return tm.right }
func (tm *tableMock) DoesNotHave(key []byte) bool { return false }
func TestPickTables(t *testing.T) {
opt := DefaultIteratorOptions
within := func(prefix, left, right string) {
opt.Prefix = []byte(prefix)
tm := &tableMock{left: []byte(left), right: []byte(right)}
require.True(t, opt.pickTable(tm))
}
outside := func(prefix, left, right string) {
opt.Prefix = []byte(prefix)
tm := &tableMock{left: []byte(left), right: []byte(right)}
require.False(t, opt.pickTable(tm))
}
within("abc", "ab", "ad")
within("abc", "abc", "ad")
within("abc", "abb123", "ad")
within("abc", "abc123", "abd234")
within("abc", "abc123", "abc456")
outside("abd", "abe", "ad")
outside("abd", "ac", "ad")
outside("abd", "b", "e")
outside("abd", "a", "ab")
outside("abd", "ab", "abc")
outside("abd", "ab", "abc123")
}
func TestIteratePrefix(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
bkey := func(i int) []byte {
return []byte(fmt.Sprintf("%04d", i))
}
val := []byte("OK")
n := 10000
batch := db.NewWriteBatch()
for i := 0; i < n; i++ {
if (i % 1000) == 0 {
t.Logf("Put i=%d\n", i)
}
require.NoError(t, batch.Set(bkey(i), val, 0))
}
require.NoError(t, batch.Flush())
countKeys := func(prefix string) int {
t.Logf("Testing with prefix: %s", prefix)
var count int
opt := DefaultIteratorOptions
opt.Prefix = []byte(prefix)
err := db.View(func(txn *Txn) error {
itr := txn.NewIterator(opt)
defer itr.Close()
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
err := item.Value(func(v []byte) error {
require.Equal(t, val, v)
return nil
})
require.NoError(t, err)
require.True(t, bytes.HasPrefix(item.Key(), opt.Prefix))
count++
}
return nil
})
require.NoError(t, err)
return count
}
countOneKey := func(key []byte) int {
var count int
err := db.View(func(txn *Txn) error {
itr := txn.NewKeyIterator(key, DefaultIteratorOptions)
defer itr.Close()
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
err := item.Value(func(v []byte) error {
require.Equal(t, val, v)
return nil
})
require.NoError(t, err)
require.Equal(t, key, item.Key())
count++
}
return nil
})
require.NoError(t, err)
return count
}
for i := 0; i <= 9; i++ {
require.Equal(t, 1, countKeys(fmt.Sprintf("%d%d%d%d", i, i, i, i)))
require.Equal(t, 10, countKeys(fmt.Sprintf("%d%d%d", i, i, i)))
require.Equal(t, 100, countKeys(fmt.Sprintf("%d%d", i, i)))
require.Equal(t, 1000, countKeys(fmt.Sprintf("%d", i)))
}
require.Equal(t, 10000, countKeys(""))
t.Logf("Testing each key with key iterator")
for i := 0; i < n; i++ {
require.Equal(t, 1, countOneKey(bkey(i)))
}
})
}
// go test -v -run=XXX -bench=BenchmarkIterate -benchtime=3s
// Benchmark with opt.Prefix set ===
// goos: linux
// goarch: amd64
// pkg: github.com/dgraph-io/badger
// BenchmarkIteratePrefixSingleKey/Key_lookups-4 10000 365539 ns/op
// --- BENCH: BenchmarkIteratePrefixSingleKey/Key_lookups-4
// iterator_test.go:147: Inner b.N: 1
// iterator_test.go:147: Inner b.N: 100
// iterator_test.go:147: Inner b.N: 10000
// --- BENCH: BenchmarkIteratePrefixSingleKey
// iterator_test.go:143: LSM files: 79
// iterator_test.go:145: Outer b.N: 1
// PASS
// ok github.com/dgraph-io/badger 41.586s
//
// Benchmark with NO opt.Prefix set ===
// goos: linux
// goarch: amd64
// pkg: github.com/dgraph-io/badger
// BenchmarkIteratePrefixSingleKey/Key_lookups-4 10000 460924 ns/op
// --- BENCH: BenchmarkIteratePrefixSingleKey/Key_lookups-4
// iterator_test.go:147: Inner b.N: 1
// iterator_test.go:147: Inner b.N: 100
// iterator_test.go:147: Inner b.N: 10000
// --- BENCH: BenchmarkIteratePrefixSingleKey
// iterator_test.go:143: LSM files: 83
// iterator_test.go:145: Outer b.N: 1
// PASS
// ok github.com/dgraph-io/badger 41.836s
//
// On my laptop there's a 20% improvement in latency with ~80 files.
func BenchmarkIteratePrefixSingleKey(b *testing.B) {
dir, err := ioutil.TempDir(".", "badger-test")
y.Check(err)
defer os.RemoveAll(dir)
opts := getTestOptions(dir)
opts.TableLoadingMode = options.LoadToRAM
db, err := Open(opts)
y.Check(err)
defer db.Close()
N := 100000 // Should generate around 80 SSTables.
val := []byte("OK")
bkey := func(i int) []byte {
return []byte(fmt.Sprintf("%06d", i))
}
batch := db.NewWriteBatch()
for i := 0; i < N; i++ {
y.Check(batch.Set(bkey(i), val, 0))
}
y.Check(batch.Flush())
var lsmFiles int
err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if strings.HasSuffix(path, ".sst") {
lsmFiles++
}
if err != nil {
return err
}
return nil
})
y.Check(err)
b.Logf("LSM files: %d", lsmFiles)
b.Logf("Key splits: %v", db.KeySplits(nil))
b.Logf("Key splits with prefix: %v", db.KeySplits([]byte("09")))
b.Logf("Outer b.N: %d", b.N)
b.Run("Key lookups", func(b *testing.B) {
b.Logf("Inner b.N: %d", b.N)
for i := 0; i < b.N; i++ {
key := bkey(rand.Intn(N))
err := db.View(func(txn *Txn) error {
opt := DefaultIteratorOptions
// NOTE: Comment opt.Prefix out here to compare the performance
// difference between providing Prefix as an option, versus not. I
// see a 20% improvement when there are ~80 SSTables.
opt.Prefix = key
opt.AllVersions = true
itr := txn.NewIterator(opt)
defer itr.Close()
var count int
for itr.Seek(key); itr.ValidForPrefix(key); itr.Next() {
count++
}
if count != 1 {
b.Fatalf("Count must be one key: %s. Found: %d", key, count)
}
return nil
})
if err != nil {
b.Fatalf("Error while View: %v", err)
}
}
})
}

vendor/github.com/dgraph-io/badger/level_handler.go generated vendored Normal file

@@ -0,0 +1,304 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"sort"
"sync"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/table"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
type levelHandler struct {
// Guards tables, totalSize.
sync.RWMutex
// For level >= 1, tables are sorted by key ranges, which do not overlap.
// For level 0, tables are sorted by time.
// For level 0, the newest tables are at the back. Compact the oldest one first, which is at the front.
tables []*table.Table
totalSize int64
// The following are initialized once and const.
level int
strLevel string
maxTotalSize int64
db *DB
}
func (s *levelHandler) getTotalSize() int64 {
s.RLock()
defer s.RUnlock()
return s.totalSize
}
// initTables replaces s.tables with given tables. This is done during loading.
func (s *levelHandler) initTables(tables []*table.Table) {
s.Lock()
defer s.Unlock()
s.tables = tables
s.totalSize = 0
for _, t := range tables {
s.totalSize += t.Size()
}
if s.level == 0 {
// Key range will overlap. Just sort by fileID in ascending order
// because newer tables are at the end of level 0.
sort.Slice(s.tables, func(i, j int) bool {
return s.tables[i].ID() < s.tables[j].ID()
})
} else {
// Sort tables by keys.
sort.Slice(s.tables, func(i, j int) bool {
return y.CompareKeys(s.tables[i].Smallest(), s.tables[j].Smallest()) < 0
})
}
}
// deleteTables removes the given tables from the level and then decrements their references.
func (s *levelHandler) deleteTables(toDel []*table.Table) error {
s.Lock() // s.Unlock() below
toDelMap := make(map[uint64]struct{})
for _, t := range toDel {
toDelMap[t.ID()] = struct{}{}
}
// Make a copy as iterators might be keeping a slice of tables.
var newTables []*table.Table
for _, t := range s.tables {
_, found := toDelMap[t.ID()]
if !found {
newTables = append(newTables, t)
continue
}
s.totalSize -= t.Size()
}
s.tables = newTables
s.Unlock() // Unlock s _before_ we DecrRef our tables, which can be slow.
return decrRefs(toDel)
}
// replaceTables will replace tables[left:right] with newTables. Note this EXCLUDES tables[right].
// You must call decr() to delete the old tables _after_ writing the update to the manifest.
func (s *levelHandler) replaceTables(newTables []*table.Table) error {
// Need to re-search the range of tables in this level to be replaced as other goroutines might
// be changing it as well. (They can't touch our tables, but if they add/remove other tables,
// the indices get shifted around.)
if len(newTables) == 0 {
return nil
}
s.Lock() // We s.Unlock() below.
// Increase totalSize first.
for _, tbl := range newTables {
s.totalSize += tbl.Size()
tbl.IncrRef()
}
kr := keyRange{
left: newTables[0].Smallest(),
right: newTables[len(newTables)-1].Biggest(),
}
left, right := s.overlappingTables(levelHandlerRLocked{}, kr)
toDecr := make([]*table.Table, right-left)
// Update totalSize and reference counts.
for i := left; i < right; i++ {
tbl := s.tables[i]
s.totalSize -= tbl.Size()
toDecr[i-left] = tbl
}
// To be safe, just make a copy. TODO: Be more careful and avoid copying.
numDeleted := right - left
numAdded := len(newTables)
tables := make([]*table.Table, len(s.tables)-numDeleted+numAdded)
y.AssertTrue(left == copy(tables, s.tables[:left]))
t := tables[left:]
y.AssertTrue(numAdded == copy(t, newTables))
t = t[numAdded:]
y.AssertTrue(len(s.tables[right:]) == copy(t, s.tables[right:]))
s.tables = tables
s.Unlock() // s.Unlock before we DecrRef tables -- that can be slow.
return decrRefs(toDecr)
}
func decrRefs(tables []*table.Table) error {
for _, table := range tables {
if err := table.DecrRef(); err != nil {
return err
}
}
return nil
}
func newLevelHandler(db *DB, level int) *levelHandler {
return &levelHandler{
level: level,
strLevel: fmt.Sprintf("l%d", level),
db: db,
}
}
// tryAddLevel0Table returns true if ok and no stalling.
func (s *levelHandler) tryAddLevel0Table(t *table.Table) bool {
y.AssertTrue(s.level == 0)
// Need lock as we may be deleting the first table during a level 0 compaction.
s.Lock()
defer s.Unlock()
if len(s.tables) >= s.db.opt.NumLevelZeroTablesStall {
return false
}
s.tables = append(s.tables, t)
t.IncrRef()
s.totalSize += t.Size()
return true
}
func (s *levelHandler) numTables() int {
s.RLock()
defer s.RUnlock()
return len(s.tables)
}
func (s *levelHandler) close() error {
s.RLock()
defer s.RUnlock()
var err error
for _, t := range s.tables {
if closeErr := t.Close(); closeErr != nil && err == nil {
err = closeErr
}
}
return errors.Wrap(err, "levelHandler.close")
}
// getTableForKey acquires a read-lock to access s.tables. It returns a list of tableHandlers.
func (s *levelHandler) getTableForKey(key []byte) ([]*table.Table, func() error) {
s.RLock()
defer s.RUnlock()
if s.level == 0 {
// For level 0, we need to check every table. Remember to make a copy as s.tables may change
// once we exit this function, and we don't want to lock s.tables while seeking in tables.
// CAUTION: Reverse the tables.
out := make([]*table.Table, 0, len(s.tables))
for i := len(s.tables) - 1; i >= 0; i-- {
out = append(out, s.tables[i])
s.tables[i].IncrRef()
}
return out, func() error {
for _, t := range out {
if err := t.DecrRef(); err != nil {
return err
}
}
return nil
}
}
// For level >= 1, we can do a binary search as key range does not overlap.
idx := sort.Search(len(s.tables), func(i int) bool {
return y.CompareKeys(s.tables[i].Biggest(), key) >= 0
})
if idx >= len(s.tables) {
// Given key is strictly > than every element we have.
return nil, func() error { return nil }
}
tbl := s.tables[idx]
tbl.IncrRef()
return []*table.Table{tbl}, tbl.DecrRef
}
// get returns the value for a given key or the key after that. If not found, an empty
// y.ValueStruct is returned.
func (s *levelHandler) get(key []byte) (y.ValueStruct, error) {
tables, decr := s.getTableForKey(key)
keyNoTs := y.ParseKey(key)
var maxVs y.ValueStruct
for _, th := range tables {
if th.DoesNotHave(keyNoTs) {
y.NumLSMBloomHits.Add(s.strLevel, 1)
continue
}
it := th.NewIterator(false)
defer it.Close()
y.NumLSMGets.Add(s.strLevel, 1)
it.Seek(key)
if !it.Valid() {
continue
}
if y.SameKey(key, it.Key()) {
if version := y.ParseTs(it.Key()); maxVs.Version < version {
maxVs = it.Value()
maxVs.Version = version
}
}
}
return maxVs, decr()
}
// appendIterators appends iterators to an array of iterators, for merging.
// Note: This obtains references for the table handlers. Remember to close these iterators.
func (s *levelHandler) appendIterators(iters []y.Iterator, opt *IteratorOptions) []y.Iterator {
s.RLock()
defer s.RUnlock()
tables := make([]*table.Table, 0, len(s.tables))
for _, t := range s.tables {
if opt.pickTable(t) {
tables = append(tables, t)
}
}
if len(tables) == 0 {
return iters
}
if s.level == 0 {
// Remember to add in reverse order!
// The newer table at the end of s.tables should be added first as it takes precedence.
return appendIteratorsReversed(iters, tables, opt.Reverse)
}
return append(iters, table.NewConcatIterator(tables, opt.Reverse))
}
type levelHandlerRLocked struct{}
// overlappingTables returns the tables that intersect with the key range, as a half-open
// interval. The caller must already hold a read lock; this is important enough that it must pass
// an empty levelHandlerRLocked parameter declaring as much.
func (s *levelHandler) overlappingTables(_ levelHandlerRLocked, kr keyRange) (int, int) {
left := sort.Search(len(s.tables), func(i int) bool {
return y.CompareKeys(kr.left, s.tables[i].Biggest()) <= 0
})
right := sort.Search(len(s.tables), func(i int) bool {
return y.CompareKeys(kr.right, s.tables[i].Smallest()) < 0
})
return left, right
}
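// Worked example (ignoring version timestamps for simplicity): for a level
// holding three tables with key ranges [a..c], [d..f] and [g..i], the range
// {left: "e", right: "h"} resolves to left = 1 and right = 3, i.e. the
// half-open interval [1, 3) naming the two tables it intersects.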

vendor/github.com/dgraph-io/badger/levels.go generated vendored Normal file

@@ -0,0 +1,886 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"math"
"math/rand"
"os"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"gx/ipfs/QmRvYNctevGUW52urgmoFZscT6buMKqhHezLUS64WepGWn/go-net/trace"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/pb"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/table"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
type levelsController struct {
nextFileID uint64 // Atomic
elog trace.EventLog
// The following are initialized once and const.
levels []*levelHandler
kv *DB
cstatus compactStatus
}
var (
// This is for getting timings between stalls.
lastUnstalled time.Time
)
// revertToManifest checks that all necessary table files exist and removes all table files not
// referenced by the manifest. idMap is a set of table file IDs that were read from the directory
// listing.
func revertToManifest(kv *DB, mf *Manifest, idMap map[uint64]struct{}) error {
// 1. Check all files in manifest exist.
for id := range mf.Tables {
if _, ok := idMap[id]; !ok {
return fmt.Errorf("file does not exist for table %d", id)
}
}
// 2. Delete files that shouldn't exist.
for id := range idMap {
if _, ok := mf.Tables[id]; !ok {
kv.elog.Printf("Table file %d not referenced in MANIFEST\n", id)
filename := table.NewFilename(id, kv.opt.Dir)
if err := os.Remove(filename); err != nil {
return y.Wrapf(err, "While removing table %d", id)
}
}
}
return nil
}
func newLevelsController(db *DB, mf *Manifest) (*levelsController, error) {
y.AssertTrue(db.opt.NumLevelZeroTablesStall > db.opt.NumLevelZeroTables)
s := &levelsController{
kv: db,
elog: db.elog,
levels: make([]*levelHandler, db.opt.MaxLevels),
}
s.cstatus.levels = make([]*levelCompactStatus, db.opt.MaxLevels)
for i := 0; i < db.opt.MaxLevels; i++ {
s.levels[i] = newLevelHandler(db, i)
if i == 0 {
// Do nothing.
} else if i == 1 {
// Level 1 probably shouldn't be too much bigger than level 0.
s.levels[i].maxTotalSize = db.opt.LevelOneSize
} else {
s.levels[i].maxTotalSize = s.levels[i-1].maxTotalSize * int64(db.opt.LevelSizeMultiplier)
}
s.cstatus.levels[i] = new(levelCompactStatus)
}
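	// Worked example (assumed option values): with LevelOneSize = 256 MB and
	// LevelSizeMultiplier = 10, the caps come out as L1 = 256 MB, L2 = 2.56 GB,
	// L3 = 25.6 GB, and so on. Level 0 has no size cap; it is bounded by table
	// count (NumLevelZeroTables) instead.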
// Compare manifest against directory, check for existent/non-existent files, and remove.
if err := revertToManifest(db, mf, getIDMap(db.opt.Dir)); err != nil {
return nil, err
}
// Some files may be deleted. Let's reload.
var flags uint32 = y.Sync
if db.opt.ReadOnly {
flags |= y.ReadOnly
}
var mu sync.Mutex
tables := make([][]*table.Table, db.opt.MaxLevels)
var maxFileID uint64
// We found that using 3 goroutines allows disk throughput to be utilized to its max.
// Disk utilization is the main thing we should focus on, while trying to read the data. That's
// the one factor that remains constant between HDD and SSD.
throttle := y.NewThrottle(3)
start := time.Now()
var numOpened int32
tick := time.NewTicker(3 * time.Second)
defer tick.Stop()
for fileID, tf := range mf.Tables {
fname := table.NewFilename(fileID, db.opt.Dir)
select {
case <-tick.C:
db.opt.Infof("%d tables out of %d opened in %s\n", atomic.LoadInt32(&numOpened),
len(mf.Tables), time.Since(start).Round(time.Millisecond))
default:
}
if err := throttle.Do(); err != nil {
closeAllTables(tables)
return nil, err
}
if fileID > maxFileID {
maxFileID = fileID
}
go func(fname string, tf TableManifest) {
var rerr error
defer func() {
throttle.Done(rerr)
atomic.AddInt32(&numOpened, 1)
}()
fd, err := y.OpenExistingFile(fname, flags)
if err != nil {
rerr = errors.Wrapf(err, "Opening file: %q", fname)
return
}
t, err := table.OpenTable(fd, db.opt.TableLoadingMode, tf.Checksum)
if err != nil {
if strings.HasPrefix(err.Error(), "CHECKSUM_MISMATCH:") {
db.opt.Errorf(err.Error())
db.opt.Errorf("Ignoring table %s", fd.Name())
// Do not set rerr. We will continue without this table.
} else {
rerr = errors.Wrapf(err, "Opening table: %q", fname)
}
return
}
mu.Lock()
tables[tf.Level] = append(tables[tf.Level], t)
mu.Unlock()
}(fname, tf)
}
if err := throttle.Finish(); err != nil {
closeAllTables(tables)
return nil, err
}
db.opt.Infof("All %d tables opened in %s\n", atomic.LoadInt32(&numOpened),
time.Since(start).Round(time.Millisecond))
s.nextFileID = maxFileID + 1
for i, tbls := range tables {
s.levels[i].initTables(tbls)
}
// Make sure key ranges do not overlap etc.
if err := s.validate(); err != nil {
_ = s.cleanupLevels()
return nil, errors.Wrap(err, "Level validation")
}
// Sync directory (because we have at least removed some files, or previously created the
// manifest file).
if err := syncDir(db.opt.Dir); err != nil {
_ = s.close()
return nil, err
}
return s, nil
}
// Closes the tables, for cleanup in newLevelsController. (We Close() instead of using DecrRef()
// because that would delete the underlying files.) We ignore errors, which is OK because tables
// are read-only.
func closeAllTables(tables [][]*table.Table) {
for _, tableSlice := range tables {
for _, table := range tableSlice {
_ = table.Close()
}
}
}
func (s *levelsController) cleanupLevels() error {
var firstErr error
for _, l := range s.levels {
if err := l.close(); err != nil && firstErr == nil {
firstErr = err
}
}
return firstErr
}
// This function picks all tables from all levels, creates a manifest changeset,
// applies it, and then decrements the refs of these tables, which would result
// in their deletion.
func (s *levelsController) deleteLSMTree() (int, error) {
// First pick all tables, so we can create a manifest changelog.
var all []*table.Table
for _, l := range s.levels {
l.RLock()
all = append(all, l.tables...)
l.RUnlock()
}
if len(all) == 0 {
return 0, nil
}
// Generate the manifest changes.
changes := []*pb.ManifestChange{}
for _, table := range all {
changes = append(changes, newDeleteChange(table.ID()))
}
changeSet := pb.ManifestChangeSet{Changes: changes}
if err := s.kv.manifest.addChanges(changeSet.Changes); err != nil {
return 0, err
}
// Now that manifest has been successfully written, we can delete the tables.
for _, l := range s.levels {
l.Lock()
l.totalSize = 0
l.tables = l.tables[:0]
l.Unlock()
}
for _, table := range all {
if err := table.DecrRef(); err != nil {
return 0, err
}
}
return len(all), nil
}
func (s *levelsController) startCompact(lc *y.Closer) {
n := s.kv.opt.NumCompactors
lc.AddRunning(n - 1)
for i := 0; i < n; i++ {
go s.runWorker(lc)
}
}
func (s *levelsController) runWorker(lc *y.Closer) {
defer lc.Done()
randomDelay := time.NewTimer(time.Duration(rand.Int31n(1000)) * time.Millisecond)
select {
case <-randomDelay.C:
case <-lc.HasBeenClosed():
randomDelay.Stop()
return
}
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for {
select {
// Can add a done channel or other stuff.
case <-ticker.C:
prios := s.pickCompactLevels()
for _, p := range prios {
if err := s.doCompact(p); err == nil {
break
} else if err == errFillTables {
// pass
} else {
s.kv.opt.Warningf("While running doCompact: %v\n", err)
}
}
case <-lc.HasBeenClosed():
return
}
}
}
// Returns true if level zero may be compacted, without accounting for compactions that already
// might be happening.
func (s *levelsController) isLevel0Compactable() bool {
return s.levels[0].numTables() >= s.kv.opt.NumLevelZeroTables
}
// Returns true if the non-zero level may be compacted. delSize provides the size of the tables
// which are currently being compacted so that we treat them as already having started being
// compacted (because they have been, yet their size is already counted in getTotalSize).
func (l *levelHandler) isCompactable(delSize int64) bool {
return l.getTotalSize()-delSize >= l.maxTotalSize
}
type compactionPriority struct {
level int
score float64
}
// pickCompactLevel determines which level to compact.
// Based on: https://github.com/facebook/rocksdb/wiki/Leveled-Compaction
func (s *levelsController) pickCompactLevels() (prios []compactionPriority) {
// This function must use identical criteria for guaranteeing compaction's progress that
// addLevel0Table uses.
// cstatus is checked to see if level 0's tables are already being compacted
if !s.cstatus.overlapsWith(0, infRange) && s.isLevel0Compactable() {
pri := compactionPriority{
level: 0,
score: float64(s.levels[0].numTables()) / float64(s.kv.opt.NumLevelZeroTables),
}
prios = append(prios, pri)
}
for i, l := range s.levels[1:] {
// Don't consider those tables that are already being compacted right now.
delSize := s.cstatus.delSize(i + 1)
if l.isCompactable(delSize) {
pri := compactionPriority{
level: i + 1,
score: float64(l.getTotalSize()-delSize) / float64(l.maxTotalSize),
}
prios = append(prios, pri)
}
}
sort.Slice(prios, func(i, j int) bool {
return prios[i].score > prios[j].score
})
return prios
}
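// Worked example (assumed option values): with NumLevelZeroTables = 5 and
// 8 tables sitting at level 0, L0 scores 8/5 = 1.6; a level holding 15 MB
// against a 10 MB maxTotalSize with nothing under compaction (delSize = 0)
// scores 1.5. After sorting by score, level 0 gets compacted first.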
// compactBuildTables merges topTables and botTables to form a list of new tables.
func (s *levelsController) compactBuildTables(
l int, cd compactDef) ([]*table.Table, func() error, error) {
topTables := cd.top
botTables := cd.bot
var hasOverlap bool
{
kr := getKeyRange(cd.top)
for i, lh := range s.levels {
if i <= l { // Skip upper levels.
continue
}
lh.RLock()
left, right := lh.overlappingTables(levelHandlerRLocked{}, kr)
lh.RUnlock()
if right-left > 0 {
hasOverlap = true
break
}
}
cd.elog.LazyPrintf("Key range overlaps with lower levels: %v", hasOverlap)
}
// Try to collect stats so that we can inform value log about GC. That would help us find which
// value log file should be GCed.
discardStats := make(map[uint32]int64)
updateStats := func(vs y.ValueStruct) {
if vs.Meta&bitValuePointer > 0 {
var vp valuePointer
vp.Decode(vs.Value)
discardStats[vp.Fid] += int64(vp.Len)
}
}
// Create iterators across all the tables involved first.
var iters []y.Iterator
if l == 0 {
iters = appendIteratorsReversed(iters, topTables, false)
} else {
y.AssertTrue(len(topTables) == 1)
iters = []y.Iterator{topTables[0].NewIterator(false)}
}
// Next level has level>=1 and we can use ConcatIterator as key ranges do not overlap.
iters = append(iters, table.NewConcatIterator(botTables, false))
it := y.NewMergeIterator(iters, false)
defer it.Close() // Important to close the iterator to do ref counting.
it.Rewind()
// Pick a discard ts, so we can discard versions below this ts. We should
// never discard any versions starting from above this timestamp, because
// that would affect the snapshot view guarantee provided by transactions.
discardTs := s.kv.orc.discardAtOrBelow()
// Start generating new tables.
type newTableResult struct {
table *table.Table
err error
}
resultCh := make(chan newTableResult)
var numBuilds, numVersions int
var lastKey, skipKey []byte
for it.Valid() {
timeStart := time.Now()
builder := table.NewTableBuilder()
var numKeys, numSkips uint64
for ; it.Valid(); it.Next() {
// See if we need to skip this key.
if len(skipKey) > 0 {
if y.SameKey(it.Key(), skipKey) {
numSkips++
updateStats(it.Value())
continue
} else {
skipKey = skipKey[:0]
}
}
if !y.SameKey(it.Key(), lastKey) {
if builder.ReachedCapacity(s.kv.opt.MaxTableSize) {
// Only break if we are on a different key, and have reached capacity. We want
// to ensure that all versions of the key are stored in the same sstable, and
// not divided across multiple tables at the same level.
break
}
lastKey = y.SafeCopy(lastKey, it.Key())
numVersions = 0
}
vs := it.Value()
version := y.ParseTs(it.Key())
if version <= discardTs {
// Keep track of the number of versions encountered for this key. Only consider the
// versions which are below the minReadTs, otherwise, we might end up discarding the
// only valid version for a running transaction.
numVersions++
lastValidVersion := vs.Meta&bitDiscardEarlierVersions > 0
if isDeletedOrExpired(vs.Meta, vs.ExpiresAt) ||
numVersions > s.kv.opt.NumVersionsToKeep ||
lastValidVersion {
// If this version of the key is deleted or expired, skip all the rest of the
// versions. Ensure that we're only removing versions below readTs.
skipKey = y.SafeCopy(skipKey, it.Key())
if lastValidVersion {
// Add this key. We have set skipKey, so the following key versions
// would be skipped.
} else if hasOverlap {
// If this key range has overlap with lower levels, then keep the deletion
// marker with the latest version, discarding the rest. We have set skipKey,
// so the following key versions would be skipped.
} else {
// If no overlap, we can skip all the versions, by continuing here.
numSkips++
updateStats(vs)
continue // Skip adding this key.
}
}
}
numKeys++
y.Check(builder.Add(it.Key(), it.Value()))
}
// It was true that it.Valid() at least once in the loop above, which means we
// called Add() at least once, and builder is not Empty().
cd.elog.LazyPrintf("Added %d keys. Skipped %d keys.", numKeys, numSkips)
cd.elog.LazyPrintf("LOG Compact. Iteration took: %v\n", time.Since(timeStart))
if !builder.Empty() {
numBuilds++
fileID := s.reserveFileID()
go func(builder *table.Builder) {
defer builder.Close()
fd, err := y.CreateSyncedFile(table.NewFilename(fileID, s.kv.opt.Dir), true)
if err != nil {
resultCh <- newTableResult{nil, errors.Wrapf(err, "While opening new table: %d", fileID)}
return
}
if _, err := fd.Write(builder.Finish()); err != nil {
resultCh <- newTableResult{nil, errors.Wrapf(err, "Unable to write to file: %d", fileID)}
return
}
tbl, err := table.OpenTable(fd, s.kv.opt.TableLoadingMode, nil)
// decrRef is added below.
resultCh <- newTableResult{tbl, errors.Wrapf(err, "Unable to open table: %q", fd.Name())}
}(builder)
}
}
newTables := make([]*table.Table, 0, 20)
// Wait for all table builders to finish.
var firstErr error
for x := 0; x < numBuilds; x++ {
res := <-resultCh
newTables = append(newTables, res.table)
if firstErr == nil {
firstErr = res.err
}
}
if firstErr == nil {
// Ensure created files' directory entries are visible. We don't mind the extra latency
// from not doing this ASAP after all file creation has finished because this is a
// background operation.
firstErr = syncDir(s.kv.opt.Dir)
}
if firstErr != nil {
// An error happened. Delete all the newly created table files (by calling DecrRef
// -- we're the only holders of a ref).
for j := 0; j < numBuilds; j++ {
if newTables[j] != nil {
newTables[j].DecrRef()
}
}
errorReturn := errors.Wrapf(firstErr, "While running compaction for: %+v", cd)
return nil, nil, errorReturn
}
sort.Slice(newTables, func(i, j int) bool {
return y.CompareKeys(newTables[i].Biggest(), newTables[j].Biggest()) < 0
})
s.kv.vlog.updateGCStats(discardStats)
cd.elog.LazyPrintf("Discard stats: %v", discardStats)
return newTables, func() error { return decrRefs(newTables) }, nil
}
func buildChangeSet(cd *compactDef, newTables []*table.Table) pb.ManifestChangeSet {
changes := []*pb.ManifestChange{}
for _, table := range newTables {
changes = append(changes,
newCreateChange(table.ID(), cd.nextLevel.level, table.Checksum))
}
for _, table := range cd.top {
changes = append(changes, newDeleteChange(table.ID()))
}
for _, table := range cd.bot {
changes = append(changes, newDeleteChange(table.ID()))
}
return pb.ManifestChangeSet{Changes: changes}
}
type compactDef struct {
elog trace.Trace
thisLevel *levelHandler
nextLevel *levelHandler
top []*table.Table
bot []*table.Table
thisRange keyRange
nextRange keyRange
thisSize int64
}
func (cd *compactDef) lockLevels() {
cd.thisLevel.RLock()
cd.nextLevel.RLock()
}
func (cd *compactDef) unlockLevels() {
cd.nextLevel.RUnlock()
cd.thisLevel.RUnlock()
}
func (s *levelsController) fillTablesL0(cd *compactDef) bool {
cd.lockLevels()
defer cd.unlockLevels()
cd.top = make([]*table.Table, len(cd.thisLevel.tables))
copy(cd.top, cd.thisLevel.tables)
if len(cd.top) == 0 {
return false
}
cd.thisRange = infRange
kr := getKeyRange(cd.top)
left, right := cd.nextLevel.overlappingTables(levelHandlerRLocked{}, kr)
cd.bot = make([]*table.Table, right-left)
copy(cd.bot, cd.nextLevel.tables[left:right])
if len(cd.bot) == 0 {
cd.nextRange = kr
} else {
cd.nextRange = getKeyRange(cd.bot)
}
if !s.cstatus.compareAndAdd(thisAndNextLevelRLocked{}, *cd) {
return false
}
return true
}
func (s *levelsController) fillTables(cd *compactDef) bool {
cd.lockLevels()
defer cd.unlockLevels()
tbls := make([]*table.Table, len(cd.thisLevel.tables))
copy(tbls, cd.thisLevel.tables)
if len(tbls) == 0 {
return false
}
// Find the biggest table, and compact that first.
// TODO: Try other table picking strategies.
sort.Slice(tbls, func(i, j int) bool {
return tbls[i].Size() > tbls[j].Size()
})
for _, t := range tbls {
cd.thisSize = t.Size()
cd.thisRange = keyRange{
// We pick all the versions of the smallest and the biggest key.
left: y.KeyWithTs(y.ParseKey(t.Smallest()), math.MaxUint64),
// Note that version zero would be the rightmost key.
right: y.KeyWithTs(y.ParseKey(t.Biggest()), 0),
}
if s.cstatus.overlapsWith(cd.thisLevel.level, cd.thisRange) {
continue
}
cd.top = []*table.Table{t}
left, right := cd.nextLevel.overlappingTables(levelHandlerRLocked{}, cd.thisRange)
cd.bot = make([]*table.Table, right-left)
copy(cd.bot, cd.nextLevel.tables[left:right])
if len(cd.bot) == 0 {
cd.bot = []*table.Table{}
cd.nextRange = cd.thisRange
if !s.cstatus.compareAndAdd(thisAndNextLevelRLocked{}, *cd) {
continue
}
return true
}
cd.nextRange = getKeyRange(cd.bot)
if s.cstatus.overlapsWith(cd.nextLevel.level, cd.nextRange) {
continue
}
if !s.cstatus.compareAndAdd(thisAndNextLevelRLocked{}, *cd) {
continue
}
return true
}
return false
}
func (s *levelsController) runCompactDef(l int, cd compactDef) (err error) {
timeStart := time.Now()
thisLevel := cd.thisLevel
nextLevel := cd.nextLevel
// Tables should never be moved directly between levels; they should always be rewritten to allow
// discarding invalid versions.
newTables, decr, err := s.compactBuildTables(l, cd)
if err != nil {
return err
}
defer func() {
// Only assign to err, if it's not already nil.
if decErr := decr(); err == nil {
err = decErr
}
}()
changeSet := buildChangeSet(&cd, newTables)
// We write to the manifest _before_ we delete files (and after we created files)
if err := s.kv.manifest.addChanges(changeSet.Changes); err != nil {
return err
}
// See comment earlier in this function about the ordering of these ops, and the order in which
// we access levels when reading.
if err := nextLevel.replaceTables(newTables); err != nil {
return err
}
if err := thisLevel.deleteTables(cd.top); err != nil {
return err
}
// Note: For level 0, while doCompact is running, it is possible that new tables are added.
// However, the tables are added only to the end, so it is ok to just delete the first table.
cd.elog.LazyPrintf("LOG Compact %d->%d, del %d tables, add %d tables, took %v\n",
l, l+1, len(cd.top)+len(cd.bot), len(newTables), time.Since(timeStart))
return nil
}
var errFillTables = errors.New("Unable to fill tables")
// doCompact picks some table on level l and compacts it away to the next level.
func (s *levelsController) doCompact(p compactionPriority) error {
l := p.level
y.AssertTrue(l+1 < s.kv.opt.MaxLevels) // Sanity check.
cd := compactDef{
elog: trace.New(fmt.Sprintf("Badger.L%d", l), "Compact"),
thisLevel: s.levels[l],
nextLevel: s.levels[l+1],
}
cd.elog.SetMaxEvents(100)
defer cd.elog.Finish()
cd.elog.LazyPrintf("Got compaction priority: %+v", p)
// While picking tables to be compacted, both levels' tables are expected to
// remain unchanged.
if l == 0 {
if !s.fillTablesL0(&cd) {
cd.elog.LazyPrintf("fillTables failed for level: %d\n", l)
return errFillTables
}
} else {
if !s.fillTables(&cd) {
cd.elog.LazyPrintf("fillTables failed for level: %d\n", l)
return errFillTables
}
}
defer s.cstatus.delete(cd) // Remove the ranges from compaction status.
cd.elog.LazyPrintf("Running for level: %d\n", cd.thisLevel.level)
s.cstatus.toLog(cd.elog)
if err := s.runCompactDef(l, cd); err != nil {
// This compaction couldn't be done successfully.
cd.elog.LazyPrintf("\tLOG Compact FAILED with error: %+v: %+v", err, cd)
return err
}
s.cstatus.toLog(cd.elog)
cd.elog.LazyPrintf("Compaction for level: %d DONE", cd.thisLevel.level)
return nil
}
func (s *levelsController) addLevel0Table(t *table.Table) error {
// We update the manifest _before_ the table becomes part of a levelHandler, because at that
// point it could get used in some compaction. This ensures the manifest file gets updated in
// the proper order. (That means this update happens before that of some compaction which
// deletes the table.)
err := s.kv.manifest.addChanges([]*pb.ManifestChange{
newCreateChange(t.ID(), 0, t.Checksum),
})
if err != nil {
return err
}
for !s.levels[0].tryAddLevel0Table(t) {
// Stall. Make sure all levels are healthy before we unstall.
var timeStart time.Time
{
s.elog.Printf("STALLED STALLED STALLED: %v\n", time.Since(lastUnstalled))
s.cstatus.RLock()
for i := 0; i < s.kv.opt.MaxLevels; i++ {
s.elog.Printf("level=%d. Status=%s Size=%d\n",
i, s.cstatus.levels[i].debug(), s.levels[i].getTotalSize())
}
s.cstatus.RUnlock()
timeStart = time.Now()
}
// Before we unstall, we need to make sure that level 0 and 1 are healthy. Otherwise, we
// will very quickly fill up level 0 again and if the compaction strategy favors level 0,
// then level 1 is going to be super full.
for i := 0; ; i++ {
// Passing 0 for delSize to compactable means we're treating incomplete compactions as
// not having finished -- we wait for them to finish. Also, it's crucial this behavior
// replicates pickCompactLevels' behavior in computing compactability in order to
// guarantee progress.
if !s.isLevel0Compactable() && !s.levels[1].isCompactable(0) {
break
}
time.Sleep(10 * time.Millisecond)
if i%100 == 0 {
prios := s.pickCompactLevels()
s.elog.Printf("Waiting to add level 0 table. Compaction priorities: %+v\n", prios)
i = 0
}
}
{
s.elog.Printf("UNSTALLED UNSTALLED UNSTALLED: %v\n", time.Since(timeStart))
lastUnstalled = time.Now()
}
}
return nil
}
func (s *levelsController) close() error {
err := s.cleanupLevels()
return errors.Wrap(err, "levelsController.Close")
}
// get returns the found value if any. If not found, we return nil.
func (s *levelsController) get(key []byte, maxVs *y.ValueStruct) (y.ValueStruct, error) {
// It's important that we iterate the levels from 0 on upward. The reason is, if we iterated
// in opposite order, or in parallel (naively calling all the h.RLock() in some order) we could
// read level L's tables post-compaction and level L+1's tables pre-compaction. (If we do
// parallelize this, we will need to call the h.RLock() function by increasing order of level
// number.)
version := y.ParseTs(key)
for _, h := range s.levels {
vs, err := h.get(key) // Calls h.RLock() and h.RUnlock().
if err != nil {
return y.ValueStruct{}, errors.Wrapf(err, "get key: %q", key)
}
if vs.Value == nil && vs.Meta == 0 {
continue
}
if maxVs == nil || vs.Version == version {
return vs, nil
}
if maxVs.Version < vs.Version {
*maxVs = vs
}
}
if maxVs != nil {
return *maxVs, nil
}
return y.ValueStruct{}, nil
}
func appendIteratorsReversed(out []y.Iterator, th []*table.Table, reversed bool) []y.Iterator {
for i := len(th) - 1; i >= 0; i-- {
// This will increment the reference of the table handler.
out = append(out, th[i].NewIterator(reversed))
}
return out
}
// appendIterators appends iterators to an array of iterators, for merging.
// Note: This obtains references for the table handlers. Remember to close these iterators.
func (s *levelsController) appendIterators(
iters []y.Iterator, opt *IteratorOptions) []y.Iterator {
// Just like with get, it's important we iterate the levels from 0 on upward, to avoid missing
// data when there's a compaction.
for _, level := range s.levels {
iters = level.appendIterators(iters, opt)
}
return iters
}
// TableInfo represents the information about a table.
type TableInfo struct {
ID uint64
Level int
Left []byte
Right []byte
}
func (s *levelsController) getTableInfo() (result []TableInfo) {
for _, l := range s.levels {
for _, t := range l.tables {
info := TableInfo{
ID: t.ID(),
Level: l.level,
Left: t.Smallest(),
Right: t.Biggest(),
}
result = append(result, info)
}
}
sort.Slice(result, func(i, j int) bool {
if result[i].Level != result[j].Level {
return result[i].Level < result[j].Level
}
return result[i].ID < result[j].ID
})
return
}

vendor/github.com/dgraph-io/badger/logger.go generated vendored Normal file

@@ -0,0 +1,72 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"log"
"os"
)
// Logger is implemented by any logging system that is used for standard logs.
type Logger interface {
Errorf(string, ...interface{})
Infof(string, ...interface{})
Warningf(string, ...interface{})
}
// Errorf logs an ERROR log message to the logger specified in opts or to the
// global logger if no logger is specified in opts.
func (opt *Options) Errorf(format string, v ...interface{}) {
if opt.Logger == nil {
return
}
opt.Logger.Errorf(format, v...)
}
// Infof logs an INFO message to the logger specified in opts.
func (opt *Options) Infof(format string, v ...interface{}) {
if opt.Logger == nil {
return
}
opt.Logger.Infof(format, v...)
}
// Warningf logs a WARNING message to the logger specified in opts.
func (opt *Options) Warningf(format string, v ...interface{}) {
if opt.Logger == nil {
return
}
opt.Logger.Warningf(format, v...)
}
type defaultLog struct {
*log.Logger
}
var defaultLogger = &defaultLog{Logger: log.New(os.Stderr, "badger ", log.LstdFlags)}
func (l *defaultLog) Errorf(f string, v ...interface{}) {
l.Printf("ERROR: "+f, v...)
}
func (l *defaultLog) Infof(f string, v ...interface{}) {
l.Printf("INFO: "+f, v...)
}
func (l *defaultLog) Warningf(f string, v ...interface{}) {
l.Printf("WARNING: "+f, v...)
}
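A minimal sketch of routing Badger logs elsewhere: any type with Errorf/Infof/Warningf satisfies the Logger interface above (stdLogger is a hypothetical name; "log" and "os" imports are assumed).
type stdLogger struct{ *log.Logger }

func (l stdLogger) Errorf(f string, v ...interface{})   { l.Printf("ERROR: "+f, v...) }
func (l stdLogger) Infof(f string, v ...interface{})    { l.Printf("INFO: "+f, v...) }
func (l stdLogger) Warningf(f string, v ...interface{}) { l.Printf("WARNING: "+f, v...) }

opts := DefaultOptions
opts.Logger = stdLogger{log.New(os.Stdout, "badger ", log.LstdFlags)}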

67
vendor/github.com/dgraph-io/badger/logger_test.go generated vendored Normal file

@ -0,0 +1,67 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
type mockLogger struct {
output string
}
func (l *mockLogger) Errorf(f string, v ...interface{}) {
l.output = fmt.Sprintf("ERROR: "+f, v...)
}
func (l *mockLogger) Infof(f string, v ...interface{}) {
l.output = fmt.Sprintf("INFO: "+f, v...)
}
func (l *mockLogger) Warningf(f string, v ...interface{}) {
l.output = fmt.Sprintf("WARNING: "+f, v...)
}
// Test that the DB-specific log is used instead of the global log.
func TestDbLog(t *testing.T) {
l := &mockLogger{}
opt := Options{Logger: l}
opt.Errorf("test")
require.Equal(t, "ERROR: test", l.output)
opt.Infof("test")
require.Equal(t, "INFO: test", l.output)
opt.Warningf("test")
require.Equal(t, "WARNING: test", l.output)
}
// Test that a logger assigned to an otherwise zero-value Options is used.
func TestNoDbLog(t *testing.T) {
l := &mockLogger{}
opt := Options{}
opt.Logger = l
opt.Errorf("test")
require.Equal(t, "ERROR: test", l.output)
opt.Infof("test")
require.Equal(t, "INFO: test", l.output)
opt.Warningf("test")
require.Equal(t, "WARNING: test", l.output)
}

68
vendor/github.com/dgraph-io/badger/managed_db.go generated vendored Normal file

@ -0,0 +1,68 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
// OpenManaged returns a new DB, which allows more control over setting
// transaction timestamps, aka managed mode.
//
// This is only useful for databases built on top of Badger (like Dgraph), and
// can be ignored by most users.
func OpenManaged(opts Options) (*DB, error) {
opts.managedTxns = true
return Open(opts)
}
// NewTransactionAt follows the same logic as DB.NewTransaction(), but uses the
// provided read timestamp.
//
// This is only useful for databases built on top of Badger (like Dgraph), and
// can be ignored by most users.
func (db *DB) NewTransactionAt(readTs uint64, update bool) *Txn {
if !db.opt.managedTxns {
panic("Cannot use NewTransactionAt with managedDB=false. Use NewTransaction instead.")
}
txn := db.newTransaction(update, true)
txn.readTs = readTs
return txn
}
// CommitAt commits the transaction, following the same logic as Commit(), but
// at the given commit timestamp. This will panic if not used with managed transactions.
//
// This is only useful for databases built on top of Badger (like Dgraph), and
// can be ignored by most users.
func (txn *Txn) CommitAt(commitTs uint64, callback func(error)) error {
if !txn.db.opt.managedTxns {
panic("Cannot use CommitAt with managedDB=false. Use Commit instead.")
}
txn.commitTs = commitTs
if callback == nil {
return txn.Commit()
}
txn.CommitWith(callback)
return nil
}
// SetDiscardTs sets a timestamp at or below which any invalid or deleted
// versions can be discarded from the LSM tree, and in turn from the value log,
// to reclaim disk space. Can only be used with managed transactions.
func (db *DB) SetDiscardTs(ts uint64) {
if !db.opt.managedTxns {
panic("Cannot use SetDiscardTs with managedDB=false.")
}
db.orc.setDiscardTs(ts)
}
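A minimal sketch tying the managed-mode APIs above together (the directory is hypothetical):
opts := DefaultOptions
opts.Dir, opts.ValueDir = "/tmp/badger", "/tmp/badger"
db, err := OpenManaged(opts)
if err != nil {
	panic(err)
}
defer db.Close()

txn := db.NewTransactionAt(10, true) // read view at ts=10
defer txn.Discard()
if err := txn.Set([]byte("k"), []byte("v")); err != nil {
	panic(err)
}
if err := txn.CommitAt(11, nil); err != nil { // commit at ts=11
	panic(err)
}
db.SetDiscardTs(5) // versions at or below ts=5 may now be discarded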

353
vendor/github.com/dgraph-io/badger/managed_db_test.go generated vendored Normal file

@ -0,0 +1,353 @@
package badger
import (
"io/ioutil"
"math"
"math/rand"
"os"
"runtime"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/require"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
func val(large bool) []byte {
var buf []byte
if large {
buf = make([]byte, 8192)
} else {
buf = make([]byte, 16)
}
rand.Read(buf)
return buf
}
func numKeys(db *DB) int {
var count int
err := db.View(func(txn *Txn) error {
itr := txn.NewIterator(DefaultIteratorOptions)
defer itr.Close()
for itr.Rewind(); itr.Valid(); itr.Next() {
count++
}
return nil
})
y.Check(err)
return count
}
func numKeysManaged(db *DB, readTs uint64) int {
txn := db.NewTransactionAt(readTs, false)
defer txn.Discard()
itr := txn.NewIterator(DefaultIteratorOptions)
defer itr.Close()
var count int
for itr.Rewind(); itr.Valid(); itr.Next() {
count++
}
return count
}
func TestDropAllManaged(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opts := getTestOptions(dir)
opts.managedTxns = true
opts.ValueLogFileSize = 5 << 20
db, err := Open(opts)
require.NoError(t, err)
N := uint64(10000)
populate := func(db *DB, start uint64) {
var wg sync.WaitGroup
for i := start; i < start+N; i++ {
wg.Add(1)
txn := db.NewTransactionAt(math.MaxUint64, true)
require.NoError(t, txn.Set([]byte(key("key", int(i))), val(true)))
require.NoError(t, txn.CommitAt(uint64(i), func(err error) {
require.NoError(t, err)
wg.Done()
}))
}
wg.Wait()
}
populate(db, N)
require.Equal(t, int(N), numKeysManaged(db, math.MaxUint64))
require.NoError(t, db.DropAll())
require.NoError(t, db.DropAll()) // Just call it twice, for fun.
require.Equal(t, 0, numKeysManaged(db, math.MaxUint64))
// Check that we can still write to the DB, even when using lower timestamps.
populate(db, 1)
require.Equal(t, int(N), numKeysManaged(db, math.MaxUint64))
db.Close()
// Ensure that value log is correctly replayed, that we are preserving badgerHead.
opts.managedTxns = true
db2, err := Open(opts)
require.NoError(t, err)
require.Equal(t, int(N), numKeysManaged(db2, math.MaxUint64))
db2.Close()
}
func TestDropAll(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opts := getTestOptions(dir)
opts.ValueLogFileSize = 5 << 20
db, err := Open(opts)
require.NoError(t, err)
N := uint64(10000)
populate := func(db *DB) {
writer := db.NewWriteBatch()
for i := uint64(0); i < N; i++ {
require.NoError(t, writer.Set([]byte(key("key", int(i))), val(true), 0))
}
require.NoError(t, writer.Flush())
}
populate(db)
require.Equal(t, int(N), numKeys(db))
require.NoError(t, db.DropAll())
require.Equal(t, 0, numKeys(db))
// Check that we can still write to the DB.
populate(db)
require.Equal(t, int(N), numKeys(db))
db.Close()
// Ensure that value log is correctly replayed.
db2, err := Open(opts)
require.NoError(t, err)
require.Equal(t, int(N), numKeys(db2))
db2.Close()
}
func TestDropAllTwice(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opts := getTestOptions(dir)
opts.ValueLogFileSize = 5 << 20
db, err := Open(opts)
require.NoError(t, err)
N := uint64(10000)
populate := func(db *DB) {
writer := db.NewWriteBatch()
for i := uint64(0); i < N; i++ {
require.NoError(t, writer.Set([]byte(key("key", int(i))), val(true), 0))
}
require.NoError(t, writer.Flush())
}
populate(db)
require.Equal(t, int(N), numKeys(db))
require.NoError(t, db.DropAll())
require.Equal(t, 0, numKeys(db))
// Call DropAll again.
require.NoError(t, db.DropAll())
}
func TestDropAllWithPendingTxn(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opts := getTestOptions(dir)
opts.ValueLogFileSize = 5 << 20
db, err := Open(opts)
require.NoError(t, err)
N := uint64(10000)
populate := func(db *DB) {
writer := db.NewWriteBatch()
for i := uint64(0); i < N; i++ {
require.NoError(t, writer.Set([]byte(key("key", int(i))), val(true), 0))
}
require.NoError(t, writer.Flush())
}
populate(db)
require.Equal(t, int(N), numKeys(db))
txn := db.NewTransaction(true)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
itr := txn.NewIterator(DefaultIteratorOptions)
defer itr.Close()
var keys []string
for {
var count int
for itr.Rewind(); itr.Valid(); itr.Next() {
count++
item := itr.Item()
keys = append(keys, string(item.KeyCopy(nil)))
_, err := item.ValueCopy(nil)
if err != nil {
t.Logf("Got error during value copy: %v", err)
return
}
}
t.Logf("Got number of keys: %d\n", count)
for _, key := range keys {
item, err := txn.Get([]byte(key))
if err != nil {
t.Logf("Got error during key lookup: %v", err)
return
}
if _, err := item.ValueCopy(nil); err != nil {
t.Logf("Got error during second value copy: %v", err)
return
}
}
}
}()
// Do not cancel txn.
go func() {
time.Sleep(2 * time.Second)
require.NoError(t, db.DropAll())
}()
wg.Wait()
}
func TestDropReadOnly(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opts := getTestOptions(dir)
opts.ValueLogFileSize = 5 << 20
db, err := Open(opts)
require.NoError(t, err)
N := uint64(1000)
populate := func(db *DB) {
writer := db.NewWriteBatch()
for i := uint64(0); i < N; i++ {
require.NoError(t, writer.Set([]byte(key("key", int(i))), val(true), 0))
}
require.NoError(t, writer.Flush())
}
populate(db)
require.Equal(t, int(N), numKeys(db))
require.NoError(t, db.Close())
opts.ReadOnly = true
db2, err := Open(opts)
// acquireDirectoryLock returns ErrWindowsNotSupported on Windows. It can be ignored safely.
if runtime.GOOS == "windows" {
require.Equal(t, err, ErrWindowsNotSupported)
} else {
require.NoError(t, err)
}
require.Panics(t, func() { db2.DropAll() })
}
func TestWriteAfterClose(t *testing.T) {
dir, err := ioutil.TempDir(".", "badger-test")
require.NoError(t, err)
defer os.RemoveAll(dir)
opts := getTestOptions(dir)
opts.ValueLogFileSize = 5 << 20
db, err := Open(opts)
require.NoError(t, err)
N := uint64(1000)
populate := func(db *DB) {
writer := db.NewWriteBatch()
for i := uint64(0); i < N; i++ {
require.NoError(t, writer.Set([]byte(key("key", int(i))), val(true), 0))
}
require.NoError(t, writer.Flush())
}
populate(db)
require.Equal(t, int(N), numKeys(db))
require.NoError(t, db.Close())
err = db.Update(func(txn *Txn) error {
return txn.Set([]byte("a"), []byte("b"))
})
require.Equal(t, ErrBlockedWrites, err)
}
func TestDropAllRace(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opts := getTestOptions(dir)
opts.managedTxns = true
db, err := Open(opts)
require.NoError(t, err)
N := 10000
// Start a goroutine to keep trying to write to DB while DropAll happens.
closer := y.NewCloser(1)
go func() {
defer closer.Done()
ticker := time.NewTicker(time.Millisecond)
defer ticker.Stop()
i := N + 1 // Writes would happen above N.
var errors int32
for {
select {
case <-ticker.C:
i++
txn := db.NewTransactionAt(math.MaxUint64, true)
require.NoError(t, txn.Set([]byte(key("key", i)), val(false)))
if err := txn.CommitAt(uint64(i), func(err error) {
if err != nil {
atomic.AddInt32(&errors, 1)
}
}); err != nil {
atomic.AddInt32(&errors, 1)
}
case <-closer.HasBeenClosed():
// The following causes a data race.
// t.Logf("i: %d. Number of (expected) write errors: %d.\n", i, errors)
return
}
}
}()
var wg sync.WaitGroup
for i := 1; i <= N; i++ {
wg.Add(1)
txn := db.NewTransactionAt(math.MaxUint64, true)
require.NoError(t, txn.Set([]byte(key("key", i)), val(false)))
require.NoError(t, txn.CommitAt(uint64(i), func(err error) {
require.NoError(t, err)
wg.Done()
}))
}
wg.Wait()
before := numKeysManaged(db, math.MaxUint64)
require.True(t, before > N)
require.NoError(t, db.DropAll())
closer.SignalAndWait()
after := numKeysManaged(db, math.MaxUint64)
t.Logf("Before: %d. After dropall: %d\n", before, after)
require.True(t, after < before)
db.Close()
}

436
vendor/github.com/dgraph-io/badger/manifest.go generated vendored Normal file

@ -0,0 +1,436 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bufio"
"bytes"
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"os"
"path/filepath"
"sync"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/pb"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
// Manifest represents the contents of the MANIFEST file in a Badger store.
//
// The MANIFEST file describes the startup state of the db -- all LSM files and what level they're
// at.
//
// It consists of a sequence of ManifestChangeSet objects. Each of these is treated atomically,
// and contains a sequence of ManifestChange messages (file creations/deletions) which we use to
// reconstruct the manifest at startup.
type Manifest struct {
Levels []levelManifest
Tables map[uint64]TableManifest
// Contains total number of creation and deletion changes in the manifest -- used to compute
// whether it'd be useful to rewrite the manifest.
Creations int
Deletions int
}
func createManifest() Manifest {
levels := make([]levelManifest, 0)
return Manifest{
Levels: levels,
Tables: make(map[uint64]TableManifest),
}
}
// levelManifest contains information about LSM tree levels
// in the MANIFEST file.
type levelManifest struct {
Tables map[uint64]struct{} // Set of table id's
}
// TableManifest contains information about a specific table
// in the LSM tree.
type TableManifest struct {
Level uint8
Checksum []byte
}
// manifestFile holds the file pointer (and other info) about the manifest file, which is a log
// file we append to.
type manifestFile struct {
fp *os.File
directory string
// We make this configurable so that unit tests can hit rewrite() code quickly
deletionsRewriteThreshold int
// Guards appends, which includes access to the manifest field.
appendLock sync.Mutex
// Used to track the current state of the manifest, used when rewriting.
manifest Manifest
}
const (
// ManifestFilename is the filename for the manifest file.
ManifestFilename = "MANIFEST"
manifestRewriteFilename = "MANIFEST-REWRITE"
manifestDeletionsRewriteThreshold = 10000
manifestDeletionsRatio = 10
)
// asChanges returns a sequence of changes that could be used to recreate the Manifest in its
// present state.
func (m *Manifest) asChanges() []*pb.ManifestChange {
changes := make([]*pb.ManifestChange, 0, len(m.Tables))
for id, tm := range m.Tables {
changes = append(changes, newCreateChange(id, int(tm.Level), tm.Checksum))
}
return changes
}
func (m *Manifest) clone() Manifest {
changeSet := pb.ManifestChangeSet{Changes: m.asChanges()}
ret := createManifest()
y.Check(applyChangeSet(&ret, &changeSet))
return ret
}
// openOrCreateManifestFile opens a Badger manifest file if it exists, or creates
// one if it doesn't.
func openOrCreateManifestFile(dir string, readOnly bool) (ret *manifestFile, result Manifest, err error) {
return helpOpenOrCreateManifestFile(dir, readOnly, manifestDeletionsRewriteThreshold)
}
func helpOpenOrCreateManifestFile(dir string, readOnly bool, deletionsThreshold int) (ret *manifestFile, result Manifest, err error) {
path := filepath.Join(dir, ManifestFilename)
var flags uint32
if readOnly {
flags |= y.ReadOnly
}
fp, err := y.OpenExistingFile(path, flags) // We explicitly sync in addChanges, outside the lock.
if err != nil {
if !os.IsNotExist(err) {
return nil, Manifest{}, err
}
if readOnly {
return nil, Manifest{}, fmt.Errorf("no manifest found, required for read-only db")
}
m := createManifest()
fp, netCreations, err := helpRewrite(dir, &m)
if err != nil {
return nil, Manifest{}, err
}
y.AssertTrue(netCreations == 0)
mf := &manifestFile{
fp: fp,
directory: dir,
manifest: m.clone(),
deletionsRewriteThreshold: deletionsThreshold,
}
return mf, m, nil
}
manifest, truncOffset, err := ReplayManifestFile(fp)
if err != nil {
_ = fp.Close()
return nil, Manifest{}, err
}
if !readOnly {
// Truncate file so we don't have a half-written entry at the end.
if err := fp.Truncate(truncOffset); err != nil {
_ = fp.Close()
return nil, Manifest{}, err
}
}
if _, err = fp.Seek(0, io.SeekEnd); err != nil {
_ = fp.Close()
return nil, Manifest{}, err
}
mf := &manifestFile{
fp: fp,
directory: dir,
manifest: manifest.clone(),
deletionsRewriteThreshold: deletionsThreshold,
}
return mf, manifest, nil
}
func (mf *manifestFile) close() error {
return mf.fp.Close()
}
// addChanges writes a batch of changes, atomically, to the file. By "atomically" that means when
// we replay the MANIFEST file, we'll either replay all the changes or none of them. (The truth of
// this depends on the filesystem -- some might append garbage data if a system crash happens at
// the wrong time.)
func (mf *manifestFile) addChanges(changesParam []*pb.ManifestChange) error {
changes := pb.ManifestChangeSet{Changes: changesParam}
buf, err := changes.Marshal()
if err != nil {
return err
}
// Maybe we could use O_APPEND instead (on certain file systems)
mf.appendLock.Lock()
if err := applyChangeSet(&mf.manifest, &changes); err != nil {
mf.appendLock.Unlock()
return err
}
// Rewrite manifest if it'd shrink by 1/10 and it's big enough to care
if mf.manifest.Deletions > mf.deletionsRewriteThreshold &&
mf.manifest.Deletions > manifestDeletionsRatio*(mf.manifest.Creations-mf.manifest.Deletions) {
if err := mf.rewrite(); err != nil {
mf.appendLock.Unlock()
return err
}
} else {
var lenCrcBuf [8]byte
binary.BigEndian.PutUint32(lenCrcBuf[0:4], uint32(len(buf)))
binary.BigEndian.PutUint32(lenCrcBuf[4:8], crc32.Checksum(buf, y.CastagnoliCrcTable))
buf = append(lenCrcBuf[:], buf...)
if _, err := mf.fp.Write(buf); err != nil {
mf.appendLock.Unlock()
return err
}
}
mf.appendLock.Unlock()
return mf.fp.Sync()
}
// Has to be 4 bytes. The value can never change, ever, anyway.
var magicText = [4]byte{'B', 'd', 'g', 'r'}
// The magic version number.
const magicVersion = 4
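// Resulting on-disk layout (sketch, derived from helpRewrite below and
// addChanges above):
//
//	[4B magic "Bdgr"][4B version]                                    file header
//	[4B length][4B CRC32 (Castagnoli)][pb.ManifestChangeSet bytes]   repeated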
func helpRewrite(dir string, m *Manifest) (*os.File, int, error) {
rewritePath := filepath.Join(dir, manifestRewriteFilename)
// We explicitly sync.
fp, err := y.OpenTruncFile(rewritePath, false)
if err != nil {
return nil, 0, err
}
buf := make([]byte, 8)
copy(buf[0:4], magicText[:])
binary.BigEndian.PutUint32(buf[4:8], magicVersion)
netCreations := len(m.Tables)
changes := m.asChanges()
set := pb.ManifestChangeSet{Changes: changes}
changeBuf, err := set.Marshal()
if err != nil {
fp.Close()
return nil, 0, err
}
var lenCrcBuf [8]byte
binary.BigEndian.PutUint32(lenCrcBuf[0:4], uint32(len(changeBuf)))
binary.BigEndian.PutUint32(lenCrcBuf[4:8], crc32.Checksum(changeBuf, y.CastagnoliCrcTable))
buf = append(buf, lenCrcBuf[:]...)
buf = append(buf, changeBuf...)
if _, err := fp.Write(buf); err != nil {
fp.Close()
return nil, 0, err
}
if err := fp.Sync(); err != nil {
fp.Close()
return nil, 0, err
}
// On Windows the files should be closed before doing a Rename.
if err = fp.Close(); err != nil {
return nil, 0, err
}
manifestPath := filepath.Join(dir, ManifestFilename)
if err := os.Rename(rewritePath, manifestPath); err != nil {
return nil, 0, err
}
fp, err = y.OpenExistingFile(manifestPath, 0)
if err != nil {
return nil, 0, err
}
if _, err := fp.Seek(0, io.SeekEnd); err != nil {
fp.Close()
return nil, 0, err
}
if err := syncDir(dir); err != nil {
fp.Close()
return nil, 0, err
}
return fp, netCreations, nil
}
// Must be called while appendLock is held.
func (mf *manifestFile) rewrite() error {
// On Windows the files should be closed before doing a Rename.
if err := mf.fp.Close(); err != nil {
return err
}
fp, netCreations, err := helpRewrite(mf.directory, &mf.manifest)
if err != nil {
return err
}
mf.fp = fp
mf.manifest.Creations = netCreations
mf.manifest.Deletions = 0
return nil
}
type countingReader struct {
wrapped *bufio.Reader
count int64
}
func (r *countingReader) Read(p []byte) (n int, err error) {
n, err = r.wrapped.Read(p)
r.count += int64(n)
return
}
func (r *countingReader) ReadByte() (b byte, err error) {
b, err = r.wrapped.ReadByte()
if err == nil {
r.count++
}
return
}
var (
errBadMagic = errors.New("manifest has bad magic")
)
// ReplayManifestFile reads the manifest file and constructs two manifest objects. (We need one
// immutable copy and one mutable copy of the manifest. Easiest way is to construct two of them.)
// Also, returns the last offset after a completely read manifest entry -- the file must be
// truncated at that point before further appends are made (if there is a partial entry after
// that). In normal conditions, truncOffset is the file size.
func ReplayManifestFile(fp *os.File) (ret Manifest, truncOffset int64, err error) {
r := countingReader{wrapped: bufio.NewReader(fp)}
var magicBuf [8]byte
if _, err := io.ReadFull(&r, magicBuf[:]); err != nil {
return Manifest{}, 0, errBadMagic
}
if !bytes.Equal(magicBuf[0:4], magicText[:]) {
return Manifest{}, 0, errBadMagic
}
version := binary.BigEndian.Uint32(magicBuf[4:8])
if version != magicVersion {
return Manifest{}, 0,
fmt.Errorf("manifest has unsupported version: %d (we support %d)", version, magicVersion)
}
build := createManifest()
var offset int64
for {
offset = r.count
var lenCrcBuf [8]byte
_, err := io.ReadFull(&r, lenCrcBuf[:])
if err != nil {
if err == io.EOF || err == io.ErrUnexpectedEOF {
break
}
return Manifest{}, 0, err
}
length := binary.BigEndian.Uint32(lenCrcBuf[0:4])
var buf = make([]byte, length)
if _, err := io.ReadFull(&r, buf); err != nil {
if err == io.EOF || err == io.ErrUnexpectedEOF {
break
}
return Manifest{}, 0, err
}
if crc32.Checksum(buf, y.CastagnoliCrcTable) != binary.BigEndian.Uint32(lenCrcBuf[4:8]) {
break
}
var changeSet pb.ManifestChangeSet
if err := changeSet.Unmarshal(buf); err != nil {
return Manifest{}, 0, err
}
if err := applyChangeSet(&build, &changeSet); err != nil {
return Manifest{}, 0, err
}
}
return build, offset, err
}
func applyManifestChange(build *Manifest, tc *pb.ManifestChange) error {
switch tc.Op {
case pb.ManifestChange_CREATE:
if _, ok := build.Tables[tc.Id]; ok {
return fmt.Errorf("MANIFEST invalid, table %d exists", tc.Id)
}
build.Tables[tc.Id] = TableManifest{
Level: uint8(tc.Level),
Checksum: append([]byte{}, tc.Checksum...),
}
for len(build.Levels) <= int(tc.Level) {
build.Levels = append(build.Levels, levelManifest{make(map[uint64]struct{})})
}
build.Levels[tc.Level].Tables[tc.Id] = struct{}{}
build.Creations++
case pb.ManifestChange_DELETE:
tm, ok := build.Tables[tc.Id]
if !ok {
return fmt.Errorf("MANIFEST removes non-existing table %d", tc.Id)
}
delete(build.Levels[tm.Level].Tables, tc.Id)
delete(build.Tables, tc.Id)
build.Deletions++
default:
return fmt.Errorf("MANIFEST file has invalid manifestChange op")
}
return nil
}
// This is not a "recoverable" error -- opening the KV store fails because the MANIFEST file is
// just plain broken.
func applyChangeSet(build *Manifest, changeSet *pb.ManifestChangeSet) error {
for _, change := range changeSet.Changes {
if err := applyManifestChange(build, change); err != nil {
return err
}
}
return nil
}
func newCreateChange(id uint64, level int, checksum []byte) *pb.ManifestChange {
return &pb.ManifestChange{
Id: id,
Op: pb.ManifestChange_CREATE,
Level: uint32(level),
Checksum: checksum,
}
}
func newDeleteChange(id uint64) *pb.ManifestChange {
return &pb.ManifestChange{
Id: id,
Op: pb.ManifestChange_DELETE,
}
}
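A minimal sketch of replaying a manifest by hand with the exported pieces above (the directory is hypothetical; "os", "path/filepath", and "fmt" imports are assumed):
fp, err := os.Open(filepath.Join("/tmp/badger", ManifestFilename))
if err != nil {
	panic(err)
}
defer fp.Close()
m, truncOffset, err := ReplayManifestFile(fp)
if err != nil {
	panic(err)
}
fmt.Printf("%d tables across %d levels; entries valid through byte %d\n",
	len(m.Tables), len(m.Levels), truncOffset)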

244
vendor/github.com/dgraph-io/badger/manifest_test.go generated vendored Normal file

@ -0,0 +1,244 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"sort"
"testing"
"gx/ipfs/QmRvYNctevGUW52urgmoFZscT6buMKqhHezLUS64WepGWn/go-net/trace"
"github.com/stretchr/testify/require"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/pb"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/table"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
func TestManifestBasic(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
{
kv, err := Open(opt)
require.NoError(t, err)
n := 5000
for i := 0; i < n; i++ {
if (i % 10000) == 0 {
fmt.Printf("Putting i=%d\n", i)
}
k := []byte(fmt.Sprintf("%16x", rand.Int63()))
txnSet(t, kv, k, k, 0x00)
}
txnSet(t, kv, []byte("testkey"), []byte("testval"), 0x05)
kv.validate()
require.NoError(t, kv.Close())
}
kv, err := Open(opt)
require.NoError(t, err)
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get([]byte("testkey"))
require.NoError(t, err)
require.EqualValues(t, "testval", string(getItemValue(t, item)))
require.EqualValues(t, byte(0x05), item.UserMeta())
return nil
}))
require.NoError(t, kv.Close())
}
func helpTestManifestFileCorruption(t *testing.T, off int64, errorContent string) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
{
kv, err := Open(opt)
require.NoError(t, err)
require.NoError(t, kv.Close())
}
fp, err := os.OpenFile(filepath.Join(dir, ManifestFilename), os.O_RDWR, 0)
require.NoError(t, err)
// Mess with magic value or version to force error
_, err = fp.WriteAt([]byte{'X'}, off)
require.NoError(t, err)
require.NoError(t, fp.Close())
kv, err := Open(opt)
defer func() {
if kv != nil {
kv.Close()
}
}()
require.Error(t, err)
require.Contains(t, err.Error(), errorContent)
}
func TestManifestMagic(t *testing.T) {
helpTestManifestFileCorruption(t, 3, "bad magic")
}
func TestManifestVersion(t *testing.T) {
helpTestManifestFileCorruption(t, 4, "unsupported version")
}
func key(prefix string, i int) string {
return prefix + fmt.Sprintf("%04d", i)
}
func buildTestTable(t *testing.T, prefix string, n int) *os.File {
y.AssertTrue(n <= 10000)
keyValues := make([][]string, n)
for i := 0; i < n; i++ {
k := key(prefix, i)
v := fmt.Sprintf("%d", i)
keyValues[i] = []string{k, v}
}
return buildTable(t, keyValues)
}
// TODO - Move these to somewhere where table package can also use it.
// keyValues is n by 2 where n is number of pairs.
func buildTable(t *testing.T, keyValues [][]string) *os.File {
b := table.NewTableBuilder()
defer b.Close()
// TODO: Add test for file garbage collection here. No files should be left after the tests here.
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
f, err := y.OpenSyncedFile(filename, true)
if t != nil {
require.NoError(t, err)
} else {
y.Check(err)
}
sort.Slice(keyValues, func(i, j int) bool {
return keyValues[i][0] < keyValues[j][0]
})
for _, kv := range keyValues {
y.AssertTrue(len(kv) == 2)
err := b.Add(y.KeyWithTs([]byte(kv[0]), 10), y.ValueStruct{
Value: []byte(kv[1]),
Meta: 'A',
UserMeta: 0,
})
if t != nil {
require.NoError(t, err)
} else {
y.Check(err)
}
}
f.Write(b.Finish())
f.Close()
f, _ = y.OpenSyncedFile(filename, true)
return f
}
func TestOverlappingKeyRangeError(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := DefaultOptions
opt.Dir = dir
opt.ValueDir = dir
kv, err := Open(opt)
require.NoError(t, err)
lh0 := newLevelHandler(kv, 0)
lh1 := newLevelHandler(kv, 1)
f := buildTestTable(t, "k", 2)
t1, err := table.OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer t1.DecrRef()
done := lh0.tryAddLevel0Table(t1)
require.Equal(t, true, done)
cd := compactDef{
thisLevel: lh0,
nextLevel: lh1,
elog: trace.New("Badger", "Compact"),
}
manifest := createManifest()
lc, err := newLevelsController(kv, &manifest)
require.NoError(t, err)
done = lc.fillTablesL0(&cd)
require.Equal(t, true, done)
lc.runCompactDef(0, cd)
f = buildTestTable(t, "l", 2)
t2, err := table.OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer t2.DecrRef()
done = lh0.tryAddLevel0Table(t2)
require.Equal(t, true, done)
cd = compactDef{
thisLevel: lh0,
nextLevel: lh1,
elog: trace.New("Badger", "Compact"),
}
lc.fillTablesL0(&cd)
lc.runCompactDef(0, cd)
}
func TestManifestRewrite(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
deletionsThreshold := 10
mf, m, err := helpOpenOrCreateManifestFile(dir, false, deletionsThreshold)
defer func() {
if mf != nil {
mf.close()
}
}()
require.NoError(t, err)
require.Equal(t, 0, m.Creations)
require.Equal(t, 0, m.Deletions)
err = mf.addChanges([]*pb.ManifestChange{
newCreateChange(0, 0, nil),
})
require.NoError(t, err)
for i := uint64(0); i < uint64(deletionsThreshold*3); i++ {
ch := []*pb.ManifestChange{
newCreateChange(i+1, 0, nil),
newDeleteChange(i),
}
err := mf.addChanges(ch)
require.NoError(t, err)
}
err = mf.close()
require.NoError(t, err)
mf = nil
mf, m, err = helpOpenOrCreateManifestFile(dir, false, deletionsThreshold)
require.NoError(t, err)
require.Equal(t, map[uint64]TableManifest{
uint64(deletionsThreshold * 3): {Level: 0, Checksum: []byte{}},
}, m.Tables)
}

173
vendor/github.com/dgraph-io/badger/merge.go generated vendored Normal file

@ -0,0 +1,173 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"sync"
"time"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
// MergeOperator represents a Badger merge operator.
type MergeOperator struct {
sync.RWMutex
f MergeFunc
db *DB
key []byte
closer *y.Closer
}
// MergeFunc accepts two byte slices, one representing an existing value, and
// another representing a new value that needs to be merged into it. MergeFunc
// contains the logic to perform the merge and return an updated value.
// MergeFunc could perform operations like integer addition, list appends etc.
// Note that the ordering of the operands is unspecified, so the merge func
// should either be agnostic to ordering or do additional handling if ordering
// is required.
type MergeFunc func(existing, val []byte) []byte
// GetMergeOperator creates a new MergeOperator for a given key and returns a
// pointer to it. It also fires off a goroutine that periodically compacts the
// recorded values using the merge function, at the interval specified by dur.
func (db *DB) GetMergeOperator(key []byte,
f MergeFunc, dur time.Duration) *MergeOperator {
op := &MergeOperator{
f: f,
db: db,
key: key,
closer: y.NewCloser(1),
}
go op.runCompactions(dur)
return op
}
var errNoMerge = errors.New("No need for merge")
func (op *MergeOperator) iterateAndMerge(txn *Txn) (val []byte, err error) {
opt := DefaultIteratorOptions
opt.AllVersions = true
it := txn.NewIterator(opt)
defer it.Close()
var numVersions int
for it.Rewind(); it.ValidForPrefix(op.key); it.Next() {
item := it.Item()
numVersions++
if numVersions == 1 {
val, err = item.ValueCopy(val)
if err != nil {
return nil, err
}
} else {
if err := item.Value(func(newVal []byte) error {
val = op.f(val, newVal)
return nil
}); err != nil {
return nil, err
}
}
if item.DiscardEarlierVersions() {
break
}
}
if numVersions == 0 {
return nil, ErrKeyNotFound
} else if numVersions == 1 {
return val, errNoMerge
}
return val, nil
}
func (op *MergeOperator) compact() error {
op.Lock()
defer op.Unlock()
err := op.db.Update(func(txn *Txn) error {
var (
val []byte
err error
)
val, err = op.iterateAndMerge(txn)
if err != nil {
return err
}
// Write value back to db
return txn.SetWithDiscard(op.key, val, 0)
})
if err == ErrKeyNotFound || err == errNoMerge {
// pass.
} else if err != nil {
return err
}
return nil
}
func (op *MergeOperator) runCompactions(dur time.Duration) {
ticker := time.NewTicker(dur)
defer op.closer.Done()
var stop bool
for {
select {
case <-op.closer.HasBeenClosed():
stop = true
case <-ticker.C: // wait for tick
}
if err := op.compact(); err != nil {
op.db.opt.Errorf("failure while running merge operation: %s", err)
}
if stop {
ticker.Stop()
break
}
}
}
// Add records a value in Badger which will eventually be merged by a background
// routine into the values that were recorded by previous invocations of Add().
func (op *MergeOperator) Add(val []byte) error {
return op.db.Update(func(txn *Txn) error {
return txn.Set(op.key, val)
})
}
// Get returns the latest value for the merge operator, which is derived by
// applying the merge function to all the values added so far.
//
// If Add has not been called even once, Get will return ErrKeyNotFound.
func (op *MergeOperator) Get() ([]byte, error) {
op.RLock()
defer op.RUnlock()
var existing []byte
err := op.db.View(func(txn *Txn) (err error) {
existing, err = op.iterateAndMerge(txn)
return err
})
if err == errNoMerge {
return existing, nil
}
return existing, err
}
// Stop waits for any pending merge to complete and then stops the background
// goroutine.
func (op *MergeOperator) Stop() {
op.closer.SignalAndWait()
}
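A minimal sketch of the merge-operator API above, using an order-insensitive sum (per the MergeFunc ordering caveat). The key name and helpers are hypothetical, db is assumed to be an open *DB, and "encoding/binary" and "time" imports are assumed.
func u64ToBytes(n uint64) []byte {
	var b [8]byte
	binary.BigEndian.PutUint64(b[:], n)
	return b[:]
}

func bytesToU64(b []byte) uint64 {
	if len(b) != 8 {
		return 0
	}
	return binary.BigEndian.Uint64(b)
}

add := func(existing, val []byte) []byte {
	return u64ToBytes(bytesToU64(existing) + bytesToU64(val))
}
m := db.GetMergeOperator([]byte("counter"), add, 200*time.Millisecond)
defer m.Stop()
_ = m.Add(u64ToBytes(1))
_ = m.Add(u64ToBytes(2))
sum, err := m.Get() // bytesToU64(sum) == 3, barring errors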

165
vendor/github.com/dgraph-io/badger/options.go generated vendored Normal file

@ -0,0 +1,165 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
)
// NOTE: Keep the comments in the following to 75 chars width, so they
// format nicely in godoc.
// Options are params for creating DB object.
//
// This package provides DefaultOptions which contains options that should
// work for most applications. Consider using that as a starting point before
// customizing it for your own needs.
type Options struct {
// 1. Mandatory flags
// -------------------
// Directory to store the data in. If it doesn't exist, Badger will
// try to create it for you.
Dir string
// Directory to store the value log in. Can be the same as Dir. If it
// doesn't exist, Badger will try to create it for you.
ValueDir string
// 2. Frequently modified flags
// -----------------------------
// Sync all writes to disk. Setting this to false would achieve better
// performance, but may cause data to be lost.
SyncWrites bool
// How the LSM tree should be accessed.
TableLoadingMode options.FileLoadingMode
// How the value log should be accessed.
ValueLogLoadingMode options.FileLoadingMode
// How many versions to keep per key.
NumVersionsToKeep int
// 3. Flags that user might want to review
// ----------------------------------------
// The following affect all levels of LSM tree.
MaxTableSize int64 // Each table (or file) is at most this size.
LevelSizeMultiplier int // Equals SizeOf(Li+1)/SizeOf(Li).
MaxLevels int // Maximum number of levels of compaction.
// If value size >= this threshold, only store value offsets in tree.
ValueThreshold int
// Maximum number of tables to keep in memory, before stalling.
NumMemtables int
// The following affect how we handle LSM tree L0.
// Maximum number of Level 0 tables before we start compacting.
NumLevelZeroTables int
// If we hit this number of Level 0 tables, we will stall until L0 is
// compacted away.
NumLevelZeroTablesStall int
// Maximum total size for L1.
LevelOneSize int64
// Size of single value log file.
ValueLogFileSize int64
// Max number of entries a value log file can hold (approximately). A value log
// file is rolled over when it reaches either its max file size or this entry
// limit, whichever comes first.
ValueLogMaxEntries uint32
// Number of compaction workers to run concurrently. Setting this to zero stops
// compactions within the LSM tree, in which case writes could block forever.
NumCompactors int
// When closing the DB, force compact Level 0. This ensures that both reads and writes are
// efficient when the DB is opened later.
CompactL0OnClose bool
// Transaction start and commit timestamps are managed by end-user.
// This is only useful for databases built on top of Badger (like Dgraph).
// Not recommended for most users.
managedTxns bool
// 4. Flags for testing purposes
// ------------------------------
maxBatchCount int64 // max entries in batch
maxBatchSize int64 // max batch size in bytes
// Open the DB as read-only. With this set, multiple processes can
// open the same Badger DB. Note: if the DB being opened had crashed
// before and has vlog data to be replayed, ReadOnly will cause Open
// to fail with an appropriate message.
ReadOnly bool
// Truncate value log to delete corrupt data, if any. Would not truncate if ReadOnly is set.
Truncate bool
// DB-specific logger which will override the global logger.
Logger Logger
}
// DefaultOptions sets a list of recommended options for good performance.
// Feel free to modify these to suit your needs.
var DefaultOptions = Options{
LevelOneSize: 256 << 20,
LevelSizeMultiplier: 10,
TableLoadingMode: options.LoadToRAM,
ValueLogLoadingMode: options.MemoryMap,
// table.MemoryMap to mmap() the tables.
// table.Nothing to not preload the tables.
MaxLevels: 7,
MaxTableSize: 64 << 20,
NumCompactors: 2, // Compactions can be expensive. Only run 2.
NumLevelZeroTables: 5,
NumLevelZeroTablesStall: 10,
NumMemtables: 5,
SyncWrites: true,
NumVersionsToKeep: 1,
CompactL0OnClose: true,
// Nothing to read/write value log using standard File I/O
// MemoryMap to mmap() the value log files
// (2^30 - 1)*2 when mmapping < 2^31 - 1, max int32.
// -1 so 2*ValueLogFileSize won't overflow on 32-bit systems.
ValueLogFileSize: 1<<30 - 1,
ValueLogMaxEntries: 1000000,
ValueThreshold: 32,
Truncate: false,
Logger: defaultLogger,
}
// LSMOnlyOptions follows from DefaultOptions, but sets a higher ValueThreshold
// so values would be colocated with the LSM tree, with value log largely acting
// as a write-ahead log only. These options would reduce the disk usage of value
// log, and make Badger act more like a typical LSM tree.
var LSMOnlyOptions = Options{}
func init() {
LSMOnlyOptions = DefaultOptions
LSMOnlyOptions.ValueThreshold = 65500 // Max value length which fits in uint16.
// Let's not set any other options, because they can cause issues with the
// size of key-value pairs a user can pass to Badger. For example, if we set
// ValueLogFileSize to 64MB, a user can't pass a value larger than that.
// Setting ValueLogMaxEntries to 1000 can generate too many files.
// These options are better configured per use case than broadly here.
// ValueThreshold is the most important setting a user needs to adjust to
// achieve heavier usage of the LSM tree.
// NOTE: If a user does not want to set 64KB as the ValueThreshold because
// of performance reasons, 1KB would be a good option too, allowing
// values smaller than 1KB to be colocated with the keys in the LSM tree.
}
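A minimal sketch of starting from DefaultOptions and tuning a few fields (paths and values are hypothetical):
opts := DefaultOptions
opts.Dir, opts.ValueDir = "/tmp/badger", "/tmp/badger"
opts.SyncWrites = false                   // faster writes, but risks loss on crash
opts.TableLoadingMode = options.MemoryMap // mmap tables rather than loading to RAM
opts.NumVersionsToKeep = 3                // retain a little history per key
db, err := Open(opts)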

30
vendor/github.com/dgraph-io/badger/options/options.go generated vendored Normal file

@ -0,0 +1,30 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package options
// FileLoadingMode specifies how data in LSM table files and value log files should
// be loaded.
type FileLoadingMode int
const (
// FileIO indicates that files must be loaded using standard I/O
FileIO FileLoadingMode = iota
// LoadToRAM indicates that the file must be loaded into RAM
LoadToRAM
// MemoryMap indicates that the file must be memory-mapped
MemoryMap
)

72
vendor/github.com/dgraph-io/badger/package.json generated vendored Normal file

@ -0,0 +1,72 @@
{
"author": "dgraph-io",
"bugs": {
"url": "https://github.com/dgraph-io/badger"
},
"gx": {
"dvcsimport": "github.com/dgraph-io/badger"
},
"gxDependencies": [
{
"author": "whyrusleeping",
"hash": "QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy",
"name": "errors",
"version": "0.0.1"
},
{
"author": "magik6k",
"hash": "Qmbvv2urkn5Wtwws4yzjE85qRjB293EodchZofJsrTRuvN",
"name": "go-lz4",
"version": "1.0.0"
},
{
"author": "kubuxu",
"hash": "QmWaLViWQF8jgyoLLqqcSrnp6dJpHESiJfzor1vrfDyTZf",
"name": "bbloom",
"version": "0.1.2"
},
{
"author": "kubuxu",
"hash": "QmVGjyM9i2msKvLXwh9VosCTgP4mL91kC7hDmqnwTTx6Hu",
"name": "sys",
"version": "0.2.0"
},
{
"author": "whyrusleeping",
"hash": "QmRvYNctevGUW52urgmoFZscT6buMKqhHezLUS64WepGWn",
"name": "go-net",
"version": "0.2.0"
},
{
"author": "magik6k",
"hash": "QmRFFHk2jw9tgjxv12bCuuTnSbVXxEvYQkuNCLMEv9eUwP",
"name": "go-farm",
"version": "1.0.0"
},
{
"author": "magik6k",
"hash": "QmQMxG9D52TirZd9eLA37nxiNspnMRkKbyPWrVAa1gvtSy",
"name": "go-humanize",
"version": "1.0.1"
},
{
"author": "GoGo",
"hash": "QmddjPSGZb3ieihSseFeCfVRpZzcqczPNsD2DvarSwnjJB",
"name": "gogo-protobuf",
"version": "1.2.1"
},
{
"author": "magik6k",
"hash": "QmXj63M2w2Pq7mnBpcrs7Va8prmfhvfMUNqVhJ9TgjiMbT",
"name": "cobra",
"version": "0.0.1"
}
],
"gxVersion": "0.10.0",
"language": "go",
"license": "Apache 2.0",
"name": "badger",
"releaseCmd": "git commit -a -m \"gx publish $VERSION\"",
"version": "2.11.4"
}

7
vendor/github.com/dgraph-io/badger/pb/gen.sh generated vendored Normal file

@ -0,0 +1,7 @@
#!/bin/bash
# You might need to go get -v github.com/gogo/protobuf/...
protos=${GOPATH-$HOME/go}/src/github.com/dgraph-io/badger/pb
pushd $protos > /dev/null
protoc --gogofaster_out=. -I=. pb.proto
popd > /dev/null

1236
vendor/github.com/dgraph-io/badger/pb/pb.pb.go generated vendored Normal file

File diff suppressed because it is too large

49
vendor/github.com/dgraph-io/badger/pb/pb.proto generated vendored Normal file

@ -0,0 +1,49 @@
/*
* Copyright (C) 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Use protos/gen.sh to generate .pb.go files.
syntax = "proto3";
package pb;
message KV {
bytes key = 1;
bytes value = 2;
bytes user_meta = 3;
uint64 version = 4;
uint64 expires_at = 5;
bytes meta = 6;
}
message KVList {
repeated KV kv = 1;
}
message ManifestChangeSet {
// A set of changes that are applied atomically.
repeated ManifestChange changes = 1;
}
message ManifestChange {
uint64 Id = 1;
enum Operation {
CREATE = 0;
DELETE = 1;
}
Operation Op = 2;
uint32 Level = 3; // Only used for CREATE
bytes Checksum = 4; // Only used for CREATE
}
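A minimal Go sketch of building and marshalling one of these messages with the generated gogo-protobuf types, mirroring what manifestFile.addChanges does before framing with length and CRC32:
change := &pb.ManifestChange{Id: 1, Op: pb.ManifestChange_CREATE, Level: 0}
set := &pb.ManifestChangeSet{Changes: []*pb.ManifestChange{change}}
buf, err := set.Marshal() // addChanges prefixes buf with [4B len][4B crc32]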

113
vendor/github.com/dgraph-io/badger/skl/README.md generated vendored Normal file

@ -0,0 +1,113 @@
These benchmarks show this skiplist implementation performing much better than `skiplist` and `slist`.
```
BenchmarkReadWrite/frac_0-8 3000000 537 ns/op
BenchmarkReadWrite/frac_1-8 3000000 503 ns/op
BenchmarkReadWrite/frac_2-8 3000000 492 ns/op
BenchmarkReadWrite/frac_3-8 3000000 475 ns/op
BenchmarkReadWrite/frac_4-8 3000000 440 ns/op
BenchmarkReadWrite/frac_5-8 5000000 442 ns/op
BenchmarkReadWrite/frac_6-8 5000000 380 ns/op
BenchmarkReadWrite/frac_7-8 5000000 338 ns/op
BenchmarkReadWrite/frac_8-8 5000000 294 ns/op
BenchmarkReadWrite/frac_9-8 10000000 268 ns/op
BenchmarkReadWrite/frac_10-8 100000000 26.3 ns/op
```
And even better than a simple map with read-write lock:
```
BenchmarkReadWriteMap/frac_0-8 2000000 774 ns/op
BenchmarkReadWriteMap/frac_1-8 2000000 647 ns/op
BenchmarkReadWriteMap/frac_2-8 3000000 605 ns/op
BenchmarkReadWriteMap/frac_3-8 3000000 603 ns/op
BenchmarkReadWriteMap/frac_4-8 3000000 556 ns/op
BenchmarkReadWriteMap/frac_5-8 3000000 472 ns/op
BenchmarkReadWriteMap/frac_6-8 3000000 476 ns/op
BenchmarkReadWriteMap/frac_7-8 3000000 457 ns/op
BenchmarkReadWriteMap/frac_8-8 5000000 444 ns/op
BenchmarkReadWriteMap/frac_9-8 5000000 361 ns/op
BenchmarkReadWriteMap/frac_10-8 10000000 212 ns/op
```
# Node Pooling
Command used
```
rm -Rf tmp && /usr/bin/time -l ./populate -keys_mil 10
```
For pprof results, we run without using /usr/bin/time. Representative before and after runs are shown below.
Results seem to vary quite a bit between runs.
## Before node pooling
```
1311.53MB of 1338.69MB total (97.97%)
Dropped 30 nodes (cum <= 6.69MB)
Showing top 10 nodes out of 37 (cum >= 12.50MB)
flat flat% sum% cum cum%
523.04MB 39.07% 39.07% 523.04MB 39.07% github.com/dgraph-io/badger/skl.(*Skiplist).Put
184.51MB 13.78% 52.85% 184.51MB 13.78% runtime.stringtoslicebyte
166.01MB 12.40% 65.25% 689.04MB 51.47% github.com/dgraph-io/badger/mem.(*Table).Put
165MB 12.33% 77.58% 165MB 12.33% runtime.convT2E
116.92MB 8.73% 86.31% 116.92MB 8.73% bytes.makeSlice
62.50MB 4.67% 90.98% 62.50MB 4.67% main.newValue
34.50MB 2.58% 93.56% 34.50MB 2.58% github.com/dgraph-io/badger/table.(*BlockIterator).parseKV
25.50MB 1.90% 95.46% 100.06MB 7.47% github.com/dgraph-io/badger/y.(*MergeIterator).Next
21.06MB 1.57% 97.04% 21.06MB 1.57% github.com/dgraph-io/badger/table.(*Table).read
12.50MB 0.93% 97.97% 12.50MB 0.93% github.com/dgraph-io/badger/table.header.Encode
128.31 real 329.37 user 17.11 sys
3355660288 maximum resident set size
0 average shared memory size
0 average unshared data size
0 average unshared stack size
2203080 page reclaims
764 page faults
0 swaps
275 block input operations
76 block output operations
0 messages sent
0 messages received
0 signals received
49173 voluntary context switches
599922 involuntary context switches
```
## After node pooling
```
1963.13MB of 2026.09MB total (96.89%)
Dropped 29 nodes (cum <= 10.13MB)
Showing top 10 nodes out of 41 (cum >= 185.62MB)
flat flat% sum% cum cum%
658.05MB 32.48% 32.48% 658.05MB 32.48% github.com/dgraph-io/badger/skl.glob..func1
297.51MB 14.68% 47.16% 297.51MB 14.68% runtime.convT2E
257.51MB 12.71% 59.87% 257.51MB 12.71% runtime.stringtoslicebyte
249.01MB 12.29% 72.16% 1007.06MB 49.70% github.com/dgraph-io/badger/mem.(*Table).Put
142.43MB 7.03% 79.19% 142.43MB 7.03% bytes.makeSlice
100MB 4.94% 84.13% 758.05MB 37.41% github.com/dgraph-io/badger/skl.newNode
99.50MB 4.91% 89.04% 99.50MB 4.91% main.newValue
75MB 3.70% 92.74% 75MB 3.70% github.com/dgraph-io/badger/table.(*BlockIterator).parseKV
44.62MB 2.20% 94.94% 44.62MB 2.20% github.com/dgraph-io/badger/table.(*Table).read
39.50MB 1.95% 96.89% 185.62MB 9.16% github.com/dgraph-io/badger/y.(*MergeIterator).Next
135.58 real 374.29 user 17.65 sys
3740614656 maximum resident set size
0 average shared memory size
0 average unshared data size
0 average unshared stack size
2276566 page reclaims
770 page faults
0 swaps
128 block input operations
90 block output operations
0 messages sent
0 messages received
0 signals received
46434 voluntary context switches
597049 involuntary context switches
```

136
vendor/github.com/dgraph-io/badger/skl/arena.go generated vendored Normal file

@ -0,0 +1,136 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package skl
import (
"sync/atomic"
"unsafe"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
const (
offsetSize = int(unsafe.Sizeof(uint32(0)))
// Always align nodes on 64-bit boundaries, even on 32-bit architectures,
// so that the node.value field is 64-bit aligned. This is necessary because
// node.getValueOffset uses atomic.LoadUint64, which expects its input
// pointer to be 64-bit aligned.
nodeAlign = int(unsafe.Sizeof(uint64(0))) - 1
)
// Arena should be lock-free.
type Arena struct {
n uint32
buf []byte
}
// newArena returns a new arena.
func newArena(n int64) *Arena {
// Don't store data at position 0 in order to reserve offset=0 as a kind
// of nil pointer.
out := &Arena{
n: 1,
buf: make([]byte, n),
}
return out
}
func (s *Arena) size() int64 {
return int64(atomic.LoadUint32(&s.n))
}
func (s *Arena) reset() {
atomic.StoreUint32(&s.n, 0)
}
// putNode allocates a node in the arena. The node is aligned on a pointer-sized
// boundary. The arena offset of the node is returned.
func (s *Arena) putNode(height int) uint32 {
// Compute the amount of the tower that will never be used, since the height
// is less than maxHeight.
unusedSize := (maxHeight - height) * offsetSize
// Pad the allocation with enough bytes to ensure pointer alignment.
l := uint32(MaxNodeSize - unusedSize + nodeAlign)
n := atomic.AddUint32(&s.n, l)
y.AssertTruef(int(n) <= len(s.buf),
"Arena too small, toWrite:%d newTotal:%d limit:%d",
l, n, len(s.buf))
// Return the aligned offset.
m := (n - l + uint32(nodeAlign)) & ^uint32(nodeAlign)
return m
}
// putVal will *copy* v into the arena. To make better use of this, reuse your
// input buffer. Returns an offset into buf. The caller is responsible for
// remembering the size of the value. We could also store this size inside the
// arena, but the encoding and decoding would incur some overhead.
func (s *Arena) putVal(v y.ValueStruct) uint32 {
l := uint32(v.EncodedSize())
n := atomic.AddUint32(&s.n, l)
y.AssertTruef(int(n) <= len(s.buf),
"Arena too small, toWrite:%d newTotal:%d limit:%d",
l, n, len(s.buf))
m := n - l
v.Encode(s.buf[m:])
return m
}
func (s *Arena) putKey(key []byte) uint32 {
l := uint32(len(key))
n := atomic.AddUint32(&s.n, l)
y.AssertTruef(int(n) <= len(s.buf),
"Arena too small, toWrite:%d newTotal:%d limit:%d",
l, n, len(s.buf))
m := n - l
y.AssertTrue(len(key) == copy(s.buf[m:n], key))
return m
}
// getNode returns a pointer to the node located at offset. If the offset is
// zero, then the nil node pointer is returned.
func (s *Arena) getNode(offset uint32) *node {
if offset == 0 {
return nil
}
return (*node)(unsafe.Pointer(&s.buf[offset]))
}
// getKey returns byte slice at offset.
func (s *Arena) getKey(offset uint32, size uint16) []byte {
return s.buf[offset : offset+uint32(size)]
}
// getVal returns byte slice at offset. The given size should be just the value
// size and should NOT include the meta bytes.
func (s *Arena) getVal(offset uint32, size uint16) (ret y.ValueStruct) {
ret.Decode(s.buf[offset : offset+uint32(size)])
return
}
// getNodeOffset returns the offset of node in the arena. If the node pointer is
// nil, then the zero offset is returned.
func (s *Arena) getNodeOffset(nd *node) uint32 {
if nd == nil {
return 0
}
return uint32(uintptr(unsafe.Pointer(nd)) - uintptr(unsafe.Pointer(&s.buf[0])))
}
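// The pointer-alignment step from putNode, shown standalone as a sketch
// (alignUp is a hypothetical helper name):
//
//	func alignUp(off uint32) uint32 {
//		return (off + uint32(nodeAlign)) & ^uint32(nodeAlign)
//	}
//
// alignUp(13) == 16 and alignUp(16) == 16, i.e. offsets are rounded up to the
// next multiple of 8 (nodeAlign+1).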

516
vendor/github.com/dgraph-io/badger/skl/skl.go generated vendored Normal file

@ -0,0 +1,516 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
Adapted from RocksDB inline skiplist.
Key differences:
- No optimization for sequential inserts (no "prev").
- No custom comparator.
- Support overwrites. This requires care when we see the same key when inserting.
For RocksDB or LevelDB, overwrites are implemented as a newer sequence number in the key, so
there is no need for values. We don't intend to support versioning. In-place updates of values
would be more efficient.
- We discard all non-concurrent code.
- We do not support Splices. This simplifies the code a lot.
- No AllocateNode or other pointer arithmetic.
- We combine the findLessThan, findGreaterOrEqual, etc into one function.
*/
package skl
import (
"math"
"math/rand"
"sync/atomic"
"unsafe"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
const (
maxHeight = 20
heightIncrease = math.MaxUint32 / 3
)
// MaxNodeSize is the memory footprint of a node of maximum height.
const MaxNodeSize = int(unsafe.Sizeof(node{}))
type node struct {
// Multiple parts of the value are encoded as a single uint64 so that it
// can be atomically loaded and stored:
// value offset: uint32 (bits 0-31)
// value size : uint16 (bits 32-47)
value uint64
// A byte slice is 24 bytes. We are trying to save space here.
keyOffset uint32 // Immutable. No need to lock to access key.
keySize uint16 // Immutable. No need to lock to access key.
// Height of the tower.
height uint16
// Most nodes do not need to use the full height of the tower, since the
// probability of each successive level decreases exponentially. Because
// these elements are never accessed, they do not need to be allocated.
// Therefore, when a node is allocated in the arena, its memory footprint
// is deliberately truncated to not include unneeded tower elements.
//
// All accesses to elements should use CAS operations, with no need to lock.
tower [maxHeight]uint32
}
// Skiplist maps keys to values (in memory)
type Skiplist struct {
height int32 // Current height. 1 <= height <= kMaxHeight. CAS.
head *node
ref int32
arena *Arena
}
// IncrRef increases the refcount
func (s *Skiplist) IncrRef() {
atomic.AddInt32(&s.ref, 1)
}
// DecrRef decrements the refcount, deallocating the Skiplist when done using it
func (s *Skiplist) DecrRef() {
newRef := atomic.AddInt32(&s.ref, -1)
if newRef > 0 {
return
}
s.arena.reset()
// Indicate we are closed. Good for testing. Also, lets GC reclaim memory. Race condition
// here would suggest we are accessing skiplist when we are supposed to have no reference!
s.arena = nil
}
func (s *Skiplist) valid() bool { return s.arena != nil }
func newNode(arena *Arena, key []byte, v y.ValueStruct, height int) *node {
// The base level is already allocated in the node struct.
offset := arena.putNode(height)
node := arena.getNode(offset)
node.keyOffset = arena.putKey(key)
node.keySize = uint16(len(key))
node.height = uint16(height)
node.value = encodeValue(arena.putVal(v), v.EncodedSize())
return node
}
func encodeValue(valOffset uint32, valSize uint16) uint64 {
return uint64(valSize)<<32 | uint64(valOffset)
}
func decodeValue(value uint64) (valOffset uint32, valSize uint16) {
valOffset = uint32(value)
valSize = uint16(value >> 32)
return
}
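// A packing sketch with hypothetical numbers (mirroring the field comments on node.value):
// the offset occupies bits 0-31 and the size bits 32-47, so the pair round-trips losslessly
// and setValue can publish both fields with a single atomic store.
//
//	packed := encodeValue(1024, 36) // valOffset=1024, valSize=36
//	off, sz := decodeValue(packed)  // off == 1024, sz == 36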
// NewSkiplist makes a new empty skiplist, with a given arena size
func NewSkiplist(arenaSize int64) *Skiplist {
arena := newArena(arenaSize)
head := newNode(arena, nil, y.ValueStruct{}, maxHeight)
return &Skiplist{
height: 1,
head: head,
arena: arena,
ref: 1,
}
}
func (s *node) getValueOffset() (uint32, uint16) {
value := atomic.LoadUint64(&s.value)
return decodeValue(value)
}
func (s *node) key(arena *Arena) []byte {
return arena.getKey(s.keyOffset, s.keySize)
}
func (s *node) setValue(arena *Arena, v y.ValueStruct) {
valOffset := arena.putVal(v)
value := encodeValue(valOffset, v.EncodedSize())
atomic.StoreUint64(&s.value, value)
}
func (s *node) getNextOffset(h int) uint32 {
return atomic.LoadUint32(&s.tower[h])
}
func (s *node) casNextOffset(h int, old, val uint32) bool {
return atomic.CompareAndSwapUint32(&s.tower[h], old, val)
}
// Returns true if key is strictly > n.key.
// If n is nil, this is an "end" marker and we return false.
//func (s *Skiplist) keyIsAfterNode(key []byte, n *node) bool {
// y.AssertTrue(n != s.head)
// return n != nil && y.CompareKeys(key, n.key) > 0
//}
func randomHeight() int {
h := 1
for h < maxHeight && rand.Uint32() <= heightIncrease {
h++
}
return h
}
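// With heightIncrease = MaxUint32/3, each extra level is kept with probability ~1/3, so
// P(height >= h) ~ (1/3)^(h-1) and the expected tower height is about 1.5. A rough
// simulation sketch, for intuition only:
//
//	counts := make([]int, maxHeight+1)
//	for i := 0; i < 1000000; i++ {
//		counts[randomHeight()]++
//	}
//	// counts[1] ~ 667k, counts[2] ~ 222k, counts[3] ~ 74k, ...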
func (s *Skiplist) getNext(nd *node, height int) *node {
return s.arena.getNode(nd.getNextOffset(height))
}
// findNear finds the node near to key.
// If less=true, it finds rightmost node such that node.key < key (if allowEqual=false) or
// node.key <= key (if allowEqual=true).
// If less=false, it finds leftmost node such that node.key > key (if allowEqual=false) or
// node.key >= key (if allowEqual=true).
// Returns the node found. The bool returned is true if the node has key equal to given key.
func (s *Skiplist) findNear(key []byte, less bool, allowEqual bool) (*node, bool) {
x := s.head
level := int(s.getHeight() - 1)
for {
// Assume x.key < key.
next := s.getNext(x, level)
if next == nil {
// x.key < key < END OF LIST
if level > 0 {
// Can descend further to iterate closer to the end.
level--
continue
}
// Level=0. Cannot descend further. Let's return something that makes sense.
if !less {
return nil, false
}
// Try to return x. Make sure it is not a head node.
if x == s.head {
return nil, false
}
return x, false
}
nextKey := next.key(s.arena)
cmp := y.CompareKeys(key, nextKey)
if cmp > 0 {
// x.key < next.key < key. We can continue to move right.
x = next
continue
}
if cmp == 0 {
// x.key < key == next.key.
if allowEqual {
return next, true
}
if !less {
// We want >, so go to base level to grab the next bigger node.
return s.getNext(next, 0), false
}
// We want <. If not base level, we should go closer in the next level.
if level > 0 {
level--
continue
}
// On base level. Return x.
if x == s.head {
return nil, false
}
return x, false
}
// cmp < 0. In other words, x.key < key < next.key.
if level > 0 {
level--
continue
}
// At base level. Need to return something.
if !less {
return next, false
}
// Try to return x. Make sure it is not a head node.
if x == s.head {
return nil, false
}
return x, false
}
}
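// The four (less, allowEqual) combinations map onto the usual seek primitives. A sketch,
// assuming key was built with y.KeyWithTs as elsewhere in this package:
//
//	n, _ := s.findNear(key, false, true)  // first node >= key (Seek)
//	n, _ = s.findNear(key, false, false)  // first node >  key
//	n, _ = s.findNear(key, true, true)    // last node  <= key (SeekForPrev)
//	n, _ = s.findNear(key, true, false)   // last node  <  key (Prev)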
// findSpliceForLevel returns (outBefore, outAfter) with outBefore.key <= key <= outAfter.key.
// The input "before" tells us where to start looking.
// If we found a node with the same key, then we return outBefore = outAfter.
// Otherwise, outBefore.key < key < outAfter.key.
func (s *Skiplist) findSpliceForLevel(key []byte, before *node, level int) (*node, *node) {
for {
// Assume before.key < key.
next := s.getNext(before, level)
if next == nil {
return before, next
}
nextKey := next.key(s.arena)
cmp := y.CompareKeys(key, nextKey)
if cmp == 0 {
// Equality case.
return next, next
}
if cmp < 0 {
// before.key < key < next.key. We are done for this level.
return before, next
}
before = next // Keep moving right on this level.
}
}
func (s *Skiplist) getHeight() int32 {
return atomic.LoadInt32(&s.height)
}
// Put inserts the key-value pair.
func (s *Skiplist) Put(key []byte, v y.ValueStruct) {
// Since we allow overwrite, we may not need to create a new node. We might not even need to
// increase the height. Let's defer these actions.
listHeight := s.getHeight()
var prev [maxHeight + 1]*node
var next [maxHeight + 1]*node
prev[listHeight] = s.head
next[listHeight] = nil
for i := int(listHeight) - 1; i >= 0; i-- {
// Use higher level to speed up for current level.
prev[i], next[i] = s.findSpliceForLevel(key, prev[i+1], i)
if prev[i] == next[i] {
prev[i].setValue(s.arena, v)
return
}
}
// We do need to create a new node.
height := randomHeight()
x := newNode(s.arena, key, v, height)
// Try to increase s.height via CAS.
listHeight = s.getHeight()
for height > int(listHeight) {
if atomic.CompareAndSwapInt32(&s.height, listHeight, int32(height)) {
// Successfully increased skiplist.height.
break
}
listHeight = s.getHeight()
}
// We always insert from the base level and up. After we add a node at the base level, we cannot
// create a node at the level above it, because the search there would have discovered the node at the base level.
for i := 0; i < height; i++ {
for {
if prev[i] == nil {
y.AssertTrue(i > 1) // This cannot happen in base level.
// We haven't computed prev, next for this level because height exceeds old listHeight.
// For these levels, we expect the lists to be sparse, so we can just search from head.
prev[i], next[i] = s.findSpliceForLevel(key, s.head, i)
// Someone added the exact same key before we could. This can only happen on the base
// level. But we know we are not on the base level.
y.AssertTrue(prev[i] != next[i])
}
nextOffset := s.arena.getNodeOffset(next[i])
x.tower[i] = nextOffset
if prev[i].casNextOffset(i, nextOffset, s.arena.getNodeOffset(x)) {
// Managed to insert x between prev[i] and next[i]. Go to the next level.
break
}
// CAS failed. We need to recompute prev and next.
// It is unlikely to be helpful to try to use a different level as we redo the search,
// because it is unlikely that lots of nodes are inserted between prev[i] and next[i].
prev[i], next[i] = s.findSpliceForLevel(key, prev[i], i)
if prev[i] == next[i] {
y.AssertTruef(i == 0, "Equality can happen only on base level: %d", i)
prev[i].setValue(s.arena, v)
return
}
}
}
}
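// A minimal single-writer usage sketch, mirroring the tests in this package (keys must carry
// a y.KeyWithTs timestamp suffix):
//
//	l := NewSkiplist(1 << 20)
//	l.Put(y.KeyWithTs([]byte("a"), 1), y.ValueStruct{Value: []byte("v")})
//	vs := l.Get(y.KeyWithTs([]byte("a"), 1)) // vs.Value == "v"
//	l.DecrRef()                              // release the arena when done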
// Empty returns if the Skiplist is empty.
func (s *Skiplist) Empty() bool {
return s.findLast() == nil
}
// findLast returns the last element. If the list is empty (head only), we return nil. All the
// find functions will NEVER return the head node.
func (s *Skiplist) findLast() *node {
n := s.head
level := int(s.getHeight()) - 1
for {
next := s.getNext(n, level)
if next != nil {
n = next
continue
}
if level == 0 {
if n == s.head {
return nil
}
return n
}
level--
}
}
// Get gets the value associated with the key. It returns a valid value if it finds an equal
// or earlier version of the same key.
func (s *Skiplist) Get(key []byte) y.ValueStruct {
n, _ := s.findNear(key, false, true) // findGreaterOrEqual.
if n == nil {
return y.ValueStruct{}
}
nextKey := s.arena.getKey(n.keyOffset, n.keySize)
if !y.SameKey(key, nextKey) {
return y.ValueStruct{}
}
valOffset, valSize := n.getValueOffset()
vs := s.arena.getVal(valOffset, valSize)
vs.Version = y.ParseTs(nextKey)
return vs
}
// NewIterator returns a skiplist iterator. You have to Close() the iterator.
func (s *Skiplist) NewIterator() *Iterator {
s.IncrRef()
return &Iterator{list: s}
}
// MemSize returns the size of the Skiplist in terms of how much memory is used within its internal
// arena.
func (s *Skiplist) MemSize() int64 { return s.arena.size() }
// Iterator is an iterator over a skiplist object. For new objects, you just
// need to initialize Iterator.list.
type Iterator struct {
list *Skiplist
n *node
}
// Close frees the resources held by the iterator
func (s *Iterator) Close() error {
s.list.DecrRef()
return nil
}
// Valid returns true iff the iterator is positioned at a valid node.
func (s *Iterator) Valid() bool { return s.n != nil }
// Key returns the key at the current position.
func (s *Iterator) Key() []byte {
return s.list.arena.getKey(s.n.keyOffset, s.n.keySize)
}
// Value returns value.
func (s *Iterator) Value() y.ValueStruct {
valOffset, valSize := s.n.getValueOffset()
return s.list.arena.getVal(valOffset, valSize)
}
// Next advances to the next position.
func (s *Iterator) Next() {
y.AssertTrue(s.Valid())
s.n = s.list.getNext(s.n, 0)
}
// Prev moves to the previous position.
func (s *Iterator) Prev() {
y.AssertTrue(s.Valid())
s.n, _ = s.list.findNear(s.Key(), true, false) // find <. No equality allowed.
}
// Seek advances to the first entry with a key >= target.
func (s *Iterator) Seek(target []byte) {
s.n, _ = s.list.findNear(target, false, true) // find >=.
}
// SeekForPrev finds an entry with key <= target.
func (s *Iterator) SeekForPrev(target []byte) {
s.n, _ = s.list.findNear(target, true, true) // find <=.
}
// SeekToFirst seeks position at the first entry in list.
// Final state of iterator is Valid() iff list is not empty.
func (s *Iterator) SeekToFirst() {
s.n = s.list.getNext(s.list.head, 0)
}
// SeekToLast seeks position at the last entry in list.
// Final state of iterator is Valid() iff list is not empty.
func (s *Iterator) SeekToLast() {
s.n = s.list.findLast()
}
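// A forward scan over the whole list, as the memtable code would drain it (sketch; process
// is a hypothetical callback, and Close must be called to release the list reference):
//
//	it := l.NewIterator()
//	for it.SeekToFirst(); it.Valid(); it.Next() {
//		process(it.Key(), it.Value())
//	}
//	_ = it.Close()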
// UniIterator is a unidirectional memtable iterator. It is a thin wrapper around
// Iterator. We like to keep Iterator as before, because it is more powerful and
// we might support bidirectional iterators in the future.
type UniIterator struct {
iter *Iterator
reversed bool
}
// NewUniIterator returns a UniIterator.
func (s *Skiplist) NewUniIterator(reversed bool) *UniIterator {
return &UniIterator{
iter: s.NewIterator(),
reversed: reversed,
}
}
// Next implements y.Interface
func (s *UniIterator) Next() {
if !s.reversed {
s.iter.Next()
} else {
s.iter.Prev()
}
}
// Rewind implements y.Interface
func (s *UniIterator) Rewind() {
if !s.reversed {
s.iter.SeekToFirst()
} else {
s.iter.SeekToLast()
}
}
// Seek implements y.Interface
func (s *UniIterator) Seek(key []byte) {
if !s.reversed {
s.iter.Seek(key)
} else {
s.iter.SeekForPrev(key)
}
}
// Key implements y.Interface
func (s *UniIterator) Key() []byte { return s.iter.Key() }
// Value implements y.Interface
func (s *UniIterator) Value() y.ValueStruct { return s.iter.Value() }
// Valid implements y.Interface
func (s *UniIterator) Valid() bool { return s.iter.Valid() }
// Close implements y.Interface (and frees up the iter's resources)
func (s *UniIterator) Close() error { return s.iter.Close() }

475
vendor/github.com/dgraph-io/badger/skl/skl_test.go generated vendored Normal file

@ -0,0 +1,475 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package skl
import (
"encoding/binary"
"fmt"
"math/rand"
"strconv"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/require"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
const arenaSize = 1 << 20
func newValue(v int) []byte {
return []byte(fmt.Sprintf("%05d", v))
}
// length iterates over skiplist to give exact size.
func length(s *Skiplist) int {
x := s.getNext(s.head, 0)
count := 0
for x != nil {
count++
x = s.getNext(x, 0)
}
return count
}
func TestEmpty(t *testing.T) {
key := []byte("aaa")
l := NewSkiplist(arenaSize)
v := l.Get(key)
require.True(t, v.Value == nil) // Cannot use require.Nil for unsafe.Pointer nil.
for _, less := range []bool{true, false} {
for _, allowEqual := range []bool{true, false} {
n, found := l.findNear(key, less, allowEqual)
require.Nil(t, n)
require.False(t, found)
}
}
it := l.NewIterator()
require.False(t, it.Valid())
it.SeekToFirst()
require.False(t, it.Valid())
it.SeekToLast()
require.False(t, it.Valid())
it.Seek(key)
require.False(t, it.Valid())
l.DecrRef()
require.True(t, l.valid()) // Check the reference counting.
it.Close()
require.False(t, l.valid()) // Check the reference counting.
}
// TestBasic tests single-threaded inserts and updates and gets.
func TestBasic(t *testing.T) {
l := NewSkiplist(arenaSize)
val1 := newValue(42)
val2 := newValue(52)
val3 := newValue(62)
val4 := newValue(72)
// Try inserting values.
// Somehow require.Nil doesn't work when checking for unsafe.Pointer(nil).
l.Put(y.KeyWithTs([]byte("key1"), 0), y.ValueStruct{Value: val1, Meta: 55, UserMeta: 0})
l.Put(y.KeyWithTs([]byte("key2"), 2), y.ValueStruct{Value: val2, Meta: 56, UserMeta: 0})
l.Put(y.KeyWithTs([]byte("key3"), 0), y.ValueStruct{Value: val3, Meta: 57, UserMeta: 0})
v := l.Get(y.KeyWithTs([]byte("key"), 0))
require.True(t, v.Value == nil)
v = l.Get(y.KeyWithTs([]byte("key1"), 0))
require.True(t, v.Value != nil)
require.EqualValues(t, "00042", string(v.Value))
require.EqualValues(t, 55, v.Meta)
v = l.Get(y.KeyWithTs([]byte("key2"), 0))
require.True(t, v.Value == nil)
v = l.Get(y.KeyWithTs([]byte("key3"), 0))
require.True(t, v.Value != nil)
require.EqualValues(t, "00062", string(v.Value))
require.EqualValues(t, 57, v.Meta)
l.Put(y.KeyWithTs([]byte("key3"), 1), y.ValueStruct{Value: val4, Meta: 12, UserMeta: 0})
v = l.Get(y.KeyWithTs([]byte("key3"), 1))
require.True(t, v.Value != nil)
require.EqualValues(t, "00072", string(v.Value))
require.EqualValues(t, 12, v.Meta)
}
// TestConcurrentBasic tests concurrent writes followed by concurrent reads.
func TestConcurrentBasic(t *testing.T) {
const n = 1000
l := NewSkiplist(arenaSize)
var wg sync.WaitGroup
key := func(i int) []byte {
return y.KeyWithTs([]byte(fmt.Sprintf("%05d", i)), 0)
}
for i := 0; i < n; i++ {
wg.Add(1)
go func(i int) {
defer wg.Done()
l.Put(key(i),
y.ValueStruct{Value: newValue(i), Meta: 0, UserMeta: 0})
}(i)
}
wg.Wait()
// Check values. Concurrent reads.
for i := 0; i < n; i++ {
wg.Add(1)
go func(i int) {
defer wg.Done()
v := l.Get(key(i))
require.True(t, v.Value != nil)
require.EqualValues(t, newValue(i), v.Value)
}(i)
}
wg.Wait()
require.EqualValues(t, n, length(l))
}
// TestOneKey will read while writing to one single key.
func TestOneKey(t *testing.T) {
const n = 100
key := y.KeyWithTs([]byte("thekey"), 0)
l := NewSkiplist(arenaSize)
defer l.DecrRef()
var wg sync.WaitGroup
for i := 0; i < n; i++ {
wg.Add(1)
go func(i int) {
defer wg.Done()
l.Put(key, y.ValueStruct{Value: newValue(i), Meta: 0, UserMeta: 0})
}(i)
}
// We expect that at least one write completed, so that some read returns a value.
var sawValue int32
for i := 0; i < n; i++ {
wg.Add(1)
go func() {
defer wg.Done()
p := l.Get(key)
if p.Value == nil {
return
}
atomic.AddInt32(&sawValue, 1)
v, err := strconv.Atoi(string(p.Value))
require.NoError(t, err)
require.True(t, 0 <= v && v < n, fmt.Sprintf("invalid value %d", v))
}()
}
wg.Wait()
require.True(t, sawValue > 0)
require.EqualValues(t, 1, length(l))
}
func TestFindNear(t *testing.T) {
l := NewSkiplist(arenaSize)
defer l.DecrRef()
for i := 0; i < 1000; i++ {
key := fmt.Sprintf("%05d", i*10+5)
l.Put(y.KeyWithTs([]byte(key), 0), y.ValueStruct{Value: newValue(i), Meta: 0, UserMeta: 0})
}
n, eq := l.findNear(y.KeyWithTs([]byte("00001"), 0), false, false)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("00005"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("00001"), 0), false, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("00005"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("00001"), 0), true, false)
require.Nil(t, n)
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("00001"), 0), true, true)
require.Nil(t, n)
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("00005"), 0), false, false)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("00015"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("00005"), 0), false, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("00005"), 0), string(n.key(l.arena)))
require.True(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("00005"), 0), true, false)
require.Nil(t, n)
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("00005"), 0), true, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("00005"), 0), string(n.key(l.arena)))
require.True(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("05555"), 0), false, false)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("05565"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("05555"), 0), false, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("05555"), 0), string(n.key(l.arena)))
require.True(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("05555"), 0), true, false)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("05545"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("05555"), 0), true, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("05555"), 0), string(n.key(l.arena)))
require.True(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("05558"), 0), false, false)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("05565"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("05558"), 0), false, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("05565"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("05558"), 0), true, false)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("05555"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("05558"), 0), true, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("05555"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("09995"), 0), false, false)
require.Nil(t, n)
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("09995"), 0), false, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("09995"), 0), string(n.key(l.arena)))
require.True(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("09995"), 0), true, false)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("09985"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("09995"), 0), true, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("09995"), 0), string(n.key(l.arena)))
require.True(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("59995"), 0), false, false)
require.Nil(t, n)
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("59995"), 0), false, true)
require.Nil(t, n)
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("59995"), 0), true, false)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("09995"), 0), string(n.key(l.arena)))
require.False(t, eq)
n, eq = l.findNear(y.KeyWithTs([]byte("59995"), 0), true, true)
require.NotNil(t, n)
require.EqualValues(t, y.KeyWithTs([]byte("09995"), 0), string(n.key(l.arena)))
require.False(t, eq)
}
// TestIteratorNext tests a basic iteration over all nodes from the beginning.
func TestIteratorNext(t *testing.T) {
const n = 100
l := NewSkiplist(arenaSize)
defer l.DecrRef()
it := l.NewIterator()
defer it.Close()
require.False(t, it.Valid())
it.SeekToFirst()
require.False(t, it.Valid())
for i := n - 1; i >= 0; i-- {
l.Put(y.KeyWithTs([]byte(fmt.Sprintf("%05d", i)), 0),
y.ValueStruct{Value: newValue(i), Meta: 0, UserMeta: 0})
}
it.SeekToFirst()
for i := 0; i < n; i++ {
require.True(t, it.Valid())
v := it.Value()
require.EqualValues(t, newValue(i), v.Value)
it.Next()
}
require.False(t, it.Valid())
}
// TestIteratorPrev tests a basic iteration over all nodes from the end.
func TestIteratorPrev(t *testing.T) {
const n = 100
l := NewSkiplist(arenaSize)
defer l.DecrRef()
it := l.NewIterator()
defer it.Close()
require.False(t, it.Valid())
it.SeekToFirst()
require.False(t, it.Valid())
for i := 0; i < n; i++ {
l.Put(y.KeyWithTs([]byte(fmt.Sprintf("%05d", i)), 0),
y.ValueStruct{Value: newValue(i), Meta: 0, UserMeta: 0})
}
it.SeekToLast()
for i := n - 1; i >= 0; i-- {
require.True(t, it.Valid())
v := it.Value()
require.EqualValues(t, newValue(i), v.Value)
it.Prev()
}
require.False(t, it.Valid())
}
// TestIteratorSeek tests Seek and SeekForPrev.
func TestIteratorSeek(t *testing.T) {
const n = 100
l := NewSkiplist(arenaSize)
defer l.DecrRef()
it := l.NewIterator()
defer it.Close()
require.False(t, it.Valid())
it.SeekToFirst()
require.False(t, it.Valid())
// 1000, 1010, 1020, ..., 1990.
for i := n - 1; i >= 0; i-- {
v := i*10 + 1000
l.Put(y.KeyWithTs([]byte(fmt.Sprintf("%05d", i*10+1000)), 0),
y.ValueStruct{Value: newValue(v), Meta: 0, UserMeta: 0})
}
it.SeekToFirst()
require.True(t, it.Valid())
v := it.Value()
require.EqualValues(t, "01000", v.Value)
it.Seek(y.KeyWithTs([]byte("01000"), 0))
require.True(t, it.Valid())
v = it.Value()
require.EqualValues(t, "01000", v.Value)
it.Seek(y.KeyWithTs([]byte("01005"), 0))
require.True(t, it.Valid())
v = it.Value()
require.EqualValues(t, "01010", v.Value)
it.Seek(y.KeyWithTs([]byte("01010"), 0))
require.True(t, it.Valid())
v = it.Value()
require.EqualValues(t, "01010", v.Value)
it.Seek(y.KeyWithTs([]byte("99999"), 0))
require.False(t, it.Valid())
// Try SeekForPrev.
it.SeekForPrev(y.KeyWithTs([]byte("00"), 0))
require.False(t, it.Valid())
it.SeekForPrev(y.KeyWithTs([]byte("01000"), 0))
require.True(t, it.Valid())
v = it.Value()
require.EqualValues(t, "01000", v.Value)
it.SeekForPrev(y.KeyWithTs([]byte("01005"), 0))
require.True(t, it.Valid())
v = it.Value()
require.EqualValues(t, "01000", v.Value)
it.SeekForPrev(y.KeyWithTs([]byte("01010"), 0))
require.True(t, it.Valid())
v = it.Value()
require.EqualValues(t, "01010", v.Value)
it.SeekForPrev(y.KeyWithTs([]byte("99999"), 0))
require.True(t, it.Valid())
v = it.Value()
require.EqualValues(t, "01990", v.Value)
}
func randomKey(rng *rand.Rand) []byte {
b := make([]byte, 8)
key := rng.Uint32()
key2 := rng.Uint32()
binary.LittleEndian.PutUint32(b, key)
binary.LittleEndian.PutUint32(b[4:], key2)
return y.KeyWithTs(b, 0)
}
// Standard benchmark. Some fraction of the operations are reads, the rest are writes. Unlike
// the map variant below, the skiplist writes need no mutex.
func BenchmarkReadWrite(b *testing.B) {
value := newValue(123)
for i := 0; i <= 10; i++ {
readFrac := float32(i) / 10.0
b.Run(fmt.Sprintf("frac_%d", i), func(b *testing.B) {
l := NewSkiplist(int64((b.N + 1) * MaxNodeSize))
defer l.DecrRef()
b.ResetTimer()
var count int32 // shared across parallel goroutines; update atomically
b.RunParallel(func(pb *testing.PB) {
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
for pb.Next() {
if rng.Float32() < readFrac {
v := l.Get(randomKey(rng))
if v.Value != nil {
atomic.AddInt32(&count, 1)
}
} else {
l.Put(randomKey(rng), y.ValueStruct{Value: value, Meta: 0, UserMeta: 0})
}
}
})
})
}
}
// Standard test. Some fraction is read. Some fraction is write. Writes have
// to go through mutex lock.
func BenchmarkReadWriteMap(b *testing.B) {
value := newValue(123)
for i := 0; i <= 10; i++ {
readFrac := float32(i) / 10.0
b.Run(fmt.Sprintf("frac_%d", i), func(b *testing.B) {
m := make(map[string][]byte)
var mutex sync.RWMutex
b.ResetTimer()
var count int32 // shared across parallel goroutines; update atomically
b.RunParallel(func(pb *testing.PB) {
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
for pb.Next() {
if rand.Float32() < readFrac {
mutex.RLock()
_, ok := m[string(randomKey(rng))]
mutex.RUnlock()
if ok {
atomic.AddInt32(&count, 1)
}
} else {
mutex.Lock()
m[string(randomKey(rng))] = value
mutex.Unlock()
}
}
})
})
}
}

347
vendor/github.com/dgraph-io/badger/stream.go generated vendored Normal file

@ -0,0 +1,347 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"context"
"sync"
"time"
humanize "gx/ipfs/QmQMxG9D52TirZd9eLA37nxiNspnMRkKbyPWrVAa1gvtSy/go-humanize"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/pb"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
const pageSize = 4 << 20 // 4MB
// Stream provides a framework to concurrently iterate over a snapshot of Badger, pick up
// key-values, batch them up and call Send. Stream does concurrent iteration over many smaller key
// ranges. It does NOT send keys in lexicographical sorted order. To get keys in sorted
// order, use Iterator.
type Stream struct {
// Prefix to only iterate over certain range of keys. If set to nil (default), Stream would
// iterate over the entire DB.
Prefix []byte
// Number of goroutines to use for iterating over key ranges. Defaults to 16.
NumGo int
// Badger would produce log entries in Infof to indicate the progress of Stream. LogPrefix can
// be used to help differentiate them from other activities. Default is "Badger.Stream".
LogPrefix string
// ChooseKey is invoked each time a new key is encountered. Note that this is not called
// on every version of the value, only the first encountered version (i.e. the highest version
// of the value a key has). ChooseKey can be left nil to select all keys.
//
// Note: Calls to ChooseKey are concurrent.
ChooseKey func(item *Item) bool
// KeyToList, similar to ChooseKey, is only invoked on the highest version of the value. It
// is up to the caller to iterate over the versions and generate zero, one or more KVs. It
// is expected that the user would advance the iterator to go through the versions of the
// values. However, the user MUST immediately return from this function on the first encounter
// with a mismatching key. See example usage in ToList function. Can be left nil to use ToList
// function by default.
//
// Note: Calls to KeyToList are concurrent.
KeyToList func(key []byte, itr *Iterator) (*pb.KVList, error)
// This is the method where Stream sends the final output. All calls to Send are done by a
// single goroutine, i.e. logic within Send method can expect single threaded execution.
Send func(*pb.KVList) error
readTs uint64
db *DB
rangeCh chan keyRange
kvChan chan *pb.KVList
}
// ToList is a default implementation of KeyToList. It picks up all valid versions of the key,
// skipping over deleted or expired keys.
func (st *Stream) ToList(key []byte, itr *Iterator) (*pb.KVList, error) {
list := &pb.KVList{}
for ; itr.Valid(); itr.Next() {
item := itr.Item()
if item.IsDeletedOrExpired() {
break
}
if !bytes.Equal(key, item.Key()) {
// Break out on the first encounter with another key.
break
}
valCopy, err := item.ValueCopy(nil)
if err != nil {
return nil, err
}
kv := &pb.KV{
Key: item.KeyCopy(nil),
Value: valCopy,
UserMeta: []byte{item.UserMeta()},
Version: item.Version(),
ExpiresAt: item.ExpiresAt(),
}
list.Kv = append(list.Kv, kv)
if st.db.opt.NumVersionsToKeep == 1 {
break
}
if item.DiscardEarlierVersions() {
break
}
}
return list, nil
}
// keyRange is [start, end), including start, excluding end. Do ensure that the start,
// end byte slices are owned by keyRange struct.
func (st *Stream) produceRanges(ctx context.Context) {
splits := st.db.KeySplits(st.Prefix)
start := y.SafeCopy(nil, st.Prefix)
for _, key := range splits {
st.rangeCh <- keyRange{left: start, right: y.SafeCopy(nil, []byte(key))}
start = y.SafeCopy(nil, []byte(key))
}
// Send out the final range. This also covers the edge case where the prefix is empty and no
// splits exist: we still produce at least one keyRange.
st.rangeCh <- keyRange{left: start}
close(st.rangeCh)
}
// produceKVs picks up ranges from rangeCh, generates KV lists and sends them to kvChan.
func (st *Stream) produceKVs(ctx context.Context) error {
var size int
var txn *Txn
if st.readTs > 0 {
txn = st.db.NewTransactionAt(st.readTs, false)
} else {
txn = st.db.NewTransaction(false)
}
defer txn.Discard()
iterate := func(kr keyRange) error {
iterOpts := DefaultIteratorOptions
iterOpts.AllVersions = true
iterOpts.Prefix = st.Prefix
iterOpts.PrefetchValues = false
itr := txn.NewIterator(iterOpts)
defer itr.Close()
outList := new(pb.KVList)
var prevKey []byte
for itr.Seek(kr.left); itr.Valid(); {
// itr.Valid would only return true for keys with the provided Prefix in iterOpts.
item := itr.Item()
if bytes.Equal(item.Key(), prevKey) {
itr.Next()
continue
}
prevKey = append(prevKey[:0], item.Key()...)
// Check if we reached the end of the key range.
if len(kr.right) > 0 && bytes.Compare(item.Key(), kr.right) >= 0 {
break
}
// Check if we should pick this key.
if st.ChooseKey != nil && !st.ChooseKey(item) {
continue
}
// Now convert to key value.
list, err := st.KeyToList(item.KeyCopy(nil), itr)
if err != nil {
return err
}
if list == nil || len(list.Kv) == 0 {
continue
}
outList.Kv = append(outList.Kv, list.Kv...)
size += list.Size()
if size >= pageSize {
st.kvChan <- outList
outList = new(pb.KVList)
size = 0
}
}
if len(outList.Kv) > 0 {
st.kvChan <- outList
}
return nil
}
for {
select {
case kr, ok := <-st.rangeCh:
if !ok {
// Done with the keys.
return nil
}
if err := iterate(kr); err != nil {
return err
}
case <-ctx.Done():
return ctx.Err()
}
}
}
func (st *Stream) streamKVs(ctx context.Context) error {
var count int
var bytesSent uint64
t := time.NewTicker(time.Second)
defer t.Stop()
now := time.Now()
slurp := func(batch *pb.KVList) error {
loop:
for {
select {
case kvs, ok := <-st.kvChan:
if !ok {
break loop
}
y.AssertTrue(kvs != nil)
batch.Kv = append(batch.Kv, kvs.Kv...)
default:
break loop
}
}
sz := uint64(batch.Size())
bytesSent += sz
count += len(batch.Kv)
t := time.Now()
if err := st.Send(batch); err != nil {
return err
}
st.db.opt.Infof("%s Created batch of size: %s in %s.\n",
st.LogPrefix, humanize.Bytes(sz), time.Since(t))
return nil
}
outer:
for {
var batch *pb.KVList
select {
case <-ctx.Done():
return ctx.Err()
case <-t.C:
dur := time.Since(now)
durSec := uint64(dur.Seconds())
if durSec == 0 {
continue
}
speed := bytesSent / durSec
st.db.opt.Infof("%s Time elapsed: %s, bytes sent: %s, speed: %s/sec\n", st.LogPrefix,
y.FixedDuration(dur), humanize.Bytes(bytesSent), humanize.Bytes(speed))
case kvs, ok := <-st.kvChan:
if !ok {
break outer
}
y.AssertTrue(kvs != nil)
batch = kvs
if err := slurp(batch); err != nil {
return err
}
}
}
st.db.opt.Infof("%s Sent %d keys\n", st.LogPrefix, count)
return nil
}
// Orchestrate runs Stream. It picks up ranges from the SSTables, then runs NumGo
// goroutines to iterate over these ranges and batch up KVs in lists. It concurrently runs a single
// goroutine to pick up these lists, batch them further, and hand them to the Send callback.
// Orchestrate also logs progress via Infof, using the provided LogPrefix. Note that all calls to
// Send are serial. If any of these steps encounters an error, Orchestrate stops execution and
// returns that error. Orchestrate can be called multiple times, but only serially.
func (st *Stream) Orchestrate(ctx context.Context) error {
st.rangeCh = make(chan keyRange, 3) // Contains keys for posting lists.
// kvChan should only have a small capacity to ensure that we don't buffer up too much data if
// sending is slow. Page size is set to 4MB, which lazily caps the size of each
// KVList. A channel size of 16 therefore bounds the buffer at roughly 4MB * 16 = 64MB.
st.kvChan = make(chan *pb.KVList, 16)
if st.KeyToList == nil {
st.KeyToList = st.ToList
}
// Picks up ranges from Badger, and sends them to rangeCh.
go st.produceRanges(ctx)
errCh := make(chan error, 1) // Stores the first error returned by produceKVs.
var wg sync.WaitGroup
for i := 0; i < st.NumGo; i++ {
wg.Add(1)
go func() {
defer wg.Done()
// Picks up ranges from rangeCh, generates KV lists, and sends them to kvChan.
if err := st.produceKVs(ctx); err != nil {
select {
case errCh <- err:
default:
}
}
}()
}
// Pick up key-values from kvChan and send to stream.
kvErr := make(chan error, 1)
go func() {
// Picks up KV lists from kvChan, and sends them to Output.
kvErr <- st.streamKVs(ctx)
}()
wg.Wait() // Wait for produceKVs to be over.
close(st.kvChan) // Now we can close kvChan.
select {
case err := <-errCh: // Check error from produceKVs.
return err
default:
}
// Wait for key streaming to be over.
err := <-kvErr
return err
}
func (db *DB) newStream() *Stream {
return &Stream{db: db, NumGo: 16, LogPrefix: "Badger.Stream"}
}
// NewStream creates a new Stream.
func (db *DB) NewStream() *Stream {
if db.opt.managedTxns {
panic("This API can not be called in managed mode.")
}
return db.newStream()
}
// NewStreamAt creates a new Stream at a particular timestamp. Should only be used with managed DB.
func (db *DB) NewStreamAt(readTs uint64) *Stream {
if !db.opt.managedTxns {
panic("This API can only be called in managed mode.")
}
stream := db.newStream()
stream.readTs = readTs
return stream
}
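// A minimal usage sketch (hypothetical Send handler; NumGo and LogPrefix keep the defaults
// documented above):
//
//	stream := db.NewStream()
//	stream.Send = func(list *pb.KVList) error {
//		for _, kv := range list.Kv {
//			fmt.Printf("%s => %d bytes\n", kv.Key, len(kv.Value))
//		}
//		return nil
//	}
//	if err := stream.Orchestrate(context.Background()); err != nil {
//		// handle the first error hit by any stage
//	}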

169
vendor/github.com/dgraph-io/badger/stream_test.go generated vendored Normal file

@ -0,0 +1,169 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"context"
"fmt"
"io/ioutil"
"math"
"os"
"strconv"
"strings"
"testing"
"github.com/stretchr/testify/require"
bpb "gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/pb"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
func openManaged(dir string) (*DB, error) {
opt := DefaultOptions
opt.Dir = dir
opt.ValueDir = dir
return OpenManaged(opt)
}
func keyWithPrefix(prefix string, k int) []byte {
return []byte(fmt.Sprintf("%s-%d", prefix, k))
}
func keyToInt(k []byte) (string, int) {
splits := strings.Split(string(k), "-")
key, err := strconv.Atoi(splits[1])
y.Check(err)
return splits[0], key
}
func value(k int) []byte {
return []byte(fmt.Sprintf("%08d", k))
}
type collector struct {
kv []*bpb.KV
}
func (c *collector) Send(list *bpb.KVList) error {
c.kv = append(c.kv, list.Kv...)
return nil
}
var ctxb = context.Background()
func TestStream(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
db, err := openManaged(dir)
require.NoError(t, err)
var count int
for _, prefix := range []string{"p0", "p1", "p2"} {
txn := db.NewTransactionAt(math.MaxUint64, true)
for i := 1; i <= 100; i++ {
require.NoError(t, txn.Set(keyWithPrefix(prefix, i), value(i)))
count++
}
require.NoError(t, txn.CommitAt(5, nil))
}
stream := db.NewStreamAt(math.MaxUint64)
stream.LogPrefix = "Testing"
c := &collector{}
stream.Send = func(list *bpb.KVList) error {
return c.Send(list)
}
// Test case 1. Retrieve everything.
err = stream.Orchestrate(ctxb)
require.NoError(t, err)
require.Equal(t, 300, len(c.kv), "Expected 300. Got: %d", len(c.kv))
m := make(map[string]int)
for _, kv := range c.kv {
prefix, ki := keyToInt(kv.Key)
expected := value(ki)
require.Equal(t, expected, kv.Value)
m[prefix]++
}
require.Equal(t, 3, len(m))
for pred, count := range m {
require.Equal(t, 100, count, "Count mismatch for pred: %s", pred)
}
// Test case 2. Retrieve only 1 predicate.
stream.Prefix = []byte("p1")
c.kv = c.kv[:0]
err = stream.Orchestrate(ctxb)
require.NoError(t, err)
require.Equal(t, 100, len(c.kv), "Expected 100. Got: %d", len(c.kv))
m = make(map[string]int)
for _, kv := range c.kv {
prefix, ki := keyToInt(kv.Key)
expected := value(ki)
require.Equal(t, expected, kv.Value)
m[prefix]++
}
require.Equal(t, 1, len(m))
for pred, count := range m {
require.Equal(t, 100, count, "Count mismatch for pred: %s", pred)
}
// Test case 3. Retrieve select keys within the predicate.
c.kv = c.kv[:0]
stream.ChooseKey = func(item *Item) bool {
_, k := keyToInt(item.Key())
return k%2 == 0
}
err = stream.Orchestrate(ctxb)
require.NoError(t, err)
require.Equal(t, 50, len(c.kv), "Expected 50. Got: %d", len(c.kv))
m = make(map[string]int)
for _, kv := range c.kv {
prefix, ki := keyToInt(kv.Key)
expected := value(ki)
require.Equal(t, expected, kv.Value)
m[prefix]++
}
require.Equal(t, 1, len(m))
for pred, count := range m {
require.Equal(t, 50, count, "Count mismatch for pred: %s", pred)
}
// Test case 4. Retrieve select keys from all predicates.
c.kv = c.kv[:0]
stream.Prefix = []byte{}
err = stream.Orchestrate(ctxb)
require.NoError(t, err)
require.Equal(t, 150, len(c.kv), "Expected 150. Got: %d", len(c.kv))
m = make(map[string]int)
for _, kv := range c.kv {
prefix, ki := keyToInt(kv.Key)
expected := value(ki)
require.Equal(t, expected, kv.Value)
m[prefix]++
}
require.Equal(t, 3, len(m))
for pred, count := range m {
require.Equal(t, 50, count, "Count mismatch for pred: %s", pred)
}
}

132
vendor/github.com/dgraph-io/badger/structs.go generated vendored Normal file

@ -0,0 +1,132 @@
package badger
import (
"bytes"
"encoding/binary"
"fmt"
"hash/crc32"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
type valuePointer struct {
Fid uint32
Len uint32
Offset uint32
}
func (p valuePointer) Less(o valuePointer) bool {
if p.Fid != o.Fid {
return p.Fid < o.Fid
}
if p.Offset != o.Offset {
return p.Offset < o.Offset
}
return p.Len < o.Len
}
func (p valuePointer) IsZero() bool {
return p.Fid == 0 && p.Offset == 0 && p.Len == 0
}
const vptrSize = 12
// Encode encodes Pointer into byte buffer.
func (p valuePointer) Encode(b []byte) []byte {
binary.BigEndian.PutUint32(b[:4], p.Fid)
binary.BigEndian.PutUint32(b[4:8], p.Len)
binary.BigEndian.PutUint32(b[8:12], p.Offset)
return b[:vptrSize]
}
func (p *valuePointer) Decode(b []byte) {
p.Fid = binary.BigEndian.Uint32(b[:4])
p.Len = binary.BigEndian.Uint32(b[4:8])
p.Offset = binary.BigEndian.Uint32(b[8:12])
}
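// An encode/decode round-trip sketch with hypothetical values; the pointer always occupies
// vptrSize bytes, laid out as big-endian Fid | Len | Offset:
//
//	var buf [vptrSize]byte
//	in := valuePointer{Fid: 7, Len: 64, Offset: 4096}
//	var out valuePointer
//	out.Decode(in.Encode(buf[:]))
//	// out == in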
// header is used in value log as a header before Entry.
type header struct {
klen uint32
vlen uint32
expiresAt uint64
meta byte
userMeta byte
}
const (
headerBufSize = 18
)
func (h header) Encode(out []byte) {
y.AssertTrue(len(out) >= headerBufSize)
binary.BigEndian.PutUint32(out[0:4], h.klen)
binary.BigEndian.PutUint32(out[4:8], h.vlen)
binary.BigEndian.PutUint64(out[8:16], h.expiresAt)
out[16] = h.meta
out[17] = h.userMeta
}
// Decode decodes h from buf.
func (h *header) Decode(buf []byte) {
h.klen = binary.BigEndian.Uint32(buf[0:4])
h.vlen = binary.BigEndian.Uint32(buf[4:8])
h.expiresAt = binary.BigEndian.Uint64(buf[8:16])
h.meta = buf[16]
h.userMeta = buf[17]
}
// Entry provides Key, Value, UserMeta and ExpiresAt. This struct can be used by the user to set data.
type Entry struct {
Key []byte
Value []byte
UserMeta byte
ExpiresAt uint64 // time.Unix
meta byte
// Fields maintained internally.
offset uint32
}
func (e *Entry) estimateSize(threshold int) int {
if len(e.Value) < threshold {
return len(e.Key) + len(e.Value) + 2 // Meta, UserMeta
}
return len(e.Key) + 12 + 2 // 12 for ValuePointer, 2 for metas.
}
// encodeEntry encodes e to buf. It returns the number of bytes written.
func encodeEntry(e *Entry, buf *bytes.Buffer) (int, error) {
h := header{
klen: uint32(len(e.Key)),
vlen: uint32(len(e.Value)),
expiresAt: e.ExpiresAt,
meta: e.meta,
userMeta: e.UserMeta,
}
var headerEnc [headerBufSize]byte
h.Encode(headerEnc[:])
hash := crc32.New(y.CastagnoliCrcTable)
buf.Write(headerEnc[:])
hash.Write(headerEnc[:])
buf.Write(e.Key)
hash.Write(e.Key)
buf.Write(e.Value)
hash.Write(e.Value)
var crcBuf [crc32.Size]byte
binary.BigEndian.PutUint32(crcBuf[:], hash.Sum32())
buf.Write(crcBuf[:])
return len(headerEnc) + len(e.Key) + len(e.Value) + len(crcBuf), nil
}
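// The resulting value-log record layout (the CRC covers header, key and value, matching the
// hash.Write calls above):
//
//	| header (18) | key (klen) | value (vlen) | crc32 (4) |
//
// so the returned length is headerBufSize + len(e.Key) + len(e.Value) + crc32.Size.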
func (e Entry) print(prefix string) {
fmt.Printf("%s Key: %s Meta: %d UserMeta: %d Offset: %d len(val)=%d",
prefix, e.Key, e.meta, e.UserMeta, e.offset, len(e.Value))
}

51
vendor/github.com/dgraph-io/badger/table/README.md generated vendored Normal file

@ -0,0 +1,51 @@
# BenchmarkRead
```
$ go test -bench Read$ -count 3
Size of table: 105843444
BenchmarkRead-8 3 343846914 ns/op
BenchmarkRead-8 3 351790907 ns/op
BenchmarkRead-8 3 351762823 ns/op
```
The size of the table is 105,843,444 bytes, which is ~101M.
The rate is ~287M/s, which matches our read speed. This is using mmap.
To read a 64M table, this would take ~0.22s, which is negligible.
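For reference, these rates follow from dividing the table size by the per-op time. A quick sketch of the arithmetic, using the mean of the three runs above:

```
package main

import "fmt"

func main() {
	size := 105843444.0       // table size in bytes, from the output above
	secs := 349133548.0 / 1e9 // mean ns/op across the three runs
	fmt.Printf("%.0f MiB/s\n", size/(1<<20)/secs) // ~289, i.e. the ~287M/s quoted above
}
```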
```
$ go test -bench BenchmarkReadAndBuild -count 3
BenchmarkReadAndBuild-8 1 2341034225 ns/op
BenchmarkReadAndBuild-8 1 2346349671 ns/op
BenchmarkReadAndBuild-8 1 2364064576 ns/op
```
The rate is ~43M/s. To build a ~64M table, this would take ~1.5s. Note that this
does NOT include flushing the table to disk. All we are doing above is
reading one table (mmaped) and writing one table in memory.
The table building takes 1.5-0.22 ~ 1.3s.
If we are writing out up to 10 tables, this would take 1.5*10 ~ 15s, and ~13s
is spent building the tables.
When running populate, building one table in memory tends to take ~1.5s to ~2.5s
on my system. Where does this overhead come from? Let's investigate the merging.
Below, we merge 5 tables. The total size remains unchanged at ~101M.
```
$ go test -bench ReadMerged -count 3
BenchmarkReadMerged-8 1 1321190264 ns/op
BenchmarkReadMerged-8 1 1296958737 ns/op
BenchmarkReadMerged-8 1 1314381178 ns/op
```
The rate is ~76M/s. To build a 64M table, this would take ~0.84s. The writing
takes ~1.3s as we saw above. So in total, we expect around 0.84+1.3 ~ 2.1s.
This roughly matches what we observe when running populate. There might be
some additional overhead due to the concurrent writes going on while flushing the
table to disk. Also, the tables tend to be slightly bigger than 64M.

235
vendor/github.com/dgraph-io/badger/table/builder.go generated vendored Normal file

@ -0,0 +1,235 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package table
import (
"bytes"
"encoding/binary"
"io"
"math"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmWaLViWQF8jgyoLLqqcSrnp6dJpHESiJfzor1vrfDyTZf/bbloom"
)
var (
restartInterval = 100 // Might want to change this to be based on total size instead of numKeys.
)
func newBuffer(sz int) *bytes.Buffer {
b := new(bytes.Buffer)
b.Grow(sz)
return b
}
type header struct {
plen uint16 // Overlap with base key.
klen uint16 // Length of the diff.
vlen uint16 // Length of value.
prev uint32 // Offset for the previous key-value pair. The offset is relative to block base offset.
}
// Encode encodes the header.
func (h header) Encode(b []byte) {
binary.BigEndian.PutUint16(b[0:2], h.plen)
binary.BigEndian.PutUint16(b[2:4], h.klen)
binary.BigEndian.PutUint16(b[4:6], h.vlen)
binary.BigEndian.PutUint32(b[6:10], h.prev)
}
// Decode decodes the header.
func (h *header) Decode(buf []byte) int {
h.plen = binary.BigEndian.Uint16(buf[0:2])
h.klen = binary.BigEndian.Uint16(buf[2:4])
h.vlen = binary.BigEndian.Uint16(buf[4:6])
h.prev = binary.BigEndian.Uint32(buf[6:10])
return h.Size()
}
// Size returns size of the header. Currently it's just a constant.
func (h header) Size() int { return 10 }
// Builder is used in building a table.
type Builder struct {
counter int // Number of keys written for the current block.
// Typically tens or hundreds of megabytes. This is for a single file.
buf *bytes.Buffer
baseKey []byte // Base key for the current block.
baseOffset uint32 // Offset for the current block.
restarts []uint32 // Base offsets of every block.
// Tracks offset for the previous key-value pair. Offset is relative to block base offset.
prevOffset uint32
keyBuf *bytes.Buffer
keyCount int
}
// NewTableBuilder makes a new table Builder.
func NewTableBuilder() *Builder {
return &Builder{
keyBuf: newBuffer(1 << 20),
buf: newBuffer(1 << 20),
prevOffset: math.MaxUint32, // Used for the first element!
}
}
// Close closes the TableBuilder.
func (b *Builder) Close() {}
// Empty returns whether it's empty.
func (b *Builder) Empty() bool { return b.buf.Len() == 0 }
// keyDiff returns a suffix of newKey that is different from b.baseKey.
func (b Builder) keyDiff(newKey []byte) []byte {
var i int
for i = 0; i < len(newKey) && i < len(b.baseKey); i++ {
if newKey[i] != b.baseKey[i] {
break
}
}
return newKey[i:]
}
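// A prefix-compression sketch with hypothetical keys: each entry only pays for the suffix
// that differs from the block's base key.
//
//	b.baseKey = []byte("app-key-0001")
//	diff := b.keyDiff([]byte("app-key-0002")) // "2"; header.plen would be 11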
func (b *Builder) addHelper(key []byte, v y.ValueStruct) {
// Add key to bloom filter.
if len(key) > 0 {
var klen [2]byte
keyNoTs := y.ParseKey(key)
binary.BigEndian.PutUint16(klen[:], uint16(len(keyNoTs)))
b.keyBuf.Write(klen[:])
b.keyBuf.Write(keyNoTs)
b.keyCount++
}
// diffKey stores the difference of key with baseKey.
var diffKey []byte
if len(b.baseKey) == 0 {
// Make a copy. Builder should not keep references. Otherwise, caller has to be very careful
// and will have to make copies of keys every time they add to builder, which is even worse.
b.baseKey = append(b.baseKey[:0], key...)
diffKey = key
} else {
diffKey = b.keyDiff(key)
}
h := header{
plen: uint16(len(key) - len(diffKey)),
klen: uint16(len(diffKey)),
vlen: uint16(v.EncodedSize()),
prev: b.prevOffset, // prevOffset is the location of the last key-value added.
}
b.prevOffset = uint32(b.buf.Len()) - b.baseOffset // Remember current offset for the next Add call.
// Layout: header, diffKey, value.
var hbuf [10]byte
h.Encode(hbuf[:])
b.buf.Write(hbuf[:])
b.buf.Write(diffKey) // We only need to store the key difference.
v.EncodeTo(b.buf)
b.counter++ // Increment number of keys added for this current block.
}
func (b *Builder) finishBlock() {
// When we are at the end of the block and Valid=false, and the user wants to do a Prev,
// we need a dummy header to tell us the offset of the previous key-value pair.
b.addHelper([]byte{}, y.ValueStruct{})
}
// Add adds a key-value pair to the block.
// A new block is started once b.counter reaches restartInterval.
func (b *Builder) Add(key []byte, value y.ValueStruct) error {
if b.counter >= restartInterval {
b.finishBlock()
// Start a new block. Initialize the block.
b.restarts = append(b.restarts, uint32(b.buf.Len()))
b.counter = 0
b.baseKey = []byte{}
b.baseOffset = uint32(b.buf.Len())
b.prevOffset = math.MaxUint32 // First key-value pair of block has header.prev=MaxInt.
}
b.addHelper(key, value)
return nil // Currently, there is no meaningful error.
}
// ReachedCapacity returns true if the builder's *rough* final size, counting the header
// which is not yet written, exceeds cap.
// TODO: Look into why there is a discrepancy between this estimate and the actual size.
// I suspect it is because of the Write(empty, empty) at the end. The diff can vary.
func (b *Builder) ReachedCapacity(cap int64) bool {
estimateSz := b.buf.Len() + 8 /* empty header */ + 4*len(b.restarts) + 8 // 8 = end of buf offset + len(restarts).
return int64(estimateSz) > cap
}
// blockIndex generates the block index for the table.
// It is mainly a list of all the block base offsets.
func (b *Builder) blockIndex() []byte {
// Store the end offset, so we know the length of the final block.
b.restarts = append(b.restarts, uint32(b.buf.Len()))
// Add 4 because we want to write out number of restarts at the end.
sz := 4*len(b.restarts) + 4
out := make([]byte, sz)
buf := out
for _, r := range b.restarts {
binary.BigEndian.PutUint32(buf[:4], r)
buf = buf[4:]
}
binary.BigEndian.PutUint32(buf[:4], uint32(len(b.restarts)))
return out
}
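// For example, with restarts [0, 4096, 8192] (the last entry being the end offset appended
// above), the index is four big-endian uint32s:
//
//	| 0 | 4096 | 8192 | 3 |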
// Finish finishes the table by appending the index.
func (b *Builder) Finish() []byte {
bf, _ := bbloom.New(float64(b.keyCount), 0.01)
var klen [2]byte
key := make([]byte, 1024)
for {
if _, err := b.keyBuf.Read(klen[:]); err == io.EOF {
break
} else if err != nil {
y.Check(err)
}
kl := int(binary.BigEndian.Uint16(klen[:]))
if cap(key) < kl {
key = make([]byte, 2*int(kl)) // Grow with headroom; keep the arithmetic in int, since 2 * uint16 could overflow.
}
key = key[:kl]
y.Check2(b.keyBuf.Read(key))
bf.Add(key)
}
b.finishBlock() // This will never start a new block.
index := b.blockIndex()
b.buf.Write(index)
// Write bloom filter.
bdata, _ := bf.JSONMarshal()
n, err := b.buf.Write(bdata)
y.Check(err)
var buf [4]byte
binary.BigEndian.PutUint32(buf[:], uint32(n))
b.buf.Write(buf[:])
return b.buf.Bytes()
}

539
vendor/github.com/dgraph-io/badger/table/iterator.go generated vendored Normal file

@ -0,0 +1,539 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package table
import (
"bytes"
"io"
"math"
"sort"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
type blockIterator struct {
data []byte
pos uint32
err error
baseKey []byte
key []byte
val []byte
init bool
last header // The last header we saw.
}
func (itr *blockIterator) Reset() {
itr.pos = 0
itr.err = nil
itr.baseKey = []byte{}
itr.key = []byte{}
itr.val = []byte{}
itr.init = false
itr.last = header{}
}
func (itr *blockIterator) Init() {
if !itr.init {
itr.Next()
}
}
func (itr *blockIterator) Valid() bool {
return itr != nil && itr.err == nil
}
func (itr *blockIterator) Error() error {
return itr.err
}
func (itr *blockIterator) Close() {}
var (
origin = 0
current = 1
)
// Seek brings us to the first block element that is >= input key.
func (itr *blockIterator) Seek(key []byte, whence int) {
itr.err = nil
switch whence {
case origin:
itr.Reset()
case current:
}
var done bool
for itr.Init(); itr.Valid(); itr.Next() {
k := itr.Key()
if y.CompareKeys(k, key) >= 0 {
// We are done as k is >= key.
done = true
break
}
}
if !done {
itr.err = io.EOF
}
}
func (itr *blockIterator) SeekToFirst() {
itr.err = nil
itr.Init()
}
// SeekToLast brings us to the last element. Valid should return true.
func (itr *blockIterator) SeekToLast() {
itr.err = nil
for itr.Init(); itr.Valid(); itr.Next() {
}
itr.Prev()
}
// parseKV would allocate a new byte slice for key and for value.
func (itr *blockIterator) parseKV(h header) {
if cap(itr.key) < int(h.plen+h.klen) {
sz := int(h.plen) + int(h.klen) // Convert to int before adding to avoid uint16 overflow.
itr.key = make([]byte, 2*sz)
}
itr.key = itr.key[:h.plen+h.klen]
copy(itr.key, itr.baseKey[:h.plen])
copy(itr.key[h.plen:], itr.data[itr.pos:itr.pos+uint32(h.klen)])
itr.pos += uint32(h.klen)
if itr.pos+uint32(h.vlen) > uint32(len(itr.data)) {
itr.err = errors.Errorf("Value exceeded size of block: %d %d %d %d %v",
itr.pos, h.klen, h.vlen, len(itr.data), h)
return
}
itr.val = y.SafeCopy(itr.val, itr.data[itr.pos:itr.pos+uint32(h.vlen)])
itr.pos += uint32(h.vlen)
}
func (itr *blockIterator) Next() {
itr.init = true
itr.err = nil
if itr.pos >= uint32(len(itr.data)) {
itr.err = io.EOF
return
}
var h header
itr.pos += uint32(h.Decode(itr.data[itr.pos:]))
itr.last = h // Store the last header.
if h.klen == 0 && h.plen == 0 {
// Last entry in the table.
itr.err = io.EOF
return
}
// Populate baseKey if it isn't set yet. This would only happen for the first Next.
if len(itr.baseKey) == 0 {
// This should be the first Next() for this block. Hence, prefix length should be zero.
y.AssertTrue(h.plen == 0)
itr.baseKey = itr.data[itr.pos : itr.pos+uint32(h.klen)]
}
itr.parseKV(h)
}
func (itr *blockIterator) Prev() {
if !itr.init {
return
}
itr.err = nil
if itr.last.prev == math.MaxUint32 {
// This is the first element of the block!
itr.err = io.EOF
itr.pos = 0
return
}
// Move back using current header's prev.
itr.pos = itr.last.prev
var h header
y.AssertTruef(itr.pos < uint32(len(itr.data)), "%d %d", itr.pos, len(itr.data))
itr.pos += uint32(h.Decode(itr.data[itr.pos:]))
itr.parseKV(h)
itr.last = h
}
func (itr *blockIterator) Key() []byte {
if itr.err != nil {
return nil
}
return itr.key
}
func (itr *blockIterator) Value() []byte {
if itr.err != nil {
return nil
}
return itr.val
}
// Iterator is an iterator for a Table.
type Iterator struct {
t *Table
bpos int
bi *blockIterator
err error
// Internally, Iterator is bidirectional. However, we only expose the
// unidirectional functionality for now.
reversed bool
}
// NewIterator returns a new iterator of the Table
func (t *Table) NewIterator(reversed bool) *Iterator {
t.IncrRef() // Important.
ti := &Iterator{t: t, reversed: reversed}
ti.next()
return ti
}
// Close closes the iterator (and it must be called).
func (itr *Iterator) Close() error {
return itr.t.DecrRef()
}
func (itr *Iterator) reset() {
itr.bpos = 0
itr.err = nil
}
// Valid follows the y.Iterator interface
func (itr *Iterator) Valid() bool {
return itr.err == nil
}
func (itr *Iterator) seekToFirst() {
numBlocks := len(itr.t.blockIndex)
if numBlocks == 0 {
itr.err = io.EOF
return
}
itr.bpos = 0
block, err := itr.t.block(itr.bpos)
if err != nil {
itr.err = err
return
}
itr.bi = block.NewIterator()
itr.bi.SeekToFirst()
itr.err = itr.bi.Error()
}
func (itr *Iterator) seekToLast() {
numBlocks := len(itr.t.blockIndex)
if numBlocks == 0 {
itr.err = io.EOF
return
}
itr.bpos = numBlocks - 1
block, err := itr.t.block(itr.bpos)
if err != nil {
itr.err = err
return
}
itr.bi = block.NewIterator()
itr.bi.SeekToLast()
itr.err = itr.bi.Error()
}
func (itr *Iterator) seekHelper(blockIdx int, key []byte) {
itr.bpos = blockIdx
block, err := itr.t.block(blockIdx)
if err != nil {
itr.err = err
return
}
itr.bi = block.NewIterator()
itr.bi.Seek(key, origin)
itr.err = itr.bi.Error()
}
// seekFrom brings us to a key that is >= input key.
func (itr *Iterator) seekFrom(key []byte, whence int) {
itr.err = nil
switch whence {
case origin:
itr.reset()
case current:
}
idx := sort.Search(len(itr.t.blockIndex), func(idx int) bool {
ko := itr.t.blockIndex[idx]
return y.CompareKeys(ko.key, key) > 0
})
if idx == 0 {
// The smallest key in our table is already strictly > key. We can return that.
// This is like a SeekToFirst.
itr.seekHelper(0, key)
return
}
// block[idx].smallest is > key.
// Since idx>0, we know block[idx-1].smallest is <= key.
// There are two cases.
// 1) Everything in block[idx-1] is strictly < key. In this case, we should go to the first
// element of block[idx].
// 2) Some element in block[idx-1] is >= key. We should go to that element.
itr.seekHelper(idx-1, key)
if itr.err == io.EOF {
// Case 1. Need to visit block[idx].
if idx == len(itr.t.blockIndex) {
// If idx == len(itr.t.blockIndex), then input key is greater than ANY element of table.
// There's nothing we can do. Valid() should return false as we seek to end of table.
return
}
// Since block[idx].smallest is > key, this is essentially a block[idx].SeekToFirst.
itr.seekHelper(idx, key)
}
// Case 2: No need to do anything. We already did the seek in block[idx-1].
}
// seek will reset iterator and seek to >= key.
func (itr *Iterator) seek(key []byte) {
itr.seekFrom(key, origin)
}
// seekForPrev will reset iterator and seek to <= key.
func (itr *Iterator) seekForPrev(key []byte) {
// TODO: Optimize this. We shouldn't have to take a Prev step.
itr.seekFrom(key, origin)
if !bytes.Equal(itr.Key(), key) {
itr.prev()
}
}
func (itr *Iterator) next() {
itr.err = nil
if itr.bpos >= len(itr.t.blockIndex) {
itr.err = io.EOF
return
}
if itr.bi == nil {
block, err := itr.t.block(itr.bpos)
if err != nil {
itr.err = err
return
}
itr.bi = block.NewIterator()
itr.bi.SeekToFirst()
itr.err = itr.bi.Error()
return
}
itr.bi.Next()
if !itr.bi.Valid() {
itr.bpos++
itr.bi = nil
itr.next()
return
}
}
func (itr *Iterator) prev() {
itr.err = nil
if itr.bpos < 0 {
itr.err = io.EOF
return
}
if itr.bi == nil {
block, err := itr.t.block(itr.bpos)
if err != nil {
itr.err = err
return
}
itr.bi = block.NewIterator()
itr.bi.SeekToLast()
itr.err = itr.bi.Error()
return
}
itr.bi.Prev()
if !itr.bi.Valid() {
itr.bpos--
itr.bi = nil
itr.prev()
return
}
}
// Key follows the y.Iterator interface
func (itr *Iterator) Key() []byte {
return itr.bi.Key()
}
// Value follows the y.Iterator interface
func (itr *Iterator) Value() (ret y.ValueStruct) {
ret.Decode(itr.bi.Value())
return
}
// Next follows the y.Iterator interface
func (itr *Iterator) Next() {
if !itr.reversed {
itr.next()
} else {
itr.prev()
}
}
// Rewind follows the y.Iterator interface
func (itr *Iterator) Rewind() {
if !itr.reversed {
itr.seekToFirst()
} else {
itr.seekToLast()
}
}
// Seek follows the y.Iterator interface
func (itr *Iterator) Seek(key []byte) {
if !itr.reversed {
itr.seek(key)
} else {
itr.seekForPrev(key)
}
}
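// Taken together, Rewind, Valid, Next, and Seek give the usual scan pattern
// over a single table. A minimal forward-scan sketch (tbl is assumed to be a
// *Table already obtained from OpenTable; the names are illustrative only):
//
//	it := tbl.NewIterator(false) // false => forward iteration
//	defer it.Close()             // releases the reference taken by NewIterator
//	for it.Rewind(); it.Valid(); it.Next() {
//		k := it.Key()    // key, including its timestamp suffix
//		vs := it.Value() // y.ValueStruct holding value and metadata
//		_, _ = k, vs
//	}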
// ConcatIterator concatenates the sequences defined by several iterators. (It only works with
// TableIterators, probably just because it's faster to not be so generic.)
type ConcatIterator struct {
idx int // Which iterator is active now.
cur *Iterator
iters []*Iterator // Corresponds to tables.
tables []*Table // Disregarding reversed, this is in ascending order.
reversed bool
}
// NewConcatIterator creates a new concatenated iterator
func NewConcatIterator(tbls []*Table, reversed bool) *ConcatIterator {
iters := make([]*Iterator, len(tbls))
for i := 0; i < len(tbls); i++ {
iters[i] = tbls[i].NewIterator(reversed)
}
return &ConcatIterator{
reversed: reversed,
iters: iters,
tables: tbls,
idx: -1, // Not really necessary because s.cur ends up nil (so Valid() is false), but good to have.
}
}
func (s *ConcatIterator) setIdx(idx int) {
s.idx = idx
if idx < 0 || idx >= len(s.iters) {
s.cur = nil
} else {
s.cur = s.iters[s.idx]
}
}
// Rewind implements y.Interface
func (s *ConcatIterator) Rewind() {
if len(s.iters) == 0 {
return
}
if !s.reversed {
s.setIdx(0)
} else {
s.setIdx(len(s.iters) - 1)
}
s.cur.Rewind()
}
// Valid implements y.Interface
func (s *ConcatIterator) Valid() bool {
return s.cur != nil && s.cur.Valid()
}
// Key implements y.Interface
func (s *ConcatIterator) Key() []byte {
return s.cur.Key()
}
// Value implements y.Interface
func (s *ConcatIterator) Value() y.ValueStruct {
return s.cur.Value()
}
// Seek brings us to element >= key if reversed is false. Otherwise, <= key.
func (s *ConcatIterator) Seek(key []byte) {
var idx int
if !s.reversed {
idx = sort.Search(len(s.tables), func(i int) bool {
return y.CompareKeys(s.tables[i].Biggest(), key) >= 0
})
} else {
n := len(s.tables)
idx = n - 1 - sort.Search(n, func(i int) bool {
return y.CompareKeys(s.tables[n-1-i].Smallest(), key) <= 0
})
}
if idx >= len(s.tables) || idx < 0 {
s.setIdx(-1)
return
}
// For reversed=false, we know s.tables[idx-1].Biggest() < key. Thus, the
// previous table cannot possibly contain key.
s.setIdx(idx)
s.cur.Seek(key)
}
// Next advances our concat iterator.
func (s *ConcatIterator) Next() {
s.cur.Next()
if s.cur.Valid() {
// Nothing to do. Just stay with the current table.
return
}
for { // In case there are empty tables.
if !s.reversed {
s.setIdx(s.idx + 1)
} else {
s.setIdx(s.idx - 1)
}
if s.cur == nil {
// End of list. Valid will become false.
return
}
s.cur.Rewind()
if s.cur.Valid() {
break
}
}
}
// Close implements y.Interface.
func (s *ConcatIterator) Close() error {
for _, it := range s.iters {
if err := it.Close(); err != nil {
return errors.Wrap(err, "ConcatIterator")
}
}
return nil
}
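// A hedged usage sketch: scanning several key-disjoint tables in order. It
// assumes tbls is a []*Table sorted in ascending key order, as the struct
// above requires:
//
//	it := NewConcatIterator(tbls, false) // forward across tbls[0], tbls[1], ...
//	defer it.Close()
//	for it.Rewind(); it.Valid(); it.Next() {
//		_ = it.Key() // keys arrive in ascending order across table boundaries
//	}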

vendor/github.com/dgraph-io/badger/table/table.go generated vendored Normal file

@ -0,0 +1,356 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package table
import (
"bytes"
"crypto/sha256"
"encoding/binary"
"fmt"
"io"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
"sync/atomic"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
"gx/ipfs/QmWaLViWQF8jgyoLLqqcSrnp6dJpHESiJfzor1vrfDyTZf/bbloom"
)
const fileSuffix = ".sst"
type keyOffset struct {
key []byte
offset int
len int
}
// TableInterface is useful for testing.
type TableInterface interface {
Smallest() []byte
Biggest() []byte
DoesNotHave(key []byte) bool
}
// Table represents a loaded table file with the info we have about it
type Table struct {
sync.Mutex
fd *os.File // Own fd.
tableSize int // Initialized in OpenTable, using fd.Stat().
blockIndex []keyOffset
ref int32 // For file garbage collection. Atomic.
loadingMode options.FileLoadingMode
mmap []byte // Memory mapped.
// The following are initialized once and const.
smallest, biggest []byte // Smallest and largest keys.
id uint64 // file id, part of filename
bf bbloom.Bloom
Checksum []byte
}
// IncrRef increments the refcount (having to do with whether the file should be deleted)
func (t *Table) IncrRef() {
atomic.AddInt32(&t.ref, 1)
}
// DecrRef decrements the refcount and possibly deletes the table
func (t *Table) DecrRef() error {
newRef := atomic.AddInt32(&t.ref, -1)
if newRef == 0 {
// We can safely delete this file, because for all the current files, we always have
// at least one reference pointing to them.
// Unmapping first is necessary to be able to delete the file on Windows.
if t.loadingMode == options.MemoryMap {
y.Munmap(t.mmap)
}
if err := t.fd.Truncate(0); err != nil {
// This is very important to let the FS know that the file is deleted.
return err
}
filename := t.fd.Name()
if err := t.fd.Close(); err != nil {
return err
}
if err := os.Remove(filename); err != nil {
return err
}
}
return nil
}
type block struct {
offset int
data []byte
}
func (b block) NewIterator() *blockIterator {
return &blockIterator{data: b.data}
}
// OpenTable assumes file has only one table and opens it. Takes ownership of fd upon function
// entry. Returns a table with one reference count on it (decrementing which may delete the file!
// -- consider t.Close() instead). The fd has to be writeable because we call Truncate on it before
// deleting.
func OpenTable(fd *os.File, mode options.FileLoadingMode, cksum []byte) (*Table, error) {
fileInfo, err := fd.Stat()
if err != nil {
// It's OK to ignore fd.Close() errs in this function because we have only read
// from the file.
_ = fd.Close()
return nil, y.Wrap(err)
}
filename := fileInfo.Name()
id, ok := ParseFileID(filename)
if !ok {
_ = fd.Close()
return nil, errors.Errorf("Invalid filename: %s", filename)
}
t := &Table{
fd: fd,
ref: 1, // Caller is given one reference.
id: id,
loadingMode: mode,
}
t.tableSize = int(fileInfo.Size())
// We first load to RAM, so we can read the index and do checksum.
if err := t.loadToRAM(); err != nil {
return nil, err
}
// Enforce checksum before we read index. Otherwise, if the file was
// truncated, we'd end up with panics in readIndex.
if len(cksum) > 0 && !bytes.Equal(t.Checksum, cksum) {
return nil, fmt.Errorf(
"CHECKSUM_MISMATCH: Table checksum does not match checksum in MANIFEST."+
" NOT including table %s. This would lead to missing data."+
"\n sha256 %x Expected\n sha256 %x Found\n", filename, cksum, t.Checksum)
}
if err := t.readIndex(); err != nil {
return nil, y.Wrap(err)
}
it := t.NewIterator(false)
defer it.Close()
it.Rewind()
if it.Valid() {
t.smallest = it.Key()
}
it2 := t.NewIterator(true)
defer it2.Close()
it2.Rewind()
if it2.Valid() {
t.biggest = it2.Key()
}
switch mode {
case options.LoadToRAM:
// No need to do anything. t.mmap is already filled.
case options.MemoryMap:
t.mmap, err = y.Mmap(fd, false, fileInfo.Size())
if err != nil {
_ = fd.Close()
return nil, y.Wrapf(err, "Unable to map file")
}
case options.FileIO:
t.mmap = nil
default:
panic(fmt.Sprintf("Invalid loading mode: %v", mode))
}
return t, nil
}
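// A minimal open-path sketch, mirroring the tests in this package. The
// filename is a placeholder, and passing a nil checksum skips verification:
//
//	fd, err := y.OpenSyncedFile("000001.sst", true) // hypothetical table file
//	y.Check(err)
//	t, err := OpenTable(fd, options.MemoryMap, nil) // nil => no checksum check
//	y.Check(err)
//	defer t.DecrRef() // drop the reference handed out by OpenTable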
// Close closes the open table. (Releases resources back to the OS.)
func (t *Table) Close() error {
if t.loadingMode == options.MemoryMap {
y.Munmap(t.mmap)
}
return t.fd.Close()
}
func (t *Table) read(off int, sz int) ([]byte, error) {
if len(t.mmap) > 0 {
if len(t.mmap[off:]) < sz {
return nil, y.ErrEOF
}
return t.mmap[off : off+sz], nil
}
res := make([]byte, sz)
nbr, err := t.fd.ReadAt(res, int64(off))
y.NumReads.Add(1)
y.NumBytesRead.Add(int64(nbr))
return res, err
}
func (t *Table) readNoFail(off int, sz int) []byte {
res, err := t.read(off, sz)
y.Check(err)
return res
}
func (t *Table) readIndex() error {
if len(t.mmap) != t.tableSize {
panic("Table size does not match the read bytes")
}
readPos := t.tableSize
// Read bloom filter.
readPos -= 4
buf := t.readNoFail(readPos, 4)
bloomLen := int(binary.BigEndian.Uint32(buf))
readPos -= bloomLen
data := t.readNoFail(readPos, bloomLen)
t.bf = *bbloom.JSONUnmarshal(data)
readPos -= 4
buf = t.readNoFail(readPos, 4)
restartsLen := int(binary.BigEndian.Uint32(buf))
readPos -= 4 * restartsLen
buf = t.readNoFail(readPos, 4*restartsLen)
offsets := make([]int, restartsLen)
for i := 0; i < restartsLen; i++ {
offsets[i] = int(binary.BigEndian.Uint32(buf[:4]))
buf = buf[4:]
}
// The last offset stores the end of the last block.
for i := 0; i < len(offsets); i++ {
var o int
if i == 0 {
o = 0
} else {
o = offsets[i-1]
}
ko := keyOffset{
offset: o,
len: offsets[i] - o,
}
t.blockIndex = append(t.blockIndex, ko)
}
// Execute this index read serially, because we already have table data in memory.
var h header
for idx := range t.blockIndex {
ko := &t.blockIndex[idx]
hbuf := t.readNoFail(ko.offset, h.Size())
h.Decode(hbuf)
y.AssertTrue(h.plen == 0)
key := t.readNoFail(ko.offset+len(hbuf), int(h.klen))
ko.key = append([]byte{}, key...)
}
return nil
}
func (t *Table) block(idx int) (block, error) {
y.AssertTruef(idx >= 0, "idx=%d", idx)
if idx >= len(t.blockIndex) {
return block{}, errors.New("block out of index")
}
ko := t.blockIndex[idx]
blk := block{
offset: ko.offset,
}
var err error
blk.data, err = t.read(blk.offset, ko.len)
return blk, err
}
// Size is its file size in bytes
func (t *Table) Size() int64 { return int64(t.tableSize) }
// Smallest is its smallest key, or nil if there are none
func (t *Table) Smallest() []byte { return t.smallest }
// Biggest is its biggest key, or nil if there are none
func (t *Table) Biggest() []byte { return t.biggest }
// Filename is NOT the file name. Just kidding, it is.
func (t *Table) Filename() string { return t.fd.Name() }
// ID is the table's ID number (used to make the file name).
func (t *Table) ID() uint64 { return t.id }
// DoesNotHave returns true if (but not "only if") the table does not have the key. It does a
// bloom filter lookup.
func (t *Table) DoesNotHave(key []byte) bool { return !t.bf.Has(key) }
// ParseFileID reads the file id out of a filename.
func ParseFileID(name string) (uint64, bool) {
name = path.Base(name)
if !strings.HasSuffix(name, fileSuffix) {
return 0, false
}
// suffix := name[len(fileSuffix):]
name = strings.TrimSuffix(name, fileSuffix)
id, err := strconv.Atoi(name)
if err != nil {
return 0, false
}
y.AssertTrue(id >= 0)
return uint64(id), true
}
// IDToFilename does the inverse of ParseFileID
func IDToFilename(id uint64) string {
return fmt.Sprintf("%06d", id) + fileSuffix
}
// NewFilename should be named TableFilepath -- it combines the dir with the ID to make a table
// filepath.
func NewFilename(id uint64, dir string) string {
return filepath.Join(dir, IDToFilename(id))
}
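// The three helpers above are mutual inverses; a quick sketch (the directory
// is illustrative):
//
//	name := IDToFilename(7)         // "000007.sst"
//	id, ok := ParseFileID(name)     // id == 7, ok == true
//	full := NewFilename(id, "/tmp") // "/tmp/000007.sst"
//	_, _, _ = id, ok, full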
func (t *Table) loadToRAM() error {
if _, err := t.fd.Seek(0, io.SeekStart); err != nil {
return err
}
t.mmap = make([]byte, t.tableSize)
sum := sha256.New()
tee := io.TeeReader(t.fd, sum)
read, err := tee.Read(t.mmap)
if err != nil || read != t.tableSize {
return y.Wrapf(err, "Unable to load file in memory. Table file: %s", t.Filename())
}
t.Checksum = sum.Sum(nil)
y.NumReads.Add(1)
y.NumBytesRead.Add(int64(read))
return nil
}

vendor/github.com/dgraph-io/badger/table/table_test.go generated vendored Normal file

@ -0,0 +1,729 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package table
import (
"fmt"
"math/rand"
"os"
"sort"
"testing"
"github.com/stretchr/testify/require"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
func key(prefix string, i int) string {
return prefix + fmt.Sprintf("%04d", i)
}
func buildTestTable(t *testing.T, prefix string, n int) *os.File {
y.AssertTrue(n <= 10000)
keyValues := make([][]string, n)
for i := 0; i < n; i++ {
k := key(prefix, i)
v := fmt.Sprintf("%d", i)
keyValues[i] = []string{k, v}
}
return buildTable(t, keyValues)
}
// keyValues is n by 2 where n is number of pairs.
func buildTable(t *testing.T, keyValues [][]string) *os.File {
b := NewTableBuilder()
defer b.Close()
// TODO: Add test for file garbage collection here. No files should be left after the tests here.
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
f, err := y.OpenSyncedFile(filename, true)
if t != nil {
require.NoError(t, err)
} else {
y.Check(err)
}
sort.Slice(keyValues, func(i, j int) bool {
return keyValues[i][0] < keyValues[j][0]
})
for _, kv := range keyValues {
y.AssertTrue(len(kv) == 2)
err := b.Add(y.KeyWithTs([]byte(kv[0]), 0), y.ValueStruct{Value: []byte(kv[1]), Meta: 'A', UserMeta: 0})
if t != nil {
require.NoError(t, err)
} else {
y.Check(err)
}
}
f.Write(b.Finish())
f.Close()
f, _ = y.OpenSyncedFile(filename, true)
return f
}
func TestTableIterator(t *testing.T) {
for _, n := range []int{99, 100, 101} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
count := 0
for it.Rewind(); it.Valid(); it.Next() {
v := it.Value()
k := y.KeyWithTs([]byte(key("key", count)), 0)
require.EqualValues(t, k, it.Key())
require.EqualValues(t, fmt.Sprintf("%d", count), string(v.Value))
count++
}
require.Equal(t, count, n)
})
}
}
func TestSeekToFirst(t *testing.T) {
for _, n := range []int{99, 100, 101, 199, 200, 250, 9999, 10000} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
it.seekToFirst()
require.True(t, it.Valid())
v := it.Value()
require.EqualValues(t, "0", string(v.Value))
require.EqualValues(t, 'A', v.Meta)
})
}
}
func TestSeekToLast(t *testing.T) {
for _, n := range []int{99, 100, 101, 199, 200, 250, 9999, 10000} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
it.seekToLast()
require.True(t, it.Valid())
v := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", n-1), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
it.prev()
require.True(t, it.Valid())
v = it.Value()
require.EqualValues(t, fmt.Sprintf("%d", n-2), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
})
}
}
func TestSeek(t *testing.T) {
f := buildTestTable(t, "k", 10000)
table, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
var data = []struct {
in string
valid bool
out string
}{
{"abc", true, "k0000"},
{"k0100", true, "k0100"},
{"k0100b", true, "k0101"}, // Test case where we jump to next block.
{"k1234", true, "k1234"},
{"k1234b", true, "k1235"},
{"k9999", true, "k9999"},
{"z", false, ""},
}
for _, tt := range data {
it.seek(y.KeyWithTs([]byte(tt.in), 0))
if !tt.valid {
require.False(t, it.Valid())
continue
}
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, tt.out, string(y.ParseKey(k)))
}
}
func TestSeekForPrev(t *testing.T) {
f := buildTestTable(t, "k", 10000)
table, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
var data = []struct {
in string
valid bool
out string
}{
{"abc", false, ""},
{"k0100", true, "k0100"},
{"k0100b", true, "k0100"}, // Test case where we jump to next block.
{"k1234", true, "k1234"},
{"k1234b", true, "k1234"},
{"k9999", true, "k9999"},
{"z", true, "k9999"},
}
for _, tt := range data {
it.seekForPrev(y.KeyWithTs([]byte(tt.in), 0))
if !tt.valid {
require.False(t, it.Valid())
continue
}
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, tt.out, string(y.ParseKey(k)))
}
}
func TestIterateFromStart(t *testing.T) {
// Vary the number of elements added.
for _, n := range []int{99, 100, 101, 199, 200, 250, 9999, 10000} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer table.DecrRef()
ti := table.NewIterator(false)
defer ti.Close()
ti.reset()
ti.seekToFirst()
require.True(t, ti.Valid())
// No need to do an initial Next: seekToFirst already brings us to the
// first key, essentially a Seek to the smallest key.
var count int
for ; ti.Valid(); ti.next() {
v := ti.Value()
require.EqualValues(t, fmt.Sprintf("%d", count), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
count++
}
require.EqualValues(t, n, count)
})
}
}
func TestIterateFromEnd(t *testing.T) {
// Vary the number of elements added.
for _, n := range []int{99, 100, 101, 199, 200, 250, 9999, 10000} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.FileIO, nil)
require.NoError(t, err)
defer table.DecrRef()
ti := table.NewIterator(false)
defer ti.Close()
ti.reset()
ti.seek(y.KeyWithTs([]byte("zzzzzz"), 0)) // Seek to end, an invalid element.
require.False(t, ti.Valid())
for i := n - 1; i >= 0; i-- {
ti.prev()
require.True(t, ti.Valid())
v := ti.Value()
require.EqualValues(t, fmt.Sprintf("%d", i), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
}
ti.prev()
require.False(t, ti.Valid())
})
}
}
func TestTable(t *testing.T) {
f := buildTestTable(t, "key", 10000)
table, err := OpenTable(f, options.FileIO, nil)
require.NoError(t, err)
defer table.DecrRef()
ti := table.NewIterator(false)
defer ti.Close()
kid := 1010
seek := y.KeyWithTs([]byte(key("key", kid)), 0)
for ti.seek(seek); ti.Valid(); ti.next() {
k := ti.Key()
require.EqualValues(t, string(y.ParseKey(k)), key("key", kid))
kid++
}
if kid != 10000 {
t.Errorf("Expected kid: 10000. Got: %v", kid)
}
ti.seek(y.KeyWithTs([]byte(key("key", 99999)), 0))
require.False(t, ti.Valid())
ti.seek(y.KeyWithTs([]byte(key("key", -1)), 0))
require.True(t, ti.Valid())
k := ti.Key()
require.EqualValues(t, string(y.ParseKey(k)), key("key", 0))
}
func TestIterateBackAndForth(t *testing.T) {
f := buildTestTable(t, "key", 10000)
table, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer table.DecrRef()
seek := y.KeyWithTs([]byte(key("key", 1010)), 0)
it := table.NewIterator(false)
defer it.Close()
it.seek(seek)
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, seek, k)
it.prev()
it.prev()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, key("key", 1008), string(y.ParseKey(k)))
it.next()
it.next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, key("key", 1010), y.ParseKey(k))
it.seek(y.KeyWithTs([]byte(key("key", 2000)), 0))
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, key("key", 2000), y.ParseKey(k))
it.prev()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, key("key", 1999), y.ParseKey(k))
it.seekToFirst()
k = it.Key()
require.EqualValues(t, key("key", 0), y.ParseKey(k))
}
func TestUniIterator(t *testing.T) {
f := buildTestTable(t, "key", 10000)
table, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer table.DecrRef()
{
it := table.NewIterator(false)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
v := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", count), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
count++
}
require.EqualValues(t, 10000, count)
}
{
it := table.NewIterator(true)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
v := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", 10000-1-count), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
count++
}
require.EqualValues(t, 10000, count)
}
}
// Try having only one table.
func TestConcatIteratorOneTable(t *testing.T) {
f := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
tbl, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer tbl.DecrRef()
it := NewConcatIterator([]*Table{tbl}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
}
func TestConcatIterator(t *testing.T) {
f := buildTestTable(t, "keya", 10000)
f2 := buildTestTable(t, "keyb", 10000)
f3 := buildTestTable(t, "keyc", 10000)
tbl, err := OpenTable(f, options.MemoryMap, nil)
require.NoError(t, err)
defer tbl.DecrRef()
tbl2, err := OpenTable(f2, options.LoadToRAM, nil)
require.NoError(t, err)
defer tbl2.DecrRef()
tbl3, err := OpenTable(f3, options.LoadToRAM, nil)
require.NoError(t, err)
defer tbl3.DecrRef()
{
it := NewConcatIterator([]*Table{tbl, tbl2, tbl3}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
var count int
for ; it.Valid(); it.Next() {
vs := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", count%10000), string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
count++
}
require.EqualValues(t, 30000, count)
it.Seek(y.KeyWithTs([]byte("a"), 0))
require.EqualValues(t, "keya0000", string(y.ParseKey(it.Key())))
vs := it.Value()
require.EqualValues(t, "0", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyb"), 0))
require.EqualValues(t, "keyb0000", string(y.ParseKey(it.Key())))
vs = it.Value()
require.EqualValues(t, "0", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyb9999b"), 0))
require.EqualValues(t, "keyc0000", string(y.ParseKey(it.Key())))
vs = it.Value()
require.EqualValues(t, "0", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyd"), 0))
require.False(t, it.Valid())
}
{
it := NewConcatIterator([]*Table{tbl, tbl2, tbl3}, true)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
var count int
for ; it.Valid(); it.Next() {
vs := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", 10000-(count%10000)-1), string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
count++
}
require.EqualValues(t, 30000, count)
it.Seek(y.KeyWithTs([]byte("a"), 0))
require.False(t, it.Valid())
it.Seek(y.KeyWithTs([]byte("keyb"), 0))
require.EqualValues(t, "keya9999", string(y.ParseKey(it.Key())))
vs := it.Value()
require.EqualValues(t, "9999", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyb9999b"), 0))
require.EqualValues(t, "keyb9999", string(y.ParseKey(it.Key())))
vs = it.Value()
require.EqualValues(t, "9999", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyd"), 0))
require.EqualValues(t, "keyc9999", string(y.ParseKey(it.Key())))
vs = it.Value()
require.EqualValues(t, "9999", string(vs.Value))
}
}
func TestMergingIterator(t *testing.T) {
f1 := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
f2 := buildTable(t, [][]string{
{"k1", "b1"},
{"k2", "b2"},
})
tbl1, err := OpenTable(f1, options.LoadToRAM, nil)
require.NoError(t, err)
defer tbl1.DecrRef()
tbl2, err := OpenTable(f2, options.LoadToRAM, nil)
require.NoError(t, err)
defer tbl2.DecrRef()
it1 := tbl1.NewIterator(false)
it2 := NewConcatIterator([]*Table{tbl2}, false)
it := y.NewMergeIterator([]y.Iterator{it1, it2}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, "k2", string(y.ParseKey(k)))
vs = it.Value()
require.EqualValues(t, "a2", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.False(t, it.Valid())
}
func TestMergingIteratorReversed(t *testing.T) {
f1 := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
f2 := buildTable(t, [][]string{
{"k1", "b1"},
{"k2", "b2"},
})
tbl1, err := OpenTable(f1, options.LoadToRAM, nil)
require.NoError(t, err)
defer tbl1.DecrRef()
tbl2, err := OpenTable(f2, options.LoadToRAM, nil)
require.NoError(t, err)
defer tbl2.DecrRef()
it1 := tbl1.NewIterator(true)
it2 := NewConcatIterator([]*Table{tbl2}, true)
it := y.NewMergeIterator([]y.Iterator{it1, it2}, true)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k2", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a2", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs = it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.False(t, it.Valid())
}
// Take only the first iterator.
func TestMergingIteratorTakeOne(t *testing.T) {
f1 := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
f2 := buildTable(t, [][]string{})
t1, err := OpenTable(f1, options.LoadToRAM, nil)
require.NoError(t, err)
defer t1.DecrRef()
t2, err := OpenTable(f2, options.LoadToRAM, nil)
require.NoError(t, err)
defer t2.DecrRef()
it1 := NewConcatIterator([]*Table{t1}, false)
it2 := NewConcatIterator([]*Table{t2}, false)
it := y.NewMergeIterator([]y.Iterator{it1, it2}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, "k2", string(y.ParseKey(k)))
vs = it.Value()
require.EqualValues(t, "a2", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.False(t, it.Valid())
}
// Take only the second iterator.
func TestMergingIteratorTakeTwo(t *testing.T) {
f1 := buildTable(t, [][]string{})
f2 := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
t1, err := OpenTable(f1, options.LoadToRAM, nil)
require.NoError(t, err)
defer t1.DecrRef()
t2, err := OpenTable(f2, options.LoadToRAM, nil)
require.NoError(t, err)
defer t2.DecrRef()
it1 := NewConcatIterator([]*Table{t1}, false)
it2 := NewConcatIterator([]*Table{t2}, false)
it := y.NewMergeIterator([]y.Iterator{it1, it2}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, "k2", string(y.ParseKey(k)))
vs = it.Value()
require.EqualValues(t, "a2", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.False(t, it.Valid())
}
func BenchmarkRead(b *testing.B) {
n := 5 << 20
builder := NewTableBuilder()
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
f, err := y.OpenSyncedFile(filename, true)
y.Check(err)
for i := 0; i < n; i++ {
k := fmt.Sprintf("%016x", i)
v := fmt.Sprintf("%d", i)
y.Check(builder.Add([]byte(k), y.ValueStruct{Value: []byte(v), Meta: 123, UserMeta: 0}))
}
f.Write(builder.Finish())
tbl, err := OpenTable(f, options.MemoryMap, nil)
y.Check(err)
defer tbl.DecrRef()
// y.Printf("Size of table: %d\n", tbl.Size())
b.ResetTimer()
// Iterate b.N times over the entire table.
for i := 0; i < b.N; i++ {
func() {
it := tbl.NewIterator(false)
defer it.Close()
for it.seekToFirst(); it.Valid(); it.next() {
}
}()
}
}
func BenchmarkReadAndBuild(b *testing.B) {
n := 5 << 20
builder := NewTableBuilder()
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
f, err := y.OpenSyncedFile(filename, true)
y.Check(err)
for i := 0; i < n; i++ {
k := fmt.Sprintf("%016x", i)
v := fmt.Sprintf("%d", i)
y.Check(builder.Add([]byte(k), y.ValueStruct{Value: []byte(v), Meta: 123, UserMeta: 0}))
}
f.Write(builder.Finish())
tbl, err := OpenTable(f, options.MemoryMap, nil)
y.Check(err)
defer tbl.DecrRef()
// y.Printf("Size of table: %d\n", tbl.Size())
b.ResetTimer()
// Iterate b.N times over the entire table.
for i := 0; i < b.N; i++ {
func() {
newBuilder := NewTableBuilder()
it := tbl.NewIterator(false)
defer it.Close()
for it.seekToFirst(); it.Valid(); it.next() {
vs := it.Value()
newBuilder.Add(it.Key(), vs)
}
newBuilder.Finish()
}()
}
}
func BenchmarkReadMerged(b *testing.B) {
n := 5 << 20
m := 5 // Number of tables.
y.AssertTrue((n % m) == 0)
tableSize := n / m
var tables []*Table
for i := 0; i < m; i++ {
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
builder := NewTableBuilder()
f, err := y.OpenSyncedFile(filename, true)
y.Check(err)
for j := 0; j < tableSize; j++ {
id := j*m + i // Arrays are interleaved.
// id := i*tableSize+j (not interleaved)
k := fmt.Sprintf("%016x", id)
v := fmt.Sprintf("%d", id)
y.Check(builder.Add([]byte(k), y.ValueStruct{Value: []byte(v), Meta: 123, UserMeta: 0}))
}
f.Write(builder.Finish())
tbl, err := OpenTable(f, options.MemoryMap, nil)
y.Check(err)
tables = append(tables, tbl)
defer tbl.DecrRef()
}
b.ResetTimer()
// Iterate b.N times over the entire table.
for i := 0; i < b.N; i++ {
func() {
var iters []y.Iterator
for _, tbl := range tables {
iters = append(iters, tbl.NewIterator(false))
}
it := y.NewMergeIterator(iters, false)
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
}
}()
}
}

vendor/github.com/dgraph-io/badger/test.sh generated vendored Normal file

@ -0,0 +1,24 @@
#!/bin/bash
set -e
# Ensure that we can compile the binary.
pushd badger
go build -v .
popd
# Run the memory intensive tests first.
go test -v --manual=true -run='TestBigKeyValuePairs$'
go test -v --manual=true -run='TestPushValueLogLimit'
# Run the special Truncate test.
rm -R p || true
go test -v --manual=true -run='TestTruncateVlogNoClose$' .
truncate --size=4096 p/000000.vlog
go test -v --manual=true -run='TestTruncateVlogNoClose2$' .
go test -v --manual=true -run='TestTruncateVlogNoClose3$' .
rm -R p
# Then the normal tests.
go test -v --vlog_mmap=true -race ./...
go test -v --vlog_mmap=false -race ./...

vendor/github.com/dgraph-io/badger/txn.go generated vendored Normal file

@ -0,0 +1,753 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"context"
"encoding/hex"
"math"
"sort"
"strconv"
"sync"
"sync/atomic"
"time"
farm "gx/ipfs/QmRFFHk2jw9tgjxv12bCuuTnSbVXxEvYQkuNCLMEv9eUwP/go-farm"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
type oracle struct {
// A 64-bit integer must be at the top for memory alignment. See issue #311.
refCount int64
isManaged bool // Does not change value, so no locking required.
sync.Mutex // For nextTxnTs and commits.
// writeChLock lock is for ensuring that transactions go to the write
// channel in the same order as their commit timestamps.
writeChLock sync.Mutex
nextTxnTs uint64
// Used to block NewTransaction, so all previous commits are visible to a new read.
txnMark *y.WaterMark
// Either of these is used to determine which versions can be permanently
// discarded during compaction.
discardTs uint64 // Used by ManagedDB.
readMark *y.WaterMark // Used by DB.
// commits stores a key fingerprint and latest commit counter for it.
// refCount is used to clear out commits map to avoid a memory blowup.
commits map[uint64]uint64
// closer is used to stop watermarks.
closer *y.Closer
}
func newOracle(opt Options) *oracle {
orc := &oracle{
isManaged: opt.managedTxns,
commits: make(map[uint64]uint64),
// We're not initializing nextTxnTs and readOnlyTs. It would be done after replay in Open.
//
// WaterMarks must be 64-bit aligned for atomic package, hence we must use pointers here.
// See https://golang.org/pkg/sync/atomic/#pkg-note-BUG.
readMark: &y.WaterMark{Name: "badger.PendingReads"},
txnMark: &y.WaterMark{Name: "badger.TxnTimestamp"},
closer: y.NewCloser(2),
}
orc.readMark.Init(orc.closer)
orc.txnMark.Init(orc.closer)
return orc
}
func (o *oracle) Stop() {
o.closer.SignalAndWait()
}
func (o *oracle) addRef() {
atomic.AddInt64(&o.refCount, 1)
}
func (o *oracle) decrRef() {
if atomic.AddInt64(&o.refCount, -1) != 0 {
return
}
// Clear out commits maps to release memory.
o.Lock()
defer o.Unlock()
// Avoids the race where something new is added to the commits map
// after we check refCount and before we take Lock.
if atomic.LoadInt64(&o.refCount) != 0 {
return
}
if len(o.commits) >= 1000 { // If the map is still small, let it slide.
o.commits = make(map[uint64]uint64)
}
}
func (o *oracle) readTs() uint64 {
if o.isManaged {
panic("ReadTs should not be retrieved for managed DB")
}
var readTs uint64
o.Lock()
readTs = o.nextTxnTs - 1
o.readMark.Begin(readTs)
o.Unlock()
// Wait for all txns which have no conflicts, have been assigned a commit
// timestamp and are going through the write to value log and LSM tree
// process. Not waiting here could mean that some txns which have been
// committed would not be read.
y.Check(o.txnMark.WaitForMark(context.Background(), readTs))
return readTs
}
func (o *oracle) nextTs() uint64 {
o.Lock()
defer o.Unlock()
return o.nextTxnTs
}
// Any deleted or invalid versions at or below ts would be discarded during
// compaction to reclaim disk space in LSM tree and thence value log.
func (o *oracle) setDiscardTs(ts uint64) {
o.Lock()
defer o.Unlock()
o.discardTs = ts
}
func (o *oracle) discardAtOrBelow() uint64 {
if o.isManaged {
o.Lock()
defer o.Unlock()
return o.discardTs
}
return o.readMark.DoneUntil()
}
// hasConflict must be called while having a lock.
func (o *oracle) hasConflict(txn *Txn) bool {
if len(txn.reads) == 0 {
return false
}
for _, ro := range txn.reads {
// A commit at the read timestamp is expected.
// But, any commit after the read timestamp should cause a conflict.
if ts, has := o.commits[ro]; has && ts > txn.readTs {
return true
}
}
return false
}
func (o *oracle) newCommitTs(txn *Txn) uint64 {
o.Lock()
defer o.Unlock()
if o.hasConflict(txn) {
return 0
}
var ts uint64
if !o.isManaged {
// This is the general case, when the user doesn't specify the read and commit ts.
ts = o.nextTxnTs
o.nextTxnTs++
o.txnMark.Begin(ts)
} else {
// If commitTs is set, use it instead.
ts = txn.commitTs
}
for _, w := range txn.writes {
o.commits[w] = ts // Update the commitTs.
}
return ts
}
func (o *oracle) doneCommit(cts uint64) {
if o.isManaged {
// No need to update anything.
return
}
o.txnMark.Done(cts)
}
// Txn represents a Badger transaction.
type Txn struct {
readTs uint64
commitTs uint64
update bool // update is used to conditionally keep track of reads.
reads []uint64 // contains fingerprints of keys read.
writes []uint64 // contains fingerprints of keys written.
pendingWrites map[string]*Entry // cache stores any writes done by txn.
db *DB
discarded bool
size int64
count int64
numIterators int32
}
type pendingWritesIterator struct {
entries []*Entry
nextIdx int
readTs uint64
reversed bool
}
func (pi *pendingWritesIterator) Next() {
pi.nextIdx++
}
func (pi *pendingWritesIterator) Rewind() {
pi.nextIdx = 0
}
func (pi *pendingWritesIterator) Seek(key []byte) {
key = y.ParseKey(key)
pi.nextIdx = sort.Search(len(pi.entries), func(idx int) bool {
cmp := bytes.Compare(pi.entries[idx].Key, key)
if !pi.reversed {
return cmp >= 0
}
return cmp <= 0
})
}
func (pi *pendingWritesIterator) Key() []byte {
y.AssertTrue(pi.Valid())
entry := pi.entries[pi.nextIdx]
return y.KeyWithTs(entry.Key, pi.readTs)
}
func (pi *pendingWritesIterator) Value() y.ValueStruct {
y.AssertTrue(pi.Valid())
entry := pi.entries[pi.nextIdx]
return y.ValueStruct{
Value: entry.Value,
Meta: entry.meta,
UserMeta: entry.UserMeta,
ExpiresAt: entry.ExpiresAt,
Version: pi.readTs,
}
}
func (pi *pendingWritesIterator) Valid() bool {
return pi.nextIdx < len(pi.entries)
}
func (pi *pendingWritesIterator) Close() error {
return nil
}
func (txn *Txn) newPendingWritesIterator(reversed bool) *pendingWritesIterator {
if !txn.update || len(txn.pendingWrites) == 0 {
return nil
}
entries := make([]*Entry, 0, len(txn.pendingWrites))
for _, e := range txn.pendingWrites {
entries = append(entries, e)
}
// Number of pending writes per transaction shouldn't be too big in general.
sort.Slice(entries, func(i, j int) bool {
cmp := bytes.Compare(entries[i].Key, entries[j].Key)
if !reversed {
return cmp < 0
}
return cmp > 0
})
return &pendingWritesIterator{
readTs: txn.readTs,
entries: entries,
reversed: reversed,
}
}
func (txn *Txn) checkSize(e *Entry) error {
count := txn.count + 1
// Extra bytes for version in key.
size := txn.size + int64(e.estimateSize(txn.db.opt.ValueThreshold)) + 10
if count >= txn.db.opt.maxBatchCount || size >= txn.db.opt.maxBatchSize {
return ErrTxnTooBig
}
txn.count, txn.size = count, size
return nil
}
// Set adds a key-value pair to the database.
//
// It will return ErrReadOnlyTxn if update flag was set to false when creating the
// transaction.
//
// The current transaction keeps a reference to the key and val byte slice
// arguments. Users must not modify key and val until the end of the transaction.
func (txn *Txn) Set(key, val []byte) error {
e := &Entry{
Key: key,
Value: val,
}
return txn.SetEntry(e)
}
// SetWithMeta adds a key-value pair to the database, along with a metadata
// byte.
//
// This byte is stored alongside the key, and can be used as an aid to
// interpret the value or store other contextual bits corresponding to the
// key-value pair.
//
// The current transaction keeps a reference to the key and val byte slice
// arguments. Users must not modify key and val until the end of the transaction.
func (txn *Txn) SetWithMeta(key, val []byte, meta byte) error {
e := &Entry{Key: key, Value: val, UserMeta: meta}
return txn.SetEntry(e)
}
// SetWithDiscard acts like SetWithMeta, but adds a marker to discard earlier
// versions of the key.
//
// This method is only useful if you have set a higher limit for
// options.NumVersionsToKeep. The default setting is 1, in which case, this
// function doesn't add any more benefit than just calling the normal
// SetWithMeta (or Set) function. If however, you have a higher setting for
// NumVersionsToKeep (in Dgraph, we set it to infinity), you can use this method
// to indicate that all the older versions can be discarded and removed during
// compactions.
//
// The current transaction keeps a reference to the key and val byte slice
// arguments. Users must not modify key and val until the end of the
// transaction.
func (txn *Txn) SetWithDiscard(key, val []byte, meta byte) error {
e := &Entry{
Key: key,
Value: val,
UserMeta: meta,
meta: bitDiscardEarlierVersions,
}
return txn.SetEntry(e)
}
// SetWithTTL adds a key-value pair to the database, along with a time-to-live
// (TTL) setting. A key stored with a TTL would automatically expire after the
// time has elapsed, and be eligible for garbage collection.
//
// The current transaction keeps a reference to the key and val byte slice
// arguments. Users must not modify key and val until the end of the
// transaction.
func (txn *Txn) SetWithTTL(key, val []byte, dur time.Duration) error {
expire := time.Now().Add(dur).Unix()
e := &Entry{Key: key, Value: val, ExpiresAt: uint64(expire)}
return txn.SetEntry(e)
}
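// A hedged sketch: storing a value that should expire in an hour. The key and
// value are illustrative, and txn is assumed to be an update transaction:
//
//	err := txn.SetWithTTL([]byte("session/abc"), []byte("token"), time.Hour)
//	if err != nil {
//		// e.g. ErrTxnTooBig: commit this txn and retry in a fresh one.
//	}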
func exceedsSize(prefix string, max int64, key []byte) error {
return errors.Errorf("%s with size %d exceeded %d limit. %s:\n%s",
prefix, len(key), max, prefix, hex.Dump(key[:1<<10]))
}
func (txn *Txn) modify(e *Entry) error {
const maxKeySize = 65000
switch {
case !txn.update:
return ErrReadOnlyTxn
case txn.discarded:
return ErrDiscardedTxn
case len(e.Key) == 0:
return ErrEmptyKey
case bytes.HasPrefix(e.Key, badgerPrefix):
return ErrInvalidKey
case len(e.Key) > maxKeySize:
// Key length can't be more than uint16, as determined by table::header. To
// keep things safe and leave room for the badger move prefix and a timestamp
// suffix, let's cut it down to 65000 instead of using 65536.
return exceedsSize("Key", maxKeySize, e.Key)
case int64(len(e.Value)) > txn.db.opt.ValueLogFileSize:
return exceedsSize("Value", txn.db.opt.ValueLogFileSize, e.Value)
}
if err := txn.checkSize(e); err != nil {
return err
}
fp := farm.Fingerprint64(e.Key) // Avoid dealing with byte arrays.
txn.writes = append(txn.writes, fp)
txn.pendingWrites[string(e.Key)] = e
return nil
}
// SetEntry takes an Entry struct and adds the key-value pair in the struct,
// along with other metadata to the database.
//
// The current transaction keeps a reference to the entry passed in argument.
// Users must not modify the entry until the end of the transaction.
func (txn *Txn) SetEntry(e *Entry) error {
return txn.modify(e)
}
// Delete deletes a key.
//
// This is done by adding a delete marker for the key at commit timestamp. Any
// reads happening before this timestamp would be unaffected. Any reads after
// this commit would see the deletion.
//
// The current transaction keeps a reference to the key byte slice argument.
// Users must not modify the key until the end of the transaction.
func (txn *Txn) Delete(key []byte) error {
e := &Entry{
Key: key,
meta: bitDelete,
}
return txn.modify(e)
}
// Get looks for key and returns corresponding Item.
// If key is not found, ErrKeyNotFound is returned.
func (txn *Txn) Get(key []byte) (item *Item, rerr error) {
if len(key) == 0 {
return nil, ErrEmptyKey
} else if txn.discarded {
return nil, ErrDiscardedTxn
}
item = new(Item)
if txn.update {
if e, has := txn.pendingWrites[string(key)]; has && bytes.Equal(key, e.Key) {
if isDeletedOrExpired(e.meta, e.ExpiresAt) {
return nil, ErrKeyNotFound
}
// Fulfill from cache.
item.meta = e.meta
item.val = e.Value
item.userMeta = e.UserMeta
item.key = key
item.status = prefetched
item.version = txn.readTs
item.expiresAt = e.ExpiresAt
// We probably don't need to set db on item here.
return item, nil
}
// Only track reads if this is an update txn. No need to track the read if the
// txn serviced it internally.
txn.addReadKey(key)
}
seek := y.KeyWithTs(key, txn.readTs)
vs, err := txn.db.get(seek)
if err != nil {
return nil, errors.Wrapf(err, "DB::Get key: %q", key)
}
if vs.Value == nil && vs.Meta == 0 {
return nil, ErrKeyNotFound
}
if isDeletedOrExpired(vs.Meta, vs.ExpiresAt) {
return nil, ErrKeyNotFound
}
item.key = key
item.version = vs.Version
item.meta = vs.Meta
item.userMeta = vs.UserMeta
item.db = txn.db
item.vptr = vs.Value // TODO: Do we need to copy this over?
item.txn = txn
item.expiresAt = vs.ExpiresAt
return item, nil
}
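// A short usage sketch (the key is illustrative):
//
//	item, err := txn.Get([]byte("answer"))
//	if err == ErrKeyNotFound {
//		// Absent, deleted, or expired at this read timestamp.
//	} else if err == nil {
//		val, _ := item.ValueCopy(nil) // copy out; usable after the txn ends
//		_ = val
//	}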
func (txn *Txn) addReadKey(key []byte) {
if txn.update {
fp := farm.Fingerprint64(key)
txn.reads = append(txn.reads, fp)
}
}
// Discard discards a created transaction. This method is very important and must be called. The
// Commit method calls this internally; calling this multiple times doesn't cause any issues, so
// this can safely be called via a defer right when the transaction is created.
//
// NOTE: If any operations are run on a discarded transaction, ErrDiscardedTxn is returned.
func (txn *Txn) Discard() {
if txn.discarded { // Avoid a re-run.
return
}
if atomic.LoadInt32(&txn.numIterators) > 0 {
panic("Unclosed iterator at time of Txn.Discard.")
}
txn.discarded = true
if !txn.db.orc.isManaged {
txn.db.orc.readMark.Done(txn.readTs)
}
if txn.update {
txn.db.orc.decrRef()
}
}
func (txn *Txn) commitAndSend() (func() error, error) {
orc := txn.db.orc
// Ensure that the order in which we get the commit timestamp is the same as
// the order in which we push these updates to the write channel. So, we
// acquire a writeChLock before getting a commit timestamp, and only release
// it after pushing the entries to it.
orc.writeChLock.Lock()
defer orc.writeChLock.Unlock()
commitTs := orc.newCommitTs(txn)
if commitTs == 0 {
return nil, ErrConflict
}
// The following debug information is what led to determining the cause of
// the bank txn violation bug, and it took a whole bunch of effort to narrow it
// down to here. So, keep this around for at least a couple of months.
// var b strings.Builder
// fmt.Fprintf(&b, "Read: %d. Commit: %d. reads: %v. writes: %v. Keys: ",
// txn.readTs, commitTs, txn.reads, txn.writes)
entries := make([]*Entry, 0, len(txn.pendingWrites)+1)
for _, e := range txn.pendingWrites {
// fmt.Fprintf(&b, "[%q : %q], ", e.Key, e.Value)
// Suffix the keys with commit ts, so the key versions are sorted in
// descending order of commit timestamp.
e.Key = y.KeyWithTs(e.Key, commitTs)
e.meta |= bitTxn
entries = append(entries, e)
}
// log.Printf("%s\n", b.String())
e := &Entry{
Key: y.KeyWithTs(txnKey, commitTs),
Value: []byte(strconv.FormatUint(commitTs, 10)),
meta: bitFinTxn,
}
entries = append(entries, e)
req, err := txn.db.sendToWriteCh(entries)
if err != nil {
orc.doneCommit(commitTs)
return nil, err
}
ret := func() error {
err := req.Wait()
// Wait before marking commitTs as done.
// We can't defer doneCommit above, because it is being called from a
// callback here.
orc.doneCommit(commitTs)
return err
}
return ret, nil
}
func (txn *Txn) commitPrecheck() {
if txn.commitTs == 0 && txn.db.opt.managedTxns {
panic("Commit cannot be called with managedDB=true. Use CommitAt.")
}
if txn.discarded {
panic("Trying to commit a discarded txn")
}
}
// Commit commits the transaction, following these steps:
//
// 1. If there are no writes, return immediately.
//
// 2. Check if read rows were updated since txn started. If so, return ErrConflict.
//
// 3. If no conflict, generate a commit timestamp and update written rows' commit ts.
//
// 4. Batch up all writes, write them to value log and LSM tree.
//
// 5. If callback is provided, Badger will return immediately after checking
// for conflicts. Writes to the database will happen in the background. If
// there is a conflict, an error will be returned and the callback will not
// run. If there are no conflicts, the callback will be called in the
// background upon successful completion of writes or any error during write.
//
// If error is nil, the transaction is successfully committed. In case of a non-nil error, the LSM
// tree won't be updated, so there's no need for any rollback.
func (txn *Txn) Commit() error {
txn.commitPrecheck() // Precheck before discarding txn.
defer txn.Discard()
if len(txn.writes) == 0 {
return nil // Nothing to do.
}
txnCb, err := txn.commitAndSend()
if err != nil {
return err
}
// If batchSet failed, LSM would not have been updated. So, no need to rollback anything.
// TODO: What if some of the txns successfully make it to value log, but others fail.
// Nothing gets updated to LSM, until a restart happens.
return txnCb()
}
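// The steps above in miniature, as a sketch (key and value are illustrative):
//
//	txn := db.NewTransaction(true) // update transaction
//	defer txn.Discard()            // always safe; Commit discards internally
//	if err := txn.Set([]byte("k"), []byte("v")); err != nil {
//		// Handle e.g. ErrTxnTooBig.
//	}
//	if err := txn.Commit(); err == ErrConflict {
//		// A concurrent txn committed one of our read keys; retry from scratch.
//	}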
type txnCb struct {
commit func() error
user func(error)
err error
}
func runTxnCallback(cb *txnCb) {
switch {
case cb == nil:
panic("txn callback is nil")
case cb.user == nil:
panic("Must have caught a nil callback for txn.CommitWith")
case cb.err != nil:
cb.user(cb.err)
case cb.commit != nil:
err := cb.commit()
cb.user(err)
default:
cb.user(nil)
}
}
// CommitWith acts like Commit, but takes a callback, which gets run via a
// goroutine to avoid blocking this function. The callback is guaranteed to run,
// so it is safe to increment a sync.WaitGroup before calling CommitWith and
// decrement it in the callback, in order to block until all callbacks are run.
func (txn *Txn) CommitWith(cb func(error)) {
txn.commitPrecheck() // Precheck before discarding txn.
defer txn.Discard()
if cb == nil {
panic("Nil callback provided to CommitWith")
}
if len(txn.writes) == 0 {
// Do not run these callbacks from here, because the CommitWith and the
// callback might be acquiring the same locks. Instead run the callback
// from another goroutine.
go runTxnCallback(&txnCb{user: cb, err: nil})
return
}
commitCb, err := txn.commitAndSend()
if err != nil {
go runTxnCallback(&txnCb{user: cb, err: err})
return
}
go runTxnCallback(&txnCb{user: cb, commit: commitCb})
}
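// The WaitGroup pattern described above, as a sketch (the write is
// illustrative):
//
//	var wg sync.WaitGroup
//	wg.Add(1)
//	txn := db.NewTransaction(true)
//	_ = txn.Set([]byte("k"), []byte("v"))
//	txn.CommitWith(func(err error) {
//		defer wg.Done()
//		// err is nil on success, or the conflict/write error.
//	})
//	wg.Wait() // blocks until the callback has run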
// ReadTs returns the read timestamp of the transaction.
func (txn *Txn) ReadTs() uint64 {
return txn.readTs
}
// NewTransaction creates a new transaction. Badger supports concurrent execution of transactions,
// providing serializable snapshot isolation, avoiding write skews. Badger achieves this by tracking
// the keys read and at Commit time, ensuring that these read keys weren't concurrently modified by
// another transaction.
//
// For read-only transactions, set update to false. In this mode, we don't track the rows read for
// any changes. Thus, any long running iterations done in this mode wouldn't pay this overhead.
//
// Running transactions concurrently is OK. However, a transaction itself isn't thread safe, and
// should only be run serially. It doesn't matter if a transaction is created by one goroutine and
// passed down to another, as long as the Txn APIs are called serially.
//
// When you create a new transaction, it is absolutely essential to call
// Discard(). This should be done irrespective of what the update param is set
// to. Commit API internally runs Discard, but running it twice wouldn't cause
// any issues.
//
// txn := db.NewTransaction(false)
// defer txn.Discard()
// // Call various APIs.
func (db *DB) NewTransaction(update bool) *Txn {
return db.newTransaction(update, false)
}
func (db *DB) newTransaction(update, isManaged bool) *Txn {
if db.opt.ReadOnly && update {
// DB is read-only, force read-only transaction.
update = false
}
txn := &Txn{
update: update,
db: db,
count: 1, // One extra entry for BitFin.
size: int64(len(txnKey) + 10), // Some buffer for the extra entry.
}
if update {
txn.pendingWrites = make(map[string]*Entry)
txn.db.orc.addRef()
}
// It is important that the oracle addRef happens BEFORE we retrieve a read
// timestamp. Otherwise, it is possible that the oracle commit map would
// become nil after we get the read timestamp.
// The sequence of events can be:
// 1. This txn gets a read timestamp.
// 2. Another txn working on the same keyset commits them, and decrements
// the reference to oracle.
// 3. Oracle ref reaches zero, resetting commit map.
// 4. This txn increments the oracle reference.
// 5. Now this txn would go on to commit the keyset, and no conflicts
// would be detected.
// See issue: https://github.com/dgraph-io/badger/issues/574
if !isManaged {
txn.readTs = db.orc.readTs()
}
return txn
}
// View executes a function creating and managing a read-only transaction for the user. Error
// returned by the function is relayed by the View method.
// If View is used with managed transactions, it would assume a read timestamp of MaxUint64.
func (db *DB) View(fn func(txn *Txn) error) error {
var txn *Txn
if db.opt.managedTxns {
txn = db.NewTransactionAt(math.MaxUint64, false)
} else {
txn = db.NewTransaction(false)
}
defer txn.Discard()
return fn(txn)
}
// Update executes a function, creating and managing a read-write transaction
// for the user. Error returned by the function is relayed by the Update method.
// Update cannot be used with managed transactions.
func (db *DB) Update(fn func(txn *Txn) error) error {
if db.opt.managedTxns {
panic("Update can only be used with managedDB=false.")
}
txn := db.NewTransaction(true)
defer txn.Discard()
if err := fn(txn); err != nil {
return err
}
return txn.Commit()
}
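// A hedged sketch of both wrappers (key and value are illustrative):
//
//	err := db.Update(func(txn *Txn) error {
//		return txn.Set([]byte("k"), []byte("v"))
//	}) // commits on a nil error; discards either way
//	_ = err
//	_ = db.View(func(txn *Txn) error {
//		_, err := txn.Get([]byte("k"))
//		return err
//	})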

vendor/github.com/dgraph-io/badger/txn_test.go generated vendored Normal file

@ -0,0 +1,845 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"io/ioutil"
"math/rand"
"os"
"strconv"
"sync"
"testing"
"time"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"github.com/stretchr/testify/require"
)
func TestTxnSimple(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
txn := db.NewTransaction(true)
for i := 0; i < 10; i++ {
k := []byte(fmt.Sprintf("key=%d", i))
v := []byte(fmt.Sprintf("val=%d", i))
txn.Set(k, v)
}
item, err := txn.Get([]byte("key=8"))
require.NoError(t, err)
require.NoError(t, item.Value(func(val []byte) error {
require.Equal(t, []byte("val=8"), val)
return nil
}))
require.Panics(t, func() { txn.CommitAt(100, nil) })
require.NoError(t, txn.Commit())
})
}
func TestTxnReadAfterWrite(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
var wg sync.WaitGroup
N := 100
wg.Add(N)
for i := 0; i < N; i++ {
go func(i int) {
defer wg.Done()
key := []byte(fmt.Sprintf("key%d", i))
err := db.Update(func(tx *Txn) error {
return tx.Set(key, key)
})
require.NoError(t, err)
err = db.View(func(tx *Txn) error {
item, err := tx.Get(key)
require.NoError(t, err)
val, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, val, key)
return nil
})
require.NoError(t, err)
}(i)
}
wg.Wait()
})
}
func TestTxnCommitAsync(t *testing.T) {
key := func(i int) []byte {
return []byte(fmt.Sprintf("key=%d", i))
}
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
txn := db.NewTransaction(true)
for i := 0; i < 40; i++ {
err := txn.Set(key(i), []byte(strconv.Itoa(100)))
require.NoError(t, err)
}
require.NoError(t, txn.Commit())
txn.Discard()
closer := y.NewCloser(1)
go func() {
defer closer.Done()
for {
select {
case <-closer.HasBeenClosed():
return
default:
}
// Keep checking balance variant
txn := db.NewTransaction(false)
totalBalance := 0
for i := 0; i < 40; i++ {
item, err := txn.Get(key(i))
require.NoError(t, err)
val, err := item.ValueCopy(nil)
require.NoError(t, err)
bal, err := strconv.Atoi(string(val))
require.NoError(t, err)
totalBalance += bal
}
require.Equal(t, totalBalance, 4000)
txn.Discard()
}
}()
var wg sync.WaitGroup
wg.Add(100)
for i := 0; i < 100; i++ {
go func() {
txn := db.NewTransaction(true)
delta := rand.Intn(100)
for i := 0; i < 20; i++ {
err := txn.Set(key(i), []byte(strconv.Itoa(100-delta)))
require.NoError(t, err)
}
for i := 20; i < 40; i++ {
err := txn.Set(key(i), []byte(strconv.Itoa(100+delta)))
require.NoError(t, err)
}
// We are only doing writes, so there won't be any conflicts.
txn.CommitWith(func(err error) {})
txn.Discard()
wg.Done()
}()
}
wg.Wait()
closer.SignalAndWait()
time.Sleep(time.Millisecond * 10) // allow goroutine to complete.
})
}
func TestTxnVersions(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
k := []byte("key")
for i := 1; i < 10; i++ {
txn := db.NewTransaction(true)
txn.Set(k, []byte(fmt.Sprintf("valversion=%d", i)))
require.NoError(t, txn.Commit())
require.Equal(t, uint64(i), db.orc.readTs())
}
checkIterator := func(itr *Iterator, i int) {
defer itr.Close()
count := 0
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
require.Equal(t, k, item.Key())
val, err := item.ValueCopy(nil)
require.NoError(t, err)
exp := fmt.Sprintf("valversion=%d", i)
require.Equal(t, exp, string(val), "i=%d", i)
count++
}
require.Equal(t, 1, count, "i=%d", i) // Should only loop once.
}
checkAllVersions := func(itr *Iterator, i int) {
var version uint64
if itr.opt.Reverse {
version = 1
} else {
version = uint64(i)
}
count := 0
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
require.Equal(t, k, item.Key())
require.Equal(t, version, item.Version())
val, err := item.ValueCopy(nil)
require.NoError(t, err)
exp := fmt.Sprintf("valversion=%d", version)
require.Equal(t, exp, string(val), "v=%d", version)
count++
if itr.opt.Reverse {
version++
} else {
version--
}
}
require.Equal(t, i, count, "i=%d", i) // Should loop as many times as i.
}
for i := 1; i < 10; i++ {
txn := db.NewTransaction(true)
txn.readTs = uint64(i) // Read version at i.
item, err := txn.Get(k)
require.NoError(t, err)
val, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, []byte(fmt.Sprintf("valversion=%d", i)), val,
"Expected versions to match up at i=%d", i)
// Try retrieving the latest version forward and reverse.
itr := txn.NewIterator(DefaultIteratorOptions)
checkIterator(itr, i)
opt := DefaultIteratorOptions
opt.Reverse = true
itr = txn.NewIterator(opt)
checkIterator(itr, i)
// Now try retrieving all versions forward and reverse.
opt = DefaultIteratorOptions
opt.AllVersions = true
itr = txn.NewIterator(opt)
checkAllVersions(itr, i)
itr.Close()
opt = DefaultIteratorOptions
opt.AllVersions = true
opt.Reverse = true
itr = txn.NewIterator(opt)
checkAllVersions(itr, i)
itr.Close()
txn.Discard()
}
txn := db.NewTransaction(true)
defer txn.Discard()
item, err := txn.Get(k)
require.NoError(t, err)
val, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, []byte("valversion=9"), val)
})
}
func TestTxnWriteSkew(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
// Accounts
ax := []byte("x")
ay := []byte("y")
// Set balance to $100 in each account.
txn := db.NewTransaction(true)
defer txn.Discard()
val := []byte(strconv.Itoa(100))
txn.Set(ax, val)
txn.Set(ay, val)
require.NoError(t, txn.Commit())
require.Equal(t, uint64(1), db.orc.readTs())
getBal := func(txn *Txn, key []byte) (bal int) {
item, err := txn.Get(key)
require.NoError(t, err)
val, err := item.ValueCopy(nil)
require.NoError(t, err)
bal, err = strconv.Atoi(string(val))
require.NoError(t, err)
return bal
}
// Start two transactions; each reads both accounts and deducts from one account.
txn1 := db.NewTransaction(true)
sum := getBal(txn1, ax)
sum += getBal(txn1, ay)
require.Equal(t, 200, sum)
txn1.Set(ax, []byte("0")) // Deduct 100 from ax.
// Let's read this back.
sum = getBal(txn1, ax)
require.Equal(t, 0, sum)
sum += getBal(txn1, ay)
require.Equal(t, 100, sum)
// Don't commit yet.
txn2 := db.NewTransaction(true)
sum = getBal(txn2, ax)
sum += getBal(txn2, ay)
require.Equal(t, 200, sum)
txn2.Set(ay, []byte("0")) // Deduct 100 from ay.
// Let's read this back.
sum = getBal(txn2, ax)
require.Equal(t, 100, sum)
sum += getBal(txn2, ay)
require.Equal(t, 100, sum)
// Commit both now.
require.NoError(t, txn1.Commit())
require.Error(t, txn2.Commit()) // This should fail.
require.Equal(t, uint64(2), db.orc.readTs())
})
}
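// An illustrative model (hypothetical, not badger's internal API) of the
// conflict rule behind the write-skew test: a transaction reading at readTs
// must abort at commit time if any key in its read set was committed by
// another transaction after readTs.
func hasReadConflict(readTs uint64, readSet map[string]bool, lastCommitTs map[string]uint64) bool {
	for key := range readSet {
		if ts, ok := lastCommitTs[key]; ok && ts > readTs {
			return true // someone wrote a key we read: abort, as txn2 does above.
		}
	}
	return false
}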
// a3, a2, b4 (del), b3, c2, c1
// Read at ts=4 -> a3, c2
// Read at ts=4 (uncommitted) -> a3, b4
// Read at ts=3 -> a3, b3, c2
// Read at ts=2 -> a2, c2
// Read at ts=1 -> c1
func TestTxnIterationEdgeCase(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
ka := []byte("a")
kb := []byte("b")
kc := []byte("c")
// c1
txn := db.NewTransaction(true)
txn.Set(kc, []byte("c1"))
require.NoError(t, txn.Commit())
require.Equal(t, uint64(1), db.orc.readTs())
// a2, c2
txn = db.NewTransaction(true)
txn.Set(ka, []byte("a2"))
txn.Set(kc, []byte("c2"))
require.NoError(t, txn.Commit())
require.Equal(t, uint64(2), db.orc.readTs())
// a3, b3
txn = db.NewTransaction(true)
txn.Set(ka, []byte("a3"))
txn.Set(kb, []byte("b3"))
require.NoError(t, txn.Commit())
require.Equal(t, uint64(3), db.orc.readTs())
// b4, c4 (del) (uncommitted)
txn4 := db.NewTransaction(true)
require.NoError(t, txn4.Set(kb, []byte("b4")))
require.NoError(t, txn4.Delete(kc))
require.Equal(t, uint64(3), db.orc.readTs())
// b4 (del)
txn = db.NewTransaction(true)
txn.Delete(kb)
require.NoError(t, txn.Commit())
require.Equal(t, uint64(4), db.orc.readTs())
checkIterator := func(itr *Iterator, expected []string) {
defer itr.Close()
var i int
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
val, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, expected[i], string(val), "readts=%d", itr.readTs)
i++
}
require.Equal(t, len(expected), i)
}
txn = db.NewTransaction(true)
defer txn.Discard()
itr := txn.NewIterator(DefaultIteratorOptions)
itr5 := txn4.NewIterator(DefaultIteratorOptions)
checkIterator(itr, []string{"a3", "c2"})
checkIterator(itr5, []string{"a3", "b4"})
rev := DefaultIteratorOptions
rev.Reverse = true
itr = txn.NewIterator(rev)
itr5 = txn4.NewIterator(rev)
checkIterator(itr, []string{"c2", "a3"})
checkIterator(itr5, []string{"b4", "a3"})
txn.readTs = 3
itr = txn.NewIterator(DefaultIteratorOptions)
checkIterator(itr, []string{"a3", "b3", "c2"})
itr = txn.NewIterator(rev)
checkIterator(itr, []string{"c2", "b3", "a3"})
txn.readTs = 2
itr = txn.NewIterator(DefaultIteratorOptions)
checkIterator(itr, []string{"a2", "c2"})
itr = txn.NewIterator(rev)
checkIterator(itr, []string{"c2", "a2"})
txn.readTs = 1
itr = txn.NewIterator(DefaultIteratorOptions)
checkIterator(itr, []string{"c1"})
itr = txn.NewIterator(rev)
checkIterator(itr, []string{"c1"})
})
}
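// An illustrative sketch (hypothetical types, not badger internals) of the
// visibility rule the tables above encode: a read at readTs sees, for each
// key, the newest version with ts <= readTs, and a delete marker at that
// version hides the key entirely.
type keyVersion struct {
	ts      uint64
	val     string
	deleted bool
}

func visibleAt(versions []keyVersion, readTs uint64) (string, bool) {
	var best keyVersion
	for _, v := range versions {
		if v.ts <= readTs && v.ts > best.ts {
			best = v
		}
	}
	if best.ts == 0 || best.deleted {
		return "", false
	}
	return best.val, true
}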
// a2, a3, b4 (del), b3, c2, c1
// Read at ts=4 -> a3, c2
// Read at ts=3 -> a3, b3, c2
// Read at ts=2 -> a2, c2
// Read at ts=1 -> c1
func TestTxnIterationEdgeCase2(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
ka := []byte("a")
kb := []byte("aa")
kc := []byte("aaa")
// c1
txn := db.NewTransaction(true)
txn.Set(kc, []byte("c1"))
require.NoError(t, txn.Commit())
require.Equal(t, uint64(1), db.orc.readTs())
// a2, c2
txn = db.NewTransaction(true)
txn.Set(ka, []byte("a2"))
txn.Set(kc, []byte("c2"))
require.NoError(t, txn.Commit())
require.Equal(t, uint64(2), db.orc.readTs())
// a3, b3
txn = db.NewTransaction(true)
txn.Set(ka, []byte("a3"))
txn.Set(kb, []byte("b3"))
require.NoError(t, txn.Commit())
require.Equal(t, uint64(3), db.orc.readTs())
// b4 (del)
txn = db.NewTransaction(true)
txn.Delete(kb)
require.NoError(t, txn.Commit())
require.Equal(t, uint64(4), db.orc.readTs())
checkIterator := func(itr *Iterator, expected []string) {
defer itr.Close()
var i int
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
val, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, expected[i], string(val), "readts=%d", itr.readTs)
i++
}
require.Equal(t, len(expected), i)
}
txn = db.NewTransaction(true)
defer txn.Discard()
rev := DefaultIteratorOptions
rev.Reverse = true
itr := txn.NewIterator(DefaultIteratorOptions)
checkIterator(itr, []string{"a3", "c2"})
itr = txn.NewIterator(rev)
checkIterator(itr, []string{"c2", "a3"})
txn.readTs = 5
itr = txn.NewIterator(DefaultIteratorOptions)
itr.Seek(ka)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), ka)
itr.Seek(kc)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kc)
itr.Close()
itr = txn.NewIterator(rev)
itr.Seek(ka)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), ka)
itr.Seek(kc)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kc)
itr.Close()
txn.readTs = 3
itr = txn.NewIterator(DefaultIteratorOptions)
checkIterator(itr, []string{"a3", "b3", "c2"})
itr = txn.NewIterator(rev)
checkIterator(itr, []string{"c2", "b3", "a3"})
txn.readTs = 2
itr = txn.NewIterator(DefaultIteratorOptions)
checkIterator(itr, []string{"a2", "c2"})
itr = txn.NewIterator(rev)
checkIterator(itr, []string{"c2", "a2"})
txn.readTs = 1
itr = txn.NewIterator(DefaultIteratorOptions)
checkIterator(itr, []string{"c1"})
itr = txn.NewIterator(rev)
checkIterator(itr, []string{"c1"})
})
}
func TestTxnIterationEdgeCase3(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
kb := []byte("abc")
kc := []byte("acd")
kd := []byte("ade")
// c1
txn := db.NewTransaction(true)
txn.Set(kc, []byte("c1"))
require.NoError(t, txn.Commit())
require.Equal(t, uint64(1), db.orc.readTs())
// b2
txn = db.NewTransaction(true)
txn.Set(kb, []byte("b2"))
require.NoError(t, txn.Commit())
require.Equal(t, uint64(2), db.orc.readTs())
txn2 := db.NewTransaction(true)
require.NoError(t, txn2.Set(kd, []byte("d2")))
require.NoError(t, txn2.Delete(kc))
txn = db.NewTransaction(true)
defer txn.Discard()
rev := DefaultIteratorOptions
rev.Reverse = true
itr := txn.NewIterator(DefaultIteratorOptions)
itr.Seek([]byte("ab"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kb)
itr.Seek([]byte("ac"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kc)
itr.Seek(nil)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kb)
itr.Seek([]byte("ac"))
itr.Rewind()
itr.Seek(nil)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kb)
itr.Seek([]byte("ac"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kc)
itr.Close()
// Keys: "abc", "ade"
// Read pending writes.
itr = txn2.NewIterator(DefaultIteratorOptions)
itr.Seek([]byte("ab"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kb)
itr.Seek([]byte("ac"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kd)
itr.Seek(nil)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kb)
itr.Seek([]byte("ac"))
itr.Rewind()
itr.Seek(nil)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kb)
itr.Seek([]byte("ad"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kd)
itr.Close()
itr = txn.NewIterator(rev)
itr.Seek([]byte("ac"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kb)
itr.Seek([]byte("ad"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kc)
itr.Seek(nil)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kc)
itr.Seek([]byte("ac"))
itr.Rewind()
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kc)
itr.Seek([]byte("ad"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kc)
itr.Close()
// Keys: "abc", "ade"
itr = txn2.NewIterator(rev)
itr.Seek([]byte("ad"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kb)
itr.Seek([]byte("ae"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kd)
itr.Seek(nil)
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kd)
itr.Seek([]byte("ab"))
itr.Rewind()
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kd)
itr.Seek([]byte("ac"))
require.True(t, itr.Valid())
require.Equal(t, itr.item.Key(), kb)
itr.Close()
})
}
func TestIteratorAllVersionsWithDeleted(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
// Write two keys
err := db.Update(func(txn *Txn) error {
txn.Set([]byte("answer1"), []byte("42"))
txn.Set([]byte("answer2"), []byte("43"))
return nil
})
require.NoError(t, err)
// Delete the specific key version from underlying db directly
err = db.View(func(txn *Txn) error {
item, err := txn.Get([]byte("answer1"))
require.NoError(t, err)
err = txn.db.batchSet([]*Entry{
{
Key: y.KeyWithTs(item.key, item.version),
meta: bitDelete,
},
})
require.NoError(t, err)
return err
})
require.NoError(t, err)
opts := DefaultIteratorOptions
opts.AllVersions = true
opts.PrefetchValues = false
// Verify that deleted shows up when AllVersions is set.
err = db.View(func(txn *Txn) error {
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
count++
item := it.Item()
if count == 1 {
require.Equal(t, []byte("answer1"), item.Key())
require.True(t, item.meta&bitDelete > 0)
} else {
require.Equal(t, []byte("answer2"), item.Key())
}
}
require.Equal(t, 2, count)
return nil
})
require.NoError(t, err)
})
}
func TestIteratorAllVersionsWithDeleted2(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
// Set and delete alternatively
for i := 0; i < 4; i++ {
err := db.Update(func(txn *Txn) error {
if i%2 == 0 {
txn.Set([]byte("key"), []byte("value"))
return nil
}
txn.Delete([]byte("key"))
return nil
})
require.NoError(t, err)
}
opts := DefaultIteratorOptions
opts.AllVersions = true
opts.PrefetchValues = false
// Verify that deleted shows up when AllVersions is set.
err := db.View(func(txn *Txn) error {
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
require.Equal(t, []byte("key"), item.Key())
if count%2 != 0 {
val, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, val, []byte("value"))
} else {
require.True(t, item.meta&bitDelete > 0)
}
count++
}
require.Equal(t, 4, count)
return nil
})
require.NoError(t, err)
})
}
func TestManagedDB(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
opt.managedTxns = true
db, err := Open(opt)
require.NoError(t, err)
defer db.Close()
key := func(i int) []byte {
return []byte(fmt.Sprintf("key-%02d", i))
}
val := func(i int) []byte {
return []byte(fmt.Sprintf("val-%d", i))
}
require.Panics(t, func() {
db.Update(func(tx *Txn) error { return nil })
})
err = db.View(func(tx *Txn) error { return nil })
require.NoError(t, err)
// Write data at t=3.
txn := db.NewTransactionAt(3, true)
for i := 0; i <= 3; i++ {
require.NoError(t, txn.Set(key(i), val(i)))
}
require.Panics(t, func() { txn.Commit() })
require.NoError(t, txn.CommitAt(3, nil))
// Read data at t=2.
txn = db.NewTransactionAt(2, false)
for i := 0; i <= 3; i++ {
_, err := txn.Get(key(i))
require.Equal(t, ErrKeyNotFound, err)
}
txn.Discard()
// Read data at t=3.
txn = db.NewTransactionAt(3, false)
for i := 0; i <= 3; i++ {
item, err := txn.Get(key(i))
require.NoError(t, err)
require.Equal(t, uint64(3), item.Version())
v, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, val(i), v)
}
txn.Discard()
// Write data at t=7.
txn = db.NewTransactionAt(6, true)
for i := 0; i <= 7; i++ {
_, err := txn.Get(key(i))
if err == nil {
continue // Don't overwrite existing keys.
}
require.NoError(t, txn.Set(key(i), val(i)))
}
require.NoError(t, txn.CommitAt(7, nil))
// Read data at t=9.
txn = db.NewTransactionAt(9, false)
for i := 0; i <= 9; i++ {
item, err := txn.Get(key(i))
if i <= 7 {
require.NoError(t, err)
} else {
require.Equal(t, ErrKeyNotFound, err)
}
if i <= 3 {
require.Equal(t, uint64(3), item.Version())
} else if i <= 7 {
require.Equal(t, uint64(7), item.Version())
}
if i <= 7 {
v, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, val(i), v)
}
}
txn.Discard()
}
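// A minimal managed-mode sketch (hypothetical helper), assuming a DB opened
// with managedTxns as in the test above. In managed mode the caller owns the
// timestamp oracle: reads open a snapshot at an explicit version and commits
// name their own commit timestamp.
func exampleManagedWrite(db *DB, readTs, commitTs uint64) error {
	txn := db.NewTransactionAt(readTs, true)
	defer txn.Discard()
	if err := txn.Set([]byte("k"), []byte("v")); err != nil {
		return err
	}
	return txn.CommitAt(commitTs, nil) // nil callback makes this synchronous.
}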
func TestArmV7Issue311Fix(t *testing.T) {
dir, err := ioutil.TempDir("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
config := DefaultOptions
config.TableLoadingMode = options.MemoryMap
config.ValueLogFileSize = 16 << 20
config.LevelOneSize = 8 << 20
config.MaxTableSize = 2 << 20
config.Dir = dir
config.ValueDir = dir
config.SyncWrites = false
db, err := Open(config)
if err != nil {
t.Fatalf("cannot open db at location %s: %v", dir, err)
}
err = db.View(func(txn *Txn) error { return nil })
if err != nil {
t.Fatal(err)
}
err = db.Update(func(txn *Txn) error {
return txn.Set([]byte{0x11}, []byte{0x22})
})
if err != nil {
t.Fatal(err)
}
err = db.Update(func(txn *Txn) error {
return txn.Set([]byte{0x11}, []byte{0x22})
})
if err != nil {
t.Fatal(err)
}
if err = db.Close(); err != nil {
t.Fatal(err)
}
}

141
vendor/github.com/dgraph-io/badger/util.go generated vendored Normal file

@@ -0,0 +1,141 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"encoding/hex"
"io/ioutil"
"math/rand"
"sync/atomic"
"time"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/table"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
// summary is produced when DB is closed. Currently it is used only for testing.
type summary struct {
fileIDs map[uint64]bool
}
func (s *levelsController) getSummary() *summary {
out := &summary{
fileIDs: make(map[uint64]bool),
}
for _, l := range s.levels {
l.getSummary(out)
}
return out
}
func (s *levelHandler) getSummary(sum *summary) {
s.RLock()
defer s.RUnlock()
for _, t := range s.tables {
sum.fileIDs[t.ID()] = true
}
}
func (s *DB) validate() error { return s.lc.validate() }
func (s *levelsController) validate() error {
for _, l := range s.levels {
if err := l.validate(); err != nil {
return errors.Wrap(err, "Levels Controller")
}
}
return nil
}
// validate does a sanity check on one level of data or in-memory index.
func (s *levelHandler) validate() error {
if s.level == 0 {
return nil
}
s.RLock()
defer s.RUnlock()
numTables := len(s.tables)
for j := 1; j < numTables; j++ {
if j >= len(s.tables) {
return errors.Errorf("Level %d, j=%d numTables=%d", s.level, j, numTables)
}
if y.CompareKeys(s.tables[j-1].Biggest(), s.tables[j].Smallest()) >= 0 {
return errors.Errorf(
"Inter: Biggest(j-1) \n%s\n vs Smallest(j): \n%s\n: level=%d j=%d numTables=%d",
hex.Dump(s.tables[j-1].Biggest()), hex.Dump(s.tables[j].Smallest()),
s.level, j, numTables)
}
if y.CompareKeys(s.tables[j].Smallest(), s.tables[j].Biggest()) > 0 {
return errors.Errorf(
"Intra: %q vs %q: level=%d j=%d numTables=%d",
s.tables[j].Smallest(), s.tables[j].Biggest(), s.level, j, numTables)
}
}
return nil
}
// func (s *KV) debugPrintMore() { s.lc.debugPrintMore() }
// // debugPrintMore shows key ranges of each level.
// func (s *levelsController) debugPrintMore() {
// s.Lock()
// defer s.Unlock()
// for i := 0; i < s.kv.opt.MaxLevels; i++ {
// s.levels[i].debugPrintMore()
// }
// }
// func (s *levelHandler) debugPrintMore() {
// s.RLock()
// defer s.RUnlock()
// s.elog.Printf("Level %d:", s.level)
// for _, t := range s.tables {
// y.Printf(" [%s, %s]", t.Smallest(), t.Biggest())
// }
// y.Printf("\n")
// }
// reserveFileID reserves a unique file id.
func (s *levelsController) reserveFileID() uint64 {
id := atomic.AddUint64(&s.nextFileID, 1)
return id - 1
}
func getIDMap(dir string) map[uint64]struct{} {
fileInfos, err := ioutil.ReadDir(dir)
y.Check(err)
idMap := make(map[uint64]struct{})
for _, info := range fileInfos {
if info.IsDir() {
continue
}
fileID, ok := table.ParseFileID(info.Name())
if !ok {
continue
}
idMap[fileID] = struct{}{}
}
return idMap
}
func init() {
rand.Seed(time.Now().UnixNano())
}

1314
vendor/github.com/dgraph-io/badger/value.go generated vendored Normal file

File diff suppressed because it is too large

880
vendor/github.com/dgraph-io/badger/value_test.go generated vendored Normal file

@@ -0,0 +1,880 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"io/ioutil"
"math/rand"
"os"
"sync"
"testing"
"github.com/stretchr/testify/require"
humanize "gx/ipfs/QmQMxG9D52TirZd9eLA37nxiNspnMRkKbyPWrVAa1gvtSy/go-humanize"
"gx/ipfs/QmRvYNctevGUW52urgmoFZscT6buMKqhHezLUS64WepGWn/go-net/trace"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/options"
"gx/ipfs/QmU4emVTYFKnoJ5yK3pPEN9joyEx6U7y892PDx26ZtNxQd/badger/y"
)
func TestValueBasic(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
y.Check(err)
defer os.RemoveAll(dir)
kv, _ := Open(getTestOptions(dir))
defer kv.Close()
log := &kv.vlog
// Use values big enough that the value log writes them even if SyncWrites is false.
const val1 = "sampleval012345678901234567890123"
const val2 = "samplevalb012345678901234567890123"
require.True(t, len(val1) >= kv.opt.ValueThreshold)
e := &Entry{
Key: []byte("samplekey"),
Value: []byte(val1),
meta: bitValuePointer,
}
e2 := &Entry{
Key: []byte("samplekeyb"),
Value: []byte(val2),
meta: bitValuePointer,
}
b := new(request)
b.Entries = []*Entry{e, e2}
log.write([]*request{b})
require.Len(t, b.Ptrs, 2)
t.Logf("Pointer written: %+v %+v\n", b.Ptrs[0], b.Ptrs[1])
s := new(y.Slice)
buf1, cb1, err1 := log.readValueBytes(b.Ptrs[0], s)
buf2, cb2, err2 := log.readValueBytes(b.Ptrs[1], s)
require.NoError(t, err1)
require.NoError(t, err2)
defer runCallback(cb1)
defer runCallback(cb2)
readEntries := []Entry{valueBytesToEntry(buf1), valueBytesToEntry(buf2)}
require.EqualValues(t, []Entry{
{
Key: []byte("samplekey"),
Value: []byte(val1),
meta: bitValuePointer,
},
{
Key: []byte("samplekeyb"),
Value: []byte(val2),
meta: bitValuePointer,
},
}, readEntries)
}
func TestValueGCManaged(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
N := 10000
opt := getTestOptions(dir)
opt.ValueLogMaxEntries = uint32(N / 10)
opt.managedTxns = true
db, err := Open(opt)
require.NoError(t, err)
defer db.Close()
var ts uint64
newTs := func() uint64 {
ts++
return ts
}
sz := 64 << 10
var wg sync.WaitGroup
for i := 0; i < N; i++ {
v := make([]byte, sz)
rand.Read(v[:rand.Intn(sz)])
wg.Add(1)
txn := db.NewTransactionAt(newTs(), true)
require.NoError(t, txn.Set([]byte(fmt.Sprintf("key%d", i)), v))
require.NoError(t, txn.CommitAt(newTs(), func(err error) {
wg.Done()
require.NoError(t, err)
}))
}
for i := 0; i < N; i++ {
wg.Add(1)
txn := db.NewTransactionAt(newTs(), true)
require.NoError(t, txn.Delete([]byte(fmt.Sprintf("key%d", i))))
require.NoError(t, txn.CommitAt(newTs(), func(err error) {
wg.Done()
require.NoError(t, err)
}))
}
wg.Wait()
files, err := ioutil.ReadDir(dir)
require.NoError(t, err)
for _, fi := range files {
t.Logf("File: %s. Size: %s\n", fi.Name(), humanize.Bytes(uint64(fi.Size())))
}
for i := 0; i < 100; i++ {
// Try at max 100 times to GC even a single value log file.
if err := db.RunValueLogGC(0.0001); err == nil {
return // Done
}
}
require.Fail(t, "Unable to GC even a single value log file.")
}
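// A minimal sketch of driving value log GC from application code
// (hypothetical helper), mirroring the retry loop above: each successful call
// rewrites at most one value log file, and an error signals that nothing
// (more) could be collected at this discard ratio.
func exampleRunValueLogGC(db *DB) {
	for {
		if err := db.RunValueLogGC(0.5); err != nil {
			return
		}
	}
}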
func TestValueGC(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
opt.ValueLogFileSize = 1 << 20
kv, _ := Open(opt)
defer kv.Close()
sz := 32 << 10
txn := kv.NewTransaction(true)
for i := 0; i < 100; i++ {
v := make([]byte, sz)
rand.Read(v[:rand.Intn(sz)])
require.NoError(t, txn.Set([]byte(fmt.Sprintf("key%d", i)), v))
if i%20 == 0 {
require.NoError(t, txn.Commit())
txn = kv.NewTransaction(true)
}
}
require.NoError(t, txn.Commit())
for i := 0; i < 45; i++ {
txnDelete(t, kv, []byte(fmt.Sprintf("key%d", i)))
}
kv.vlog.filesLock.RLock()
lf := kv.vlog.filesMap[kv.vlog.sortedFids()[0]]
kv.vlog.filesLock.RUnlock()
// lf.iterate(0, func(e Entry) bool {
// e.print("lf")
// return true
// })
tr := trace.New("Test", "Test")
defer tr.Finish()
kv.vlog.rewrite(lf, tr)
for i := 45; i < 100; i++ {
key := []byte(fmt.Sprintf("key%d", i))
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get(key)
require.NoError(t, err)
val := getItemValue(t, item)
require.NotNil(t, val)
require.True(t, len(val) == sz, "Size found: %d", len(val))
return nil
}))
}
}
func TestValueGC2(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
opt.ValueLogFileSize = 1 << 20
kv, _ := Open(opt)
defer kv.Close()
sz := 32 << 10
txn := kv.NewTransaction(true)
for i := 0; i < 100; i++ {
v := make([]byte, sz)
rand.Read(v[:rand.Intn(sz)])
require.NoError(t, txn.Set([]byte(fmt.Sprintf("key%d", i)), v))
if i%20 == 0 {
require.NoError(t, txn.Commit())
txn = kv.NewTransaction(true)
}
}
require.NoError(t, txn.Commit())
for i := 0; i < 5; i++ {
txnDelete(t, kv, []byte(fmt.Sprintf("key%d", i)))
}
for i := 5; i < 10; i++ {
v := []byte(fmt.Sprintf("value%d", i))
txnSet(t, kv, []byte(fmt.Sprintf("key%d", i)), v, 0)
}
kv.vlog.filesLock.RLock()
lf := kv.vlog.filesMap[kv.vlog.sortedFids()[0]]
kv.vlog.filesLock.RUnlock()
// lf.iterate(0, func(e Entry) bool {
// e.print("lf")
// return true
// })
tr := trace.New("Test", "Test")
defer tr.Finish()
kv.vlog.rewrite(lf, tr)
for i := 0; i < 5; i++ {
key := []byte(fmt.Sprintf("key%d", i))
require.NoError(t, kv.View(func(txn *Txn) error {
_, err := txn.Get(key)
require.Equal(t, ErrKeyNotFound, err)
return nil
}))
}
for i := 5; i < 10; i++ {
key := []byte(fmt.Sprintf("key%d", i))
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get(key)
require.NoError(t, err)
val := getItemValue(t, item)
require.NotNil(t, val)
require.Equal(t, string(val), fmt.Sprintf("value%d", i))
return nil
}))
}
for i := 10; i < 100; i++ {
key := []byte(fmt.Sprintf("key%d", i))
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get(key)
require.NoError(t, err)
val := getItemValue(t, item)
require.NotNil(t, val)
require.True(t, len(val) == sz, "Size found: %d", len(val))
return nil
}))
}
}
func TestValueGC3(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
opt.ValueLogFileSize = 1 << 20
kv, err := Open(opt)
require.NoError(t, err)
defer kv.Close()
// We want to test whether an iterator can continue through a value log GC.
valueSize := 32 << 10
var value3 []byte
txn := kv.NewTransaction(true)
for i := 0; i < 100; i++ {
v := make([]byte, valueSize) // 32 KiB * 100 values take >= 3,276,800 bytes.
if i == 3 {
value3 = v
}
rand.Read(v[:])
// Keys key000, key001, key002, such that sorted order matches insertion order
require.NoError(t, txn.Set([]byte(fmt.Sprintf("key%03d", i)), v))
if i%20 == 0 {
require.NoError(t, txn.Commit())
txn = kv.NewTransaction(true)
}
}
require.NoError(t, txn.Commit())
// Start an iterator to keys in the first value log file
itOpt := IteratorOptions{
PrefetchValues: false,
PrefetchSize: 0,
Reverse: false,
}
txn = kv.NewTransaction(true)
it := txn.NewIterator(itOpt)
defer it.Close()
// Walk a few keys
it.Rewind()
require.True(t, it.Valid())
item := it.Item()
require.Equal(t, []byte("key000"), item.Key())
it.Next()
require.True(t, it.Valid())
item = it.Item()
require.Equal(t, []byte("key001"), item.Key())
it.Next()
require.True(t, it.Valid())
item = it.Item()
require.Equal(t, []byte("key002"), item.Key())
// Like other tests, we pull out a logFile to rewrite it directly
kv.vlog.filesLock.RLock()
logFile := kv.vlog.filesMap[kv.vlog.sortedFids()[0]]
kv.vlog.filesLock.RUnlock()
tr := trace.New("Test", "Test")
defer tr.Finish()
kv.vlog.rewrite(logFile, tr)
it.Next()
require.True(t, it.Valid())
item = it.Item()
require.Equal(t, []byte("key003"), item.Key())
v3, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, value3, v3)
}
func TestValueGC4(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
opt.ValueLogFileSize = 1 << 20
opt.Truncate = true
kv, err := Open(opt)
require.NoError(t, err)
defer kv.Close()
sz := 128 << 10 // 5 entries per value log file.
txn := kv.NewTransaction(true)
for i := 0; i < 24; i++ {
v := make([]byte, sz)
rand.Read(v[:rand.Intn(sz)])
require.NoError(t, txn.Set([]byte(fmt.Sprintf("key%d", i)), v))
if i%3 == 0 {
require.NoError(t, txn.Commit())
txn = kv.NewTransaction(true)
}
}
require.NoError(t, txn.Commit())
for i := 0; i < 8; i++ {
txnDelete(t, kv, []byte(fmt.Sprintf("key%d", i)))
}
for i := 8; i < 16; i++ {
v := []byte(fmt.Sprintf("value%d", i))
txnSet(t, kv, []byte(fmt.Sprintf("key%d", i)), v, 0)
}
kv.vlog.filesLock.RLock()
lf0 := kv.vlog.filesMap[kv.vlog.sortedFids()[0]]
lf1 := kv.vlog.filesMap[kv.vlog.sortedFids()[1]]
kv.vlog.filesLock.RUnlock()
// lf.iterate(0, func(e Entry) bool {
// e.print("lf")
// return true
// })
tr := trace.New("Test", "Test")
defer tr.Finish()
kv.vlog.rewrite(lf0, tr)
kv.vlog.rewrite(lf1, tr)
err = kv.vlog.Close()
require.NoError(t, err)
err = kv.vlog.open(kv, valuePointer{Fid: 2}, kv.replayFunction())
require.NoError(t, err)
for i := 0; i < 8; i++ {
key := []byte(fmt.Sprintf("key%d", i))
require.NoError(t, kv.View(func(txn *Txn) error {
_, err := txn.Get(key)
require.Equal(t, ErrKeyNotFound, err)
return nil
}))
}
for i := 8; i < 16; i++ {
key := []byte(fmt.Sprintf("key%d", i))
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get(key)
require.NoError(t, err)
val := getItemValue(t, item)
require.NotNil(t, val)
require.Equal(t, string(val), fmt.Sprintf("value%d", i))
return nil
}))
}
}
func TestChecksums(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
// Set up SST with K1=V1
opts := getTestOptions(dir)
opts.Truncate = true
opts.ValueLogFileSize = 100 * 1024 * 1024 // 100MB
kv, err := Open(opts)
require.NoError(t, err)
require.NoError(t, kv.Close())
var (
k0 = []byte("k0")
k1 = []byte("k1")
k2 = []byte("k2")
k3 = []byte("k3")
v0 = []byte("value0-012345678901234567890123012345678901234567890123")
v1 = []byte("value1-012345678901234567890123012345678901234567890123")
v2 = []byte("value2-012345678901234567890123012345678901234567890123")
v3 = []byte("value3-012345678901234567890123012345678901234567890123")
)
// Make sure the value log would actually store the item
require.True(t, len(v0) >= kv.opt.ValueThreshold)
// Use a vlog with K0=V0 and a (corrupted) second transaction(k1,k2)
buf := createVlog(t, []*Entry{
{Key: k0, Value: v0},
{Key: k1, Value: v1},
{Key: k2, Value: v2},
})
buf[len(buf)-1]++ // Corrupt last byte
require.NoError(t, ioutil.WriteFile(vlogFilePath(dir, 0), buf, 0777))
// K1 should exist, but K2 shouldn't.
kv, err = Open(opts)
require.NoError(t, err)
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get(k0)
require.NoError(t, err)
require.Equal(t, getItemValue(t, item), v0)
_, err = txn.Get(k1)
require.Equal(t, ErrKeyNotFound, err)
_, err = txn.Get(k2)
require.Equal(t, ErrKeyNotFound, err)
return nil
}))
// Write K3 at the end of the vlog.
txnSet(t, kv, k3, v3, 0)
require.NoError(t, kv.Close())
// The vlog should contain K0 and K3 (K1 and K2 were lost when Badger last
// started up, due to checksum failure).
kv, err = Open(opts)
require.NoError(t, err)
{
txn := kv.NewTransaction(false)
iter := txn.NewIterator(DefaultIteratorOptions)
iter.Seek(k0)
require.True(t, iter.Valid())
it := iter.Item()
require.Equal(t, it.Key(), k0)
require.Equal(t, getItemValue(t, it), v0)
iter.Next()
require.True(t, iter.Valid())
it = iter.Item()
require.Equal(t, it.Key(), k3)
require.Equal(t, getItemValue(t, it), v3)
iter.Close()
txn.Discard()
}
require.NoError(t, kv.Close())
}
func TestPartialAppendToValueLog(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
// Create skeleton files.
opts := getTestOptions(dir)
opts.Truncate = true
opts.ValueLogFileSize = 100 * 1024 * 1024 // 100MB
kv, err := Open(opts)
require.NoError(t, err)
require.NoError(t, kv.Close())
var (
k0 = []byte("k0")
k1 = []byte("k1")
k2 = []byte("k2")
k3 = []byte("k3")
v0 = []byte("value0-01234567890123456789012012345678901234567890123")
v1 = []byte("value1-01234567890123456789012012345678901234567890123")
v2 = []byte("value2-01234567890123456789012012345678901234567890123")
v3 = []byte("value3-01234567890123456789012012345678901234567890123")
)
// Values need to be long enough to actually get written to value log.
require.True(t, len(v3) >= kv.opt.ValueThreshold)
// Create truncated vlog to simulate a partial append.
// k0 - single transaction, k1 and k2 in another transaction
buf := createVlog(t, []*Entry{
{Key: k0, Value: v0},
{Key: k1, Value: v1},
{Key: k2, Value: v2},
})
buf = buf[:len(buf)-6]
require.NoError(t, ioutil.WriteFile(vlogFilePath(dir, 0), buf, 0777))
// Badger should now start up
kv, err = Open(opts)
require.NoError(t, err)
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get(k0)
require.NoError(t, err)
require.Equal(t, v0, getItemValue(t, item))
_, err = txn.Get(k1)
require.Equal(t, ErrKeyNotFound, err)
_, err = txn.Get(k2)
require.Equal(t, ErrKeyNotFound, err)
return nil
}))
// When K3 is set, it should be persisted after a restart.
txnSet(t, kv, k3, v3, 0)
require.NoError(t, kv.Close())
kv, err = Open(opts)
require.NoError(t, err)
checkKeys(t, kv, [][]byte{k3})
// Replay value log from beginning, badger head is past k2.
require.NoError(t, kv.vlog.Close())
require.NoError(t,
kv.vlog.open(kv, valuePointer{Fid: 0}, kv.replayFunction()))
require.NoError(t, kv.Close())
}
func TestReadOnlyOpenWithPartialAppendToValueLog(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
// Create skeleton files.
opts := getTestOptions(dir)
opts.ValueLogFileSize = 100 * 1024 * 1024 // 100MB
kv, err := Open(opts)
require.NoError(t, err)
require.NoError(t, kv.Close())
var (
k0 = []byte("k0")
k1 = []byte("k1")
k2 = []byte("k2")
v0 = []byte("value0-012345678901234567890123")
v1 = []byte("value1-012345678901234567890123")
v2 = []byte("value2-012345678901234567890123")
)
// Create truncated vlog to simulate a partial append.
// k0 - single transaction, k1 and k2 in another transaction
buf := createVlog(t, []*Entry{
{Key: k0, Value: v0},
{Key: k1, Value: v1},
{Key: k2, Value: v2},
})
buf = buf[:len(buf)-6]
require.NoError(t, ioutil.WriteFile(vlogFilePath(dir, 0), buf, 0777))
opts.ReadOnly = true
// Badger should fail a read-only open with values to replay
kv, err = Open(opts)
require.Error(t, err)
require.Regexp(t, "Database was not properly closed, cannot open read-only|Read-only mode is not supported on Windows", err.Error())
}
func TestValueLogTrigger(t *testing.T) {
t.Skip("Difficult to trigger compaction, so skipping. Re-enable after fixing #226")
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
opt.ValueLogFileSize = 1 << 20
kv, err := Open(opt)
require.NoError(t, err)
// Write a lot of data, so it creates some work for value log GC.
sz := 32 << 10
txn := kv.NewTransaction(true)
for i := 0; i < 100; i++ {
v := make([]byte, sz)
rand.Read(v[:rand.Intn(sz)])
require.NoError(t, txn.Set([]byte(fmt.Sprintf("key%d", i)), v))
if i%20 == 0 {
require.NoError(t, txn.Commit())
txn = kv.NewTransaction(true)
}
}
require.NoError(t, txn.Commit())
for i := 0; i < 45; i++ {
txnDelete(t, kv, []byte(fmt.Sprintf("key%d", i)))
}
require.NoError(t, kv.RunValueLogGC(0.5))
require.NoError(t, kv.Close())
err = kv.RunValueLogGC(0.5)
require.Equal(t, ErrRejected, err, "Error should be returned after closing DB.")
}
func createVlog(t *testing.T, entries []*Entry) []byte {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opts := getTestOptions(dir)
opts.ValueLogFileSize = 100 * 1024 * 1024 // 100MB
kv, err := Open(opts)
require.NoError(t, err)
txnSet(t, kv, entries[0].Key, entries[0].Value, entries[0].meta)
entries = entries[1:]
txn := kv.NewTransaction(true)
for _, entry := range entries {
require.NoError(t, txn.SetWithMeta(entry.Key, entry.Value, entry.meta))
}
require.NoError(t, txn.Commit())
require.NoError(t, kv.Close())
filename := vlogFilePath(dir, 0)
buf, err := ioutil.ReadFile(filename)
require.NoError(t, err)
return buf
}
func TestPenultimateLogCorruption(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
require.NoError(t, err)
defer os.RemoveAll(dir)
opt := getTestOptions(dir)
opt.ValueLogLoadingMode = options.FileIO
// Each txn generates at least two entries. 3 txns will fit in each file.
opt.ValueLogMaxEntries = 5
db0, err := Open(opt)
require.NoError(t, err)
h := testHelper{db: db0, t: t}
h.writeRange(0, 7)
h.readRange(0, 7)
for i := 2; i >= 0; i-- {
fpath := vlogFilePath(dir, uint32(i))
fi, err := os.Stat(fpath)
require.NoError(t, err)
require.True(t, fi.Size() > 0, "Empty file at log=%d", i)
if i == 0 {
err := os.Truncate(fpath, fi.Size()-1)
require.NoError(t, err)
}
}
// Simulate a crash by not closing db0, but releasing the locks.
if db0.dirLockGuard != nil {
require.NoError(t, db0.dirLockGuard.release())
}
if db0.valueDirGuard != nil {
require.NoError(t, db0.valueDirGuard.release())
}
opt.Truncate = true
db1, err := Open(opt)
require.NoError(t, err)
h.db = db1
h.readRange(0, 1) // Only 2 should be gone, because it is at the end of logfile 0.
h.readRange(3, 7)
err = db1.View(func(txn *Txn) error {
_, err := txn.Get(h.key(2)) // Verify that 2 is gone.
require.Equal(t, ErrKeyNotFound, err)
return nil
})
require.NoError(t, err)
require.NoError(t, db1.Close())
}
func checkKeys(t *testing.T, kv *DB, keys [][]byte) {
i := 0
txn := kv.NewTransaction(false)
iter := txn.NewIterator(IteratorOptions{})
for iter.Seek(keys[0]); iter.Valid(); iter.Next() {
require.Equal(t, iter.Item().Key(), keys[i])
i++
}
require.Equal(t, i, len(keys))
}
type testHelper struct {
db *DB
t *testing.T
val []byte
}
func (th *testHelper) key(i int) []byte {
return []byte(fmt.Sprintf("%010d", i))
}
func (th *testHelper) value() []byte {
if len(th.val) > 0 {
return th.val
}
th.val = make([]byte, 100)
y.Check2(rand.Read(th.val))
return th.val
}
// writeRange [from, to].
func (th *testHelper) writeRange(from, to int) {
for i := from; i <= to; i++ {
err := th.db.Update(func(txn *Txn) error {
return txn.Set(th.key(i), th.value())
})
require.NoError(th.t, err)
}
}
func (th *testHelper) readRange(from, to int) {
for i := from; i <= to; i++ {
err := th.db.View(func(txn *Txn) error {
item, err := txn.Get(th.key(i))
if err != nil {
return err
}
return item.Value(func(val []byte) error {
require.Equal(th.t, val, th.value(), "key=%q", th.key(i))
return nil
})
})
require.NoError(th.t, err, "key=%q", th.key(i))
}
}
// Test Bug #578, which showed that if a value is moved during value log GC, an
// older version can end up at a higher level in the LSM tree than a newer
// version, causing the data to not be returned.
func TestBug578(t *testing.T) {
dir, err := ioutil.TempDir("", "badger")
y.Check(err)
defer os.RemoveAll(dir)
opts := DefaultOptions
opts.Dir = dir
opts.ValueDir = dir
opts.ValueLogMaxEntries = 64
opts.MaxTableSize = 1 << 13
db, err := Open(opts)
require.NoError(t, err)
h := testHelper{db: db, t: t}
// Let's run this whole thing a few times.
for j := 0; j < 10; j++ {
t.Logf("Cycle: %d\n", j)
h.writeRange(0, 32)
h.writeRange(0, 10)
h.writeRange(50, 72)
h.writeRange(40, 72)
h.writeRange(40, 72)
// Run value log GC a few times.
for i := 0; i < 5; i++ {
db.RunValueLogGC(0.5)
}
h.readRange(0, 10)
}
}
func BenchmarkReadWrite(b *testing.B) {
rwRatio := []float32{
0.1, 0.2, 0.5, 1.0,
}
valueSize := []int{
64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
}
for _, vsz := range valueSize {
for _, rw := range rwRatio {
b.Run(fmt.Sprintf("%3.1f,%04d", rw, vsz), func(b *testing.B) {
dir, err := ioutil.TempDir("", "vlog-benchmark")
y.Check(err)
defer os.RemoveAll(dir)
db, err := Open(getTestOptions(dir))
y.Check(err)
vl := &db.vlog
b.ResetTimer()
for i := 0; i < b.N; i++ {
e := new(Entry)
e.Key = make([]byte, 16)
e.Value = make([]byte, vsz)
bl := new(request)
bl.Entries = []*Entry{e}
var ptrs []valuePointer
vl.write([]*request{bl})
ptrs = append(ptrs, bl.Ptrs...)
f := rand.Float32()
if f < rw {
vl.write([]*request{bl})
} else {
ln := len(ptrs)
if ln == 0 {
b.Fatalf("Zero length of ptrs")
}
idx := rand.Intn(ln)
s := new(y.Slice)
buf, cb, err := vl.readValueBytes(ptrs[idx], s)
if err != nil {
b.Fatalf("Benchmark Read: %v", err)
}
e := valueBytesToEntry(buf)
if len(e.Key) != 16 {
b.Fatalf("Key is invalid")
}
if len(e.Value) != vsz {
b.Fatalf("Value is invalid")
}
cb()
}
}
})
}
}
}

83
vendor/github.com/dgraph-io/badger/y/error.go generated vendored Normal file

@@ -0,0 +1,83 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
// This file contains some functions for error handling. Note that we are moving
// towards using x.Trace, i.e., rpc tracing using net/trace. But for now, these
// functions are useful for simple checks logged on one machine.
// Some common use cases are:
// (1) You receive an error from an external lib and would like to check/log fatal.
// For this, use y.Check or y.Check2. These check for err != nil, which is
// the common case in Go. If you want to check that a boolean is true, use
// y.AssertTrue or y.AssertTruef.
// (2) You receive an error from an external lib and would like to pass it on
// with some stack trace information. In this case, use y.Wrap or y.Wrapf.
// (3) You want to generate a new error with stack trace info. Use errors.Errorf.
import (
"fmt"
"log"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
var debugMode = true
// Check logs fatal if err != nil.
func Check(err error) {
if err != nil {
log.Fatalf("%+v", Wrap(err))
}
}
// Check2 acts as a convenience wrapper around Check, using the 2nd argument as error.
func Check2(_ interface{}, err error) {
Check(err)
}
// AssertTrue asserts that b is true. Otherwise, it logs fatal.
func AssertTrue(b bool) {
if !b {
log.Fatalf("%+v", errors.Errorf("Assert failed"))
}
}
// AssertTruef is AssertTrue with extra info.
func AssertTruef(b bool, format string, args ...interface{}) {
if !b {
log.Fatalf("%+v", errors.Errorf(format, args...))
}
}
// Wrap wraps errors from external lib.
func Wrap(err error) error {
if !debugMode {
return err
}
return errors.Wrap(err, "")
}
// Wrapf is Wrap with extra info.
func Wrapf(err error, format string, args ...interface{}) error {
if !debugMode {
if err == nil {
return nil
}
return fmt.Errorf(format+" error: %+v", append(args, err)...)
}
return errors.Wrapf(err, format, args...)
}
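// A brief usage sketch for the helpers above (hypothetical caller).
func exampleChecks(err error) {
	Check(err)                         // fatal, with stack trace, if err != nil
	Check2(fmt.Println("side effect")) // discard the value, check the error
	AssertTruef(err == nil, "unexpected error: %v", err)
}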

25
vendor/github.com/dgraph-io/badger/y/file_dsync.go generated vendored Normal file

@@ -0,0 +1,25 @@
// +build !dragonfly,!freebsd,!windows
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import "gx/ipfs/QmVGjyM9i2msKvLXwh9VosCTgP4mL91kC7hDmqnwTTx6Hu/sys/unix"
func init() {
datasyncFileFlag = unix.O_DSYNC
}

25
vendor/github.com/dgraph-io/badger/y/file_nodsync.go generated vendored Normal file

@@ -0,0 +1,25 @@
// +build dragonfly freebsd windows
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import "syscall"
func init() {
datasyncFileFlag = syscall.O_SYNC
}

264
vendor/github.com/dgraph-io/badger/y/iterator.go generated vendored Normal file

@@ -0,0 +1,264 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import (
"bytes"
"container/heap"
"encoding/binary"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
// ValueStruct represents the value info that can be associated with a key, but also the internal
// Meta field.
type ValueStruct struct {
Meta byte
UserMeta byte
ExpiresAt uint64
Value []byte
Version uint64 // This field is not serialized. Only for internal usage.
}
func sizeVarint(x uint64) (n int) {
for {
n++
x >>= 7
if x == 0 {
break
}
}
return n
}
// EncodedSize is the size of the ValueStruct when encoded
func (v *ValueStruct) EncodedSize() uint16 {
sz := len(v.Value) + 2 // meta, usermeta.
if v.ExpiresAt == 0 {
return uint16(sz + 1)
}
enc := sizeVarint(v.ExpiresAt)
return uint16(sz + enc)
}
// Decode uses the length of the slice to infer the length of the Value field.
func (v *ValueStruct) Decode(b []byte) {
v.Meta = b[0]
v.UserMeta = b[1]
var sz int
v.ExpiresAt, sz = binary.Uvarint(b[2:])
v.Value = b[2+sz:]
}
// Encode expects a slice of length at least v.EncodedSize().
func (v *ValueStruct) Encode(b []byte) {
b[0] = v.Meta
b[1] = v.UserMeta
sz := binary.PutUvarint(b[2:], v.ExpiresAt)
copy(b[2+sz:], v.Value)
}
// EncodeTo should be kept in sync with the Encode function above. The reason
// this function exists is to avoid creating byte arrays per key-value pair in
// table/builder.go.
func (v *ValueStruct) EncodeTo(buf *bytes.Buffer) {
buf.WriteByte(v.Meta)
buf.WriteByte(v.UserMeta)
var enc [binary.MaxVarintLen64]byte
sz := binary.PutUvarint(enc[:], v.ExpiresAt)
buf.Write(enc[:sz])
buf.Write(v.Value)
}
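// A round-trip sketch for the encoding above (hypothetical helper). The
// layout is [meta][usermeta][uvarint expiresAt][value]; Decode infers the
// value length from the slice length.
func exampleValueStructRoundTrip() ValueStruct {
	in := ValueStruct{Meta: 1, UserMeta: 2, ExpiresAt: 12345, Value: []byte("v")}
	buf := make([]byte, in.EncodedSize())
	in.Encode(buf)
	var out ValueStruct
	out.Decode(buf)
	return out // Meta, UserMeta, ExpiresAt and Value now match in.
}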
// Iterator is an interface for a basic iterator.
type Iterator interface {
Next()
Rewind()
Seek(key []byte)
Key() []byte
Value() ValueStruct
Valid() bool
// All iterators should be closed so that file garbage collection works.
Close() error
}
type elem struct {
itr Iterator
nice int
reversed bool
}
type elemHeap []*elem
func (eh elemHeap) Len() int { return len(eh) }
func (eh elemHeap) Swap(i, j int) { eh[i], eh[j] = eh[j], eh[i] }
func (eh *elemHeap) Push(x interface{}) { *eh = append(*eh, x.(*elem)) }
func (eh *elemHeap) Pop() interface{} {
// Remove the last element, because Go has already swapped 0th elem <-> last.
old := *eh
n := len(old)
x := old[n-1]
*eh = old[0 : n-1]
return x
}
func (eh elemHeap) Less(i, j int) bool {
cmp := CompareKeys(eh[i].itr.Key(), eh[j].itr.Key())
if cmp < 0 {
return !eh[i].reversed
}
if cmp > 0 {
return eh[i].reversed
}
// The keys are equal. In this case, the lower nice value takes precedence. This is important.
return eh[i].nice < eh[j].nice
}
// MergeIterator merges multiple iterators.
// NOTE: MergeIterator owns the array of iterators and is responsible for closing them.
type MergeIterator struct {
h elemHeap
curKey []byte
reversed bool
all []Iterator
}
// NewMergeIterator returns a new MergeIterator from a list of Iterators.
func NewMergeIterator(iters []Iterator, reversed bool) *MergeIterator {
m := &MergeIterator{all: iters, reversed: reversed}
m.h = make(elemHeap, 0, len(iters))
m.initHeap()
return m
}
func (s *MergeIterator) storeKey(smallest Iterator) {
if cap(s.curKey) < len(smallest.Key()) {
s.curKey = make([]byte, 2*len(smallest.Key()))
}
s.curKey = s.curKey[:len(smallest.Key())]
copy(s.curKey, smallest.Key())
}
// initHeap checks all iterators and initializes our heap and array of keys.
// Whenever we reverse direction, we need to run this.
func (s *MergeIterator) initHeap() {
s.h = s.h[:0]
for idx, itr := range s.all {
if !itr.Valid() {
continue
}
e := &elem{itr: itr, nice: idx, reversed: s.reversed}
s.h = append(s.h, e)
}
heap.Init(&s.h)
for len(s.h) > 0 {
it := s.h[0].itr
if it == nil || !it.Valid() {
heap.Pop(&s.h)
continue
}
s.storeKey(s.h[0].itr)
break
}
}
// Valid returns whether the MergeIterator is at a valid element.
func (s *MergeIterator) Valid() bool {
if s == nil {
return false
}
if len(s.h) == 0 {
return false
}
return s.h[0].itr.Valid()
}
// Key returns the key associated with the current iterator.
func (s *MergeIterator) Key() []byte {
if len(s.h) == 0 {
return nil
}
return s.h[0].itr.Key()
}
// Value returns the value associated with the iterator.
func (s *MergeIterator) Value() ValueStruct {
if len(s.h) == 0 {
return ValueStruct{}
}
return s.h[0].itr.Value()
}
// Next returns the next element. If it is the same as the current key, ignore it.
func (s *MergeIterator) Next() {
if len(s.h) == 0 {
return
}
smallest := s.h[0].itr
smallest.Next()
for len(s.h) > 0 {
smallest = s.h[0].itr
if !smallest.Valid() {
heap.Pop(&s.h)
continue
}
heap.Fix(&s.h, 0)
smallest = s.h[0].itr
if smallest.Valid() {
if !bytes.Equal(smallest.Key(), s.curKey) {
break
}
smallest.Next()
}
}
if !smallest.Valid() {
return
}
s.storeKey(smallest)
}
// Rewind seeks to first element (or last element for reverse iterator).
func (s *MergeIterator) Rewind() {
for _, itr := range s.all {
itr.Rewind()
}
s.initHeap()
}
// Seek brings us to element with key >= given key.
func (s *MergeIterator) Seek(key []byte) {
for _, itr := range s.all {
itr.Seek(key)
}
s.initHeap()
}
// Close implements y.Iterator
func (s *MergeIterator) Close() error {
for _, itr := range s.all {
if err := itr.Close(); err != nil {
return errors.Wrap(err, "MergeIterator")
}
}
return nil
}
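// A hypothetical usage sketch: MergeIterator owns its sources and must be
// closed. Iteration yields each distinct key once; on ties the iterator with
// the lower index ("nice" value) wins, so earlier sources shadow later ones.
func exampleMergeAll(iters []Iterator) error {
	it := NewMergeIterator(iters, false) // false = ascending order
	for it.Rewind(); it.Valid(); it.Next() {
		_ = it.Key()   // internal key, i.e. user key plus timestamp suffix
		_ = it.Value() // ValueStruct from the winning source
	}
	return it.Close() // closes every source iterator too
}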

234
vendor/github.com/dgraph-io/badger/y/iterator_test.go generated vendored Normal file

@@ -0,0 +1,234 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import (
"sort"
"testing"
"github.com/stretchr/testify/require"
)
type SimpleIterator struct {
keys [][]byte
vals [][]byte
idx int
reversed bool
}
var (
closeCount int
)
func (s *SimpleIterator) Close() error { closeCount++; return nil }
func (s *SimpleIterator) Next() {
if !s.reversed {
s.idx++
} else {
s.idx--
}
}
func (s *SimpleIterator) Rewind() {
if !s.reversed {
s.idx = 0
} else {
s.idx = len(s.keys) - 1
}
}
func (s *SimpleIterator) Seek(key []byte) {
key = KeyWithTs(key, 0)
if !s.reversed {
s.idx = sort.Search(len(s.keys), func(i int) bool {
return CompareKeys(s.keys[i], key) >= 0
})
} else {
n := len(s.keys)
s.idx = n - 1 - sort.Search(n, func(i int) bool {
return CompareKeys(s.keys[n-1-i], key) <= 0
})
}
}
func (s *SimpleIterator) Key() []byte { return s.keys[s.idx] }
func (s *SimpleIterator) Value() ValueStruct {
return ValueStruct{
Value: s.vals[s.idx],
UserMeta: 55,
Meta: 0,
}
}
func (s *SimpleIterator) Valid() bool {
return s.idx >= 0 && s.idx < len(s.keys)
}
func newSimpleIterator(keys []string, vals []string, reversed bool) *SimpleIterator {
k := make([][]byte, len(keys))
v := make([][]byte, len(vals))
AssertTrue(len(keys) == len(vals))
for i := 0; i < len(keys); i++ {
k[i] = KeyWithTs([]byte(keys[i]), 0)
v[i] = []byte(vals[i])
}
return &SimpleIterator{
keys: k,
vals: v,
idx: -1,
reversed: reversed,
}
}
func getAll(it Iterator) ([]string, []string) {
var keys, vals []string
for ; it.Valid(); it.Next() {
k := it.Key()
keys = append(keys, string(ParseKey(k)))
v := it.Value()
vals = append(vals, string(v.Value))
}
return keys, vals
}
func closeAndCheck(t *testing.T, it Iterator, expected int) {
closeCount = 0
it.Close()
require.EqualValues(t, expected, closeCount)
}
func TestSimpleIterator(t *testing.T) {
keys := []string{"1", "2", "3"}
vals := []string{"v1", "v2", "v3"}
it := newSimpleIterator(keys, vals, false)
it.Rewind()
k, v := getAll(it)
require.EqualValues(t, keys, k)
require.EqualValues(t, vals, v)
closeAndCheck(t, it, 1)
}
func reversed(a []string) []string {
var out []string
for i := len(a) - 1; i >= 0; i-- {
out = append(out, a[i])
}
return out
}
func TestMergeSingle(t *testing.T) {
keys := []string{"1", "2", "3"}
vals := []string{"v1", "v2", "v3"}
it := newSimpleIterator(keys, vals, false)
mergeIt := NewMergeIterator([]Iterator{it}, false)
mergeIt.Rewind()
k, v := getAll(mergeIt)
require.EqualValues(t, keys, k)
require.EqualValues(t, vals, v)
closeAndCheck(t, mergeIt, 1)
}
func TestMergeSingleReversed(t *testing.T) {
keys := []string{"1", "2", "3"}
vals := []string{"v1", "v2", "v3"}
it := newSimpleIterator(keys, vals, true)
mergeIt := NewMergeIterator([]Iterator{it}, true)
mergeIt.Rewind()
k, v := getAll(mergeIt)
require.EqualValues(t, reversed(keys), k)
require.EqualValues(t, reversed(vals), v)
closeAndCheck(t, mergeIt, 1)
}
func TestMergeMore(t *testing.T) {
it := newSimpleIterator([]string{"1", "3", "7"}, []string{"a1", "a3", "a7"}, false)
it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false)
it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false)
it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false)
mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, false)
expectedKeys := []string{"1", "2", "3", "5", "7", "9"}
expectedVals := []string{"a1", "b2", "a3", "b5", "a7", "d9"}
mergeIt.Rewind()
k, v := getAll(mergeIt)
require.EqualValues(t, expectedKeys, k)
require.EqualValues(t, expectedVals, v)
closeAndCheck(t, mergeIt, 4)
}
// Ensure MergeIterator satisfies the Iterator interface
func TestMergeIteratorNested(t *testing.T) {
keys := []string{"1", "2", "3"}
vals := []string{"v1", "v2", "v3"}
it := newSimpleIterator(keys, vals, false)
mergeIt := NewMergeIterator([]Iterator{it}, false)
mergeIt2 := NewMergeIterator([]Iterator{mergeIt}, false)
mergeIt2.Rewind()
k, v := getAll(mergeIt2)
require.EqualValues(t, keys, k)
require.EqualValues(t, vals, v)
closeAndCheck(t, mergeIt2, 1)
}
func TestMergeIteratorSeek(t *testing.T) {
it := newSimpleIterator([]string{"1", "3", "7"}, []string{"a1", "a3", "a7"}, false)
it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false)
it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false)
it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false)
mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, false)
mergeIt.Seek([]byte("4"))
k, v := getAll(mergeIt)
require.EqualValues(t, []string{"5", "7", "9"}, k)
require.EqualValues(t, []string{"b5", "a7", "d9"}, v)
closeAndCheck(t, mergeIt, 4)
}
func TestMergeIteratorSeekReversed(t *testing.T) {
it := newSimpleIterator([]string{"1", "3", "7"}, []string{"a1", "a3", "a7"}, true)
it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, true)
it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, true)
it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, true)
mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, true)
mergeIt.Seek([]byte("5"))
k, v := getAll(mergeIt)
require.EqualValues(t, []string{"5", "3", "2", "1"}, k)
require.EqualValues(t, []string{"b5", "a3", "b2", "a1"}, v)
closeAndCheck(t, mergeIt, 4)
}
func TestMergeIteratorSeekInvalid(t *testing.T) {
it := newSimpleIterator([]string{"1", "3", "7"}, []string{"a1", "a3", "a7"}, false)
it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, false)
it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, false)
it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, false)
mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, false)
mergeIt.Seek([]byte("f"))
require.False(t, mergeIt.Valid())
closeAndCheck(t, mergeIt, 4)
}
func TestMergeIteratorSeekInvalidReversed(t *testing.T) {
it := newSimpleIterator([]string{"1", "3", "7"}, []string{"a1", "a3", "a7"}, true)
it2 := newSimpleIterator([]string{"2", "3", "5"}, []string{"b2", "b3", "b5"}, true)
it3 := newSimpleIterator([]string{"1"}, []string{"c1"}, true)
it4 := newSimpleIterator([]string{"1", "7", "9"}, []string{"d1", "d7", "d9"}, true)
mergeIt := NewMergeIterator([]Iterator{it, it2, it3, it4}, true)
mergeIt.Seek([]byte("0"))
require.False(t, mergeIt.Valid())
closeAndCheck(t, mergeIt, 4)
}

68
vendor/github.com/dgraph-io/badger/y/metrics.go generated vendored Normal file

@ -0,0 +1,68 @@
/*
* Copyright (C) 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import "expvar"
var (
// LSMSize has size of the LSM in bytes
LSMSize *expvar.Map
// VlogSize has size of the value log in bytes
VlogSize *expvar.Map
// PendingWrites tracks the number of pending writes.
PendingWrites *expvar.Map
// These are cumulative
// NumReads has cumulative number of reads
NumReads *expvar.Int
// NumWrites has cumulative number of writes
NumWrites *expvar.Int
// NumBytesRead has cumulative number of bytes read
NumBytesRead *expvar.Int
// NumBytesWritten has cumulative number of bytes written
NumBytesWritten *expvar.Int
// NumLSMGets is the number of LSM gets
NumLSMGets *expvar.Map
// NumLSMBloomHits is the number of LSM bloom hits
NumLSMBloomHits *expvar.Map
// NumGets is number of gets
NumGets *expvar.Int
// NumPuts is number of puts
NumPuts *expvar.Int
// NumBlockedPuts is number of blocked puts
NumBlockedPuts *expvar.Int
// NumMemtableGets is number of memtable gets
NumMemtableGets *expvar.Int
)
// These variables are global and have cumulative values for all kv stores.
func init() {
NumReads = expvar.NewInt("badger_disk_reads_total")
NumWrites = expvar.NewInt("badger_disk_writes_total")
NumBytesRead = expvar.NewInt("badger_read_bytes")
NumBytesWritten = expvar.NewInt("badger_written_bytes")
NumLSMGets = expvar.NewMap("badger_lsm_level_gets_total")
NumLSMBloomHits = expvar.NewMap("badger_lsm_bloom_hits_total")
NumGets = expvar.NewInt("badger_gets_total")
NumPuts = expvar.NewInt("badger_puts_total")
NumBlockedPuts = expvar.NewInt("badger_blocked_puts_total")
NumMemtableGets = expvar.NewInt("badger_memtable_gets_total")
LSMSize = expvar.NewMap("badger_lsm_size_bytes")
VlogSize = expvar.NewMap("badger_vlog_size_bytes")
PendingWrites = expvar.NewMap("badger_pending_writes_total")
}
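
The counters above are plain expvar variables, so an embedding program can read them without any badger-specific API. A minimal sketch, assuming badger is imported at its canonical path (the blank import runs the init above):

```go
package main

import (
	"expvar"
	"fmt"
	"strings"

	_ "github.com/dgraph-io/badger" // blank import: registers the badger_* expvars
)

func main() {
	// Walk every registered expvar and print only badger's counters.
	expvar.Do(func(kv expvar.KeyValue) {
		if strings.HasPrefix(kv.Key, "badger_") {
			fmt.Printf("%s = %s\n", kv.Key, kv.Value)
		}
	})
}
```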

63
vendor/github.com/dgraph-io/badger/y/mmap_unix.go generated vendored Normal file

@ -0,0 +1,63 @@
// +build !windows
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import (
"os"
"syscall"
"unsafe"
"gx/ipfs/QmVGjyM9i2msKvLXwh9VosCTgP4mL91kC7hDmqnwTTx6Hu/sys/unix"
)
// Mmap uses the mmap system call to memory-map a file. If writable is true,
// memory protection of the pages is set so that they may be written to as well.
func Mmap(fd *os.File, writable bool, size int64) ([]byte, error) {
mtype := unix.PROT_READ
if writable {
mtype |= unix.PROT_WRITE
}
return unix.Mmap(int(fd.Fd()), 0, int(size), mtype, unix.MAP_SHARED)
}
// Munmap unmaps a previously mapped slice.
func Munmap(b []byte) error {
return unix.Munmap(b)
}
// Madvise uses the madvise system call to give advice about the use of memory
// when using a slice that is memory-mapped to a file. Set the readahead flag to
// false if page references are expected in random order.
func Madvise(b []byte, readahead bool) error {
flags := unix.MADV_NORMAL
if !readahead {
flags = unix.MADV_RANDOM
}
return madvise(b, flags)
}
// This is required because the unix package does not support the madvise system call on OS X.
func madvise(b []byte, advice int) (err error) {
_, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])),
uintptr(len(b)), uintptr(advice))
if e1 != 0 {
err = e1
}
return
}

90
vendor/github.com/dgraph-io/badger/y/mmap_windows.go generated vendored Normal file

@ -0,0 +1,90 @@
// +build windows
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import (
"fmt"
"os"
"syscall"
"unsafe"
)
func Mmap(fd *os.File, write bool, size int64) ([]byte, error) {
protect := syscall.PAGE_READONLY
access := syscall.FILE_MAP_READ
if write {
protect = syscall.PAGE_READWRITE
access = syscall.FILE_MAP_WRITE
}
fi, err := fd.Stat()
if err != nil {
return nil, err
}
// Grow the file to the size of the mmap if it is smaller.
if fi.Size() < size {
if err := fd.Truncate(size); err != nil {
return nil, fmt.Errorf("truncate: %s", err)
}
}
// Open a file mapping handle. CreateFileMapping takes the maximum size of
// the mapping split into its high and low 32-bit halves.
sizehi := uint32(size >> 32)
sizelo := uint32(size) & 0xffffffff
handler, err := syscall.CreateFileMapping(syscall.Handle(fd.Fd()), nil,
uint32(protect), sizehi, sizelo, nil)
if err != nil {
return nil, os.NewSyscallError("CreateFileMapping", err)
}
// Create the memory map.
addr, err := syscall.MapViewOfFile(handler, uint32(access), 0, 0, uintptr(size))
if addr == 0 {
return nil, os.NewSyscallError("MapViewOfFile", err)
}
// Close mapping handle.
if err := syscall.CloseHandle(syscall.Handle(handler)); err != nil {
return nil, os.NewSyscallError("CloseHandle", err)
}
// Slice memory layout
// Copied this snippet from golang/sys package
var sl = struct {
addr uintptr
len int
cap int
}{addr, int(size), int(size)}
// Use unsafe to turn sl into a []byte.
data := *(*[]byte)(unsafe.Pointer(&sl))
return data, nil
}
func Munmap(b []byte) error {
return syscall.UnmapViewOfFile(uintptr(unsafe.Pointer(&b[0])))
}
func Madvise(b []byte, readahead bool) error {
// Do nothing. We don't care about this setting on Windows.
return nil
}

233
vendor/github.com/dgraph-io/badger/y/watermark.go generated vendored Normal file

@ -0,0 +1,233 @@
/*
* Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import (
"container/heap"
"context"
"sync/atomic"
"gx/ipfs/QmRvYNctevGUW52urgmoFZscT6buMKqhHezLUS64WepGWn/go-net/trace"
)
type uint64Heap []uint64
func (u uint64Heap) Len() int { return len(u) }
func (u uint64Heap) Less(i int, j int) bool { return u[i] < u[j] }
func (u uint64Heap) Swap(i int, j int) { u[i], u[j] = u[j], u[i] }
func (u *uint64Heap) Push(x interface{}) { *u = append(*u, x.(uint64)) }
func (u *uint64Heap) Pop() interface{} {
old := *u
n := len(old)
x := old[n-1]
*u = old[0 : n-1]
return x
}
// mark contains one or more indices, along with a done boolean to indicate the
// status of the index: begin or done. It also contains waiters, who could be
// waiting for the watermark to reach >= a certain index.
type mark struct {
// Either this is an (index, waiter) pair or (index, done) or (indices, done).
index uint64
waiter chan struct{}
indices []uint64
done bool // Set to true if the index is done.
}
// WaterMark is used to keep track of the minimum un-finished index. Typically, an index k becomes
// finished or "done" according to a WaterMark once Done(k) has been called
// 1. as many times as Begin(k) has, AND
// 2. a positive number of times.
//
// An index may also become "done" by calling SetDoneUntil at a time such that it is not
// inter-mingled with Begin/Done calls.
//
// Since doneUntil and lastIndex addresses are passed to sync/atomic packages, we ensure that they
// are 64-bit aligned by putting them at the beginning of the structure.
type WaterMark struct {
doneUntil uint64
lastIndex uint64
Name string
markCh chan mark
elog trace.EventLog
}
// Init initializes a WaterMark struct. MUST be called before using it.
func (w *WaterMark) Init(closer *Closer) {
w.markCh = make(chan mark, 100)
w.elog = trace.NewEventLog("Watermark", w.Name)
go w.process(closer)
}
// Begin sets the last index to the given value.
func (w *WaterMark) Begin(index uint64) {
atomic.StoreUint64(&w.lastIndex, index)
w.markCh <- mark{index: index, done: false}
}
// BeginMany works like Begin but accepts multiple indices.
func (w *WaterMark) BeginMany(indices []uint64) {
atomic.StoreUint64(&w.lastIndex, indices[len(indices)-1])
w.markCh <- mark{index: 0, indices: indices, done: false}
}
// Done sets a single index as done.
func (w *WaterMark) Done(index uint64) {
w.markCh <- mark{index: index, done: true}
}
// DoneMany works like Done but accepts multiple indices.
func (w *WaterMark) DoneMany(indices []uint64) {
w.markCh <- mark{index: 0, indices: indices, done: true}
}
// DoneUntil returns the maximum index that has the property that all indices
// less than or equal to it are done.
func (w *WaterMark) DoneUntil() uint64 {
return atomic.LoadUint64(&w.doneUntil)
}
// SetDoneUntil sets the maximum index that has the property that all indices
// less than or equal to it are done.
func (w *WaterMark) SetDoneUntil(val uint64) {
atomic.StoreUint64(&w.doneUntil, val)
}
// LastIndex returns the last index for which Begin has been called.
func (w *WaterMark) LastIndex() uint64 {
return atomic.LoadUint64(&w.lastIndex)
}
// WaitForMark waits until the given index is marked as done.
func (w *WaterMark) WaitForMark(ctx context.Context, index uint64) error {
if w.DoneUntil() >= index {
return nil
}
waitCh := make(chan struct{})
w.markCh <- mark{index: index, waiter: waitCh}
select {
case <-ctx.Done():
return ctx.Err()
case <-waitCh:
return nil
}
}
// process is used to process the Mark channel. This is not thread-safe,
// so only run one goroutine for process. One is sufficient, because
// all goroutine ops use purely memory and cpu.
// Each index has to emit at least one begin watermark in serial order, otherwise waiters
// can get blocked indefinitely. Example: suppose we have a watermark at 100 and a waiter at 101;
// if no watermark is emitted at index 101, the waiter gets stuck indefinitely, as it
// can't tell whether the task at 101 decided not to emit a watermark or simply hasn't been
// scheduled yet.
func (w *WaterMark) process(closer *Closer) {
defer closer.Done()
var indices uint64Heap
// pending maps raft proposal index to the number of pending mutations for this proposal.
pending := make(map[uint64]int)
waiters := make(map[uint64][]chan struct{})
heap.Init(&indices)
var loop uint64
processOne := func(index uint64, done bool) {
// If not already done, then set. Otherwise, don't undo a done entry.
prev, present := pending[index]
if !present {
heap.Push(&indices, index)
}
delta := 1
if done {
delta = -1
}
pending[index] = prev + delta
loop++
if len(indices) > 0 && loop%10000 == 0 {
min := indices[0]
w.elog.Printf("WaterMark %s: Done entry %4d. Size: %4d Watermark: %-4d Looking for: %-4d. Value: %d\n",
w.Name, index, len(indices), w.DoneUntil(), min, pending[min])
}
// Update mark by going through all indices in order; and checking if they have
// been done. Stop at the first index, which isn't done.
doneUntil := w.DoneUntil()
if doneUntil > index {
AssertTruef(false, "Name: %s doneUntil: %d. Index: %d", w.Name, doneUntil, index)
}
until := doneUntil
loops := 0
for len(indices) > 0 {
min := indices[0]
if done := pending[min]; done > 0 {
break // len(indices) will be > 0.
}
// Even if done is called multiple times causing it to become
// negative, we should still pop the index.
heap.Pop(&indices)
delete(pending, min)
until = min
loops++
}
for i := doneUntil + 1; i <= until; i++ {
toNotify := waiters[i]
for _, ch := range toNotify {
close(ch)
}
delete(waiters, i) // Release the memory back.
}
if until != doneUntil {
AssertTrue(atomic.CompareAndSwapUint64(&w.doneUntil, doneUntil, until))
w.elog.Printf("%s: Done until %d. Loops: %d\n", w.Name, until, loops)
}
}
for {
select {
case <-closer.HasBeenClosed():
return
case mark := <-w.markCh:
if mark.waiter != nil {
doneUntil := atomic.LoadUint64(&w.doneUntil)
if doneUntil >= mark.index {
close(mark.waiter)
} else {
ws, ok := waiters[mark.index]
if !ok {
waiters[mark.index] = []chan struct{}{mark.waiter}
} else {
waiters[mark.index] = append(ws, mark.waiter)
}
}
} else {
if mark.index > 0 {
processOne(mark.index, mark.done)
}
for _, index := range mark.indices {
processOne(index, mark.done)
}
}
}
}
}
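
A hedged sketch of the lifecycle the comments above describe: Begin marks an index as in flight, Done retires it, and WaitForMark blocks until everything up to that index is done. The Closer wiring follows Init's contract (process calls closer.Done() when it exits); the import path is assumed:

```go
package main

import (
	"context"
	"fmt"

	"github.com/dgraph-io/badger/y" // assumed import path
)

func main() {
	closer := y.NewCloser(1) // one count for the process goroutine
	w := &y.WaterMark{Name: "demo"}
	w.Init(closer)

	w.Begin(1)   // index 1 is now pending
	go w.Done(1) // retire it from another goroutine

	// Blocks until Done(1) has been processed.
	if err := w.WaitForMark(context.Background(), 1); err != nil {
		fmt.Println("wait:", err)
	}
	fmt.Println("done until:", w.DoneUntil()) // prints: done until: 1

	closer.SignalAndWait() // shut the process goroutine down
}
```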

286
vendor/github.com/dgraph-io/badger/y/y.go generated vendored Normal file

@ -0,0 +1,286 @@
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import (
"bytes"
"encoding/binary"
"fmt"
"hash/crc32"
"math"
"os"
"sync"
"time"
"gx/ipfs/QmVmDhyTTUcQXFD1rRQ64fGLMSAoaQvNH3hwuaCFAPq2hy/errors"
)
// ErrEOF indicates an end of file when trying to read from a memory mapped file
// and encountering the end of the slice.
var ErrEOF = errors.New("End of mapped region")
const (
// Sync indicates that O_DSYNC should be set on the underlying file,
// ensuring that data writes do not return until the data is flushed
// to disk.
Sync = 1 << iota
// ReadOnly opens the underlying file on a read-only basis.
ReadOnly
)
var (
// This is O_DSYNC (datasync) on platforms that support it -- see file_unix.go
datasyncFileFlag = 0x0
// CastagnoliCrcTable is a CRC32 polynomial table
CastagnoliCrcTable = crc32.MakeTable(crc32.Castagnoli)
)
// OpenExistingFile opens an existing file, errors if it doesn't exist.
func OpenExistingFile(filename string, flags uint32) (*os.File, error) {
openFlags := os.O_RDWR
if flags&ReadOnly != 0 {
openFlags = os.O_RDONLY
}
if flags&Sync != 0 {
openFlags |= datasyncFileFlag
}
return os.OpenFile(filename, openFlags, 0)
}
// CreateSyncedFile creates a new file (using O_EXCL), errors if it already exists.
func CreateSyncedFile(filename string, sync bool) (*os.File, error) {
flags := os.O_RDWR | os.O_CREATE | os.O_EXCL
if sync {
flags |= datasyncFileFlag
}
return os.OpenFile(filename, flags, 0666)
}
// OpenSyncedFile creates the file if one doesn't exist.
func OpenSyncedFile(filename string, sync bool) (*os.File, error) {
flags := os.O_RDWR | os.O_CREATE
if sync {
flags |= datasyncFileFlag
}
return os.OpenFile(filename, flags, 0666)
}
// OpenTruncFile opens the file with O_RDWR | O_CREATE | O_TRUNC
func OpenTruncFile(filename string, sync bool) (*os.File, error) {
flags := os.O_RDWR | os.O_CREATE | os.O_TRUNC
if sync {
flags |= datasyncFileFlag
}
return os.OpenFile(filename, flags, 0666)
}
// SafeCopy does append(a[:0], src...).
func SafeCopy(a []byte, src []byte) []byte {
return append(a[:0], src...)
}
// Copy copies a byte slice and returns the copied slice.
func Copy(a []byte) []byte {
b := make([]byte, len(a))
copy(b, a)
return b
}
// KeyWithTs generates a new key by appending ts to key.
func KeyWithTs(key []byte, ts uint64) []byte {
out := make([]byte, len(key)+8)
copy(out, key)
binary.BigEndian.PutUint64(out[len(key):], math.MaxUint64-ts)
return out
}
// ParseTs parses the timestamp from the key bytes.
func ParseTs(key []byte) uint64 {
if len(key) <= 8 {
return 0
}
return math.MaxUint64 - binary.BigEndian.Uint64(key[len(key)-8:])
}
// CompareKeys compares the keys without their timestamps, then compares the
// timestamps if the key prefixes are the same.
// a<timestamp> would sort higher than aa<timestamp> if we used bytes.Compare directly.
// All keys must carry a timestamp.
func CompareKeys(key1 []byte, key2 []byte) int {
AssertTrue(len(key1) > 8 && len(key2) > 8)
if cmp := bytes.Compare(key1[:len(key1)-8], key2[:len(key2)-8]); cmp != 0 {
return cmp
}
return bytes.Compare(key1[len(key1)-8:], key2[len(key2)-8:])
}
// ParseKey parses the actual key from the key bytes.
func ParseKey(key []byte) []byte {
if key == nil {
return nil
}
AssertTrue(len(key) > 8)
return key[:len(key)-8]
}
// SameKey checks for key equality ignoring the version timestamp suffix.
func SameKey(src, dst []byte) bool {
if len(src) != len(dst) {
return false
}
return bytes.Equal(ParseKey(src), ParseKey(dst))
}
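
A small worked example of the versioned-key scheme above. Because KeyWithTs appends math.MaxUint64-ts in big-endian order, a higher timestamp sorts before a lower one for the same user key; the snippet is a sketch assuming the same import path as above:

```go
package main

import (
	"fmt"

	"github.com/dgraph-io/badger/y" // assumed import path
)

func main() {
	a5 := y.KeyWithTs([]byte("a"), 5)
	a7 := y.KeyWithTs([]byte("a"), 7)

	fmt.Println(y.CompareKeys(a7, a5) < 0) // true: the newer version sorts first
	fmt.Println(y.SameKey(a5, a7))         // true: same user key, different versions
	fmt.Println(y.ParseTs(a7))             // 7
}
```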
// Slice holds a reusable buffer and reallocates only when a size larger than ever before is requested.
// One caveat: with n distinct sizes requested in random order, it will reallocate log(n) times.
type Slice struct {
buf []byte
}
// Resize reuses the Slice's buffer (or makes a new one) and returns a slice in that buffer of
// length sz.
func (s *Slice) Resize(sz int) []byte {
if cap(s.buf) < sz {
s.buf = make([]byte, sz)
}
return s.buf[0:sz]
}
// FixedDuration returns a string representation of the given duration with the
// hours, minutes, and seconds.
func FixedDuration(d time.Duration) string {
str := fmt.Sprintf("%02ds", int(d.Seconds())%60)
if d >= time.Minute {
str = fmt.Sprintf("%02dm", int(d.Minutes())%60) + str
}
if d >= time.Hour {
str = fmt.Sprintf("%02dh", int(d.Hours())) + str
}
return str
}
// Closer holds the two things we need to close a goroutine and wait for it to finish: a chan
// to tell the goroutine to shut down, and a WaitGroup with which to wait for it to finish shutting
// down.
type Closer struct {
closed chan struct{}
waiting sync.WaitGroup
}
// NewCloser constructs a new Closer, with an initial count on the WaitGroup.
func NewCloser(initial int) *Closer {
ret := &Closer{closed: make(chan struct{})}
ret.waiting.Add(initial)
return ret
}
// AddRunning adds delta to the WaitGroup.
func (lc *Closer) AddRunning(delta int) {
lc.waiting.Add(delta)
}
// Signal signals the HasBeenClosed signal.
func (lc *Closer) Signal() {
close(lc.closed)
}
// HasBeenClosed gets signaled when Signal() is called.
func (lc *Closer) HasBeenClosed() <-chan struct{} {
return lc.closed
}
// Done calls Done() on the WaitGroup.
func (lc *Closer) Done() {
lc.waiting.Done()
}
// Wait waits on the WaitGroup. (It waits for NewCloser's initial value, AddRunning, and Done
// calls to balance out.)
func (lc *Closer) Wait() {
lc.waiting.Wait()
}
// SignalAndWait calls Signal(), then Wait().
func (lc *Closer) SignalAndWait() {
lc.Signal()
lc.Wait()
}
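
The Closer contract in one minimal sketch (names are illustrative): the worker holds the initial WaitGroup count, exits when HasBeenClosed fires, and the owner tears it down with SignalAndWait:

```go
package main

import (
	"fmt"
	"time"

	"github.com/dgraph-io/badger/y" // assumed import path
)

func worker(c *y.Closer) {
	defer c.Done() // balances NewCloser's initial count
	for {
		select {
		case <-c.HasBeenClosed():
			return
		case <-time.After(10 * time.Millisecond):
			fmt.Println("tick")
		}
	}
}

func main() {
	c := y.NewCloser(1)
	go worker(c)
	time.Sleep(25 * time.Millisecond)
	c.SignalAndWait() // signal shutdown, then wait for the worker to return
}
```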
// Throttle allows a limited number of workers to run at a time. It also
// provides a mechanism to check for errors encountered by workers and wait for
// them to finish.
type Throttle struct {
wg sync.WaitGroup
ch chan struct{}
errCh chan error
}
// NewThrottle creates a new throttle with a max number of workers.
func NewThrottle(max int) *Throttle {
return &Throttle{
ch: make(chan struct{}, max),
errCh: make(chan error, max),
}
}
// Do should be called by workers before they start working. It blocks if the
// maximum number of workers is already running. If it detects an error from
// a previously Done worker, it returns that error.
func (t *Throttle) Do() error {
for {
select {
case t.ch <- struct{}{}:
t.wg.Add(1)
return nil
case err := <-t.errCh:
if err != nil {
return err
}
}
}
}
// Done should be called by workers when they finish working. They can also
// pass the error status of work done.
func (t *Throttle) Done(err error) {
if err != nil {
t.errCh <- err
}
select {
case <-t.ch:
default:
panic("Throttle Do Done mismatch")
}
t.wg.Done()
}
// Finish waits until all workers have finished working. It returns any
// error passed by Done.
func (t *Throttle) Finish() error {
t.wg.Wait()
close(t.ch)
close(t.errCh)
for err := range t.errCh {
if err != nil {
return err
}
}
return nil
}
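
A sketch of the worker pattern the Throttle comments describe: Do gates admission (blocking at the limit), Done reports each worker's result, and Finish waits and surfaces the first error. The worker count and limit here are arbitrary:

```go
package main

import (
	"fmt"

	"github.com/dgraph-io/badger/y" // assumed import path
)

func main() {
	t := y.NewThrottle(4) // at most 4 workers in flight
	for i := 0; i < 16; i++ {
		if err := t.Do(); err != nil { // blocks while 4 workers are running
			fmt.Println("stopping early:", err)
			break
		}
		go func(n int) {
			// ... real work for item n would go here ...
			t.Done(nil) // pass a non-nil error here to fail the batch
		}(i)
	}
	if err := t.Finish(); err != nil { // waits for all admitted workers
		fmt.Println("batch failed:", err)
	}
}
```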

24
vendor/github.com/dgryski/go-farm/.gitignore generated vendored Normal file

@ -0,0 +1,24 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
*.exe
*.test
*.prof
target

39
vendor/github.com/dgryski/go-farm/.travis.yml generated vendored Normal file

@ -0,0 +1,39 @@
language: go
sudo: false
branches:
except:
- release
branches:
only:
- master
- develop
- travis
go:
- 1.11.x
- 1.12.x
- tip
matrix:
allow_failures:
- go: tip
before_install:
- if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi;
- if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi;
- go get github.com/mattn/goveralls
before_script:
- make deps
script:
- make qa
after_failure:
- cat ./target/test/report.xml
after_success:
- if [ "$TRAVIS_GO_VERSION" = "1.9" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi;

23
vendor/github.com/dgryski/go-farm/LICENSE generated vendored Normal file

@ -0,0 +1,23 @@
As this is a highly derivative work, I have placed it under the same license as the original implementation:
Copyright (c) 2014-2017 Damian Gryski
Copyright (c) 2016-2017 Nicola Asuni - Tecnick.com
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

203
vendor/github.com/dgryski/go-farm/Makefile generated vendored Normal file

@ -0,0 +1,203 @@
# MAKEFILE
#
# @author Nicola Asuni <info@tecnick.com>
# @link https://github.com/dgryski/go-farm
#
# This file is intended to be executed in a Linux-compatible system.
# It also assumes that the project has been cloned in the right path under GOPATH:
# $GOPATH/src/github.com/dgryski/go-farm
#
# ------------------------------------------------------------------------------
# List special make targets that are not associated with files
.PHONY: help all test format fmtcheck vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan qa deps clean nuke
# Use bash as shell (Note: Ubuntu now uses dash which doesn't support PIPESTATUS).
SHELL=/bin/bash
# CVS path (path to the parent dir containing the project)
CVSPATH=github.com/dgryski
# Project owner
OWNER=dgryski
# Project vendor
VENDOR=dgryski
# Project name
PROJECT=go-farm
# Project version
VERSION=$(shell cat VERSION)
# Name of RPM or DEB package
PKGNAME=${VENDOR}-${PROJECT}
# Current directory
CURRENTDIR=$(shell pwd)
# GO lang path
ifneq ($(GOPATH),)
ifeq ($(findstring $(GOPATH),$(CURRENTDIR)),)
# the defined GOPATH is not valid
GOPATH=
endif
endif
ifeq ($(GOPATH),)
# extract the GOPATH
GOPATH=$(firstword $(subst /src/, ,$(CURRENTDIR)))
endif
# --- MAKE TARGETS ---
# Display general help about this command
help:
@echo ""
@echo "$(PROJECT) Makefile."
@echo "GOPATH=$(GOPATH)"
@echo "The following commands are available:"
@echo ""
@echo " make qa : Run all the tests"
@echo " make test : Run the unit tests"
@echo ""
@echo " make format : Format the source code"
@echo " make fmtcheck : Check if the source code has been formatted"
@echo " make vet : Check for suspicious constructs"
@echo " make lint : Check for style errors"
@echo " make coverage : Generate the coverage report"
@echo " make cyclo : Generate the cyclomatic complexity report"
@echo " make ineffassign : Detect ineffectual assignments"
@echo " make misspell : Detect commonly misspelled words in source files"
@echo " make structcheck : Find unused struct fields"
@echo " make varcheck : Find unused global variables and constants"
@echo " make errcheck : Check that error return values are used"
@echo " make gosimple : Suggest code simplifications"
@echo " make astscan : GO AST scanner"
@echo ""
@echo " make docs : Generate source code documentation"
@echo ""
@echo " make deps : Get the dependencies"
@echo " make clean : Remove any build artifact"
@echo " make nuke : Deletes any intermediate file"
@echo ""
# Alias for help target
all: help
# Run the unit tests
test:
@mkdir -p target/test
@mkdir -p target/report
GOPATH=$(GOPATH) \
go test \
-covermode=atomic \
-bench=. \
-race \
-cpuprofile=target/report/cpu.out \
-memprofile=target/report/mem.out \
-mutexprofile=target/report/mutex.out \
-coverprofile=target/report/coverage.out \
-v ./... | \
tee >(PATH=$(GOPATH)/bin:$(PATH) go-junit-report > target/test/report.xml); \
test $${PIPESTATUS[0]} -eq 0
# Format the source code
format:
@find . -type f -name "*.go" -exec gofmt -s -w {} \;
# Check if the source code has been formatted
fmtcheck:
@mkdir -p target
@find . -type f -name "*.go" -exec gofmt -s -d {} \; | tee target/format.diff
@test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }
# Check for suspicious constructs
vet:
GOPATH=$(GOPATH) go vet .
# Check for style errors
lint:
GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint .
# Generate the coverage report
coverage:
@mkdir -p target/report
GOPATH=$(GOPATH) \
go tool cover -html=target/report/coverage.out -o target/report/coverage.html
# Report cyclomatic complexity
cyclo:
@mkdir -p target/report
GOPATH=$(GOPATH) gocyclo -avg ./ | tee target/report/cyclo.txt ; test $${PIPESTATUS[0]} -eq 0
# Detect ineffectual assignments
ineffassign:
@mkdir -p target/report
GOPATH=$(GOPATH) ineffassign ./ | tee target/report/ineffassign.txt ; test $${PIPESTATUS[0]} -eq 0
# Detect commonly misspelled words in source files
misspell:
@mkdir -p target/report
GOPATH=$(GOPATH) misspell -error ./ | tee target/report/misspell.txt ; test $${PIPESTATUS[0]} -eq 0
# Find unused struct fields
structcheck:
@mkdir -p target/report
GOPATH=$(GOPATH) structcheck -a ./ | tee target/report/structcheck.txt
# Find unused global variables and constants
varcheck:
@mkdir -p target/report
GOPATH=$(GOPATH) varcheck -e ./ | tee target/report/varcheck.txt
# Check that error return values are used
errcheck:
@mkdir -p target/report
GOPATH=$(GOPATH) errcheck ./ | tee target/report/errcheck.txt
# Suggest code simplifications
gosimple:
@mkdir -p target/report
GOPATH=$(GOPATH) gosimple ./ | tee target/report/gosimple.txt
# AST scanner
astscan:
@mkdir -p target/report
GOPATH=$(GOPATH) gas .//*.go | tee target/report/astscan.txt
# Generate source docs
docs:
@mkdir -p target/docs
nohup sh -c 'GOPATH=$(GOPATH) godoc -http=127.0.0.1:6060' > target/godoc_server.log 2>&1 &
wget --directory-prefix=target/docs/ --execute robots=off --retry-connrefused --recursive --no-parent --adjust-extension --page-requisites --convert-links http://127.0.0.1:6060/pkg/github.com/${VENDOR}/${PROJECT}/ ; kill -9 `lsof -ti :6060`
@echo '<html><head><meta http-equiv="refresh" content="0;./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html"/></head><a href="./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html">'${PKGNAME}' Documentation ...</a></html>' > target/docs/index.html
# Alias to run all quality-assurance checks
qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan
# --- INSTALL ---
# Get the dependencies
deps:
GOPATH=$(GOPATH) go get ./...
GOPATH=$(GOPATH) go get golang.org/x/lint/golint
GOPATH=$(GOPATH) go get github.com/jstemmer/go-junit-report
GOPATH=$(GOPATH) go get github.com/axw/gocov/gocov
GOPATH=$(GOPATH) go get github.com/fzipp/gocyclo
GOPATH=$(GOPATH) go get github.com/gordonklaus/ineffassign
GOPATH=$(GOPATH) go get github.com/client9/misspell/cmd/misspell
GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/structcheck
GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/varcheck
GOPATH=$(GOPATH) go get github.com/kisielk/errcheck
GOPATH=$(GOPATH) go get honnef.co/go/tools/cmd/gosimple
GOPATH=$(GOPATH) go get github.com/GoASTScanner/gas
# Remove any build artifact
clean:
GOPATH=$(GOPATH) go clean ./...
# Deletes any intermediate file
nuke:
rm -rf ./target
GOPATH=$(GOPATH) go clean -i ./...

41
vendor/github.com/dgryski/go-farm/README.md generated vendored Normal file

@ -0,0 +1,41 @@
# go-farm
*Google's FarmHash hash functions implemented in Go*
[![Master Branch](https://img.shields.io/badge/-master:-gray.svg)](https://github.com/dgryski/go-farm/tree/master)
[![Master Build Status](https://secure.travis-ci.org/dgryski/go-farm.png?branch=master)](https://travis-ci.org/dgryski/go-farm?branch=master)
[![Master Coverage Status](https://coveralls.io/repos/dgryski/go-farm/badge.svg?branch=master&service=github)](https://coveralls.io/github/dgryski/go-farm?branch=master)
[![Go Report Card](https://goreportcard.com/badge/github.com/dgryski/go-farm)](https://goreportcard.com/report/github.com/dgryski/go-farm)
[![GoDoc](https://godoc.org/github.com/dgryski/go-farm?status.svg)](http://godoc.org/github.com/dgryski/go-farm)
## Description
FarmHash, a family of hash functions.
This is a (mechanical) translation of the non-SSE4/non-AESNI hash functions from Google's FarmHash (https://github.com/google/farmhash).
FarmHash provides hash functions for strings and other data.
The functions mix the input bits thoroughly but are not suitable for cryptography.
All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others.
For more information please consult https://github.com/google/farmhash
## Getting started
This library is written in Go; please refer to the guides at https://golang.org to get started.
This project includes a Makefile that allows you to test and build the project with simple commands.
To see all available options:
```bash
make help
```
## Running all tests
Before committing the code, please check that it passes all tests by running
```bash
make qa
```
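## Example usage
A minimal sketch of calling the package (the exported names match the generated functions elsewhere in this repository):
```go
package main

import (
	"fmt"

	farm "github.com/dgryski/go-farm"
)

func main() {
	data := []byte("hello, farmhash")
	fmt.Println(farm.Fingerprint64(data)) // stable 64-bit fingerprint
	fmt.Println(farm.Fingerprint32(data)) // stable 32-bit fingerprint
}
```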

1
vendor/github.com/dgryski/go-farm/VERSION generated vendored Normal file

@ -0,0 +1 @@
2.0.1

898
vendor/github.com/dgryski/go-farm/asm.go generated vendored Normal file

@ -0,0 +1,898 @@
// +build ignore
package main
import (
"flag"
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
const k0 uint64 = 0xc3a5c85c97cb3127
const k1 uint64 = 0xb492b66fbe98f273
const k2 uint64 = 0x9ae16a3b2f90404f
const c1 uint32 = 0xcc9e2d51
const c2 uint32 = 0x1b873593
func shiftMix(val GPVirtual) GPVirtual {
r := GP64()
MOVQ(val, r)
SHRQ(Imm(47), r)
XORQ(val, r)
return r
}
func shiftMix64(val uint64) uint64 {
return val ^ (val >> 47)
}
func hashLen16MulLine(a, b, c, d, k, mul GPVirtual) GPVirtual {
tmpa := GP64()
MOVQ(a, tmpa)
ADDQ(b, tmpa)
RORQ(Imm(43), tmpa)
ADDQ(d, tmpa)
tmpc := GP64()
MOVQ(c, tmpc)
RORQ(Imm(30), tmpc)
ADDQ(tmpc, tmpa)
ADDQ(c, a)
ADDQ(k, b)
RORQ(Imm(18), b)
ADDQ(b, a)
r := hashLen16Mul(tmpa, a, mul)
return r
}
func hashLen16Mul(u, v, mul GPVirtual) GPVirtual {
XORQ(v, u)
IMULQ(mul, u)
a := shiftMix(u)
XORQ(a, v)
IMULQ(mul, v)
b := shiftMix(v)
IMULQ(mul, b)
return b
}
func hashLen0to16(sbase, slen GPVirtual) {
CMPQ(slen, Imm(8))
JL(LabelRef("check4"))
{
a := GP64()
MOVQ(Mem{Base: sbase}, a)
b := GP64()
t := GP64()
MOVQ(slen, t)
SUBQ(Imm(8), t)
ADDQ(sbase, t)
MOVQ(Mem{Base: t}, b)
rk2 := GP64()
MOVQ(Imm(k2), rk2)
ADDQ(rk2, a)
mul := slen
SHLQ(Imm(1), mul)
ADDQ(rk2, mul)
c := GP64()
MOVQ(b, c)
RORQ(Imm(37), c)
IMULQ(mul, c)
ADDQ(a, c)
d := GP64()
MOVQ(a, d)
RORQ(Imm(25), d)
ADDQ(b, d)
IMULQ(mul, d)
r := hashLen16Mul(c, d, mul)
Store(r, ReturnIndex(0))
RET()
}
Label("check4")
CMPQ(slen, Imm(4))
JL(LabelRef("check0"))
{
rk2 := GP64()
MOVQ(Imm(k2), rk2)
mul := GP64()
MOVQ(slen, mul)
SHLQ(Imm(1), mul)
ADDQ(rk2, mul)
a := GP64()
MOVL(Mem{Base: sbase}, a.As32())
SHLQ(Imm(3), a)
ADDQ(slen, a)
b := GP64()
SUBQ(Imm(4), slen)
ADDQ(slen, sbase)
MOVL(Mem{Base: sbase}, b.As32())
r := hashLen16Mul(a, b, mul)
Store(r, ReturnIndex(0))
RET()
}
Label("check0")
TESTQ(slen, slen)
JZ(LabelRef("empty"))
{
a := GP64()
MOVBQZX(Mem{Base: sbase}, a)
base := GP64()
MOVQ(slen, base)
SHRQ(Imm(1), base)
b := GP64()
ADDQ(sbase, base)
MOVBQZX(Mem{Base: base}, b)
MOVQ(slen, base)
SUBQ(Imm(1), base)
c := GP64()
ADDQ(sbase, base)
MOVBQZX(Mem{Base: base}, c)
SHLQ(Imm(8), b)
ADDQ(b, a)
y := a
SHLQ(Imm(2), c)
ADDQ(c, slen)
z := slen
rk0 := GP64()
MOVQ(Imm(k0), rk0)
IMULQ(rk0, z)
rk2 := GP64()
MOVQ(Imm(k2), rk2)
IMULQ(rk2, y)
XORQ(y, z)
r := shiftMix(z)
IMULQ(rk2, r)
Store(r, ReturnIndex(0))
RET()
}
Label("empty")
ret := GP64()
MOVQ(Imm(k2), ret)
Store(ret, ReturnIndex(0))
RET()
}
func hashLen17to32(sbase, slen GPVirtual) {
mul := GP64()
MOVQ(slen, mul)
SHLQ(Imm(1), mul)
rk2 := GP64()
MOVQ(Imm(k2), rk2)
ADDQ(rk2, mul)
a := GP64()
MOVQ(Mem{Base: sbase}, a)
rk1 := GP64()
MOVQ(Imm(k1), rk1)
IMULQ(rk1, a)
b := GP64()
MOVQ(Mem{Base: sbase, Disp: 8}, b)
base := GP64()
MOVQ(slen, base)
SUBQ(Imm(16), base)
ADDQ(sbase, base)
c := GP64()
MOVQ(Mem{Base: base, Disp: 8}, c)
IMULQ(mul, c)
d := GP64()
MOVQ(Mem{Base: base}, d)
IMULQ(rk2, d)
r := hashLen16MulLine(a, b, c, d, rk2, mul)
Store(r, ReturnIndex(0))
RET()
}
// Return an 8-byte hash for 33 to 64 bytes.
func hashLen33to64(sbase, slen GPVirtual) {
mul := GP64()
MOVQ(slen, mul)
SHLQ(Imm(1), mul)
rk2 := GP64()
MOVQ(Imm(k2), rk2)
ADDQ(rk2, mul)
a := GP64()
MOVQ(Mem{Base: sbase}, a)
IMULQ(rk2, a)
b := GP64()
MOVQ(Mem{Base: sbase, Disp: 8}, b)
base := GP64()
MOVQ(slen, base)
SUBQ(Imm(16), base)
ADDQ(sbase, base)
c := GP64()
MOVQ(Mem{Base: base, Disp: 8}, c)
IMULQ(mul, c)
d := GP64()
MOVQ(Mem{Base: base}, d)
IMULQ(rk2, d)
y := GP64()
MOVQ(a, y)
ADDQ(b, y)
RORQ(Imm(43), y)
ADDQ(d, y)
tmpc := GP64()
MOVQ(c, tmpc)
RORQ(Imm(30), tmpc)
ADDQ(tmpc, y)
ADDQ(a, c)
ADDQ(rk2, b)
RORQ(Imm(18), b)
ADDQ(b, c)
tmpy := GP64()
MOVQ(y, tmpy)
z := hashLen16Mul(tmpy, c, mul)
e := GP64()
MOVQ(Mem{Base: sbase, Disp: 16}, e)
IMULQ(mul, e)
f := GP64()
MOVQ(Mem{Base: sbase, Disp: 24}, f)
base = GP64()
MOVQ(slen, base)
SUBQ(Imm(32), base)
ADDQ(sbase, base)
g := GP64()
MOVQ(Mem{Base: base}, g)
ADDQ(y, g)
IMULQ(mul, g)
h := GP64()
MOVQ(Mem{Base: base, Disp: 8}, h)
ADDQ(z, h)
IMULQ(mul, h)
r := hashLen16MulLine(e, f, g, h, a, mul)
Store(r, ReturnIndex(0))
RET()
}
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
func weakHashLen32WithSeeds(sbase GPVirtual, disp int, a, b GPVirtual) {
w := Mem{Base: sbase, Disp: disp + 0}
x := Mem{Base: sbase, Disp: disp + 8}
y := Mem{Base: sbase, Disp: disp + 16}
z := Mem{Base: sbase, Disp: disp + 24}
// a += w
ADDQ(w, a)
// b = bits.RotateLeft64(b+a+z, -21)
ADDQ(a, b)
ADDQ(z, b)
RORQ(Imm(21), b)
// c := a
c := GP64()
MOVQ(a, c)
// a += x
// a += y
ADDQ(x, a)
ADDQ(y, a)
// b += bits.RotateLeft64(a, -44)
atmp := GP64()
MOVQ(a, atmp)
RORQ(Imm(44), atmp)
ADDQ(atmp, b)
// a += z
// b += c
ADDQ(z, a)
ADDQ(c, b)
XCHGQ(a, b)
}
func hashLoopBody(x, y, z, vlo, vhi, wlo, whi, sbase GPVirtual, mul1 GPVirtual, mul2 uint64) {
ADDQ(y, x)
ADDQ(vlo, x)
ADDQ(Mem{Base: sbase, Disp: 8}, x)
RORQ(Imm(37), x)
IMULQ(mul1, x)
ADDQ(vhi, y)
ADDQ(Mem{Base: sbase, Disp: 48}, y)
RORQ(Imm(42), y)
IMULQ(mul1, y)
if mul2 != 1 {
t := GP64()
MOVQ(U32(mul2), t)
IMULQ(whi, t)
XORQ(t, x)
} else {
XORQ(whi, x)
}
if mul2 != 1 {
t := GP64()
MOVQ(U32(mul2), t)
IMULQ(vlo, t)
ADDQ(t, y)
} else {
ADDQ(vlo, y)
}
ADDQ(Mem{Base: sbase, Disp: 40}, y)
ADDQ(wlo, z)
RORQ(Imm(33), z)
IMULQ(mul1, z)
{
IMULQ(mul1, vhi)
MOVQ(x, vlo)
ADDQ(wlo, vlo)
weakHashLen32WithSeeds(sbase, 0, vhi, vlo)
}
{
ADDQ(z, whi)
MOVQ(y, wlo)
ADDQ(Mem{Base: sbase, Disp: 16}, wlo)
weakHashLen32WithSeeds(sbase, 32, whi, wlo)
}
XCHGQ(z, x)
}
func fp64() {
TEXT("Fingerprint64", NOSPLIT, "func(s []byte) uint64")
slen := GP64()
sbase := GP64()
Load(Param("s").Base(), sbase)
Load(Param("s").Len(), slen)
CMPQ(slen, Imm(16))
JG(LabelRef("check32"))
hashLen0to16(sbase, slen)
Label("check32")
CMPQ(slen, Imm(32))
JG(LabelRef("check64"))
hashLen17to32(sbase, slen)
Label("check64")
CMPQ(slen, Imm(64))
JG(LabelRef("long"))
hashLen33to64(sbase, slen)
Label("long")
seed := uint64(81)
vlo, vhi, wlo, whi := GP64(), GP64(), GP64(), GP64()
XORQ(vlo, vlo)
XORQ(vhi, vhi)
XORQ(wlo, wlo)
XORQ(whi, whi)
x := GP64()
eightOne := uint64(81)
MOVQ(Imm(eightOne*k2), x)
ADDQ(Mem{Base: sbase}, x)
y := GP64()
y64 := uint64(seed*k1) + 113
MOVQ(Imm(y64), y)
z := GP64()
MOVQ(Imm(shiftMix64(y64*k2+113)*k2), z)
endIdx := GP64()
MOVQ(slen, endIdx)
tmp := GP64()
SUBQ(Imm(1), endIdx)
MOVQ(U64(^uint64(63)), tmp)
ANDQ(tmp, endIdx)
last64Idx := GP64()
MOVQ(slen, last64Idx)
SUBQ(Imm(1), last64Idx)
ANDQ(Imm(63), last64Idx)
SUBQ(Imm(63), last64Idx)
ADDQ(endIdx, last64Idx)
last64 := GP64()
MOVQ(last64Idx, last64)
ADDQ(sbase, last64)
end := GP64()
MOVQ(slen, end)
Label("loop")
rk1 := GP64()
MOVQ(Imm(k1), rk1)
hashLoopBody(x, y, z, vlo, vhi, wlo, whi, sbase, rk1, 1)
ADDQ(Imm(64), sbase)
SUBQ(Imm(64), end)
CMPQ(end, Imm(64))
JG(LabelRef("loop"))
MOVQ(last64, sbase)
mul := GP64()
MOVQ(z, mul)
ANDQ(Imm(0xff), mul)
SHLQ(Imm(1), mul)
ADDQ(rk1, mul)
MOVQ(last64, sbase)
SUBQ(Imm(1), slen)
ANDQ(Imm(63), slen)
ADDQ(slen, wlo)
ADDQ(wlo, vlo)
ADDQ(vlo, wlo)
hashLoopBody(x, y, z, vlo, vhi, wlo, whi, sbase, mul, 9)
{
a := hashLen16Mul(vlo, wlo, mul)
ADDQ(z, a)
b := shiftMix(y)
rk0 := GP64()
MOVQ(Imm(k0), rk0)
IMULQ(rk0, b)
ADDQ(b, a)
c := hashLen16Mul(vhi, whi, mul)
ADDQ(x, c)
r := hashLen16Mul(a, c, mul)
Store(r, ReturnIndex(0))
}
RET()
}
func fmix(h GPVirtual) GPVirtual {
h2 := GP32()
MOVL(h, h2)
SHRL(Imm(16), h2)
XORL(h2, h)
MOVL(Imm(0x85ebca6b), h2)
IMULL(h2, h)
MOVL(h, h2)
SHRL(Imm(13), h2)
XORL(h2, h)
MOVL(Imm(0xc2b2ae35), h2)
IMULL(h2, h)
MOVL(h, h2)
SHRL(Imm(16), h2)
XORL(h2, h)
return h
}
func mur(a, h GPVirtual) GPVirtual {
imul3l(c1, a, a)
RORL(Imm(17), a)
imul3l(c2, a, a)
XORL(a, h)
RORL(Imm(19), h)
LEAL(Mem{Base: h, Index: h, Scale: 4}, a)
LEAL(Mem{Base: a, Disp: 0xe6546b64}, h)
return h
}
func hash32Len5to12(sbase, slen GPVirtual) {
a := GP32()
MOVL(slen.As32(), a)
b := GP32()
MOVL(a, b)
SHLL(Imm(2), b)
ADDL(a, b)
c := GP32()
MOVL(U32(9), c)
d := GP32()
MOVL(b, d)
ADDL(Mem{Base: sbase, Disp: 0}, a)
t := GP64()
MOVQ(slen, t)
SUBQ(Imm(4), t)
ADDQ(sbase, t)
ADDL(Mem{Base: t}, b)
MOVQ(slen, t)
SHRQ(Imm(1), t)
ANDQ(Imm(4), t)
ADDQ(sbase, t)
ADDL(Mem{Base: t}, c)
t = mur(a, d)
t = mur(b, t)
t = mur(c, t)
t = fmix(t)
Store(t, ReturnIndex(0))
RET()
}
func hash32Len13to24Seed(sbase, slen GPVirtual) {
slen2 := GP64()
MOVQ(slen, slen2)
SHRQ(Imm(1), slen2)
ADDQ(sbase, slen2)
a := GP32()
MOVL(Mem{Base: slen2, Disp: -4}, a)
b := GP32()
MOVL(Mem{Base: sbase, Disp: 4}, b)
send := GP64()
MOVQ(slen, send)
ADDQ(sbase, send)
c := GP32()
MOVL(Mem{Base: send, Disp: -8}, c)
d := GP32()
MOVL(Mem{Base: slen2}, d)
e := GP32()
MOVL(Mem{Base: sbase}, e)
f := GP32()
MOVL(Mem{Base: send, Disp: -4}, f)
h := GP32()
MOVL(U32(c1), h)
IMULL(d, h)
ADDL(slen.As32(), h)
RORL(Imm(12), a)
ADDL(f, a)
ctmp := GP32()
MOVL(c, ctmp)
h = mur(ctmp, h)
ADDL(a, h)
RORL(Imm(3), a)
ADDL(c, a)
h = mur(e, h)
ADDL(a, h)
ADDL(f, a)
RORL(Imm(12), a)
ADDL(d, a)
h = mur(b, h)
ADDL(a, h)
h = fmix(h)
Store(h, ReturnIndex(0))
RET()
}
func hash32Len0to4(sbase, slen GPVirtual) {
b := GP32()
c := GP32()
XORL(b, b)
MOVL(U32(9), c)
TESTQ(slen, slen)
JZ(LabelRef("done"))
l := GP64()
v := GP32()
MOVQ(slen, l)
c1reg := GP32()
MOVL(U32(c1), c1reg)
for i := 0; i < 4; i++ {
IMULL(c1reg, b)
MOVBLSX(Mem{Base: sbase, Disp: i}, v)
ADDL(v, b)
XORL(b, c)
SUBQ(Imm(1), l)
TESTQ(l, l)
JZ(LabelRef("done"))
}
Label("done")
s32 := GP32()
MOVL(slen.As32(), s32)
r := mur(s32, c)
r = mur(b, r)
r = fmix(r)
Store(r, ReturnIndex(0))
RET()
}
func fp32() {
TEXT("Fingerprint32", NOSPLIT, "func(s []byte) uint32")
sbase := GP64()
slen := GP64()
Load(Param("s").Base(), sbase)
Load(Param("s").Len(), slen)
CMPQ(slen, Imm(24))
JG(LabelRef("long"))
CMPQ(slen, Imm(12))
JG(LabelRef("hash_13_24"))
CMPQ(slen, Imm(4))
JG(LabelRef("hash_5_12"))
hash32Len0to4(sbase, slen)
Label("hash_5_12")
hash32Len5to12(sbase, slen)
Label("hash_13_24")
hash32Len13to24Seed(sbase, slen)
Label("long")
h := GP32()
MOVL(slen.As32(), h)
g := GP32()
MOVL(U32(c1), g)
IMULL(h, g)
f := GP32()
MOVL(g, f)
// len > 24
send := GP64()
MOVQ(slen, send)
ADDQ(sbase, send)
c1reg := GP32()
MOVL(U32(c1), c1reg)
c2reg := GP32()
MOVL(U32(c2), c2reg)
shuf := func(r GPVirtual, disp int) {
a := GP32()
MOVL(Mem{Base: send, Disp: disp}, a)
IMULL(c1reg, a)
RORL(Imm(17), a)
IMULL(c2reg, a)
XORL(a, r)
RORL(Imm(19), r)
MOVL(r, a)
SHLL(Imm(2), a)
ADDL(a, r)
ADDL(Imm(0xe6546b64), r)
}
shuf(h, -4)
shuf(g, -8)
shuf(h, -16)
shuf(g, -12)
PREFETCHT0(Mem{Base: sbase})
{
a := GP32()
MOVL(Mem{Base: send, Disp: -20}, a)
IMULL(c1reg, a)
RORL(Imm(17), a)
IMULL(c2reg, a)
ADDL(a, f)
RORL(Imm(19), f)
ADDL(Imm(113), f)
}
loop32Body := func(f, g, h, sbase, slen GPVirtual, disp int) {
a, b, c, d, e := GP32(), GP32(), GP32(), GP32(), GP32()
MOVL(Mem{Base: sbase, Disp: disp + 0}, a)
ADDL(a, h)
MOVL(Mem{Base: sbase, Disp: disp + 4}, b)
ADDL(b, g)
MOVL(Mem{Base: sbase, Disp: disp + 8}, c)
ADDL(c, f)
MOVL(Mem{Base: sbase, Disp: disp + 12}, d)
t := GP32()
MOVL(d, t)
h = mur(t, h)
MOVL(Mem{Base: sbase, Disp: disp + 16}, e)
ADDL(e, h)
MOVL(c, t)
g = mur(t, g)
ADDL(a, g)
imul3l(c1, e, t)
ADDL(b, t)
f = mur(t, f)
ADDL(d, f)
ADDL(g, f)
ADDL(f, g)
}
Label("loop80")
CMPQ(slen, Imm(80+20))
JL(LabelRef("loop20"))
{
PREFETCHT0(Mem{Base: sbase, Disp: 20})
loop32Body(f, g, h, sbase, slen, 0)
PREFETCHT0(Mem{Base: sbase, Disp: 40})
loop32Body(f, g, h, sbase, slen, 20)
PREFETCHT0(Mem{Base: sbase, Disp: 60})
loop32Body(f, g, h, sbase, slen, 40)
PREFETCHT0(Mem{Base: sbase, Disp: 80})
loop32Body(f, g, h, sbase, slen, 60)
ADDQ(Imm(80), sbase)
SUBQ(Imm(80), slen)
JMP(LabelRef("loop80"))
}
Label("loop20")
CMPQ(slen, Imm(20))
JLE(LabelRef("after"))
{
loop32Body(f, g, h, sbase, slen, 0)
ADDQ(Imm(20), sbase)
SUBQ(Imm(20), slen)
JMP(LabelRef("loop20"))
}
Label("after")
c1reg = GP32()
MOVL(U32(c1), c1reg)
RORL(Imm(11), g)
IMULL(c1reg, g)
RORL(Imm(17), g)
IMULL(c1reg, g)
RORL(Imm(11), f)
IMULL(c1reg, f)
RORL(Imm(17), f)
IMULL(c1reg, f)
ADDL(g, h)
RORL(Imm(19), h)
t := GP32()
MOVL(h, t)
SHLL(Imm(2), t)
ADDL(t, h)
ADDL(Imm(0xe6546b64), h)
RORL(Imm(17), h)
IMULL(c1reg, h)
ADDL(f, h)
RORL(Imm(19), h)
t = GP32()
MOVL(h, t)
SHLL(Imm(2), t)
ADDL(t, h)
ADDL(Imm(0xe6546b64), h)
RORL(Imm(17), h)
IMULL(c1reg, h)
Store(h, ReturnIndex(0))
RET()
}
var go111 = flag.Bool("go111", true, "use assembly instructions present in go1.11 and later")
func imul3l(m uint32, x, y Register) {
if *go111 {
IMUL3L(U32(m), x, y)
} else {
t := GP32()
MOVL(U32(m), t)
IMULL(t, x)
MOVL(x, y)
}
}
func main() {
flag.Parse()
ConstraintExpr("amd64,!purego")
fp64()
fp32()
Generate()
}

32
vendor/github.com/dgryski/go-farm/basics.go generated vendored Normal file

@ -0,0 +1,32 @@
package farm
import "math/bits"
// Some primes between 2^63 and 2^64 for various uses.
const k0 uint64 = 0xc3a5c85c97cb3127
const k1 uint64 = 0xb492b66fbe98f273
const k2 uint64 = 0x9ae16a3b2f90404f
// Magic numbers for 32-bit hashing. Copied from Murmur3.
const c1 uint32 = 0xcc9e2d51
const c2 uint32 = 0x1b873593
// A 32-bit to 32-bit integer hash copied from Murmur3.
func fmix(h uint32) uint32 {
h ^= h >> 16
h *= 0x85ebca6b
h ^= h >> 13
h *= 0xc2b2ae35
h ^= h >> 16
return h
}
func mur(a, h uint32) uint32 {
// Helper from Murmur3 for combining two 32-bit values.
a *= c1
a = bits.RotateLeft32(a, -17)
a *= c2
h ^= a
h = bits.RotateLeft32(h, -19)
return h*5 + 0xe6546b64
}

Some files were not shown because too many files have changed in this diff.