Skip to content

Commit

Permalink
trie/pathdb: state iterator (snapshot integration pt 4) (#30654)
Browse files Browse the repository at this point in the history
In this pull request, the state iterator is implemented. It's mostly a copy-paste
from the original state snapshot package, but still has some important changes
to highlight here:

(a) The iterator for the disk layer consists of a diff iterator and a disk iterator.

Originally, the disk layer in the state snapshot was a wrapper around the disk, 
and its corresponding iterator was also a wrapper around the disk iterator.
However, due to structural differences, the disk layer iterator is divided into
two parts:

- The disk iterator, which traverses the content stored on disk.
- The diff iterator, which traverses the aggregated state buffer.

Checkout `BinaryIterator` and `FastIterator` for more details.

(b) The staleness management is improved in the diffAccountIterator and
diffStorageIterator

Originally, in the `diffAccountIterator`, the layer’s staleness had to be checked 
within the Next function to ensure the iterator remained usable. Additionally, 
a read lock on the associated diff layer was required to first retrieve the account 
blob. This read lock protection is essential to prevent concurrent map read/write. 
Afterward, a staleness check was performed to ensure the retrieved data was 
not outdated.

The entire logic can be simplified as follows: a loadAccount callback is provided 
to retrieve account data. If the corresponding state is immutable (e.g., diff layers
in the path database), the staleness check can be skipped, and a single account 
data retrieval is sufficient. However, if the corresponding state is mutable (e.g., 
the disk layer in the path database), the callback can operate as follows:

```go
func(hash common.Hash) ([]byte, error) {
    dl.lock.RLock()
    defer dl.lock.RUnlock()

    if dl.stale {
        return nil, errSnapshotStale
    }
    return dl.buffer.states.mustAccount(hash)
}
```

The callback solution can eliminate the complexity for managing
concurrency with the read lock for atomic operation.
  • Loading branch information
rjl493456442 authored Dec 16, 2024
1 parent f808d73 commit bc1ec69
Show file tree
Hide file tree
Showing 10 changed files with 2,634 additions and 63 deletions.
12 changes: 12 additions & 0 deletions triedb/pathdb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -555,3 +555,15 @@ func (db *Database) StorageHistory(address common.Address, slot common.Hash, sta
func (db *Database) HistoryRange() (uint64, uint64, error) {
return historyRange(db.freezer)
}

// AccountIterator creates a new account iterator for the specified root hash and
// seeks to a starting account hash.
func (db *Database) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
return newFastAccountIterator(db, root, seek)
}

// StorageIterator creates a new storage iterator for the specified root hash and
// account. The iterator will be moved to the specific start position.
func (db *Database) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
return newFastStorageIterator(db, root, account, seek)
}
97 changes: 97 additions & 0 deletions triedb/pathdb/holdable_iterator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pathdb

import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb"
)

// holdableIterator is a wrapper of underlying database iterator. It extends
// the basic iterator interface by adding Hold which can hold the element
// locally where the iterator is currently located and serve it up next time.
type holdableIterator struct {
it ethdb.Iterator
key []byte
val []byte
atHeld bool
}

// newHoldableIterator initializes the holdableIterator with the given iterator.
func newHoldableIterator(it ethdb.Iterator) *holdableIterator {
return &holdableIterator{it: it}
}

// Hold holds the element locally where the iterator is currently located which
// can be served up next time.
func (it *holdableIterator) Hold() {
if it.it.Key() == nil {
return // nothing to hold
}
it.key = common.CopyBytes(it.it.Key())
it.val = common.CopyBytes(it.it.Value())
it.atHeld = false
}

// Next moves the iterator to the next key/value pair. It returns whether the
// iterator is exhausted.
func (it *holdableIterator) Next() bool {
if !it.atHeld && it.key != nil {
it.atHeld = true
} else if it.atHeld {
it.atHeld = false
it.key = nil
it.val = nil
}
if it.key != nil {
return true // shifted to locally held value
}
return it.it.Next()
}

// Error returns any accumulated error. Exhausting all the key/value pairs
// is not considered to be an error.
func (it *holdableIterator) Error() error { return it.it.Error() }

// Release releases associated resources. Release should always succeed and can
// be called multiple times without causing error.
func (it *holdableIterator) Release() {
it.atHeld = false
it.key = nil
it.val = nil
it.it.Release()
}

// Key returns the key of the current key/value pair, or nil if done. The caller
// should not modify the contents of the returned slice, and its contents may
// change on the next call to Next.
func (it *holdableIterator) Key() []byte {
if it.key != nil {
return it.key
}
return it.it.Key()
}

// Value returns the value of the current key/value pair, or nil if done. The
// caller should not modify the contents of the returned slice, and its contents
// may change on the next call to Next.
func (it *holdableIterator) Value() []byte {
if it.val != nil {
return it.val
}
return it.it.Value()
}
176 changes: 176 additions & 0 deletions triedb/pathdb/holdable_iterator_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pathdb

import (
"bytes"
"testing"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/ethdb/memorydb"
)

func TestIteratorHold(t *testing.T) {
// Create the key-value data store
var (
content = map[string]string{"k1": "v1", "k2": "v2", "k3": "v3"}
order = []string{"k1", "k2", "k3"}
db = rawdb.NewMemoryDatabase()
)
for key, val := range content {
if err := db.Put([]byte(key), []byte(val)); err != nil {
t.Fatalf("failed to insert item %s:%s into database: %v", key, val, err)
}
}
// Iterate over the database with the given configs and verify the results
it, idx := newHoldableIterator(db.NewIterator(nil, nil)), 0

// Nothing should be affected for calling Discard on non-initialized iterator
it.Hold()

for it.Next() {
if len(content) <= idx {
t.Errorf("more items than expected: checking idx=%d (key %q), expecting len=%d", idx, it.Key(), len(order))
break
}
if !bytes.Equal(it.Key(), []byte(order[idx])) {
t.Errorf("item %d: key mismatch: have %s, want %s", idx, string(it.Key()), order[idx])
}
if !bytes.Equal(it.Value(), []byte(content[order[idx]])) {
t.Errorf("item %d: value mismatch: have %s, want %s", idx, string(it.Value()), content[order[idx]])
}
// Should be safe to call discard multiple times
it.Hold()
it.Hold()

// Shift iterator to the discarded element
it.Next()
if !bytes.Equal(it.Key(), []byte(order[idx])) {
t.Errorf("item %d: key mismatch: have %s, want %s", idx, string(it.Key()), order[idx])
}
if !bytes.Equal(it.Value(), []byte(content[order[idx]])) {
t.Errorf("item %d: value mismatch: have %s, want %s", idx, string(it.Value()), content[order[idx]])
}

// Discard/Next combo should work always
it.Hold()
it.Next()
if !bytes.Equal(it.Key(), []byte(order[idx])) {
t.Errorf("item %d: key mismatch: have %s, want %s", idx, string(it.Key()), order[idx])
}
if !bytes.Equal(it.Value(), []byte(content[order[idx]])) {
t.Errorf("item %d: value mismatch: have %s, want %s", idx, string(it.Value()), content[order[idx]])
}
idx++
}
if err := it.Error(); err != nil {
t.Errorf("iteration failed: %v", err)
}
if idx != len(order) {
t.Errorf("iteration terminated prematurely: have %d, want %d", idx, len(order))
}
db.Close()
}

func TestReopenIterator(t *testing.T) {
var (
content = map[common.Hash]string{
common.HexToHash("a1"): "v1",
common.HexToHash("a2"): "v2",
common.HexToHash("a3"): "v3",
common.HexToHash("a4"): "v4",
common.HexToHash("a5"): "v5",
common.HexToHash("a6"): "v6",
}
order = []common.Hash{
common.HexToHash("a1"),
common.HexToHash("a2"),
common.HexToHash("a3"),
common.HexToHash("a4"),
common.HexToHash("a5"),
common.HexToHash("a6"),
}
db = rawdb.NewMemoryDatabase()

reopen = func(db ethdb.KeyValueStore, iter *holdableIterator) *holdableIterator {
if !iter.Next() {
iter.Release()
return newHoldableIterator(memorydb.New().NewIterator(nil, nil))
}
next := iter.Key()
iter.Release()
return newHoldableIterator(db.NewIterator(rawdb.SnapshotAccountPrefix, next[1:]))
}
)
for key, val := range content {
rawdb.WriteAccountSnapshot(db, key, []byte(val))
}
checkVal := func(it *holdableIterator, index int) {
if !bytes.Equal(it.Key(), append(rawdb.SnapshotAccountPrefix, order[index].Bytes()...)) {
t.Fatalf("Unexpected data entry key, want %v got %v", order[index], it.Key())
}
if !bytes.Equal(it.Value(), []byte(content[order[index]])) {
t.Fatalf("Unexpected data entry key, want %v got %v", []byte(content[order[index]]), it.Value())
}
}
// Iterate over the database with the given configs and verify the results
dbIter := db.NewIterator(rawdb.SnapshotAccountPrefix, nil)
iter, idx := newHoldableIterator(rawdb.NewKeyLengthIterator(dbIter, 1+common.HashLength)), -1

idx++
iter.Next()
checkVal(iter, idx)

iter = reopen(db, iter)
idx++
iter.Next()
checkVal(iter, idx)

// reopen twice
iter = reopen(db, iter)
iter = reopen(db, iter)
idx++
iter.Next()
checkVal(iter, idx)

// reopen iterator with held value
iter.Next()
iter.Hold()
iter = reopen(db, iter)
idx++
iter.Next()
checkVal(iter, idx)

// reopen twice iterator with held value
iter.Next()
iter.Hold()
iter = reopen(db, iter)
iter = reopen(db, iter)
idx++
iter.Next()
checkVal(iter, idx)

// shift to the end and reopen
iter.Next() // the end
iter = reopen(db, iter)
iter.Next()
if iter.Key() != nil {
t.Fatal("Unexpected iterated entry")
}
}
Loading

0 comments on commit bc1ec69

Please sign in to comment.