Skip to content

Commit

Permalink
triedb/pathdb: polish code, improve code comment
Browse files Browse the repository at this point in the history
  • Loading branch information
rjl493456442 committed Dec 16, 2024
1 parent a037ad4 commit 6cef4a6
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 55 deletions.
8 changes: 1 addition & 7 deletions triedb/pathdb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -559,17 +559,11 @@ func (db *Database) HistoryRange() (uint64, uint64, error) {
// AccountIterator creates a new account iterator for the specified root hash and
// seeks to a starting account hash.
func (db *Database) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
//if gen := db.tree.bottom().generator; gen != nil && !gen.completed() {
// return nil, errNotConstructed
//}
return newFastAccountIterator(db, root, seek)
}

// StorageIterator creates a new storage iterator for the specified root hash and
// account. The iterator will be move to the specific start position.
// account. The iterator will be moved to the specific start position.
func (db *Database) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
//if gen := db.tree.bottom().generator; gen != nil && !gen.completed() {
// return nil, errNotConstructed
//}
return newFastStorageIterator(db, root, account, seek)
}
8 changes: 4 additions & 4 deletions triedb/pathdb/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ func (it *diffAccountIterator) Release() {}
// To simplify, the staleness of the persistent state is not tracked. The disk
// iterator is not intended to be used alone. It should always be wrapped with
// a diff iterator, as the bottom-most disk layer uses both the in-memory
// aggregated buffer and the persistent disk layer as data sources. The staleness
// of the diff iterator is sufficient to invalidate the iterator pair.
// aggregated buffer and the persistent disk layer as the data sources. The
// staleness of the diff iterator is sufficient to invalidate the iterator pair.
type diskAccountIterator struct {
it ethdb.Iterator
}
Expand Down Expand Up @@ -304,8 +304,8 @@ func (it *diffStorageIterator) Release() {}
// To simplify, the staleness of the persistent state is not tracked. The disk
// iterator is not intended to be used alone. It should always be wrapped with
// a diff iterator, as the bottom-most disk layer uses both the in-memory
// aggregated buffer and the persistent disk layer as data sources. The staleness
// of the diff iterator is sufficient to invalidate the iterator pair.
// aggregated buffer and the persistent disk layer as the data sources. The
// staleness of the diff iterator is sufficient to invalidate the iterator pair.
type diskStorageIterator struct {
account common.Hash
it ethdb.Iterator
Expand Down
126 changes: 87 additions & 39 deletions triedb/pathdb/iterator_binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@ import (

// binaryIterator is a simplistic iterator to step over the accounts or storage
// in a snapshot, which may or may not be composed of multiple layers. Performance
// wise this iterator is slow, it's meant for cross validating the fast one,
// wise this iterator is slow, it's meant for cross validating the fast one.
//
// This iterator cannot be used on its own; it should be wrapped with an outer
// iterator, such as accountBinaryIterator or storageBinaryIterator.
//
// This iterator can only traverse the keys of the entries stored in the layers,
// but cannot obtain the corresponding values. Besides, the deleted entry will
// also be traversed, the outer iterator must check the emptiness before returning.
type binaryIterator struct {
a Iterator
b Iterator
Expand All @@ -41,18 +48,20 @@ func (dl *diskLayer) initBinaryAccountIterator(seek common.Hash) *binaryIterator
// Create two iterators for state buffer and the persistent state in disk
// respectively and combine them as a binary iterator.
l := &binaryIterator{
// The state set in the disk layer is mutable, and the entire state becomes stale
// if a diff layer above is merged into it. Therefore, staleness must be checked,
// and the account data should be retrieved with read lock protection.
a: newDiffAccountIterator(seek, dl.buffer.states, func(hash common.Hash) ([]byte, error) {
dl.lock.RLock()
defer dl.lock.RUnlock()
// The account loader function is unnecessary; the account key list
// produced by the supplied buffer alone is sufficient for iteration.
//
// The account key list for iteration is deterministic once the iterator
// is constructed, no matter the referenced disk layer is stale or not
// later.
a: newDiffAccountIterator(seek, dl.buffer.states, nil),

if dl.stale {
return nil, errSnapshotStale
}
return dl.buffer.states.mustAccount(hash)
}),
// The account keys for iteration might be changed if the persistent
// state in disk is mutated. Therefore, the disk iterator might produce
// some invalid account key (e.g. the newly created account which was
// not in the disk). In this case, the staleness will be detected by
// the outer iterator like accountBinaryIterator before returning in
// the Next function.
b: newDiskAccountIterator(dl.db.diskdb, seek),
}
l.aDone = !l.a.Next()
Expand All @@ -67,19 +76,31 @@ func (dl *diffLayer) initBinaryAccountIterator(seek common.Hash) *binaryIterator
parent, ok := dl.parent.(*diffLayer)
if !ok {
l := &binaryIterator{
// The state set in diff layer is immutable and will never be stale,
// so the read lock protection is unnecessary.
a: newDiffAccountIterator(seek, dl.states.stateSet, dl.states.mustAccount),
// The account loader function is unnecessary; the account key list
// produced by the supplied state set alone is sufficient for iteration.
//
// The account key list for iteration is deterministic once the iterator
// is constructed, no matter the referenced disk layer is stale or not
// later.
a: newDiffAccountIterator(seek, dl.states.stateSet, nil),

// Recursively construct the iterator of the parent disk layer.
b: dl.parent.(*diskLayer).initBinaryAccountIterator(seek),
}
l.aDone = !l.a.Next()
l.bDone = !l.b.Next()
return l
}
l := &binaryIterator{
// The state set in diff layer is immutable and will never
// be stale. The supplied staleness checker is nil.
a: newDiffAccountIterator(seek, dl.states.stateSet, dl.states.mustAccount),
// The account loader function is unnecessary; the account key list
// produced by the supplied state set alone is sufficient for iteration.
//
// The account key list for iteration is deterministic once the iterator
// is constructed, no matter the referenced disk layer is stale or not
// later.
a: newDiffAccountIterator(seek, dl.states.stateSet, nil),

// Recursively construct the iterator of the parent diff layer.
b: parent.initBinaryAccountIterator(seek),
}
l.aDone = !l.a.Next()
Expand All @@ -94,18 +115,20 @@ func (dl *diskLayer) initBinaryStorageIterator(account common.Hash, seek common.
// Create two iterators for state buffer and the persistent state in disk
// respectively and combine them as a binary iterator.
l := &binaryIterator{
// The state set in the disk layer is mutable, and the entire state becomes stale
// if a diff layer above is merged into it. Therefore, staleness must be checked,
// and the storage slot should be retrieved with read lock protection.
a: newDiffStorageIterator(account, seek, dl.buffer.states, func(addrHash common.Hash, slotHash common.Hash) ([]byte, error) {
dl.lock.RLock()
defer dl.lock.RUnlock()
// The storage loader function is unnecessary; the storage key list
// produced by the supplied buffer alone is sufficient for iteration.
//
// The storage key list for iteration is deterministic once the iterator
// is constructed, no matter the referenced disk layer is stale or not
// later.
a: newDiffStorageIterator(account, seek, dl.buffer.states, nil),

if dl.stale {
return nil, errSnapshotStale
}
return dl.buffer.states.mustStorage(addrHash, slotHash)
}),
// The storage keys for iteration might be changed if the persistent
// state in disk is mutated. Therefore, the disk iterator might produce
// some invalid storage key (e.g. the newly created storage which was
// not in the disk). In this case, the staleness will be detected by
// the outer iterator like storageBinaryIterator before returning in
// the Next function.
b: newDiskStorageIterator(dl.db.diskdb, account, seek),
}
l.aDone = !l.a.Next()
Expand All @@ -120,19 +143,27 @@ func (dl *diffLayer) initBinaryStorageIterator(account common.Hash, seek common.
parent, ok := dl.parent.(*diffLayer)
if !ok {
l := &binaryIterator{
// The state set in diff layer is immutable and will never be stale,
// so the read lock protection is unnecessary.
a: newDiffStorageIterator(account, seek, dl.states.stateSet, dl.states.mustStorage),
// The storage loader function is unnecessary; the storage key list
// produced by the supplied state set alone is sufficient for iteration.
//
// The storage key list for iteration is deterministic once the iterator
// is constructed, no matter the referenced disk layer is stale or not
// later.
a: newDiffStorageIterator(account, seek, dl.states.stateSet, nil),

// Recursively construct the iterator of the parent disk layer.
b: dl.parent.(*diskLayer).initBinaryStorageIterator(account, seek),
}
l.aDone = !l.a.Next()
l.bDone = !l.b.Next()
return l
}
l := &binaryIterator{
// The state set in diff layer is immutable and will never
// be stale. The supplied staleness checker is nil.
// The storage loader function is unnecessary; the storage key list
// produced by the supplied state set alone is sufficient for iteration.
a: newDiffStorageIterator(account, seek, dl.states.stateSet, nil),

// Recursively construct the iterator of the parent diff layer.
b: parent.initBinaryStorageIterator(account, seek),
}
l.aDone = !l.a.Next()
Expand All @@ -145,7 +176,12 @@ func (dl *diffLayer) initBinaryStorageIterator(account common.Hash, seek common.
// (e.g., when an account is deleted but still accessible for iteration).
// The outer iterator must verify emptiness before terminating the iteration.
//
// TODO (rjl493456442) the error of two iterators are not checked, please fix it.
// There’s no need to check for errors in the two iterators, as we only iterate
// through the entries without retrieving their values.
//
// The disk iterator may be stale, causing some outdated entries to be traversed.
// However, this staleness will be detected by the binary iterator wrappers
// (accountBinaryIterator or storageBinaryIterator) before Next returns.
func (it *binaryIterator) Next() bool {
if it.aDone && it.bDone {
return false
Expand Down Expand Up @@ -229,11 +265,17 @@ func (it *accountBinaryIterator) Next() bool {
if !it.binaryIterator.Next() {
return false
}
// Retrieve the account data referred by the current iterator, the
// associated layers might be outdated due to chain progressing,
// the relative error will be set to it.fail just in case.
//
// Skip the null account which was deleted before and move to the
// next account.
if len(it.Account()) != 0 {
return true
}
// it.fail might be set if error occurs by calling
// it.Account() stop iteration if so.
// it.fail might be set if error occurs by calling it.Account().
// Stop iteration if so.
if it.fail != nil {
return false
}
Expand Down Expand Up @@ -292,11 +334,17 @@ func (it *storageBinaryIterator) Next() bool {
if !it.binaryIterator.Next() {
return false
}
// Retrieve the storage data referred by the current iterator, the
// associated layers might be outdated due to chain progressing,
// the relative error will be set to it.fail just in case.
//
// Skip the null storage which was deleted before and move to the
// next account.
if len(it.Slot()) != 0 {
return true
}
// it.fail might be set if error occurs by calling
// it.Slot() stop iteration if so.
// it.fail might be set if error occurs by calling it.Slot().
// Stop iteration if so.
if it.fail != nil {
return false
}
Expand Down
5 changes: 0 additions & 5 deletions triedb/pathdb/iterator_fast.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ func (it *weightedIterator) Cmp(other *weightedIterator) int {
// fastIterator is a more optimized multi-layer iterator which maintains a
// direct mapping of all iterators leading down to the bottom layer.
type fastIterator struct {
db *Database // Database to reinitialize stale sub-iterators with
root common.Hash // Root hash to reinitialize stale sub-iterators through

curAccount []byte
curSlot []byte

Expand All @@ -78,8 +75,6 @@ func newFastIterator(db *Database, root common.Hash, account common.Hash, seek c
return nil, fmt.Errorf("unknown layer: %x", root)
}
fi := &fastIterator{
db: db,
root: root,
account: accountIterator,
}
for depth := 0; current != nil; depth++ {
Expand Down
47 changes: 47 additions & 0 deletions triedb/pathdb/iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,53 @@ func TestStorageIteratorDeletions(t *testing.T) {
verifyIterator(t, 2, db.tree.get(common.HexToHash("0x06")).(*diffLayer).newBinaryStorageIterator(common.HexToHash("0xaa"), common.Hash{}), verifyStorage)
}

// TestStaleIterator tests if the iterator could correctly terminate the iteration
// if the associated layers are outdated.
func TestStaleIterator(t *testing.T) {
testStaleIterator(t, func(db *Database, hash common.Hash) Iterator {
it, _ := db.StorageIterator(hash, common.HexToHash("0xaa"), common.Hash{})
return it
})
testStaleIterator(t, func(db *Database, hash common.Hash) Iterator {
head := db.tree.get(hash)
return head.(*diffLayer).newBinaryStorageIterator(common.HexToHash("0xaa"), common.Hash{})
})
}

func testStaleIterator(t *testing.T, newIter func(db *Database, hash common.Hash) Iterator) {
config := &Config{
WriteBufferSize: 16 * 1024 * 1024,
}
db := New(rawdb.NewMemoryDatabase(), config, false)
// db.WaitGeneration()

// [02 (disk), 03]
db.Update(common.HexToHash("0x02"), types.EmptyRootHash, 1, trienode.NewMergedNodeSet(),
NewStateSetWithOrigin(randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x01"}}, nil), nil, nil))
db.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), 2, trienode.NewMergedNodeSet(),
NewStateSetWithOrigin(randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x02"}}, nil), nil, nil))
db.tree.cap(common.HexToHash("0x03"), 1)

// [02 (disk), 03, 04]
db.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), 3, trienode.NewMergedNodeSet(),
NewStateSetWithOrigin(randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x03"}}, nil), nil, nil))
iter := newIter(db, common.HexToHash("0x04"))

// [04 (disk), 05]
db.Update(common.HexToHash("0x05"), common.HexToHash("0x04"), 3, trienode.NewMergedNodeSet(),
NewStateSetWithOrigin(randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x04"}}, nil), nil, nil))
db.tree.cap(common.HexToHash("0x05"), 1)

// Iterator can't finish the traversal as the layer 02 has becoming stale.
for iter.Next() {
}
err := iter.Error()
t.Log(err)
if err == nil {
t.Fatalf("Expected iterator error is not reported")
}
}

// BenchmarkAccountIteratorTraversal is a bit notorious -- all layers contain the
// exact same 200 accounts. That means that we need to process 2000 items, but
// only spit out 200 values eventually.
Expand Down

0 comments on commit 6cef4a6

Please sign in to comment.