diff --git a/triedb/pathdb/database.go b/triedb/pathdb/database.go index 69076bca16f9d..914b17de5ba09 100644 --- a/triedb/pathdb/database.go +++ b/triedb/pathdb/database.go @@ -559,17 +559,11 @@ func (db *Database) HistoryRange() (uint64, uint64, error) { // AccountIterator creates a new account iterator for the specified root hash and // seeks to a starting account hash. func (db *Database) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) { - //if gen := db.tree.bottom().generator; gen != nil && !gen.completed() { - // return nil, errNotConstructed - //} return newFastAccountIterator(db, root, seek) } // StorageIterator creates a new storage iterator for the specified root hash and -// account. The iterator will be move to the specific start position. +// account. The iterator will be moved to the specific start position. func (db *Database) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) { - //if gen := db.tree.bottom().generator; gen != nil && !gen.completed() { - // return nil, errNotConstructed - //} return newFastStorageIterator(db, root, account, seek) } diff --git a/triedb/pathdb/iterator.go b/triedb/pathdb/iterator.go index e99601a9962e0..980f228cf5d2d 100644 --- a/triedb/pathdb/iterator.go +++ b/triedb/pathdb/iterator.go @@ -159,8 +159,8 @@ func (it *diffAccountIterator) Release() {} // To simplify, the staleness of the persistent state is not tracked. The disk // iterator is not intended to be used alone. It should always be wrapped with // a diff iterator, as the bottom-most disk layer uses both the in-memory -// aggregated buffer and the persistent disk layer as data sources. The staleness -// of the diff iterator is sufficient to invalidate the iterator pair. +// aggregated buffer and the persistent disk layer as the data sources. The +// staleness of the diff iterator is sufficient to invalidate the iterator pair. type diskAccountIterator struct { it ethdb.Iterator } @@ -304,8 +304,8 @@ func (it *diffStorageIterator) Release() {} // To simplify, the staleness of the persistent state is not tracked. The disk // iterator is not intended to be used alone. It should always be wrapped with // a diff iterator, as the bottom-most disk layer uses both the in-memory -// aggregated buffer and the persistent disk layer as data sources. The staleness -// of the diff iterator is sufficient to invalidate the iterator pair. +// aggregated buffer and the persistent disk layer as the data sources. The +// staleness of the diff iterator is sufficient to invalidate the iterator pair. type diskStorageIterator struct { account common.Hash it ethdb.Iterator diff --git a/triedb/pathdb/iterator_binary.go b/triedb/pathdb/iterator_binary.go index e04210b08ec6b..7e6e2e405c114 100644 --- a/triedb/pathdb/iterator_binary.go +++ b/triedb/pathdb/iterator_binary.go @@ -24,7 +24,14 @@ import ( // binaryIterator is a simplistic iterator to step over the accounts or storage // in a snapshot, which may or may not be composed of multiple layers. Performance -// wise this iterator is slow, it's meant for cross validating the fast one, +// wise this iterator is slow, it's meant for cross validating the fast one. +// +// This iterator cannot be used on its own; it should be wrapped with an outer +// iterator, such as accountBinaryIterator or storageBinaryIterator. +// +// This iterator can only traverse the keys of the entries stored in the layers, +// but cannot obtain the corresponding values. Besides, the deleted entry will +// also be traversed, the outer iterator must check the emptiness before returning. type binaryIterator struct { a Iterator b Iterator @@ -41,18 +48,20 @@ func (dl *diskLayer) initBinaryAccountIterator(seek common.Hash) *binaryIterator // Create two iterators for state buffer and the persistent state in disk // respectively and combine them as a binary iterator. l := &binaryIterator{ - // The state set in the disk layer is mutable, and the entire state becomes stale - // if a diff layer above is merged into it. Therefore, staleness must be checked, - // and the account data should be retrieved with read lock protection. - a: newDiffAccountIterator(seek, dl.buffer.states, func(hash common.Hash) ([]byte, error) { - dl.lock.RLock() - defer dl.lock.RUnlock() + // The account loader function is unnecessary; the account key list + // produced by the supplied buffer alone is sufficient for iteration. + // + // The account key list for iteration is deterministic once the iterator + // is constructed, no matter the referenced disk layer is stale or not + // later. + a: newDiffAccountIterator(seek, dl.buffer.states, nil), - if dl.stale { - return nil, errSnapshotStale - } - return dl.buffer.states.mustAccount(hash) - }), + // The account keys for iteration might be changed if the persistent + // state in disk is mutated. Therefore, the disk iterator might produce + // some invalid account key (e.g. the newly created account which was + // not in the disk). In this case, the staleness will be detected by + // the outer iterator like accountBinaryIterator before returning in + // the Next function. b: newDiskAccountIterator(dl.db.diskdb, seek), } l.aDone = !l.a.Next() @@ -67,9 +76,15 @@ func (dl *diffLayer) initBinaryAccountIterator(seek common.Hash) *binaryIterator parent, ok := dl.parent.(*diffLayer) if !ok { l := &binaryIterator{ - // The state set in diff layer is immutable and will never be stale, - // so the read lock protection is unnecessary. - a: newDiffAccountIterator(seek, dl.states.stateSet, dl.states.mustAccount), + // The account loader function is unnecessary; the account key list + // produced by the supplied state set alone is sufficient for iteration. + // + // The account key list for iteration is deterministic once the iterator + // is constructed, no matter the referenced disk layer is stale or not + // later. + a: newDiffAccountIterator(seek, dl.states.stateSet, nil), + + // Recursively construct the iterator of the parent disk layer. b: dl.parent.(*diskLayer).initBinaryAccountIterator(seek), } l.aDone = !l.a.Next() @@ -77,9 +92,15 @@ func (dl *diffLayer) initBinaryAccountIterator(seek common.Hash) *binaryIterator return l } l := &binaryIterator{ - // The state set in diff layer is immutable and will never - // be stale. The supplied staleness checker is nil. - a: newDiffAccountIterator(seek, dl.states.stateSet, dl.states.mustAccount), + // The account loader function is unnecessary; the account key list + // produced by the supplied state set alone is sufficient for iteration. + // + // The account key list for iteration is deterministic once the iterator + // is constructed, no matter the referenced disk layer is stale or not + // later. + a: newDiffAccountIterator(seek, dl.states.stateSet, nil), + + // Recursively construct the iterator of the parent diff layer. b: parent.initBinaryAccountIterator(seek), } l.aDone = !l.a.Next() @@ -94,18 +115,20 @@ func (dl *diskLayer) initBinaryStorageIterator(account common.Hash, seek common. // Create two iterators for state buffer and the persistent state in disk // respectively and combine them as a binary iterator. l := &binaryIterator{ - // The state set in the disk layer is mutable, and the entire state becomes stale - // if a diff layer above is merged into it. Therefore, staleness must be checked, - // and the storage slot should be retrieved with read lock protection. - a: newDiffStorageIterator(account, seek, dl.buffer.states, func(addrHash common.Hash, slotHash common.Hash) ([]byte, error) { - dl.lock.RLock() - defer dl.lock.RUnlock() + // The storage loader function is unnecessary; the storage key list + // produced by the supplied buffer alone is sufficient for iteration. + // + // The storage key list for iteration is deterministic once the iterator + // is constructed, no matter the referenced disk layer is stale or not + // later. + a: newDiffStorageIterator(account, seek, dl.buffer.states, nil), - if dl.stale { - return nil, errSnapshotStale - } - return dl.buffer.states.mustStorage(addrHash, slotHash) - }), + // The storage keys for iteration might be changed if the persistent + // state in disk is mutated. Therefore, the disk iterator might produce + // some invalid storage key (e.g. the newly created storage which was + // not in the disk). In this case, the staleness will be detected by + // the outer iterator like storageBinaryIterator before returning in + // the Next function. b: newDiskStorageIterator(dl.db.diskdb, account, seek), } l.aDone = !l.a.Next() @@ -120,9 +143,15 @@ func (dl *diffLayer) initBinaryStorageIterator(account common.Hash, seek common. parent, ok := dl.parent.(*diffLayer) if !ok { l := &binaryIterator{ - // The state set in diff layer is immutable and will never be stale, - // so the read lock protection is unnecessary. - a: newDiffStorageIterator(account, seek, dl.states.stateSet, dl.states.mustStorage), + // The storage loader function is unnecessary; the storage key list + // produced by the supplied state set alone is sufficient for iteration. + // + // The storage key list for iteration is deterministic once the iterator + // is constructed, no matter the referenced disk layer is stale or not + // later. + a: newDiffStorageIterator(account, seek, dl.states.stateSet, nil), + + // Recursively construct the iterator of the parent disk layer. b: dl.parent.(*diskLayer).initBinaryStorageIterator(account, seek), } l.aDone = !l.a.Next() @@ -130,9 +159,11 @@ func (dl *diffLayer) initBinaryStorageIterator(account common.Hash, seek common. return l } l := &binaryIterator{ - // The state set in diff layer is immutable and will never - // be stale. The supplied staleness checker is nil. + // The storage loader function is unnecessary; the storage key list + // produced by the supplied state set alone is sufficient for iteration. a: newDiffStorageIterator(account, seek, dl.states.stateSet, nil), + + // Recursively construct the iterator of the parent diff layer. b: parent.initBinaryStorageIterator(account, seek), } l.aDone = !l.a.Next() @@ -145,7 +176,12 @@ func (dl *diffLayer) initBinaryStorageIterator(account common.Hash, seek common. // (e.g., when an account is deleted but still accessible for iteration). // The outer iterator must verify emptiness before terminating the iteration. // -// TODO (rjl493456442) the error of two iterators are not checked, please fix it. +// There’s no need to check for errors in the two iterators, as we only iterate +// through the entries without retrieving their values. +// +// The disk iterator may be stale, causing some outdated entries to be traversed. +// However, this staleness will be detected by the binary iterator wrappers +// (accountBinaryIterator or storageBinaryIterator) before Next returns. func (it *binaryIterator) Next() bool { if it.aDone && it.bDone { return false @@ -229,11 +265,17 @@ func (it *accountBinaryIterator) Next() bool { if !it.binaryIterator.Next() { return false } + // Retrieve the account data referred by the current iterator, the + // associated layers might be outdated due to chain progressing, + // the relative error will be set to it.fail just in case. + // + // Skip the null account which was deleted before and move to the + // next account. if len(it.Account()) != 0 { return true } - // it.fail might be set if error occurs by calling - // it.Account() stop iteration if so. + // it.fail might be set if error occurs by calling it.Account(). + // Stop iteration if so. if it.fail != nil { return false } @@ -292,11 +334,17 @@ func (it *storageBinaryIterator) Next() bool { if !it.binaryIterator.Next() { return false } + // Retrieve the storage data referred by the current iterator, the + // associated layers might be outdated due to chain progressing, + // the relative error will be set to it.fail just in case. + // + // Skip the null storage which was deleted before and move to the + // next account. if len(it.Slot()) != 0 { return true } - // it.fail might be set if error occurs by calling - // it.Slot() stop iteration if so. + // it.fail might be set if error occurs by calling it.Slot(). + // Stop iteration if so. if it.fail != nil { return false } diff --git a/triedb/pathdb/iterator_fast.go b/triedb/pathdb/iterator_fast.go index 9781750f586a0..217b211fccb1a 100644 --- a/triedb/pathdb/iterator_fast.go +++ b/triedb/pathdb/iterator_fast.go @@ -57,9 +57,6 @@ func (it *weightedIterator) Cmp(other *weightedIterator) int { // fastIterator is a more optimized multi-layer iterator which maintains a // direct mapping of all iterators leading down to the bottom layer. type fastIterator struct { - db *Database // Database to reinitialize stale sub-iterators with - root common.Hash // Root hash to reinitialize stale sub-iterators through - curAccount []byte curSlot []byte @@ -78,8 +75,6 @@ func newFastIterator(db *Database, root common.Hash, account common.Hash, seek c return nil, fmt.Errorf("unknown layer: %x", root) } fi := &fastIterator{ - db: db, - root: root, account: accountIterator, } for depth := 0; current != nil; depth++ { diff --git a/triedb/pathdb/iterator_test.go b/triedb/pathdb/iterator_test.go index e50c9942c7168..48b5870b5bfd1 100644 --- a/triedb/pathdb/iterator_test.go +++ b/triedb/pathdb/iterator_test.go @@ -889,6 +889,53 @@ func TestStorageIteratorDeletions(t *testing.T) { verifyIterator(t, 2, db.tree.get(common.HexToHash("0x06")).(*diffLayer).newBinaryStorageIterator(common.HexToHash("0xaa"), common.Hash{}), verifyStorage) } +// TestStaleIterator tests if the iterator could correctly terminate the iteration +// if the associated layers are outdated. +func TestStaleIterator(t *testing.T) { + testStaleIterator(t, func(db *Database, hash common.Hash) Iterator { + it, _ := db.StorageIterator(hash, common.HexToHash("0xaa"), common.Hash{}) + return it + }) + testStaleIterator(t, func(db *Database, hash common.Hash) Iterator { + head := db.tree.get(hash) + return head.(*diffLayer).newBinaryStorageIterator(common.HexToHash("0xaa"), common.Hash{}) + }) +} + +func testStaleIterator(t *testing.T, newIter func(db *Database, hash common.Hash) Iterator) { + config := &Config{ + WriteBufferSize: 16 * 1024 * 1024, + } + db := New(rawdb.NewMemoryDatabase(), config, false) + // db.WaitGeneration() + + // [02 (disk), 03] + db.Update(common.HexToHash("0x02"), types.EmptyRootHash, 1, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x01"}}, nil), nil, nil)) + db.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), 2, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x02"}}, nil), nil, nil)) + db.tree.cap(common.HexToHash("0x03"), 1) + + // [02 (disk), 03, 04] + db.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), 3, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x03"}}, nil), nil, nil)) + iter := newIter(db, common.HexToHash("0x04")) + + // [04 (disk), 05] + db.Update(common.HexToHash("0x05"), common.HexToHash("0x04"), 3, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x04"}}, nil), nil, nil)) + db.tree.cap(common.HexToHash("0x05"), 1) + + // Iterator can't finish the traversal as the layer 02 has becoming stale. + for iter.Next() { + } + err := iter.Error() + t.Log(err) + if err == nil { + t.Fatalf("Expected iterator error is not reported") + } +} + // BenchmarkAccountIteratorTraversal is a bit notorious -- all layers contain the // exact same 200 accounts. That means that we need to process 2000 items, but // only spit out 200 values eventually.