diff --git a/benchmark/bench_trees.jl b/benchmark/bench_trees.jl new file mode 100644 index 000000000..547967d3e --- /dev/null +++ b/benchmark/bench_trees.jl @@ -0,0 +1,69 @@ +module BenchTrees + +using DataStructures +using BenchmarkTools +using Random + +suite = BenchmarkGroup() + +rand_setup = ( + Random.seed!(1234); + idx1 = rand(1:30000, 1000); + didx1 = rand(1:1000, 500); +) + +# insert a bunch of keys, search for all of them, delete half of them randomly, +# then search for all of them again. +function test_rand(T) + t = T{Int}() + for i in idx1 + push!(t, i) + end + for i in idx1 + haskey(t, i) + end + for i in didx1 + delete!(t, idx1[i]) + end + for i in idx1 + haskey(t, i) + end +end + +# insert 1, ..., N, then push 1 many times. tests a regression from an older +# splay tree implementation where splays didn't happen on redundant pushes. +function test_redundant(T, N=100000) + t = T{Int}() + for i in 1:N + push!(t, i) + end + for i in 1:N + push!(t, 1) + end +end + +# insert 1, ..., N, then access element 1 for N iterations. splay trees should +# perform best here. +function test_biased(T, N=100000) + t = T{Int}() + for i in 1:N + push!(t, i) + end + for _ in 1:N + haskey(t, 1) + end +end + +trees = Dict("splay" => SplayTree, "avl" => AVLTree, "rb" => RBTree) +for (T_name, T) in trees + suite[T_name]["rand"] = + @benchmarkable test_rand($(T)) setup=rand_setup + suite[T_name]["redundant"] = + @benchmarkable test_redundant($(T)) + suite[T_name]["biased"] = + @benchmarkable test_biased($T) +end + +end # module + +BenchTrees.suite diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 38e03e632..2b4b52e2d 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -12,3 +12,4 @@ const SUITE = BenchmarkGroup() SUITE["heap"] = include("bench_heap.jl") SUITE["SparseIntSet"] = include("bench_sparse_int_set.jl") +SUITE["trees"] = include("bench_trees.jl") diff --git a/src/DataStructures.jl b/src/DataStructures.jl index 5e5d767ec..6ca01a0dd 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -51,9 +51,8 @@ module DataStructures export DiBitVector export RBTree, search_node, minimum_node - export SplayTree, maximum_node + export SplayTree, search_node, maximum_node export AVLTree, sorted_rank - export SplayTree, maximum_node export findkey diff --git a/src/splay_tree.jl b/src/splay_tree.jl index e6e804516..60a79e5d0 100644 --- a/src/splay_tree.jl +++ b/src/splay_tree.jl @@ -126,6 +126,11 @@ function _join!(tree::SplayTree, s::Union{SplayTreeNode, Nothing}, t::Union{Spla end end +# search_node must do a splay even in the case of failed searches to guarantee +# the O(log n) amortized time bound. This might change the structure of the +# tree, but does NOT change the user-visible state (has_key, in-order +# traversals, etc., all keep the same results before and after). Hence we don't +# mark it with a "!". function search_node(tree::SplayTree{K}, d::K) where K node = tree.root prev = nothing @@ -137,7 +142,9 @@ function search_node(tree::SplayTree{K}, d::K) where K node = node.leftChild end end - return (node == nothing) ? prev : node + last = (node == nothing) ? prev : node + (last == nothing) || splay!(tree, last) + return last end function Base.haskey(tree::SplayTree{K}, d::K) where K @@ -147,9 +154,7 @@ function Base.haskey(tree::SplayTree{K}, d::K) where K else node = search_node(tree, d) (node === nothing) && return false - is_found = (node.data == d) - is_found && splay!(tree, node) - return is_found + return (node.data == d) end end @@ -158,12 +163,10 @@ Base.in(key, tree::SplayTree) = haskey(tree, key) function Base.delete!(tree::SplayTree{K}, d::K) where K node = tree.root x = search_node(tree, d) - (x == nothing) && return tree + (x == nothing || x.data != d) && return tree t = nothing s = nothing - splay!(tree, x) - if x.rightChild !== nothing t = x.rightChild t.parent = nothing diff --git a/test/test_splay_tree.jl b/test/test_splay_tree.jl index bcd404174..cf993516e 100644 --- a/test/test_splay_tree.jl +++ b/test/test_splay_tree.jl @@ -42,6 +42,28 @@ @test length(t) == 100 end + @testset "deleting missing values" begin + t = SplayTree{Int}() + for i in 1:100 + push!(t, i) + end + for i in 101:200 + delete!(t, i) + end + + @test length(t) == 100 + + for i in 1:100 + @test haskey(t, i) + end + + for i in 101:200 + push!(t, i) + end + + @test length(t) == 200 + end + @testset "handling different cases of delete!" begin t2 = SplayTree() for i in 1:100000 @@ -84,7 +106,7 @@ @test !in('c', t4) end - @testset "search_node" begin + @testset "search_node" begin t5 = SplayTree() for i in 1:32 push!(t5, i) @@ -131,4 +153,4 @@ node = node.rightChild end end -end \ No newline at end of file +end