From 9e046d50ef7a0c50385c020d0efcd85b195fb2a1 Mon Sep 17 00:00:00 2001 From: Vikas Negi <68782261+vnegi10@users.noreply.github.com> Date: Sun, 8 May 2022 00:22:54 +0200 Subject: [PATCH] Support batch requests (#9) * Add batch support to JSON-RPC request * Batch supp. for block stats is OK * Minor refactoring * Add batch support with stats * Batch support for network stats * Add test case for batch requests * Update version + README --- Project.toml | 2 +- README.md | 69 +++++++++++++++------- src/BitcoinRPC.jl | 3 +- src/analytics.jl | 127 ++++++++++++++++++++++++++++++++++++----- src/blockchainRPC.jl | 2 +- src/helpers.jl | 18 ++++++ src/request.jl | 61 +++++++++++++++++++- test/test_analytics.jl | 17 +++++- 8 files changed, 258 insertions(+), 41 deletions(-) diff --git a/Project.toml b/Project.toml index fb8e931..611138c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BitcoinRPC" uuid = "9b85cdd3-b0ee-4013-9538-086350204d42" authors = ["Vikas Negi "] -version = "0.1.2" +version = "0.1.3" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" diff --git a/README.md b/README.md index eb59988..82249fa 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,9 @@ Dict{String, Any} with 18 entries: "nTx" => 2826 "chainwork" => "00000000000000000000000000000000000000002cf79dc7911c9663902061d0" "nonce" => 2198508859 - ``` +``` - ```julia +```julia julia> show_chain_txstats(auth, nblocks = 1440, blockhash = show_block_hash(auth, height = 700000)) Dict{String, Any} with 8 entries: "txcount" => 669566382 @@ -94,22 +94,49 @@ Dict{String, Any} with 8 entries: "window_final_block_hash" => "0000000000000000000590fc0f3eba193a278534220b2b37e9849e1a770ca959" "window_block_count" => 1440 "txrate" => 3.05922 - ``` - - ```julia - julia> collect_block_stats(auth, 700_000, 700_010) -11×28 DataFrame - Row │ avgfee avgfeerate avgtxsize blockhash height ins maxfee maxfeerate maxtxsize medianfe ⋯ - │ Float64 Float64 Int64 String Int64 Int64 Float64 Float64 Int64 
Float64 ⋯ -─────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── - 1 │ 0.00012069 1.5e-7 1000 0000000000000000000590fc0f3eba19… 700000 6342 0.014 3.58e-6 86228 1.512e- ⋯ - 2 │ 2.078e-5 2.0e-8 956 00000000000000000002f39baabb00ff… 700001 2435 0.00153 2.51e-6 46044 3.36e-6 - 3 │ 4.082e-5 8.0e-8 627 00000000000000000001993b6b5e4e3d… 700002 729 0.00059549 3.16e-6 22613 1.363e- - 4 │ 0.00025372 3.5e-7 981 000000000000000000055646f272b32b… 700003 716 0.025465 2.02e-6 35443 1.486e- - 5 │ 5.484e-5 1.1e-7 831 0000000000000000000e360e05cb9d7b… 700004 1174 0.0028332 4.77e-6 53046 1.323e- ⋯ - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 8 │ 8.023e-5 1.6e-7 723 0000000000000000000a5f9662af9517… 700007 4141 0.00584712 3.89e-6 72988 1.333e- - 9 │ 6.225e-5 1.5e-7 562 0000000000000000000bd59e42336960… 700008 6130 0.00388224 6.05e-6 61070 1.396e- - 10 │ 3.048e-5 5.0e-8 703 0000000000000000000190997c460412… 700009 2176 0.00164097 1.55e-6 46079 4.52e-6 - 11 │ 0.00010499 2.1e-7 597 00000000000000000001a6a742dcef16… 700010 384 0.00641376 1.55e-6 9780 1.343e- ⋯ - ``` \ No newline at end of file +``` + +```julia + julia> @time collect_block_stats(auth, 500_000, 505_000) + 46.696418 seconds (3.57 M allocations: 204.254 MiB, 0.23% gc time) +5001×28 DataFrame + Row │ avgfee avgfeerate avgtxsize blockhash height ins maxfee maxfeerate maxtxsize medianfe ⋯ + │ Float64 Float64 Int64 String Int64 Int64 Float64 Float64 Int64 Float64 ⋯ +──────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + 1 │ 0.00125685 3.4e-6 388 00000000000000000024fb37364cbf81… 500000 4370 0.0647075 8.84e-6 19850 0.000858 ⋯ + 2 │ 0.00137451 3.64e-6 403 0000000000000000005c9959b3216f86… 500001 5176 0.236868 2.79e-5 43980 0.000835 + 3 │ 0.00113823 3.41e-6 352 000000000000000000877d93d1412ca6… 500002 5059 0.1 3.141e-5 33674 0.000697 + 4 │ 0.00144839 3.64e-6 414 
0000000000000000005467c7a728a3dc… 500003 5028 0.187487 1.182e-5 34739 0.000895 + 5 │ 0.00150056 2.9e-6 568 0000000000000000005d4da5924742e6… 500004 5303 0.177758 1.908e-5 103927 0.000691 ⋯ +``` + +Requests in **batch mode** are also supported. In fact, this is recommended while performing +on-chain analytics over a large number of blocks. See the difference in time (shown below) +compared to the above example: + +```julia +julia> @time collect_block_stats_batch(auth, 500_000, 505_000, batchsize = 1000) + 26.004791 seconds (3.38 M allocations: 158.279 MiB, 0.25% gc time) +5001×28 DataFrame + Row │ avgfee avgfeerate avgtxsize blockhash height ins maxfee maxfeerate maxtxsize medianfe ⋯ + │ Float64 Float64 Int64 String Int64 Int64 Float64 Float64 Int64 Float64 ⋯ +──────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + 1 │ 0.00125685 3.4e-6 388 00000000000000000024fb37364cbf81… 500000 4370 0.0647075 8.84e-6 19850 0.000858 ⋯ + 2 │ 0.00137451 3.64e-6 403 0000000000000000005c9959b3216f86… 500001 5176 0.236868 2.79e-5 43980 0.000835 + 3 │ 0.00113823 3.41e-6 352 000000000000000000877d93d1412ca6… 500002 5059 0.1 3.141e-5 33674 0.000697 + 4 │ 0.00144839 3.64e-6 414 0000000000000000005467c7a728a3dc… 500003 5028 0.187487 1.182e-5 34739 0.000895 + 5 │ 0.00150056 2.9e-6 568 0000000000000000005d4da5924742e6… 500004 5303 0.177758 1.908e-5 103927 0.000691 ⋯ +``` + +```julia +julia> collect_network_stats_batch(auth, 600_000, 601_000, batchsize = 100) +1001×4 DataFrame + Row │ height time network_hash difficulty + │ Int64 DateTime Float64 Float64 +──────┼─────────────────────────────────────────────────────── + 1 │ 600000 2019-10-19T00:04:21 9.59932e19 1.34101e13 + 2 │ 600001 2019-10-19T00:06:53 9.60499e19 1.3418e13 + 3 │ 600002 2019-10-19T00:14:35 9.60657e19 1.34202e13 + 4 │ 600003 2019-10-19T00:39:08 9.59483e19 1.34038e13 + 5 │ 600004 2019-10-19T00:46:56 9.59633e19 1.34059e13 +``` \ No newline at end 
of file diff --git a/src/BitcoinRPC.jl b/src/BitcoinRPC.jl index 31f63a1..1d35ec7 100644 --- a/src/BitcoinRPC.jl +++ b/src/BitcoinRPC.jl @@ -21,7 +21,8 @@ export show_best_block_hash, show_network_hashps, verify_chain, collect_block_stats, - collect_network_stats, + collect_block_stats_batch, + collect_network_stats_batch, UserAuth, StringOrInt diff --git a/src/analytics.jl b/src/analytics.jl index 57e8462..c3c658e 100644 --- a/src/analytics.jl +++ b/src/analytics.jl @@ -10,6 +10,9 @@ Collect statistics by iterating over a range of blocks. - `block_start::Int64` : Starting block height - `block_end::Int64` : Ending block height +# Optional keywords +- `stats` : Select specific values to return, e.g. ["avgfee", "avgtxsize"], default is "". + # Example ```julia-repl julia> collect_block_stats(auth, 700_000, 700_005) @@ -58,35 +61,129 @@ end """ - collect_network_stats(auth::UserAuth, block_start::Int64, block_end::Int64) + collect_block_stats_batch(auth::UserAuth, block_start::Int64, + block_end::Int64; batchsize::Int64 = 50, + stats = "") + +Collect statistics by iterating over a range of blocks in batches. + +# Arguments +- `auth::UserAuth` : User credentials, e.g. `auth = UserAuth("username", "password", port)` +- `block_start::Int64` : Starting block height +- `block_end::Int64` : Ending block height + +# Optional keywords +- `batchsize::Int64` : Request can be sent in batches of given size, default is set to 50. +- `stats` : Select specific values to return, e.g. ["avgfee", "avgtxsize"], default is "". 
+ +# Example +```julia-repl +julia> @time collect_block_stats_batch(auth, 500_000, 505_000, batchsize = 1000) + 25.873394 seconds (3.37 M allocations: 157.842 MiB, 0.21% gc time) +5001×28 DataFrame + Row │ avgfee avgfeerate avgtxsize blockhash height ins maxfee maxfeerate maxtxsize medianfe ⋯ + │ Float64 Float64 Int64 String Int64 Int64 Float64 Float64 Int64 Float64 ⋯ +──────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + 1 │ 0.00125685 3.4e-6 388 00000000000000000024fb37364cbf81… 500000 4370 0.0647075 8.84e-6 19850 0.000858 ⋯ + 2 │ 0.00137451 3.64e-6 403 0000000000000000005c9959b3216f86… 500001 5176 0.236868 2.79e-5 43980 0.000835 + 3 │ 0.00113823 3.41e-6 352 000000000000000000877d93d1412ca6… 500002 5059 0.1 3.141e-5 33674 0.000697 + 4 │ 0.00144839 3.64e-6 414 0000000000000000005467c7a728a3dc… 500003 5028 0.187487 1.182e-5 34739 0.000895 + 5 │ 0.00150056 2.9e-6 568 0000000000000000005d4da5924742e6… 500004 5303 0.177758 1.908e-5 103927 0.000691 ⋯ +... +``` +""" +function collect_block_stats_batch(auth::UserAuth, block_start::Int64, + block_end::Int64; batchsize::Int64 = 50, + stats = "") + + @assert 0 ≤ block_start < block_end ≤ show_block_count(auth) "Invalid block height" + + num_blocks = block_end - block_start + 1 + @assert num_blocks > batchsize "number of blocks is smaller than the batch size" + + results = "" + all_results = Any[] + i = block_start + last_batch = false + + while i ≤ block_end + + j = i + batchsize + + if j ≥ block_end + j = block_end + last_batch = true + end + params = JSON.json(i:j) + + try + results = post_request_batch(auth,"getblockstats"; + params = params, stats = stats) + catch e + @info "Ran into error $(e)" + @info "Could not fetch data for blocks $(i) to $(j), will continue to next batch!" 
+ i = j + 1 + continue + end + + for result in results + delete!(result, "feerate_percentiles") + sato_to_btc!(result) + push!(all_results, result) + end + + if last_batch + break + end + + i = j + 1 + end + + df_stats = DataFrame() + + try + df_stats = vcat(DataFrame.(all_results)...) + catch e + @info "Unable to create a DataFrame, check this error: $(e)" + end + + return df_stats +end + + +""" + collect_network_stats_batch(auth::UserAuth, block_start::Int64, block_end::Int64; + batchsize::Int64 = 50) -Collect block and network statistics by iterating over a range of blocks. +Collect block and network statistics by iterating over a range of blocks in batches. # Arguments - `auth::UserAuth` : User credentials, e.g. `auth = UserAuth("username", "password", port)` - `block_start::Int64` : Starting block height - `block_end::Int64` : Ending block height +# Optional keywords +- `batchsize::Int64` : Request can be sent in batches of given size, default is set to 50. + # Example ```julia-repl -julia> collect_network_stats(auth, 700_000, 700_100) -101×4 DataFrame +julia> collect_network_stats_batch(auth, 600_000, 600_699, batchsize = 100) +700×4 DataFrame Row │ height time network_hash difficulty │ Int64 DateTime Float64 Float64 ─────┼─────────────────────────────────────────────────────── - 1 │ 700000 2021-09-11T04:14:32 1.29538e20 1.80963e13 - 2 │ 700001 2021-09-11T04:15:02 1.29813e20 1.81346e13 - 3 │ 700002 2021-09-11T04:17:07 1.30042e20 1.81666e13 - 4 │ 700003 2021-09-11T04:17:57 1.30306e20 1.82036e13 - 5 │ 700004 2021-09-11T04:20:45 1.30515e20 1.82327e13 + 1 │ 600000 2019-10-19T00:04:21 9.59932e19 1.34101e13 + 2 │ 600001 2019-10-19T00:06:53 9.60499e19 1.3418e13 + 3 │ 600002 2019-10-19T00:14:35 9.60657e19 1.34202e13 + 4 │ 600003 2019-10-19T00:39:08 9.59483e19 1.34038e13 + 5 │ 600004 2019-10-19T00:46:56 9.59633e19 1.34059e13 ``` """ -function collect_network_stats(auth::UserAuth, block_start::Int64, block_end::Int64) - - @assert 0 ≤ block_start < block_end ≤ 
show_block_count(auth) "Invalid block height" +function collect_network_stats_batch(auth::UserAuth, block_start::Int64, block_end::Int64; + batchsize::Int64 = 50) - df_stats = collect_block_stats(auth, block_start, block_end, - stats = ["height", "time"]) + df_stats = collect_block_stats_batch(auth, block_start, block_end, batchsize = batchsize, + stats = ["height", "time"]) network_hash = [show_network_hashps(auth, height = h) for h in df_stats[!, :height]] @@ -98,4 +195,4 @@ function collect_network_stats(auth::UserAuth, block_start::Int64, block_end::In insertcols!(df_stats, :network_hash, :difficulty => difficulty, after = true) return df_stats -end \ No newline at end of file +end \ No newline at end of file diff --git a/src/blockchainRPC.jl b/src/blockchainRPC.jl index c667219..fc7eed1 100644 --- a/src/blockchainRPC.jl +++ b/src/blockchainRPC.jl @@ -171,7 +171,7 @@ Compute per block statistics for a given window. All amounts are in BTC. # Optional keywords - `hashORheight::StringOrInt` : Block hash or height -- `stats` : Specific group of stats, e.g. ["avgfee", "utxo_increase"] +- `stats` : Select specific values to return, e.g. ["avgfee", "avgtxsize"], default is "". 
# Example ```julia-repl diff --git a/src/helpers.jl b/src/helpers.jl index 0a2b4ea..4c35b86 100644 --- a/src/helpers.jl +++ b/src/helpers.jl @@ -40,4 +40,22 @@ function sato_to_btc!(result) end return result +end + +# Convert list of params from String to Int64 +function convert_to_int(params::String) + + # Example for params input: "[500,501,502,503,504...]" + + all_params = split(params, ",") + all_params[1] = strip(all_params[1], [ '[' ]) + all_params[end] = strip(all_params[end], [ ']' ]) + + params_int = Int64[] + + for par in all_params + push!(params_int, parse(Int64, par)) + end + + return params_int end \ No newline at end of file diff --git a/src/request.jl b/src/request.jl index f0d4b5d..8f4dafa 100644 --- a/src/request.jl +++ b/src/request.jl @@ -46,4 +46,63 @@ function post_request(auth::UserAuth, RPC_name::String; params) end return result -end \ No newline at end of file +end + +# Add support for batch requests +function generate_body_batch(RPC_name::String, params::String, stats) + + params = convert_to_int(params) + + bodies = Dict{String, Any}[] + i = 1 + + for param in params + + par = ~isempty(stats) ? 
[param, stats] : [param] + + body_dict = Dict("method" => RPC_name, + "params" => par, + "id" => i, + "jsonrpc" => "2.0") + push!(bodies, body_dict) + i += 1 + end + + return JSON.json(bodies) +end + +function post_request_batch(auth::UserAuth, RPC_name::String; + params::String, stats) + + url = "http://$(auth.name):$(auth.pass)@127.0.0.1:8332" + + bodies = generate_body_batch(RPC_name, params, stats) + headers = ["Content-Type" => "application/json"] + + response = HTTP.request( + "POST", + url, + headers, + bodies; + verbose = 0, + retries = 2 + ) + + response_dicts = String(response.body) |> JSON.parse + + #= Keep it like this since results can be Strings (hashes), + Numbers or Dicts =# + results = Any[] + + for response_dict in response_dicts + result = response_dict["result"] + + if "time" in keys(result) + result["time"] = unix2datetime(result["time"]) + end + + push!(results, result) + end + + return results +end \ No newline at end of file diff --git a/test/test_analytics.jl b/test/test_analytics.jl index 85c33b4..831f0d3 100644 --- a/test/test_analytics.jl +++ b/test/test_analytics.jl @@ -19,11 +19,26 @@ @test df_stats_2[!, :utxo_increase][5] == -129 @test isapprox(df_stats_2[!, :avgfee][10], 3.048e-5; atol = 1e-5) + # Test if batch mode results are the same as in normal mode + df_stats_1b = collect_block_stats_batch(AUTH, 700_000, 700_010, batchsize = 3) + rows, cols = size(df_stats_1b) + @test rows == 11 && cols == 28 + @test df_stats_1b[!, :avgtxsize][5] == 831 + @test df_stats_1b[!, :maxtxsize][10] == 46079 + + df_stats_2b = collect_block_stats_batch(AUTH, 700_000, 700_100, + stats = ["avgfee", "utxo_increase"], batchsize = 3) + rows, cols = size(df_stats_2b) + + @test rows == 101 && cols == 2 + @test df_stats_2b[!, :utxo_increase][5] == -129 + @test isapprox(df_stats_2b[!, :avgfee][10], 3.048e-5; atol = 1e-5) + end @testset "collect_network_stats" begin - df_stats = collect_network_stats(AUTH, 700_000, 700_100) + df_stats = 
collect_network_stats_batch(AUTH, 700_000, 700_100, batchsize = 42) rows, cols = size(df_stats) @test rows == 101 && cols == 4