Skip to content

Commit

Permalink
handle multiple blocks in datastream (#21)
Browse files Browse the repository at this point in the history
  • Loading branch information
Moelf authored Jul 3, 2021
1 parent 94b8e6e commit 67f1e36
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/UnROOT.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ using Parameters
using StaticArrays

include("constants.jl")
include("utils.jl")
include("io.jl")
include("types.jl")
include("utils.jl")
include("streamers.jl")
include("bootstrap.jl")
include("root.jl")
Expand Down
2 changes: 0 additions & 2 deletions src/streamers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,8 @@ function Streamers(io)

if iscompressed(tkey)
@debug "Compressed stream at $(start)"
_start = tkey.fSeekKey
seekstart(io, tkey)
compression_header = unpack(io, CompressionHeader)
skipped = position(io) - _start
#FIXME for some reason we need to re-pack such that it ends at exact bytes.
skipped = position(io) - start
# notice our `TKey` size is not the same as official TKey, can't use sizeof()
Expand Down
35 changes: 21 additions & 14 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,22 +101,29 @@ function datastream(io, tkey::T) where T<:Union{TKey, TBasketKey}
@debug "Compressed stream at $(start)"
_start = tkey.fSeekKey
seekstart(io, tkey)
compression_header = unpack(io, CompressionHeader)
skipped = position(io) - _start
io_buf = IOBuffer(read(io, tkey.fNbytes - skipped))
if String(compression_header.algo) == "ZL"
return IOBuffer(read(ZlibDecompressorStream(io_buf), tkey.fObjlen))
elseif String(compression_header.algo) == "XZ"
#FIXME doesn't work, why
return IOBuffer(read(XzDecompressorStream(io_buf), tkey.fObjlen))
elseif String(compression_header.algo) == "L4"
#FIXME doesn't work
skip(io_buf, 8) #skip checksum
stream = IOBuffer(lz4_decompress(read(io_buf), tkey.fObjlen))
else
error("Unsupported compression type '$(String(compression_header.algo))'")
fufilled = 0
uncomp_data = Vector{UInt8}(undef, tkey.fObjlen)
while fufilled < tkey.fObjlen # careful with 0/1-based index when thinking about offsets
compression_header = unpack(io, CompressionHeader)
cname, _, compbytes, uncompbytes = unpack(compression_header)
io_buf = IOBuffer(read(io, compbytes))

# indexing `0+1 to 0+2` are two bytes, no need to +1 in the second term
uncomp_data[fufilled+1:fufilled+uncompbytes] .= if cname == "ZL"
read(ZlibDecompressorStream(io_buf), uncompbytes)
elseif cname == "XZ"
read(XzDecompressorStream(io_buf), uncompbytes)
elseif cname == "L4"
skip(io_buf, 8) #skip checksum
lz4_decompress(read(io_buf), uncompbytes)
else
error("Unsupported compression type '$(String(compression_header.algo))'")
end

fufilled += uncompbytes
end

return IOBuffer(uncomp_data)
end


Expand Down
18 changes: 18 additions & 0 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,21 @@ macro stack(into, structs...)
end
)
end

"""
    unpack(x::CompressionHeader)

Return a tuple with the following information, in this order:
- Name of the compression algorithm
- Level of the compression
- Number of compressed bytes and number of uncompressed bytes, decoded according to [uproot3](https://github.com/scikit-hep/uproot3/blob/54f5151fb7c686c3a161fbe44b9f299e482f346b/uproot3/source/compressed.py#L132)
"""
function unpack(x::CompressionHeader)
    # Each size is assembled from three fields: the first contributes the low bits,
    # the second is shifted left by 8 and the third by 16. The two shifted fields are
    # converted to `Int` first; shifting a `UInt8` by 8 or more bits yields 0x00.
    combine(lo, mid, hi) = lo + (Int(mid) << 8) + (Int(hi) << 16)

    return (
        String(x.algo),               # algorithm name
        Int(x.method),                # compression level
        combine(x.c1, x.c2, x.c3),    # compressed byte count
        combine(x.u1, x.u2, x.u3),    # uncompressed byte count
    )
end
Binary file added test/samples/__pycache__/lz4.cpython-39.pyc
Binary file not shown.
Binary file modified test/samples/tree_with_large_array_lz4.root
Binary file not shown.

0 comments on commit 67f1e36

Please sign in to comment.