diff --git a/.gitignore b/.gitignore index 99e2ceab..59a3ad88 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ /dev/ /docs/build/ /docs/site/ +__pycache__/ diff --git a/Project.toml b/Project.toml index 32024d34..1b74e673 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "UnROOT" uuid = "3cd96dde-e98d-4713-81e9-a4a1b0235ce9" authors = ["Tamas Gal", "Jerry Ling"] -version = "0.1.7" +version = "0.1.8" [deps] CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561" diff --git a/README.md b/README.md index 0c45c027..d2060447 100644 --- a/README.md +++ b/README.md @@ -22,61 +22,43 @@ Here is also a short discussion about the [ROOT binary format documentation](https://github.com/scikit-hep/uproot/issues/401) ## Status -The project is in early alpha prototyping phase and contributions are very +The project is in an early prototyping phase and contributions are very welcome. -Reading of raw basket data is already working for uncompressed and -Zlib-compressed files. The raw data consists of two vectors: the bytes +We support reading all scalar branches and jagged branches of "basic" types. As +a metric, UnROOT can already read all branches of CMS' NanoAOD: + +``` julia +julia> t = ROOTFile("test/samples/NanoAODv5_sample.root") +ROOTFile("test/samples/NanoAODv5_sample.root") with 2 entries and 21 streamers. + +# example of a flat branch +julia> array(t, "Events/HLT_Mu3_PFJet40") +1000-element BitVector: + 0 + 1 + 0 + 0 + 0 + +# example of a jagged branch +julia> array(t, "Events/Electron_dxy") +1000-element Vector{Vector{Float32}}: + [0.00037050247] + [-0.009819031] + [] + [-0.0015697479] +``` + +If you have a custom C++ struct inside your branch, reading raw data is also possible. +The raw data consists of two vectors: the bytes and the offsets and are available using the `UnROOT.array(f::ROOTFile, path; raw=true)` method. This data can be reinterpreted using a custom type with the method `UnROOT.splitup(data, offsets, T::Type; skipbytes=0)`. 
-Everything is in a very early alpha stage, as mentioned above. - -Here is a quick demo of reading a simple branch containing a vector of integers -using the preliminary high-level API, which works for non-jagged branches -(simple vectors of primitive types): - -```julia -julia> using UnROOT - -julia> f = ROOTFile("test/samples/tree_with_histos.root") -ROOTFile("test/samples/tree_with_histos.root") with 1 entry and 4 streamers. - -julia> array(f, "t1/mynum") -25-element Array{Int32,1}: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 -``` - -There is also a `raw` keyword which you can pass to `array()`, so it will skip -the interpretation and return the raw bytes. This is similar to `uproot.asdebug` -and can be used to read data where the streamers are not available (yet). -Here is it in action, using some data from the KM3NeT experiment: +You can then define a suitable Julia `type` and `readtype` method for parsing these data. +Here it is in action, with the help of the `type`s from `custom.jl`, and some data from the KM3NeT experiment: ``` julia julia> using UnROOT @@ -84,19 +66,15 @@ julia> using UnROOT julia> f = ROOTFile("test/samples/km3net_online.root") ROOTFile("test/samples/km3net_online.root") with 10 entries and 41 streamers. -julia> array(f, "KM3NET_EVENT/KM3NET_EVENT/triggeredHits"; raw=true) +julia> data, offsets = array(f, "KM3NET_EVENT/KM3NET_EVENT/snapshotHits"; raw=true) 2058-element Array{UInt8,1}: 0x00 0x03 - 0x00 - 0x01 - 0x00 ⋮ - 0x56 - 0x45 - 0x4e - 0x54 - 0x00 + +julia> UnROOT.splitup(data, offsets, UnROOT.KM3NETDAQHit) +4-element Vector{Vector{UnROOT.KM3NETDAQHit}}: + [UnROOT.KM3NETDAQHit(1073742790, 0x00, 9, 0x60)...... 
``` This is what happens behind the scenes with some additional debug output: @@ -176,16 +154,6 @@ Compressed datastream of 1317 bytes at 6180 (TKey 't1' (TTree)) 10 10 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 - 10 ``` ## Main challenges diff --git a/src/UnROOT.jl b/src/UnROOT.jl index 1b2104d5..46b1c559 100644 --- a/src/UnROOT.jl +++ b/src/UnROOT.jl @@ -2,7 +2,8 @@ module UnROOT export ROOTFile, array -import Base: keys, get, getindex, show, length, iterate, position +import Base: keys, get, getindex, show, length, iterate, position, ntoh +ntoh(b::Bool) = b using CodecZlib, CodecLz4, CodecXz using Mixers @@ -18,4 +19,8 @@ include("bootstrap.jl") include("root.jl") include("custom.jl") +if VERSION < v"1.2" + hasproperty(x, s::Symbol) = s in fieldnames(typeof(x)) +end + end # module diff --git a/src/bootstrap.jl b/src/bootstrap.jl index c40c9342..4f98775f 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -214,6 +214,7 @@ primitivetype(l::TLeafL) = l.fIsUnsigned ? UInt64 : Int64 fMinimum fMaximum end +primitivetype(l::TLeafO) = Bool function parsefields!(io, fields, T::Type{TLeafO}) preamble = Preamble(io, T) @@ -264,6 +265,41 @@ end primitivetype(l::TLeafF) = Float32 +# FIXME this should be generated and inherited from TLeaf +# https://root.cern/doc/master/TLeafB_8h_source.html#l00026 +@with_kw struct TLeafB + # from TNamed + fName + fTitle + + # from TLeaf + fLen + fLenType + fOffset + fIsRange + fIsUnsigned + fLeafCount + + # own fields + fMinimum + fMaximum +end + +function parsefields!(io, fields, T::Type{TLeafB}) + preamble = Preamble(io, T) + parsefields!(io, fields, TLeaf) + fields[:fMinimum] = readtype(io, UInt8) + fields[:fMaximum] = readtype(io, UInt8) + endcheck(io, preamble) +end + +function unpack(io, tkey::TKey, refs::Dict{Int32, Any}, T::Type{TLeafB}) + @initparse + parsefields!(io, fields, T) + T(;fields...) 
+end + +primitivetype(l::TLeafB) = UInt8 # FIXME this should be generated and inherited from TLeaf @with_kw struct TLeafD # from TNamed @@ -755,3 +791,6 @@ function TTree(io, tkey::TKey, refs) endcheck(io, preamble) TTree(;fields...) end + +# FIXME what to do with auto.py's massive type translation? +# https://github.com/scikit-hep/uproot3/blob/54f5151fb7c686c3a161fbe44b9f299e482f346b/uproot3/interp/auto.py#L360-L365 diff --git a/src/custom.jl b/src/custom.jl index a6a80e5e..542942cb 100644 --- a/src/custom.jl +++ b/src/custom.jl @@ -19,7 +19,7 @@ function Base.getproperty(hit::DAQHit, s::Symbol) r = Ref(hit) GC.@preserve r begin if s === :dom_id - return bswap(unsafe_load(Ptr{Int32}(Base.unsafe_convert(Ptr{Cvoid}, r)))) + return ntoh(unsafe_load(Ptr{Int32}(Base.unsafe_convert(Ptr{Cvoid}, r)))) elseif s === :channel_id return unsafe_load(Ptr{UInt8}(Base.unsafe_convert(Ptr{Cvoid}, r)+4)) elseif s === :tdc diff --git a/src/io.jl b/src/io.jl index 175f25f4..4c88a571 100644 --- a/src/io.jl +++ b/src/io.jl @@ -24,7 +24,8 @@ function readtype(io, ::Type{T}) where T<:AbstractString length = readtype(io, UInt8) if length == 255 - seek(io, start) + # first byte 0xff is useless now + # https://github.com/scikit-hep/uproot3/blob/54f5151fb7c686c3a161fbe44b9f299e482f346b/uproot3/source/cursor.py#L91 length = readtype(io, UInt32) end @@ -135,7 +136,7 @@ function endcheck(io, preamble::T) where {T<:Preamble} error("Object '$(preamble.type)' has $(observed) bytes; expected $(preamble.cnt)") end end - return true + nothing end diff --git a/src/root.jl b/src/root.jl index 1ae7f997..9ae4c123 100644 --- a/src/root.jl +++ b/src/root.jl @@ -130,6 +130,7 @@ function Base.getindex(t::T, s::AbstractString) where {T<:Union{TTree, TBranchEl end missing end + """ function array(f::ROOTFile, path) @@ -145,17 +146,60 @@ function array(f::ROOTFile, path; raw=false) end end - if raw - return readbasketsraw(f.fobj, branch) + if !raw && length(branch.fLeaves.elements) > 1 + error( + "Branches with 
multiple leaves are not supported yet. Try reading with `array(...; raw=true)`.", + ) end - if length(branch.fLeaves.elements) > 1 - error("Branches with multiple leaves are not supported yet.") + leaf = first(branch.fLeaves.elements) + rawdata, rawoffsets = readbasketsraw(f.fobj, branch) + # https://github.com/scikit-hep/uproot3/blob/54f5151fb7c686c3a161fbe44b9f299e482f346b/uproot3/interp/auto.py#L144 + isjagged = (match(r"\[.*\]", leaf.fTitle) !== nothing) + + # there are two possibilities: one is that the leaf is just a normal leaf but the title has "[...]" in it + # magic offsets, seems to be common for a lot of types, see auto.py in uproot3 + # only needed when the jaggedness comes from TLeafElements, not needed when + # the jaggedness comes from having "[]" in TLeaf's title + jagg_offset = leaf isa TLeafElement ? 10 : 0 + if raw + return rawdata, rawoffsets + end + # the other is where we need to auto-detect T based on the class name + if isjagged || !iszero(jagg_offset) # non-primitive jagged leaf + T = autointerp_T(branch, leaf) + + # for each "event", the index range is `offsets[i] + jagg_offset + 1` to `offsets[i+1]` + # this is why we need to append `rawoffsets` in the `readbasketsraw()` call + # when you use this range to index `rawdata`, you will get the raw bytes belonging to each event + # Say your real data is Int32 and you see 8 bytes after indexing, then this event has [num1, num2] as real data + @views [ + ntoh.(reinterpret( + T, rawdata[ (rawoffsets[i]+jagg_offset+1):rawoffsets[i+1] ] + )) for i in 1:(length(rawoffsets) - 1) + ] + else # the branch is not jagged + return ntoh.(reinterpret(primitivetype(leaf), rawdata)) end +end - leaf = first(branch.fLeaves.elements) +function autointerp_T(branch, leaf) + if hasproperty(branch, :fClassName) + classname = branch.fClassName # the C++ class name, such as "vector<int>" + m = match(r"vector<(.*)>", classname) + m===nothing && error("Cannot understand fClassName: $classname.") + elname = m[1] + elname = endswith(elname, 
"_t") ? lowercase(chop(elname; tail=2)) : elname # Double_t -> double + try + elname == "bool" && return Bool #Cbool doesn't exist + getfield(Base, Symbol(:C, elname)) + catch + error("Cannot convert element of $elname to a native Julia type") + end + else + primitivetype(leaf) + end - readbaskets(f.fobj, branch, primitivetype(leaf)) end @@ -167,14 +211,17 @@ Reads a tree into a dataframe function DataFrame(f::ROOTFile, path) names = keys(f[path]) cols = [array(f, path * "/" * n) for n in names] - for each in cols - eltype(each) <: Number || error("Jagged array cannot be put into a dataframe") - end DataFrame(cols, names, copycols=false) #avoid double allocation end +""" + splitup(data::Vector{UInt8}, offsets, T::Type; skipbytes=0, primitive=false) + +Given the `offsets` and `data` returned by `array(...; raw = true)`, reconstruct the actual +array (can be jagged, or with a custom struct). +""" function splitup(data::Vector{UInt8}, offsets, T::Type; skipbytes=0, primitive=false) - elsize = packedsizeof(T) + elsize = sizeof(T) out = sizehint!(Vector{Vector{T}}(), length(offsets)) lengths = diff(offsets) push!(lengths, length(data) - offsets[end] + offsets[1]) # yay ;) @@ -194,29 +241,6 @@ function splitup(data::Vector{UInt8}, offsets, T::Type; skipbytes=0, primitive=f end -function readbaskets(io, branch, ::Type{T}) where {T} - seeks = branch.fBasketSeek - entries = branch.fBasketEntry - - out = sizehint!(Vector{T}(), branch.fEntries) - - - for (idx, basket_seek) in enumerate(seeks) - @debug "Reading basket" idx basket_seek - if basket_seek == 0 - break - end - seek(io, basket_seek) - basketkey = unpack(io, TBasketKey) - s = datastream(io, basketkey) - - for _ in entries[idx]:(entries[idx + 1] - 1) - push!(out, readtype(s, T)) - end - end - out -end - function readbasketsraw(io, branch) seeks = branch.fBasketSeek @@ -226,14 +250,13 @@ function readbasketsraw(io, branch) # Just to check if we have a jagged structure # streamer = streamerfor() - data = 
sizehint!(Vector{UInt8}(), sum(bytes)) - offsets = sizehint!(Vector{Int32}(), total_entries) + max_len = sum(bytes) + data = sizehint!(Vector{UInt8}(), max_len) + offsets = sizehint!(Vector{Int32}(), total_entries+1) # this is always Int32 idx = 1 for (basket_seek, n_bytes) in zip(seeks, bytes) @debug "Reading raw basket data" basket_seek n_bytes - if basket_seek == 0 - break - end + basket_seek == 0 && break seek(io, basket_seek) idx += readbasketbytes!(data, offsets, io, idx) end @@ -241,41 +264,50 @@ function readbasketsraw(io, branch) end -function readoffsets!(out, s, contentsize, global_offset, local_offset) - for _ in 1:contentsize - offset = readtype(s, Int32) + global_offset - push!(out, offset) - end -end - +# Thanks Jim and Philippe +# https://groups.google.com/forum/#!topic/polyglot-root-io/yeC0mAizQcA +# The offsets start at fLast - fKeylen + 4. A single basket of data looks like this: +# 4 bytes 4 bytes +# ┌─────────┬────────────────────────────────┬───┬────────────┬───┐ +# │ TKey │ content │ X │ offsets │ x │ +# └─────────┴────────────────────────────────┴───┴────────────┴───┘ +# │← fLast - fKeylen →│ │ +# │ │ +# │← fObjlen →│ +# function readbasketbytes!(data, offsets, io, idx) basketkey = unpack(io, TBasketKey) - # @show basketkey s = datastream(io, basketkey) # position(s) == 0, but offsets start at -basketkey.fKeylen start = position(s) - # @show start contentsize = basketkey.fLast - basketkey.fKeylen - offsetlength = basketkey.fObjlen - contentsize + offsetbytesize = basketkey.fObjlen - contentsize - 8 + offset_len = offsetbytesize ÷ 4 # these are always Int32 - if offsetlength > 0 - @debug "Offset data present" offsetlength + if offsetbytesize > 0 + @debug "Offset data present" offsetbytesize skip(s, contentsize) - skip(s, 4) - readoffsets!(offsets, s, (offsetlength - 8) / 4, length(data), length(data)) - # https://groups.google.com/forum/#!topic/polyglot-root-io/yeC0mAizQcA + skip(s, 4) # a flag that indicates the type of data that follows + 
readoffsets!(offsets, s, offset_len, length(data), -basketkey.fKeylen) skip(s, 4) # "Pointer-to/location-of last used byte in basket" seek(s, start) end @debug "Reading $(contentsize) bytes" readbytes!(s, data, idx, contentsize) - # for _ in 1:contentsize - # push!(data, readtype(s, UInt8)) - # end + # offsets starts at -fKeylen, same as the `local_offset` we pass in in the loop + push!(offsets, basketkey.fLast - basketkey.fKeylen) + contentsize end +function readoffsets!(out, s, contentsize, global_offset, local_offset) + for _ in 1:contentsize + offset = readtype(s, Int32) + global_offset + local_offset + push!(out, offset) + end +end + """ function readbytes!(io, b, offset, nr) @@ -284,5 +316,7 @@ Efficient read of bytes into an existing array at a given offset function readbytes!(io, b, offset, nr) resize!(b, offset + nr - 1) nb = UInt(nr) - GC.@preserve b unsafe_read(io, pointer(b, offset), nr) + # keep `b` rooted while unsafe_read writes through its raw pointer + GC.@preserve b unsafe_read(io, pointer(b, offset), nb) + nothing end diff --git a/src/streamers.jl b/src/streamers.jl index 7042358f..9c1d201a 100644 --- a/src/streamers.jl +++ b/src/streamers.jl @@ -301,7 +301,12 @@ function unpack(io, tkey::TKey, refs::Dict{Int32, Any}, T::Type{TObjArray}) name = readtype(io, String) size = readtype(io, Int32) low = readtype(io, Int32) - elements = [readobjany!(io, tkey, refs) for i in 1:size] + elements = Vector{Any}(undef, size) + for i in 1:size + ele = readobjany!(io, tkey, refs) + # !ismissing(ele) && @show ele.fName + elements[i] = ele + end endcheck(io, preamble) return TObjArray(name, low, elements) end diff --git a/src/types.jl b/src/types.jl index 2b352d05..06ab8d5f 100644 --- a/src/types.jl +++ b/src/types.jl @@ -122,7 +122,7 @@ function datastream(io, tkey::T) where T<:Union{TKey, TBasketKey} fufilled += uncompbytes end - + @assert fufilled == length(uncomp_data) return IOBuffer(uncomp_data) end diff --git a/src/types.jl.orig b/src/types.jl.orig new file mode 100644 index 
00000000..06ab8d5f --- /dev/null +++ b/src/types.jl.orig @@ -0,0 +1,234 @@ +@io struct TKey32 + fNbytes::Int32 + fVersion::Int16 + fObjlen::Int32 + fDatime::UInt32 + fKeylen::Int16 + fCycle::Int16 + fSeekKey::Int32 + fSeekPdir::Int32 + fClassName::String + fName::String + fTitle::String +end + +@io struct TKey64 + fNbytes::Int32 + fVersion::Int16 + fObjlen::Int32 + fDatime::UInt32 + fKeylen::Int16 + fCycle::Int16 + fSeekKey::Int64 + fSeekPdir::Int64 + fClassName::String + fName::String + fTitle::String +end + +const TKey = Union{TKey32, TKey64} + +function unpack(io, ::Type{TKey}) + start = position(io) + skip(io, 4) + fVersion = readtype(io, Int16) + seek(io, start) + if fVersion <= 1000 + return unpack(io, TKey32) + end + unpack(io, TKey64) +end + +@with_kw struct TBasketKey + fNbytes::Int32 + fVersion + fObjlen::Int32 + fDatime::UInt32 + fKeylen::Int16 + fCycle::Int16 + fSeekKey + fSeekPdir + fClassName::String + fName::String + fTitle::String + fBufferSize::Int32 + fNevBufSize::Int32 + fNevBuf::Int32 + fLast::Int32 +end + +function unpack(io, T::Type{TBasketKey}) + start = position(io) + fields = Dict{Symbol, Any}() + fields[:fNbytes] = readtype(io, Int32) + fields[:fVersion] = readtype(io, Int16) # FIXME if "complete" it's UInt16 (acc. uproot) + + inttype = fields[:fVersion] <= 1000 ? Int32 : Int64 + + fields[:fObjlen] = readtype(io, Int32) + fields[:fDatime] = readtype(io, UInt32) + fields[:fKeylen] = readtype(io, Int16) + fields[:fCycle] = readtype(io, Int16) + fields[:fSeekKey] = readtype(io, inttype) + fields[:fSeekPdir] = readtype(io, inttype) + fields[:fClassName] = readtype(io, String) + fields[:fName] = readtype(io, String) + fields[:fTitle] = readtype(io, String) + + # if complete (which is true for compressed, it seems?) + seek(io, start + fields[:fKeylen] - 18 - 1) + fields[:fVersion] = readtype(io, Int16) # FIXME if "complete" it's UInt16 (acc. 
uproot) + fields[:fBufferSize] = readtype(io, Int32) + fields[:fNevBufSize] = readtype(io, Int32) + fields[:fNevBuf] = readtype(io, Int32) + fields[:fLast] = readtype(io, Int32) + + T(; fields...) +end + +iscompressed(t::T) where T<:Union{TKey, TBasketKey} = t.fObjlen != t.fNbytes - t.fKeylen +origin(t::T) where T<:Union{TKey, TBasketKey} = iscompressed(t) ? -t.fKeylen : t.fSeekKey +seekstart(io, t::T) where T<:Union{TKey, TBasketKey} = seek(io, t.fSeekKey + t.fKeylen) + +function datastream(io, tkey::T) where T<:Union{TKey, TBasketKey} + start = position(io) + if !iscompressed(tkey) + @debug ("Uncompressed datastream of $(tkey.fObjlen) bytes " * + "at $start (TKey '$(tkey.fName)' ($(tkey.fClassName)))") + skip(io, 1) # ??? + return io + end + @debug "Compressed stream at $(start)" + _start = tkey.fSeekKey + seekstart(io, tkey) + fufilled = 0 + uncomp_data = Vector{UInt8}(undef, tkey.fObjlen) + while fufilled < tkey.fObjlen # careful with 0/1-based index when thinking about offsets + compression_header = unpack(io, CompressionHeader) + cname, _, compbytes, uncompbytes = unpack(compression_header) + io_buf = IOBuffer(read(io, compbytes)) + + # indexing `0+1 to 0+2` are two bytes, no need to +1 in the second term + uncomp_data[fufilled+1:fufilled+uncompbytes] .= if cname == "ZL" + read(ZlibDecompressorStream(io_buf), uncompbytes) + elseif cname == "XZ" + read(XzDecompressorStream(io_buf), uncompbytes) + elseif cname == "L4" + skip(io_buf, 8) #skip checksum + lz4_decompress(read(io_buf), uncompbytes) + else + error("Unsupported compression type '$(String(compression_header.algo))'") + end + + fufilled += uncompbytes + end + @assert fufilled == length(uncomp_data) + return IOBuffer(uncomp_data) +end + + +@io struct FilePreamble + identifier::SVector{4, UInt8} # Root file identifier ("root") + fVersion::Int32 # File format version +end + +# https://root.cern/doc/v624/RMiniFile_8cxx_source.html#l00239 +@io struct FileHeader32 + fBEGIN::Int32 # Pointer to first data 
record + fEND::UInt32 # Pointer to first free word at the EOF + fSeekFree::UInt32 # Pointer to FREE data record + fNbytesFree::Int32 # Number of bytes in FREE data record + nfree::Int32 # Number of free data records + fNbytesName::Int32 # Number of bytes in TNamed at creation time + fUnits::UInt8 # Number of bytes for file pointers + fCompress::Int32 # Compression level and algorithm + fSeekInfo::UInt32 # Pointer to TStreamerInfo record + fNbytesInfo::Int32 # Number of bytes in TStreamerInfo record + fUUID::SVector{18, UInt8} # Universal Unique ID +end + + +@io struct FileHeader64 + fBEGIN::Int32 # Pointer to first data record + fEND::UInt64 # Pointer to first free word at the EOF + fSeekFree::UInt64 # Pointer to FREE data record + fNbytesFree::Int32 # Number of bytes in FREE data record + nfree::Int32 # Number of free data records + fNbytesName::Int32 # Number of bytes in TNamed at creation time + fUnits::UInt8 # Number of bytes for file pointers + fCompress::Int32 # Compression level and algorithm + fSeekInfo::UInt64 # Pointer to TStreamerInfo record + fNbytesInfo::Int32 # Number of bytes in TStreamerInfo record + fUUID::SVector{18, UInt8} # Universal Unique ID +end + +const FileHeader = Union{FileHeader32, FileHeader64} + + +@io struct ROOTDirectoryHeader32 + fVersion::Int16 + fDatimeC::UInt32 + fDatimeM::UInt32 + fNbytesKeys::Int32 + fNbytesName::Int32 + fSeekDir::Int32 + fSeekParent::Int32 + fSeekKeys::Int32 +end + +@io struct ROOTDirectoryHeader64 + fVersion::Int16 + fDatimeC::UInt32 + fDatimeM::UInt32 + fNbytesKeys::Int32 + fNbytesName::Int32 + fSeekDir::Int64 + fSeekParent::Int64 + fSeekKeys::Int64 +end + +const ROOTDirectoryHeader = Union{ROOTDirectoryHeader32, ROOTDirectoryHeader64} + +function unpack(io::IOStream, ::Type{ROOTDirectoryHeader}) + fVersion = readtype(io, Int16) + skip(io, -2) + + if fVersion <= 1000 + return unpack(io, ROOTDirectoryHeader32) + else + return unpack(io, ROOTDirectoryHeader64) + end + +end + + +@io struct CompressionHeader + 
algo::SVector{2, UInt8} + method::UInt8 + c1::UInt8 + c2::UInt8 + c3::UInt8 + u1::UInt8 + u2::UInt8 + u3::UInt8 +end + + +# Built-in types +function THashList end +function TRef end +function TArray end +function TArrayC end +function TArrayS end +function TArrayL end +function TArrayL64 end +function TArrayF end +function TRefArray end + +function aliasfor(classname) + if classname == "ROOT::TIOFeatures" + return ROOT_3a3a_TIOFeatures + else + nothing + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 74826204..c91f254f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -188,11 +188,49 @@ end @test [0.0, 1.0588236, 2.1176472, 3.1764705, 4.2352943] ≈ df.float_array[1:5] atol=1e-7 end +@testset "Jagged branches" begin + # 32bits T + rootfile = ROOTFile(joinpath(SAMPLES_DIR, "tree_with_jagged_array.root")) + data = array(rootfile, "t1/int32_array") + @test data isa Vector{Vector{Int32}} + @test data[1] == Int32[] + @test data[1:2] == [Int32[], Int32[0]] + @test data[end] == Int32[90, 91, 92, 93, 94, 95, 96, 97, 98] + + # 64bits T + T = Float64 + rootfile = ROOTFile(joinpath(SAMPLES_DIR, "tree_with_jagged_array_double.root")) + data = array(rootfile, "t1/double_array") + @test data isa Vector{Vector{T}} + @test data[1] == T[] + @test data[1:2] == [T[], T[0]] + @test data[end] == T[90, 91, 92, 93, 94, 95, 96, 97, 98] +end + +@testset "NanoAOD" begin + rootfile = ROOTFile(joinpath(SAMPLES_DIR, "NanoAODv5_sample.root")) + event = array(rootfile, "Events/event") + @test event[1:3] == UInt64[12423832, 12423821, 12423834] + Electron_dxy = array(rootfile, "Events/Electron_dxy") + @test eltype(Electron_dxy) == Vector{Float32} + @test Electron_dxy[1:3] ≈ [Float32[0.0003705], Float32[-0.00981903], Float32[]] + HLT_Mu3_PFJet40 = array(rootfile, "Events/HLT_Mu3_PFJet40") + @test eltype(HLT_Mu3_PFJet40) == Bool + @test HLT_Mu3_PFJet40[1:3] == [false, true, false] +end + @testset "readbasketsraw()" begin array_md5 = [0xb4, 0xe9, 0x32, 0xe8, 0xfb, 0xff, 0xcf, 
0xa0, 0xda, 0x75, 0xe0, 0x25, 0x34, 0x9b, 0xcd, 0xdf] rootfile = ROOTFile(joinpath(SAMPLES_DIR, "km3net_online.root")) data, offsets = array(rootfile, "KM3NET_EVENT/KM3NET_EVENT/snapshotHits"; raw=true) @test array_md5 == md5(data) + + rootfile = ROOTFile(joinpath(SAMPLES_DIR, "tree_with_jagged_array.root")) + data, offsets = array(rootfile, "t1/int32_array"; raw=true) + + @test data isa Vector{UInt8} + @test offsets isa Vector{Int32} + @test data[1:3] == UInt8[0x40, 0x00, 0x00] end diff --git a/test/samples/NanoAODv5_sample.root b/test/samples/NanoAODv5_sample.root new file mode 100644 index 00000000..cbf02e01 Binary files /dev/null and b/test/samples/NanoAODv5_sample.root differ diff --git a/test/samples/tree_with_jagged_array.py b/test/samples/tree_with_jagged_array.py new file mode 100755 index 00000000..ece6a078 --- /dev/null +++ b/test/samples/tree_with_jagged_array.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# Example taken from https://root.cern.ch/how/how-write-ttree-python +# and modified to run... +import ROOT +from ROOT import TFile, TTree +from array import array + +f = TFile('tree_with_jagged_array.root', 'recreate', "jagged_lz4", 404) +t = TTree('t1', 'tree with jagged array') + +n = ROOT.vector('int')() +t.Branch('int32_array', n) + +for i in range(100): + if i % 10 == 0: + n.clear() + t.Fill() + n.push_back(i) + +f.Write() +f.Close() diff --git a/test/samples/tree_with_jagged_array.root b/test/samples/tree_with_jagged_array.root new file mode 100644 index 00000000..af6e6340 Binary files /dev/null and b/test/samples/tree_with_jagged_array.root differ diff --git a/test/samples/tree_with_jagged_array_double.py b/test/samples/tree_with_jagged_array_double.py new file mode 100755 index 00000000..46a1dd2f --- /dev/null +++ b/test/samples/tree_with_jagged_array_double.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# Example taken from https://root.cern.ch/how/how-write-ttree-python +# and modified to run... 
+import ROOT +from ROOT import TFile, TTree +from array import array + +f = TFile('tree_with_jagged_array_double.root', 'recreate', "double_lzma", 207) +t = TTree('t1', 'tree with jagged array') + +n = ROOT.vector('double')() +t.Branch('double_array', n) + +for i in range(100): + if i % 10 == 0: + n.clear() + t.Fill() + n.push_back(float(i)) + +f.Write() +f.Close() diff --git a/test/samples/tree_with_jagged_array_double.root b/test/samples/tree_with_jagged_array_double.root new file mode 100644 index 00000000..daf59951 Binary files /dev/null and b/test/samples/tree_with_jagged_array_double.root differ