V1 #15

Closed
wants to merge 28 commits into from
1 change: 1 addition & 0 deletions .JuliaFormatter.toml
@@ -0,0 +1 @@
style = "blue"
41 changes: 41 additions & 0 deletions .github/workflows/CI.yml
@@ -0,0 +1,41 @@
name: CI
on:
push:
branches:
- main
tags: ['*']
pull_request:
workflow_dispatch:
concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
runs-on: ${{ matrix.os }}
timeout-minutes: 60
permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created
actions: write
contents: read
strategy:
fail-fast: false
matrix:
version:
- '1.11'
- '1.6'
- 'pre'
os:
- ubuntu-latest
arch:
- x64
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v2
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
16 changes: 16 additions & 0 deletions .github/workflows/CompatHelper.yml
@@ -0,0 +1,16 @@
name: CompatHelper
on:
schedule:
- cron: 0 0 * * *
workflow_dispatch:
jobs:
CompatHelper:
runs-on: ubuntu-latest
steps:
- name: Pkg.add("CompatHelper")
run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- name: CompatHelper.main()
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
run: julia -e 'using CompatHelper; CompatHelper.main()'
16 changes: 16 additions & 0 deletions .github/workflows/TagBot.yml
@@ -4,6 +4,22 @@ on:
types:
- created
workflow_dispatch:
inputs:
lookback:
default: "3"
permissions:
actions: read
checks: read
contents: write
deployments: read
issues: read
discussions: read
packages: read
pages: read
pull-requests: read
repository-projects: read
security-events: read
statuses: read
jobs:
TagBot:
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
3 changes: 3 additions & 0 deletions .gitignore
@@ -1 +1,4 @@
Manifest.toml
Dockerfile
.vscode/
.venv/
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -1,4 +1,14 @@
## [UNRELEASED] - 2025-01-08

### ✨ Added
- Add dejitter function ([#13](https://github.com/cbrnr/XDF.jl/pull/13) by [Alberto Barradas](https://github.com/abcsds))
- Add CI workflow for versions 1.11, 1.6 (LTS), and pre-release ([#11](https://github.com/cbrnr/XDF.jl/pull/11) by [Alberto Barradas](https://github.com/abcsds))
- Add tests for string markers from issue xdf-modules/libxdf#19 ([#14](https://github.com/cbrnr/XDF.jl/pull/14) by [Alberto Barradas](https://github.com/abcsds))

### 🌀 Changed
- Reformat sources with [blue](https://github.com/JuliaDiff/BlueStyle) style

## [0.2.0] - 2022-02-23

- Add support for string markers and string streams ([#2](https://github.com/cbrnr/XDF.jl/pull/2) by [Alberto Barradas](https://github.com/abcsds) and [Clemens Brunner](https://github.com/cbrnr))
- Make header and footer XML available in "xml" key ([#4](https://github.com/cbrnr/XDF.jl/pull/4) by [Alberto Barradas](https://github.com/abcsds))

2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "XDF"
uuid = "31bc19ec-0089-417f-990e-a2b5e7515868"
authors = ["Clemens Brunner <clemens.brunner@gmail.com>"]
version = "0.2.0"
version = "1.0.0-DEV"

[deps]
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
3 changes: 2 additions & 1 deletion README.md
@@ -1,4 +1,6 @@
![License](https://img.shields.io/github/license/cbrnr/XDF.jl)
[![Build Status](https://github.com/cbrnr/XDF.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/cbrnr/XDF.jl/actions/workflows/CI.yml?query=branch%3Amain)
[![Code Style Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle)

XDF.jl
======
@@ -18,7 +20,6 @@ streams = read_xdf("minimal.xdf")
## Current status
This package is currently in an early stage, so here's an overview of what doesn't work (yet):

- [ ] Dejittering of streams with regular sampling rates is not available yet
- [ ] Loading only specific streams does not work yet

If you have a feature request or found a bug, please open a new issue and let me know. I'd be especially interested in making the code more efficient, because this is basically my first Julia project. Currently, the function passes through the file twice: the first pass reads everything except sample chunks, and the second pass reads samples into preallocated arrays. I'm not sure whether this is ideal; the code would be much simpler with a single pass (but then sample arrays would need to be concatenated).
108 changes: 84 additions & 24 deletions src/XDF.jl
@@ -1,4 +1,4 @@
# Authors: Clemens Brunner
# Authors: Clemens Brunner, Alberto Barradas
# License: BSD (3-clause)

module XDF
@@ -7,30 +7,32 @@ export read_xdf

using Logging: @info, @debug


CHUNK_TYPE = Dict(1=>"FileHeader",
2=>"StreamHeader",
3=>"Samples",
4=>"ClockOffset",
5=>"Boundary",
6=>"StreamFooter")

DATA_TYPE = Dict("int8"=>Int8,
"int16"=>Int16,
"int32"=>Int32,
"int64"=>Int64,
"float32"=>Float32,
"double64"=>Float64,
"string"=>String)

CHUNK_TYPE = Dict(
1 => "FileHeader",
2 => "StreamHeader",
3 => "Samples",
4 => "ClockOffset",
5 => "Boundary",
6 => "StreamFooter",
)

DATA_TYPE = Dict(
"int8" => Int8,
"int16" => Int16,
"int32" => Int32,
"int64" => Int64,
"float32" => Float32,
"double64" => Float64,
"string" => String,
)

"""
read_xdf(filename::AbstractString, sync::Bool=true)

Read XDF file and optionally sync streams (default true).
"""
function read_xdf(filename::AbstractString, sync::Bool=true)
streams = Dict{Int, Any}()
streams = Dict{Int,Any}()
counter = Dict(zip(keys(CHUNK_TYPE), zeros(Int, length(CHUNK_TYPE)))) # count chunks

open(filename) do io
@@ -123,9 +125,13 @@ function read_xdf(filename::AbstractString, sync::Bool=true)
streams[id]["time"][index[id]] = previous + delta
end
if streams[id]["dtype"] === String
streams[id]["data"][index[id], :] .= String(read(io, read_varlen_int(io)))
for j in 1:nchannels
streams[id]["data"][index[id], j] = String(read(io, read_varlen_int(io)))
end
else
streams[id]["data"][index[id], :] = reinterpret(dtype, read(io, sizeof(dtype) * nchannels))
streams[id]["data"][index[id], :] = reinterpret(
dtype, read(io, sizeof(dtype) * nchannels)
)
end
index[id] += 1
end
@@ -152,7 +158,6 @@ function read_xdf(filename::AbstractString, sync::Bool=true)
return streams
end


"Read variable-length integer."
function read_varlen_int(io::IO)
nbytes = read(io, Int8)
@@ -162,18 +167,18 @@ function read_varlen_int(io::IO)
read(io, UInt32)
elseif nbytes == 8
read(io, UInt64)
else
error("Invalid number of bytes for variable-length integer: $nbytes")
end
end
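
A round-trip sketch can clarify the encoding this helper expects: one length byte (1, 4, or 8), followed by that many bytes holding the integer. The `write_varlen_int` writer below is hypothetical (it is not part of XDF.jl), and `read_varlen_int` is re-declared here only so the sketch is self-contained; byte order is assumed to be little-endian, matching Julia's `write` on common platforms.

```julia
# Hypothetical writer for XDF-style variable-length integers:
# a single length byte (1, 4, or 8) followed by the value itself.
function write_varlen_int(io::IO, value::Integer)
    if value <= typemax(UInt8)
        write(io, Int8(1)); write(io, UInt8(value))
    elseif value <= typemax(UInt32)
        write(io, Int8(4)); write(io, UInt32(value))
    else
        write(io, Int8(8)); write(io, UInt64(value))
    end
end

# Reader re-declared from the diff above for a self-contained example.
function read_varlen_int(io::IO)
    nbytes = read(io, Int8)
    if nbytes == 1
        read(io, UInt8)
    elseif nbytes == 4
        read(io, UInt32)
    elseif nbytes == 8
        read(io, UInt64)
    else
        error("Invalid number of bytes for variable-length integer: $nbytes")
    end
end

buf = IOBuffer()
write_varlen_int(buf, 300)   # too large for one byte, encoded as UInt32
seekstart(buf)
value = read_varlen_int(buf)
```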


"Find XML tag and return its content (optionally converted to specified type)."
function findtag(xml::AbstractString, tag::AbstractString, type=String::DataType)
m = match(Regex("<$tag>(.*)</$tag>"), xml)
content = isnothing(m) ? nothing : m[1]
return isnothing(content) || type == String ? content : parse(type, content)
end
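
A short usage sketch of `findtag` on a hand-written XML fragment (the function is re-declared from the diff above so the example runs on its own; the `nominal_srate` tag is just illustrative data):

```julia
# Re-declared from the diff above for a self-contained example.
function findtag(xml::AbstractString, tag::AbstractString, type=String::DataType)
    m = match(Regex("<$tag>(.*)</$tag>"), xml)
    content = isnothing(m) ? nothing : m[1]
    return isnothing(content) || type == String ? content : parse(type, content)
end

xml = "<info><nominal_srate>512</nominal_srate></info>"
as_string = findtag(xml, "nominal_srate")        # raw tag content
as_int = findtag(xml, "nominal_srate", Int)      # parsed to the given type
missing_tag = findtag(xml, "channel_count")      # absent tags yield nothing
```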


"Synchronize clock values by their given offsets."
function sync_clock(time::Array{Float64,1}, offsets::Array{Float64,2})
x = hcat(ones(size(offsets, 1), 1), offsets[:, 1])
@@ -182,4 +187,59 @@ function sync_clock(time::Array{Float64,1}, offsets::Array{Float64,2})
return time .+ (coefs[1] .+ coefs[2] .* time)
end
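
The least-squares step in `sync_clock` can be illustrated on synthetic data: given clock-offset measurements (collection time, measured offset), it fits offset ≈ a + b·t and adds the fitted offset to each timestamp. The numbers below are made up for the sketch.

```julia
# Synthetic example of the clock-sync fit: a constant 0.5 s offset
# plus a linear drift of 0.05 s per second.
time = collect(0.0:0.1:1.0)
offsets = [0.0 0.5; 0.5 0.525; 1.0 0.55]   # columns: (time, offset)

X = hcat(ones(size(offsets, 1)), offsets[:, 1])
coefs = X \ offsets[:, 2]                  # least-squares fit: [a, b]
synced = time .+ (coefs[1] .+ coefs[2] .* time)
```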

end
"""
    dejitter(stream::Dict; max_time::Float64=1.0, max_samples::Int=500)

Calculate timestamps assuming constant sampling intervals within each continuous segment of a stream. A new segment starts wherever the gap between consecutive samples exceeds the smaller of `max_time` and the duration of `max_samples` at the nominal sampling rate.

# Arguments
- `stream::Dict`: stream dictionary.
- `max_time::Float64`: maximum time difference between two consecutive samples (default: 1.0 second).
- `max_samples::Int`: maximum number of samples in a segment (default: 500).

# Returns
- `Dict`: stream dictionary with updated timestamps.

# Examples
```julia
stream = read_xdf(Downloads.download("https://github.com/xdf-modules/example-files/blob/master/data_with_clock_resets.xdf?raw=true"))[2]
stream = dejitter(stream, 1.0, 500) # process segments with a maximum time difference of 1 second or 500 samples
stream["segments"] # list of segments
stream["nominal_srate"] # recalculated nominal sampling rate
```
"""
function dejitter(stream::Dict; max_time::Float64=1.0, max_samples::Int=500)
srate = stream["srate"]
if srate == 0
@warn "Attempting to dejitter marker streams or streams with zero sampling rate. Skipping."
return stream
end
nsamples = size(stream["data"], 1)
if nsamples == 0
@warn "Attempting to dejitter empty stream. Skipping."
return stream
end
stream["nominal_srate"] = 0 # Recalculated if possible
stream["segments"] = []
time = stream["time"]
breaks = [1; findall(diff(time) .> min.(max_time, max_samples .* (1 / srate)))]
seg_starts = breaks
seg_ends = [breaks[2:end] .- 1; nsamples]
for (start, stop) in zip(seg_starts, seg_ends)
push!(stream["segments"], (start, stop))
idx = [start:stop;]
X = hcat(ones(length(idx)), time[idx])
y = time[idx]
coefs = X \ y
stream["time"][idx] = coefs[1] .+ coefs[2] .* time[idx]
end
# Recalculate nominal sampling rate
counts = (seg_ends .- seg_starts) .+ 1
durations = diff([time[seg_starts]; time[seg_ends[end]]])
stream["nominal_srate"] = sum(counts) / sum(durations)
if stream["srate"] != 0 && abs(stream["srate"] - stream["nominal_srate"]) > 1e-1
@warn "After dejittering: Nominal sampling rate differs from specified rate: $(stream["nominal_srate"]) vs. $(stream["srate"]) Hz"
end
return stream
end
end
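
The segment-break rule inside `dejitter` is easy to demonstrate in isolation on made-up timestamps: a break is flagged wherever the difference between consecutive timestamps exceeds `min(max_time, max_samples / srate)`. The data below is synthetic, chosen so a single obvious gap appears.

```julia
# Synthetic timestamps at 100 Hz with a ~5 s recording gap after sample 3.
srate = 100.0
max_time = 1.0
max_samples = 500
time = [0.0, 0.01, 0.02, 5.0, 5.01, 5.02]

# Same threshold expression as in dejitter: min(1.0 s, 500 samples / 100 Hz).
threshold = min(max_time, max_samples * (1 / srate))
breaks = findall(diff(time) .> threshold)   # indices where a new segment starts
```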
1 change: 1 addition & 0 deletions test/Project.toml
@@ -1,3 +1,4 @@
[deps]
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
65 changes: 53 additions & 12 deletions test/runtests.jl
@@ -1,4 +1,4 @@
using XDF, Downloads, Test
using XDF, Downloads, Test, SHA

@testset "Minimal XDF file" begin
url = "https://github.com/xdf-modules/example-files/blob/master/minimal.xdf?raw=true"
@@ -14,15 +14,17 @@ using XDF, Downloads, Test
@test endswith(streams[0]["header"], "</uid></info>")
@test startswith(streams[0]["footer"], "<?xml version=\"1.0\"?>")
@test endswith(streams[0]["footer"], "</clock_offsets></info>")
@test streams[0]["data"] == [192 255 238
12 22 32
13 23 33
14 24 34
15 25 35
12 22 32
13 23 33
14 24 34
15 25 35]
@test streams[0]["data"] == [
192 255 238
12 22 32
13 23 33
14 24 34
15 25 35
12 22 32
13 23 33
14 24 34
15 25 35
]

@test 46202862 in keys(streams)
@test streams[46202862]["name"] == "SendDataString"
@@ -37,8 +39,8 @@ using XDF, Downloads, Test
@test size(streams[46202862]["data"]) == (9, 1)
@test startswith(streams[46202862]["data"][1, 1], "<?xml version")
@test endswith(streams[46202862]["data"][1, 1], "</info>")
@test streams[46202862]["data"][2:end, 1] == ["Hello", "World", "from", "LSL", "Hello",
"World", "from", "LSL"]
@test streams[46202862]["data"][2:end, 1] ==
["Hello", "World", "from", "LSL", "Hello", "World", "from", "LSL"]
end

@testset "XDF file with clock resets" begin
@@ -68,4 +70,43 @@ end
@test startswith(streams[2]["footer"], "<?xml version=\"1.0\"?>")
@test endswith(streams[2]["footer"], "</clock_offsets></info>")
@test size(streams[2]["data"]) == (27815, 8)
d_stream = XDF.dejitter(streams[2])
@test d_stream["segments"][1] == (1, 12875)
@test d_stream["segments"][2] == (12876, 27815)
end

@testset "strings" begin
file = "testdata/test_chunk3.xdf"
@testset "strings.sha256" begin
open(file) do f
@test bytes2hex(sha256(f)) ==
"c730991efa078906117aa2accdca5f0ea11c54f43c3884770eba21e5a72edb82"
end
end
@testset "strings.read_xdf" begin
using XDF: XDF
streams = XDF.read_xdf(file)
end

@testset "strings.markers" begin
using XDF: XDF
streams = XDF.read_xdf(file)
s1 = streams[3735928559]
@test s1["type"] == "Marker"
@test s1["nchannels"] == 2
@test s1["srate"] == 1000.0
@test s1["dtype"] == String
@test size(s1["data"]) == (1, 2)
@test s1["data"] == ["Marker 0A" "Marker 0B"]
s2 = streams[46202862]
@test s2["type"] == "EEG"
@test s2["nchannels"] == 64
@test s2["srate"] == 1000.0
@test s2["dtype"] == Float64
@test size(s2["data"]) == (1, 64)
@test sum(s2["data"]) == 0.0
sgs = [XDF.dejitter(streams[k])["segments"] for k in keys(streams)]
@test sgs[1] == [(1, 1)]
@test sgs[2] == [(1, 1)]
end
end
Binary file added test/testdata/test_chunk3.xdf
Binary file not shown.