From 527c9748c32d88050f48851e9c8b3894d5b65137 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 22 Oct 2024 14:04:45 +0200 Subject: [PATCH] Internals: Use QualityScores iterator Instead of the `Base.Generator` used previously. The difference is that we can explicitly define eltype and length to help Julia generate better code. --- src/fastq/record.jl | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/fastq/record.jl b/src/fastq/record.jl index bb0cecb..4a14b31 100644 --- a/src/fastq/record.jl +++ b/src/fastq/record.jl @@ -272,6 +272,28 @@ function quality(::Type{StringView}, record::Record, part::UnitRange{<:Integer}= StringView(view(record.data, quality_indices(record, part))) end +""" + QualityScores + +An object of this type is returned by [`quality_scores`](@ref). +The only supported interfaces of this type are `length` and `iterate`. +""" +struct QualityScores + # TODO: Change to Memory at 1.11 + vec::Vector{UInt8} + part::UnitRange{Int} + encoding::QualityEncoding +end + +Base.length(x::QualityScores) = length(x.part) +Base.eltype(::Type{QualityScores}) = Int8 + +function Base.iterate(x::QualityScores, state=first(x.part)) + state > last(x.part) && return nothing + byte = @inbounds x.vec[state] + (decode_quality(x.encoding, byte), state + 1) +end + function quality_scores(record::Record, part::UnitRange{<:Integer}=1:seqsize(record)) quality_scores(record, DEFAULT_ENCODING, part) end @@ -287,12 +309,12 @@ By default, the encoding is PHRED33 Sanger encoding, but may be specified with a function quality_scores(record::Record, encoding::QualityEncoding, part::UnitRange{<:Integer}=1:seqsize(record)) start, stop = first(part), last(part) (start < 1 || stop > seqsize(record)) && throw(BoundsError(record, start:stop)) - data = record.data - offset = record.description_len + seqsize(record) - return Iterators.map(offset+start:offset+stop) do i - v = data[i] - decode_quality(encoding, v) - end + offset = 
record.description_len + seqsize(record) + QualityScores( + record.data, + offset+start:offset+stop, + encoding, + ) end """