Skip to content

Commit

Permalink
Do not reach into TranscodingStreams internals
Browse files Browse the repository at this point in the history
When computing a FASTA index, previously, FASTX used the private state of
Buffer from TranscodingStreams. However, in this case, it can just as easily
be computed using a local variable.
  • Loading branch information
jakobnissen committed Apr 1, 2024
1 parent 868d702 commit b82477a
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "FASTX"
uuid = "c2308a5c-f048-11e8-3e8a-31650f418d12"
authors = ["Sabrina J. Ward <[email protected]>", "Jakob N. Nissen <[email protected]>"]
version = "2.1.4"
version = "2.1.5"

[weakdeps]
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
Expand Down
23 changes: 7 additions & 16 deletions src/fasta/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,6 @@ function Base.write(io::IO, index::Index)
n
end

"""
    print(io::IO, index::Index)

Print `index` to `io` by delegating to `write(io, index)`, so the text emitted
is exactly what `write` produces for an `Index`.

Returns `nothing`, following the convention of `Base.print`. Previously this
method rendered the index into a temporary buffer and returned the resulting
`String` without ever touching `io`, so `print(io, index)` emitted nothing.
"""
function Base.print(io::IO, index::Index)
    # Write directly to the caller's stream instead of a throwaway IOBuffer.
    write(io, index)
    return nothing
end

index_fasta_actions = Dict(
:mark => :(@mark),
:countline => :(linenum += 1),
Expand All @@ -226,15 +220,9 @@ index_fasta_actions = Dict(
:description => quote
uses_rn_newline = byte == UInt8('\r')
no_more_seqlines = false

# Disturbingly, there is no API to get the absolute position of
# an Automa machine operating on a stream. We ought to fix this.
# This workaround works ONLY for a NoopStream,
# and relies on abusing the internals.
buffer_offset = buffer.transcoded - buffer.marginpos + 1

# We want 0-indexed, p is one-indexed, and we need the offset of first sequence
offset = buffer_offset + p + uses_rn_newline
# +1 for > symbol, +1 for newline, +1 if \r is used
offset += p - @markpos() + uses_rn_newline + 2
last_offset = offset
end,
:seqline => quote
# Validate that the line terminator is consistent, i.e. no sequence has both \r\n and \n
Expand All @@ -254,6 +242,7 @@ index_fasta_actions = Dict(
elseif current_seqwidth < seqwidth
no_more_seqlines = true
end
offset += current_seqwidth + 1 + uses_rn_newline
seqlen += current_seqwidth
end
end,
Expand All @@ -262,7 +251,7 @@ index_fasta_actions = Dict(

names[identifier] = record_count
push!(lengths, seqlen)
push!(offsets, offset)
push!(offsets, last_offset)
enc_linebases = (seqwidth % UInt)
enc_linebases |= ifelse(uses_rn_newline, typemin(Int) % UInt, UInt(0))
push!(encoded_linebases, enc_linebases)
Expand All @@ -278,6 +267,8 @@ initcode = quote
offsets = Int[]
encoded_linebases = UInt[]

offset = 0
last_offset = 0
seqwidth = -1
seqlen = 0
linenum = 1
Expand Down

0 comments on commit b82477a

Please sign in to comment.