Skip to content

Commit

Permalink
Remove StableSet and StableDict
Browse files Browse the repository at this point in the history
These two data types were originally added to prevent nondeterminism when
compiling regex.
Instead, prevent nondeterminism by relabeling the minimized machines.

See issues #19, #106
  • Loading branch information
jakobnissen committed Aug 3, 2022
1 parent f3440f4 commit 6e5f508
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 299 deletions.
7 changes: 0 additions & 7 deletions src/Automa.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,6 @@ module Automa
using Printf: @sprintf
import ScanByte: ScanByte, ByteSet

include("sdict.jl")
include("sset.jl")

# TODO: use StableDict and StableSet only where they are required
const Dict = StableDict
const Set = StableSet

# Encode a byte set into a sequence of non-empty ranges.
function range_encode(set::ScanByte.ByteSet)
result = UnitRange{UInt8}[]
Expand Down
31 changes: 31 additions & 0 deletions src/edge.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,37 @@ function Edge(labels::ByteSet, actions::ActionList)
return Edge(labels, Precondition(), actions)
end

# Strict "sorts before" predicate on edges, used as the `lt` argument when
# sorting edge lists in `reorder_machine`.
#
# `isless` is deliberately not overloaded for `Edge`, because that would also
# require working out a consistent `hash`. This helper is only meant for sorting.
#
# Comparison order:
#   1. labels, pairwise in iteration order, then by number of labels;
#   2. preconditions, position by position (name first, then the value
#      converted to `UInt8`), then by number of preconditions.
#
# Returns `true` if `e1` sorts strictly before `e2`, `false` if it sorts after
# or if both arguments are the very same edge (`===`). Throws an
# `ErrorException` if two distinct edges tie on every criterion.
function in_sort_order(e1::Edge, e2::Edge)
    # A strict ordering must report `false` for `lt(x, x)`; sorting code is
    # allowed to compare an element with itself, and that is not an error.
    e1 === e2 && return false

    # First compare labels. The first differing pair decides.
    # This could presumably be done much faster.
    for (a, b) in zip(e1.labels, e2.labels)
        a < b && return true
        b < a && return false
    end
    # All shared positions tie: the edge with fewer labels sorts first.
    n1, n2 = length(e1.labels), length(e2.labels)
    n1 == n2 || return n1 < n2

    # Then compare preconditions.
    p1, p2 = e1.precond, e2.precond
    np1, np2 = length(p1.names), length(p2.names)
    for i in 1:min(np1, np2)
        isless(p1.names[i], p2.names[i]) && return true
        isless(p2.names[i], p1.names[i]) && return false
        v1, v2 = convert(UInt8, p1.values[i]), convert(UInt8, p2.values[i])
        v1 < v2 && return true
        v2 < v1 && return false
    end
    # All shared positions tie: fewer preconditions sorts first.
    np1 == np2 || return np1 < np2

    # A machine should never have two indistinguishable edges,
    # so if we reach here, something went wrong.
    error(
        "cannot order edges: two distinct edges have identical labels " *
        "and preconditions"
    )
end

"""Check if two edges have preconditions that could be disambiguating.
I.e. can an FSM distinguish the edges based on their conditions?
"""
Expand Down
46 changes: 45 additions & 1 deletion src/machine.jl
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,49 @@ function Base.show(io::IO, machine::Machine)
)
end

# Return a new machine equivalent to `machine`, with its states renumbered in
# a deterministic order. Solves #19, see issue #106.
#
# States are numbered in the order they are first discovered when walking the
# machine from its start node, visiting each node's outgoing edges sorted by
# `in_sort_order`. The start state always becomes state 1. Final states and
# EOF actions are remapped to the new numbering, and every new node's edge
# list is stored sorted by `in_sort_order`.
function reorder_machine(machine::Machine)
    # old state index => new state index, assigned in discovery order
    old2new = Dict(machine.start.state => 1)
    # Worklist of discovered nodes whose edges are still to be followed
    # (processed last-in, first-out).
    stack = [machine.start]
    while !isempty(stack)
        node = pop!(stack)
        for (_, target) in sort(node.edges; lt=in_sort_order, by=first)
            if !haskey(old2new, target.state)
                old2new[target.state] = length(old2new) + 1
                push!(stack, target)
            end
        end
    end

    # New state numbers are exactly 1:length(old2new), so a vector indexed by
    # new state number holds the new nodes.
    new_nodes = [Node(i) for i in 1:length(old2new)]

    # Copy every edge over, pointing it at the renumbered target node.
    # The traversal result is collected once and reused for the sanity check.
    old_nodes = collect(traverse(machine.start))
    @assert length(old_nodes) == length(machine.states)
    for old_node in old_nodes
        new_node = new_nodes[old2new[old_node.state]]
        for (edge, target) in old_node.edges
            push!(new_node.edges, (edge, new_nodes[old2new[target.state]]))
        end
    end
    # Edge order within a node must not depend on the order edges were copied.
    for node in new_nodes
        sort!(node.edges; by=first, lt=in_sort_order)
    end

    # Rebuild machine and return it
    return Machine(
        new_nodes[1],
        machine.states,
        1,
        Set([old2new[i] for i in machine.final_states]),
        Dict{Int, ActionList}(old2new[i] => act for (i, act) in machine.eof_actions),
    )
end

"""
compile(re::RegExp; optimize::Bool=true, unambiguous::Bool=true) -> Machine
Expand All @@ -94,7 +137,8 @@ function compile(re::RegExp.RE; optimize::Bool=true, unambiguous::Bool=true)
dfa = remove_dead_nodes(reduce_nodes(dfa))
end
validate(dfa)
return dfa2machine(dfa)
machine = dfa2machine(dfa)
return reorder_machine(machine)
end

function dfa2machine(dfa::DFA)
Expand Down
198 changes: 0 additions & 198 deletions src/sdict.jl

This file was deleted.

Loading

0 comments on commit 6e5f508

Please sign in to comment.