Skip to content

Commit

Permalink
Also check ambiguities for final and all actions
Browse files Browse the repository at this point in the history
An oversight in the ambiguity check meant that actions placed on non-epsilon
edges were accidentally not included in the paths for validation.
MWE: `compile(onfinal!(re"a", :a) | onfinal!(re"a", :b))`

This fix breaks tokenizers, so we manually skip the ambiguity check in tokenizers.
In the case of conflicting actions in tokenizers, skipping the check will cause the
longest matching token to be emitted.
  • Loading branch information
jakobnissen committed Mar 1, 2023
1 parent 0a6710d commit ca4aad7
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
3 changes: 2 additions & 1 deletion src/dfa.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ function get_epsilon_paths(tops::Set{NFANode})
push!(heads, (child, append!(copy(actions), [a.name for a in edge.actions])))
end
else
append!(actions, [a.name for a in edge.actions])
push!(paths, (edge, node, actions))
end
end
Expand Down Expand Up @@ -190,7 +191,7 @@ function validate_paths(
strings_to::Dict{DFANode, String}
)
# If they have the same actions, there is no ambiguity
all(actions == paths[1][2] for (n, actions) in paths) && return nothing
all(actions == paths[1][3] for (e, n, actions) in paths) && return nothing
for i in 1:length(paths) - 1
edge1, node1, actions1 = paths[i]
for j in i+1:length(paths)
Expand Down
2 changes: 1 addition & 1 deletion src/tokenizer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ function compile(tokens::AbstractVector{Pair{RegExp.RE,Expr}}; optimize::Bool=tr
push!(actions_code, (name, code))
end
nfa = NFA(start, final)
dfa = nfa2dfa(remove_dead_nodes(nfa))
dfa = nfa2dfa(remove_dead_nodes(nfa), false)
if optimize
dfa = remove_dead_nodes(reduce_nodes(dfa))
end
Expand Down

0 comments on commit ca4aad7

Please sign in to comment.