From ca4aad77384a2ec842aa393abf9e9244d065ee08 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Wed, 1 Mar 2023 19:48:36 +0100 Subject: [PATCH] Also check ambiguities for final and all actions An oversight in the ambiguity check meant that actions placed on non-epsilon edges were accidentally not included in the paths for validation. MWE: `compile(onfinal!(re"a", :a) | onfinal!(re"a", :b))` Including these actions in the check breaks tokenizers, so we manually skip the ambiguity check in tokenizers. In the case of conflicting actions in tokenizers, skipping the check will cause the longest matching token to be emitted. --- src/dfa.jl | 3 ++- src/tokenizer.jl | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dfa.jl b/src/dfa.jl index 8dd81112..ffd5d55f 100644 --- a/src/dfa.jl +++ b/src/dfa.jl @@ -150,6 +150,7 @@ function get_epsilon_paths(tops::Set{NFANode}) push!(heads, (child, append!(copy(actions), [a.name for a in edge.actions]))) end else + append!(actions, [a.name for a in edge.actions]) push!(paths, (edge, node, actions)) end end @@ -190,7 +191,7 @@ function validate_paths( strings_to::Dict{DFANode, String} ) # If they have the same actions, there is no ambiguity - all(actions == paths[1][2] for (n, actions) in paths) && return nothing + all(actions == paths[1][3] for (e, n, actions) in paths) && return nothing for i in 1:length(paths) - 1 edge1, node1, actions1 = paths[i] for j in i+1:length(paths) diff --git a/src/tokenizer.jl b/src/tokenizer.jl index 926b2446..c8adfbb8 100644 --- a/src/tokenizer.jl +++ b/src/tokenizer.jl @@ -27,7 +27,7 @@ function compile(tokens::AbstractVector{Pair{RegExp.RE,Expr}}; optimize::Bool=tr push!(actions_code, (name, code)) end nfa = NFA(start, final) - dfa = nfa2dfa(remove_dead_nodes(nfa)) + dfa = nfa2dfa(remove_dead_nodes(nfa), false) if optimize dfa = remove_dead_nodes(reduce_nodes(dfa)) end