Skip to content

Commit

Permalink
Better error on compiling null regex
Browse files Browse the repository at this point in the history
It is possible for users to construct a regex that matches the null set, i.e.
no strings, not even the empty string. An example is `re"A" & re"B"`.
Automa could support these, but currently doesn't, as there is no point in doing so.
However, attempting to compile such a regex currently throws an obscure internal
error.

This PR improves that error.

See issue #104
  • Loading branch information
jakobnissen committed Aug 2, 2022
1 parent 8b23e8b commit d64ba0d
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 1 deletion.
12 changes: 12 additions & 0 deletions src/nfa.jl
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,18 @@ function remove_dead_nodes(nfa::NFA)
push!(get!(() -> Set{NFANode}(), backrefs, t), s)
end

# Automa could support null regex like `re"A" & re"B"`, but it's trouble,
# and it's useless for the user, who would probably prefer an error.
# We throw this error here and not on NFA construction so the user can visualise
# the NFA and find the error in their regex
if !haskey(backrefs, nfa.final)
error(
"NFA matches the empty set Ø, and therefore consists of only dead nodes. " *
"Automa currently does not support converting null NFAs to DFAs. " *
"Double check your regex, or inspect the NFA."
)
end

alive = Set{NFANode}()
unvisited = [nfa.final]
while !isempty(unvisited)
Expand Down
14 changes: 13 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ end
@test occursin(r"^Automa\.Node\(.*\)$", repr(machine.start))
end

# Regexes that match the null set (no strings at all, not even "") cannot be
# compiled; compiling one is expected to raise an `ErrorException` rather than
# fail with an obscure internal error.
@testset "Null Regex" begin
    regexp = Automa.RegExp
    null_regexes = [
        # Conjunction of two different single-symbol regexes.
        re"A" & re"B",
        # `B` or `C` with everything in `[A-D]` taken away.
        (re"B" | re"C") \ re"[A-D]",
        # Negations of "any number of arbitrary bytes", spelled two ways.
        !regexp.rep(regexp.any()),
        !re"[\x00-\xff]*",
    ]
    for null_regex in null_regexes
        @test_throws ErrorException Automa.compile(null_regex)
    end
end

@testset "Determinacy" begin
# see https://github.com/BioJulia/Automa.jl/issues/19
notmach(re) = Automa.machine2dot(Automa.compile(re)) != Automa.machine2dot(Automa.compile(re))
Expand All @@ -78,7 +90,7 @@ include("test09.jl")
include("test10.jl")
include("test11.jl")
include("test12.jl")
# test13 tested functionality now removed.
include("test13.jl")
include("test14.jl")
include("test15.jl")
include("test16.jl")
Expand Down
26 changes: 26 additions & 0 deletions test/test13.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
module Test13

using Automa
using Test

# Regression cases for regexes (built with `&`, `\` and `!`) that have been seen
# to fail. Each case is a tuple of:
#   (regex, inputs the validator must accept, inputs the validator must reject)
@testset "Test13" begin
    cases = [
        (re"[AB]" & re"A", ["A"], ["B", "AA", "AB"]),
        (re"(A|B|C|D)" \ re"[A-C]", ["D"], ["AC", "A", "B", "DD"]),
        (!re"A[BC]D?E", ["ABCDE", "ABCE"], ["ABDE", "ACE", "ABE"]),
    ]

    # Every case is exercised with both values of the `goto` flag handed to the
    # validator generator; the machine is compiled afresh for each combination.
    for case in cases, goto in (false, true)
        regex, accepted, rejected = case
        machine = Automa.compile(regex)

        # Define (or redefine) `validate` in this module from the generated code.
        validator_def = Automa.generate_validator_function(:validate, machine, goto)
        @eval $validator_def

        # The validator is expected to return `nothing` for accepted input ...
        for input in accepted
            @test isnothing(validate(input))
        end
        # ... and something other than `nothing` for rejected input.
        for input in rejected
            @test !isnothing(validate(input))
        end
    end
end

end # module

0 comments on commit d64ba0d

Please sign in to comment.