Skip to content

Commit

Permalink
Add more comments
Browse files Browse the repository at this point in the history
  • Loading branch information
jakobnissen committed Aug 3, 2022
1 parent d64ba0d commit f3440f4
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 11 deletions.
22 changes: 17 additions & 5 deletions src/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,8 @@ function CodeGenContext(;
getbyte::Function=Base.getindex,
clean::Bool=false)
# special conditions for the goto generator
if generator == :goto
if getbyte != Base.getindex
throw(ArgumentError("GOTO generator only support Base.getindex"))
end
if generator == :goto && getbyte != Base.getindex
throw(ArgumentError("GOTO generator only support Base.getindex"))
end
# check generator
if generator == :table
Expand Down Expand Up @@ -248,6 +246,8 @@ function generate_table_code(ctx::CodeGenContext, machine::Machine, actions::Dic
$(eof_action_code)
$(ctx.vars.cs) = 0
elseif $(ctx.vars.cs) < 0
# If cs < 0, the machine errored. The code above incremented p regardless,
# but on error, we want p to be where the machine errored, so we reset it.
$(ctx.vars.p) -= 1
end
end # GC.@preserve block
Expand Down Expand Up @@ -426,11 +426,12 @@ function generate_goto_code(ctx::CodeGenContext, machine::Machine, actions::Dict
# Check the final state is an accept state, in an efficient manner
final_state_code = generate_final_state_mem_code(ctx, machine)

# This is an overview of the final code structure
return quote
$(ctx.vars.mem) = $(SizedMemory)($(ctx.vars.data))
if $(ctx.vars.p) > $(ctx.vars.p_end)
@goto exit
end
$(ctx.vars.mem) = $(SizedMemory)($(ctx.vars.data))
$(enter_code)
$(Expr(:block, blocks...))
@label exit
Expand Down Expand Up @@ -586,6 +587,14 @@ function generate_input_error_code(ctx::CodeGenContext, machine::Machine)
end
end

# `@escape` is a placeholder macro: Automa itself never relies on its expansion.
# Generated code may contain `@escape` calls, but Automa strips them in the
# `rewrite_special_macros` function before Julia gets a chance to expand them.
# The definition exists only so that anyone who greps for "escape" lands on
# this comment.
macro escape()
    return nothing
end

# Used by the :table code generator.
function rewrite_special_macros(ctx::CodeGenContext, ex::Expr, eof::Bool)
args = []
Expand Down Expand Up @@ -631,6 +640,9 @@ function isescape(arg)
return arg isa Expr && arg.head == :macrocall && arg.args[1] == Symbol("@escape")
end

# Debug actions just push the action names into a vector called "logger".
# This exists as a convenience method to allow the user to set actions = :debug
# in generate_exec_code.
function debug_actions(machine::Machine)
function log_expr(name)
return :(push!(logger, $(QuoteNode(name))))
Expand Down
26 changes: 21 additions & 5 deletions src/machine.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ function findedge(s::Node, b::UInt8)
error("$(b) ∈ label not found")
end

"""
Machine
An `Automa.Machine` represents a compiled regular expression in `Automa.jl`.
Its fields are considered internal. Its only use is as an argument to
Automa's code generation functions.
To visualise the DFA represented by a `Machine`, use `Automa.machine2dot`
to construct a DOT file, then visualise it using the `graphviz` software.
"""
struct Machine
start::Node
states::UnitRange{Int}
Expand Down Expand Up @@ -53,18 +63,24 @@ function machine_names(machine::Machine)
end

function Base.show(io::IO, machine::Machine)
print(io, summary(machine), "(<states=", machine.states, ",start_state=", machine.start_state, ",final_states=", machine.final_states, ">)")
print(io,
summary(machine),
"(<states=", machine.states,
",start_state=", machine.start_state,
",final_states=[", join(map(string, collect(machine.final_states)), ','), "]>)"
)
end

"""
compile(re::RegExp; optimize, unambiguous) -> Machine
compile(re::RegExp; optimize::Bool=true, unambiguous::Bool=true) -> Machine
Compile a finite state machine (FSM) from RegExp `re`. If `optimize`, attempt to minimize the number
of states in the FSM. If `unambiguous`, disallow creation of FSM where the actions are not deterministic.
Compile a finite state machine (FSM) from RegExp `re`.
If `optimize`, attempt to minimize the number of states in the FSM.
If `unambiguous`, disallow creation of FSM where the actions are not deterministic.
# Examples
```
machine let
machine = let
name = re"[A-Z][a-z]+"
first_last = name * re" " * name
last_first = name * re", " * name
Expand Down
15 changes: 15 additions & 0 deletions src/nfa.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ function Base.show(io::IO, node::NFANode)
print(io, summary(node), "(<", length(node.edges), " edges", '@', objectid(node), ">)")
end

# An NFA contains a start node and a final node, which are not the same, as per
# the textbook definition.
# This NFA is an NFA with epsilon transitions.
struct NFA
start::NFANode
final::NFANode
Expand Down Expand Up @@ -171,7 +174,11 @@ function re2nfa(re::RegExp.RE, predefined_actions::Dict{Symbol,Action}=Dict{Symb
return NFA(nfa_start, nfa_final)
end

# Removes both dead nodes, i.e. nodes from which there is no path to
# the final node, and also unreachable nodes, i.e. nodes that cannot be
# reached from the start node.
function remove_dead_nodes(nfa::NFA)
# Get a dict Node => Set of nodes that point to Node.
backrefs = Dict(nfa.start => Set{NFANode}())
for s in traverse(nfa.start), (_, t) in s.edges
push!(get!(() -> Set{NFANode}(), backrefs, t), s)
Expand All @@ -189,6 +196,7 @@ function remove_dead_nodes(nfa::NFA)
)
end

# Mark nodes as alive, if the final state can be reached from them.
alive = Set{NFANode}()
unvisited = [nfa.final]
while !isempty(unvisited)
Expand All @@ -200,13 +208,20 @@ function remove_dead_nodes(nfa::NFA)
end
end
end

# If this is not true, we threw the big error above.
@assert nfa.start ∈ alive
@assert nfa.final ∈ alive

# Map from old to new node.
newnodes = Dict{NFANode,NFANode}()
new(s) = get!(() -> NFANode(), newnodes, s)
isvisited(s) = haskey(newnodes, s)
unvisited = [nfa.start]

# Now make a new NFA that only contains nodes marked alive in the previous step.
# Since we make this new NFA by traversing from the start node, we also skip
# unreachable nodes.
while !isempty(unvisited)
s = pop!(unvisited)
s′ = new(s)
Expand Down
3 changes: 2 additions & 1 deletion src/tokenizer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ function generate_table_code(ctx::CodeGenContext, tokenizer::Tokenizer, actions:
$(ctx.vars.cs) = -$(ctx.vars.cs)
end
end
# If in a failed state, reset p (why do we do this?)
# If in a failed state, reset p to where it failed, since it was
# incremented immediately after the state transition
if $(ctx.vars.cs) < 0
$(ctx.vars.p) -= 1
end
Expand Down

0 comments on commit f3440f4

Please sign in to comment.