Skip to content

Commit

Permalink
Pass options recursively to parsers, and allow handlers to specify op…
Browse files Browse the repository at this point in the history
…tions. (Used for ignore_strings).
  • Loading branch information
gkellogg committed Sep 11, 2024
1 parent 8531d6d commit 6db52e6
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 22 deletions.
31 changes: 19 additions & 12 deletions lib/ebnf/peg/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ def start_production(term, **options, &block)
# @yieldparam [Proc] block
# Block passed to initialization for yielding to calling parser.
# Should conform to the yield specs for #initialize
# @yieldparam [Hash] **options
# Other data that may be passed to the production
# @yieldreturn [Object] the result of this production.
# Yield to generate a triple
def production(term, clear_packrat: false, &block)
Expand Down Expand Up @@ -183,6 +185,8 @@ def method_missing(method, *args, &block)
# Identify the symbol of the starting rule with `start`.
# @param [Hash{Symbol => Object}] options
# @option options[Integer] :high_water passed to lexer
# @option options[:upper, :lower] :insensitive_strings
# Perform case-insensitive match of strings not defined as terminals, and map to either upper or lower case.
# @option options [Logger] :logger for errors/progress/debug.
# @option options[Integer] :low_water passed to lexer
# @option options[Boolean] :seq_hash (false)
Expand All @@ -201,7 +205,7 @@ def method_missing(method, *args, &block)
# or errors raised during processing callbacks. Internal
# errors are raised using {Error}.
# @todo FIXME implement seq_hash
def parse(input = nil, start = nil, rules = nil, **options, &block)
def parse(input = nil, start = nil, rules = nil, insensitive_strings: nil, **options, &block)
start ||= options[:start]
rules ||= options[:rules] || []
@rules = rules.inject({}) {|memo, rule| memo.merge(rule.sym => rule)}
Expand Down Expand Up @@ -230,7 +234,7 @@ def parse(input = nil, start = nil, rules = nil, **options, &block)
start_rule = @rules[start]
raise Error, "Starting production #{start.inspect} not defined" unless start_rule

result = start_rule.parse(scanner)
result = start_rule.parse(scanner, insensitive_strings: insensitive_strings)
if result == :unmatched
# Start rule wasn't matched, which is about the only error condition
error("--top--", @furthest_failure.to_s,
Expand Down Expand Up @@ -367,21 +371,17 @@ def debug(*args, &block)
# Start for production
# Adds data available during the processing of the production
#
# @param [Symbol] prod
# @param [Hash] **options other options available for handlers
# @return [Hash] composed of production options. Currently only `as_hash` is supported.
# @see ClassMethods#start_production
def onStart(prod)
def onStart(prod, **options)
handler = self.class.start_handlers[prod]
@productions << prod
progress("#{prod}(:start)", "",
lineno: (scanner.lineno if scanner),
pos: (scanner.pos if scanner)
) do
"#{prod}, pos: #{scanner ? scanner.pos : '?'}, rest: #{scanner ? scanner.rest[0..20].inspect : '?'}"
end
if handler
# Create a new production data element, potentially allowing handler
# to customize before pushing on the @prod_data stack
data = {_production: prod}
data = {_production: prod}.merge(options)
begin
self.class.eval_with_binding(self) {
handler.call(data, @parse_callback)
Expand All @@ -396,14 +396,21 @@ def onStart(prod)
# explicit start handler
@prod_data << {_production: prod}
end
progress("#{prod}(:start)", "",
lineno: (scanner.lineno if scanner),
pos: (scanner.pos if scanner)
) do
"#{data.inspect}@(#{scanner ? scanner.pos : '?'}), rest: #{scanner ? scanner.rest[0..20].inspect : '?'}"
end
return self.class.start_options.fetch(prod, {}) # any options on this production
end

# Finish of production
#
# @param [Object] result parse result
# @param [Hash] **options other options available for handlers
# @return [Object] parse result, or the value returned from the handler
def onFinish(result)
def onFinish(result, **options)
#puts "prod_data(f): " + @prod_data.inspect
prod = @productions.last
handler, clear_packrat = self.class.production_handlers[prod]
Expand All @@ -415,7 +422,7 @@ def onFinish(result)
# Pop production data element from stack, potentially allowing handler to use it
result = begin
self.class.eval_with_binding(self) {
handler.call(result, data, @parse_callback)
handler.call(result, data, @parse_callback, **options)
}
rescue ArgumentError, Error => e
error("finish", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace)
Expand Down
22 changes: 12 additions & 10 deletions lib/ebnf/peg/rule.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module Rule
##
# Parse a rule or terminal, invoking callbacks, as appropriate

# If there is are `start_production` and/or `production`,
# If there are `start_production` and/or `production` handlers,
# they are invoked with a `prod_data` stack, the input stream and offset.
# Otherwise, the results are added as an array value
# to a hash indexed by the rule name.
Expand All @@ -31,8 +31,9 @@ module Rule
# * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
#
# @param [Scanner] input
# @param [Hash] **options Other data that may be passed to handlers.
# @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
def parse(input)
def parse(input, **options)
# Save position and linenumber for backtracking
pos, lineno = input.pos, input.lineno

Expand Down Expand Up @@ -71,7 +72,7 @@ def parse(input)
else
eat_whitespace(input)
end
start_options = parser.onStart(sym)
start_options = options.merge(parser.onStart(sym, **options))
string_regexp_opts = start_options[:insensitive_strings] ? Regexp::IGNORECASE : 0

result = case expr.first
Expand All @@ -84,7 +85,7 @@ def parse(input)
when Symbol
rule = parser.find_rule(prod)
raise "No rule found for #{prod}" unless rule
rule.parse(input)
rule.parse(input, **options)
when String
s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
case start_options[:insensitive_strings]
Expand Down Expand Up @@ -148,7 +149,7 @@ def parse(input)
when :plus
# Result is an array of all expressions while they match,
# at least one must match
plus = rept(input, 1, '*', expr[1], string_regexp_opts)
plus = rept(input, 1, '*', expr[1], string_regexp_opts, **options)

# Update furthest failure for strings and terminals
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
Expand All @@ -163,7 +164,7 @@ def parse(input)
when :rept
# Result is an array of all expressions while they match,
# an empty array if none match
rept = rept(input, expr[1], expr[2], expr[3], string_regexp_opts)
rept = rept(input, expr[1], expr[2], expr[3], string_regexp_opts, **options)

# # Update furthest failure for strings and terminals
parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal?
Expand All @@ -176,7 +177,7 @@ def parse(input)
when Symbol
rule = parser.find_rule(prod)
raise "No rule found for #{prod}" unless rule
rule.parse(input)
rule.parse(input, **options.merge(_rept_data: accumulator))
when String
s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
case start_options[:insensitive_strings]
Expand Down Expand Up @@ -204,7 +205,7 @@ def parse(input)
when :star
# Result is an array of all expressions while they match,
# an empty array if none match
star = rept(input, 0, '*', expr[1], string_regexp_opts)
star = rept(input, 0, '*', expr[1], string_regexp_opts, **options)

# Update furthest failure for strings and terminals
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
Expand All @@ -217,7 +218,7 @@ def parse(input)
input.pos, input.lineno = pos, lineno
end

result = parser.onFinish(result)
result = parser.onFinish(result, **options)
(parser.packrat[sym] ||= {})[pos] = {
pos: input.pos,
lineno: input.lineno,
Expand All @@ -229,7 +230,8 @@ def parse(input)
##
# Repetition, 0-1, 0-n, 1-n, ...
#
# Note, nil results are removed from the result, but count towards min/max calculations
# Note, nil results are removed from the result, but count towards min/max calculations.
# Saves temporary production data to prod_data stack.
#
# @param [Scanner] input
# @param [Integer] min
Expand Down

0 comments on commit 6db52e6

Please sign in to comment.