diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb index bac73d0..704e333 100644 --- a/lib/ebnf/peg/parser.rb +++ b/lib/ebnf/peg/parser.rb @@ -138,6 +138,8 @@ def start_production(term, **options, &block) # @yieldparam [Proc] block # Block passed to initialization for yielding to calling parser. # Should conform to the yield specs for #initialize + # @yieldparam [Hash] **options + # Other data that may be passed to the production # @yieldreturn [Object] the result of this production. # Yield to generate a triple def production(term, clear_packrat: false, &block) @@ -183,6 +185,8 @@ def method_missing(method, *args, &block) # Identify the symbol of the starting rule with `start`. # @param [Hash{Symbol => Object}] options # @option options[Integer] :high_water passed to lexer + # @option options[:upper, :lower] :insensitive_strings + # Perform case-insensitive match of strings not defined as terminals, and map to either upper or lower case. # @option options [Logger] :logger for errors/progress/debug. # @option options[Integer] :low_water passed to lexer # @option options[Boolean] :seq_hash (false) @@ -201,7 +205,7 @@ def method_missing(method, *args, &block) # or errors raised during processing callbacks. Internal # errors are raised using {Error}. # @todo FIXME implement seq_hash - def parse(input = nil, start = nil, rules = nil, **options, &block) + def parse(input = nil, start = nil, rules = nil, insensitive_strings: nil, **options, &block) start ||= options[:start] rules ||= options[:rules] || [] @rules = rules.inject({}) {|memo, rule| memo.merge(rule.sym => rule)} @@ -230,7 +234,7 @@ def parse(input = nil, start = nil, rules = nil, **options, &block) start_rule = @rules[start] raise Error, "Starting production #{start.inspect} not defined" unless start_rule - result = start_rule.parse(scanner) + result = start_rule.parse(scanner, insensitive_strings: insensitive_strings) if result == :unmatched # Start rule wasn't matched, which is about the only error condition error("--top--", @furthest_failure.to_s, @@ -367,21 +371,17 @@ def debug(*args, &block) # Start for production # Adds data avoiable during the processing of the production # + # @param [Symbol] prod + # @param [Hash] **options other options available for handlers # @return [Hash] composed of production options. Currently only `as_hash` is supported. # @see ClassMethods#start_production - def onStart(prod) + def onStart(prod, **options) handler = self.class.start_handlers[prod] @productions << prod - progress("#{prod}(:start)", "", - lineno: (scanner.lineno if scanner), - pos: (scanner.pos if scanner) - ) do - "#{prod}, pos: #{scanner ? scanner.pos : '?'}, rest: #{scanner ? scanner.rest[0..20].inspect : '?'}" - end if handler # Create a new production data element, potentially allowing handler # to customize before pushing on the @prod_data stack - data = {_production: prod} + data = {_production: prod}.merge(options) begin self.class.eval_with_binding(self) { handler.call(data, @parse_callback) @@ -396,14 +396,21 @@ def onStart(prod) # explicit start handler @prod_data << {_production: prod} end + progress("#{prod}(:start)", "", + lineno: (scanner.lineno if scanner), + pos: (scanner.pos if scanner) + ) do + "#{data.inspect}@(#{scanner ? scanner.pos : '?'}), rest: #{scanner ? scanner.rest[0..20].inspect : '?'}" + end return self.class.start_options.fetch(prod, {}) # any options on this production end # Finish of production # # @param [Object] result parse result + # @param [Hash] **options other options available for handlers # @return [Object] parse result, or the value returned from the handler - def onFinish(result) + def onFinish(result, **options) #puts "prod_data(f): " + @prod_data.inspect prod = @productions.last handler, clear_packrat = self.class.production_handlers[prod] @@ -415,7 +422,7 @@ def onFinish(result) # Pop production data element from stack, potentially allowing handler to use it result = begin self.class.eval_with_binding(self) { - handler.call(result, data, @parse_callback) + handler.call(result, data, @parse_callback, **options) } rescue ArgumentError, Error => e error("finish", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace) diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb index 305543a..ba57bf8 100644 --- a/lib/ebnf/peg/rule.rb +++ b/lib/ebnf/peg/rule.rb @@ -13,7 +13,7 @@ module Rule ## # Parse a rule or terminal, invoking callbacks, as appropriate - # If there is are `start_production` and/or `production`, + # If there are `start_production` and/or `production` handlers, # they are invoked with a `prod_data` stack, the input stream and offset. # Otherwise, the results are added as an array value # to a hash indexed by the rule name. @@ -31,8 +31,9 @@ module Rule # * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string. # # @param [Scanner] input + # @param [Hash] **options Other data that may be passed to handlers. # @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production. - def parse(input) + def parse(input, **options) # Save position and linenumber for backtracking pos, lineno = input.pos, input.lineno @@ -71,7 +72,7 @@ def parse(input) else eat_whitespace(input) end - start_options = parser.onStart(sym) + start_options = options.merge(parser.onStart(sym, **options)) string_regexp_opts = start_options[:insensitive_strings] ? Regexp::IGNORECASE : 0 result = case expr.first @@ -84,7 +85,7 @@ def parse(input) when Symbol rule = parser.find_rule(prod) raise "No rule found for #{prod}" unless rule - rule.parse(input) + rule.parse(input, **options) when String s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) case start_options[:insensitive_strings] @@ -148,7 +149,7 @@ def parse(input) when :plus # Result is an array of all expressions while they match, # at least one must match - plus = rept(input, 1, '*', expr[1], string_regexp_opts) + plus = rept(input, 1, '*', expr[1], string_regexp_opts, **options) # Update furthest failure for strings and terminals parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal? @@ -163,7 +164,7 @@ def parse(input) when :rept # Result is an array of all expressions while they match, # an empty array of none match - rept = rept(input, expr[1], expr[2], expr[3], string_regexp_opts) + rept = rept(input, expr[1], expr[2], expr[3], string_regexp_opts, **options) # # Update furthest failure for strings and terminals parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal? @@ -176,7 +177,7 @@ def parse(input) when Symbol rule = parser.find_rule(prod) raise "No rule found for #{prod}" unless rule - rule.parse(input) + rule.parse(input, **options.merge(_rept_data: accumulator)) when String s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) case start_options[:insensitive_strings] @@ -204,7 +205,7 @@ def parse(input) when :star # Result is an array of all expressions while they match, # an empty array of none match - star = rept(input, 0, '*', expr[1], string_regexp_opts) + star = rept(input, 0, '*', expr[1], string_regexp_opts, **options) # Update furthest failure for strings and terminals parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal? @@ -217,7 +218,7 @@ def parse(input) input.pos, input.lineno = pos, lineno end - result = parser.onFinish(result) + result = parser.onFinish(result, **options) (parser.packrat[sym] ||= {})[pos] = { pos: input.pos, lineno: input.lineno, @@ -229,7 +230,8 @@ def parse(input) ## # Repitition, 0-1, 0-n, 1-n, ... # - # Note, nil results are removed from the result, but count towards min/max calculations + # Note, nil results are removed from the result, but count towards min/max calculations. + # Saves temporary production data to prod_data stack. # # @param [Scanner] input # @param [Integer] min