# Reconstructed from "[PATCH 1/4] Add more grammars" (NARUSE, Yui,
# 2023-06-12), lib/racc/grammar.rb part.  The patch introduces one marker
# class per new piece of rule syntax:
#
#   Option:   ?
#   Many:     *
#   Many1:    +
#   Grouping: ( ... )
#
# The grammar-file parser creates a marker with the scanner's current line
# number whenever it reads the corresponding punctuation.

# Behaviour shared by all punctuation markers: each one remembers the line
# it was read on and prints as the punctuation it stands for.
class GrammarMark
  # Line number handed to the constructor (used in error messages).
  attr_reader :lineno

  # lineno:: line of the grammar file on which the punctuation was read.
  def initialize(lineno)
    @lineno = lineno
  end

  # The literal punctuation; every concrete marker overrides this.
  def name
    raise NotImplementedError, "#{self.class} must define #name"
  end

  # Markers inspect as their punctuation, which keeps dumps of a rule's
  # symbol list readable.
  def inspect
    name
  end
end

# Marker for the postfix "?" (optional item).
class OptionMark < GrammarMark
  def name
    '?'
  end
end

# Marker for the postfix "*" (zero or more items).
class ManyMark < GrammarMark
  def name
    '*'
  end
end

# Marker for the postfix "+" (one or more items).
class Many1Mark < GrammarMark
  def name
    '+'
  end
end

# Marker for "(" (start of a group).
class GroupStartMark < GrammarMark
  def name
    '('
  end
end

# Marker for ")" (end of a group).
class GroupEndMark < GrammarMark
  def name
    ')'
  end
end

# lib/racc/grammarfileparser.rb part of the same patch: the rule DSL gains
# one alternative per marker, directly after the existing "|" alternative.
# It is a fragment of a larger expression that starts outside this excerpt,
# so it is kept here for reference only:
#
#   | seq("|") {|*|
#       OrMark.new(@scanner.lineno)
#     }\
#   | seq("?") {|*|
#       OptionMark.new(@scanner.lineno)
#     }\
#   | seq("*") {|*|
#       ManyMark.new(@scanner.lineno)
#     }\
#   | seq("+") {|*|
#       Many1Mark.new(@scanner.lineno)
#     }\
#   | seq("(") {|*|
#       GroupStartMark.new(@scanner.lineno)
#     }\
#   | seq(")") {|*|
#       GroupEndMark.new(@scanner.lineno)
#     }\
#   | seq("=", :symbol) {|_, sym|
#       Prec.new(sym, @scanner.lineno)
#     }\
# Reconstructed from the lib/racc/grammarfileparser.rb hunks of
# "[PATCH 1/4] Add more grammars", with "[PATCH 2/4] arguments to action
# should be flat array" and "[PATCH 3/4] dummy target won't be START"
# already applied (NARUSE, Yui, June 2023).

# Registers every alternative of one rule block.
#
# list:: the rule's items as produced by the grammar-file parser; the first
#        element is the rule's target, the rest are symbols, markers
#        (OrMark, OptionMark, ...), Prec entries and actions.  The target is
#        shifted off +list+.
#
# Raises CompileError when the block starts with a marker/action/Prec
# instead of a target, or when a ')' appears without a matching '('.
def add_rule_block(list)
  target = list.shift
  case target
  when OrMark, OptionMark, ManyMark, Many1Mark, GroupStartMark, GroupEndMark, UserAction, Prec
    raise CompileError, "#{target.lineno}: unexpected symbol #{target.name}"
  end
  enum = list.each.with_index
  _, sym, idx = _add_rule_block(target, enum)
  return unless idx

  # _add_rule_block only hands back an index when it stopped at a
  # GroupEndMark; at the top level there is no group left to close.
  raise CompileError, "#{sym.lineno}: unexpected symbol ')' at pos=#{idx}"
end

# Consumes items from +enum+ until it is exhausted or a ')' is reached.
#
# target:: left-hand side for the collected alternatives.
# enum::   enumerator yielding [item, index] pairs; shared with nested
#          group parsing, so it is advanced in place.
#
# When a GroupEndMark is reached, returns [rules, end_mark, index] WITHOUT
# adding anything, where rules is [[syms, sprec], ...].  When the input
# ends, every alternative is passed to add_rule for +target+ and nil is
# returned.
def _add_rule_block(target, enum)
  rules = [] # [ [syms, sprec], .. ]
  curr = []
  sprec = nil
  while (item = _next_rule_item(enum))
    sym, idx = item
    case sym
    when OrMark
      rules << [curr, sprec]
      curr = []
      sprec = nil
    when OptionMark
      curr << _add_option_rule(_pop_operand(curr, sym))
    when ManyMark
      curr << _add_many_rule(_pop_operand(curr, sym))
    when Many1Mark
      curr << _add_many1_rule(_pop_operand(curr, sym))
    when GroupStartMark
      curr << _add_group_rule(enum, sym)
    when GroupEndMark
      rules << [curr, sprec]
      return rules, sym, idx
    when Prec
      raise CompileError, "'=' used twice in one rule" if sprec
      sprec = sym.symbol
    else
      curr.push sym
    end
  end
  rules << [curr, sprec]
  rules.each do |syms, prec|
    add_rule target, syms, prec
  end
  nil
end

# Returns the next [item, index] pair of +enum+, or nil once it is
# exhausted.  Only StopIteration is treated as "end of input"; any other
# error propagates.
def _next_rule_item(enum)
  enum.next
rescue StopIteration
  nil
end

# Removes and returns the item a postfix marker ('?', '*', '+') applies to.
# Raises CompileError when the marker has nothing in front of it (start of
# an alternative or of a group) instead of passing nil further down.
def _pop_operand(curr, mark)
  raise CompileError, "#{mark.lineno}: unexpected symbol #{mark.name}" if curr.empty?
  curr.pop
end

# Returns the generated nonterminal standing for "prev?".
# On first use for a given +prev+ (keyed by prev.to_s) two rules with the
# empty action are added: target -> (nothing) and target -> prev.
def _add_option_rule(prev)
  @option_rule_registry ||= {}
  target = @option_rule_registry[prev.to_s]
  return target if target
  target = _gen_target_name("option", prev)
  @option_rule_registry[prev.to_s] = target
  act = UserAction.empty
  @grammar.add Rule.new(target, [], act)
  @grammar.add Rule.new(target, [prev], act)
  target
end

# Returns the generated nonterminal standing for "prev*".
# On first use two rules are added: target -> (nothing) and
# target -> prev target.  The shared action prepends the new value to the
# array built so far, so the semantic value is a flat array.
def _add_many_rule(prev)
  @many_rule_registry ||= {}
  target = @many_rule_registry[prev.to_s]
  return target if target
  target = _gen_target_name("many", prev)
  @many_rule_registry[prev.to_s] = target
  src = SourceText.new("result = val[1] ? val[1].unshift(val[0]) : val", __FILE__, __LINE__)
  act = UserAction.source_text(src)
  @grammar.add Rule.new(target, [], act)
  @grammar.add Rule.new(target, [prev, target], act)
  target
end

# Returns the generated nonterminal standing for "prev+".
# Same as _add_many_rule except that the base rule is target -> prev, so at
# least one item is required.
def _add_many1_rule(prev)
  @many1_rule_registry ||= {}
  target = @many1_rule_registry[prev.to_s]
  return target if target
  target = _gen_target_name("many1", prev)
  @many1_rule_registry[prev.to_s] = target
  src = SourceText.new("result = val[1] ? val[1].unshift(val[0]) : val", __FILE__, __LINE__)
  act = UserAction.source_text(src)
  @grammar.add Rule.new(target, [prev], act)
  @grammar.add Rule.new(target, [prev, target], act)
  target
end

# Parses a parenthesised group whose '(' has just been consumed from +enum+
# and returns the generated nonterminal standing for it.
#
# enum::       the enumerator shared with the enclosing _add_rule_block.
# start_mark:: the GroupStartMark that opened the group (optional; only
#              used to report the line of an unclosed group).
#
# Groups are registered under a name built from their alternatives, so an
# identical group written twice reuses the same nonterminal.  Each
# alternative gets the action "result = val" and keeps its own precedence.
#
# Raises CompileError when the input ends before the closing ')'.
def _add_group_rule(enum, start_mark = nil)
  target = @grammar.intern("-temp-group", true)
  rules, end_mark = _add_rule_block(target, enum)
  unless end_mark
    # _add_rule_block returned nil: it ran out of input without meeting ')'.
    where = start_mark ? "#{start_mark.lineno}: " : ""
    raise CompileError, "#{where}unclosed '('"
  end
  target_name = rules.map { |syms, _prec| syms.join("-") }.join("|")
  @group_rule_registry ||= {}
  group = @group_rule_registry[target_name]
  return group if group

  group = @grammar.intern("-group@#{target_name}", true)
  @group_rule_registry[target_name] = group
  src = SourceText.new("result = val", __FILE__, __LINE__)
  act = UserAction.source_text(src)
  rules.each do |syms, prec|
    rule = Rule.new(group, syms, act)
    rule.specified_prec = prec
    @grammar.add rule
  end
  group
end

# Interns the name of a generated helper nonterminal, e.g. "-many@<value>".
# The second argument to intern was added by PATCH 3/4 ("dummy target won't
# be START").
def _gen_target_name(type, sym)
  @grammar.intern("-#{type}@#{sym.value}", true)
end
# Reconstructed from "[PATCH 4/4] Add test for new grammars" (NARUSE, Yui,
# 2023-06-20), new file test/test_grammar.rb.
require 'test/unit'
require 'racc/static'
require 'tempfile'

# End-to-end tests for the '?', '*', '+' and '( ... )' rule syntax: each
# test compiles a tiny grammar, loads the generated parser and checks the
# value it produces for whitespace-separated input tokens.
class TestGrammar < Test::Unit::TestCase
  # Builds a parser class named MyParser from +rule+ (the body of the
  # grammar's "rule" section), yields an instance of it, and removes the
  # MyParser constant afterwards so the next test can define it afresh.
  #
  # The constant is removed in an ensure clause: a failing assertion inside
  # the block must not leave a stale MyParser behind for later tests.
  private def with_parser(rule)
    grammar_parser = Racc::GrammarFileParser.new
    result = grammar_parser.parse(<<"eom", "foo.y")
class MyParser
rule
#{rule}
end
---- header
require 'strscan'
---- inner
def parse(str)
  @ss = StringScanner.new(str)
  do_parse
end
def next_token
  @ss.skip(/\\s+/)
  token = @ss.scan(/\\S+/) and [token, token]
end
eom
    states = Racc::States.new(result.grammar).nfa
    params = result.params.dup
    generator = Racc::ParserFileGenerator.new(states, params)
    Tempfile.create(%w[y .tab.rb]) do |f|
      generator.generate_parser_file(f.path)
      require f.path
      yield MyParser.new
    end
  ensure
    Object.__send__(:remove_const, :MyParser) if Object.const_defined?(:MyParser)
  end

  def test_optional
    with_parser("stmt: 'abc'?") do |parser|
      assert_equal "abc", parser.parse("abc")
      assert_nil parser.parse("")
    end
  end

  def test_many
    with_parser("stmt: 'abc'*") do |parser|
      assert_equal [], parser.parse("")
      assert_equal ["abc"], parser.parse("abc")
      assert_equal ["abc", "abc"], parser.parse("abc abc")
      assert_equal ["abc", "abc", "abc"], parser.parse("abc abc abc")
    end
  end

  def test_many1
    with_parser("stmt: 'abc'+") do |parser|
      assert_raise(Racc::ParseError){ parser.parse("") }
      assert_equal ["abc"], parser.parse("abc")
      assert_equal ["abc", "abc"], parser.parse("abc abc")
      assert_equal ["abc", "abc", "abc"], parser.parse("abc abc abc")
    end
  end

  def test_group
    with_parser("stmt: ('a')") do |parser|
      assert_raise(Racc::ParseError){ parser.parse("") }
      assert_equal ["a"], parser.parse("a")
    end

    with_parser("stmt: ('a' 'b')") do |parser|
      assert_raise(Racc::ParseError){ parser.parse("") }
      assert_raise(Racc::ParseError){ parser.parse("a") }
      assert_equal ["a", "b"], parser.parse("a b")
    end
  end

  def test_group_or
    with_parser("stmt: ('a' | 'b')") do |parser|
      assert_raise(Racc::ParseError){ parser.parse("") }
      assert_equal ["a"], parser.parse("a")
      assert_equal ["b"], parser.parse("b")
    end
  end

  def test_group_many
    with_parser("stmt: ('a')*") do |parser|
      assert_equal [], parser.parse("")
      assert_equal [["a"]], parser.parse("a")
      assert_equal [["a"], ["a"]], parser.parse("a a")
    end

    with_parser("start: stmt\n stmt: ('a' 'b')*") do |parser|
      assert_equal [], parser.parse("")
      assert_equal [["a", "b"]], parser.parse("a b")
      assert_equal [["a", "b"], ["a", "b"]], parser.parse("a b a b")
    end
  end

  def test_group_or_many
    with_parser("stmt: ('a' | 'b')*") do |parser|
      assert_equal [], parser.parse("")
      assert_equal [["a"], ["a"]], parser.parse("a a")
      assert_equal [["a"], ["b"]], parser.parse("a b")
      assert_equal [["a"], ["b"], ["b"], ["a"]], parser.parse("a b b a")
    end
  end
end