Skip to content

Commit c15c765

Browse files
committed
[PoC] Introduce parameterizing rules with conditionals
I would like to propose a new grammar feature in this PR. I believe that parameterizing rules can express more abstract rules if the rules and actions they expand to can be switched by conditions, so that similar rules can be shared. The syntax is as follows: ``` %rule defined_rule(X, condition): /* empty */ | X { $$ = $1; } %if(condition) /* 1 */ | %if(condition) X %endif X { $$ = $1; } /* 2 */ ; %% r_true : defined_rule(number, %true) ; r_false : defined_rule(number, %false) ; ``` 1. Postfix if: this works like a postfix `if` in Ruby. If the condition is false, it is equivalent to this line being absent. 2. If statement: if the condition is false, it is equivalent to the RHS between `%if` and `%endif` being absent. The article below describes a problem caused by tight coupling with the Lexer, namely the need "to disable certain production rules under certain conditions", and I would like to propose this feature to solve that problem. https://yui-knk.hatenablog.com/entry/2023/04/04/190413 We can trace the RHS to [f_args](https://github.com/ruby/ruby/blob/2f916812a9b818b432ee7c299e021ec62d4727fb/parse.y#L5523-L5575) > [args_tail](https://github.com/ruby/ruby/blob/2f916812a9b818b432ee7c299e021ec62d4727fb/parse.y#L5487-L5503) > [args_forward](https://github.com/ruby/ruby/blob/2f916812a9b818b432ee7c299e021ec62d4727fb/parse.y#L5586-L5597), where f_args is the RHS of both the lambda argument (f_larglist) and the method definition argument (f_arglist). So if we can switch between RHS and actions by passing parameters, we can break up the Lexer/Parser coupling here.
1 parent 95e0cc2 commit c15c765

File tree

13 files changed

+800
-425
lines changed

13 files changed

+800
-425
lines changed

lib/lrama/grammar.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class Grammar
3030
:after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
3131
:symbols_resolver, :types,
3232
:rules, :rule_builders,
33-
:sym_to_rules, :no_stdlib
33+
:sym_to_rules, :no_stdlib, :if_count
3434

3535
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
3636
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
@@ -58,6 +58,7 @@ def initialize(rule_counter)
5858
@accept_symbol = nil
5959
@aux = Auxiliary.new
6060
@no_stdlib = false
61+
@if_count = 0
6162

6263
append_special_symbols
6364
end
@@ -170,6 +171,10 @@ def find_rules_by_symbol(sym)
170171
@sym_to_rules[sym.number]
171172
end
172173

174+
# Resets the counter of currently open %if directives to zero.
# The parser actions in this change increment @if_count on "%if" and
# decrement it on "%endif"; this method puts it back to its initial value.
def initialize_if_count
175+
@if_count = 0
176+
end
177+
173178
private
174179

175180
def compute_nullable

lib/lrama/grammar/binding.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ def resolve_symbol(symbol)
1515
if symbol.is_a?(Lexer::Token::InstantiateRule)
1616
resolved_args = symbol.args.map { |arg| resolve_symbol(arg) }
1717
Lrama::Lexer::Token::InstantiateRule.new(s_value: symbol.s_value, location: symbol.location, args: resolved_args, lhs_tag: symbol.lhs_tag)
18+
elsif symbol.is_a?(Lexer::Token::ControlSyntax)
19+
resolved = symbol.dup
20+
resolved.condition = @parameter_to_arg[symbol.condition_value]
21+
resolved
1822
else
1923
@parameter_to_arg[symbol.s_value] || symbol
2024
end

lib/lrama/grammar/parameterizing_rule/rhs.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,31 @@ def initialize
1010
@precedence_sym = nil
1111
end
1212

13+
# Reports whether this whole RHS alternative should be dropped for the
# given bindings. It is true only when the last symbol, once resolved
# through +bindings+, is a "%if" control token whose condition is false
# (the trailing, postfix-if form of the syntax).
def skip?(bindings)
14+
# An RHS without any symbol has nothing to inspect, so it is never skipped.
return false unless @symbols.last
15+
last_sym = bindings.resolve_symbol(@symbols.last)
16+
last_sym.is_a?(Lexer::Token::ControlSyntax) && last_sym.if? && last_sym.false?
17+
end
18+
19+
# Resolves every symbol of this RHS through +bindings+ and returns the
# resulting list with all control tokens (%if / %endif) removed.
#
# Symbols that sit inside a %if ... %endif region are dropped when the
# condition of that %if, or of ANY %if enclosing it, is false. Checking only
# the innermost %if would wrongly re-enable symbols of a true %if nested
# inside a false one.
#
# Raises RuntimeError when a control token is neither %if nor %endif.
def resolve_symbols(bindings)
  # One entry per currently open %if; true means "this %if's condition is false".
  skip_stack = []

  @symbols.map do |sym|
    resolved = bindings.resolve_symbol(sym)

    if resolved.is_a?(Lexer::Token::ControlSyntax)
      if resolved.if?
        skip_stack.push(resolved.false?)
      elsif resolved.endif?
        skip_stack.pop
      else
        raise "Unexpected control syntax: #{resolved.condition_value}"
      end
      # Control tokens themselves never reach the expanded rule.
      nil
    else
      # Keep the symbol only when no enclosing %if is false.
      resolved unless skip_stack.any?
    end
  end.compact
end
37+
1338
def resolve_user_code(bindings)
1439
return unless user_code
1540

lib/lrama/grammar/rule_builder.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,10 @@ def process_rhs
133133
parameterizing_rule.rhs_list.each do |r|
134134
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: token.lhs_tag || parameterizing_rule.tag)
135135
rule_builder.lhs = lhs_token
136-
r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
136+
next if r.skip?(bindings)
137+
r.resolve_symbols(bindings).each do |sym|
138+
rule_builder.add_rhs(sym)
139+
end
137140
rule_builder.line = line
138141
rule_builder.precedence_sym = r.precedence_sym
139142
rule_builder.user_code = r.resolve_user_code(bindings)

lib/lrama/lexer.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ class Lexer
3838
%rule
3939
%no-stdlib
4040
%inline
41+
%if
42+
%endif
43+
%true
44+
%false
4145
)
4246

4347
def initialize(grammar_file)

lib/lrama/lexer/token.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
require 'lrama/lexer/token/char'
2+
require 'lrama/lexer/token/control_syntax'
23
require 'lrama/lexer/token/ident'
34
require 'lrama/lexer/token/instantiate_rule'
45
require 'lrama/lexer/token/tag'
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
module Lrama
2+
class Lexer
3+
class Token
4+
# Token for the control directives "%if" and "%endif" that may appear in
# the right-hand side of a parameterizing rule.
class ControlSyntax < Token
5+
# Token carrying the condition of a "%if"; nil when none was given
# (the parser action for "%endif" builds the token without one).
attr_accessor :condition
6+
7+
# s_value and location are forwarded unchanged to Token#initialize;
# condition is stored on this token and defaults to nil.
def initialize(s_value:, location:, condition: nil)
8+
@condition = condition
9+
super(s_value: s_value, location: location)
10+
end
11+
12+
# True when this token is the "%if" directive.
def if?
13+
s_value == '%if'
14+
end
15+
16+
# True when this token is the "%endif" directive.
def endif?
17+
s_value == '%endif'
18+
end
19+
20+
# True when a condition token is attached and its s_value is truthy.
# A missing condition (nil) yields false.
def true?
21+
!!@condition&.s_value
22+
end
23+
24+
# Negation of #true?; also true when no condition is attached.
def false?
25+
!true?
26+
end
27+
28+
# The s_value of the attached condition token, or nil without a condition.
def condition_value
29+
@condition&.s_value
30+
end
31+
end
32+
end
33+
end
34+
end

lib/lrama/parser.rb

Lines changed: 527 additions & 420 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

parser.y

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ rule
242242
{
243243
rule = Grammar::ParameterizingRule::Rule.new(val[2].s_value, [], val[4], is_inline: true)
244244
@grammar.add_parameterizing_rule(rule)
245+
@grammar.initialize_if_count
245246
}
246247

247248
rule_args: IDENTIFIER { result = [val[0]] }
@@ -282,7 +283,7 @@ rule
282283
builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]])
283284
result = builder
284285
}
285-
| rule_rhs IDENTIFIER "(" parameterizing_args ")" tag_opt
286+
| rule_rhs IDENTIFIER "(" parameterizing_rule_args ")" tag_opt
286287
{
287288
builder = val[0]
288289
builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5])
@@ -316,6 +317,21 @@ rule
316317
builder.precedence_sym = sym
317318
result = builder
318319
}
320+
| rule_rhs "%if" "(" IDENTIFIER ")"
321+
{
322+
builder = val[0]
323+
builder.symbols << Lrama::Lexer::Token::ControlSyntax.new(s_value: val[1], location: @lexer.location, condition: val[3])
324+
@grammar.if_count += 1
325+
result = builder
326+
}
327+
| rule_rhs "%endif"
328+
{
329+
on_action_error("no %if before %endif", val[0]) if @grammar.if_count == 0
330+
builder = val[0]
331+
builder.symbols << Lrama::Lexer::Token::ControlSyntax.new(s_value: val[1], location: @lexer.location)
332+
@grammar.if_count -= 1
333+
result = builder
334+
}
319335

320336
int_opt: # empty
321337
| INTEGER
@@ -491,11 +507,22 @@ rule
491507
| "+" { result = "nonempty_list" }
492508
| "*" { result = "list" }
493509

494-
parameterizing_args: symbol { result = [val[0]] }
495-
| parameterizing_args ',' symbol { result = val[0].append(val[2]) }
510+
# Argument list of a rule instantiation written inside a %rule body.
# Each top-level element is a `symbol`. The list recurses on itself, so every
# position follows the same rule; recursing on parameterizing_args instead
# would accept %true / %false in every position except the last.
parameterizing_rule_args: symbol { result = [val[0]] }
                        | parameterizing_rule_args ',' symbol { result = val[0].append(val[2]) }
                        | symbol parameterizing_suffix { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] }
                        | IDENTIFIER "(" parameterizing_args ")" { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] }
514+
515+
# Argument list of a rule instantiation. Each element is a symbol_or_bool,
# so the %true / %false literals are accepted here. The last two
# alternatives wrap a suffixed symbol or a nested instantiation in a
# one-element list holding an InstantiateRule token.
parameterizing_args: symbol_or_bool { result = [val[0]] }
516+
| parameterizing_args ',' symbol_or_bool { result = val[0].append(val[2]) }
496517
| symbol parameterizing_suffix { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] }
497518
| IDENTIFIER "(" parameterizing_args ")" { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] }
498519

520+
# A single instantiation argument: either a symbol or a %true / %false literal.
symbol_or_bool: symbol
521+
| bool
522+
523+
# Boolean literal argument. The Ruby true / false value is stored as the
# s_value of an Ident token; no location is passed to the token here.
bool: "%true" { result = Lrama::Lexer::Token::Ident.new(s_value: true) }
524+
| "%false" { result = Lrama::Lexer::Token::Ident.new(s_value: false) }
525+
499526
named_ref_opt: # empty
500527
| '[' IDENTIFIER ']' { result = val[1].s_value }
501528

sig/lrama/grammar/parameterizing_rule/rhs.rbs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ module Lrama
77
attr_reader precedence_sym: Lexer::Token?
88

99
def initialize: () -> void
10+
def skip?: (Grammar::Binding bindings) -> bool
11+
def resolve_symbols: (Grammar::Binding bindings) -> Array[untyped]
1012
def resolve_user_code: (Grammar::Binding bindings) -> Lexer::Token::UserCode?
1113
end
1214
end

0 commit comments

Comments
 (0)