diff --git a/lib/lrama/warnings.rb b/lib/lrama/warnings.rb index 52f09144..fb7856c8 100644 --- a/lib/lrama/warnings.rb +++ b/lib/lrama/warnings.rb @@ -3,6 +3,7 @@ require_relative 'warnings/conflicts' require_relative 'warnings/implicit_empty' +require_relative 'warnings/mixed_references' require_relative 'warnings/name_conflicts' require_relative 'warnings/redefined_rules' require_relative 'warnings/required' @@ -14,6 +15,7 @@ class Warnings def initialize(logger, warnings) @conflicts = Conflicts.new(logger, warnings) @implicit_empty = ImplicitEmpty.new(logger, warnings) + @mixed_references = MixedReferences.new(logger, warnings) @name_conflicts = NameConflicts.new(logger, warnings) @redefined_rules = RedefinedRules.new(logger, warnings) @required = Required.new(logger, warnings) @@ -24,6 +26,7 @@ def initialize(logger, warnings) def warn(grammar, states) @conflicts.warn(states) @implicit_empty.warn(grammar) + @mixed_references.warn(grammar) @name_conflicts.warn(grammar) @redefined_rules.warn(grammar) @required.warn(grammar) diff --git a/lib/lrama/warnings/mixed_references.rb b/lib/lrama/warnings/mixed_references.rb new file mode 100644 index 00000000..07c59a36 --- /dev/null +++ b/lib/lrama/warnings/mixed_references.rb @@ -0,0 +1,101 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Warnings + # Warning rationale: Mixing positional and named references in one rule + # - It reduces readability and makes semantic actions harder to maintain + # - Named references are generally more robust when RHS changes + # Scope: + # - This warning targets semantic value references (`$...`) + # location references (`@...`), and index references (`$:...`) + # and special LHS references (`$$`, `$$`, `@$`, `$:$`) + class MixedReferences + # @rbs (Lrama::Logger logger, bool warnings) -> void + def initialize(logger, warnings) + @logger = logger + @warnings = warnings + end + + # @rbs (Lrama::Grammar grammar) -> void + def warn(grammar) + return unless @warnings + + build_grouped_usage(grammar).each do |rule, usage| + next unless usage[:positional] && usage[:named] + + @logger.warn("warning: rule `#{rule.as_comment}` mixes positional and named references; use named references consistently") + end + end + + private + + # @rbs (Lrama::Grammar grammar) -> Hash[Lrama::Grammar::Rule, { positional: bool, named: bool }] + def build_grouped_usage(grammar) + grouped_usage = {} #: Hash[Lrama::Grammar::Rule, { positional: bool, named: bool }] + + grammar.rules.each do |rule| + next unless (token_code = rule.token_code) + + original_rule = rule.original_rule || rule + usage = (grouped_usage[original_rule] ||= { positional: false, named: false }) + classify_references(token_code.references, token_code.s_value, usage) + end + + grouped_usage + end + + # @rbs (Array[Lrama::Grammar::Reference] references, String source, { positional: bool, named: bool } usage) -> void + def classify_references(references, source, usage) + references.each do |ref| + if positional_reference?(ref) + usage[:positional] = true + elsif named_reference?(ref, source) + usage[:named] = true + end + end + end + + # @rbs (Lrama::Grammar::Reference ref) -> bool + def positional_reference?(ref) + return false unless reference_type_supported?(ref) + return false if ref.index.nil? + + ref.name.nil? + end + + # @rbs (Lrama::Grammar::Reference ref, String source) -> bool + def named_reference?(ref, source) + return false unless reference_type_supported?(ref) + + return true if !ref.name.nil? && ref.name != "$" + + lhs_alias_reference?(ref, source) + end + + # @rbs (Lrama::Grammar::Reference ref) -> bool + def reference_type_supported?(ref) + ref.type == :dollar || ref.type == :at || ref.type == :index + end + + # @rbs (Lrama::Grammar::Reference ref, String source) -> bool + def lhs_alias_reference?(ref, source) + return false unless ref.name == "$" + + lexeme = source.byteslice(ref.first_column...ref.last_column) + return false if lexeme.nil? + + # Keep ignoring special LHS references ($$, $$), same as reference_to_c. + return false if special_lhs_reference_lexeme?(lexeme) + + # Treat remaining `$...` forms as named LHS aliases. + true + end + + # @rbs (String lexeme) -> bool + def special_lhs_reference_lexeme?(lexeme) + lexeme == "$$" || lexeme == "@$" || lexeme == "$:$" || (lexeme.start_with?("$<") && lexeme.end_with?("$")) + end + end + end +end diff --git a/sig/generated/lrama/warnings/mixed_references.rbs b/sig/generated/lrama/warnings/mixed_references.rbs new file mode 100644 index 00000000..d20ace0f --- /dev/null +++ b/sig/generated/lrama/warnings/mixed_references.rbs @@ -0,0 +1,43 @@ +# Generated from lib/lrama/warnings/mixed_references.rb with RBS::Inline + +module Lrama + class Warnings + # Warning rationale: Mixing positional and named references in one rule + # - It reduces readability and makes semantic actions harder to maintain + # - Named references are generally more robust when RHS changes + # Scope: + # - This warning targets semantic value references (`$...`) + # location references (`@...`), and index references (`$:...`) + # and special LHS references (`$$`, `$$`, `@$`, `$:$`) + class MixedReferences + # @rbs (Lrama::Logger logger, bool warnings) -> void + def initialize: (Lrama::Logger logger, bool warnings) -> void + + # @rbs (Lrama::Grammar grammar) -> void + def warn: (Lrama::Grammar grammar) -> void + + private + + # @rbs (Lrama::Grammar grammar) -> Hash[Lrama::Grammar::Rule, { positional: bool, named: bool }] + def build_grouped_usage: (Lrama::Grammar grammar) -> Hash[Lrama::Grammar::Rule, { positional: bool, named: bool }] + + # @rbs (Array[Lrama::Grammar::Reference] references, String source, { positional: bool, named: bool } usage) -> void + def classify_references: (Array[Lrama::Grammar::Reference] references, String source, { positional: bool, named: bool } usage) -> void + + # @rbs (Lrama::Grammar::Reference ref) -> bool + def positional_reference?: (Lrama::Grammar::Reference ref) -> bool + + # @rbs (Lrama::Grammar::Reference ref, String source) -> bool + def named_reference?: (Lrama::Grammar::Reference ref, String source) -> bool + + # @rbs (Lrama::Grammar::Reference ref) -> bool + def reference_type_supported?: (Lrama::Grammar::Reference ref) -> bool + + # @rbs (Lrama::Grammar::Reference ref, String source) -> bool + def lhs_alias_reference?: (Lrama::Grammar::Reference ref, String source) -> bool + + # @rbs (String lexeme) -> bool + def special_lhs_reference_lexeme?: (String lexeme) -> bool + end + end +end diff --git a/spec/lrama/warnings/mixed_references_spec.rb b/spec/lrama/warnings/mixed_references_spec.rb new file mode 100644 index 00000000..ef4e1d60 --- /dev/null +++ b/spec/lrama/warnings/mixed_references_spec.rb @@ -0,0 +1,531 @@ +# frozen_string_literal: true + +RSpec.describe Lrama::Warnings::MixedReferences do + describe "#warn" do + context "when warnings true" do + it "warns about mixed positional and named references in a rule" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] NUM[right] + { + $result = $1 + $right; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/mixed_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).to have_received(:warn).with("warning: rule `expr: NUM NUM` mixes positional and named references; use named references consistently") + end + + it "does not warn when named references are used consistently" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] NUM[right] + { + $result = $left + $right; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/named_references_only.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).not_to have_received(:warn).with(/mixes positional and named references/) + end + + it "warns once when references are mixed across actions in the same rule" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM + { + observe_value($1); + } + NUM[right] + { + $result = $right; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/mixed_across_actions.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).to have_received(:warn).with(/warning: rule `expr: .*` mixes positional and named references; use named references consistently/).once + end + + it "does not warn when positional references are used consistently" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] NUM[right] + { + consume_pair($1, $2); + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/positional_references_only.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).not_to have_received(:warn).with(/mixes positional and named references/) + end + + it "warns when mixing location and named value references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] + { + assign_location(@1); + $result = $left; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/location_and_named_value_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).to have_received(:warn).with("warning: rule `expr: NUM` mixes positional and named references; use named references consistently") + end + + it "warns when mixing address-of location references and named value references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] NUM[right] + { + build_span(&@1, &@2); + $result = $left + $right; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/address_of_location_and_named_value_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).to have_received(:warn).with("warning: rule `expr: NUM NUM` mixes positional and named references; use named references consistently") + end + + it "warns when mixing index and named value references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] + { + remember_index($:1); + $result = $left; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/index_and_named_value_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).to have_received(:warn).with("warning: rule `expr: NUM` mixes positional and named references; use named references consistently") + end + + it "does not warn when using only special and positional index references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] + { + set_error_index($:$); + remember_index($:1); + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/special_and_positional_index_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).not_to have_received(:warn).with(/mixes positional and named references/) + end + + it "warns when mixing LHS alias and positional references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] + { + $result = $1; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/lhs_alias_and_positional_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).to have_received(:warn).with("warning: rule `expr: NUM` mixes positional and named references; use named references consistently") + end + + it "does not warn when mixing special LHS and positional references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] + { + $$ = $1; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/special_lhs_and_positional_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).not_to have_received(:warn).with(/mixes positional and named references/) + end + + it "does not warn when multibyte text appears before special LHS references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] + { + /* あ */ + $$ = $1; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/multibyte_before_special_lhs_and_positional_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).not_to have_received(:warn).with(/mixes positional and named references/) + end + + it "does not warn when mixing tagged special LHS and positional references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] + { + $$ = $1; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/tagged_special_lhs_and_positional_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).not_to have_received(:warn).with(/mixes positional and named references/) + end + + it "does not warn when multibyte text appears before tagged special LHS references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] + { + /* あ */ + $$ = $1; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/multibyte_before_tagged_special_lhs_and_positional_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).not_to have_received(:warn).with(/mixes positional and named references/) + end + + it "warns when mixing $0 and named symbol references in the same rule" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token LEFT RIGHT tPLUS + %type expr + %% + program: expr + ; + expr: LEFT tPLUS RIGHT + { + $0 = $LEFT + $3; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/zero_positional_and_named_symbol_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).to have_received(:warn).with("warning: rule `expr: LEFT tPLUS RIGHT` mixes positional and named references; use named references consistently") + end + + it "warns when mixing tagged LHS alias and positional references" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] + { + $result = $1; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/tagged_lhs_alias_and_positional_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, true).warn(grammar, states) + + expect(logger).to have_received(:warn).with("warning: rule `expr: NUM` mixes positional and named references; use named references consistently") + end + end + + context "when warnings false" do + it "does not warn even if references are mixed" do + source = <<~Y + %{ + // Prologue + %} + %union { + int i; + } + %token NUM + %type expr + %% + program: expr + ; + expr[result]: NUM[left] NUM[right] + { + $result = $1 + $right; + } + ; + Y + + grammar = Lrama::Parser.new(source, "warnings/mixed_references.y").parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, Lrama::Tracer.new(Lrama::Logger.new)) + states.compute + logger = instance_spy(Lrama::Logger) + + Lrama::Warnings.new(logger, false).warn(grammar, states) + + expect(logger).not_to have_received(:warn) + end + end + end +end