From 3f881126c2498d21a7af7997e11974ae7f2c6840 Mon Sep 17 00:00:00 2001 From: alpaca-tc Date: Sat, 2 Aug 2025 22:34:38 +0900 Subject: [PATCH] Handle SyntaxError#message with invalid encoding related: https://bugs.ruby-lang.org/issues/21528 This patch fixes an issue in IRB where it fails to handle a `SyntaxError#message` that has invalid encoding. The root cause is that prism may generate a `SyntaxError#message` with broken encoding when the source contains multibyte characters. This patch ensures irb can handle such cases gracefully. --- lib/irb/ruby-lex.rb | 92 ++++++++++++++-------------- test/irb/command/test_show_source.rb | 24 ++++++++ 2 files changed, 70 insertions(+), 46 deletions(-) diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb index dd4a8d060..3aef64497 100644 --- a/lib/irb/ruby-lex.rb +++ b/lib/irb/ruby-lex.rb @@ -73,6 +73,48 @@ def defined? do yield ] + SYNTAX_ERROR_PATTERNS = { + # "syntax error, unexpected keyword_end" + # + # example: + # if ( + # end + # + # example: + # end + "unexpected keyword_end" => :unrecoverable_error, + # "syntax error, unexpected '.'" + # + # example: + # . + "unexpected '.'" => :unrecoverable_error, + # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" + # + # example: + # method / f / + "unexpected tREGEXP_BEG" => :unrecoverable_error, + # "unterminated regexp meets end of file" + # + # example: + # / + # + # "unterminated string meets end of file" + # + # example: + # ' + "unterminated string meets end of file" => :recoverable_error, + "unterminated regexp meets end of file" => :recoverable_error, + # "syntax error, unexpected end-of-input, expecting keyword_end" + # + # example: + # if true + # hoge + # if false + # fuga + # end + "unexpected end-of-input" => :recoverable_error, + }.freeze + class TerminateLineInput < StandardError def initialize super("Terminate Line Input") @@ -252,53 +294,11 @@ def check_code_syntax(code, local_variables:) # This is for a hash with invalid encoding symbol, {"\xAE": 1} :unrecoverable_error rescue SyntaxError => e - case e.message - when /unexpected keyword_end/ - # "syntax error, unexpected keyword_end" - # - # example: - # if ( - # end - # - # example: - # end - return :unrecoverable_error - when /unexpected '\.'/ - # "syntax error, unexpected '.'" - # - # example: - # . - return :unrecoverable_error - when /unexpected tREGEXP_BEG/ - # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" - # - # example: - # method / f / - return :unrecoverable_error - when /unterminated (?:string|regexp) meets end of file/ - # "unterminated regexp meets end of file" - # - # example: - # / - # - # "unterminated string meets end of file" - # - # example: - # ' - return :recoverable_error - when /unexpected end-of-input/ - # "syntax error, unexpected end-of-input, expecting keyword_end" - # - # example: - # if true - # hoge - # if false - # fuga - # end - return :recoverable_error - else - return :other_error + SYNTAX_ERROR_PATTERNS.each do |pattern, error| + return error if e.message.include?(pattern) end + + return :other_error ensure $VERBOSE = verbose end diff --git a/test/irb/command/test_show_source.rb b/test/irb/command/test_show_source.rb index 7ef879e81..c9eb845a8 100644 --- a/test/irb/command/test_show_source.rb +++ b/test/irb/command/test_show_source.rb @@ -423,5 +423,29 @@ class B assert_match(%r[#{@ruby_file.to_path}:7\s+Z = 1], out) assert_match(%r[#{@ruby_file.to_path}:8\s+Array = 1], out) end + + def test_show_source_with_prism_returns_invalid_utf8_string + write_ruby <<~RUBY + class A + def call + if true + nil + # あああああああああああああああああああああああ + # あああああああああああああああああああああああ + end + end + end + + binding.irb + RUBY + + out = run_ruby_file do + type "inst = A.new" + type "show_source inst.call" + type "exit" + end + + assert_match(/def call/, out) + end end end