From fa348b6b84de6200f316785e73c15e614e221e48 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 13 Aug 2025 16:26:57 -0400 Subject: [PATCH 1/2] ZJIT: Side-exit on unknown instructions Don't abort the entire compilation. Fix https://github.com/Shopify/ruby/issues/700 --- zjit/src/codegen.rs | 51 ++++++++++++++++++++++++--------------------- zjit/src/hir.rs | 15 +++++++------ 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index ed0c52a91169a0..51c54846ba8cf5 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -281,11 +281,15 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio // Compile all instructions for &insn_id in block.insns() { let insn = function.find(insn_id); - if gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn).is_none() { - debug!("Failed to compile insn: {insn_id} {insn}"); + if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) { + debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit."); incr_counter!(failed_gen_insn); - return None; - } + gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledInstruction(insn_id), &function.frame_state(last_snapshot)); + // Don't bother generating code after a side-exit. We won't run it. + // TODO(max): Generate ud2 or equivalent. + break; + }; + // It's fine; we generated the instruction } // Make sure the last patch point has enough space to insert a jump asm.pad_patch_point(); @@ -316,7 +320,7 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio } /// Compile an instruction -fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, function: &Function, insn_id: InsnId, insn: &Insn) -> Option<()> { +fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, function: &Function, insn_id: InsnId, insn: &Insn) -> Result<(), InsnId> { // Convert InsnId to lir::Opnd macro_rules! opnd { ($insn_id:ident) => { @@ -334,7 +338,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio macro_rules! no_output { ($call:expr) => { - { let () = $call; return Some(()); } + { let () = $call; return Ok(()); } }; } @@ -344,6 +348,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio let out_opnd = match insn { Insn::Const { val: Const::Value(val) } => gen_const(*val), + Insn::Const { .. } => panic!("Unexpected Const in gen_insn: {insn}"), Insn::NewArray { elements, state } => gen_new_array(asm, opnds!(elements), &function.frame_state(*state)), Insn::NewHash { elements, state } => gen_new_hash(jit, asm, elements, &function.frame_state(*state)), Insn::NewRange { low, high, flag, state } => gen_new_range(jit, asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)), @@ -351,12 +356,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::StringCopy { val, chilled, state } => gen_string_copy(asm, opnd!(val), *chilled, &function.frame_state(*state)), // concatstrings shouldn't have 0 strings // If it happens we abort the compilation for now - Insn::StringConcat { strings, .. } if strings.is_empty() => return None, + Insn::StringConcat { strings, state, .. } if strings.is_empty() => return Err(*state), Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)), Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)), Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"), - Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment + Insn::Snapshot { .. } => return Ok(()), // we don't need to do anything for this instruction at the moment Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)), Insn::IfTrue { val, target } => no_output!(gen_if_true(jit, asm, opnd!(val), target)), Insn::IfFalse { val, target } => no_output!(gen_if_false(jit, asm, opnd!(val), target)), @@ -367,7 +372,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state)), // Ensure we have enough room fit ec, self, and arguments // TODO remove this check when we have stack args (we can use Time.new to test it) - Insn::InvokeBuiltin { bf, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return None, + Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state), Insn::InvokeBuiltin { bf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, opnds!(args)), Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))), Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state)), @@ -403,22 +408,20 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::IncrCounter(counter) => no_output!(gen_incr_counter(asm, counter)), Insn::ObjToString { val, cd, state, .. } => gen_objtostring(jit, asm, opnd!(val), *cd, &function.frame_state(*state)), &Insn::CheckInterrupts { state } => no_output!(gen_check_interrupts(jit, asm, &function.frame_state(state))), - Insn::ArrayExtend { .. } - | Insn::ArrayMax { .. } - | Insn::ArrayPush { .. } - | Insn::DefinedIvar { .. } - | Insn::FixnumDiv { .. } - | Insn::FixnumMod { .. } - | Insn::HashDup { .. } - | Insn::Send { .. } - | Insn::Throw { .. } - | Insn::ToArray { .. } - | Insn::ToNewArray { .. } - | Insn::Const { .. } + &Insn::ArrayExtend { state, .. } + | &Insn::ArrayMax { state, .. } + | &Insn::ArrayPush { state, .. } + | &Insn::DefinedIvar { state, .. } + | &Insn::FixnumDiv { state, .. } + | &Insn::FixnumMod { state, .. } + | &Insn::HashDup { state, .. } + | &Insn::Send { state, .. } + | &Insn::Throw { state, .. } + | &Insn::ToArray { state, .. } + | &Insn::ToNewArray { state, .. } => { - debug!("ZJIT: gen_function: unexpected insn {insn}"); incr_counter!(failed_gen_insn_unexpected); - return None; + return Err(state); } }; @@ -427,7 +430,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio // If the instruction has an output, remember it in jit.opnds jit.opnds[insn_id.0] = Some(out_opnd); - Some(()) + Ok(()) } /// Gets the EP of the ISeq of the containing method, or "local level". diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 15836b8c447444..5cbedece682f70 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -431,6 +431,7 @@ pub enum SideExitReason { UnknownNewarraySend(vm_opt_newarray_send_type), UnknownCallType, UnknownOpcode(u32), + UnhandledInstruction(InsnId), FixnumAddOverflow, FixnumSubOverflow, FixnumMultOverflow, @@ -567,7 +568,7 @@ pub enum Insn { /// Control flow instructions Return { val: InsnId }, /// Non-local control flow. See the throw YARV instruction - Throw { throw_state: u32, val: InsnId }, + Throw { throw_state: u32, val: InsnId, state: InsnId }, /// Fixnum +, -, *, /, %, ==, !=, <, <=, >, >=, &, | FixnumAdd { left: InsnId, right: InsnId, state: InsnId }, @@ -854,7 +855,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::AnyToString { val, str, .. } => { write!(f, "AnyToString {val}, str: {str}") }, Insn::SideExit { reason, .. } => write!(f, "SideExit {reason}"), Insn::PutSpecialObject { value_type } => write!(f, "PutSpecialObject {value_type}"), - Insn::Throw { throw_state, val } => { + Insn::Throw { throw_state, val, .. } => { write!(f, "Throw ")?; match throw_state & VM_THROW_STATE_MASK { RUBY_TAG_NONE => write!(f, "TAG_NONE"), @@ -1218,7 +1219,7 @@ impl Function { } }, &Return { val } => Return { val: find!(val) }, - &Throw { throw_state, val } => Throw { throw_state, val: find!(val) }, + &Throw { throw_state, val, state } => Throw { throw_state, val: find!(val), state }, &StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state }, &StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) }, &StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) }, @@ -1992,7 +1993,6 @@ impl Function { worklist.push_back(state); } | &Insn::Return { val } - | &Insn::Throw { val, .. } | &Insn::Test { val } | &Insn::SetLocal { val, .. } | &Insn::IsNil { val } => @@ -2040,7 +2040,9 @@ impl Function { worklist.push_back(val); worklist.extend(args); } - &Insn::ArrayDup { val, state } | &Insn::HashDup { val, state } => { + &Insn::ArrayDup { val, state } + | &Insn::Throw { val, state, .. } + | &Insn::HashDup { val, state } => { worklist.push_back(val); worklist.push_back(state); } @@ -3261,7 +3263,8 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { break; // Don't enqueue the next block as a successor } YARVINSN_throw => { - fun.push_insn(block, Insn::Throw { throw_state: get_arg(pc, 0).as_u32(), val: state.stack_pop()? }); + let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); + fun.push_insn(block, Insn::Throw { throw_state: get_arg(pc, 0).as_u32(), val: state.stack_pop()?, state: exit_id }); break; // Don't enqueue the next block as a successor } From fc985f5320622301b0dc6132e9b5772c0e168cf0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Feb 2026 19:28:27 +0000 Subject: [PATCH 2/2] Initial plan