//! Code versioning, retained live control flow graph mutations, type tracking, etc.

// So we can comment on individual uses of `unsafe` in `unsafe` functions
#![warn(unsafe_op_in_unsafe_fn)]

use crate::asm::*;
use crate::backend::ir::*;
use crate::codegen::*;
use crate::virtualmem::CodePtr;
use crate::cruby::*;
use crate::options::*;
use crate::stats::*;
use crate::utils::*;
#[cfg(feature = "disasm")]
use crate::disasm::*;
use core::ffi::c_void;
use std::cell::*;
use std::fmt;
use std::mem;
use std::mem::transmute;
use std::ops::Range;
use std::rc::Rc;
use std::collections::HashSet;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use mem::MaybeUninit;
use std::ptr;
use ptr::NonNull;
use YARVOpnd::*;
use TempMapping::*;
use crate::invariants::*;

// Maximum number of temp value types or registers we keep track of
pub const MAX_CTX_TEMPS: usize = 8;

// Maximum number of local variable types or registers we keep track of
const MAX_CTX_LOCALS: usize = 8;

/// An index into `ISEQ_BODY(iseq)->iseq_encoded`. Points
/// to a YARV instruction or an instruction operand.
pub type IseqIdx = u16;

// Represent the type of a value (local/stack/self) in YJIT
#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)]
#[repr(u8)]
pub enum Type {
    Unknown = 0,
    UnknownImm,
    UnknownHeap,
    Nil,
    True,
    False,
    Fixnum,
    Flonum,
    ImmSymbol,

    TString, // An object with the T_STRING flag set, possibly an rb_cString
    CString, // An object that at one point had its class field equal rb_cString (creating a singleton class changes it)
    TArray, // An object with the T_ARRAY flag set, possibly an rb_cArray
    CArray, // An object that at one point had its class field equal rb_cArray (creating a singleton class changes it)
    THash, // An object with the T_HASH flag set, possibly an rb_cHash
    CHash, // An object that at one point had its class field equal rb_cHash (creating a singleton class changes it)

    BlockParamProxy, // A special sentinel value indicating the block parameter should be read from
                     // the current surrounding cfp

    // The context currently relies on types taking at most 4 bits (max value 15)
    // to encode, so if we add any more, we will need to refactor the context.
}

// Default initialization
impl Default for Type {
    fn default() -> Self {
        Type::Unknown
    }
}

impl Type {
    /// This returns an appropriate Type based on a known value
    pub fn from(val: VALUE) -> Type {
        if val.special_const_p() {
            if val.fixnum_p() {
                Type::Fixnum
            } else if val.nil_p() {
                Type::Nil
            } else if val == Qtrue {
                Type::True
            } else if val == Qfalse {
                Type::False
            } else if val.static_sym_p() {
                Type::ImmSymbol
            } else if val.flonum_p() {
                Type::Flonum
            } else {
                unreachable!("Illegal value: {:?}", val)
            }
        } else {
            // Core.rs can't reference rb_cString because it's linked by Rust-only tests.
            // But CString vs TString is only an optimisation and shouldn't affect correctness.
            #[cfg(not(test))]
            match val.class_of() {
                class if class == unsafe { rb_cArray } => return Type::CArray,
                class if class == unsafe { rb_cHash } => return Type::CHash,
                class if class == unsafe { rb_cString } => return Type::CString,
                _ => {}
            }
            // We likewise can't reference rb_block_param_proxy, but it's again an optimisation;
            // we can just treat it as a normal Object.
            #[cfg(not(test))]
            if val == unsafe { rb_block_param_proxy } {
                return Type::BlockParamProxy;
            }
            match val.builtin_type() {
                RUBY_T_ARRAY => Type::TArray,
                RUBY_T_HASH => Type::THash,
                RUBY_T_STRING => Type::TString,
                _ => Type::UnknownHeap,
            }
        }
    }

    /// Check if the type is an immediate
    pub fn is_imm(&self) -> bool {
        match self {
            Type::UnknownImm => true,
            Type::Nil => true,
            Type::True => true,
            Type::False => true,
            Type::Fixnum => true,
            Type::Flonum => true,
            Type::ImmSymbol => true,
            _ => false,
        }
    }

    /// Returns true when the type is not specific.
    pub fn is_unknown(&self) -> bool {
        match self {
            Type::Unknown | Type::UnknownImm | Type::UnknownHeap => true,
            _ => false,
        }
    }

    /// Returns true when we know the VALUE is a specific handle type,
    /// such as a static symbol ([Type::ImmSymbol], i.e. true from RB_STATIC_SYM_P()).
    /// Opposite of [Self::is_unknown].
    pub fn is_specific(&self) -> bool {
        !self.is_unknown()
    }

    /// Check if the type is a heap object
    pub fn is_heap(&self) -> bool {
        match self {
            Type::UnknownHeap => true,
            Type::TArray => true,
            Type::CArray => true,
            Type::THash => true,
            Type::CHash => true,
            Type::TString => true,
            Type::CString => true,
            Type::BlockParamProxy => true,
            _ => false,
        }
    }

    /// Check if it's a T_ARRAY object (both TArray and CArray are T_ARRAY)
    pub fn is_array(&self) -> bool {
        matches!(self, Type::TArray | Type::CArray)
    }

    /// Check if it's a T_HASH object (both THash and CHash are T_HASH)
    pub fn is_hash(&self) -> bool {
        matches!(self, Type::THash | Type::CHash)
    }

    /// Check if it's a T_STRING object (both TString and CString are T_STRING)
    pub fn is_string(&self) -> bool {
        matches!(self, Type::TString | Type::CString)
    }

    /// Returns an Option with the T_ value type if it is known, otherwise None
    pub fn known_value_type(&self) -> Option<ruby_value_type> {
        match self {
            Type::Nil => Some(RUBY_T_NIL),
            Type::True => Some(RUBY_T_TRUE),
            Type::False => Some(RUBY_T_FALSE),
            Type::Fixnum => Some(RUBY_T_FIXNUM),
            Type::Flonum => Some(RUBY_T_FLOAT),
            Type::TArray | Type::CArray => Some(RUBY_T_ARRAY),
            Type::THash | Type::CHash => Some(RUBY_T_HASH),
            Type::ImmSymbol => Some(RUBY_T_SYMBOL),
            Type::TString | Type::CString => Some(RUBY_T_STRING),
            Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None,
            Type::BlockParamProxy => None,
        }
    }

    /// Returns an Option with the class if it is known, otherwise None
    pub fn known_class(&self) -> Option<VALUE> {
        unsafe {
            match self {
                Type::Nil => Some(rb_cNilClass),
                Type::True => Some(rb_cTrueClass),
                Type::False => Some(rb_cFalseClass),
                Type::Fixnum => Some(rb_cInteger),
                Type::Flonum => Some(rb_cFloat),
                Type::ImmSymbol => Some(rb_cSymbol),
                Type::CArray => Some(rb_cArray),
                Type::CHash => Some(rb_cHash),
                Type::CString => Some(rb_cString),
                _ => None,
            }
        }
    }

    /// Returns an Option with the exact value if it is known, otherwise None
    #[allow(unused)] // not yet used
    pub fn known_exact_value(&self) -> Option<VALUE> {
        match self {
            Type::Nil => Some(Qnil),
            Type::True => Some(Qtrue),
            Type::False => Some(Qfalse),
            _ => None,
        }
    }

    /// Returns an Option boolean representing whether the value is truthy if known, otherwise None
    pub fn known_truthy(&self) -> Option<bool> {
        match self {
            Type::Nil => Some(false),
            Type::False => Some(false),
            Type::UnknownHeap => Some(true),
            Type::Unknown | Type::UnknownImm => None,
            _ => Some(true),
        }
    }

    /// Returns an Option boolean representing whether the value is equal to nil if known, otherwise None
    pub fn known_nil(&self) -> Option<bool> {
        match (self, self.known_truthy()) {
            (Type::Nil, _) => Some(true),
            (Type::False, _) => Some(false), // Qfalse is not nil
            (_, Some(true)) => Some(false), // if truthy, can't be nil
            (_, _) => None, // otherwise unknown
        }
    }

    /// Compute a difference between two value types
    pub fn diff(self, dst: Self) -> TypeDiff {
        // Perfect match, difference is zero
        if self == dst {
            return TypeDiff::Compatible(0);
        }

        // Any type can flow into an unknown type
        if dst == Type::Unknown {
            return TypeDiff::Compatible(1);
        }

        // A CArray is also a TArray.
        if self == Type::CArray && dst == Type::TArray {
            return TypeDiff::Compatible(1);
        }

        // A CHash is also a THash.
        if self == Type::CHash && dst == Type::THash {
            return TypeDiff::Compatible(1);
        }

        // A CString is also a TString.
        if self == Type::CString && dst == Type::TString {
            return TypeDiff::Compatible(1);
        }

        // Specific heap type into unknown heap type is imperfect but valid
        if self.is_heap() && dst == Type::UnknownHeap {
            return TypeDiff::Compatible(1);
        }

        // Specific immediate type into unknown immediate type is imperfect but valid
        if self.is_imm() && dst == Type::UnknownImm {
            return TypeDiff::Compatible(1);
        }

        // Incompatible types
        return TypeDiff::Incompatible;
    }

    /// Upgrade this type into a more specific compatible type
    /// The new type must be compatible and at least as specific as the previously known type.
    fn upgrade(&mut self, new_type: Self) {
        // We can only upgrade to a type that is more specific
        assert!(new_type.diff(*self) != TypeDiff::Incompatible);
        *self = new_type;
    }
}

#[derive(Debug, Eq, PartialEq)]
pub enum TypeDiff {
    // usize == 0: Same type
    // usize >= 1: Different but compatible. The smaller, the more compatible.
    Compatible(usize),
    Incompatible,
}

#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
pub enum TempMapping {
    MapToStack(Type),
    MapToSelf,
    MapToLocal(u8),
}

impl Default for TempMapping {
    fn default() -> Self {
        TempMapping::MapToStack(Type::default())
    }
}

impl TempMapping {
    /// Return TempMapping without type information in MapToStack
    pub fn without_type(&self) -> TempMapping {
        match self {
            MapToStack(_) => TempMapping::MapToStack(Type::default()),
            _ => *self,
        }
    }
}

// Operand to a YARV bytecode instruction
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum YARVOpnd {
    // The value is self
    SelfOpnd,

    // Temporary stack operand with stack index
    StackOpnd(u8),
}

impl From<Opnd> for YARVOpnd {
    fn from(value: Opnd) -> Self {
        match value {
            Opnd::Stack { idx, .. } => StackOpnd(idx.try_into().unwrap()),
            _ => unreachable!("{:?} cannot be converted to YARVOpnd", value),
        }
    }
}

/// Number of registers that can be used for stack temps or locals
pub const MAX_MAPPED_REGS: usize = 5;

/// A stack slot or a local variable. u8 represents the index of it (<= 8).
#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
pub enum RegOpnd {
    Stack(u8),
    Local(u8),
}

/// RegMapping manages a set of registers used for stack temps and locals.
/// Each element of the array represents each of the registers.
/// If an element is Some, the stack temp or the local uses a register.
///
/// Note that Opnd::InsnOut uses a separate set of registers at the moment.
#[derive(Copy, Clone, Default, Eq, Hash, PartialEq)]
pub struct RegMapping([Option<RegOpnd>; MAX_MAPPED_REGS]);

impl RegMapping {
    /// Return the index of the register for a given operand if allocated.
    pub fn get_reg(&self, opnd: RegOpnd) -> Option<usize> {
        self.0.iter().enumerate()
            .find(|(_, &reg_opnd)| reg_opnd == Some(opnd))
            .map(|(reg_idx, _)| reg_idx)
    }

    /// Set a given operand to the register at a given index.
    pub fn set_reg(&mut self, opnd: RegOpnd, reg_idx: usize) {
        assert!(self.0[reg_idx].is_none());
        self.0[reg_idx] = Some(opnd);
    }
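
    // Illustrative sketch (not from the original source): with num_temp_regs == 5,
    // a mapping that holds stack temp 0 and local 0 would typically look like
    //   RegMapping([Some(Stack(0)), None, None, None, Some(Local(0))])
    // because find_unused_reg() below prefers low register indexes for stack
    // temps and high register indexes for locals, minimizing collisions between
    // the two kinds of operands.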
    /// Allocate a register for a given operand if available.
    /// Return true if self is updated.
    pub fn alloc_reg(&mut self, opnd: RegOpnd) -> bool {
        // If a given opnd already has a register, skip allocation.
        if self.get_reg(opnd).is_some() {
            return false;
        }

        // If the index is too large to encode with 3 bits, give up.
        match opnd {
            RegOpnd::Stack(stack_idx) => if stack_idx >= MAX_CTX_TEMPS as u8 {
                return false;
            }
            RegOpnd::Local(local_idx) => if local_idx >= MAX_CTX_LOCALS as u8 {
                return false;
            }
        };

        // Allocate a register if available.
        if let Some(reg_idx) = self.find_unused_reg(opnd) {
            self.0[reg_idx] = Some(opnd);
            return true;
        }
        false
    }

    /// Deallocate a register for a given operand if in use.
    /// Return true if self is updated.
    pub fn dealloc_reg(&mut self, opnd: RegOpnd) -> bool {
        for reg_opnd in self.0.iter_mut() {
            if *reg_opnd == Some(opnd) {
                *reg_opnd = None;
                return true;
            }
        }
        false
    }

    /// Find an available register and return the index of it.
    fn find_unused_reg(&self, opnd: RegOpnd) -> Option<usize> {
        let num_regs = get_option!(num_temp_regs);
        if num_regs == 0 {
            return None;
        }
        assert!(num_regs <= MAX_MAPPED_REGS);

        // If the default index for the operand is available, use that to minimize
        // discrepancies among Contexts.
        let default_idx = match opnd {
            RegOpnd::Stack(stack_idx) => stack_idx.as_usize() % num_regs,
            RegOpnd::Local(local_idx) => num_regs - (local_idx.as_usize() % num_regs) - 1,
        };
        if self.0[default_idx].is_none() {
            return Some(default_idx);
        }

        // If not, pick any other available register. Like default indexes, prefer
        // lower indexes for Stack, and higher indexes for Local.
        let mut index_temps = self.0.iter().enumerate();
        match opnd {
            RegOpnd::Stack(_) => index_temps.find(|(_, reg_opnd)| reg_opnd.is_none()),
            RegOpnd::Local(_) => index_temps.rev().find(|(_, reg_opnd)| reg_opnd.is_none()),
        }.map(|(index, _)| index)
    }

    /// Return a vector of RegOpnds that have an allocated register
    pub fn get_reg_opnds(&self) -> Vec<RegOpnd> {
        self.0.iter().filter_map(|&reg_opnd| reg_opnd).collect()
    }

    /// Count the number of registers that store a different operand from `dst`.
    pub fn diff(&self, dst: RegMapping) -> usize {
        self.0.iter().enumerate().filter(|&(reg_idx, &reg)| reg != dst.0[reg_idx]).count()
    }
}

impl fmt::Debug for RegMapping {
    /// Print `[None, ...]` instead of the default `RegMapping([None, ...])`
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{:?}", self.0)
    }
}

/// Maximum value of the chain depth (should fit in 5 bits)
const CHAIN_DEPTH_MAX: u8 = 0b11111; // 31

/// Code generation context
/// Contains information we can use to specialize/optimize code
#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
pub struct Context {
    // Number of values currently on the temporary stack
    stack_size: u8,

    // Offset of the JIT SP relative to the interpreter SP
    // This represents how far the JIT's SP is from the "real" SP
    sp_offset: i8,

    /// Which stack temps or locals are in a register
    reg_mapping: RegMapping,

    // Depth of this block in the sidechain (eg: inline-cache chain)
    // 5 bits, max 31 (see CHAIN_DEPTH_MAX)
    chain_depth: u8,

    // Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing])
    is_return_landing: bool,

    // Whether the compilation of this code has been deferred ([Self::is_deferred])
    is_deferred: bool,

    // Type we track for self
    self_type: Type,

    // Local variable types we keep track of
    local_types: [Type; MAX_CTX_LOCALS],

    // Temp mapping type/local_idx we track
    temp_mapping: [TempMapping; MAX_CTX_TEMPS],

    /// A pointer to a block ISEQ supplied by the caller. 0 if not inlined.
    inline_block: Option<IseqPtr>,
}

#[derive(Clone)]
pub struct BitVector {
    // Flat vector of bytes to write into
    bytes: Vec<u8>,

    // Number of bits taken out of bytes allocated
    num_bits: usize,
}

impl BitVector {
    pub fn new() -> Self {
        Self {
            bytes: Vec::with_capacity(4096),
            num_bits: 0,
        }
    }

    #[allow(unused)]
    pub fn num_bits(&self) -> usize {
        self.num_bits
    }

    // Total number of bytes taken
    #[allow(unused)]
    pub fn num_bytes(&self) -> usize {
        (self.num_bits / 8) + if (self.num_bits % 8) != 0 { 1 } else { 0 }
    }

    // Write/append an unsigned integer value
    fn push_uint(&mut self, mut val: u64, mut num_bits: usize) {
        assert!(num_bits <= 64);

        // Mask out bits above the number of bits requested
        let mut val_bits = val;
        if num_bits < 64 {
            val_bits &= (1 << num_bits) - 1;
            assert!(val == val_bits);
        }

        // Number of bits encoded in the last byte
        let rem_bits = self.num_bits % 8;

        // Encode as many bits as we can in this last byte
        if rem_bits != 0 {
            let num_enc = std::cmp::min(num_bits, 8 - rem_bits);
            let bit_mask = (1 << num_enc) - 1;
            let frac_bits = (val & bit_mask) << rem_bits;
            let frac_bits: u8 = frac_bits.try_into().unwrap();
            let last_byte_idx = self.bytes.len() - 1;
            self.bytes[last_byte_idx] |= frac_bits;
            self.num_bits += num_enc;
            num_bits -= num_enc;
            val >>= num_enc;
        }

        // While we have bits left to encode
        while num_bits > 0 {
            // Grow with a 1.2x growth factor instead of 2x
            assert!(self.num_bits % 8 == 0);
            let num_bytes = self.num_bits / 8;
            if num_bytes == self.bytes.capacity() {
                self.bytes.reserve_exact(self.bytes.len() / 5);
            }

            let bits = val & 0xFF;
            let bits: u8 = bits.try_into().unwrap();
            self.bytes.push(bits);

            let bits_to_encode = std::cmp::min(num_bits, 8);
            self.num_bits += bits_to_encode;
            num_bits -= bits_to_encode;
            val >>= bits_to_encode;
        }
    }

    fn push_u8(&mut self, val: u8) {
        self.push_uint(val as u64, 8);
    }

    fn push_u5(&mut self, val: u8) {
        assert!(val <= 0b11111);
        self.push_uint(val as u64, 5);
    }

    fn push_u4(&mut self, val: u8) {
        assert!(val <= 0b1111);
        self.push_uint(val as u64, 4);
    }

    fn push_u3(&mut self, val: u8) {
        assert!(val <= 0b111);
        self.push_uint(val as u64, 3);
    }

    fn push_u2(&mut self, val: u8) {
        assert!(val <= 0b11);
        self.push_uint(val as u64, 2);
    }

    fn push_u1(&mut self, val: u8) {
        assert!(val <= 0b1);
        self.push_uint(val as u64, 1);
    }

    fn push_bool(&mut self, val: bool) {
        self.push_u1(if val { 1 } else { 0 });
    }

    // Push a context encoding opcode
    fn push_op(&mut self, op: CtxOp) {
        self.push_u4(op as u8);
    }

    // Read a uint value at a given bit index
    // The bit index is incremented after the value is read
    fn read_uint(&self, bit_idx: &mut usize, mut num_bits: usize) -> u64 {
        let start_bit_idx = *bit_idx;
        let mut cur_idx = *bit_idx;

        // Read the bits in the first byte
        let bit_mod = cur_idx % 8;
        let bits_in_byte = self.bytes[cur_idx / 8] >> bit_mod;
        let num_bits_in_byte = std::cmp::min(num_bits, 8 - bit_mod);
        cur_idx += num_bits_in_byte;
        num_bits -= num_bits_in_byte;
        let mut out_bits = (bits_in_byte as u64) & ((1 << num_bits_in_byte) - 1);

        // While we have bits left to read
        while num_bits > 0 {
            let num_bits_in_byte = std::cmp::min(num_bits, 8);
            assert!(cur_idx % 8 == 0);
            let byte = self.bytes[cur_idx / 8] as u64;
            let bits_in_byte = byte & ((1 << num_bits) - 1);
            out_bits |= bits_in_byte << (cur_idx - start_bit_idx);

            // Move to the next byte/offset
            cur_idx += num_bits_in_byte;
            num_bits -= num_bits_in_byte;
        }

        // Update the read index
        *bit_idx = cur_idx;

        out_bits
    }
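
    // Worked example (illustrative only): push_uint(0b101, 3) followed by
    // push_uint(0b1, 1) packs bits least-significant-first into the first
    // byte, leaving bytes == [0b0000_1101] and num_bits == 4. A subsequent
    // read_uint(&mut 3, 1) returns that last bit, 0b1.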
    fn read_u8(&self, bit_idx: &mut usize) -> u8 {
        self.read_uint(bit_idx, 8) as u8
    }

    fn read_u5(&self, bit_idx: &mut usize) -> u8 {
        self.read_uint(bit_idx, 5) as u8
    }

    fn read_u4(&self, bit_idx: &mut usize) -> u8 {
        self.read_uint(bit_idx, 4) as u8
    }

    fn read_u3(&self, bit_idx: &mut usize) -> u8 {
        self.read_uint(bit_idx, 3) as u8
    }

    fn read_u2(&self, bit_idx: &mut usize) -> u8 {
        self.read_uint(bit_idx, 2) as u8
    }

    fn read_u1(&self, bit_idx: &mut usize) -> u8 {
        self.read_uint(bit_idx, 1) as u8
    }

    fn read_bool(&self, bit_idx: &mut usize) -> bool {
        self.read_u1(bit_idx) != 0
    }

    fn read_op(&self, bit_idx: &mut usize) -> CtxOp {
        unsafe { std::mem::transmute(self.read_u4(bit_idx)) }
    }
}

impl fmt::Debug for BitVector {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // We print the higher bytes first
        for (idx, byte) in self.bytes.iter().enumerate().rev() {
            write!(f, "{:08b}", byte)?;

            // Insert a separator between each byte
            if idx > 0 {
                write!(f, "|")?;
            }
        }

        Ok(())
    }
}

#[cfg(test)]
mod bitvector_tests {
    use super::*;

    #[test]
    fn write_3() {
        let mut arr = BitVector::new();
        arr.push_uint(3, 2);
        assert!(arr.read_uint(&mut 0, 2) == 3);
    }

    #[test]
    fn write_11() {
        let mut arr = BitVector::new();
        arr.push_uint(1, 1);
        arr.push_uint(1, 1);
        assert!(arr.read_uint(&mut 0, 2) == 3);
    }

    #[test]
    fn write_11_overlap() {
        let mut arr = BitVector::new();
        arr.push_uint(0, 7);
        arr.push_uint(3, 2);
        arr.push_uint(1, 1);

        //dbg!(arr.read_uint(7, 2));
        assert!(arr.read_uint(&mut 7, 2) == 3);
    }

    #[test]
    fn write_ff_0() {
        let mut arr = BitVector::new();
        arr.push_uint(0xFF, 8);
        assert!(arr.read_uint(&mut 0, 8) == 0xFF);
    }

    #[test]
    fn write_ff_3() {
        // Write 0xFF at bit index 3
        let mut arr = BitVector::new();
        arr.push_uint(0, 3);
        arr.push_uint(0xFF, 8);
        assert!(arr.read_uint(&mut 3, 8) == 0xFF);
    }

    #[test]
    fn write_ff_sandwich() {
        // Write 0xFF sandwiched between zeros
        let mut arr = BitVector::new();
        arr.push_uint(0, 3);
        arr.push_u8(0xFF);
        arr.push_uint(0, 3);
        assert!(arr.read_uint(&mut 3, 8) == 0xFF);
    }

    #[test]
    fn write_read_u32_max() {
        let mut arr = BitVector::new();
        arr.push_uint(0xFF_FF_FF_FF, 32);
        assert!(arr.read_uint(&mut 0, 32) == 0xFF_FF_FF_FF);
    }

    #[test]
    fn write_read_u32_max_64b() {
        let mut arr = BitVector::new();
        arr.push_uint(0xFF_FF_FF_FF, 64);
        assert!(arr.read_uint(&mut 0, 64) == 0xFF_FF_FF_FF);
    }

    #[test]
    fn write_read_u64_max() {
        let mut arr = BitVector::new();
        arr.push_uint(u64::MAX, 64);
        assert!(arr.read_uint(&mut 0, 64) == u64::MAX);
    }

    #[test]
    fn encode_default() {
        let mut bits = BitVector::new();
        let ctx = Context::default();
        let start_idx = ctx.encode_into(&mut bits);
        assert!(start_idx == 0);
        assert!(bits.num_bits() > 0);
        assert!(bits.num_bytes() > 0);

        // Make sure that the round trip matches the input
        let ctx2 = Context::decode_from(&bits, 0);
        assert!(ctx2 == ctx);
    }

    #[test]
    fn encode_default_2x() {
        let mut bits = BitVector::new();

        let ctx0 = Context::default();
        let idx0 = ctx0.encode_into(&mut bits);

        let mut ctx1 = Context::default();
        ctx1.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]);
        let idx1 = ctx1.encode_into(&mut bits);

        // Make sure that we can encode two contexts successively
        let ctx0_dec = Context::decode_from(&bits, idx0);
        let ctx1_dec = Context::decode_from(&bits, idx1);
        assert!(ctx0_dec == ctx0);
        assert!(ctx1_dec == ctx1);
    }

    #[test]
    fn regress_reg_mapping() {
        let mut bits = BitVector::new();
        let mut ctx = Context::default();
        ctx.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]);
        ctx.encode_into(&mut bits);

        let b0 = bits.read_u1(&mut 0);
        assert!(b0 == 1);

        // Make sure that the round trip matches the input
        let ctx2 = Context::decode_from(&bits, 0);
        assert!(ctx2 == ctx);
    }
}
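
// Illustrative sanity checks for the Type lattice: specific C-class types flow
// into their T-flag-level counterparts and into Unknown, but never sideways or
// back down. A minimal sketch, exercising only behavior visible in Type::diff
// and Type::from above.
#[cfg(test)]
mod type_tests {
    use super::*;

    #[test]
    fn diff_lattice() {
        assert_eq!(Type::CArray.diff(Type::TArray), TypeDiff::Compatible(1));
        assert_eq!(Type::TArray.diff(Type::UnknownHeap), TypeDiff::Compatible(1));
        assert_eq!(Type::Fixnum.diff(Type::Unknown), TypeDiff::Compatible(1));
        // Widening is one-way: a TArray is not necessarily a CArray.
        assert_eq!(Type::TArray.diff(Type::CArray), TypeDiff::Incompatible);
        assert_eq!(Type::Fixnum.diff(Type::Flonum), TypeDiff::Incompatible);
    }

    #[test]
    fn from_special_consts() {
        assert_eq!(Type::from(Qnil), Type::Nil);
        assert_eq!(Type::from(Qtrue), Type::True);
        assert_eq!(Type::from(Qfalse), Type::False);
    }
}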
// Context encoding opcodes (4 bits)
#[derive(Debug, Copy, Clone)]
#[repr(u8)]
enum CtxOp {
    // Self type (4 bits)
    SetSelfType = 0,

    // Local idx (3 bits), temp type (4 bits)
    SetLocalType,

    // Map stack temp to a known type
    // Temp idx (3 bits), known type (4 bits)
    SetTempType,

    // Map a stack temp to a local variable
    // Temp idx (3 bits), local idx (3 bits)
    MapTempLocal,

    // Map a stack temp to self
    // Temp idx (3 bits)
    MapTempSelf,

    // Set inline block pointer (8 bytes)
    SetInlineBlock,

    // End of encoding
    EndOfCode,
}

// Number of entries in the context cache
const CTX_ENCODE_CACHE_SIZE: usize = 1024;
const CTX_DECODE_CACHE_SIZE: usize = 1024;

// Cache of the last contexts encoded/decoded
// Empirically this saves a few percent of memory and speeds up compilation
// We can experiment with varying the size of this cache
pub type CtxEncodeCache = [(Context, u32); CTX_ENCODE_CACHE_SIZE];
static mut CTX_ENCODE_CACHE: Option<Box<CtxEncodeCache>> = None;

// Cache of the last contexts encoded/decoded
// This speeds up compilation
pub type CtxDecodeCache = [(Context, u32); CTX_DECODE_CACHE_SIZE];
static mut CTX_DECODE_CACHE: Option<Box<CtxDecodeCache>> = None;

// Size of the context cache in bytes
pub const CTX_ENCODE_CACHE_BYTES: usize = std::mem::size_of::<CtxEncodeCache>();
pub const CTX_DECODE_CACHE_BYTES: usize = std::mem::size_of::<CtxDecodeCache>();

impl Context {
    // Encode a context into the global context data, or return
    // a cached previously encoded offset if one is found
    pub fn encode(&self) -> u32 {
        incr_counter!(num_contexts_encoded);

        if *self == Context::default() {
            incr_counter!(context_cache_hits);
            return 0;
        }

        if let Some(idx) = Self::encode_cache_get(self) {
            incr_counter!(context_cache_hits);
            debug_assert!(Self::decode(idx) == *self);
            return idx;
        }

        let context_data = CodegenGlobals::get_context_data();

        // Make sure we don't use offset 0 because
        // it's reserved for the default context
        if context_data.num_bits() == 0 {
            context_data.push_u1(0);
        }

        let idx = self.encode_into(context_data);
        let idx: u32 = idx.try_into().unwrap();

        // Save this offset into the cache
        Self::encode_cache_set(self, idx);
        Self::decode_cache_set(self, idx);

        // In debug mode, check that the round-trip decoding always matches
        debug_assert!(Self::decode(idx) == *self);

        idx
    }

    pub fn decode(start_idx: u32) -> Context {
        if start_idx == 0 {
            return Context::default();
        };

        if let Some(ctx) = Self::decode_cache_get(start_idx) {
            return ctx;
        }

        let context_data = CodegenGlobals::get_context_data();
        let ctx = Self::decode_from(context_data, start_idx as usize);

        Self::encode_cache_set(&ctx, start_idx);
        Self::decode_cache_set(&ctx, start_idx);

        ctx
    }

    // Store an entry in a cache of recently encoded/decoded contexts for encoding
    fn encode_cache_set(ctx: &Context, idx: u32) {
        // Compute the hash for this context
        let mut hasher = DefaultHasher::new();
        ctx.hash(&mut hasher);
        let ctx_hash = hasher.finish() as usize;

        unsafe {
            // Lazily initialize the context cache
            if CTX_ENCODE_CACHE == None {
                // Here we use the vec syntax to avoid allocating the large table on the stack,
                // as this can cause a stack overflow
                let tbl = vec![(Context::default(), 0); CTX_ENCODE_CACHE_SIZE].into_boxed_slice().try_into().unwrap();
                CTX_ENCODE_CACHE = Some(tbl);
            }

            // Write a cache entry for this context
            let cache = CTX_ENCODE_CACHE.as_mut().unwrap();
            cache[ctx_hash % CTX_ENCODE_CACHE_SIZE] = (*ctx, idx);
        }
    }

    // Store an entry in a cache of recently encoded/decoded contexts for decoding
    fn decode_cache_set(ctx: &Context, idx: u32) {
        unsafe {
            // Lazily initialize the context cache
            if CTX_DECODE_CACHE == None {
                // Here we use the vec syntax to avoid allocating the large table on the stack,
                // as this can cause a stack overflow
                let tbl = vec![(Context::default(), 0); CTX_DECODE_CACHE_SIZE].into_boxed_slice().try_into().unwrap();
                CTX_DECODE_CACHE = Some(tbl);
            }

            // Write a cache entry for this context
            let cache = CTX_DECODE_CACHE.as_mut().unwrap();
            cache[idx as usize % CTX_DECODE_CACHE_SIZE] = (*ctx, idx);
        }
    }

    // Lookup the context in a cache of recently encoded/decoded contexts for encoding
    fn encode_cache_get(ctx: &Context) -> Option<u32> {
        // Compute the hash for this context
        let mut hasher = DefaultHasher::new();
        ctx.hash(&mut hasher);
        let ctx_hash = hasher.finish() as usize;

        unsafe {
            if CTX_ENCODE_CACHE == None {
                return None;
            }

            let cache = CTX_ENCODE_CACHE.as_mut().unwrap();

            // Check that the context for this cache entry matches
            let cache_entry = &cache[ctx_hash % CTX_ENCODE_CACHE_SIZE];
            if cache_entry.0 == *ctx {
                debug_assert!(cache_entry.1 != 0);
                return Some(cache_entry.1);
            }

            return None;
        }
    }

    // Lookup the context in a cache of recently encoded/decoded contexts for decoding
    fn decode_cache_get(start_idx: u32) -> Option<Context> {
        unsafe {
            if CTX_DECODE_CACHE == None {
                return None;
            }

            let cache = CTX_DECODE_CACHE.as_mut().unwrap();

            // Check that the start_idx for this cache entry matches
            let cache_entry = &cache[start_idx as usize % CTX_DECODE_CACHE_SIZE];
            if cache_entry.1 == start_idx {
                return Some(cache_entry.0);
            }

            return None;
        }
    }

    // Encode into a compressed context representation in a bit vector
    fn encode_into(&self, bits: &mut BitVector) -> usize {
        let start_idx = bits.num_bits();

        // Most of the time, the stack size is small and sp offset has the same value
        if (self.stack_size as i64) == (self.sp_offset as i64) && self.stack_size < 4 {
            // One single bit to signify a compact stack_size/sp_offset encoding
            debug_assert!(self.sp_offset >= 0);
            bits.push_u1(1);
            bits.push_u2(self.stack_size);
        } else {
            // Full stack size encoding
            bits.push_u1(0);

            // Number of values currently on the temporary stack
            bits.push_u8(self.stack_size);

            // sp_offset: i8,
            bits.push_u8(self.sp_offset as u8);
        }

        // Which stack temps or locals are in a register
        for &temp in self.reg_mapping.0.iter() {
            if let Some(temp) = temp {
                bits.push_u1(1); // Some
                match temp {
                    RegOpnd::Stack(stack_idx) => {
                        bits.push_u1(0); // Stack
                        bits.push_u3(stack_idx);
                    }
                    RegOpnd::Local(local_idx) => {
                        bits.push_u1(1); // Local
                        bits.push_u3(local_idx);
                    }
                }
            } else {
                bits.push_u1(0); // None
            }
        }

        bits.push_bool(self.is_deferred);
        bits.push_bool(self.is_return_landing);

        // The chain depth is most often 0 or 1
        if self.chain_depth < 2 {
            bits.push_u1(0);
            bits.push_u1(self.chain_depth);
        } else {
            bits.push_u1(1);
            bits.push_u5(self.chain_depth);
        }

        // Encode the self type if known
        if self.self_type != Type::Unknown {
            bits.push_op(CtxOp::SetSelfType);
            bits.push_u4(self.self_type as u8);
        }

        // Encode the local types if known
        for local_idx in 0..MAX_CTX_LOCALS {
            let t = self.get_local_type(local_idx);
            if t != Type::Unknown {
                bits.push_op(CtxOp::SetLocalType);
                bits.push_u3(local_idx as u8);
                bits.push_u4(t as u8);
            }
        }

        // Encode stack temps
        for stack_idx in 0..MAX_CTX_TEMPS {
            let mapping = self.get_temp_mapping(stack_idx);

            match mapping {
                MapToStack(temp_type) => {
                    if temp_type != Type::Unknown {
                        // Temp idx (3 bits), known type (4 bits)
                        bits.push_op(CtxOp::SetTempType);
                        bits.push_u3(stack_idx as u8);
                        bits.push_u4(temp_type as u8);
                    }
                }

                MapToLocal(local_idx) => {
                    bits.push_op(CtxOp::MapTempLocal);
                    bits.push_u3(stack_idx as u8);
                    bits.push_u3(local_idx as u8);
                }

                MapToSelf => {
                    // Temp idx (3 bits)
                    bits.push_op(CtxOp::MapTempSelf);
                    bits.push_u3(stack_idx as u8);
                }
            }
        }

        // Inline block pointer
        if let Some(iseq) = self.inline_block {
            bits.push_op(CtxOp::SetInlineBlock);
            bits.push_uint(iseq as u64, 64);
        }

        // TODO: should we add an op for end-of-encoding,
        // or store num ops at the beginning?
        bits.push_op(CtxOp::EndOfCode);

        start_idx
    }

    // Decode a compressed context representation from a bit vector
    fn decode_from(bits: &BitVector, start_idx: usize) -> Context {
        let mut ctx = Context::default();
        let mut idx = start_idx;

        // Small vs large stack size encoding
        if bits.read_u1(&mut idx) == 1 {
            ctx.stack_size = bits.read_u2(&mut idx);
            ctx.sp_offset = ctx.stack_size as i8;
        } else {
            ctx.stack_size = bits.read_u8(&mut idx);
            let sp_offset_bits = bits.read_u8(&mut idx);
            ctx.sp_offset = sp_offset_bits as i8;

            // If the top bit is set, then the sp offset must be negative
            debug_assert!(!( (sp_offset_bits & 0x80) != 0 && ctx.sp_offset > 0 ));
        }

        // Which stack temps or locals are in a register
        for index in 0..MAX_MAPPED_REGS {
            if bits.read_u1(&mut idx) == 1 { // Some
                let temp = if bits.read_u1(&mut idx) == 0 { // RegOpnd::Stack
                    RegOpnd::Stack(bits.read_u3(&mut idx))
                } else {
                    RegOpnd::Local(bits.read_u3(&mut idx))
                };
                ctx.reg_mapping.0[index] = Some(temp);
            }
        }

        ctx.is_deferred = bits.read_bool(&mut idx);
        ctx.is_return_landing = bits.read_bool(&mut idx);

        if bits.read_u1(&mut idx) == 0 {
            ctx.chain_depth = bits.read_u1(&mut idx)
        } else {
            ctx.chain_depth = bits.read_u5(&mut idx)
        }

        loop {
            //println!("reading op");
            let op = bits.read_op(&mut idx);
            //println!("got op {:?}", op);

            match op {
                CtxOp::SetSelfType => {
                    ctx.self_type = unsafe { transmute(bits.read_u4(&mut idx)) };
                }

                CtxOp::SetLocalType => {
                    let local_idx = bits.read_u3(&mut idx) as usize;
                    let t = unsafe { transmute(bits.read_u4(&mut idx)) };
                    ctx.set_local_type(local_idx, t);
                }

                // Map temp to stack (known type)
                CtxOp::SetTempType => {
                    let temp_idx = bits.read_u3(&mut idx) as usize;
                    let temp_type = unsafe { transmute(bits.read_u4(&mut idx)) };
                    ctx.set_temp_mapping(temp_idx, TempMapping::MapToStack(temp_type));
                }

                // Map temp to local
                CtxOp::MapTempLocal => {
                    let temp_idx = bits.read_u3(&mut idx) as usize;
                    let local_idx = bits.read_u3(&mut idx);
                    ctx.set_temp_mapping(temp_idx, TempMapping::MapToLocal(local_idx));
                }

                // Map temp to self
                CtxOp::MapTempSelf => {
                    let temp_idx = bits.read_u3(&mut idx) as usize;
                    ctx.set_temp_mapping(temp_idx, TempMapping::MapToSelf);
                }

                // Inline block pointer
                CtxOp::SetInlineBlock => {
                    ctx.inline_block = Some(bits.read_uint(&mut idx, 64) as IseqPtr);
                }

                CtxOp::EndOfCode => break,
            }
        }

        ctx
    }
}
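
// Illustrative round-trip check for the compressed context encoding: a context
// with a small stack and a known self type should decode back to exactly the
// same context. A minimal sketch that assumes the compact stack_size/sp_offset
// path (stack_size == sp_offset < 4) taken by encode_into() above.
#[cfg(test)]
mod context_encoding_tests {
    use super::*;

    #[test]
    fn roundtrip_with_self_type() {
        let mut bits = BitVector::new();
        let mut ctx = Context::default();
        ctx.stack_size = 3;
        ctx.sp_offset = 3;
        ctx.self_type = Type::Fixnum;

        let start_idx = ctx.encode_into(&mut bits);
        assert_eq!(Context::decode_from(&bits, start_idx), ctx);
    }
}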
/// Tuple of (iseq, idx) used to identify basic blocks.
/// There are a lot of blockid objects so we try to keep the size small.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[repr(packed)]
pub struct BlockId {
    /// Instruction sequence
    pub iseq: IseqPtr,

    /// Index in the iseq where the block starts
    pub idx: u16,
}

/// Branch code shape enumeration
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum BranchShape {
    Next0,   // Target 0 is next
    Next1,   // Target 1 is next
    Default, // Neither target is next
}

#[derive(Clone, Debug, Eq, PartialEq)]
pub enum BranchGenFn {
    BranchIf(Cell<BranchShape>),
    BranchNil(Cell<BranchShape>),
    BranchUnless(Cell<BranchShape>),
    JumpToTarget0(Cell<BranchShape>),
    JNZToTarget0,
    JZToTarget0,
    JBEToTarget0,
    JBToTarget0,
    JOMulToTarget0,
    JITReturn,
}

impl BranchGenFn {
    pub fn call(&self, asm: &mut Assembler, target0: Target, target1: Option<Target>) {
        match self {
            BranchGenFn::BranchIf(shape) => {
                match shape.get() {
                    BranchShape::Next0 => asm.jz(target1.unwrap()),
                    BranchShape::Next1 => asm.jnz(target0),
                    BranchShape::Default => {
                        asm.jnz(target0);
                        asm.jmp(target1.unwrap());
                    }
                }
            }
            BranchGenFn::BranchNil(shape) => {
                match shape.get() {
                    BranchShape::Next0 => asm.jne(target1.unwrap()),
                    BranchShape::Next1 => asm.je(target0),
                    BranchShape::Default => {
                        asm.je(target0);
                        asm.jmp(target1.unwrap());
                    }
                }
            }
            BranchGenFn::BranchUnless(shape) => {
                match shape.get() {
                    BranchShape::Next0 => asm.jnz(target1.unwrap()),
                    BranchShape::Next1 => asm.jz(target0),
                    BranchShape::Default => {
                        asm.jz(target0);
                        asm.jmp(target1.unwrap());
                    }
                }
            }
            BranchGenFn::JumpToTarget0(shape) => {
                if shape.get() == BranchShape::Next1 {
                    panic!("Branch shape Next1 not allowed in JumpToTarget0!");
                }
                if shape.get() == BranchShape::Default {
                    asm.jmp(target0);
                }
            }
            BranchGenFn::JNZToTarget0 => {
                asm.jnz(target0)
            }
            BranchGenFn::JZToTarget0 => {
                asm.jz(target0)
            }
            BranchGenFn::JBEToTarget0 => {
                asm.jbe(target0)
            }
            BranchGenFn::JBToTarget0 => {
                asm.jb(target0)
            }
            BranchGenFn::JOMulToTarget0 => {
                asm.jo_mul(target0)
            }
            BranchGenFn::JITReturn => {
                asm_comment!(asm, "update cfp->jit_return");
                let jit_return = RUBY_OFFSET_CFP_JIT_RETURN - RUBY_SIZEOF_CONTROL_FRAME as i32;
                let raw_ptr = asm.lea_jump_target(target0);
                asm.mov(Opnd::mem(64, CFP, jit_return), raw_ptr);
            }
        }
    }

    pub fn get_shape(&self) -> BranchShape {
        match self {
            BranchGenFn::BranchIf(shape)
            | BranchGenFn::BranchNil(shape)
            | BranchGenFn::BranchUnless(shape)
            | BranchGenFn::JumpToTarget0(shape) => shape.get(),
            BranchGenFn::JNZToTarget0
            | BranchGenFn::JZToTarget0
            | BranchGenFn::JBEToTarget0
            | BranchGenFn::JBToTarget0
            | BranchGenFn::JOMulToTarget0
            | BranchGenFn::JITReturn => BranchShape::Default,
        }
    }

    pub fn set_shape(&self, new_shape: BranchShape) {
        match self {
            BranchGenFn::BranchIf(shape)
            | BranchGenFn::BranchNil(shape)
            | BranchGenFn::BranchUnless(shape) => {
                shape.set(new_shape);
            }
            BranchGenFn::JumpToTarget0(shape) => {
                if new_shape == BranchShape::Next1 {
                    panic!("Branch shape Next1 not allowed in JumpToTarget0!");
                }
                shape.set(new_shape);
            }
            BranchGenFn::JNZToTarget0
            | BranchGenFn::JZToTarget0
            | BranchGenFn::JBEToTarget0
            | BranchGenFn::JBToTarget0
            | BranchGenFn::JOMulToTarget0
            | BranchGenFn::JITReturn => {
                assert_eq!(new_shape, BranchShape::Default);
            }
        }
    }
}

/// A place that a branch could jump to
#[derive(Debug, Clone)]
enum BranchTarget {
    Stub(Box<BranchStub>), // Not compiled yet
    Block(BlockRef),       // Already compiled
}

impl BranchTarget {
    fn get_address(&self) -> Option<CodePtr> {
        match self {
            BranchTarget::Stub(stub) => stub.address,
            BranchTarget::Block(blockref) => Some(unsafe { blockref.as_ref() }.start_addr),
        }
    }

    fn get_blockid(&self) -> BlockId {
        match self {
            BranchTarget::Stub(stub) => BlockId { iseq: stub.iseq.get(), idx: stub.iseq_idx },
            BranchTarget::Block(blockref) =>
                unsafe { blockref.as_ref() }.get_blockid(),
        }
    }

    fn get_ctx(&self) -> u32 {
        match self {
            BranchTarget::Stub(stub) => stub.ctx,
            BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx,
        }
    }

    fn get_block(&self) -> Option<BlockRef> {
        match self {
            BranchTarget::Stub(_) => None,
            BranchTarget::Block(blockref) => Some(*blockref),
        }
    }

    fn set_iseq(&self, iseq: IseqPtr) {
        match self {
            BranchTarget::Stub(stub) => stub.iseq.set(iseq),
            BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.iseq.set(iseq),
        }
    }
}

#[derive(Debug, Clone)]
struct BranchStub {
    address: Option<CodePtr>,
    iseq: Cell<IseqPtr>,
    iseq_idx: IseqIdx,
    ctx: u32,
}

/// Store info about an outgoing branch in a code segment
/// Note: care must be taken to minimize the size of branch objects
pub struct Branch {
    // Block this is attached to
    block: Cell<BlockRef>,

    // Positions where the generated code starts and ends
    start_addr: CodePtr,
    end_addr: Cell<CodePtr>, // exclusive

    // Branch target blocks and their contexts
    targets: [Cell<Option<Box<BranchTarget>>>; 2],

    // Branch code generation function
    gen_fn: BranchGenFn,
}

/// A [Branch] for a [Block] that is under construction.
/// Fields correspond, but may be `None` during construction.
pub struct PendingBranch {
    /// Allocation holder for the address of the constructed branch;
    /// in error paths the Box deallocates it.
    uninit_branch: Box<MaybeUninit<Branch>>,

    /// Branch code generation function
    gen_fn: BranchGenFn,

    /// Positions where the generated code starts and ends
    start_addr: Cell<Option<CodePtr>>,
    end_addr: Cell<Option<CodePtr>>, // exclusive

    /// Branch target blocks and their contexts
    targets: [Cell<Option<Box<BranchTarget>>>; 2],
}

impl Branch {
    // Compute the size of the branch code
    fn code_size(&self) -> usize {
        (self.end_addr.get().as_offset() - self.start_addr.as_offset()) as usize
    }

    /// Get the address of one of the branch destinations
    fn get_target_address(&self, target_idx: usize) -> Option<CodePtr> {
        unsafe {
            self.targets[target_idx]
                .ref_unchecked()
                .as_ref()
                .and_then(|target| target.get_address())
        }
    }

    fn get_stub_count(&self) -> usize {
        let mut count = 0;
        for target in self.targets.iter() {
            if unsafe {
                // SAFETY: no mutation
                matches!(
                    target.ref_unchecked().as_ref().map(Box::as_ref),
                    Some(BranchTarget::Stub(_))
                )
            } {
                count += 1;
            }
        }
        count
    }

    fn assert_layout(&self) {
        let shape = self.gen_fn.get_shape();
        assert!(
            !(shape == BranchShape::Default && 0 == self.code_size()),
            "zero-size branches are incorrect when code for neither target is adjacent"
            // One needs to issue some instruction to steer to the branch target
            // when falling through isn't an option.
        );
    }
}

impl std::fmt::Debug for Branch {
    // Can't derive this because `targets: !Copy` due to Cell.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let targets = unsafe {
            // SAFETY:
            // While the references are live for the result of this function,
            // no mutation happens because we are only calling derived fmt::Debug functions.
            [self.targets[0].as_ptr().as_ref().unwrap(), self.targets[1].as_ptr().as_ref().unwrap()]
        };

        formatter
            .debug_struct("Branch")
            .field("block", &self.block)
            .field("start", &self.start_addr)
            .field("end", &self.end_addr)
            .field("targets", &targets)
            .field("gen_fn", &self.gen_fn)
            .finish()
    }
}
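
// Illustrative check of the shape rules above: shapes live in a Cell so a
// branch can be re-shaped in place when adjacent code is regenerated. A
// minimal sketch exercising only get_shape()/set_shape().
#[cfg(test)]
mod branch_shape_tests {
    use super::*;

    #[test]
    fn set_shape_updates_in_place() {
        let gen_fn = BranchGenFn::BranchIf(Cell::new(BranchShape::Default));
        gen_fn.set_shape(BranchShape::Next1);
        assert_eq!(gen_fn.get_shape(), BranchShape::Next1);
    }
}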
impl PendingBranch {
    /// Set up a branch target at `target_idx`. Find an existing block to branch to
    /// or generate a stub for one.
    #[must_use]
    fn set_target(
        &self,
        target_idx: u32,
        target: BlockId,
        ctx: &Context,
        jit: &mut JITState,
    ) -> Option<CodePtr> {
        // If the block already exists
        if let Some(blockref) = find_block_version(target, ctx) {
            let block = unsafe { blockref.as_ref() };

            // Fill out the target with this block
            self.targets[target_idx.as_usize()]
                .set(Some(Box::new(BranchTarget::Block(blockref))));
            return Some(block.start_addr);
        }

        // Compress/encode the context
        let ctx = Context::encode(ctx);

        // The branch struct is uninitialized right now but has a stable address.
        // We make sure the stub runs after the branch is initialized.
        let branch_struct_addr = self.uninit_branch.as_ptr() as usize;

        let stub_addr = gen_branch_stub(ctx, jit.iseq, jit.get_ocb(), branch_struct_addr, target_idx);
        if let Some(stub_addr) = stub_addr {
            // Fill the branch target with a stub
            self.targets[target_idx.as_usize()].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
                address: Some(stub_addr),
                iseq: Cell::new(target.iseq),
                iseq_idx: target.idx,
                ctx,
            })))));
        }

        stub_addr
    }

    // Construct the branch and wire it up in the graph
    fn into_branch(mut self, uninit_block: BlockRef) -> BranchRef {
        // Make the branch
        let branch = Branch {
            block: Cell::new(uninit_block),
            start_addr: self.start_addr.get().unwrap(),
            end_addr: Cell::new(self.end_addr.get().unwrap()),
            targets: self.targets,
            gen_fn: self.gen_fn,
        };

        // Move it to the designated place on
        // the heap and unwrap MaybeUninit.
        self.uninit_branch.write(branch);
        let raw_branch: *mut MaybeUninit<Branch> = Box::into_raw(self.uninit_branch);
        let branchref = NonNull::new(raw_branch as *mut Branch).expect("no null from Box");

        // SAFETY: just allocated it
        let branch = unsafe { branchref.as_ref() };

        // For block branch targets, put the new branch in the
        // appropriate incoming list.
        for target in branch.targets.iter() {
            // SAFETY: no mutation
            let out_block: Option<BlockRef> = unsafe {
                target.ref_unchecked().as_ref().and_then(|target| target.get_block())
            };

            if let Some(out_block) = out_block {
                // SAFETY: These blockrefs come from set_target() which only puts blocks from
                // ISeqs, which are all initialized. Note that uninit_block isn't in any ISeq
                // payload yet.
                unsafe { out_block.as_ref() }.incoming.push(branchref);
            }
        }

        branch.assert_layout();
        incr_counter!(compiled_branch_count);

        branchref
    }
}

// Store info about code used on YJIT entry
pub struct Entry {
    // Positions where the generated code starts and ends
    start_addr: CodePtr,
    end_addr: CodePtr, // exclusive
}

/// An [Entry] that is under construction.
pub struct PendingEntry {
    pub uninit_entry: Box<MaybeUninit<Entry>>,
    start_addr: Cell<Option<CodePtr>>,
    end_addr: Cell<Option<CodePtr>>, // exclusive
}

impl PendingEntry {
    // Construct the entry in the heap
    pub fn into_entry(mut self) -> EntryRef {
        // Make the entry
        let entry = Entry {
            start_addr: self.start_addr.get().unwrap(),
            end_addr: self.end_addr.get().unwrap(),
        };

        // Move it to the designated place on the heap and unwrap MaybeUninit.
        self.uninit_entry.write(entry);
        let raw_entry: *mut MaybeUninit<Entry> = Box::into_raw(self.uninit_entry);
        NonNull::new(raw_entry as *mut Entry).expect("no null from Box")
    }
}

// In case a block is invalidated, this helps to remove all pointers to the block.
pub type CmePtr = *const rb_callable_method_entry_t;

/// Basic block version
/// Represents a portion of an iseq compiled with a given context
/// Note: care must be taken to minimize the size of block_t objects
#[derive(Debug)]
pub struct Block {
    // The byte code instruction sequence this is a version of.
    // Can change due to moving GC.
    iseq: Cell<IseqPtr>,

    // Index range covered by this version in `ISEQ_BODY(iseq)->iseq_encoded`.
    iseq_range: Range<IseqIdx>,

    // Context at the start of the block
    // This should never be mutated
    ctx: u32,

    // Positions where the generated code starts and ends
    start_addr: CodePtr,
    end_addr: Cell<CodePtr>,

    // List of incoming branches (from predecessors)
    incoming: MutableBranchList,

    // List of outgoing branches (to successors)
    // Infrequently mutated for control flow graph edits for saving memory.
    outgoing: MutableBranchList,

    // FIXME: should these be code pointers instead?
    // Offsets for GC managed objects in the mainline code block
    gc_obj_offsets: Box<[u32]>,

    // CME dependencies of this block, to help to remove all pointers to this
    // block in the system.
    cme_dependencies: Box<[Cell<CmePtr>]>,

    // Code address of an exit for `ctx` and `blockid`.
    // Used for block invalidation.
    entry_exit: Option<CodePtr>,
}

/// Pointer to a [Block].
///
/// # Safety
///
/// _Never_ derive a `&mut Block` from this and always use
/// [std::ptr::NonNull::as_ref] to get a `&Block`. `&'a mut`
/// in Rust asserts that there are no other references live
/// over the lifetime `'a`. This uniqueness assertion does
/// not hold in many situations for us, even when you ignore
/// the fact that our control flow graph can have cycles.
/// Here are just two examples where we have overlapping references:
/// - Yielding to a different OS thread within the same
///   ractor during compilation
/// - The GC calling [rb_yjit_iseq_mark] during compilation
///
/// Technically, for soundness, we also need to ensure that
/// we have the VM lock while the result of `as_ref()`
/// is live, so that no deallocation happens while the
/// shared reference is live. The vast majority of our code runs while
/// holding the VM lock, though.
pub type BlockRef = NonNull<Block>;

/// Pointer to a [Branch]. See [BlockRef] for notes about
/// proper usage.
pub type BranchRef = NonNull<Branch>;

/// Pointer to an entry that is already added to an ISEQ
pub type EntryRef = NonNull<Entry>;

/// List of block versions for a given blockid
type VersionList = Vec<BlockRef>;

/// Map from iseq indices to lists of versions for that given blockid
/// An instance of this is stored on each iseq
type VersionMap = Vec<VersionList>;

/// [Interior mutability][1] wrapper for a list of branches.
/// O(n) insertion, but space efficient. We generally expect
/// blocks to have only a few branches.
///
/// [1]: https://doc.rust-lang.org/std/cell/struct.UnsafeCell.html
#[repr(transparent)]
struct MutableBranchList(Cell<Box<[BranchRef]>>);

impl MutableBranchList {
    fn push(&self, branch: BranchRef) {
        // Temporarily move the boxed slice out of self.
        // oom=abort is load bearing here...
        let mut current_list = self.0.take().into_vec();
        current_list.push(branch);
        self.0.set(current_list.into_boxed_slice());
    }

    /// Iterate through branches in the list by moving out of the cell
    /// and then putting it back when done. Modifications to this cell
    /// during iteration will be discarded.
    ///
    /// Assumes panic=abort since panic=unwind during iteration would
    /// leave the cell empty.
    fn for_each(&self, mut f: impl FnMut(BranchRef)) {
        let list = self.0.take();
        for branch in list.iter() {
            f(*branch);
        }
        self.0.set(list);
    }

    /// Length of the list.
    fn len(&self) -> usize {
        // SAFETY: No cell mutation inside unsafe.
        unsafe { self.0.ref_unchecked().len() }
    }
}

impl fmt::Debug for MutableBranchList {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // SAFETY: the derived Clone for boxed slices does not mutate this Cell
        let branches = unsafe { self.0.ref_unchecked().clone() };

        formatter.debug_list().entries(branches.iter()).finish()
    }
}

/// This is all the data YJIT stores on an iseq.
/// This will be dynamically allocated by C code.
/// C code should pass an &mut IseqPayload to us
/// when calling into YJIT.
#[derive(Default)]
pub struct IseqPayload {
    // Basic block versions
    pub version_map: VersionMap,

    // Indexes of code pages used by this ISEQ
    pub pages: HashSet<usize>,

    // List of ISEQ entry codes
    pub entries: Vec<EntryRef>,

    // Blocks that are invalidated but are not yet deallocated.
    // The code GC will free them later.
    pub dead_blocks: Vec<BlockRef>,
}

impl IseqPayload {
    /// Remove all block versions from the payload and then return them as an iterator
    pub fn take_all_blocks(&mut self) -> impl Iterator<Item = BlockRef> {
        // Empty the blocks
        let version_map = mem::take(&mut self.version_map);

        // Turn it into an iterator that owns the blocks and return
        version_map.into_iter().flatten()
    }
}

/// Get the payload for an iseq. For safety it's up to the caller to ensure the returned `&mut`
/// upholds aliasing rules and that the argument is a valid iseq.
pub fn get_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> {
    let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
    let payload: *mut IseqPayload = payload.cast();
    unsafe { payload.as_mut() }
}

/// Get the payload object associated with an iseq. Create one if none exists.
pub fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
    type VoidPtr = *mut c_void;

    let payload_non_null = unsafe {
        let payload = rb_iseq_get_yjit_payload(iseq);
        if payload.is_null() {
            // Increment the compiled iseq count
            incr_counter!(compiled_iseq_count);

            // Allocate a new payload with Box and transfer ownership to the GC.
            // We drop the payload with Box::from_raw when the GC frees the iseq and calls us.
            // NOTE(alan): Sometimes we read from an iseq without ever writing to it.
            // We allocate in those cases anyways.
            let new_payload = IseqPayload::default();
            let new_payload = Box::into_raw(Box::new(new_payload));
            rb_iseq_set_yjit_payload(iseq, new_payload as VoidPtr);

            new_payload
        } else {
            payload as *mut IseqPayload
        }
    };

    // SAFETY: we should have the VM lock and all other Ruby threads should be asleep. So we have
    // exclusive mutable access.
    // Hmm, nothing seems to stop calling this on the same
    // iseq twice, though, which violates aliasing rules.
    unsafe { payload_non_null.as_mut() }.unwrap()
}

/// Iterate over all existing ISEQs
pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
    unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
        // SAFETY: points to the local below
        let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) };
        callback(iseq);
    }
    let mut data: &mut dyn FnMut(IseqPtr) = &mut callback;
    unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
}

/// Iterate over all on-stack ISEQs
pub fn for_each_on_stack_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
    unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
        // SAFETY: points to the local below
        let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) };
        callback(iseq);
    }
    let mut data: &mut dyn FnMut(IseqPtr) = &mut callback;
    unsafe { rb_jit_cont_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
}

/// Iterate over all on-stack ISEQ payloads
pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
    for_each_on_stack_iseq(|iseq| {
        if let Some(iseq_payload) = get_iseq_payload(iseq) {
            callback(iseq_payload);
        }
    });
}

/// Iterate over all NOT on-stack ISEQ payloads
pub fn for_each_off_stack_iseq_payload<F: FnMut(&mut IseqPayload)>(mut callback: F) {
    // Get all ISEQs on the heap. Note that rb_objspace_each_objects() runs GC first,
    // which could move ISEQ pointers when GC.auto_compact = true.
    // So for_each_on_stack_iseq() must be called after this, which doesn't run GC.
    let mut iseqs: Vec<IseqPtr> = vec![];
    for_each_iseq(|iseq| iseqs.push(iseq));

    // Get all ISEQs that are on a CFP of existing ECs.
    let mut on_stack_iseqs: HashSet<IseqPtr> = HashSet::new();
    for_each_on_stack_iseq(|iseq| { on_stack_iseqs.insert(iseq); });

    // Invoke the callback for iseqs - on_stack_iseqs
    for iseq in iseqs {
        if !on_stack_iseqs.contains(&iseq) {
            if let Some(iseq_payload) = get_iseq_payload(iseq) {
                callback(iseq_payload);
            }
        }
    }
}

/// Free the per-iseq payload
#[no_mangle]
pub extern "C" fn rb_yjit_iseq_free(iseq: IseqPtr) {
    // Free invariants for the ISEQ
    iseq_free_invariants(iseq);

    let payload = {
        let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
        if payload.is_null() {
            // Nothing to free.
            return;
        } else {
            payload as *mut IseqPayload
        }
    };

    // Take ownership of the payload with Box::from_raw().
    // It drops right before this function returns.
    // SAFETY: We got the pointer from Box::into_raw().
    let payload = unsafe { Box::from_raw(payload) };

    // Free all blocks in version_map. The GC doesn't free running iseqs.
    for versions in &payload.version_map {
        for block in versions {
            // SAFETY: blocks in the version_map are always well connected
            unsafe { free_block(*block, true) };
        }
    }

    // Free dead blocks
    for block in payload.dead_blocks {
        unsafe { free_block(block, false) };
    }

    // Free all entries
    for entryref in payload.entries.iter() {
        let entry = unsafe { Box::from_raw(entryref.as_ptr()) };
        mem::drop(entry);
    }

    // Increment the freed iseq count
    incr_counter!(freed_iseq_count);
}

/// GC callback for marking GC objects in the per-iseq payload.
#[no_mangle]
pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
    let payload = if payload.is_null() {
        // Nothing to mark.
        return;
    } else {
        // SAFETY: The GC takes the VM lock while marking, which
        // we assert, so we should be synchronized and data race free.
        //
        // For aliasing, having the VM lock hopefully also implies that no one
        // else has an overlapping &mut IseqPayload.
        unsafe {
            rb_yjit_assert_holding_vm_lock();
            &*(payload as *const IseqPayload)
        }
    };

    // For marking VALUEs written into the inline code block.
    // We don't write VALUEs in the outlined block.
    let cb: &CodeBlock = CodegenGlobals::get_inline_cb();

    for versions in &payload.version_map {
        for block in versions {
            // SAFETY: all blocks inside version_map are initialized.
            let block = unsafe { block.as_ref() };
            mark_block(block, cb, false);
        }
    }

    // Mark dead blocks, since there could be stubs pointing at them
    for blockref in &payload.dead_blocks {
        // SAFETY: dead blocks come from version_map, which only have initialized blocks
        let block = unsafe { blockref.as_ref() };
        mark_block(block, cb, true);
    }

    return;

    fn mark_block(block: &Block, cb: &CodeBlock, dead: bool) {
        unsafe { rb_gc_mark_movable(block.iseq.get().into()) };

        // Mark method entry dependencies
        for cme_dep in block.cme_dependencies.iter() {
            unsafe { rb_gc_mark_movable(cme_dep.get().into()) };
        }

        // Mark outgoing branch entries
        block.outgoing.for_each(|branch| {
            let branch = unsafe { branch.as_ref() };
            for target in branch.targets.iter() {
                // SAFETY: no mutation inside unsafe
                let target_iseq = unsafe {
                    target.ref_unchecked().as_ref().and_then(|target| {
                        // Avoid get_blockid() on blockref. Can be dangling on dead blocks,
                        // and the iseq housing the block already naturally handles it.
                        if target.get_block().is_some() {
                            None
                        } else {
                            Some(target.get_blockid().iseq)
                        }
                    })
                };

                if let Some(target_iseq) = target_iseq {
                    unsafe { rb_gc_mark_movable(target_iseq.into()) };
                }
            }
        });

        // Mark references to objects in generated code.
        // Skip for dead blocks since they shouldn't run.
        if !dead {
            for offset in block.gc_obj_offsets.iter() {
                let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb);
                // Creating an unaligned pointer is well defined unlike in C.
                let value_address = value_address as *const VALUE;

                // SAFETY: these point to YJIT's code buffer
                unsafe {
                    let object = value_address.read_unaligned();
                    rb_gc_mark_movable(object);
                };
            }
        }
    }
}

/// GC callback for updating GC objects in the per-iseq payload.
/// This is a mirror of [rb_yjit_iseq_mark].
#[no_mangle]
pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) {
    let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
    let payload = if payload.is_null() {
        // Nothing to update.
        return;
    } else {
        // SAFETY: The GC takes the VM lock while marking, which
        // we assert, so we should be synchronized and data race free.
        //
        // For aliasing, having the VM lock hopefully also implies that no one
        // else has an overlapping &mut IseqPayload.
        unsafe {
            rb_yjit_assert_holding_vm_lock();
            &*(payload as *const IseqPayload)
        }
    };

    // Evict other threads from generated code since we are about to patch them.
    // Also acts as an assert that we hold the VM lock.
    unsafe { rb_vm_barrier() };

    // For updating VALUEs written into the inline code block.
    let cb = CodegenGlobals::get_inline_cb();

    for versions in &payload.version_map {
        for version in versions {
            // SAFETY: all blocks inside version_map are initialized
            let block = unsafe { version.as_ref() };
            block_update_references(block, cb, false);
        }
    }

    // Update dead blocks, since there could be stubs pointing at them
    for blockref in &payload.dead_blocks {
        // SAFETY: dead blocks come from version_map, which only have initialized blocks
        let block = unsafe { blockref.as_ref() };
        block_update_references(block, cb, true);
    }

    // Note that we would have returned already if YJIT is off.
    cb.mark_all_executable();
    CodegenGlobals::get_outlined_cb()
        .unwrap()
        .mark_all_executable();

    return;

    fn block_update_references(block: &Block, cb: &mut CodeBlock, dead: bool) {
        block.iseq.set(unsafe { rb_gc_location(block.iseq.get().into()) }.as_iseq());

        // Update method entry dependencies
        for cme_dep in block.cme_dependencies.iter() {
            let cur_cme: VALUE = cme_dep.get().into();
            let new_cme = unsafe { rb_gc_location(cur_cme) }.as_cme();
            cme_dep.set(new_cme);
        }

        // Update outgoing branch entries
        block.outgoing.for_each(|branch| {
            let branch = unsafe { branch.as_ref() };
            for target in branch.targets.iter() {
                // SAFETY: no mutation inside unsafe
                let current_iseq = unsafe {
                    target.ref_unchecked().as_ref().and_then(|target| {
                        // Avoid get_blockid() on blockref. Can be dangling on dead blocks,
                        // and the iseq housing the block already naturally handles it.
                        if target.get_block().is_some() {
                            None
                        } else {
                            Some(target.get_blockid().iseq)
                        }
                    })
                };

                if let Some(current_iseq) = current_iseq {
                    let updated_iseq = unsafe { rb_gc_location(current_iseq.into()) }
                        .as_iseq();
                    // SAFETY: the Cell::set is not on the reference given out
                    // by ref_unchecked.
                    unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) };
                }
            }
        });

        // Update references to objects in generated code.
        // Skip for dead blocks since they shouldn't run and
        // so there is no potential of writing over invalidation jumps
        if !dead {
            for offset in block.gc_obj_offsets.iter() {
                let offset_to_value = offset.as_usize();
                let value_code_ptr = cb.get_ptr(offset_to_value);
                let value_ptr: *const u8 = value_code_ptr.raw_ptr(cb);
                // Creating an unaligned pointer is well defined unlike in C.
                let value_ptr = value_ptr as *mut VALUE;

                // SAFETY: these point to YJIT's code buffer
                let object = unsafe { value_ptr.read_unaligned() };
                let new_addr = unsafe { rb_gc_location(object) };

                // Only write when the VALUE moves, to be copy-on-write friendly.
                if new_addr != object {
                    for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() {
                        let byte_code_ptr = value_code_ptr.add_bytes(byte_idx);
                        cb.write_mem(byte_code_ptr, byte)
                            .expect("patching existing code should be within bounds");
                    }
                }
            }
        }
    }
}

/// Get all blocks for a particular place in an iseq.
fn get_version_list(blockid: BlockId) -> Option<&'static mut VersionList> {
    let insn_idx = blockid.idx.as_usize();
    match get_iseq_payload(blockid.iseq) {
        Some(payload) if insn_idx < payload.version_map.len() => {
            Some(payload.version_map.get_mut(insn_idx).unwrap())
        },
        _ => None
    }
}
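
// Illustrative layout (made-up data): an iseq with block versions at insn
// indexes 0 and 3 would have a version map shaped like
//   version_map = vec![vec![v0a, v0b], vec![], vec![], vec![v3a]]
// where each inner list holds every version compiled for that index.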
/// Get or create all blocks for a particular place in an iseq.
fn get_or_create_version_list(blockid: BlockId) -> &'static mut VersionList {
    let payload = get_or_create_iseq_payload(blockid.iseq);
    let insn_idx = blockid.idx.as_usize();

    // Expand the version map as necessary
    if insn_idx >= payload.version_map.len() {
        payload
            .version_map
            .resize(insn_idx + 1, VersionList::default());
    }

    return payload.version_map.get_mut(insn_idx).unwrap();
}

/// Take all of the blocks for a particular place in an iseq
pub fn take_version_list(blockid: BlockId) -> VersionList {
    let insn_idx = blockid.idx.as_usize();
    match get_iseq_payload(blockid.iseq) {
        Some(payload) if insn_idx < payload.version_map.len() => {
            mem::take(&mut payload.version_map[insn_idx])
        },
        _ => VersionList::default(),
    }
}

/// Count the number of block versions that match a given BlockId and part of a Context
fn get_num_versions(blockid: BlockId, ctx: &Context) -> usize {
    let insn_idx = blockid.idx.as_usize();
    match get_iseq_payload(blockid.iseq) {
        // FIXME: this counting logic is going to be expensive.
        // We should avoid it if possible
        Some(payload) => {
            payload
                .version_map
                .get(insn_idx)
                .map(|versions| {
                    versions.iter().filter(|&&version| {
                        let version_ctx = Context::decode(unsafe { version.as_ref() }.ctx);
                        // Inline versions are counted separately towards MAX_INLINE_VERSIONS.
                        version_ctx.inline() == ctx.inline() &&
                            // find_block_versions() finds only blocks with compatible reg_mapping,
                            // so count only versions with compatible reg_mapping.
                            version_ctx.reg_mapping == ctx.reg_mapping
                    }).count()
                })
                .unwrap_or(0)
        }
        None => 0,
    }
}

/// Get or create a list of block versions generated for an iseq
/// This is used for disassembly (see disasm.rs)
pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
    let payload = get_or_create_iseq_payload(iseq);

    let mut blocks = Vec::<BlockRef>::new();

    // For each instruction index
    for insn_idx in 0..payload.version_map.len() {
        let version_list = &payload.version_map[insn_idx];

        // For each version at this instruction index
        for version in version_list {
            // Clone the block ref and add it to the list
            blocks.push(*version);
        }
    }

    return blocks;
}

/// Retrieve a basic block version for an (iseq, idx) tuple
/// This will return None if no version is found
fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
    let versions = get_version_list(blockid)?;

    // Best match found
    let mut best_version: Option<BlockRef> = None;
    let mut best_diff = usize::MAX;

    // For each version matching the blockid
    for blockref in versions.iter() {
        let block = unsafe { blockref.as_ref() };
        let block_ctx = Context::decode(block.ctx);

        // Note that we always prefer the first matching
        // version found because of inline-cache chains
        match ctx.diff(&block_ctx) {
            TypeDiff::Compatible(diff) if diff < best_diff => {
                best_version = Some(*blockref);
                best_diff = diff;
            }
            _ => {}
        }
    }

    return best_version;
}

/// Find the closest RegMapping among ones that have already been compiled.
/// Find the closest RegMapping among ones that have already been compiled.
pub fn find_most_compatible_reg_mapping(blockid: BlockId, ctx: &Context) -> Option<RegMapping> {
    let versions = get_version_list(blockid)?;

    // Best match found
    let mut best_mapping: Option<RegMapping> = None;
    let mut best_diff = usize::MAX;

    // For each version matching the blockid
    for blockref in versions.iter() {
        let block = unsafe { blockref.as_ref() };
        let block_ctx = Context::decode(block.ctx);

        // Discover the best block that is compatible if we load/spill registers
        match ctx.diff_allowing_reg_mismatch(&block_ctx) {
            TypeDiff::Compatible(diff) if diff < best_diff => {
                best_mapping = Some(block_ctx.get_reg_mapping());
                best_diff = diff;
            }
            _ => {}
        }
    }

    best_mapping
}

/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
const MAX_INLINE_VERSIONS: usize = 1000;

/// Produce a generic context when the block version limit is hit for a blockid
pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
    // Guard chains implement limits separately, do nothing
    if ctx.get_chain_depth() > 0 {
        return *ctx;
    }

    let next_versions = get_num_versions(blockid, ctx) + 1;
    let max_versions = if ctx.inline() {
        MAX_INLINE_VERSIONS
    } else {
        get_option!(max_versions)
    };

    // If this block version we're about to add will hit the version limit
    if next_versions >= max_versions {
        // Produce a generic context that stores no type information,
        // but still respects the stack_size and sp_offset constraints.
        // This new context will then match all future requests.
        let generic_ctx = ctx.get_generic_ctx();

        if cfg!(debug_assertions) {
            let mut ctx = ctx.clone();
            if ctx.inline() {
                // Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible
                // to keep inlining blocks until we hit the limit, but it's safe to give up inlining.
                ctx.inline_block = None;
                assert!(generic_ctx.inline_block == None);
            }

            assert_ne!(
                TypeDiff::Incompatible,
                ctx.diff(&generic_ctx),
                "should substitute a compatible context",
            );
        }

        return generic_ctx;
    }
    if ctx.inline() {
        incr_counter_to!(max_inline_versions, next_versions);
    }

    return *ctx;
}
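// The debug assertion above relies on get_generic_ctx() producing a compatible
// substitute. A minimal sketch of that property (editor's illustration, not
// part of the original file), assuming default options so type propagation is
// enabled:
#[cfg(test)]
#[test]
fn example_generic_ctx_substitution() {
    let mut ctx = Context::default();
    ctx.set_stack_size(1);
    // Learn a type for the top of the stack.
    ctx.upgrade_opnd_type(StackOpnd(0), Type::Fixnum);

    // Dropping the type information still yields a compatible context, so all
    // future requests for this blockid can match the generic version.
    let generic_ctx = ctx.get_generic_ctx();
    assert_ne!(TypeDiff::Incompatible, ctx.diff(&generic_ctx));
}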
/// Install a block version into its [IseqPayload], letting the GC track its
/// lifetime, and allowing it to be considered for use for other
/// blocks we might generate. Uses `cb` for running write barriers.
///
/// # Safety
///
/// The block must be fully initialized. Its incoming and outgoing edges,
/// if there are any, must point to initialized blocks, too.
///
/// Note that the block might gain edges after this function returns,
/// as can happen during [gen_block_series]. Initialized here doesn't mean
/// ready to be consumed or that the machine code tracked by the block is
/// ready to be run.
///
/// Due to this transient state where a block is tracked by the GC by
/// being inside an [IseqPayload] but not ready to be executed, it's
/// generally unsound to call any Ruby methods during codegen. That has
/// the potential to run blocks which are not ready.
unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) {
    // SAFETY: caller ensures initialization
    let block = unsafe { blockref.as_ref() };

    // Function entry blocks must have stack size 0
    debug_assert!(!(block.iseq_range.start == 0 && Context::decode(block.ctx).stack_size > 0));

    let version_list = get_or_create_version_list(block.get_blockid());

    // If this is the first block being compiled with this block id
    if version_list.len() == 0 {
        incr_counter!(compiled_blockid_count);
    }

    version_list.push(blockref);
    version_list.shrink_to_fit();

    // By writing the new block to the iseq, the iseq now
    // contains new references to Ruby objects. Run write barriers.
    let iseq: VALUE = block.iseq.get().into();
    for dep in block.iter_cme_deps() {
        obj_written!(iseq, dep.into());
    }

    // Run write barriers for all objects in generated code.
    for offset in block.gc_obj_offsets.iter() {
        let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb);
        // Creating an unaligned pointer is well defined unlike in C.
        let value_address: *const VALUE = value_address.cast();

        let object = unsafe { value_address.read_unaligned() };
        obj_written!(iseq, object);
    }

    incr_counter!(compiled_block_count);
    if Context::decode(block.ctx).inline() {
        incr_counter!(inline_block_count);
    }

    // Mark code pages for code GC
    let iseq_payload = get_iseq_payload(block.iseq.get()).unwrap();
    for page in cb.addrs_to_pages(block.start_addr, block.end_addr.get()) {
        iseq_payload.pages.insert(page);
    }
}

/// Remove a block version from the version map of its parent ISEQ
fn remove_block_version(blockref: &BlockRef) {
    let block = unsafe { blockref.as_ref() };
    let version_list = match get_version_list(block.get_blockid()) {
        Some(version_list) => version_list,
        None => return,
    };

    // Retain the versions that are not this one
    version_list.retain(|other| blockref != other);
}
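// add_block_version() and block_update_references() both read VALUEs that are
// embedded at arbitrary byte offsets inside machine code, which is why they go
// through read_unaligned(). A minimal plain-Rust sketch of that idiom
// (editor's illustration, not part of the original file):
#[cfg(test)]
#[test]
fn example_read_unaligned() {
    let buf: [u8; 12] = [0; 12];
    // A pointer one byte into the buffer is not 8-byte aligned. Creating it
    // is fine; only a plain read would be UB, so use read_unaligned().
    let ptr = unsafe { buf.as_ptr().add(1) } as *const u64;
    assert_eq!(unsafe { ptr.read_unaligned() }, 0);
}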
impl<'a> JITState<'a> {
    // Finish compiling and turn a jit state into a block
    // note that the block is still not in shape.
    pub fn into_block(self, end_insn_idx: IseqIdx, start_addr: CodePtr, end_addr: CodePtr, gc_obj_offsets: Vec<u32>) -> BlockRef {
        // Allocate the block and get its pointer
        let blockref: *mut MaybeUninit<Block> = Box::into_raw(Box::new(MaybeUninit::uninit()));

        incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len());

        let ctx = Context::encode(&self.get_starting_ctx());

        // Make the new block
        let block = MaybeUninit::new(Block {
            start_addr,
            iseq: Cell::new(self.get_iseq()),
            iseq_range: self.get_starting_insn_idx()..end_insn_idx,
            ctx,
            end_addr: Cell::new(end_addr),
            incoming: MutableBranchList(Cell::default()),
            gc_obj_offsets: gc_obj_offsets.into_boxed_slice(),
            entry_exit: self.get_block_entry_exit(),
            cme_dependencies: self.method_lookup_assumptions.into_iter().map(Cell::new).collect(),
            // Pending branches => actual branches
            outgoing: MutableBranchList(Cell::new(self.pending_outgoing.into_iter().map(|pending_out| {
                let pending_out = Rc::try_unwrap(pending_out)
                    .ok().expect("all PendingBranchRefs should be unique when ready to construct a Block");
                pending_out.into_branch(NonNull::new(blockref as *mut Block).expect("no null from Box"))
            }).collect()))
        });
        // Initialize it on the heap
        // SAFETY: allocated with Box above
        unsafe { ptr::write(blockref, block) };

        // Block is initialized now. Note that MaybeUninit<T> has the same layout as T.
        let blockref = NonNull::new(blockref as *mut Block).expect("no null from Box");

        // Track all the assumptions the block makes as invariants
        if self.block_assumes_single_ractor {
            track_single_ractor_assumption(blockref);
        }
        for bop in self.bop_assumptions {
            track_bop_assumption(blockref, bop);
        }
        // SAFETY: just allocated it above
        for cme in unsafe { blockref.as_ref() }.cme_dependencies.iter() {
            track_method_lookup_stability_assumption(blockref, cme.get());
        }
        if let Some(idlist) = self.stable_constant_names_assumption {
            track_stable_constant_names_assumption(blockref, idlist);
        }
        for klass in self.no_singleton_class_assumptions {
            track_no_singleton_class_assumption(blockref, klass);
        }
        if self.no_ep_escape {
            track_no_ep_escape_assumption(blockref, self.iseq);
        }

        blockref
    }
}

impl Block {
    pub fn get_blockid(&self) -> BlockId {
        BlockId { iseq: self.iseq.get(), idx: self.iseq_range.start }
    }

    pub fn get_end_idx(&self) -> IseqIdx {
        self.iseq_range.end
    }

    pub fn get_ctx_count(&self) -> usize {
        let mut count = 1; // block.ctx
        self.outgoing.for_each(|branch| {
            // SAFETY: &self implies it's initialized
            count += unsafe { branch.as_ref() }.get_stub_count();
        });
        count
    }

    #[allow(unused)]
    pub fn get_start_addr(&self) -> CodePtr {
        self.start_addr
    }

    #[allow(unused)]
    pub fn get_end_addr(&self) -> CodePtr {
        self.end_addr.get()
    }

    /// Get an immutable iterator over cme dependencies
    pub fn iter_cme_deps(&self) -> impl Iterator<Item = CmePtr> + '_ {
        self.cme_dependencies.iter().map(Cell::get)
    }

    // Push an incoming branch ref and shrink the vector
    fn push_incoming(&self, branch: BranchRef) {
        self.incoming.push(branch);
    }

    // Compute the size of the block code
    pub fn code_size(&self) -> usize {
        (self.end_addr.get().as_offset() - self.start_addr.as_offset()).try_into().unwrap()
    }
}

impl Context {
    pub fn get_stack_size(&self) -> u8 {
        self.stack_size
    }

    pub fn set_stack_size(&mut self, stack_size: u8) {
        self.stack_size = stack_size;
    }

    /// Create a new Context that is compatible with self but doesn't have type information.
    pub fn get_generic_ctx(&self) -> Context {
        let mut generic_ctx = Context::default();
        generic_ctx.stack_size = self.stack_size;
        generic_ctx.sp_offset = self.sp_offset;
        generic_ctx.reg_mapping = self.reg_mapping;
        if self.is_return_landing() {
            generic_ctx.set_as_return_landing();
        }
        if self.is_deferred() {
            generic_ctx.mark_as_deferred();
        }
        generic_ctx
    }

    /// Create a new Context instance with a given stack_size and sp_offset adjusted
    /// accordingly. This is useful when you want to virtually rewind a stack_size for
    /// generating a side exit while considering past sp_offset changes on gen_save_sp.
    pub fn with_stack_size(&self, stack_size: u8) -> Context {
        let mut ctx = *self;
        ctx.sp_offset -= (ctx.get_stack_size() as isize - stack_size as isize) as i8;
        ctx.stack_size = stack_size;
        ctx
    }

    pub fn get_sp_offset(&self) -> i8 {
        self.sp_offset
    }

    pub fn set_sp_offset(&mut self, offset: i8) {
        self.sp_offset = offset;
    }

    pub fn get_reg_mapping(&self) -> RegMapping {
        self.reg_mapping
    }

    pub fn set_reg_mapping(&mut self, reg_mapping: RegMapping) {
        self.reg_mapping = reg_mapping;
    }

    pub fn get_chain_depth(&self) -> u8 {
        self.chain_depth
    }

    pub fn reset_chain_depth_and_defer(&mut self) {
        self.chain_depth = 0;
        self.is_deferred = false;
    }

    pub fn increment_chain_depth(&mut self) {
        if self.get_chain_depth() == CHAIN_DEPTH_MAX {
            panic!("max block version chain depth reached!");
        }
        self.chain_depth += 1;
    }

    pub fn set_as_return_landing(&mut self) {
        self.is_return_landing = true;
    }

    pub fn clear_return_landing(&mut self) {
        self.is_return_landing = false;
    }

    pub fn is_return_landing(&self) -> bool {
        self.is_return_landing
    }

    pub fn mark_as_deferred(&mut self) {
        self.is_deferred = true;
    }

    pub fn is_deferred(&self) -> bool {
        self.is_deferred
    }

    /// Get an operand for the adjusted stack pointer address
    pub fn sp_opnd(&self, offset: i32) -> Opnd {
        let offset = (self.sp_offset as i32 + offset) * SIZEOF_VALUE_I32;
        return Opnd::mem(64, SP, offset);
    }

    /// Get an operand for the adjusted environment pointer address using SP register.
    /// This is valid only when a Binding object hasn't been created for the frame.
    pub fn ep_opnd(&self, offset: i32) -> Opnd {
        let ep_offset = self.get_stack_size() as i32 + 1;
        self.sp_opnd(-ep_offset + offset)
    }
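    // Worked example of the operand arithmetic above (editor's note, not in
    // the original file). With SIZEOF_VALUE == 8, sp_opnd(offset) resolves to
    // a memory operand at SP + (sp_offset + offset) * 8, and ep_opnd(0) lands
    // one VALUE slot below the bottom of the temp stack. For a context with
    // stack_size == 2 and sp_offset == 2:
    //
    //     ctx.sp_opnd(-1) => Opnd::mem(64, SP, 8)    // top of the temp stack
    //     ctx.ep_opnd(0)  => Opnd::mem(64, SP, -8)   // (2 - (2 + 1)) * 8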
    /// Start using a register for a given stack temp or a local.
    pub fn alloc_reg(&mut self, opnd: RegOpnd) {
        let mut reg_mapping = self.get_reg_mapping();
        if reg_mapping.alloc_reg(opnd) {
            self.set_reg_mapping(reg_mapping);
        }
    }

    /// Stop using a register for a given stack temp or a local.
    /// This allows us to reuse the register for a value that we know is dead
    /// and will no longer be used (e.g. popped stack temp).
    pub fn dealloc_reg(&mut self, opnd: RegOpnd) {
        let mut reg_mapping = self.get_reg_mapping();
        if reg_mapping.dealloc_reg(opnd) {
            self.set_reg_mapping(reg_mapping);
        }
    }

    /// Get the type of an instruction operand
    pub fn get_opnd_type(&self, opnd: YARVOpnd) -> Type {
        match opnd {
            SelfOpnd => self.self_type,
            StackOpnd(idx) => {
                assert!(idx < self.stack_size);
                let stack_idx: usize = (self.stack_size - 1 - idx).into();

                // If outside of tracked range, do nothing
                if stack_idx >= MAX_CTX_TEMPS {
                    return Type::Unknown;
                }

                let mapping = self.get_temp_mapping(stack_idx);
                match mapping {
                    MapToSelf => self.self_type,
                    MapToStack(temp_type) => temp_type,
                    MapToLocal(local_idx) => {
                        assert!((local_idx as usize) < MAX_CTX_LOCALS);
                        return self.get_local_type(local_idx.into());
                    }
                }
            }
        }
    }

    /// Get the currently tracked type for a local variable
    pub fn get_local_type(&self, local_idx: usize) -> Type {
        if local_idx >= MAX_CTX_LOCALS {
            Type::Unknown
        } else {
            self.local_types[local_idx]
        }
    }

    /// Get the current temp mapping for a given stack slot
    fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping {
        assert!(temp_idx < MAX_CTX_TEMPS);
        self.temp_mapping[temp_idx]
    }

    /// Set the current temp mapping for a given stack slot
    fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) {
        assert!(temp_idx < MAX_CTX_TEMPS);
        self.temp_mapping[temp_idx] = mapping;
    }

    /// Upgrade (or "learn") the type of an instruction operand
    /// This value must be compatible and at least as specific as the previously known type.
    /// If this value originated from self, or an lvar, the learned type will be
    /// propagated back to its source.
    pub fn upgrade_opnd_type(&mut self, opnd: YARVOpnd, opnd_type: Type) {
        // If type propagation is disabled, store no types
        if get_option!(no_type_prop) {
            return;
        }

        match opnd {
            SelfOpnd => self.self_type.upgrade(opnd_type),
            StackOpnd(idx) => {
                assert!(idx < self.stack_size);
                let stack_idx = (self.stack_size - 1 - idx) as usize;

                // If outside of tracked range, do nothing
                if stack_idx >= MAX_CTX_TEMPS {
                    return;
                }

                let mapping = self.get_temp_mapping(stack_idx);
                match mapping {
                    MapToSelf => self.self_type.upgrade(opnd_type),
                    MapToStack(mut temp_type) => {
                        temp_type.upgrade(opnd_type);
                        self.set_temp_mapping(stack_idx, TempMapping::MapToStack(temp_type));
                    }
                    MapToLocal(local_idx) => {
                        let idx = local_idx as usize;
                        assert!(idx < MAX_CTX_LOCALS);
                        let mut new_type = self.get_local_type(idx);
                        new_type.upgrade(opnd_type);
                        self.set_local_type(idx, new_type);
                        // Re-attach MapToLocal for this StackOpnd(idx). set_local_type() detaches
                        // all MapToLocal mappings, including the one we're upgrading here.
                        self.set_opnd_mapping(opnd, mapping);
                    }
                }
            }
        }
    }

    /*
    Get both the type and mapping (where the value originates) of an operand.
    This can be used with stack_push_mapping or set_opnd_mapping to copy
    a stack value's type while maintaining the mapping.
    */
    pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> TempMapping {
        let opnd_type = self.get_opnd_type(opnd);

        match opnd {
            SelfOpnd => TempMapping::MapToSelf,
            StackOpnd(idx) => {
                assert!(idx < self.stack_size);
                let stack_idx = (self.stack_size - 1 - idx) as usize;

                if stack_idx < MAX_CTX_TEMPS {
                    self.get_temp_mapping(stack_idx)
                } else {
                    // We can't know the source of this stack operand, so we assume it is
                    // a stack-only temporary. type will be UNKNOWN
                    assert!(opnd_type == Type::Unknown);
                    TempMapping::MapToStack(opnd_type)
                }
            }
        }
    }

    /// Overwrite both the type and mapping of a stack operand.
    pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, mapping: TempMapping) {
        match opnd {
            SelfOpnd => unreachable!("self always maps to self"),
            StackOpnd(idx) => {
                assert!(idx < self.stack_size);
                let stack_idx = (self.stack_size - 1 - idx) as usize;

                // If type propagation is disabled, store no types
                if get_option!(no_type_prop) {
                    return;
                }

                // If outside of tracked range, do nothing
                if stack_idx >= MAX_CTX_TEMPS {
                    return;
                }

                self.set_temp_mapping(stack_idx, mapping);
            }
        }
    }

    /// Set the type of a local variable
    pub fn set_local_type(&mut self, local_idx: usize, local_type: Type) {
        // If type propagation is disabled, store no types
        if get_option!(no_type_prop) {
            return;
        }

        if local_idx >= MAX_CTX_LOCALS {
            return
        }

        // If any values on the stack map to this local we must detach them
        for mapping_idx in 0..MAX_CTX_TEMPS {
            let mapping = self.get_temp_mapping(mapping_idx);
            let tm = match mapping {
                MapToStack(_) => mapping,
                MapToSelf => mapping,
                MapToLocal(idx) => {
                    if idx as usize == local_idx {
                        let local_type = self.get_local_type(local_idx);
                        TempMapping::MapToStack(local_type)
                    } else {
                        TempMapping::MapToLocal(idx)
                    }
                }
            };
            self.set_temp_mapping(mapping_idx, tm);
        }

        // Update the type
        self.local_types[local_idx] = local_type;
    }

    /// Erase local variable type information
    /// eg: because of a call we can't track
    pub fn clear_local_types(&mut self) {
        // When clearing local types we must detach any stack mappings to those
        // locals. Even if local values may have changed, stack values will not.
        for mapping_idx in 0..MAX_CTX_TEMPS {
            let mapping = self.get_temp_mapping(mapping_idx);
            if let MapToLocal(local_idx) = mapping {
                let local_idx = local_idx as usize;
                self.set_temp_mapping(mapping_idx, TempMapping::MapToStack(self.get_local_type(local_idx)));
            }
        }

        // Clear the local types
        self.local_types = [Type::default(); MAX_CTX_LOCALS];
    }

    /// Return true if the code is inlined by the caller
    pub fn inline(&self) -> bool {
        self.inline_block.is_some()
    }

    /// Set a block ISEQ given to the Block of this Context
    pub fn set_inline_block(&mut self, iseq: IseqPtr) {
        self.inline_block = Some(iseq);
    }

    /// Compute a difference score for two context objects
    pub fn diff(&self, dst: &Context) -> TypeDiff {
        // Self is the source context (at the end of the predecessor)
        let src = self;

        // Can only lookup the first version in the chain
        if dst.get_chain_depth() != 0 {
            return TypeDiff::Incompatible;
        }

        // Blocks with depth > 0 always produce new versions
        // Sidechains cannot overlap
        if src.get_chain_depth() != 0 {
            return TypeDiff::Incompatible;
        }

        if src.is_return_landing() != dst.is_return_landing() {
            return TypeDiff::Incompatible;
        }

        if src.is_deferred() != dst.is_deferred() {
            return TypeDiff::Incompatible;
        }

        if dst.stack_size != src.stack_size {
            return TypeDiff::Incompatible;
        }

        if dst.sp_offset != src.sp_offset {
            return TypeDiff::Incompatible;
        }

        if dst.reg_mapping != src.reg_mapping {
            return TypeDiff::Incompatible;
        }

        // Difference sum
        let mut diff = 0;

        // Check the type of self
        diff += match src.self_type.diff(dst.self_type) {
            TypeDiff::Compatible(diff) => diff,
            TypeDiff::Incompatible => return TypeDiff::Incompatible,
        };

        // Check the block to inline
        if src.inline_block != dst.inline_block {
            // find_block_version should not find existing blocks with different
            // inline_block so that their yield will not be megamorphic.
            return TypeDiff::Incompatible;
        }

        // For each local type we track
        for i in 0..MAX_CTX_LOCALS {
            let t_src = src.get_local_type(i);
            let t_dst = dst.get_local_type(i);
            diff += match t_src.diff(t_dst) {
                TypeDiff::Compatible(diff) => diff,
                TypeDiff::Incompatible => return TypeDiff::Incompatible,
            };
        }

        // For each value on the temp stack
        for i in 0..src.stack_size {
            let src_mapping = src.get_opnd_mapping(StackOpnd(i));
            let dst_mapping = dst.get_opnd_mapping(StackOpnd(i));

            // If the two mappings aren't the same
            if src_mapping != dst_mapping {
                if matches!(dst_mapping, MapToStack(_)) {
                    // We can safely drop information about the source of the temp
                    // stack operand.
                    diff += 1;
                } else {
                    return TypeDiff::Incompatible;
                }
            }

            let src_type = src.get_opnd_type(StackOpnd(i));
            let dst_type = dst.get_opnd_type(StackOpnd(i));

            diff += match src_type.diff(dst_type) {
                TypeDiff::Compatible(diff) => diff,
                TypeDiff::Incompatible => return TypeDiff::Incompatible,
            };
        }

        return TypeDiff::Compatible(diff);
    }

    /// Basically diff() but allows RegMapping incompatibility that could be fixed by
    /// spilling, loading, or shuffling registers.
    pub fn diff_allowing_reg_mismatch(&self, dst: &Context) -> TypeDiff {
        // We shuffle only RegOpnd::Local and spill any other RegOpnd::Stack.
        // If dst has RegOpnd::Stack, we can't reuse the block as a callee.
        for reg_opnd in dst.get_reg_mapping().get_reg_opnds() {
            if matches!(reg_opnd, RegOpnd::Stack(_)) {
                return TypeDiff::Incompatible;
            }
        }

        // Prepare a Context with the same registers
        let mut dst_with_same_regs = dst.clone();
        dst_with_same_regs.set_reg_mapping(self.get_reg_mapping());

        // Diff registers and other stuff separately, and merge them
        if let TypeDiff::Compatible(ctx_diff) = self.diff(&dst_with_same_regs) {
            TypeDiff::Compatible(ctx_diff + self.get_reg_mapping().diff(dst.get_reg_mapping()))
        } else {
            TypeDiff::Incompatible
        }
    }

    pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> {
        if jit.at_compile_target() {
            let comptime_recv = jit.peek_at_stack(self, 1);
            let comptime_arg = jit.peek_at_stack(self, 0);
            return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p());
        }

        let recv_type = self.get_opnd_type(StackOpnd(1));
        let arg_type = self.get_opnd_type(StackOpnd(0));
        match (recv_type, arg_type) {
            (Type::Fixnum, Type::Fixnum) => Some(true),
            (Type::Unknown | Type::UnknownImm, Type::Unknown | Type::UnknownImm) => None,
            _ => Some(false),
        }
    }
}
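// The scoring in diff() builds on Type::diff(): forgetting a specific type is
// allowed at a small cost, while conflicting types can never flow into each
// other. A minimal sketch (editor's illustration, not part of the original
// file):
#[cfg(test)]
#[test]
fn example_type_diff_lattice() {
    // Same type: perfect match.
    assert_eq!(Type::Fixnum.diff(Type::Fixnum), TypeDiff::Compatible(0));
    // Upcasting to Unknown is compatible, at a non-zero distance.
    assert!(matches!(Type::Fixnum.diff(Type::Unknown), TypeDiff::Compatible(d) if d > 0));
    // Two conflicting specific types are incompatible.
    assert_eq!(Type::Fixnum.diff(Type::Flonum), TypeDiff::Incompatible);
}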
impl Assembler {
    /// Push one new value on the temp stack with an explicit mapping
    /// Return a pointer to the new stack top
    pub fn stack_push_mapping(&mut self, mapping: TempMapping) -> Opnd {
        // If type propagation is disabled, store no types
        if get_option!(no_type_prop) {
            return self.stack_push_mapping(mapping.without_type());
        }

        let stack_size: usize = self.ctx.stack_size.into();

        // Keep track of the type and mapping of the value
        if stack_size < MAX_CTX_TEMPS {
            self.ctx.set_temp_mapping(stack_size, mapping);

            if let MapToLocal(local_idx) = mapping {
                assert!((local_idx as usize) < MAX_CTX_LOCALS);
            }
        }

        self.ctx.stack_size += 1;
        self.ctx.sp_offset += 1;

        // Allocate a register to the new stack operand
        let stack_opnd = self.stack_opnd(0);
        self.alloc_reg(stack_opnd.reg_opnd());

        stack_opnd
    }

    /// Push one new value on the temp stack
    /// Return a pointer to the new stack top
    pub fn stack_push(&mut self, val_type: Type) -> Opnd {
        return self.stack_push_mapping(TempMapping::MapToStack(val_type));
    }

    /// Push the self value on the stack
    pub fn stack_push_self(&mut self) -> Opnd {
        return self.stack_push_mapping(TempMapping::MapToSelf);
    }

    /// Push a local variable on the stack
    pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd {
        if local_idx >= MAX_CTX_LOCALS {
            return self.stack_push(Type::Unknown);
        }

        return self.stack_push_mapping(TempMapping::MapToLocal(local_idx as u8));
    }

    // Pop N values off the stack
    // Return a pointer to the stack top before the pop operation
    pub fn stack_pop(&mut self, n: usize) -> Opnd {
        assert!(n <= self.ctx.stack_size.into());

        let top = self.stack_opnd(0);

        // Clear the types of the popped values
        for i in 0..n {
            let idx: usize = (self.ctx.stack_size as usize) - i - 1;

            if idx < MAX_CTX_TEMPS {
                self.ctx.set_temp_mapping(idx, TempMapping::MapToStack(Type::Unknown));
            }
        }

        self.ctx.stack_size -= n as u8;
        self.ctx.sp_offset -= n as i8;

        return top;
    }

    /// Shift stack temps to remove a Symbol for #send.
    pub fn shift_stack(&mut self, argc: usize) {
        assert!(argc < self.ctx.stack_size.into());

        let method_name_index = (self.ctx.stack_size as usize) - argc - 1;

        for i in method_name_index..(self.ctx.stack_size - 1) as usize {
            if i < MAX_CTX_TEMPS {
                let next_arg_mapping = if i + 1 < MAX_CTX_TEMPS {
                    self.ctx.get_temp_mapping(i + 1)
                } else {
                    TempMapping::MapToStack(Type::Unknown)
                };
                self.ctx.set_temp_mapping(i, next_arg_mapping);
            }
        }
        self.stack_pop(1);
    }

    /// Get an operand pointing to a slot on the temp stack
    pub fn stack_opnd(&self, idx: i32) -> Opnd {
        Opnd::Stack {
            idx,
            num_bits: 64,
            stack_size: self.ctx.stack_size,
            num_locals: None, // not needed for stack temps
            sp_offset: self.ctx.sp_offset,
            reg_mapping: None, // push_insn will set this
        }
    }

    /// Get an operand pointing to a local variable
    pub fn local_opnd(&self, ep_offset: u32) -> Opnd {
        let idx = self.ctx.stack_size as i32 + ep_offset as i32;
        Opnd::Stack {
            idx,
            num_bits: 64,
            stack_size: self.ctx.stack_size,
            num_locals: Some(self.get_num_locals().unwrap()), // this must exist for locals
            sp_offset: self.ctx.sp_offset,
            reg_mapping: None, // push_insn will set this
        }
    }
}

impl BlockId {
    /// Print Ruby source location for debugging
    #[cfg(debug_assertions)]
    #[allow(dead_code)]
    pub fn dump_src_loc(&self) {
        unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx as u32) }
    }
}
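// stack_push/stack_pop drive both the virtual stack metadata and the type
// lattice. A minimal sketch (editor's illustration, not part of the original
// file), assuming Assembler::new(0) builds an assembler with zero locals the
// way YJIT's own tests do:
#[cfg(test)]
#[test]
fn example_stack_push_tracks_types() {
    let mut asm = Assembler::new(0);

    // Pushing records the type of the new stack top in the context.
    asm.stack_push(Type::Fixnum);
    assert_eq!(asm.ctx.get_opnd_type(StackOpnd(0)), Type::Fixnum);

    // Popping clears the mapping and shrinks the virtual stack.
    let _ = asm.stack_pop(1);
    assert_eq!(asm.ctx.get_stack_size(), 0);
}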
/// See [gen_block_series_body]. This simply counts compilation failures.
fn gen_block_series(
    blockid: BlockId,
    start_ctx: &Context,
    ec: EcPtr,
    cb: &mut CodeBlock,
    ocb: &mut OutlinedCb,
) -> Option<BlockRef> {
    let result = gen_block_series_body(blockid, start_ctx, ec, cb, ocb);
    if result.is_none() {
        incr_counter!(compilation_failure);
    }
    result
}

/// Immediately compile a series of block versions at a starting point and
/// return the starting block.
fn gen_block_series_body(
    blockid: BlockId,
    start_ctx: &Context,
    ec: EcPtr,
    cb: &mut CodeBlock,
    ocb: &mut OutlinedCb,
) -> Option<BlockRef> {
    // Keep track of all blocks compiled in this batch
    const EXPECTED_BATCH_SIZE: usize = 4;
    let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE);

    // Generate code for the first block
    let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb, true).ok()?;
    batch.push(first_block); // Keep track of this block version

    // Add the block version to the VersionMap for this ISEQ
    unsafe { add_block_version(first_block, cb) };

    // Loop variable
    let mut last_blockref = first_block;
    loop {
        // Get the last outgoing branch from the previous block.
        // SAFETY: No cell mutation inside unsafe. Copying out a BranchRef.
        let last_branchref: BranchRef = unsafe {
            let last_block = last_blockref.as_ref();
            match last_block.outgoing.0.ref_unchecked().last() {
                Some(branch) => *branch,
                None => { break; } // If last block has no branches, stop.
            }
        };
        let last_branch = unsafe { last_branchref.as_ref() };

        incr_counter!(block_next_count);

        // gen_direct_jump() can request a block to be placed immediately after by
        // leaving a single target that has a `None` address.
        // SAFETY: no mutation inside the unsafe block
        let (requested_blockid, requested_ctx) = unsafe {
            match (last_branch.targets[0].ref_unchecked(), last_branch.targets[1].ref_unchecked()) {
                (Some(last_target), None) if last_target.get_address().is_none() => {
                    (last_target.get_blockid(), last_target.get_ctx())
                }
                _ => {
                    // We're done when no fallthrough block is requested
                    break;
                }
            }
        };

        // Generate new block using context from the last branch.
        let requested_ctx = Context::decode(requested_ctx);
        let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb, false);

        // If the block failed to compile
        if result.is_err() {
            // Remove previously compiled block
            // versions from the version map
            for blockref in batch {
                remove_block_version(&blockref);
                // SAFETY: block was well connected because it was in a version_map
                unsafe { free_block(blockref, false) };
            }

            // Stop compiling
            return None;
        }

        let new_blockref = result.unwrap();

        // Add the block version to the VersionMap for this ISEQ
        unsafe { add_block_version(new_blockref, cb) };

        // Connect the last branch and the new block
        last_branch.targets[0].set(Some(Box::new(BranchTarget::Block(new_blockref))));
        unsafe { new_blockref.as_ref().incoming.push(last_branchref) };

        // Track the block
        batch.push(new_blockref);

        // Repeat with newest block
        last_blockref = new_blockref;
    }

    #[cfg(feature = "disasm")]
    {
        // If dump_iseq_disasm is active, see if this iseq's location matches the given substring.
        // If so, we print the new blocks to the console.
        if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
            let iseq_location = iseq_get_location(blockid.iseq, blockid.idx);
            if iseq_location.contains(substr) {
                let last_block = unsafe { last_blockref.as_ref() };
                let iseq_range = &last_block.iseq_range;
                println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, iseq_range.start, iseq_range.end);
                print!("{}", disasm_iseq_insn_range(blockid.iseq, iseq_range.start, iseq_range.end));
            }
        }
    }

    Some(first_block)
}

/// Generate a block version that is an entry point inserted into an iseq
/// NOTE: this function assumes that the VM lock has been taken
/// If jit_exception is true, compile JIT code for handling exceptions.
/// See jit_compile_exception() for details.
pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<*const u8> {
    // Compute the current instruction index based on the current PC
    let cfp = unsafe { get_ec_cfp(ec) };
    let insn_idx: u16 = unsafe {
        let ec_pc = get_cfp_pc(cfp);
        iseq_pc_to_insn_idx(iseq, ec_pc)?
    };
    let stack_size: u8 = unsafe {
        u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()?
    };

    // The entry context makes no assumptions about types
    let blockid = BlockId {
        iseq,
        idx: insn_idx,
    };

    // Get the inline and outlined code blocks
    let cb = CodegenGlobals::get_inline_cb();
    let ocb = CodegenGlobals::get_outlined_cb();

    let code_ptr = gen_entry_point_body(blockid, stack_size, ec, jit_exception, cb, ocb);

    cb.mark_all_executable();
    ocb.unwrap().mark_all_executable();

    code_ptr
}

fn gen_entry_point_body(blockid: BlockId, stack_size: u8, ec: EcPtr, jit_exception: bool, cb: &mut CodeBlock, ocb: &mut OutlinedCb) -> Option<*const u8> {
    // Write the interpreter entry prologue. Might be NULL when out of memory.
    let (code_ptr, reg_mapping) = gen_entry_prologue(cb, ocb, blockid, stack_size, jit_exception)?;

    // Find or compile a block version
    let mut ctx = Context::default();
    ctx.stack_size = stack_size;
    ctx.reg_mapping = reg_mapping;
    let block = match find_block_version(blockid, &ctx) {
        // If an existing block is found, generate a jump to the block.
        Some(blockref) => {
            let mut asm = Assembler::new_without_iseq();
            asm.jmp(unsafe { blockref.as_ref() }.start_addr.into());
            asm.compile(cb, Some(ocb))?;
            Some(blockref)
        }
        // If this block hasn't yet been compiled, generate blocks after the entry guard.
        None => gen_block_series(blockid, &ctx, ec, cb, ocb),
    };

    match block {
        // Compilation failed
        None => {
            // Trigger code GC. This entry point will be recompiled later.
            if get_option!(code_gc) {
                cb.code_gc(ocb);
            }
            return None;
        }
        // If the block contains no Ruby instructions
        Some(block) => {
            let block = unsafe { block.as_ref() };
            if block.iseq_range.is_empty() {
                return None;
            }
        }
    }

    // Count the number of entry points we compile
    incr_counter!(compiled_iseq_entry);

    // Compilation successful and block not empty
    Some(code_ptr.raw_ptr(cb))
}

// Change the entry's jump target from an entry stub to a next entry
pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) {
    let mut asm = Assembler::new_without_iseq();
    asm_comment!(asm, "regenerate_entry");

    // gen_entry_guard generates cmp + jne. We're rewriting only jne.
    asm.jne(next_entry.into());

    // Move write_pos to rewrite the entry
    let old_write_pos = cb.get_write_pos();
    let old_dropped_bytes = cb.has_dropped_bytes();
    cb.set_write_ptr(unsafe { entryref.as_ref() }.start_addr);
    cb.set_dropped_bytes(false);
    asm.compile(cb, None).expect("can rewrite existing code");

    // Rewind write_pos to the original one
    assert_eq!(cb.get_write_ptr(), unsafe { entryref.as_ref() }.end_addr);
    cb.set_pos(old_write_pos);
    cb.set_dropped_bytes(old_dropped_bytes);
}

pub type PendingEntryRef = Rc<PendingEntry>;

/// Create a new entry reference for an ISEQ
pub fn new_pending_entry() -> PendingEntryRef {
    let entry = PendingEntry {
        uninit_entry: Box::new(MaybeUninit::uninit()),
        start_addr: Cell::new(None),
        end_addr: Cell::new(None),
    };
    return Rc::new(entry);
}
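// Pending entries (and pending branches) are handed out as Rc clones during
// codegen, then converted into their final form by value once compilation
// finishes, which requires unique ownership. A plain-Rust sketch of the
// Rc::try_unwrap pattern this relies on (editor's illustration, not part of
// the original file):
#[cfg(test)]
#[test]
fn example_rc_try_unwrap_uniqueness() {
    use std::rc::Rc;

    let pending = Rc::new(42);
    let clone = Rc::clone(&pending);

    // Two owners: try_unwrap fails and hands the Rc back.
    let pending = Rc::try_unwrap(pending).unwrap_err();
    drop(clone);

    // Sole owner: try_unwrap succeeds and yields the value by move.
    assert_eq!(Rc::try_unwrap(pending).ok(), Some(42));
}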
c_callable! {
    /// Generated code calls this function with the SysV calling convention.
    /// See [gen_entry_stub].
    fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 {
        with_compile_time(|| {
            with_vm_lock(src_loc!(), || {
                let cb = CodegenGlobals::get_inline_cb();
                let ocb = CodegenGlobals::get_outlined_cb();

                let addr = entry_stub_hit_body(entry_ptr, ec, cb, ocb)
                    .unwrap_or_else(|| {
                        // Trigger code GC (e.g. no space).
                        // This entry point will be recompiled later.
                        if get_option!(code_gc) {
                            cb.code_gc(ocb);
                        }
                        CodegenGlobals::get_stub_exit_code().raw_ptr(cb)
                    });

                cb.mark_all_executable();
                ocb.unwrap().mark_all_executable();

                addr
            })
        })
    }
}

/// Called by the generated code when an entry stub is executed
fn entry_stub_hit_body(
    entry_ptr: *const c_void,
    ec: EcPtr,
    cb: &mut CodeBlock,
    ocb: &mut OutlinedCb
) -> Option<*const u8> {
    // Get ISEQ and insn_idx from the current ec->cfp
    let cfp = unsafe { get_ec_cfp(ec) };
    let iseq = unsafe { get_cfp_iseq(cfp) };
    let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?;
    let blockid = BlockId { iseq, idx: insn_idx };
    let stack_size: u8 = unsafe {
        u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()?
    };

    // Compile a new entry guard as a next entry
    let next_entry = cb.get_write_ptr();
    let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) });
    let pending_entry = gen_entry_chain_guard(&mut asm, ocb, blockid)?;
    let reg_mapping = gen_entry_reg_mapping(&mut asm, blockid, stack_size);
    asm.compile(cb, Some(ocb))?;

    // Find or compile a block version
    let mut ctx = Context::default();
    ctx.stack_size = stack_size;
    ctx.reg_mapping = reg_mapping;
    let blockref = match find_block_version(blockid, &ctx) {
        // If an existing block is found, generate a jump to the block.
        Some(blockref) => {
            let mut asm = Assembler::new_without_iseq();
            asm.jmp(unsafe { blockref.as_ref() }.start_addr.into());
            asm.compile(cb, Some(ocb))?;
            Some(blockref)
        }
        // If this block hasn't yet been compiled, generate blocks after the entry guard.
        None => gen_block_series(blockid, &ctx, ec, cb, ocb),
    };

    // Commit or retry the entry
    if blockref.is_some() {
        // Regenerate the previous entry
        let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null");
        regenerate_entry(cb, &entryref, next_entry);

        // Write an entry to the heap and push it to the ISEQ
        let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique");
        get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry());
    }

    // Return a code pointer if the block is successfully compiled. The entry stub needs
    // to jump to the entry preceding the block to load the registers in reg_mapping.
    blockref.map(|_block| next_entry.raw_ptr(cb))
}

/// Generate a stub that calls entry_stub_hit
pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> {
    let ocb = ocb.unwrap();

    let mut asm = Assembler::new_without_iseq();
    asm_comment!(asm, "entry stub hit");

    asm.mov(C_ARG_OPNDS[0], entry_address.into());

    // Jump to trampoline to call entry_stub_hit()
    // Not really a side exit, just don't need a padded jump here.
    asm.jmp(CodegenGlobals::get_entry_stub_hit_trampoline().as_side_exit());

    asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}

/// A trampoline used by gen_entry_stub. entry_stub_hit may issue Code GC, so
/// it's useful for Code GC to call entry_stub_hit from a globally shared code.
pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
    let ocb = ocb.unwrap();
    let mut asm = Assembler::new_without_iseq();

    // See gen_entry_guard for how it's used.
    asm_comment!(asm, "entry_stub_hit() trampoline");
    let jump_addr = asm.ccall(entry_stub_hit as *mut u8, vec![C_ARG_OPNDS[0], EC]);

    // Jump to the address returned by the entry_stub_hit() call
    asm.jmp_opnd(jump_addr);

    asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
/// Generate code for a branch, possibly rewriting and changing the size of it
fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) {
    // Remove old comments
    cb.remove_comments(branch.start_addr, branch.end_addr.get());

    // SAFETY: having a &Branch implies branch.block is initialized.
    let block = unsafe { branch.block.get().as_ref() };

    let branch_terminates_block = branch.end_addr.get() == block.get_end_addr();

    // Generate the branch
    let mut asm = Assembler::new_without_iseq();
    asm_comment!(asm, "regenerate_branch");
    branch.gen_fn.call(
        &mut asm,
        Target::CodePtr(branch.get_target_address(0).unwrap()),
        branch.get_target_address(1).map(|addr| Target::CodePtr(addr)),
    );

    // If the entire block is the branch and the block could be invalidated,
    // we need to pad to ensure there is room for invalidation patching.
    if branch.start_addr == block.start_addr && branch_terminates_block && block.entry_exit.is_some() {
        asm.pad_inval_patch();
    }

    // Rewrite the branch
    let old_write_pos = cb.get_write_pos();
    let old_dropped_bytes = cb.has_dropped_bytes();
    cb.set_write_ptr(branch.start_addr);
    cb.set_dropped_bytes(false);
    asm.compile(cb, None).expect("can rewrite existing code");
    let new_end_addr = cb.get_write_ptr();

    branch.end_addr.set(new_end_addr);

    // The block may have shrunk after the branch is rewritten
    if branch_terminates_block {
        // Adjust block size
        block.end_addr.set(new_end_addr);
    }

    // cb.write_pos is both a write cursor and a marker for the end of
    // everything written out so far. Leave cb->write_pos at the end of the
    // block before returning. This function only ever bumps or retains the end
    // of block marker since that's what the majority of callers want. When the
    // branch sits at the very end of the codeblock and it shrinks after
    // regeneration, it's up to the caller to drop bytes off the end to
    // not leave a gap and implement branch->shape.
    if old_write_pos > cb.get_write_pos() {
        // We rewound cb->write_pos to generate the branch, now restore it.
        cb.set_pos(old_write_pos);
        cb.set_dropped_bytes(old_dropped_bytes);
    } else {
        // The branch sits at the end of cb and consumed some memory.
        // Keep cb.write_pos.
    }

    branch.assert_layout();
}

pub type PendingBranchRef = Rc<PendingBranch>;

/// Create a new outgoing branch entry for a block
fn new_pending_branch(jit: &mut JITState, gen_fn: BranchGenFn) -> PendingBranchRef {
    let branch = Rc::new(PendingBranch {
        uninit_branch: Box::new(MaybeUninit::uninit()),
        gen_fn,
        start_addr: Cell::new(None),
        end_addr: Cell::new(None),
        targets: [Cell::new(None), Cell::new(None)],
    });

    // Add to the list of outgoing branches for the block
    jit.queue_outgoing_branch(branch.clone());

    branch
}
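// Branch lists like MutableBranchList store their edges in a Cell so they can
// be edited through a shared reference. A plain-Rust sketch of the
// take-modify-set idiom used throughout this file (editor's illustration, not
// part of the original file):
#[cfg(test)]
#[test]
fn example_cell_take_modify_set() {
    use std::cell::Cell;

    let list: Cell<Box<[u32]>> = Cell::new(vec![1, 2].into_boxed_slice());

    // Temporarily move the list out, mutate it, then put it back.
    let mut edges = list.take().into_vec();
    edges.push(3);
    list.set(edges.into_boxed_slice());

    assert_eq!(list.take().into_vec(), vec![1, 2, 3]);
}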
c_callable! {
    /// Generated code calls this function with the SysV calling convention.
    /// See [gen_branch_stub].
    fn branch_stub_hit(
        branch_ptr: *const c_void,
        target_idx: u32,
        ec: EcPtr,
    ) -> *const u8 {
        with_vm_lock(src_loc!(), || {
            with_compile_time(|| { branch_stub_hit_body(branch_ptr, target_idx, ec) })
        })
    }
}

/// Called by the generated code when a branch stub is executed
/// Triggers compilation of branches and code patching
fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -> *const u8 {
    if get_option!(dump_insns) {
        println!("branch_stub_hit");
    }

    let branch_ref = NonNull::<Branch>::new(branch_ptr as *mut Branch)
        .expect("Branches should not be null");

    // SAFETY: We have the VM lock, and the branch is initialized by the time generated
    // code calls this function.
    //
    // Careful, don't make a `&Block` from `branch.block` here because we might
    // delete it later in delete_empty_defer_block().
    let branch = unsafe { branch_ref.as_ref() };
    let branch_size_on_entry = branch.code_size();

    let target_idx: usize = target_idx.as_usize();
    let target_branch_shape = match target_idx {
        0 => BranchShape::Next0,
        1 => BranchShape::Next1,
        _ => unreachable!("target_idx < 2 must always hold"),
    };

    let cb = CodegenGlobals::get_inline_cb();
    let ocb = CodegenGlobals::get_outlined_cb();

    let (target_blockid, target_ctx): (BlockId, Context) = unsafe {
        // SAFETY: no mutation of the target's Cell. Just reading out data.
        let target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap();

        // If this branch has already been patched, return the dst address
        // Note: recursion can cause the same stub to be hit multiple times
        if let BranchTarget::Block(_) = target.as_ref() {
            return target.get_address().unwrap().raw_ptr(cb);
        }

        let target_ctx = Context::decode(target.get_ctx());
        (target.get_blockid(), target_ctx)
    };

    let (cfp, original_interp_sp) = unsafe {
        let cfp = get_ec_cfp(ec);
        let original_interp_sp = get_cfp_sp(cfp);

        let running_iseq = get_cfp_iseq(cfp);
        assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq");

        let reconned_pc = rb_iseq_pc_at_idx(running_iseq, target_blockid.idx.into());
        let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into());
        // Unlike in the interpreter, our `leave` doesn't write to the caller's
        // SP -- we do it in the returned-to code. Account for this difference.
        let reconned_sp = reconned_sp.add(target_ctx.is_return_landing().into());

        // Update the PC in the current CFP, because it may be out of sync in JITted code
        rb_set_cfp_pc(cfp, reconned_pc);

        // :stub-sp-flush:
        // Generated code does stack operations without modifying cfp->sp, while
        // cfp->sp tells the GC which values on the stack to root. Generated code
        // generally takes care of updating cfp->sp when it calls runtime routines that
        // could trigger GC, but it's inconvenient to do it before calling this function.
        // So we do it here instead.
        rb_set_cfp_sp(cfp, reconned_sp);

        // Bail if code GC is disabled and we've already run out of space.
        if !get_option!(code_gc) && (cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes()) {
            return CodegenGlobals::get_stub_exit_code().raw_ptr(cb);
        }

        // Bail if we're about to run out of native stack space.
        // We've just reconstructed interpreter state.
        if rb_ec_stack_check(ec as _) != 0 {
            return CodegenGlobals::get_stub_exit_code().raw_ptr(cb);
        }

        (cfp, original_interp_sp)
    };

    // Try to find an existing compiled version of this block
    let mut block = find_block_version(target_blockid, &target_ctx);
    let mut branch_modified = false;

    // If this block hasn't yet been compiled
    if block.is_none() {
        let branch_old_shape = branch.gen_fn.get_shape();

        // If the new block can be generated right after the branch (at cb->write_pos)
        if cb.get_write_ptr() == branch.end_addr.get() {
            // This branch should be terminating its block
            assert!(branch.end_addr == unsafe { branch.block.get().as_ref() }.end_addr);

            // Change the branch shape to indicate the target block will be placed next
            branch.gen_fn.set_shape(target_branch_shape);

            // Rewrite the branch with the new, potentially more compact shape
            regenerate_branch(cb, branch);
            branch_modified = true;

            // Ensure that the branch terminates the codeblock just like
            // before entering this if block. This drops bytes off the end
            // in case we shrank the branch when regenerating.
            cb.set_write_ptr(branch.end_addr.get());
        }

        // Compile the new block version
        block = gen_block_series(target_blockid, &target_ctx, ec, cb, ocb);

        if block.is_none() && branch_modified {
            // We couldn't generate a new block for the branch, but we modified the branch.
            // Restore the branch by regenerating it.
            branch.gen_fn.set_shape(branch_old_shape);
            regenerate_branch(cb, branch);
        }
    }

    // Finish building the new block
    let dst_addr = match block {
        Some(new_block) => {
            let new_block = unsafe { new_block.as_ref() };

            // Branch shape should reflect layout
            assert!(!(branch.gen_fn.get_shape() == target_branch_shape && new_block.start_addr != branch.end_addr.get()));

            // When the block housing this branch is empty, try to free it
            delete_empty_defer_block(branch, new_block, target_ctx, target_blockid);

            // Add this branch to the list of incoming branches for the target
            new_block.push_incoming(branch_ref);

            // Update the branch target address
            branch.targets[target_idx].set(Some(Box::new(BranchTarget::Block(new_block.into()))));

            // Rewrite the branch with the new jump target address
            regenerate_branch(cb, branch);

            // Restore interpreter sp, since the code hitting the stub expects the original.
            unsafe { rb_set_cfp_sp(cfp, original_interp_sp) };

            new_block.start_addr
        }
        None => {
            // Trigger code GC. The whole ISEQ will be recompiled later.
            // We shouldn't trigger it in the middle of compilation in branch_stub_hit
            // because incomplete code could be used when cb.dropped_bytes is flipped
            // by code GC. So this place, after all compilation, is the safest place
            // to hook code GC on branch_stub_hit.
            if get_option!(code_gc) {
                cb.code_gc(ocb);
            }

            // Failed to service the stub by generating a new block so now we
            // need to exit to the interpreter at the stubbed location. We are
            // intentionally *not* restoring original_interp_sp. At the time of
            // writing, reconstructing interpreter state only involves setting
            // cfp->sp and cfp->pc. We set both before trying to generate the
            // block. All there is left to do to exit is to pop the native
            // frame. We do that in code_for_exit_from_stub.
            CodegenGlobals::get_stub_exit_code()
        }
    };

    ocb.unwrap().mark_all_executable();
    cb.mark_all_executable();

    let new_branch_size = branch.code_size();
    assert!(
        new_branch_size <= branch_size_on_entry,
        "branch stubs should never enlarge branches (start_addr: {:?}, old_size: {}, new_size: {})",
        branch.start_addr.raw_ptr(cb), branch_size_on_entry, new_branch_size,
    );

    // Return a pointer to the compiled block version
    dst_addr.raw_ptr(cb)
}

/// Part of branch_stub_hit().
/// If we've hit a deferred branch, and the housing block consists solely of the branch, rewire
/// incoming branches to the new block and delete the housing block.
fn delete_empty_defer_block(branch: &Branch, new_block: &Block, target_ctx: Context, target_blockid: BlockId) {
    // This &Block should be unique, relying on the VM lock
    let housing_block: &Block = unsafe { branch.block.get().as_ref() };
    if target_ctx.is_deferred() &&
        target_blockid == housing_block.get_blockid() &&
        housing_block.outgoing.len() == 1 &&
        {
            // The block is empty when iseq_range is one instruction long.
            let range = &housing_block.iseq_range;
            let iseq = housing_block.iseq.get();
            let start_opcode = iseq_opcode_at_idx(iseq, range.start.into()) as usize;
            let empty_end = range.start + insn_len(start_opcode) as IseqIdx;
            range.end == empty_end
        }
    {
        // Divert incoming branches of housing_block to the new block
        housing_block.incoming.for_each(|incoming| {
            let incoming = unsafe { incoming.as_ref() };
            for target in 0..incoming.targets.len() {
                // SAFETY: No cell mutation; copying out a BlockRef.
                if Some(BlockRef::from(housing_block)) == unsafe {
                    incoming.targets[target]
                        .ref_unchecked()
                        .as_ref()
                        .and_then(|target| target.get_block())
                } {
                    incoming.targets[target].set(Some(Box::new(BranchTarget::Block(new_block.into()))));
                }
            }
            new_block.push_incoming(incoming.into());
        });

        // Transplant the branch we've just hit to the new block
        mem::drop(housing_block.outgoing.0.take());
        new_block.outgoing.push(branch.into());
        let housing_block: BlockRef = branch.block.replace(new_block.into());

        // Free the old housing block; there should now be no live &Block.
        remove_block_version(&housing_block);
        unsafe { free_block(housing_block, false) };

        incr_counter!(deleted_defer_block_count);
    }
}

/// Generate a "stub", a piece of code that calls the compiler back when run.
/// A piece of code that redeems for more code; a thunk for code.
fn gen_branch_stub(
    ctx: u32,
    iseq: IseqPtr,
    ocb: &mut OutlinedCb,
    branch_struct_address: usize,
    target_idx: u32,
) -> Option<CodePtr> {
    let ocb = ocb.unwrap();

    let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) });
    asm.ctx = Context::decode(ctx);
    asm.set_reg_mapping(asm.ctx.reg_mapping);
    asm_comment!(asm, "branch stub hit");

    if asm.ctx.is_return_landing() {
        asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
        let top = asm.stack_push(Type::Unknown);
        asm.mov(top, C_RET_OPND);
    }

    // Save caller-saved registers before C_ARG_OPNDS get clobbered.
    // Spill all registers for consistency with the trampoline.
    for &reg in caller_saved_temp_regs() {
        asm.cpush(Opnd::Reg(reg));
    }

    // Spill temps to the VM stack as well for jit.peek_at_stack()
    asm.spill_regs();

    // Set up the arguments unique to this stub for:
    //
    //    branch_stub_hit(branch_ptr, target_idx, ec)
    //
    // Bake pointer to Branch into output code.
    // We make sure the block housing the branch is still alive when branch_stub_hit() is running.
    asm.mov(C_ARG_OPNDS[0], branch_struct_address.into());
    asm.mov(C_ARG_OPNDS[1], target_idx.into());

    // Jump to trampoline to call branch_stub_hit()
    // Not really a side exit, just don't need a padded jump here.
    asm.jmp(CodegenGlobals::get_branch_stub_hit_trampoline().as_side_exit());

    asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}

pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
    let ocb = ocb.unwrap();
    let mut asm = Assembler::new_without_iseq();

    // For `branch_stub_hit(branch_ptr, target_idx, ec)`,
    // `branch_ptr` and `target_idx` are different for each stub,
    // but the call and what's after is the same. This trampoline
    // is the unchanging part.
    // Since this trampoline is static, it allows code GC inside
    // branch_stub_hit() to free stubs without problems.
    asm_comment!(asm, "branch_stub_hit() trampoline");
    let stub_hit_ret = asm.ccall(
        branch_stub_hit as *mut u8,
        vec![
            C_ARG_OPNDS[0],
            C_ARG_OPNDS[1],
            EC,
        ]
    );
    let jump_addr = asm.load(stub_hit_ret);

    // Restore caller-saved registers for stack temps
    for &reg in caller_saved_temp_regs().rev() {
        asm.cpop_into(Opnd::Reg(reg));
    }

    // Jump to the address returned by the branch_stub_hit() call
    asm.jmp_opnd(jump_addr);

    // HACK: popping into C_RET_REG clobbers the return value of branch_stub_hit() we need to jump
    // to, so we need a scratch register to preserve it. This extends the live range of the C
    // return register so we get something else for the return value.
    let _ = asm.live_reg_opnd(stub_hit_ret);

    asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
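// The push/pop pairs above must keep the x86_64 stack 16-byte aligned: each
// cpush is 8 bytes, so the total number of pushed registers has to stay even.
// A minimal sketch of that invariant (editor's illustration, not part of the
// original file):
#[cfg(test)]
#[test]
fn example_push_count_stays_even_on_x86_64() {
    if cfg!(target_arch = "x86_64") {
        // temp regs + C_RET_REG (+ one filler when needed) is always even.
        let count = caller_saved_temp_regs().count();
        assert_eq!(count % 2, 0, "push count must preserve 16-byte alignment");
    }
}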
/// Return registers to be pushed and popped on branch_stub_hit.
pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator {
    let temp_regs = Assembler::get_temp_regs().iter();
    let len = temp_regs.len();
    // The return value gen_leave() leaves in C_RET_REG
    // needs to survive the branch_stub_hit() call.
    let regs = temp_regs.chain(std::iter::once(&C_RET_REG));

    // On x86_64, maintain 16-byte stack alignment
    if cfg!(target_arch = "x86_64") && len % 2 == 0 {
        static ONE_MORE: [Reg; 1] = [C_RET_REG];
        regs.chain(ONE_MORE.iter())
    } else {
        regs.chain(&[])
    }
}

impl Assembler {
    /// Mark the start position of a patchable entry point in the machine code
    pub fn mark_entry_start(&mut self, entryref: &PendingEntryRef) {
        // We need to create our own entry rc object
        // so that we can move the closure below
        let entryref = entryref.clone();
        self.pos_marker(move |code_ptr, _| {
            entryref.start_addr.set(Some(code_ptr));
        });
    }

    /// Mark the end position of a patchable entry point in the machine code
    pub fn mark_entry_end(&mut self, entryref: &PendingEntryRef) {
        // We need to create our own entry rc object
        // so that we can move the closure below
        let entryref = entryref.clone();
        self.pos_marker(move |code_ptr, _| {
            entryref.end_addr.set(Some(code_ptr));
        });
    }

    // Mark the start position of a patchable branch in the machine code
    fn mark_branch_start(&mut self, branchref: &PendingBranchRef) {
        // We need to create our own branch rc object
        // so that we can move the closure below
        let branchref = branchref.clone();
        self.pos_marker(move |code_ptr, _| {
            branchref.start_addr.set(Some(code_ptr));
        });
    }

    // Mark the end position of a patchable branch in the machine code
    fn mark_branch_end(&mut self, branchref: &PendingBranchRef) {
        // We need to create our own branch rc object
        // so that we can move the closure below
        let branchref = branchref.clone();
        self.pos_marker(move |code_ptr, _| {
            branchref.end_addr.set(Some(code_ptr));
        });
    }
}

#[must_use]
pub fn gen_branch(
    jit: &mut JITState,
    asm: &mut Assembler,
    target0: BlockId,
    ctx0: &Context,
    target1: Option<BlockId>,
    ctx1: Option<&Context>,
    gen_fn: BranchGenFn,
) -> Option<()> {
    let branch = new_pending_branch(jit, gen_fn);

    // Get the branch targets or stubs
    let target0_addr = branch.set_target(0, target0, ctx0, jit)?;
    let target1_addr = if let Some(ctx) = ctx1 {
        let addr = branch.set_target(1, target1.unwrap(), ctx, jit);
        if addr.is_none() {
            // target1 requested but we're out of memory.
            // Avoid unwrap() in gen_fn()
            return None;
        }
        addr
    } else {
        None
    };

    // Call the branch generation function
    asm.mark_branch_start(&branch);
    branch.gen_fn.call(asm, Target::CodePtr(target0_addr), target1_addr.map(|addr| Target::CodePtr(addr)));
    asm.mark_branch_end(&branch);

    Some(())
}

pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm: &mut Assembler) {
    let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default)));
    let maybe_block = find_block_version(target0, ctx);

    // If the block already exists
    let new_target = if let Some(blockref) = maybe_block {
        let block = unsafe { blockref.as_ref() };
        let block_addr = block.start_addr;

        // Call the branch generation function
        asm_comment!(asm, "gen_direct_jmp: existing block");
        asm.mark_branch_start(&branch);
        branch.gen_fn.call(asm, Target::CodePtr(block_addr), None);
        asm.mark_branch_end(&branch);

        BranchTarget::Block(blockref)
    } else {
        // The branch is effectively empty (a noop)
        asm_comment!(asm, "gen_direct_jmp: fallthrough");
        asm.mark_branch_start(&branch);
        asm.mark_branch_end(&branch);
        branch.gen_fn.set_shape(BranchShape::Next0);

        // `None` in new_target.address signals gen_block_series() to
        // compile the target block right after this one (fallthrough).
        BranchTarget::Stub(Box::new(BranchStub {
            address: None,
            ctx: Context::encode(ctx),
            iseq: Cell::new(target0.iseq),
            iseq_idx: target0.idx,
        }))
    };

    branch.targets[0].set(Some(Box::new(new_target)));
}

/// Create a stub to force the code up to this point to be executed
pub fn defer_compilation(jit: &mut JITState, asm: &mut Assembler) -> Result<(), ()> {
    if asm.ctx.is_deferred() {
        panic!("Double defer!");
    }

    let mut next_ctx = asm.ctx;

    next_ctx.mark_as_deferred();

    let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default)));

    let blockid = BlockId {
        iseq: jit.get_iseq(),
        idx: jit.get_insn_idx(),
    };

    // Likely a stub since the context is marked as deferred().
    let dst_addr = branch.set_target(0, blockid, &next_ctx, jit).ok_or(())?;

    // Pad the block if it has the potential to be invalidated. This must be
    // done before gen_fn() in case the jump is overwritten by a fallthrough.
    if jit.block_entry_exit.is_some() {
        asm.pad_inval_patch();
    }

    // Call the branch generation function
    asm_comment!(asm, "defer_compilation");
    asm.mark_branch_start(&branch);
    branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None);
    asm.mark_branch_end(&branch);

    // If the block we're deferring from is empty
    if jit.get_starting_insn_idx() == jit.get_insn_idx() {
        incr_counter!(defer_empty_count);
    }

    incr_counter!(defer_count);

    Ok(())
}

/// Remove a block from the live control flow graph.
/// Block must be initialized and incoming/outgoing edges
/// must also point to initialized blocks.
unsafe fn remove_from_graph(blockref: BlockRef) {
    let block = unsafe { blockref.as_ref() };

    // Remove this block from the predecessor's targets
    for pred_branchref in block.incoming.0.take().iter() {
        // Branch from the predecessor to us
        let pred_branch = unsafe { pred_branchref.as_ref() };

        // If this is us, nullify the target block
        for target_idx in 0..pred_branch.targets.len() {
            // SAFETY: no mutation inside unsafe
            let target_is_us = unsafe {
                pred_branch.targets[target_idx]
                    .ref_unchecked()
                    .as_ref()
                    .and_then(|target| target.get_block())
                    .and_then(|target_block| (target_block == blockref).then(|| ()))
                    .is_some()
            };

            if target_is_us {
                pred_branch.targets[target_idx].set(None);
            }
        }
    }

    // For each outgoing branch
    block.outgoing.for_each(|out_branchref| {
        let out_branch = unsafe { out_branchref.as_ref() };

        // For each successor block
        for out_target in out_branch.targets.iter() {
            // SAFETY: copying out an Option. No mutation.
            let succ_block: Option<BlockRef> = unsafe {
                out_target.ref_unchecked().as_ref().and_then(|target| target.get_block())
            };

            if let Some(succ_block) = succ_block {
                // Remove outgoing branch from the successor's incoming list
                // SAFETY: caller promises the block has valid outgoing edges.
                let succ_block = unsafe { succ_block.as_ref() };
                // Temporarily move out of succ_block.incoming.
                let succ_incoming = succ_block.incoming.0.take();
                let mut succ_incoming = succ_incoming.into_vec();
                succ_incoming.retain(|branch| *branch != out_branchref);
                succ_block.incoming.0.set(succ_incoming.into_boxed_slice()); // allocs. Rely on oom=abort
            }
        }
    });
}

/// Tear down a block and deallocate it.
/// Caller has to ensure that the code tracked by the block is not
/// running, as running code may hit [branch_stub_hit], which expects
/// [Branch] to be live.
///
/// We currently ensure this through the `jit_cont` system in cont.c
/// and sometimes through the GC calling [rb_yjit_iseq_free]. The GC
/// has proven that an ISeq is not running if it calls us to free it.
///
/// For delayed deallocation, since dead blocks don't keep the
/// blocks they refer to alive, by the time we get here their outgoing
/// edges may be dangling. Pass `graph_intact=false` in such cases.
pub unsafe fn free_block(blockref: BlockRef, graph_intact: bool) {
    // Careful with order here.
    // First, remove all pointers to the referent block
    unsafe {
        block_assumptions_free(blockref);

        if graph_intact {
            remove_from_graph(blockref);
        }
    }

    // SAFETY: we should now have a unique pointer to the block
    unsafe { dealloc_block(blockref) }
}

/// Deallocate a block and its outgoing branches. Blocks own their outgoing branches.
/// Caller must ensure that we have unique ownership for the referent block
unsafe fn dealloc_block(blockref: BlockRef) {
    unsafe {
        for outgoing in blockref.as_ref().outgoing.0.take().iter() {
            // this Box::from_raw matches the Box::into_raw from PendingBranch::into_branch
            mem::drop(Box::from_raw(outgoing.as_ptr()));
        }
    }

    // Deallocate the referent Block
    unsafe {
        // this Box::from_raw matches the Box::into_raw from JITState::into_block
        mem::drop(Box::from_raw(blockref.as_ptr()));
    }
}

// Some runtime checks for integrity of a program location
pub fn verify_blockid(blockid: BlockId) {
    unsafe {
        assert!(rb_IMEMO_TYPE_P(blockid.iseq.into(), imemo_iseq) != 0);
        assert!(u32::from(blockid.idx) < get_iseq_encoded_size(blockid.iseq));
    }
}
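// dealloc_block() frees blocks and branches by reconstructing the Box that
// originally allocated them. A plain-Rust sketch of the Box::into_raw /
// Box::from_raw round trip it relies on (editor's illustration, not part of
// the original file):
#[cfg(test)]
#[test]
fn example_box_raw_round_trip() {
    // into_raw relinquishes ownership and yields a raw pointer...
    let raw: *mut u32 = Box::into_raw(Box::new(7));

    // ...and from_raw reclaims ownership, so dropping the Box frees it.
    // SAFETY: raw came from Box::into_raw and is not aliased.
    let boxed = unsafe { Box::from_raw(raw) };
    assert_eq!(*boxed, 7);
}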
// Invalidate one specific block version
pub fn invalidate_block_version(blockref: &BlockRef) {
    //ASSERT_vm_locking();

    // TODO: want to assert that all other ractors are stopped here. Can't patch
    // machine code that some other thread is running.

    let block = unsafe { (*blockref).as_ref() };
    let id_being_invalidated = block.get_blockid();
    let mut cb = CodegenGlobals::get_inline_cb();
    let ocb = CodegenGlobals::get_outlined_cb();

    verify_blockid(id_being_invalidated);

    #[cfg(feature = "disasm")]
    {
        // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated.
        if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
            let iseq_range = &block.iseq_range;
            let iseq_location = iseq_get_location(block.iseq.get(), iseq_range.start);
            if iseq_location.contains(substr) {
                println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, iseq_range.start, iseq_range.end);
            }
        }
    }

    // Remove this block from the version array
    remove_block_version(blockref);

    // Get a pointer to the generated code for this block
    let block_start = block.start_addr;

    // Make the start of the block do an exit. This handles OOM situations
    // and some cases where we can't efficiently patch incoming branches.
    // Do this first, since in case there is a fallthrough branch into this
    // block, the patching loop below can overwrite the start of the block.
    // In those situations, there are hopefully no jumps to the start of the
    // block after patching, as the start of the block would be in the middle
    // of something generated by branch_t::gen_fn.
    let block_entry_exit = block
        .entry_exit
        .expect("invalidation needs the entry_exit field");
    {
        let block_end = block.get_end_addr();

        if block_start == block_entry_exit {
            // Some blocks exit on entry. Patching a jump to the entry at the
            // entry makes an infinite loop.
        } else {
            // Patch in a jump to block.entry_exit.
            let cur_pos = cb.get_write_ptr();
            let cur_dropped_bytes = cb.has_dropped_bytes();
            cb.set_write_ptr(block_start);

            let mut asm = Assembler::new_without_iseq();
            asm.jmp(block_entry_exit.as_side_exit());
            cb.set_dropped_bytes(false);
            asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code");

            assert!(
                cb.get_write_ptr() <= block_end,
                "invalidation wrote past end of block (code_size: {:?}, new_size: {}, start_addr: {:?})",
                block.code_size(),
                cb.get_write_ptr().as_offset() - block_start.as_offset(),
                block.start_addr.raw_ptr(cb),
            );

            cb.set_write_ptr(cur_pos);
            cb.set_dropped_bytes(cur_dropped_bytes);
        }
    }

    // For each incoming branch
    let mut incoming_branches = block.incoming.0.take();

    // An adjacent branch will write into the start of the block being invalidated, possibly
    // overwriting the block's exit. If we run out of memory after doing this, any subsequent
    // incoming branches we rewrite won't be able to use the block's exit as a fallback when
    // they are unable to generate a stub. To avoid this, if there's an incoming branch that's
    // adjacent to the invalidated block, make sure we process it last.
    let adjacent_branch_idx = incoming_branches.iter().position(|branchref| {
        let branch = unsafe { branchref.as_ref() };
        let target_next = block.start_addr == branch.end_addr.get();
        target_next
    });
    if let Some(adjacent_branch_idx) = adjacent_branch_idx {
        incoming_branches.swap(adjacent_branch_idx, incoming_branches.len() - 1)
    }
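    // To picture the adjacency case: with a fallthrough branch, the branch's
    // code ends exactly where the invalidated block begins, so regenerating
    // the branch can grow it forward into the block:
    //
    //     [ ...branch code... ][ invalidated block ... ]
    //                          ^ block_start == branch.end_addr
    //
    // Growing into the block can clobber the jump-to-exit patched in above,
    // which is why the adjacent branch is rewritten last: every branch
    // processed before it can still fall back to the block's exit if stub
    // generation fails (e.g. OOM).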
    for (i, branchref) in incoming_branches.iter().enumerate() {
        let branch = unsafe { branchref.as_ref() };
        let target_idx = if branch.get_target_address(0) == Some(block_start) {
            0
        } else {
            1
        };

        // Assert that the incoming branch indeed points to the block being invalidated
        // SAFETY: no mutation.
        unsafe {
            let incoming_target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap();
            assert_eq!(Some(block_start), incoming_target.get_address());
            if let Some(incoming_block) = &incoming_target.get_block() {
                assert_eq!(blockref, incoming_block);
            }
        }

        // Create a stub for this branch target
        let stub_addr = gen_branch_stub(block.ctx, block.iseq.get(), ocb, branchref.as_ptr() as usize, target_idx as u32);

        // In case we were unable to generate a stub (e.g. OOM), use the block's
        // exit instead of a stub for the block. It's important that we
        // still patch the branch in this situation so stubs are unique
        // to branches. Think about what could go wrong if we run out of
        // memory in the middle of this loop.
        let stub_addr = stub_addr.unwrap_or(block_entry_exit);

        // Fill the branch target with a stub
        branch.targets[target_idx].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
            address: Some(stub_addr),
            iseq: block.iseq.clone(),
            iseq_idx: block.iseq_range.start,
            ctx: block.ctx,
        })))));

        // Check if the invalidated block immediately follows the branch
        let target_next = block.start_addr == branch.end_addr.get();

        if target_next {
            if stub_addr != block.start_addr {
                // The new block will no longer be adjacent.
                // Note that we could be enlarging the branch and writing into the
                // start of the block being invalidated.
                branch.gen_fn.set_shape(BranchShape::Default);
            } else {
                // The branch target is still adjacent, so the branch must remain
                // a fallthrough so we don't overwrite the target with a jump.
                //
                // This can happen if we're unable to generate a stub and the
                // target block also exits on entry (block_start == block_entry_exit).
            }
        }

        // Rewrite the branch with the new jump target address
        let old_branch_size = branch.code_size();
        regenerate_branch(cb, branch);

        if target_next && branch.end_addr > block.end_addr {
            panic!("yjit invalidate rewrote branch past end of invalidated block: {:?} (code_size: {})", branch, block.code_size());
        }
        let is_last_incoming_branch = i == incoming_branches.len() - 1;
        if target_next && branch.end_addr.get() > block_entry_exit && !is_last_incoming_branch {
            // We might still need to jump to this exit if we run out of memory when rewriting another incoming branch.
            panic!("yjit invalidate rewrote branch over exit of invalidated block: {:?}", branch);
        }
        if !target_next && branch.code_size() > old_branch_size {
            panic!(
                "invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})",
                branch.start_addr.raw_ptr(cb),
                old_branch_size,
                branch.code_size()
            );
        }
    }

    // Clear out the JIT func so that we can recompile later and so the
    // interpreter will run the iseq.
    //
    // Only clear the jit_func when we're invalidating the JIT entry block.
    // We only support compiling iseqs from index 0 right now. So entry
    // points will always have an instruction index of 0. We'll need to
    // change this in the future when we support optional parameters because
    // they enter the function with a non-zero PC.
    if block.iseq_range.start == 0 {
        // TODO:
        // We could reset the exec counter to zero in rb_iseq_reset_jit_func()
        // so that we eventually compile a new entry point when useful
        unsafe { rb_iseq_reset_jit_func(block.iseq.get()) };
    }

    // FIXME:
    // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub.
    // SAFETY: This block was in a version_map earlier
    // in this function before we removed it, so it's well connected.
    unsafe { remove_from_graph(*blockref) };
    delayed_deallocation(*blockref);

    ocb.unwrap().mark_all_executable();
    cb.mark_all_executable();

    incr_counter!(invalidation_count);
}

// We cannot deallocate blocks immediately after invalidation since there
// could be stubs waiting to access branch pointers. Return stubs can do
// this since patching the code for setting up return addresses does not
// affect old return addresses that are already set up to use potentially
// invalidated branch pointers. Example:
//   def foo(n)
//     if n == 2
//       # 1.times.each to create a cfunc frame to preserve the JIT frame
//       # which will return to a stub housed in an invalidated block
//       return 1.times.each { Object.define_method(:foo) {} }
//     end
//
//     foo(n + 1)
//   end
//   p foo(1)
pub fn delayed_deallocation(blockref: BlockRef) {
    block_assumptions_free(blockref);

    let payload = get_iseq_payload(unsafe { blockref.as_ref() }.iseq.get()).unwrap();
    payload.dead_blocks.push(blockref);
}
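// A sound use of ref_unchecked() (defined below), following the rules from its
// doc comment: no Cell mutation inside the unsafe block, and nothing borrowed
// escapes it (only a bool comes out). This helper is illustrative, not part of
// the module's API:
//
//     unsafe fn peek_is_some<T>(cell: &Cell<Option<T>>) -> bool {
//         // SAFETY: no mutation inside unsafe, and the reference does not
//         // outlive this block.
//         unsafe { cell.ref_unchecked().is_some() }
//     }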
trait RefUnchecked {
    type Contained;
    unsafe fn ref_unchecked(&self) -> &Self::Contained;
}

impl<T> RefUnchecked for Cell<T> {
    type Contained = T;

    /// Gives a reference to the contents of a [Cell].
    /// Dangerous; please include a SAFETY note.
    ///
    /// An easy way to use this without triggering Undefined Behavior is to
    ///   1. ensure there is transitively no Cell/UnsafeCell mutation in the `unsafe` block
    ///   2. ensure the `unsafe` block does not return any references, so our
    ///      analysis is lexically confined. This is trivially true if the block
    ///      returns a `bool`, for example. Aggregates that store references have
    ///      explicit lifetime parameters that look like `<'a>`.
    ///
    /// There are other subtler situations that don't follow these rules yet
    /// are still sound.
    /// See `test_miri_ref_unchecked()` for examples. You can play with it
    /// with `cargo +nightly miri test miri`.
    unsafe fn ref_unchecked(&self) -> &Self::Contained {
        // SAFETY: pointer is dereferenceable because it's from a &Cell.
        // It's up to the caller to follow aliasing rules with the output
        // reference.
        unsafe { self.as_ptr().as_ref().unwrap() }
    }
}

#[cfg(test)]
mod tests {
    use crate::core::*;

    #[test]
    fn type_size() {
        // Check that we can store types in 4 bits,
        // and all local types in 32 bits
        assert_eq!(mem::size_of::<Type>(), 1);
        assert!(Type::BlockParamProxy as usize <= 0b1111);
        assert!(MAX_CTX_LOCALS * 4 <= 32);
    }

    #[test]
    fn local_types() {
        let mut ctx = Context::default();

        for i in 0..MAX_CTX_LOCALS {
            ctx.set_local_type(i, Type::Fixnum);
            assert_eq!(ctx.get_local_type(i), Type::Fixnum);
            ctx.set_local_type(i, Type::BlockParamProxy);
            assert_eq!(ctx.get_local_type(i), Type::BlockParamProxy);
        }

        ctx.set_local_type(0, Type::Fixnum);
        ctx.clear_local_types();
        assert!(ctx.get_local_type(0) == Type::Unknown);

        // Make sure we don't accidentally set bits incorrectly
        let mut ctx = Context::default();
        ctx.set_local_type(0, Type::Fixnum);
        assert_eq!(ctx.get_local_type(0), Type::Fixnum);
        ctx.set_local_type(2, Type::Fixnum);
        ctx.set_local_type(1, Type::BlockParamProxy);
        assert_eq!(ctx.get_local_type(0), Type::Fixnum);
        assert_eq!(ctx.get_local_type(2), Type::Fixnum);
    }

    #[test]
    fn types() {
        // Valid src => dst
        assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0));
        assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), TypeDiff::Compatible(0));
        assert_ne!(Type::UnknownImm.diff(Type::Unknown), TypeDiff::Incompatible);
        assert_ne!(Type::Fixnum.diff(Type::Unknown), TypeDiff::Incompatible);
        assert_ne!(Type::Fixnum.diff(Type::UnknownImm), TypeDiff::Incompatible);

        // Invalid src => dst
        assert_eq!(Type::Unknown.diff(Type::UnknownImm), TypeDiff::Incompatible);
        assert_eq!(Type::Unknown.diff(Type::Fixnum), TypeDiff::Incompatible);
        assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), TypeDiff::Incompatible);
    }

    #[test]
    fn reg_mapping() {
        let mut reg_mapping = RegMapping([None, None, None, None, None]);

        // A fresh mapping has no registers allocated
        for stack_idx in 0..MAX_CTX_TEMPS as u8 {
            assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(stack_idx)), None);
        }

        // Set 0, 2, 6 (RegMapping: [Some(0), Some(6), Some(2), None, None])
        reg_mapping.alloc_reg(RegOpnd::Stack(0));
        reg_mapping.alloc_reg(RegOpnd::Stack(2));
        reg_mapping.alloc_reg(RegOpnd::Stack(3));
        reg_mapping.dealloc_reg(RegOpnd::Stack(3));
        reg_mapping.alloc_reg(RegOpnd::Stack(6));

        // Get 0..8
        assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(0)), Some(0));
        assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(1)), None);
        assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(2)), Some(2));
        assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(3)), None);
        assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(4)), None);
        assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(5)), None);
        assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(6)), Some(1));
        assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(7)), None);
    }

    #[test]
    fn context() {
        // Valid src => dst
        assert_eq!(Context::default().diff(&Context::default()), TypeDiff::Compatible(0));

        // Try pushing an operand and getting its type
        let mut asm = Assembler::new(0);
        asm.stack_push(Type::Fixnum);
        let top_type = asm.ctx.get_opnd_type(StackOpnd(0));
        assert!(top_type == Type::Fixnum);

        // TODO: write more tests for Context type diff
    }

    #[test]
    fn context_upgrade_local() {
        let mut asm = Assembler::new(0);
        asm.stack_push_local(0);
        asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil);
        assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
    }
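    // A sketch of the nibble packing that the assertions in type_size() above
    // protect (the layout is assumed here for illustration; the real accessors
    // are Context::set_local_type/get_local_type):
    //
    //     fn set_nibble(packed: &mut u32, idx: usize, val: u8) {
    //         debug_assert!(idx < 8 && val <= 0b1111);
    //         let shift = idx * 4;
    //         *packed = (*packed & !(0b1111 << shift)) | ((val as u32) << shift);
    //     }
    //
    //     fn get_nibble(packed: u32, idx: usize) -> u8 {
    //         ((packed >> (idx * 4)) & 0b1111) as u8
    //     }
    //
    // Eight 4-bit slots fill the u32 exactly: 8 * 4 = 32.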
    #[test]
    fn context_chain_depth() {
        let mut ctx = Context::default();
        assert_eq!(ctx.get_chain_depth(), 0);
        assert_eq!(ctx.is_return_landing(), false);
        assert_eq!(ctx.is_deferred(), false);

        for _ in 0..5 {
            ctx.increment_chain_depth();
        }
        assert_eq!(ctx.get_chain_depth(), 5);

        ctx.set_as_return_landing();
        assert_eq!(ctx.is_return_landing(), true);

        ctx.clear_return_landing();
        assert_eq!(ctx.is_return_landing(), false);

        ctx.mark_as_deferred();
        assert_eq!(ctx.is_deferred(), true);

        ctx.reset_chain_depth_and_defer();
        assert_eq!(ctx.get_chain_depth(), 0);
        assert_eq!(ctx.is_deferred(), false);
    }

    #[test]
    fn shift_stack_for_send() {
        let mut asm = Assembler::new(0);

        // Push values to simulate send(:name, arg) with 6 items already on-stack
        for _ in 0..6 {
            asm.stack_push(Type::Fixnum);
        }
        asm.stack_push(Type::Unknown);
        asm.stack_push(Type::ImmSymbol);
        asm.stack_push(Type::Unknown);

        // This method takes argc of the sendee, not argc of send
        asm.shift_stack(1);

        // The symbol should be gone
        assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(0)));
        assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(1)));
    }

    #[test]
    fn test_miri_ref_unchecked() {
        let blockid = BlockId {
            iseq: ptr::null(),
            idx: 0,
        };
        let cb = CodeBlock::new_dummy(1024);
        let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(1024));
        let dummy_addr = cb.get_write_ptr();
        let block = JITState::new(blockid, Context::default(), dummy_addr, ptr::null(), &mut ocb, true)
            .into_block(0, dummy_addr, dummy_addr, vec![]);
        let _dropper = BlockDropper(block);

        // Outside of brief moments during construction,
        // we're always working with &Branch (a shared reference to a Branch).
        let branch: &Branch = &Branch {
            gen_fn: BranchGenFn::JZToTarget0,
            block: Cell::new(block),
            start_addr: dummy_addr,
            end_addr: Cell::new(dummy_addr),
            targets: [
                Cell::new(None),
                Cell::new(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
                    iseq: Cell::new(ptr::null()),
                    iseq_idx: 0,
                    address: None,
                    ctx: 0,
                }))))),
            ],
        };

        // For easier soundness reasoning, make sure the reference returned does not outlive the
        // `unsafe` block! It's tempting to do, but it leads to non-local issues.
        // Here is an example where it goes wrong:
        if false {
            for target in branch.targets.iter().as_ref() {
                if let Some(btarget) = unsafe { target.ref_unchecked() } {
                    // btarget is derived from the unsafe block!
                    target.set(None); // This drops the contents of the cell...
                    assert!(btarget.get_address().is_none()); // ...but `btarget` is still live! UB.
                }
            }
        }

        // Do something like this instead. It's not pretty, but it's easier to vet for UB this way.
        for target in branch.targets.iter().as_ref() {
            // SAFETY: no mutation within unsafe
            if unsafe { target.ref_unchecked().is_none() } {
                continue;
            }

            // SAFETY: no mutation within unsafe
            assert!(unsafe { target.ref_unchecked().as_ref().unwrap().get_address().is_none() });

            target.set(None);
        }

        // A more subtle situation where we do Cell/UnsafeCell mutation over the
        // lifetime of the reference released by ref_unchecked().
        branch.targets[0].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
            iseq: Cell::new(ptr::null()),
            iseq_idx: 0,
            address: None,
            ctx: 0,
        })))));
        // Invalid ISeq; we never dereference it.
        let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr();
        unsafe {
            if let Some(branch_target) = branch.targets[0].ref_unchecked().as_ref() {
                if let BranchTarget::Stub(stub) = branch_target.as_ref() {
                    // SAFETY:
                    // This is a Cell mutation, but it mutates the contents
                    // of a `Cell<IseqPtr>`, which is a different type
                    // from the type of Cell found in `Branch::targets`, so
                    // there is no chance of mutating the Cell that we called
                    // ref_unchecked() on above.
                    Cell::set(&stub.iseq, secret_iseq);
                }
            }
        };
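        // The same disjointness argument in miniature: mutating a Cell of one
        // type cannot invalidate a reference derived from a Cell of a
        // different type, because they can never be the same cell. A
        // standalone sketch of the pattern (illustrative, not exercised here):
        //
        //     let outer: Cell<Option<Box<Cell<u8>>>> =
        //         Cell::new(Some(Box::new(Cell::new(0))));
        //     // SAFETY: only the *inner* Cell<u8> is mutated while the
        //     // reference into the outer Cell's contents is live.
        //     unsafe {
        //         if let Some(inner) = outer.ref_unchecked().as_ref() {
        //             inner.set(42); // a different cell, of a different type
        //         }
        //     }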
        // Check that we indeed changed the iseq of the stub.
        // Cell::take moves out of the cell.
        assert_eq!(
            secret_iseq as usize,
            branch.targets[0].take().unwrap().get_blockid().iseq as usize
        );

        struct BlockDropper(BlockRef);
        impl Drop for BlockDropper {
            fn drop(&mut self) {
                // SAFETY: we have ownership because the test doesn't stash
                // the block away in any global structure.
                // Note that the test being self-contained is also why we
                // use dealloc_block() over free_block(), as free_block() touches
                // the global invariants tables unavailable in tests.
                unsafe { dealloc_block(self.0) };
            }
        }
    }
}