diff --git a/cranelift-codegen/src/ir/framelayout.rs b/cranelift-codegen/src/ir/framelayout.rs
new file mode 100644
index 000000000..2227c4a28
--- /dev/null
+++ b/cranelift-codegen/src/ir/framelayout.rs
@@ -0,0 +1,73 @@
+//! Frame layout item changes.
+
+use crate::ir::entities::Inst;
+use crate::isa::RegUnit;
+use std::boxed::Box;
+
+#[cfg(not(feature = "std"))]
+use crate::HashMap;
+#[cfg(feature = "std")]
+use std::collections::HashMap;
+
+/// Change in the frame layout information.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
+pub enum FrameLayoutChange {
+    /// Base CFA pointer moved to a different register/offset.
+    CallFrameAddressAt {
+        /// CFA register.
+        reg: RegUnit,
+        /// CFA offset.
+        offset: isize,
+    },
+    /// Register saved at the given offset from the CFA.
+    RegAt {
+        /// Saved register.
+        reg: RegUnit,
+        /// Offset in the frame (offset from CFA).
+        cfa_offset: isize,
+    },
+    /// Return address saved at the given offset from the CFA.
+    ReturnAddressAt {
+        /// Offset in the frame (offset from CFA).
+        cfa_offset: isize,
+    },
+    /// The entire frame layout must be preserved somewhere to be restored at a corresponding
+    /// `Restore` change.
+    ///
+    /// This likely maps to the DWARF call frame instruction `DW_CFA_remember_state`.
+    Preserve,
+    /// Restore the entire frame layout from a corresponding prior `Preserve` frame change.
+    ///
+    /// This likely maps to the DWARF call frame instruction `DW_CFA_restore_state`.
+    Restore,
+}
+
+/// Set of frame layout changes.
+pub type FrameLayoutChanges = Box<[FrameLayoutChange]>;
+
+/// Frame items layout for (prologue/epilogue) instructions.
+#[derive(Debug, Clone)]
+pub struct FrameLayout {
+    /// Initial frame layout.
+    pub initial: FrameLayoutChanges,
+
+    /// Per-instruction frame layout changes. Because the map will not be dense,
+    /// a HashMap is used instead of a SecondaryMap.
+    pub instructions: HashMap<Inst, FrameLayoutChanges>,
+}
+
+impl FrameLayout {
+    /// Creates an empty `FrameLayout`: no initial changes and no per-instruction changes.
+    pub fn new() -> Self {
+        FrameLayout {
+            initial: vec![].into_boxed_slice(),
+            instructions: HashMap::new(),
+        }
+    }
+
+    /// Clears the structure, resetting `initial` and removing all per-instruction changes.
+    pub fn clear(&mut self) {
+        self.initial = vec![].into_boxed_slice();
+        self.instructions.clear();
+    }
+}
diff --git a/cranelift-codegen/src/ir/function.rs b/cranelift-codegen/src/ir/function.rs
index 00827240d..337fef1ca 100644
--- a/cranelift-codegen/src/ir/function.rs
+++ b/cranelift-codegen/src/ir/function.rs
@@ -11,7 +11,7 @@ use crate::ir::{
     Ebb, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, Inst, JumpTable,
     JumpTableData, SigRef, StackSlot, StackSlotData, Table, TableData,
 };
-use crate::ir::{EbbOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations};
+use crate::ir::{EbbOffsets, FrameLayout, InstEncodings, SourceLocs, StackSlots, ValueLocations};
 use crate::ir::{JumpTableOffsets, JumpTables};
 use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
 use crate::regalloc::{EntryRegDiversions, RegDiversions};
@@ -83,6 +83,13 @@ pub struct Function {
     /// Track the original source location for each instruction. The source locations are not
     /// interpreted by Cranelift, only preserved.
     pub srclocs: SourceLocs,
+
+    /// Frame layout for the instructions.
+    ///
+    /// Stack unwinding requires information about which registers are saved in the frame and
+    /// where. This information is created during the prologue and epilogue passes, and is only
+    /// collected after `collect_debug_info` has been called.
+    pub frame_layout: Option<FrameLayout>,
 }
 
 impl Function {
@@ -104,6 +111,7 @@ impl Function {
             offsets: SecondaryMap::new(),
             jt_offsets: SecondaryMap::new(),
             srclocs: SecondaryMap::new(),
+            frame_layout: None,
         }
     }
 
@@ -123,6 +131,7 @@ impl Function {
         self.offsets.clear();
         self.jt_offsets.clear();
         self.srclocs.clear();
+        self.frame_layout = None;
     }
 
     /// Create a new empty, anonymous function with a Fast calling convention.
@@ -232,6 +241,7 @@ impl Function {
     /// Starts collection of debug information.
     pub fn collect_debug_info(&mut self) {
         self.dfg.collect_debug_info();
+        self.frame_layout = Some(FrameLayout::new());
     }
 
     /// Changes the destination of a jump or branch instruction.
diff --git a/cranelift-codegen/src/ir/mod.rs b/cranelift-codegen/src/ir/mod.rs
index 8b89227ff..88dfb8eb8 100644
--- a/cranelift-codegen/src/ir/mod.rs
+++ b/cranelift-codegen/src/ir/mod.rs
@@ -6,6 +6,7 @@ pub mod dfg;
 pub mod entities;
 mod extfunc;
 mod extname;
+mod framelayout;
 pub mod function;
 mod globalvalue;
 mod heap;
@@ -39,6 +40,7 @@ pub use crate::ir::extfunc::{
     AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature,
 };
 pub use crate::ir::extname::ExternalName;
+pub use crate::ir::framelayout::{FrameLayout, FrameLayoutChange, FrameLayoutChanges};
 pub use crate::ir::function::{DisplayFunctionAnnotations, Function};
 pub use crate::ir::globalvalue::GlobalValueData;
 pub use crate::ir::heap::{HeapData, HeapStyle};
diff --git a/cranelift-codegen/src/isa/x86/abi.rs b/cranelift-codegen/src/isa/x86/abi.rs
index 6bd766724..c3fd7b08a 100644
--- a/cranelift-codegen/src/isa/x86/abi.rs
+++ b/cranelift-codegen/src/isa/x86/abi.rs
@@ -9,14 +9,15 @@ use crate::ir;
 use crate::ir::immediates::Imm64;
 use crate::ir::stackslot::{StackOffset, StackSize};
 use crate::ir::{
-    get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder,
-    ValueLoc,
+    get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose,
+    FrameLayoutChange, InstBuilder, ValueLoc,
 };
 use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa};
 use crate::regalloc::RegisterSet;
 use crate::result::CodegenResult;
 use crate::stack_layout::layout_stack;
 use core::i32;
+use std::boxed::Box;
 use target_lexicon::{PointerWidth, Triple};
 
 /// Argument registers for x86-64
@@ -371,6 +372,29 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
     Ok(())
 }
 
+/// CFAState is `cranelift`'s model of the call frame layout at any particular point in a function.
+/// Changes in this layout are used to derive appropriate `ir::FrameLayoutChange` to record for
+/// relevant instructions.
+#[derive(Clone)]
+struct CFAState {
+    /// The register from which we can derive the call frame address. On x86_64, this is typically
+    /// `rbp`, but at function entry and exit may be `rsp` while the call frame is being
+    /// established.
+    cf_ptr_reg: RegUnit,
+    /// Given that `cf_ptr_reg` is a register containing a pointer to some memory, `cf_ptr_offset`
+    /// is the offset from that pointer to the address of the start of this function's call frame.
+    ///
+    /// For a concrete x86_64 example, we will start this at 8 - the call frame begins immediately
+    /// before the return address. This will typically then be set to 16, after pushing `rbp` to
+    /// preserve the parent call frame. It is very unlikely the offset should be anything other
+    /// than one or two `usize`.
+    cf_ptr_offset: isize,
+    /// The offset between the start of the call frame and the current stack pointer. This is
+    /// primarily useful to point to where on the stack preserved registers are, but is maintained
+    /// through the whole function for consistency.
+    current_depth: isize,
+}
+
 /// Implementation of the fastcall-based Win64 calling convention described at [1]
 /// [1] https://msdn.microsoft.com/en-us/library/ms235286.aspx
 fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
@@ -432,14 +456,27 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
         func.signature.returns.push(csr_arg);
     }
 
+    let mut cfa_state = CFAState {
+        cf_ptr_reg: RU::rsp as RegUnit,
+        cf_ptr_offset: word_size as isize,
+        current_depth: -(word_size as isize),
+    };
+
     // Set up the cursor and insert the prologue
     let entry_ebb = func.layout.entry_block().expect("missing entry block");
     let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
-    insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
+    insert_common_prologue(
+        &mut pos,
+        local_stack_size,
+        reg_type,
+        &csrs,
+        isa,
+        &mut cfa_state,
+    );
 
     // Reset the cursor and insert the epilogue
     let mut pos = pos.at_position(CursorPosition::Nowhere);
-    insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs);
+    insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, isa, &cfa_state);
 
     Ok(())
 }
@@ -487,14 +524,34 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
         func.signature.returns.push(csr_arg);
     }
 
+    let mut cfa_state = CFAState {
+        cf_ptr_reg: RU::rsp as RegUnit,
+        cf_ptr_offset: word_size as isize,
+        current_depth: -(word_size as isize),
+    };
+
     // Set up the cursor and insert the prologue
     let entry_ebb = func.layout.entry_block().expect("missing entry block");
     let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
-    insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
+    insert_common_prologue(
+        &mut pos,
+        local_stack_size,
+        reg_type,
+        &csrs,
+        isa,
+        &mut cfa_state,
+    );
 
     // Reset the cursor and insert the epilogue
     let mut pos = pos.at_position(CursorPosition::Nowhere);
-    insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs);
+    insert_common_epilogues(
+        &mut pos,
+        local_stack_size,
+        reg_type,
+        &csrs,
+        isa,
+        &mut cfa_state,
+    );
 
     Ok(())
 }
@@ -507,7 +564,9 @@ fn insert_common_prologue(
     reg_type: ir::types::Type,
     csrs: &RegisterSet,
     isa: &dyn TargetIsa,
+    cfa_state: &mut CFAState,
 ) {
+    let word_size = isa.pointer_bytes();
     if stack_size > 0 {
         // Check if there is a special stack limit parameter. If so insert stack check.
         if let Some(stack_limit_arg) = pos.func.special_param(ArgumentPurpose::StackLimit) {
@@ -516,21 +575,64 @@ fn insert_common_prologue(
             // Also, the size of a return address, implicitly pushed by a x86 `call` instruction,
             // also should be accounted for.
             // TODO: Check if the function body actually contains a `call` instruction.
-            let word_size = isa.pointer_bytes();
             let total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64;
 
             insert_stack_check(pos, total_stack_size, stack_limit_arg);
         }
     }
 
+    if let Some(ref mut frame_layout) = pos.func.frame_layout {
+        frame_layout.initial = vec![
+            FrameLayoutChange::CallFrameAddressAt {
+                reg: cfa_state.cf_ptr_reg,
+                offset: cfa_state.cf_ptr_offset,
+            },
+            FrameLayoutChange::ReturnAddressAt {
+                cfa_offset: cfa_state.current_depth,
+            },
+        ]
+        .into_boxed_slice();
+    }
+
     // Append param to entry EBB
     let ebb = pos.current_ebb().expect("missing ebb under cursor");
     let fp = pos.func.dfg.append_ebb_param(ebb, reg_type);
     pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
 
-    pos.ins().x86_push(fp);
-    pos.ins()
+    let push_fp_inst = pos.ins().x86_push(fp);
+    let word_size = word_size as isize;
+    cfa_state.current_depth -= word_size;
+    cfa_state.cf_ptr_offset += word_size;
+    if let Some(ref mut frame_layout) = pos.func.frame_layout {
+        frame_layout.instructions.insert(
+            push_fp_inst,
+            vec![
+                FrameLayoutChange::CallFrameAddressAt {
+                    reg: cfa_state.cf_ptr_reg,
+                    offset: cfa_state.cf_ptr_offset,
+                },
+                FrameLayoutChange::RegAt {
+                    reg: RU::rbp as RegUnit,
+                    cfa_offset: cfa_state.current_depth,
+                },
+            ]
+            .into_boxed_slice(),
+        );
+    }
+    let mov_sp_inst = pos
+        .ins()
         .copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit);
+    cfa_state.cf_ptr_reg = RU::rbp as RegUnit;
+    if let Some(ref mut frame_layout) = pos.func.frame_layout {
+        frame_layout.instructions.insert(
+            mov_sp_inst,
+            vec![FrameLayoutChange::CallFrameAddressAt {
+                reg: cfa_state.cf_ptr_reg,
+                offset: cfa_state.cf_ptr_offset,
+            }]
+            .into_boxed_slice(),
+        );
+    }
 
     for reg in csrs.iter(GPR) {
         // Append param to entry EBB
@@ -540,7 +642,18 @@ fn insert_common_prologue(
         pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
 
         // Remember it so we can push it momentarily
-        pos.ins().x86_push(csr_arg);
+        let reg_push_inst = pos.ins().x86_push(csr_arg);
+        cfa_state.current_depth -= word_size;
+        if let Some(ref mut frame_layout) = pos.func.frame_layout {
+            frame_layout.instructions.insert(
+                reg_push_inst,
+                vec![FrameLayoutChange::RegAt {
+                    reg,
+                    cfa_offset: cfa_state.current_depth,
+                }]
+                .into_boxed_slice(),
+            );
+        }
     }
 
     // Allocate stack frame storage.
@@ -618,12 +731,46 @@ fn insert_common_epilogues(
     stack_size: i64,
     reg_type: ir::types::Type,
     csrs: &RegisterSet,
+    isa: &dyn TargetIsa,
+    cfa_state: &CFAState,
 ) {
     while let Some(ebb) = pos.next_ebb() {
         pos.goto_last_inst(ebb);
         if let Some(inst) = pos.current_inst() {
             if pos.func.dfg[inst].opcode().is_return() {
-                insert_common_epilogue(inst, stack_size, pos, reg_type, csrs);
+                // Figure out if we need to insert end-of-function-aware frame layout information.
+                let following_inst = pos
+                    .func
+                    .layout
+                    .next_ebb(ebb)
+                    .and_then(|next_ebb| pos.func.layout.first_inst(next_ebb));
+
+                if following_inst.is_some() {
+                    if let Some(ref mut frame_layout) = pos.func.frame_layout {
+                        frame_layout
+                            .instructions
+                            .insert(inst, vec![FrameLayoutChange::Preserve].into_boxed_slice());
+                    }
+                }
+
+                insert_common_epilogue(
+                    inst,
+                    stack_size,
+                    pos,
+                    reg_type,
+                    csrs,
+                    isa,
+                    &mut cfa_state.clone(),
+                );
+
+                if let Some(following_inst) = following_inst {
+                    if let Some(ref mut frame_layout) = pos.func.frame_layout {
+                        frame_layout.instructions.insert(
+                            following_inst,
+                            vec![FrameLayoutChange::Restore].into_boxed_slice(),
+                        );
+                    }
+                }
             }
         }
     }
@@ -631,13 +778,17 @@ fn insert_common_epilogues(
 
 /// Insert an epilogue given a specific `return` instruction.
 /// This is used by common calling conventions such as System V.
+/// TODO: implement and handle `cfa_state` for more than one epilogue.
 fn insert_common_epilogue(
     inst: ir::Inst,
     stack_size: i64,
     pos: &mut EncCursor,
     reg_type: ir::types::Type,
     csrs: &RegisterSet,
+    isa: &dyn TargetIsa,
+    cfa_state: &mut CFAState,
 ) {
+    let word_size = isa.pointer_bytes() as isize;
     if stack_size > 0 {
         pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
     }
@@ -645,6 +796,12 @@ fn insert_common_epilogue(
     // Pop all the callee-saved registers, stepping backward each time to
     // preserve the correct order.
     let fp_ret = pos.ins().x86_pop(reg_type);
+    // Account for CFA state in the reverse of `insert_common_prologue`.
+    cfa_state.current_depth += word_size;
+    cfa_state.cf_ptr_offset -= word_size;
+    // And now that we're going to overwrite `rbp`, `rsp` is the only way to get to the call frame.
+    cfa_state.cf_ptr_reg = RU::rsp as RegUnit;
+
     pos.prev_inst();
 
     pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
@@ -652,9 +809,37 @@ fn insert_common_epilogue(
 
     for reg in csrs.iter(GPR) {
         let csr_ret = pos.ins().x86_pop(reg_type);
+        cfa_state.current_depth += word_size;
         pos.prev_inst();
 
         pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
         pos.func.dfg.append_inst_arg(inst, csr_ret);
     }
+
+    if let Some(ref mut frame_layout) = pos.func.frame_layout {
+        // Validity checks - if we accounted correctly, CFA state at a return will match CFA state
+        // at the entry of a function.
+        //
+        // current_depth starts assuming a return address is pushed, and cf_ptr_offset is one
+        // pointer below current_depth.
+        assert_eq!(cfa_state.current_depth, -word_size);
+        assert_eq!(cfa_state.cf_ptr_offset, word_size);
+
+        let new_cfa = FrameLayoutChange::CallFrameAddressAt {
+            reg: cfa_state.cf_ptr_reg,
+            offset: cfa_state.cf_ptr_offset,
+        };
+
+        frame_layout
+            .instructions
+            .entry(inst)
+            .and_modify(|insts| {
+                *insts = insts
+                    .iter()
+                    .cloned()
+                    .chain(std::iter::once(new_cfa))
+                    .collect::<Box<[_]>>();
+            })
+            .or_insert_with(|| Box::new([new_cfa]));
+    }
 }