diff --git a/cranelift/codegen/src/branch_to_trap.rs b/cranelift/codegen/src/branch_to_trap.rs new file mode 100644 index 000000000000..4e33a2f36eb1 --- /dev/null +++ b/cranelift/codegen/src/branch_to_trap.rs @@ -0,0 +1,87 @@ +//! Analysis for rewriting branch-to-unconditional-trap into conditional trap +//! instructions. +//! +//! Given this instruction: +//! +//! ```clif +//! brif v0, block1, block2 +//! ``` +//! +//! If we know that `block1` does nothing but immediately trap then we can +//! rewrite that `brif` into the following: +//! +//! ```clif +//! trapnz v0, <trapcode> +//! jump block2 +//! ``` +//! +//! (And we can do the equivalent with `trapz` if `block2` immediately traps). +//! +//! This transformation allows for the conditional trap instructions to be GVN'd +//! and for our egraphs mid-end to generally better optimize the program. We +//! additionally have better codegen in our backends for `trapz`/`trapnz` than +//! branches to unconditional traps. +//! +//! This module only provides the *analysis* of which blocks are "just trap" +//! blocks; the actual rewrite is performed by `simplify_skeleton` ISLE rules in +//! the egraph pass, which consult this analysis via the `just_trap_block` ISLE +//! constructor. + +use crate::FxHashMap; +use crate::inst_predicates::is_pure_for_egraph; +use crate::ir::{self, InstructionData, Opcode}; +use cranelift_entity::EntitySet; + +/// On-demand, memoized analysis of which blocks are "just trap" blocks. +/// +/// A block is a "just trap" block when its terminator is an unconditional +/// `trap` and all of the other instructions in the block are pure. +/// +/// Results are memoized so that it does not matter in which order blocks are +/// analyzed or instructions are processed; callers can ask for the analysis +/// result of any block at any time. +#[derive(Default)] +pub struct BranchToTrapAnalysis { + /// The set of blocks we have already analyzed. 
+ analyzed_blocks: EntitySet, + + /// Given that we have already analyzed a block and found it to be a + /// just-trap block, what is its trap code? + just_trap_block_codes: FxHashMap, +} + +impl BranchToTrapAnalysis { + /// Determine whether `block` is a "just trap" block and, if so, return the + /// trap code of its terminating `trap` instruction. + pub fn analyze_block(&mut self, func: &ir::Function, block: ir::Block) -> Option { + if self.analyzed_blocks.insert(block) { + let code = Self::analyze_block_impl(func, block); + if let Some(code) = code { + let old_entry = self.just_trap_block_codes.insert(block, code); + debug_assert!(old_entry.is_none()); + } + code + } else { + self.just_trap_block_codes.get(&block).copied() + } + } + + fn analyze_block_impl(func: &ir::Function, block: ir::Block) -> Option { + let last = func.layout.last_inst(block)?; + let code = match func.dfg.insts[last] { + InstructionData::Trap { + opcode: Opcode::Trap, + code, + } => code, + _ => return None, + }; + + for inst in func.layout.block_insts(block) { + if inst != last && !is_pure_for_egraph(func, inst) { + return None; + } + } + + Some(code) + } +} diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 177563b8d31b..a81291bd5737 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -16,7 +16,6 @@ use crate::flowgraph::ControlFlowGraph; use crate::inline::{Inline, do_inlining}; use crate::ir::Function; use crate::isa::TargetIsa; -use crate::legalizer::simple_legalize; use crate::loop_analysis::LoopAnalysis; use crate::machinst::{CompiledCode, CompiledCodeStencil}; use crate::nan_canonicalization::do_nan_canonicalization; @@ -173,7 +172,7 @@ impl Context { self.canonicalize_nans(isa)?; } - self.legalize(isa)?; + self.verify_if(isa)?; self.compute_cfg(); self.compute_domtree(); @@ -293,19 +292,6 @@ impl Context { self.verify_if(isa) } - /// Run the legalizer for `isa` on the function. 
- pub fn legalize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - // Legalization invalidates the domtree and loop_analysis by mutating the CFG. - // TODO: Avoid doing this when legalization doesn't actually mutate the CFG. - self.domtree.clear(); - self.loop_analysis.clear(); - self.cfg.clear(); - - // Run some specific legalizations only. - simple_legalize(&mut self.func); - self.verify_if(isa) - } - /// Compute the control flow graph. pub fn compute_cfg(&mut self) { self.cfg.compute(&self.func) diff --git a/cranelift/codegen/src/egraph/mod.rs b/cranelift/codegen/src/egraph/mod.rs index cb083d58ced6..6a4525cb7c58 100644 --- a/cranelift/codegen/src/egraph/mod.rs +++ b/cranelift/codegen/src/egraph/mod.rs @@ -2,6 +2,7 @@ use crate::FxHashSet; use crate::alias_analysis::{AliasAnalysis, LastStores, OptResult}; +use crate::branch_to_trap::BranchToTrapAnalysis; use crate::ctxhash::{CtxEq, CtxHash, NullCtx}; use crate::cursor::{Cursor, CursorPosition, FuncCursor}; use crate::dominator_tree::DominatorTree; @@ -112,6 +113,10 @@ pub struct EgraphPass<'a> { /// The control flow graph, used when eliminating unreachable code /// after branch simplification. cfg: &'a mut ControlFlowGraph, + /// Branch-to-trap analysis: determines which blocks are "just trap" blocks, + /// used by `simplify_skeleton` rules to rewrite conditional branches to + /// trapping blocks into conditional traps. + branch_to_trap_analysis: BranchToTrapAnalysis, /// Which Values do we want to rematerialize in each block where /// they're used? 
remat_values: FxHashSet, @@ -148,6 +153,7 @@ where domtree: &'opt DominatorTree, pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>, pub(crate) alias_analysis_state: &'opt mut LastStores, + pub(crate) branch_to_trap_analysis: &'opt mut BranchToTrapAnalysis, ctrl_plane: &'opt mut ControlPlane, // Held locally during optimization of one node (recursively): pub(crate) rewrite_depth: usize, @@ -642,6 +648,28 @@ where return Some(simplification); } + // `ReplaceBranchCond` is unconditionally accepted: the opcode + // and successors don't change, so we can't use the cost-based + // ranking the other variants do (skeleton cost can't consider + // operand costs). + SkeletonInstSimplification::ReplaceBranchCond { cond } => { + log::trace!(" -> simplify_skeleton: replace condition operand with {cond}"); + return Some(SkeletonInstSimplification::ReplaceBranchCond { cond }); + } + // `ReplaceWithTwo` (e.g. rewriting a branch-to-trap-block into + // a conditional trap + jump) is also unconditionally accepted: + // these rules are always an improvement, but due to its shape, + // cannot participate in the cost-based ranking other variants + // use. + SkeletonInstSimplification::ReplaceWithTwo { first, second } => { + log::trace!( + " -> simplify_skeleton: replace inst with `{}; {}`", + ctx.func.dfg.display_inst(first), + ctx.func.dfg.display_inst(second), + ); + return Some(SkeletonInstSimplification::ReplaceWithTwo { first, second }); + } + // For instruction replacement simplification, we want to check // that the replacements define the same number and types of // values as the original instruction, and also determine @@ -661,16 +689,6 @@ where ); (inst, Some(val)) } - // `ReplaceBranchCond` is unconditionally accepted — the - // opcode and successors don't change, so we can't use the - // cost-based ranking the other variants do (replacing the - // condition with a cheaper sub-expression keeps the same - // opcode/arity, hence the same skeleton cost). 
The first such - // candidate wins; ISLE rule ordering picks the form. - SkeletonInstSimplification::ReplaceBranchCond { cond } => { - log::trace!(" -> simplify_skeleton: replace condition operand with {cond}"); - return Some(SkeletonInstSimplification::ReplaceBranchCond { cond }); - } }; if cfg!(debug_assertions) { @@ -730,6 +748,7 @@ impl<'a> EgraphPass<'a> { alias_analysis, ctrl_plane, cfg, + branch_to_trap_analysis: BranchToTrapAnalysis::default(), stats: Stats::default(), remat_values: FxHashSet::default(), } @@ -920,6 +939,7 @@ impl<'a> EgraphPass<'a> { domtree: &self.domtree, alias_analysis: self.alias_analysis, alias_analysis_state: &mut alias_analysis_state, + branch_to_trap_analysis: &mut self.branch_to_trap_analysis, ctrl_plane: self.ctrl_plane, optimized_values: Default::default(), optimized_insts: Default::default(), @@ -977,11 +997,50 @@ impl<'a> EgraphPass<'a> { value_to_opt_value: &mut SecondaryMap, old_inst: Inst, ) { - let mut forward_val = |cursor: &mut FuncCursor, old_val, new_val| { + // Redirect uses of `old_val` to `new_val`. + let forward_val = |cursor: &mut FuncCursor<'_>, + value_to_opt_value: &mut SecondaryMap<_, _>, + old_val, + new_val| { cursor.func.dfg.change_to_alias(old_val, new_val); value_to_opt_value[old_val] = new_val; }; + // Values created during skeleton-instruction simplification are created + // after we've already computed the `value_to_opt_value` map and are + // therefore missing entries within it. These values are already + // optimized, so map them to themselves. + let self_map_operands = + |dfg: &DataFlowGraph, value_to_opt_value: &mut SecondaryMap<_, _>, inst| { + for val in dfg.inst_values(inst) { + debug_assert!( + value_to_opt_value[val] == val + || value_to_opt_value[val] == Value::reserved_value() + ); + value_to_opt_value[val] = core::cmp::min(value_to_opt_value[val], val); + } + }; + + // Any new instructions produced by a simplification inherit the source + // location of the instruction they replace. 
+ let old_srcloc = cursor.func.srclocs[old_inst]; + // NB: But we only inherit the source location when it is non-default to + // avoid extending the `SecondaryMap`. + let old_srcloc = if old_srcloc.is_default() { + None + } else { + Some(old_srcloc) + }; + + // Rewind the cursor to just before `inst` so that the main loop + // re-processes it on its next iteration. This lets a freshly + // produced/modified skeleton instruction be GVN'd and/or simplified + // further. + fn reprocess_from(cursor: &mut FuncCursor, inst: Inst) { + cursor.goto_inst(inst); + cursor.prev_inst(); + } + let (new_inst, new_val) = match simplification { SkeletonInstSimplification::Remove => { cursor.remove_inst_and_step_back(); @@ -991,11 +1050,9 @@ impl<'a> EgraphPass<'a> { cursor.remove_inst_and_step_back(); let old_val = cursor.func.dfg.first_result(old_inst); cursor.func.dfg.detach_inst_results(old_inst); - forward_val(cursor, old_val, val); + forward_val(cursor, value_to_opt_value, old_val, val); return; } - SkeletonInstSimplification::Replace { inst } => (inst, None), - SkeletonInstSimplification::ReplaceWithVal { inst, val } => (inst, Some(val)), SkeletonInstSimplification::ReplaceBranchCond { cond } => { // Swap the condition operand (argument 0) of the existing // conditional skeleton instruction in place. The opcode and any @@ -1007,10 +1064,51 @@ impl<'a> EgraphPass<'a> { crate::ir::Opcode::Brif | crate::ir::Opcode::Trapz | crate::ir::Opcode::Trapnz, )); cursor.func.dfg.inst_args_mut(old_inst)[0] = cond; + // Re-process the modified instruction so that, e.g., another + // truthiness-preserving layer can be stripped from its + // condition or it can be GVN'd. 
+ self_map_operands(&cursor.func.dfg, value_to_opt_value, old_inst); + reprocess_from(cursor, old_inst); return; } + SkeletonInstSimplification::ReplaceWithTwo { first, second } => { + // We don't forward result values for `ReplaceWithTwo` -- and it + // isn't clear what that would mean when both the new + // instructions have result values -- so we only support + // instructions without results here. + debug_assert!(cursor.func.dfg.inst_results(old_inst).is_empty()); + debug_assert!(cursor.func.dfg.inst_results(first).is_empty()); + debug_assert!(cursor.func.dfg.inst_results(second).is_empty()); + + // If the instruction we're replacing is a block terminator, + // then the trailing new instruction must also be a terminator, + // so the block remains well-formed. + debug_assert!( + !cursor.func.dfg.insts[old_inst].opcode().is_terminator() + || cursor.func.dfg.insts[second].opcode().is_terminator() + ); + + if let Some(old_srcloc) = old_srcloc { + cursor.func.srclocs[first] = old_srcloc; + cursor.func.srclocs[second] = old_srcloc; + } + + cursor.insert_inst(first); + cursor.replace_inst(second); + self_map_operands(&cursor.func.dfg, value_to_opt_value, first); + self_map_operands(&cursor.func.dfg, value_to_opt_value, second); + reprocess_from(cursor, first); + return; + } + + SkeletonInstSimplification::Replace { inst } => (inst, None), + SkeletonInstSimplification::ReplaceWithVal { inst, val } => (inst, Some(val)), }; + if let Some(old_srcloc) = old_srcloc { + cursor.func.srclocs[new_inst] = old_srcloc; + } + // Replace the old instruction with the new one. 
cursor.replace_inst(new_inst); @@ -1029,10 +1127,11 @@ impl<'a> EgraphPass<'a> { for i in 0..cursor.func.dfg.inst_results(old_inst).len() { let old_val = cursor.func.dfg.inst_results(old_inst)[i]; let new_val = next_new_val(&cursor.func.dfg); - forward_val(cursor, old_val, new_val); + forward_val(cursor, value_to_opt_value, old_val, new_val); } - cursor.goto_inst(new_inst); + self_map_operands(&cursor.func.dfg, value_to_opt_value, new_inst); + reprocess_from(cursor, new_inst); } /// Scoped elaboration: compute a final ordering of op computation diff --git a/cranelift/codegen/src/legalizer/branch_to_trap.rs b/cranelift/codegen/src/legalizer/branch_to_trap.rs deleted file mode 100644 index c35f03e86429..000000000000 --- a/cranelift/codegen/src/legalizer/branch_to_trap.rs +++ /dev/null @@ -1,98 +0,0 @@ -//! Rewrite branch-to-unconditional-trap into conditional trap instructions. -//! -//! Given this instruction: -//! -//! ```clif -//! brif v0, block1, block2 -//! ``` -//! -//! If we know that `block1` does nothing but immediately trap then we can -//! rewrite that `brif` into the following: -//! -//! ```clif -//! trapz v0, -//! jump block2 -//! ``` -//! -//! (And we can do the equivalent with `trapz` if `block2` immediately traps). -//! -//! This transformation allows for the conditional trap instructions to be GVN'd -//! and for our egraphs mid-end to generally better optimize the program. We -//! additionally have better codegen in our backends for `trapz` than branches -//! to unconditional traps. - -use super::*; - -#[derive(Default)] -pub struct BranchToTrap { - /// The set of blocks that contain exactly one unconditional trap - /// instruction. - just_trap_blocks: EntitySet, -} - -impl BranchToTrap { - /// Analyze the given block. - /// - /// The `block` must be terminated by a `trap` instruction. 
- pub fn analyze_trapping_block(&mut self, func: &ir::Function, block: ir::Block) { - if func.layout.block_contains_exactly_one_inst(block) { - self.just_trap_blocks.insert(block); - } - } - - fn just_trap_block_code(&self, func: &ir::Function, block: ir::Block) -> ir::TrapCode { - debug_assert!(self.just_trap_blocks.contains(block)); - debug_assert!(func.layout.block_contains_exactly_one_inst(block)); - let inst = func.layout.first_inst(block).unwrap(); - match func.dfg.insts[inst] { - InstructionData::Trap { code, .. } => code, - _ => unreachable!(), - } - } - - /// Process a `brif` instruction, potentially performing our rewrite. - /// - /// The `inst` must be a `brif` containing the given `arg` and `blocks`. - pub fn process_brif( - &self, - func: &mut ir::Function, - inst: ir::Inst, - arg: ir::Value, - blocks: [ir::BlockCall; 2], - ) { - let consequent = blocks[0].block(&func.dfg.value_lists); - let alternative = blocks[1].block(&func.dfg.value_lists); - - if self.just_trap_blocks.contains(consequent) { - let mut pos = FuncCursor::new(func); - pos.use_srcloc( - pos.func - .layout - .first_inst(consequent) - .expect("just-trap blocks have exactly one inst"), - ); - pos.goto_inst(inst); - - let code = self.just_trap_block_code(pos.func, consequent); - pos.ins().trapnz(arg, code); - - let args: SmallVec<[_; 8]> = blocks[1].args(&pos.func.dfg.value_lists).collect(); - pos.func.replace(inst).jump(alternative, &args); - } else if self.just_trap_blocks.contains(alternative) { - let mut pos = FuncCursor::new(func); - pos.use_srcloc( - pos.func - .layout - .first_inst(alternative) - .expect("just-trap blocks have exactly one inst"), - ); - pos.goto_inst(inst); - - let code = self.just_trap_block_code(pos.func, alternative); - pos.ins().trapz(arg, code); - - let args: SmallVec<[_; 8]> = blocks[0].args(&pos.func.dfg.value_lists).collect(); - pos.func.replace(inst).jump(consequent, &args); - } - } -} diff --git a/cranelift/codegen/src/legalizer/mod.rs 
b/cranelift/codegen/src/legalizer/mod.rs deleted file mode 100644 index ed09a05b7414..000000000000 --- a/cranelift/codegen/src/legalizer/mod.rs +++ /dev/null @@ -1,75 +0,0 @@ -//! Legalize instructions. -//! -//! A legal instruction is one that can be mapped directly to a machine code instruction for the -//! target ISA. The `legalize_function()` function takes as input any function and transforms it -//! into an equivalent function using only legal instructions. -//! -//! The characteristics of legal instructions depend on the target ISA, so any given instruction -//! can be legal for one ISA and illegal for another. -//! -//! Besides transforming instructions, the legalizer also fills out the `function.encodings` map -//! which provides a legal encoding recipe for every instruction. -//! -//! The legalizer does not deal with register allocation constraints. These constraints are derived -//! from the encoding recipes, and solved later by the register allocator. - -use crate::cursor::{Cursor, FuncCursor}; -use crate::ir::{self, InstBuilder, InstructionData}; -use crate::trace; -use cranelift_entity::EntitySet; -use smallvec::SmallVec; - -mod branch_to_trap; -use self::branch_to_trap::BranchToTrap; - -/// A simple, naive backwards walk over every instruction in every block in the -/// function's layout. -/// -/// This does not guarantee any kind of reverse post-order visitation or -/// anything like that, it is just iterating over blocks in reverse layout -/// order, not any kind of control-flow graph visitation order. -fn backward_walk( - func: &mut ir::Function, - mut f: impl FnMut(&mut ir::Function, ir::Block, ir::Inst), -) { - let mut pos = FuncCursor::new(func); - while let Some(block) = pos.prev_block() { - while let Some(inst) = pos.prev_inst() { - f(pos.func, block, inst); - } - } -} - -/// Perform a simple legalization by expansion of the function, without -/// platform-specific transforms. 
-pub fn simple_legalize(func: &mut ir::Function) { - trace!("Pre-legalization function:\n{}", func.display()); - - let mut branch_to_trap = BranchToTrap::default(); - - // We walk the IR backwards because in practice, given the way that - // frontends tend to produce CLIF, this means we will visit in roughly - // reverse post order, which is helpful for getting the most optimizations - // out of the `branch-to-trap` pass that we can (it must analyze trapping - // blocks before it can rewrite branches to them) but the order does not - // actually affect correctness. - backward_walk(func, |func, block, inst| match func.dfg.insts[inst] { - InstructionData::Trap { - opcode: ir::Opcode::Trap, - code: _, - } => { - branch_to_trap.analyze_trapping_block(func, block); - } - InstructionData::Brif { - opcode: ir::Opcode::Brif, - arg, - blocks, - } => { - branch_to_trap.process_brif(func, inst, arg, blocks); - } - - _ => {} - }); - - trace!("Post-legalization function:\n{}", func.display()); -} diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 08de586ac985..0fad51d52e88 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -81,13 +81,13 @@ pub use crate::machinst::{ }; mod alias_analysis; +mod branch_to_trap; mod constant_hash; mod context; mod ctxhash; mod egraph; mod inst_predicates; mod isle_prelude; -mod legalizer; mod nan_canonicalization; mod opts; mod ranges; diff --git a/cranelift/codegen/src/opts.rs b/cranelift/codegen/src/opts.rs index 3471acb2f181..ac68fb2bb3b2 100644 --- a/cranelift/codegen/src/opts.rs +++ b/cranelift/codegen/src/opts.rs @@ -9,7 +9,7 @@ pub use crate::ir::immediates::{Ieee16, Ieee32, Ieee64, Ieee128, Imm64, Offset32 use crate::ir::instructions::InstructionFormat; pub use crate::ir::types::*; pub use crate::ir::{ - AtomicRmwOp, BlockCall, Constant, DynamicStackSlot, FuncRef, GlobalValue, Immediate, + AtomicRmwOp, Block, BlockCall, Constant, DynamicStackSlot, FuncRef, GlobalValue, Immediate, 
InstructionData, JumpTable, MemFlagsData, Opcode, StackSlot, TrapCode, Type, Value, }; use crate::isle_common_prelude_methods; @@ -263,6 +263,16 @@ impl<'a, 'b, 'c> generated_code::Context for IsleContext<'a, 'b, 'c> { } } + fn block_call_block(&mut self, block_call: BlockCall) -> Block { + block_call.block(&self.ctx.func.dfg.value_lists) + } + + fn just_trap_block(&mut self, block: &Block) -> Option { + self.ctx + .branch_to_trap_analysis + .analyze_block(self.ctx.func, *block) + } + fn iconst_sextend_etor( &mut self, (ty, inst_data): (Type, InstructionData), diff --git a/cranelift/codegen/src/opts/skeleton.isle b/cranelift/codegen/src/opts/skeleton.isle index e779ce302687..b1cb5304c8c1 100644 --- a/cranelift/codegen/src/opts/skeleton.isle +++ b/cranelift/codegen/src/opts/skeleton.isle @@ -26,6 +26,19 @@ (rule (simplify_skeleton (brif (iconst_u _ 0) _ else_dest)) (jump else_dest)) +;; Conditional branch to a block that does nothing but trap: rewrite into a +;; conditional trap plus a jump to the surviving successor. The trapping +;; successor's block-call arguments are dropped (it just traps); the surviving +;; successor's block call (with its arguments) is reused in the new `jump`. +(rule (simplify_skeleton (brif condition consequent alternative)) + (if-let trap_code (just_trap_block (block_call_block consequent))) + (replace_with_two (trapnz condition trap_code) + (jump alternative))) +(rule (simplify_skeleton (brif condition consequent alternative)) + (if-let trap_code (just_trap_block (block_call_block alternative))) + (replace_with_two (trapz condition trap_code) + (jump consequent))) + ;; Branch table with known index: resolve the destination from the ;; jump table data and replace with an unconditional jump. 
(rule (simplify_skeleton (br_table (iconst_u _ idx) jt)) diff --git a/cranelift/codegen/src/prelude_opt.isle b/cranelift/codegen/src/prelude_opt.isle index f1c80b6e66a8..f333d51bd019 100644 --- a/cranelift/codegen/src/prelude_opt.isle +++ b/cranelift/codegen/src/prelude_opt.isle @@ -42,6 +42,16 @@ (decl pure resolve_jump_table_entry (JumpTable u64) BlockCall) (extern constructor resolve_jump_table_entry resolve_jump_table_entry) +;; Get the destination block of a block call. +(decl pure block_call_block (BlockCall) Block) +(extern constructor block_call_block block_call_block) + +;; If `block` is a "just trap" block (its terminator is an unconditional `trap` +;; and all of its other instructions are pure) return the trap code of that +;; terminating `trap`. +(decl pure partial just_trap_block (Block) TrapCode) +(extern constructor just_trap_block just_trap_block) + (rule (eq ty x y) (icmp ty (IntCC.Equal) x y)) (rule (ne ty x y) (icmp ty (IntCC.NotEqual) x y)) (rule (ult ty x y) (icmp ty (IntCC.UnsignedLessThan) x y)) @@ -103,7 +113,15 @@ ;; ;; Use this to rewrite conditions which simplify within a boolean-test ;; context (e.g. `(brif (ctz x))` can be simplified to `(brif x)`). - (ReplaceBranchCond (cond Value)))) + (ReplaceBranchCond (cond Value)) + + ;; Replace the instruction being simplified with two instructions, + ;; spliced into the layout in order: `first` then `second`. + ;; + ;; Neither instruction may define any results, and if the instruction + ;; being replaced is a block terminator then `second` must also be a + ;; block terminator. 
+ (ReplaceWithTwo (first Inst) (second Inst)))) (decl pure inst_to_skeleton_inst_simplification (Inst) SkeletonInstSimplification) (rule (inst_to_skeleton_inst_simplification inst) @@ -123,6 +141,10 @@ (rule (replace_branch_cond new_cond) (SkeletonInstSimplification.ReplaceBranchCond new_cond)) +(decl pure replace_with_two (Inst Inst) SkeletonInstSimplification) +(rule (replace_with_two first second) + (SkeletonInstSimplification.ReplaceWithTwo first second)) + (convert Inst SkeletonInstSimplification inst_to_skeleton_inst_simplification) (convert Value SkeletonInstSimplification value_to_skeleton_inst_simplification) diff --git a/cranelift/filetests/filetests/egraph/branch-to-trap.clif b/cranelift/filetests/filetests/egraph/branch-to-trap.clif new file mode 100644 index 000000000000..5a42db84682c --- /dev/null +++ b/cranelift/filetests/filetests/egraph/branch-to-trap.clif @@ -0,0 +1,160 @@ +test optimize precise-output +set opt_level=speed +target x86_64 + +;; The consequent (taken-if-true) successor is a just-trap block, so the `brif` +;; becomes a `trapnz` plus a `jump` to the surviving successor (carrying its +;; block arguments). The now-unreachable trap block is removed. +function %trapnz(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + brif v0, block1, block2(v1) +block1: + trap user42 +block2(v2: i64): + return v2 +} + +; function %trapnz(i64, i64) -> i64 fast { +; block0(v0: i64, v1: i64): +; trapnz v0, user42 +; jump block2 +; +; block2: +; return v1 +; } + +;; The alternative (taken-if-false) successor is the just-trap block, so the +;; `brif` becomes a `trapz` plus a `jump` to the surviving successor. 
+function %trapz(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + brif v0, block1(v1), block2 +block1(v2: i64): + return v2 +block2: + trap user42 +} + +; function %trapz(i64, i64) -> i64 fast { +; block0(v0: i64, v1: i64): +; trapz v0, user42 +; jump block1 +; +; block1: +; return v1 +; } + +;; Both successors are just-trap blocks, so either rewrite is correct. Here the +;; alternative-successor rule wins, producing a `trapz` (trapping with the +;; alternative's code) and a jump to the still-trapping, still-reachable +;; consequent. +function %could_be_either_doesnt_matter(i64) -> i64 { +block0(v0: i64): + brif v0, block1, block2 +block1: + trap user36 +block2: + trap user42 +} + +; function %could_be_either_doesnt_matter(i64) -> i64 fast { +; block0(v0: i64): +; trapz v0, user42 +; jump block1 +; +; block1: +; trap user36 +; } + +;; A block counts as a just-trap block even when it is not *only* a `trap`: a +;; pure (side-effect-free) prefix is allowed, because a trapping block never +;; returns so its pure instructions are unobservable. The `brif` is rewritten +;; and the pure prefix is dropped with the unreachable block. +function %trap_block_with_pure_prefix(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + brif v0, block1, block2(v1) +block1: + v10 = iadd v0, v0 + v11 = imul v10, v10 + trap user42 +block2(v2: i64): + return v2 +} + +; function %trap_block_with_pure_prefix(i64, i64) -> i64 fast { +; block0(v0: i64, v1: i64): +; trapnz v0, user42 +; jump block2 +; +; block2: +; return v1 +; } + +;; A block with a *side-effecting* instruction before the `trap` is NOT a +;; just-trap block, so the `brif` is left alone. 
+function %trap_block_with_side_effect_not_rewritten(i64, i64) { +block0(v0: i64, v1: i64): + brif v0, block1, block2 +block1: + store v1, v0 + trap user42 +block2: + return +} + +; function %trap_block_with_side_effect_not_rewritten(i64, i64) fast { +; block0(v0: i64, v1: i64): +; brif v0, block1, block2 +; +; block1: +; store.i64 v1, v0 +; trap user42 +; +; block2: +; return +; } + +;; The `brif` folds into a `trapz v0, user1` that is identical to the `trapz` +;; already dominating it, so the newly inserted trap is re-processed and GVN'd +;; away (only one `trapz` remains). +function %folded_trap_is_gvned(i32) -> i32 { +block0(v0: i32): + trapz v0, user1 + brif v0, block1, block2 +block1: + return v0 +block2: + trap user1 +} + +; function %folded_trap_is_gvned(i32) -> i32 fast { +; block0(v0: i32): +; trapz v0, user1 +; jump block1 +; +; block1: +; return v0 +; } + +;; The `brif`'s condition is a `uextend`, so after it folds into a `trapnz` the +;; newly inserted trap is re-processed and the truthiness-preserving `uextend` +;; is stripped from its condition (`trapnz v0` rather than `trapnz (uextend +;; v0)`). 
+function %folded_trap_condition_is_simplified(i8) -> i8 { +block0(v0: i8): + v1 = uextend.i32 v0 + brif v1, block1, block2 +block1: + trap user1 +block2: + return v0 +} + +; function %folded_trap_condition_is_simplified(i8) -> i8 fast { +; block0(v0: i8): +; trapnz v0, user1 +; jump block2 +; +; block2: +; return v0 +; } + diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index da477c19893b..bb2191bc31c9 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -1,4 +1,5 @@ test compile precise-output +set opt_level=speed target x86_64 function %f0(i32, i32) -> i32 { @@ -1037,28 +1038,28 @@ block1(v5: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0x1, %r11d -; movl $0x2, %edx -; movl $0x3, %ecx -; movl $0x4, %eax +; movl $0x4, %r11d +; movl $0x1, %edx +; movl $0x2, %ecx +; movl $0x3, %eax ; movl $0x4, %r9d ; movl %edi, %r10d ; cmpl %r9d, %r10d ; cmovbl %r10d, %r9d ; br_table %r9, %rdi, %rsi ; block1: +; movq %r11, %rax ; jmp label6 ; block2: -; movq %r11, %rax +; movq %rdx, %rax ; jmp label6 ; block3: -; movq %rdx, %rax +; movq %rcx, %rax ; jmp label6 ; block4: -; movq %rdx, %rax +; movq %rcx, %rax ; jmp label6 ; block5: -; movq %rcx, %rax ; jmp label6 ; block6: ; movq %rbp, %rsp @@ -1070,10 +1071,10 @@ block1(v5: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movl $1, %r11d -; movl $2, %edx -; movl $3, %ecx -; movl $4, %eax +; movl $4, %r11d +; movl $1, %edx +; movl $2, %ecx +; movl $3, %eax ; movl $4, %r9d ; movl %edi, %r10d ; cmpl %r9d, %r10d @@ -1082,30 +1083,24 @@ block1(v5: i32): ; movslq (%rdi, %r9, 4), %rsi ; addq %rsi, %rdi ; jmpq *%rdi -; sbbl %eax, (%rax) -; addb %al, (%rax) -; andl %eax, (%rax) +; sbbb $0, %al ; addb %al, (%rax) -; subl %eax, (%rax) +; andb $0, %al ; addb %al, (%rax) -; xorl %eax, (%rax) -; addb %al, (%rax) -; xorb $0, %al +; subb $0, %al ; addb %al, (%rax) ; block2: ; offset 
0x4d -; jmp 0x6d -; block3: ; offset 0x52 ; movq %r11, %rax -; jmp 0x6d -; block4: ; offset 0x5a -; movq %rdx, %rax -; jmp 0x6d -; block5: ; offset 0x62 +; jmp 0x68 +; block3: ; offset 0x55 ; movq %rdx, %rax -; jmp 0x6d -; block6: ; offset 0x6a +; jmp 0x68 +; block4: ; offset 0x5d +; movq %rcx, %rax +; jmp 0x68 +; block5: ; offset 0x65 ; movq %rcx, %rax -; block7: ; offset 0x6d +; block6: ; offset 0x68 ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/legalizer/branch-to-unconditional-trap.clif b/cranelift/filetests/filetests/legalizer/branch-to-unconditional-trap.clif deleted file mode 100644 index 8d742c63b1ec..000000000000 --- a/cranelift/filetests/filetests/legalizer/branch-to-unconditional-trap.clif +++ /dev/null @@ -1,41 +0,0 @@ -test legalizer -target aarch64 - -function %trapnz(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - brif v0, block1, block2(v1) -block1: - trap user42 -block2(v2: i64): - return v2 -} - -; check: block0(v0: i64, v1: i64): -; nextln: trapnz v0, user42 -; nextln: jump block2(v1) - -function %trapz(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - brif v0, block1(v1), block2 -block1(v2: i64): - return v2 -block2: - trap user42 -} - -; check: block0(v0: i64, v1: i64): -; nextln: trapz v0, user42 -; nextln: jump block1(v1) - -function %could_be_either_doesnt_matter(i64) -> i64 { -block0(v0: i64): - brif v0, block1, block2 -block1: - trap user36 -block2: - trap user42 -} - -; check: block0(v0: i64): -; nextln: trapnz v0, user36 -; nextln: jump block2 diff --git a/cranelift/filetests/filetests/legalizer/bxor_imm.clif b/cranelift/filetests/filetests/legalizer/bxor_imm.clif deleted file mode 100644 index 4fc9a102f192..000000000000 --- a/cranelift/filetests/filetests/legalizer/bxor_imm.clif +++ /dev/null @@ -1,13 +0,0 @@ -test legalizer -target aarch64 -target x86_64 - -function %foo(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v3 = iconst.i64 0x100000000 - v2 = bxor v0, v3 - return v2 -} - -; check: v3 = 
iconst.i64 0x0001_0000_0000 -; check: v2 = bxor v0, v3 diff --git a/cranelift/filetests/filetests/legalizer/iconst-i64.clif b/cranelift/filetests/filetests/legalizer/iconst-i64.clif deleted file mode 100644 index 359ca772e95b..000000000000 --- a/cranelift/filetests/filetests/legalizer/iconst-i64.clif +++ /dev/null @@ -1,13 +0,0 @@ -test legalizer -target aarch64 -target i686 - -function %foo() -> i64 { -block0: - v1 = iconst.i64 0x6400000042 - return v1 -} - -; check: v2 = iconst.i32 66 -; check: v3 = iconst.i32 100 -; check: v1 = iconcat v2, v3 diff --git a/cranelift/filetests/filetests/legalizer/isplit-bb.clif b/cranelift/filetests/filetests/legalizer/isplit-bb.clif deleted file mode 100644 index c89118cebcf9..000000000000 --- a/cranelift/filetests/filetests/legalizer/isplit-bb.clif +++ /dev/null @@ -1,25 +0,0 @@ -test legalizer -target aarch64 -target x86_64 - -function u0:0(i128, i128, i64) -> i128 system_v { -block0(v0: i128, v1: i128, v2: i64): - jump block1 - -block1: - v17 = iadd v0, v1 - v20 = iadd v1, v17 - jump block79 - -block79: - v425 = iconst.i64 0 - v875 = iconst.i64 1 - v426 = icmp eq v425, v875 - brif v426, block80, block85(v20, v17) - -block80: - trap user1 - -block85(v462: i128, v874: i128): - trap user1 -} diff --git a/cranelift/filetests/src/lib.rs b/cranelift/filetests/src/lib.rs index 5443d31236d4..f12483098865 100644 --- a/cranelift/filetests/src/lib.rs +++ b/cranelift/filetests/src/lib.rs @@ -23,7 +23,6 @@ mod test_compile; mod test_domtree; mod test_inline; mod test_interpret; -mod test_legalizer; mod test_optimize; mod test_print_cfg; mod test_run; @@ -92,7 +91,6 @@ fn new_subtest(parsed: &TestCommand) -> anyhow::Result "domtree" => test_domtree::subtest(parsed), "inline" => test_inline::subtest(parsed), "interpret" => test_interpret::subtest(parsed), - "legalizer" => test_legalizer::subtest(parsed), "optimize" => test_optimize::subtest(parsed), "print-cfg" => test_print_cfg::subtest(parsed), "run" => test_run::subtest(parsed), diff 
--git a/cranelift/filetests/src/test_inline.rs b/cranelift/filetests/src/test_inline.rs index 68df7089cc32..183ab875cbb0 100644 --- a/cranelift/filetests/src/test_inline.rs +++ b/cranelift/filetests/src/test_inline.rs @@ -67,13 +67,8 @@ impl SubTest for TestInline { } fn run(&self, func: Cow, context: &Context) -> Result<()> { - // Legalize this function. let isa = context.isa.unwrap(); let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - comp_ctx - .legalize(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e)) - .context("error while legalizing")?; // Insert this function in our map for inlining into subsequent // functions. diff --git a/cranelift/filetests/src/test_legalizer.rs b/cranelift/filetests/src/test_legalizer.rs deleted file mode 100644 index 77b33b4c8784..000000000000 --- a/cranelift/filetests/src/test_legalizer.rs +++ /dev/null @@ -1,46 +0,0 @@ -//! Test command for checking the IR legalizer. -//! -//! The `test legalizer` test command runs each function through `legalize_function()` and sends -//! the result to filecheck. 
- -use crate::subtest::{Context, SubTest, run_filecheck}; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestLegalizer; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "legalizer"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {parsed}"); - } - Ok(Box::new(TestLegalizer)) -} - -impl SubTest for TestLegalizer { - fn name(&self) -> &'static str { - "legalizer" - } - - fn is_mutating(&self) -> bool { - true - } - - fn needs_isa(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - let isa = context.isa.expect("legalizer needs an ISA"); - - comp_ctx.compute_cfg(); - comp_ctx - .legalize(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?; - - let text = comp_ctx.func.display().to_string(); - run_filecheck(&text, context) - } -} diff --git a/cranelift/filetests/src/test_safepoint.rs b/cranelift/filetests/src/test_safepoint.rs index 955147c65199..5432459370c5 100644 --- a/cranelift/filetests/src/test_safepoint.rs +++ b/cranelift/filetests/src/test_safepoint.rs @@ -21,11 +21,7 @@ impl SubTest for TestSafepoint { fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - let isa = context.isa.expect("register allocator needs an ISA"); comp_ctx.compute_cfg(); - comp_ctx - .legalize(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?; comp_ctx.compute_domtree(); let text = comp_ctx.func.display().to_string(); diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index e22914e12509..793cd20664de 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -40,7 +40,7 @@ use wasmtime_environ::obj::{ELF_WASMTIME_EXCEPTIONS, ELF_WASMTIME_FRAMES}; use 
wasmtime_environ::{ Abi, AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, CompiledFunctionBody, DefinedFuncIndex, FlagValue, FrameInstPos, FrameStackShape, FrameStateSlotBuilder, - FrameTableBuilder, FuncKey, FunctionBodyData, FunctionLoc, GetPtrSize, HostCall, Inlining, + FrameTableBuilder, FuncKey, FunctionBodyData, FunctionLoc, GetPtrSize, HostCall, InliningCompiler, ModulePC, ModuleStartup, ModuleTranslation, ModuleTypesBuilder, PtrSize, StackMapSection, StaticModuleIndex, TrapEncodingBuilder, TrapSentinel, TripleExt, Tunables, WasmFuncType, WasmValType, prelude::*, @@ -561,14 +561,6 @@ impl wasmtime_environ::Compiler for Compiler { &mut func_env, )?; - if self.tunables.inlining != Inlining::No { - compiler - .cx - .codegen_context - .legalize(isa) - .map_err(|e| CompileError::Codegen(e.to_string()))?; - } - let needs_gc_heap = func_env.needs_gc_heap(); if let Some((_, slot_builder)) = func_env.state_slot { diff --git a/tests/disas/arith.wat b/tests/disas/arith.wat index 26db3991b31d..dce925c70231 100644 --- a/tests/disas/arith.wat +++ b/tests/disas/arith.wat @@ -27,8 +27,10 @@ ;; @0021 v3 = iconst.i32 4 ;; @0023 v4 = iconst.i32 4 ;; @0025 v5 = isub v3, v4 ; v3 = 4, v4 = 4 -;; @002c trapnz v5, user12 -;; @002a jump block4 +;; @002a brif v5, block2, block4 +;; +;; block2: +;; @002c trap user12 ;; ;; block4: ;; @002e v6 = iconst.i32 6 diff --git a/tests/disas/conditional-traps.wat b/tests/disas/conditional-traps.wat index b68120487116..6927686c7152 100644 --- a/tests/disas/conditional-traps.wat +++ b/tests/disas/conditional-traps.wat @@ -30,7 +30,7 @@ ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64, v2: i32): -;; @0031 trapnz v2, user12 +;; @002f trapnz v2, user12 ;; @002f jump block3 ;; ;; block3: @@ -51,7 +51,7 @@ ;; block0(v0: i64, v1: i64, v2: i32): ;; @0038 v3 = iconst.i32 0 ;; @0038 v4 = icmp eq v2, v3 ; v3 = 0 -;; @003b trapnz v4, user12 +;; @0039 trapnz v4, user12 ;; @0039 jump block3 ;; ;; block3: diff --git 
a/tests/disas/gc/drc/struct-new-default.wat b/tests/disas/gc/drc/struct-new-default.wat index 180e42b3f929..500ad58e943c 100644 --- a/tests/disas/gc/drc/struct-new-default.wat +++ b/tests/disas/gc/drc/struct-new-default.wat @@ -44,7 +44,7 @@ ;; @0021 v18 = iconst.i64 28 ;; @0021 v19 = iadd v15, v18 ; v18 = 28 ;; @0021 istore8 user2 little region4 v4, v19 ; v4 = 0 -;; jump block3 +;; @0021 jump block3 ;; ;; block3: ;; v60 = iconst.i32 0 diff --git a/tests/disas/if-reachability-translation-2.wat b/tests/disas/if-reachability-translation-2.wat index 6e7c3dce315c..d0537025edf0 100644 --- a/tests/disas/if-reachability-translation-2.wat +++ b/tests/disas/if-reachability-translation-2.wat @@ -22,8 +22,10 @@ ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64, v2: i32): -;; @001d trapnz v2, user12 -;; @001b jump block4 +;; @001b brif v2, block2, block4 +;; +;; block2: +;; @001d trap user12 ;; ;; block4: ;; @0020 jump block3 diff --git a/tests/disas/if-reachability-translation-3.wat b/tests/disas/if-reachability-translation-3.wat index c5da264d4a6c..0cf8f6ff4bc5 100644 --- a/tests/disas/if-reachability-translation-3.wat +++ b/tests/disas/if-reachability-translation-3.wat @@ -22,11 +22,13 @@ ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64, v2: i32): -;; @001f trapz v2, user12 -;; @001b jump block2 +;; @001b brif v2, block2, block4 ;; ;; block2: -;; jump block3 +;; @001e jump block3 +;; +;; block4: +;; @001f trap user12 ;; ;; block3: ;; @0021 v4 = iconst.i32 0 diff --git a/tests/disas/if-reachability-translation-4.wat b/tests/disas/if-reachability-translation-4.wat index defdc3dbddd6..4f3522c9aa82 100644 --- a/tests/disas/if-reachability-translation-4.wat +++ b/tests/disas/if-reachability-translation-4.wat @@ -22,8 +22,10 @@ ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64, v2: i32): -;; @001d trapnz v2, user12 -;; @001b jump block4 +;; @001b brif v2, block2, block4 +;; +;; block2: +;; @001d trap user12 ;; ;; block4: ;; @001f trap user12 diff --git 
a/tests/disas/if-reachability-translation-5.wat b/tests/disas/if-reachability-translation-5.wat index e74ca134bd1f..9bb409436f3e 100644 --- a/tests/disas/if-reachability-translation-5.wat +++ b/tests/disas/if-reachability-translation-5.wat @@ -24,12 +24,16 @@ ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64, v2: i32, v3: i32): -;; @0024 trapz v2, user12 -;; @001c jump block2 +;; @001c brif v2, block2, block5 ;; ;; block2: -;; @0022 trapz.i32 v3, user12 -;; @0020 jump block3 +;; @0020 brif.i32 v3, block3, block4 +;; +;; block4: +;; @0022 trap user12 +;; +;; block5: +;; @0024 trap user12 ;; ;; block3: ;; @0026 v5 = iconst.i32 0 diff --git a/tests/disas/if-reachability-translation-6.wat b/tests/disas/if-reachability-translation-6.wat index d43fe819d4c9..2b3ba1c7dec6 100644 --- a/tests/disas/if-reachability-translation-6.wat +++ b/tests/disas/if-reachability-translation-6.wat @@ -24,12 +24,16 @@ ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64, v2: i32, v3: i32): -;; @001e trapnz v2, user12 -;; @001c jump block4 +;; @001c brif v2, block2, block4 +;; +;; block2: +;; @001e trap user12 ;; ;; block4: -;; @0024 trapz.i32 v3, user12 -;; @0022 jump block3 +;; @0022 brif.i32 v3, block3, block5 +;; +;; block5: +;; @0024 trap user12 ;; ;; block3: ;; @0026 v5 = iconst.i32 0 diff --git a/tests/disas/if-unreachable-else-params-2.wat b/tests/disas/if-unreachable-else-params-2.wat index 4e17a81262fb..6191bfc478e0 100644 --- a/tests/disas/if-unreachable-else-params-2.wat +++ b/tests/disas/if-unreachable-else-params-2.wat @@ -30,8 +30,7 @@ ;; ;; block0(v0: i64, v1: i64, v2: i32, v3: i32): ;; @0049 v5 = f64const 0x1.0000000000000p0 -;; @005d trapz v3, user12 -;; @0056 jump block2 +;; @0056 brif v3, block2, block4 ;; ;; block2: ;; @0058 v7 = uextend.i64 v2 @@ -40,6 +39,9 @@ ;; @0058 v10 = sload16.i64 little region2 v9 ;; @005c jump block3 ;; +;; block4: +;; @005d trap user12 +;; ;; block3: ;; @005f jump block1 ;; diff --git a/tests/disas/stack-switching/symmetric-switch.wat 
b/tests/disas/stack-switching/symmetric-switch.wat index cf307d21cc6d..06d2b6e58523 100644 --- a/tests/disas/stack-switching/symmetric-switch.wat +++ b/tests/disas/stack-switching/symmetric-switch.wat @@ -76,8 +76,7 @@ ;; block2(v28: i64, v29: i64): ;; @003e v30 = iconst.i64 1 ;; @003e v31 = icmp eq v28, v30 ; v30 = 1 -;; @003e trapnz v31, user22 -;; @003e jump block3 +;; @003e brif v31, block7, block3 ;; ;; block3: ;; @003e v32 = load.i64 notrap aligned v29+48 @@ -104,6 +103,9 @@ ;; @003e v48 = iadd.i32 v39, v47 ; v47 = 1 ;; @003e brif v46, block6, block4(v48) ;; +;; block7 cold: +;; @003e trap user22 +;; ;; block6: ;; @003e store.i64 notrap aligned v29, v27+64 ;; @003e v49 = iconst.i64 120