diff --git a/RELEASES.md b/RELEASES.md index 5788136b5655..8076d21a93d7 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -4,6 +4,12 @@ Unreleased. ### Added +* Added opt-in support for the WebAssembly branch-hinting proposal: the + `metadata.code.branch_hint` custom section is parsed and used to mark cold + blocks during Cranelift compilation, behind `Config::wasm_branch_hinting` + (off by default). + [#13459](https://github.com/bytecodealliance/wasmtime/pull/13459) + ### Changed -------------------------------------------------------------------------------- diff --git a/crates/c-api/include/wasmtime/config.h b/crates/c-api/include/wasmtime/config.h index 1575fcaa40fc..e01b2dd5477e 100644 --- a/crates/c-api/include/wasmtime/config.h +++ b/crates/c-api/include/wasmtime/config.h @@ -305,6 +305,14 @@ WASMTIME_CONFIG_PROP(void, wasm_memory64, bool) */ WASMTIME_CONFIG_PROP(void, wasm_wide_arithmetic, bool) +/** + * \brief Configures whether the WebAssembly branch-hinting proposal is + * enabled. + * + * This setting is `false` by default. + */ +WASMTIME_CONFIG_PROP(void, wasm_branch_hinting, bool) + #ifdef WASMTIME_FEATURE_GC /** diff --git a/crates/c-api/src/config.rs b/crates/c-api/src/config.rs index abecac59a95e..1196dc49b526 100644 --- a/crates/c-api/src/config.rs +++ b/crates/c-api/src/config.rs @@ -500,6 +500,11 @@ pub extern "C" fn wasmtime_config_wasm_wide_arithmetic_set(c: &mut wasm_config_t c.config.wasm_wide_arithmetic(enable); } +#[unsafe(no_mangle)] +pub extern "C" fn wasmtime_config_wasm_branch_hinting_set(c: &mut wasm_config_t, enable: bool) { + c.config.wasm_branch_hinting(enable); +} + #[unsafe(no_mangle)] #[cfg(feature = "gc")] pub extern "C" fn wasmtime_config_wasm_exceptions_set(c: &mut wasm_config_t, enable: bool) { diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index be0c1f8837c6..a31492722271 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -451,6 +451,8 @@ wasmtime_option_group! { pub custom_page_sizes: Option, /// Configure support for the wide-arithmetic proposal. pub wide_arithmetic: Option, + /// Configure support for the branch-hinting proposal. + pub branch_hinting: Option, /// Configure support for the extended-const proposal. pub extended_const: Option, /// Configure support for the exceptions proposal. @@ -1202,6 +1204,10 @@ impl CommonOptions { if let Some(enable) = self.wasm.wide_arithmetic.or(all) { config.wasm_wide_arithmetic(enable); } + // Not included in `all_proposals`: off by default until fuzzed. + if let Some(enable) = self.wasm.branch_hinting { + config.wasm_branch_hinting(enable); + } if let Some(enable) = self.wasm.extended_const.or(all) { config.wasm_extended_const(enable); } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 5bc500550328..5cb1f76eca64 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -413,7 +413,7 @@ impl Compiler { context.func.signature = wasm_call_signature(&*self.isa, ty, &self.tunables); let (namespace, index) = key.into_raw_parts(); context.func.name = UserFuncName::User(UserExternalName { namespace, index }); - let mut func_env = FuncEnvironment::new(self, translation, types, ty, key); + let mut func_env = FuncEnvironment::new(self, translation, types, ty, key, None, 0); compiler .cx .func_translator @@ -486,7 +486,20 @@ impl wasmtime_environ::Compiler for Compiler { context.func.collect_debug_info(); } - let mut func_env = FuncEnvironment::new(self, translation, types, wasm_func_ty, key); + // Branch hints are keyed by function-body-relative offset, so the body's + // module-relative start is needed to convert source locations later. + let FunctionBodyData { validator, body } = input; + let func_body_offset = body.get_binary_reader().original_position(); + + let mut func_env = FuncEnvironment::new( + self, + translation, + types, + wasm_func_ty, + key, + Some(func_index), + func_body_offset, + ); // The `stack_limit` global value below is the implementation of stack // overflow checks in Wasmtime. @@ -554,7 +567,6 @@ impl wasmtime_environ::Compiler for Compiler { func_env.stack_limit_at_function_entry = Some(stack_limit); } } - let FunctionBodyData { validator, body } = input; let mut validator = validator.into_validator(mem::take(&mut compiler.cx.validator_allocations)); compiler.cx.func_translator.translate_body( diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index c6f97a67a545..50a7452e9fb5 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -25,8 +25,11 @@ use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap}; use cranelift_frontend::Variable; use cranelift_frontend::{FuncInstBuilder, FunctionBuilder}; use smallvec::{SmallVec, smallvec}; +use std::iter::Peekable; use std::mem; -use wasmparser::{FuncValidator, Operator, WasmFeatures, WasmModuleResources}; +use wasmparser::{ + BranchHint, FuncValidator, Operator, SectionLimitedIntoIter, WasmFeatures, WasmModuleResources, +}; use wasmtime_core::math::f64_cvt_to_int_bounds; use wasmtime_environ::{ BuiltinFunctionIndex, ComponentPC, ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, @@ -235,6 +238,13 @@ pub struct FuncEnvironment<'module_environment> { /// to e.g. record the return-address of a callsite for debuginfo. pub(crate) next_srcloc: ir::SourceLoc, + /// Lazily-decoded branch hints for the current function, in ascending + /// `func_offset` order (as the proposal requires). `None` when the function + /// carries no hints. + branch_hints: Option>>, + /// Module-relative byte offset of the current function body's start. + func_body_offset: usize, + /// Cached alias regions for alias analysis. heap_alias_region: Option, table_alias_region: Option, @@ -252,10 +262,18 @@ impl<'module_environment> FuncEnvironment<'module_environment> { types: &'module_environment ModuleTypesBuilder, wasm_func_ty: &'module_environment WasmFuncType, key: FuncKey, + func_index: Option, + func_body_offset: usize, ) -> Self { let tunables = compiler.tunables(); let builtin_functions = BuiltinFunctions::new(compiler); + // Synthesized functions without a wasm body (e.g. module startup) pass + // `None`, yielding no hints. + let branch_hints = func_index + .and_then(|func_index| translation.branch_hints(func_index)) + .map(|reader| reader.into_iter().peekable()); + // This isn't used during translation, so squash the warning about this // being unused from the compiler. let _ = BuiltinFunctions::raise; @@ -305,12 +323,40 @@ impl<'module_environment> FuncEnvironment<'module_environment> { state_slot: None, next_srcloc: ir::SourceLoc::default(), wasm_module_offset: translation.wasm_module_offset, + + branch_hints, + func_body_offset, heap_alias_region: None, table_alias_region: None, vmctx_alias_region: None, } } + /// Consume the branch hint for the instruction at module-relative `offset` + /// (i.e. `builder.srcloc().bits()`), if any. The lazy decoder only moves + /// forward, making this O(n) over a function body. + pub(crate) fn take_branch_hint(&mut self, offset: usize) -> Option { + // Fast path: no hints (always so when the proposal is off), and this + // runs for every `if`/`br_if`. + let hints = self.branch_hints.as_mut()?; + let rel = u32::try_from(offset.checked_sub(self.func_body_offset)?).ok()?; + loop { + // Hint bytes were validated when the section was decoded, so an error + // here is unexpected; treat it like exhaustion (end of hints). + let hint = *hints.peek()?.as_ref().ok()?; + if hint.func_offset < rel { + hints.next(); + continue; + } + if hint.func_offset == rel { + hints.next(); + return Some(hint); + } + // The next hint is for a later offset; nothing for this branch. + return None; + } + } + pub(crate) fn pointer_type(&self) -> ir::Type { self.isa.pointer_type() } diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index 142eb76d7816..0869f9fc2a74 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -286,6 +286,9 @@ pub fn translate_operator( environ.translate_loop_header(builder)?; } Operator::If { blockty } => { + // Read the hint before `environ` is borrowed mutably below. + let branch_hint = environ.take_branch_hint(builder.srcloc().bits() as usize); + let val = environ.stacks.pop1(); let next_block = builder.create_block(); @@ -330,6 +333,24 @@ pub fn translate_operator( (destination, ElseData::WithElse { else_block }) }; + // Mark the unlikely successor cold per the branch hint. A likely + // condition makes the else block cold; when it is allocated lazily + // (`NoElse`) defer that to `Operator::Else` via `else_is_cold`. + let else_is_cold = match branch_hint { + Some(hint) if !hint.taken => { + builder.set_cold_block(next_block); + false + } + Some(_) => match &else_data { + ElseData::WithElse { else_block } => { + builder.set_cold_block(*else_block); + false + } + ElseData::NoElse { .. } => true, + }, + None => false, + }; + builder.seal_block(next_block); // Only predecessor is the current block. builder.switch_to_block(next_block); @@ -345,6 +366,7 @@ pub fn translate_operator( params.len(), results.len(), *blockty, + else_is_cold, ); } Operator::Else => { @@ -358,6 +380,7 @@ pub fn translate_operator( num_return_values, blocktype, destination, + else_is_cold, .. } => { // We finished the consequent, so record its final @@ -407,6 +430,12 @@ pub fn translate_operator( } }; + // Apply a likely-taken hint deferred from a lazily + // allocated else block (see `Operator::If`). + if else_is_cold { + builder.set_cold_block(else_block); + } + // You might be expecting that we push the parameters for this // `else` block here, something like this: // @@ -3339,6 +3368,7 @@ fn translate_unreachable_operator( 0, 0, blockty, + false, ); } Operator::Loop { blockty: _ } @@ -3984,9 +4014,22 @@ fn translate_br_if( builder: &mut FunctionBuilder, env: &mut FuncEnvironment<'_>, ) { + // Read the hint before `env` is borrowed mutably below. + let branch_hint = env.take_branch_hint(builder.srcloc().bits() as usize); + let val = env.stacks.pop1(); let (br_destination, inputs) = translate_br_if_args(relative_depth, env); let next_block = builder.create_block(); + + if let Some(hint) = branch_hint { + // Likely taken => the fallthrough is cold, else the branch target is. + builder.set_cold_block(if hint.taken { + next_block + } else { + br_destination + }); + } + canonicalise_brif(builder, val, br_destination, inputs, next_block, &[]); builder.seal_block(next_block); // The only predecessor is the current block. diff --git a/crates/cranelift/src/translate/stack.rs b/crates/cranelift/src/translate/stack.rs index f338fe3f0abd..95250df682f7 100644 --- a/crates/cranelift/src/translate/stack.rs +++ b/crates/cranelift/src/translate/stack.rs @@ -58,6 +58,9 @@ pub enum ControlStackFrame { original_stack_size: usize, exit_is_branched_to: bool, blocktype: wasmparser::BlockType, + /// Whether the `else` block, when allocated lazily, should be marked + /// cold (from a branch hint that the condition is likely true). + else_is_cold: bool, /// Was the head of the `if` reachable? head_is_reachable: bool, /// What was the reachability at the end of the consequent? @@ -507,6 +510,7 @@ impl FuncTranslationStacks { num_param_types: usize, num_result_types: usize, blocktype: wasmparser::BlockType, + else_is_cold: bool, ) { debug_assert!(num_param_types <= self.stack.len()); self.assert_debug_stack_is_synced(); @@ -541,6 +545,7 @@ impl FuncTranslationStacks { head_is_reachable: self.reachable, consequent_ends_reachable: None, blocktype, + else_is_cold, }); } diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs index b5b02b9e6feb..542181e55fd6 100644 --- a/crates/environ/src/compile/module_environ.rs +++ b/crates/environ/src/compile/module_environ.rs @@ -116,6 +116,11 @@ pub struct ModuleTranslation<'data> { /// validation process. types: Option, + /// Per-function [`BranchHintReader`]s from the `metadata.code.branch_hint` + /// section, keyed by function index. Populated only when + /// [`Tunables::branch_hinting`] is enabled. + branch_hints: HashMap>, + /// The WebAssembly `start` function, if defined. pub start_func: Option, @@ -173,6 +178,11 @@ pub enum MemorySegmentOffset { Static(u64), } +/// Lazy decoder over the branch hints attached to a single function in the +/// `metadata.code.branch_hint` custom section +/// ([branch-hinting proposal](https://github.com/WebAssembly/branch-hinting)). +pub type BranchHintReader<'a> = wasmparser::SectionLimited<'a, wasmparser::BranchHint>; + impl<'data> ModuleTranslation<'data> { /// Create a new translation for the module with the given index. pub fn new(module_index: StaticModuleIndex) -> Self { @@ -191,6 +201,7 @@ impl<'data> ModuleTranslation<'data> { types: None, runtime_data_map: Default::default(), passive_elem_map: Default::default(), + branch_hints: HashMap::default(), start_func: None, global_initializers: Vec::new(), passive_elements: Default::default(), @@ -200,6 +211,11 @@ impl<'data> ModuleTranslation<'data> { } } + /// Returns the [`BranchHintReader`] for `func`, if the section attached any. + pub fn branch_hints(&self, func: FuncIndex) -> Option> { + self.branch_hints.get(&func).cloned() + } + /// Returns a reference to the type information of the current module. pub fn get_types(&self) -> &Types { self.types @@ -802,6 +818,26 @@ and for re-adding support for interface types you can see this issue: log::warn!("failed to parse name section {e:?}"); } } + KnownCustom::BranchHints(reader) if self.tunables.branch_hinting => { + // Branch hints are advisory and this section is never validated; + // it is decoded lazily during compilation, so record only the + // per-function sub-readers here. Discard the whole section if any + // entry is malformed rather than applying it partially. + let mut hints = HashMap::new(); + let result: wasmparser::Result<()> = reader.into_iter().try_for_each(|func| { + let func = func?; + // A well-formed section lists each function at most once; keep + // the first entry deterministically if it repeats. + hints + .entry(FuncIndex::from_u32(func.func)) + .or_insert(func.hints); + Ok(()) + }); + match result { + Ok(()) => self.result.branch_hints = hints, + Err(e) => log::warn!("failed to parse branch-hint section {e:?}"), + } + } _ => { let name = section.name().trim_end_matches(".dwo"); if name.starts_with(".debug_") { diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs index 113eb9e9abf6..76123c17d1ec 100644 --- a/crates/environ/src/tunables.rs +++ b/crates/environ/src/tunables.rs @@ -188,6 +188,10 @@ define_tunables! { /// Boolean to track whether compiled code retains metadata necessary to /// report extra information on gc heap corruption being detected. pub metadata_for_gc_heap_corruption: bool, + + /// Whether `metadata.code.branch_hint` sections are parsed and used to + /// mark cold blocks during compilation. + pub branch_hinting: bool, } pub struct ConfigTunables { @@ -273,6 +277,7 @@ impl Tunables { gc_heap_may_move: true, metadata_for_internal_asserts: false, metadata_for_gc_heap_corruption: true, + branch_hinting: false, } } diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index ce43cffa3bef..e61fac35b9cb 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -138,6 +138,7 @@ impl Config { tail_call, extended_const, wide_arithmetic, + branch_hinting, component_model_async, component_model_more_async_builtins, component_model_async_stackful, @@ -177,6 +178,7 @@ impl Config { component_model_fixed_length_lists.unwrap_or(false); self.module_config.component_model_implements = component_model_implements.unwrap_or(false); self.module_config.stack_switching = stack_switching.unwrap_or(false); + self.wasmtime.branch_hinting = branch_hinting.unwrap_or(false); // Enable/disable proposals that wasm-smith has knobs for which will be // read when creating `wasmtime::Config`. @@ -346,6 +348,7 @@ impl Config { cfg.wasm.shared_everything_threads = Some(self.module_config.config.shared_everything_threads_enabled); cfg.wasm.wide_arithmetic = Some(self.module_config.config.wide_arithmetic_enabled); + cfg.wasm.branch_hinting = Some(self.wasmtime.branch_hinting); cfg.wasm.exceptions = Some(self.module_config.config.exceptions_enabled); cfg.wasm.stack_switching = Some(self.module_config.stack_switching); cfg.wasm.shared_memory = Some(self.module_config.shared_memory); @@ -608,6 +611,10 @@ pub struct WasmtimeConfig { table_lazy_init: bool, metadata_for_internal_asserts: bool, metadata_for_gc_heap_corruption: bool, + /// Whether the branch-hinting proposal is enabled. wasm-smith does not emit + /// `metadata.code.branch_hint` sections, so for generated modules this only + /// toggles the (otherwise no-op) parsing path. + branch_hinting: bool, /// Configuration for whether wasm is invoked in an async fashion and how /// it's cooperatively time-sliced. diff --git a/crates/test-util/src/wasmtime_wast.rs b/crates/test-util/src/wasmtime_wast.rs index cf7a3bca6f9c..aa0bc5aa77e7 100644 --- a/crates/test-util/src/wasmtime_wast.rs +++ b/crates/test-util/src/wasmtime_wast.rs @@ -39,6 +39,7 @@ pub fn apply_test_config(config: &mut Config, test_config: &wast::TestConfig) { tail_call, extended_const, wide_arithmetic, + branch_hinting, component_model_async, component_model_more_async_builtins, component_model_async_stackful, @@ -72,6 +73,7 @@ pub fn apply_test_config(config: &mut Config, test_config: &wast::TestConfig) { let tail_call = tail_call.unwrap_or(false); let extended_const = extended_const.unwrap_or(false); let wide_arithmetic = wide_arithmetic.unwrap_or(false); + let branch_hinting = branch_hinting.unwrap_or(false); let component_model_async = component_model_async.unwrap_or(false); let component_model_more_async_builtins = component_model_more_async_builtins.unwrap_or(false); let component_model_async_stackful = component_model_async_stackful.unwrap_or(false); @@ -115,6 +117,7 @@ pub fn apply_test_config(config: &mut Config, test_config: &wast::TestConfig) { .wasm_custom_page_sizes(custom_page_sizes) .wasm_extended_const(extended_const) .wasm_wide_arithmetic(wide_arithmetic) + .wasm_branch_hinting(branch_hinting) .wasm_component_model_async(component_model_async) .wasm_component_model_more_async_builtins(component_model_more_async_builtins) .wasm_component_model_async_stackful(component_model_async_stackful) diff --git a/crates/test-util/src/wast.rs b/crates/test-util/src/wast.rs index 03c340ea2955..bfe79aff88d0 100644 --- a/crates/test-util/src/wast.rs +++ b/crates/test-util/src/wast.rs @@ -273,6 +273,7 @@ macro_rules! foreach_config_option { tail_call extended_const wide_arithmetic + branch_hinting hogs_memory nan_canonicalization component_model_async diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 7051fab45046..685a6ae8d457 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -893,6 +893,20 @@ impl Config { self } + /// Configures whether the WebAssembly [branch-hinting] proposal is enabled. + /// + /// When enabled, the `metadata.code.branch_hint` custom section is parsed + /// and used to lay out cold code paths during compilation. The hints are + /// advisory and never affect execution semantics. + /// + /// This is `false` by default until the proposal has been fuzzed. + /// + /// [branch-hinting]: https://github.com/WebAssembly/branch-hinting + pub fn wasm_branch_hinting(&mut self, enable: bool) -> &mut Self { + self.tunables.branch_hinting = Some(enable); + self + } + /// Configures whether the WebAssembly custom-page-sizes proposal will be /// enabled for compilation or not. /// diff --git a/crates/wasmtime/src/engine/serialization.rs b/crates/wasmtime/src/engine/serialization.rs index b4a216f31f21..24001e15181e 100644 --- a/crates/wasmtime/src/engine/serialization.rs +++ b/crates/wasmtime/src/engine/serialization.rs @@ -342,6 +342,10 @@ impl Metadata<'_> { // way. metadata_for_internal_asserts: _, metadata_for_gc_heap_corruption: _, + + // Only affects cold-block layout; a compiled artifact loads into an + // engine configured either way. + branch_hinting: _, } = self.tunables; Self::check_collector(collector, other.collector)?; diff --git a/docs/stability-wasm-proposals.md b/docs/stability-wasm-proposals.md index fbffed182f04..4ce9539c260e 100644 --- a/docs/stability-wasm-proposals.md +++ b/docs/stability-wasm-proposals.md @@ -74,17 +74,19 @@ The emoji legend is: | Proposal | Phase 4 | Tests | Finished | Fuzzed | API | C API | |-----------------------------|---------|-------|----------|--------|-----|-------| +| [`branch-hinting`] [^12] | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | | [`stack-switching`] [^11] | ❌ | 🚧 | 🚧 | ❌ | ❌ | ❌ | [^11]: The stack-switching proposal is a work-in-progress being tracked at [#9465](https://github.com/bytecodealliance/wasmtime/issues/9465). Currently the implementation is only for x86\_64 Linux. +[^12]: Disabled by default (`Config::wasm_branch_hinting`) pending fuzzing; + tracked at [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463). ## Unimplemented proposals | Proposal | Tracking Issue | |-------------------------------|----------------| -| [`branch-hinting`] | [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463) | | [`flexible-vectors`] | [#9464](https://github.com/bytecodealliance/wasmtime/issues/9464) | | [`memory-control`] | [#9467](https://github.com/bytecodealliance/wasmtime/issues/9467) | | [`shared-everything-threads`] | [#9466](https://github.com/bytecodealliance/wasmtime/issues/9466) | diff --git a/tests/disas/branch-hinting-aarch64.wat b/tests/disas/branch-hinting-aarch64.wat new file mode 100644 index 000000000000..2cc2cf5a283e --- /dev/null +++ b/tests/disas/branch-hinting-aarch64.wat @@ -0,0 +1,51 @@ +;;! target = "aarch64" +;;! test = "compile" +;;! flags = "-Wbranch-hinting" + +;; aarch64 honors branch hints too: the cold path is laid out after the hot +;; path in the generated machine code (the branch-hinting optimization lives in +;; target-independent Cranelift code, so it applies on every backend). + +(module + ;; condition likely false: the then-block is cold. + (func $if_unlikely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 10 + else + i32.const 20 + end + ) + + ;; condition likely true: the fallthrough is cold. + (func $br_if_likely (param i32) (result i32) + (block $b (result i32) + i32.const 10 + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $b + drop + i32.const 20 + ) + ) +) +;; wasm[0]::function[0]::if_unlikely: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; cbnz w4, #0x18 +;; c: mov w2, #0x14 +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 18: mov w2, #0xa +;; 1c: b #0x10 +;; +;; wasm[0]::function[1]::br_if_likely: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov w2, #0xa +;; cbz w4, #0x38 +;; 30: ldp x29, x30, [sp], #0x10 +;; ret +;; 38: mov w2, #0x14 +;; 3c: b #0x30 diff --git a/tests/disas/branch-hinting-disabled.wat b/tests/disas/branch-hinting-disabled.wat new file mode 100644 index 000000000000..07d6a1b78f1f --- /dev/null +++ b/tests/disas/branch-hinting-disabled.wat @@ -0,0 +1,73 @@ +;;! target = "x86_64" +;;! test = "optimize" +;;! flags = "-Wbranch-hinting=n" + +;; With branch hinting disabled, codegen ignores the `metadata.code.branch_hint` +;; custom section entirely: no block is marked `cold`. + +(module + (func $if_unlikely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 1 + else + i32.const 2 + end + ) + + (func $br_if_likely (param i32) (result i32) + (block $target + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $target + i32.const 1 + return + ) + i32.const 2 + ) +) +;; function u0:0(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0043 brif v2, block2, block4 +;; +;; block2: +;; @0045 v5 = iconst.i32 1 +;; @0047 jump block3(v5) ; v5 = 1 +;; +;; block4: +;; @0048 v6 = iconst.i32 2 +;; @004a jump block3(v6) ; v6 = 2 +;; +;; block3(v4: i32): +;; @004b jump block1(v4) +;; +;; block1(v3: i32): +;; @004b return v3 +;; } +;; +;; function u0:1(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0052 brif v2, block2, block3 +;; +;; block3: +;; @0054 v4 = iconst.i32 1 +;; @0056 return v4 ; v4 = 1 +;; +;; block2: +;; @005a jump block1 +;; +;; block1: +;; @0058 v5 = iconst.i32 2 +;; @005a return v5 ; v5 = 2 +;; } diff --git a/tests/disas/branch-hinting-pulley.wat b/tests/disas/branch-hinting-pulley.wat new file mode 100644 index 000000000000..99e71da730b7 --- /dev/null +++ b/tests/disas/branch-hinting-pulley.wat @@ -0,0 +1,49 @@ +;;! target = "pulley64" +;;! test = "compile" +;;! flags = "-Wbranch-hinting" + +;; Pulley honors branch hints too: the cold path is laid out after the hot path +;; in the generated bytecode (the branch-hinting optimization lives in +;; target-independent Cranelift code, so it applies on every backend). + +(module + ;; condition likely false: the then-block is cold. + (func $if_unlikely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 10 + else + i32.const 20 + end + ) + + ;; condition likely true: the fallthrough is cold. + (func $br_if_likely (param i32) (result i32) + (block $b (result i32) + i32.const 10 + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $b + drop + i32.const 20 + ) + ) +) +;; wasm[0]::function[0]::if_unlikely: +;; push_frame +;; br_if32 x2, 0xb // target = 0xc +;; 7: xconst8 x0, 20 +;; pop_frame +;; ret +;; c: xconst8 x0, 10 +;; f: jump -0x5 // target = 0xa +;; +;; wasm[0]::function[1]::br_if_likely: +;; push_frame +;; xconst8 x0, 10 +;; br_if_not32 x2, 0x8 // target = 0x20 +;; 1e: pop_frame +;; ret +;; 20: xconst8 x0, 20 +;; 23: jump -0x5 // target = 0x1e diff --git a/tests/disas/branch-hinting.wat b/tests/disas/branch-hinting.wat new file mode 100644 index 000000000000..17a43905a705 --- /dev/null +++ b/tests/disas/branch-hinting.wat @@ -0,0 +1,191 @@ +;;! target = "x86_64" +;;! test = "optimize" +;;! flags = "-Wbranch-hinting" + +;; Branch hints from the `metadata.code.branch_hint` custom section mark blocks +;; cold during compilation: `\00` = condition likely false, `\01` = likely true. +;; +;; The annotation must immediately precede the `br_if`/`if` keyword: before a +;; folded instruction the encoder attaches the hint to the first nested +;; instruction (the condition) rather than the branch. + +(module + (global $g (mut i32) (i32.const 0)) + + ;; condition likely false: branch target is cold. + (func $br_if_unlikely (param i32) (result i32) + (block $target + local.get 0 + (@metadata.code.branch_hint "\00") + br_if $target + i32.const 1 + return + ) + i32.const 2 + ) + + ;; condition likely true: fallthrough is cold. + (func $br_if_likely (param i32) (result i32) + (block $target + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $target + i32.const 1 + return + ) + i32.const 2 + ) + + ;; condition likely false: then-block is cold. + (func $if_unlikely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 1 + else + i32.const 2 + end + ) + + ;; condition likely true: else-block is cold. + (func $if_likely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\01") + if (result i32) + i32.const 1 + else + i32.const 2 + end + ) + + ;; condition likely true with a lazily allocated else (void if/else): the + ;; else-block is still cold (the hint is deferred to the `else`). + (func $if_void_likely (param i32) + local.get 0 + (@metadata.code.branch_hint "\01") + if + i32.const 1 + global.set $g + else + i32.const 2 + global.set $g + end + ) +) +;; function u0:0(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0063 brif v2, block2, block3 +;; +;; block3: +;; @0065 v4 = iconst.i32 1 +;; @0067 return v4 ; v4 = 1 +;; +;; block2 cold: +;; @006b jump block1 +;; +;; block1: +;; @0069 v5 = iconst.i32 2 +;; @006b return v5 ; v5 = 2 +;; } +;; +;; function u0:1(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0072 brif v2, block2, block3 +;; +;; block3 cold: +;; @0074 v4 = iconst.i32 1 +;; @0076 return v4 ; v4 = 1 +;; +;; block2: +;; @007a jump block1 +;; +;; block1: +;; @0078 v5 = iconst.i32 2 +;; @007a return v5 ; v5 = 2 +;; } +;; +;; function u0:2(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @007f brif v2, block2, block4 +;; +;; block2 cold: +;; @0081 v5 = iconst.i32 1 +;; @0083 jump block3(v5) ; v5 = 1 +;; +;; block4: +;; @0084 v6 = iconst.i32 2 +;; @0086 jump block3(v6) ; v6 = 2 +;; +;; block3(v4: i32): +;; @0087 jump block1(v4) +;; +;; block1(v3: i32): +;; @0087 return v3 +;; } +;; +;; function u0:3(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @008c brif v2, block2, block4 +;; +;; block2: +;; @008e v5 = iconst.i32 1 +;; @0090 jump block3(v5) ; v5 = 1 +;; +;; block4 cold: +;; @0091 v6 = iconst.i32 2 +;; @0093 jump block3(v6) ; v6 = 2 +;; +;; block3(v4: i32): +;; @0094 jump block1(v4) +;; +;; block1(v3: i32): +;; @0094 return v3 +;; } +;; +;; function u0:4(i64 vmctx, i64, i32) tail { +;; region0 = 1 "table" +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; gv3 = vmctx +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0099 brif v2, block2, block4 +;; +;; block2: +;; @009b v3 = iconst.i32 1 +;; @009d store notrap aligned region0 v3, v0+48 ; v3 = 1 +;; @009f jump block3 +;; +;; block4 cold: +;; @00a0 v5 = iconst.i32 2 +;; @00a2 store notrap aligned region0 v5, v0+48 ; v5 = 2 +;; @00a4 jump block3 +;; +;; block3: +;; @00a5 jump block1 +;; +;; block1: +;; @00a5 return +;; } diff --git a/tests/misc_testsuite/branch-hinting-invalid.wast b/tests/misc_testsuite/branch-hinting-invalid.wast new file mode 100644 index 000000000000..d8a0a9ef95a7 --- /dev/null +++ b/tests/misc_testsuite/branch-hinting-invalid.wast @@ -0,0 +1,22 @@ +;;! branch_hinting = true + +;; A malformed `metadata.code.branch_hint` section must be ignored, not rejected: +;; the section is advisory and unvalidated, so a decode error discards it and +;; compilation still succeeds. Here the hint's reserved length byte is `\02` +;; instead of the required `\01`, so decoding the section fails. +;; +;; Section bytes: \01 = one function; \00 = func 0; \01 = one hint; +;; \00 = func_offset 0; \02 = (invalid) hint length byte. + +(module + (@custom "metadata.code.branch_hint" (after code) "\01\00\01\00\02") + (func (export "f") (param i32) (result i32) + local.get 0 + if (result i32) + i32.const 10 + else + i32.const 20 + end)) + +(assert_return (invoke "f" (i32.const 0)) (i32.const 20)) +(assert_return (invoke "f" (i32.const 1)) (i32.const 10)) diff --git a/tests/misc_testsuite/branch-hinting.wast b/tests/misc_testsuite/branch-hinting.wast new file mode 100644 index 000000000000..054d1eba9077 --- /dev/null +++ b/tests/misc_testsuite/branch-hinting.wast @@ -0,0 +1,36 @@ +;;! branch_hinting = true + +;; Branch hints are advisory: with the proposal enabled a hinted module must +;; still produce the same results it would without hints. Both functions return +;; 10 when their argument is nonzero and 20 otherwise. The +;; `(@metadata.code.branch_hint ...)` annotation must immediately precede the +;; `if`/`br_if` it applies to. + +(module + (func (export "via_if") (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 10 + else + i32.const 20 + end) + + (func (export "via_br_if") (param i32) (result i32) + (block $b (result i32) + i32.const 10 + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $b + drop + i32.const 20))) + +(assert_return (invoke "via_if" (i32.const 0)) (i32.const 20)) +(assert_return (invoke "via_if" (i32.const 1)) (i32.const 10)) +(assert_return (invoke "via_if" (i32.const 7)) (i32.const 10)) +(assert_return (invoke "via_if" (i32.const -3)) (i32.const 10)) + +(assert_return (invoke "via_br_if" (i32.const 0)) (i32.const 20)) +(assert_return (invoke "via_br_if" (i32.const 1)) (i32.const 10)) +(assert_return (invoke "via_br_if" (i32.const 7)) (i32.const 10)) +(assert_return (invoke "via_br_if" (i32.const -3)) (i32.const 10))