From cf77594149ff5f2d8fc25461f329156195efd79e Mon Sep 17 00:00:00 2001 From: FUJI Goro Date: Fri, 22 May 2026 23:17:44 +0900 Subject: [PATCH 1/6] Implement branch-hinting proposal support (Phase 1) Parse the `metadata.code.branch_hint` custom section and use it to mark cold blocks during Cranelift compilation. Hints are applied to `br_if` and `if` (including the lazily-allocated `else` case) and consumed in program-counter order via a forward cursor (O(n), no per-offset map). Gated behind a new `Config::wasm_branch_hinting` knob (stored in `Tunables`, default off until the proposal is fuzzed) with matching CLI `-Wbranch-hinting` and C API. Hints are advisory and never affect execution semantics, so the setting is excluded from artifact compatibility checks. Tests: disas snapshots for x86_64 and aarch64, plus an API test asserting semantic neutrality across enabled/disabled/default configs. Follow-ups: malformed-section validation + spec-test ungating, wasm-smith emission + fuzzing, then enabling by default. Refs #9463 Assisted-by: Claude Code:claude-opus-4-7 --- RELEASES.md | 6 + crates/c-api/include/wasmtime/config.h | 8 + crates/c-api/src/config.rs | 5 + crates/cli-flags/src/lib.rs | 6 + crates/cranelift/src/compiler.rs | 12 ++ crates/cranelift/src/func_environ.rs | 50 ++++- .../src/translate/code_translator.rs | 39 ++++ crates/cranelift/src/translate/stack.rs | 5 + crates/environ/src/compile/module_environ.rs | 38 ++++ crates/environ/src/tunables.rs | 5 + crates/wasmtime/src/config.rs | 14 ++ crates/wasmtime/src/engine/serialization.rs | 4 + docs/stability-wasm-proposals.md | 7 +- tests/all/branch_hinting.rs | 71 +++++++ tests/all/main.rs | 1 + tests/disas/branch-hinting-aarch64.wat | 51 +++++ tests/disas/branch-hinting-disabled.wat | 73 +++++++ tests/disas/branch-hinting.wat | 190 ++++++++++++++++++ 18 files changed, 583 insertions(+), 2 deletions(-) create mode 100644 tests/all/branch_hinting.rs create mode 100644 tests/disas/branch-hinting-aarch64.wat create mode 100644 tests/disas/branch-hinting-disabled.wat create mode 100644 tests/disas/branch-hinting.wat diff --git a/RELEASES.md b/RELEASES.md index 5788136b5655..8076d21a93d7 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -4,6 +4,12 @@ Unreleased. ### Added +* Added opt-in support for the WebAssembly branch-hinting proposal: the + `metadata.code.branch_hint` custom section is parsed and used to mark cold + blocks during Cranelift compilation, behind `Config::wasm_branch_hinting` + (off by default). + [#13459](https://github.com/bytecodealliance/wasmtime/pull/13459) + ### Changed -------------------------------------------------------------------------------- diff --git a/crates/c-api/include/wasmtime/config.h b/crates/c-api/include/wasmtime/config.h index 1575fcaa40fc..e01b2dd5477e 100644 --- a/crates/c-api/include/wasmtime/config.h +++ b/crates/c-api/include/wasmtime/config.h @@ -305,6 +305,14 @@ WASMTIME_CONFIG_PROP(void, wasm_memory64, bool) */ WASMTIME_CONFIG_PROP(void, wasm_wide_arithmetic, bool) +/** + * \brief Configures whether the WebAssembly branch-hinting proposal is + * enabled. + * + * This setting is `false` by default. + */ +WASMTIME_CONFIG_PROP(void, wasm_branch_hinting, bool) + #ifdef WASMTIME_FEATURE_GC /** diff --git a/crates/c-api/src/config.rs b/crates/c-api/src/config.rs index abecac59a95e..1196dc49b526 100644 --- a/crates/c-api/src/config.rs +++ b/crates/c-api/src/config.rs @@ -500,6 +500,11 @@ pub extern "C" fn wasmtime_config_wasm_wide_arithmetic_set(c: &mut wasm_config_t c.config.wasm_wide_arithmetic(enable); } +#[unsafe(no_mangle)] +pub extern "C" fn wasmtime_config_wasm_branch_hinting_set(c: &mut wasm_config_t, enable: bool) { + c.config.wasm_branch_hinting(enable); +} + #[unsafe(no_mangle)] #[cfg(feature = "gc")] pub extern "C" fn wasmtime_config_wasm_exceptions_set(c: &mut wasm_config_t, enable: bool) { diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index 5a81ca87fc46..6720cb55153f 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -451,6 +451,8 @@ wasmtime_option_group! { pub custom_page_sizes: Option, /// Configure support for the wide-arithmetic proposal. pub wide_arithmetic: Option, + /// Configure support for the branch-hinting proposal. + pub branch_hinting: Option, /// Configure support for the extended-const proposal. pub extended_const: Option, /// Configure support for the exceptions proposal. @@ -1200,6 +1202,10 @@ impl CommonOptions { if let Some(enable) = self.wasm.wide_arithmetic.or(all) { config.wasm_wide_arithmetic(enable); } + // Not included in `all_proposals`: off by default until fuzzed. + if let Some(enable) = self.wasm.branch_hinting { + config.wasm_branch_hinting(enable); + } if let Some(enable) = self.wasm.extended_const.or(all) { config.wasm_extended_const(enable); } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 5ab4ed2bf564..10e7c6ddcd10 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -319,6 +319,18 @@ impl wasmtime_environ::Compiler for Compiler { } } let FunctionBodyData { validator, body } = input; + + // Branch hints are keyed by function-body-relative offset, so record the + // body's module-relative start to convert source locations later. + func_env.set_branch_hints( + translation + .branch_hints + .get(&func_index) + .map(|hints| &hints[..]) + .unwrap_or(&[]), + body.get_binary_reader().original_position(), + ); + let mut validator = validator.into_validator(mem::take(&mut compiler.cx.validator_allocations)); compiler.cx.func_translator.translate_body( diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index c4495b09c1b8..ba2a6641fa71 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -29,7 +29,7 @@ use std::mem; use wasmparser::{FuncValidator, Operator, WasmFeatures, WasmModuleResources}; use wasmtime_core::math::f64_cvt_to_int_bounds; use wasmtime_environ::{ - BuiltinFunctionIndex, ComponentPC, DataIndex, DefinedFuncIndex, ElemIndex, + BranchHint, BuiltinFunctionIndex, ComponentPC, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex, FrameStateSlotBuilder, FrameValType, FuncIndex, FuncKey, GlobalConstValue, GlobalIndex, IndexType, Memory, MemoryIndex, MemoryTunables, Module, ModuleInternedTypeIndex, ModuleTranslation, ModuleTypesBuilder, PtrSize, Table, TableIndex, @@ -232,6 +232,14 @@ pub struct FuncEnvironment<'module_environment> { /// nonlinear control flow). This is useful in cases where we need /// to e.g. record the return-address of a callsite for debuginfo. pub(crate) next_srcloc: ir::SourceLoc, + + /// Branch hints for the current function, consumed in program-counter order + /// by `take_branch_hint`. + branch_hints: &'module_environment [BranchHint], + /// Forward cursor into `branch_hints`. + branch_hint_cursor: usize, + /// Module-relative byte offset of the current function body's start. + func_body_offset: usize, } impl<'module_environment> FuncEnvironment<'module_environment> { @@ -294,7 +302,47 @@ impl<'module_environment> FuncEnvironment<'module_environment> { state_slot: None, next_srcloc: ir::SourceLoc::default(), wasm_module_offset: translation.wasm_module_offset, + + branch_hints: &[], + branch_hint_cursor: 0, + func_body_offset: 0, + } + } + + /// Set the branch hints and the module-relative start offset for the + /// function about to be translated. Hints are expected in ascending + /// `func_offset` order (as the proposal requires); `take_branch_hint` + /// simply skips any that are out of order. + pub(crate) fn set_branch_hints( + &mut self, + hints: &'module_environment [BranchHint], + func_body_offset: usize, + ) { + self.branch_hints = hints; + self.branch_hint_cursor = 0; + self.func_body_offset = func_body_offset; + } + + /// Consume the branch hint for the instruction at module-relative `offset` + /// (i.e. `builder.srcloc().bits()`), if any; `Some(true)` means likely + /// taken. The cursor only advances, making this O(n) over a function body. + pub(crate) fn take_branch_hint(&mut self, offset: usize) -> Option { + if self.branch_hints.is_empty() { + return None; + } + let rel = u32::try_from(offset.checked_sub(self.func_body_offset)?).ok()?; + // Skip hints that don't line up with this (or a later) branch. + while matches!( + self.branch_hints.get(self.branch_hint_cursor), + Some(h) if h.func_offset < rel, + ) { + self.branch_hint_cursor += 1; } + let hint = self.branch_hints.get(self.branch_hint_cursor)?; + (hint.func_offset == rel).then(|| { + self.branch_hint_cursor += 1; + hint.taken + }) } pub(crate) fn pointer_type(&self) -> ir::Type { diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index a4e420fc59f1..850a5fddbc76 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -286,6 +286,9 @@ pub fn translate_operator( environ.translate_loop_header(builder)?; } Operator::If { blockty } => { + // Read the hint before `environ` is borrowed mutably below. + let branch_hint = environ.take_branch_hint(builder.srcloc().bits() as usize); + let val = environ.stacks.pop1(); let next_block = builder.create_block(); @@ -330,6 +333,24 @@ pub fn translate_operator( (destination, ElseData::WithElse { else_block }) }; + // Mark the unlikely successor cold per the branch hint. A likely + // condition makes the else block cold; when it is allocated lazily + // (`NoElse`) defer that to `Operator::Else` via `cold_else`. + let cold_else = match branch_hint { + Some(false) => { + builder.set_cold_block(next_block); + false + } + Some(true) => match &else_data { + ElseData::WithElse { else_block } => { + builder.set_cold_block(*else_block); + false + } + ElseData::NoElse { .. } => true, + }, + None => false, + }; + builder.seal_block(next_block); // Only predecessor is the current block. builder.switch_to_block(next_block); @@ -345,6 +366,7 @@ pub fn translate_operator( params.len(), results.len(), *blockty, + cold_else, ); } Operator::Else => { @@ -358,6 +380,7 @@ pub fn translate_operator( num_return_values, blocktype, destination, + else_is_cold, .. } => { // We finished the consequent, so record its final @@ -407,6 +430,12 @@ pub fn translate_operator( } }; + // Apply a likely-taken hint deferred from a lazily + // allocated else block (see `Operator::If`). + if else_is_cold { + builder.set_cold_block(else_block); + } + // You might be expecting that we push the parameters for this // `else` block here, something like this: // @@ -3339,6 +3368,7 @@ fn translate_unreachable_operator( 0, 0, blockty, + false, ); } Operator::Loop { blockty: _ } @@ -3981,9 +4011,18 @@ fn translate_br_if( builder: &mut FunctionBuilder, env: &mut FuncEnvironment<'_>, ) { + // Read the hint before `env` is borrowed mutably below. + let branch_hint = env.take_branch_hint(builder.srcloc().bits() as usize); + let val = env.stacks.pop1(); let (br_destination, inputs) = translate_br_if_args(relative_depth, env); let next_block = builder.create_block(); + + if let Some(taken) = branch_hint { + // Likely taken => the fallthrough is cold, else the branch target is. + builder.set_cold_block(if taken { next_block } else { br_destination }); + } + canonicalise_brif(builder, val, br_destination, inputs, next_block, &[]); builder.seal_block(next_block); // The only predecessor is the current block. diff --git a/crates/cranelift/src/translate/stack.rs b/crates/cranelift/src/translate/stack.rs index f338fe3f0abd..95250df682f7 100644 --- a/crates/cranelift/src/translate/stack.rs +++ b/crates/cranelift/src/translate/stack.rs @@ -58,6 +58,9 @@ pub enum ControlStackFrame { original_stack_size: usize, exit_is_branched_to: bool, blocktype: wasmparser::BlockType, + /// Whether the `else` block, when allocated lazily, should be marked + /// cold (from a branch hint that the condition is likely true). + else_is_cold: bool, /// Was the head of the `if` reachable? head_is_reachable: bool, /// What was the reachability at the end of the consequent? @@ -507,6 +510,7 @@ impl FuncTranslationStacks { num_param_types: usize, num_result_types: usize, blocktype: wasmparser::BlockType, + else_is_cold: bool, ) { debug_assert!(num_param_types <= self.stack.len()); self.assert_debug_stack_is_synced(); @@ -541,6 +545,7 @@ impl FuncTranslationStacks { head_is_reachable: self.reachable, consequent_ends_reachable: None, blocktype, + else_is_cold, }); } diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs index d06ece9d94a3..784a32a4848c 100644 --- a/crates/environ/src/compile/module_environ.rs +++ b/crates/environ/src/compile/module_environ.rs @@ -122,6 +122,21 @@ pub struct ModuleTranslation<'data> { /// The type information of the current module made available at the end of the /// validation process. types: Option, + + /// Branch hints parsed from the `metadata.code.branch_hint` custom section, + /// keyed by module-level function index. Only populated when + /// [`Tunables::branch_hinting`] is enabled. + pub branch_hints: HashMap>, +} + +/// A single branch hint from the `metadata.code.branch_hint` custom section +/// ([branch-hinting proposal](https://github.com/WebAssembly/branch-hinting)). +#[derive(Debug, Copy, Clone)] +pub struct BranchHint { + /// Byte offset of the hinted `br_if`/`if` from the start of the function body. + pub func_offset: u32, + /// Whether the branch's condition is hinted to be true. + pub taken: bool, } impl<'data> ModuleTranslation<'data> { @@ -145,6 +160,7 @@ impl<'data> ModuleTranslation<'data> { types: None, passive_data_map: Default::default(), passive_elem_map: Default::default(), + branch_hints: HashMap::default(), } } @@ -757,6 +773,28 @@ and for re-adding support for interface types you can see this issue: log::warn!("failed to parse name section {e:?}"); } } + KnownCustom::BranchHints(reader) if self.tunables.branch_hinting => { + // Compilation relies on the proposal's guarantee that hints are + // in ascending `func_offset` order; we trust it rather than + // re-sort (validating malformed sections is not yet done). Hints + // are advisory, so skip entries that fail to parse. + for func in reader.into_iter().flatten() { + let hints = func + .hints + .into_iter() + .flatten() + .map(|h| BranchHint { + func_offset: h.func_offset, + taken: h.taken, + }) + .collect::>(); + if !hints.is_empty() { + self.result + .branch_hints + .insert(FuncIndex::from_u32(func.func), hints); + } + } + } _ => { let name = section.name().trim_end_matches(".dwo"); if name.starts_with(".debug_") { diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs index 113eb9e9abf6..76123c17d1ec 100644 --- a/crates/environ/src/tunables.rs +++ b/crates/environ/src/tunables.rs @@ -188,6 +188,10 @@ define_tunables! { /// Boolean to track whether compiled code retains metadata necessary to /// report extra information on gc heap corruption being detected. pub metadata_for_gc_heap_corruption: bool, + + /// Whether `metadata.code.branch_hint` sections are parsed and used to + /// mark cold blocks during compilation. + pub branch_hinting: bool, } pub struct ConfigTunables { @@ -273,6 +277,7 @@ impl Tunables { gc_heap_may_move: true, metadata_for_internal_asserts: false, metadata_for_gc_heap_corruption: true, + branch_hinting: false, } } diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 3180c64be3d0..e97bc2668fae 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -893,6 +893,20 @@ impl Config { self } + /// Configures whether the WebAssembly [branch-hinting] proposal is enabled. + /// + /// When enabled, the `metadata.code.branch_hint` custom section is parsed + /// and used to lay out cold code paths during compilation. The hints are + /// advisory and never affect execution semantics. + /// + /// This is `false` by default until the proposal has been fuzzed. + /// + /// [branch-hinting]: https://github.com/WebAssembly/branch-hinting + pub fn wasm_branch_hinting(&mut self, enable: bool) -> &mut Self { + self.tunables.branch_hinting = Some(enable); + self + } + /// Configures whether the WebAssembly custom-page-sizes proposal will be /// enabled for compilation or not. /// diff --git a/crates/wasmtime/src/engine/serialization.rs b/crates/wasmtime/src/engine/serialization.rs index b4a216f31f21..24001e15181e 100644 --- a/crates/wasmtime/src/engine/serialization.rs +++ b/crates/wasmtime/src/engine/serialization.rs @@ -342,6 +342,10 @@ impl Metadata<'_> { // way. metadata_for_internal_asserts: _, metadata_for_gc_heap_corruption: _, + + // Only affects cold-block layout; a compiled artifact loads into an + // engine configured either way. + branch_hinting: _, } = self.tunables; Self::check_collector(collector, other.collector)?; diff --git a/docs/stability-wasm-proposals.md b/docs/stability-wasm-proposals.md index fbffed182f04..97db45202903 100644 --- a/docs/stability-wasm-proposals.md +++ b/docs/stability-wasm-proposals.md @@ -74,17 +74,22 @@ The emoji legend is: | Proposal | Phase 4 | Tests | Finished | Fuzzed | API | C API | |-----------------------------|---------|-------|----------|--------|-----|-------| +| [`branch-hinting`] [^12] | ❌ | ❌ | 🚧 | ❌ | ✅ | ✅ | | [`stack-switching`] [^11] | ❌ | 🚧 | 🚧 | ❌ | ❌ | ❌ | [^11]: The stack-switching proposal is a work-in-progress being tracked at [#9465](https://github.com/bytecodealliance/wasmtime/issues/9465). Currently the implementation is only for x86\_64 Linux. +[^12]: Branch hinting parses the `metadata.code.branch_hint` custom section and + uses it to mark cold blocks during Cranelift compilation. It is disabled by + default (`Config::wasm_branch_hinting`) until it has been fuzzed; spec-test + enablement and validation of malformed sections are still pending. Tracked + at [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463). ## Unimplemented proposals | Proposal | Tracking Issue | |-------------------------------|----------------| -| [`branch-hinting`] | [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463) | | [`flexible-vectors`] | [#9464](https://github.com/bytecodealliance/wasmtime/issues/9464) | | [`memory-control`] | [#9467](https://github.com/bytecodealliance/wasmtime/issues/9467) | | [`shared-everything-threads`] | [#9466](https://github.com/bytecodealliance/wasmtime/issues/9466) | diff --git a/tests/all/branch_hinting.rs b/tests/all/branch_hinting.rs new file mode 100644 index 000000000000..56dea4e8775a --- /dev/null +++ b/tests/all/branch_hinting.rs @@ -0,0 +1,71 @@ +//! Tests for the WebAssembly branch-hinting proposal. The spec conformance +//! tests live in this repo at `tests/spec_testsuite/custom/branch_hint.wast`; +//! per that spec hints may only be attached to `br_if` and `if` instructions. +//! Proposal: . + +use wasmtime::*; + +// Both functions return 10 when their condition is true (arg != 0) and 20 +// otherwise. The `(@metadata.code.branch_hint ...)` annotation must immediately +// precede the `if`/`br_if` it applies to. +const MODULE: &str = r#" +(module + (func (export "via_if") (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 10 + else + i32.const 20 + end) + + (func (export "via_br_if") (param i32) (result i32) + (block $b (result i32) + i32.const 10 + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $b + drop + i32.const 20))) +"#; + +fn results(branch_hinting: Option) -> Result> { + let mut config = Config::new(); + if let Some(enable) = branch_hinting { + config.wasm_branch_hinting(enable); + } + let engine = Engine::new(&config)?; + let module = Module::new(&engine, MODULE)?; + let mut store = Store::new(&engine, ()); + let instance = Instance::new(&mut store, &module, &[])?; + let via_if = instance.get_typed_func::(&mut store, "via_if")?; + let via_br_if = instance.get_typed_func::(&mut store, "via_br_if")?; + + let mut out = Vec::new(); + for arg in [0, 1, 7, -3] { + out.push(( + arg, + via_if.call(&mut store, arg)?, + via_br_if.call(&mut store, arg)?, + )); + } + Ok(out) +} + +// A module carrying branch hints must compile and run identically whether the +// proposal is enabled, explicitly disabled, or left at its (disabled) default: +// hints are advisory and never change semantics. +#[test] +#[cfg_attr(miri, ignore)] +fn branch_hints_are_semantically_neutral() -> Result<()> { + let enabled = results(Some(true))?; + assert_eq!(enabled, results(Some(false))?); + assert_eq!(enabled, results(None)?); + + for (arg, via_if, via_br_if) in enabled { + let expected = if arg != 0 { 10 } else { 20 }; + assert_eq!(via_if, expected, "via_if({arg})"); + assert_eq!(via_br_if, expected, "via_br_if({arg})"); + } + Ok(()) +} diff --git a/tests/all/main.rs b/tests/all/main.rs index ec61178b3478..108a522058c9 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -4,6 +4,7 @@ use wasmtime::Result; mod arrays; mod async_functions; +mod branch_hinting; mod call_hook; mod cli_tests; mod compile_time_builtins; diff --git a/tests/disas/branch-hinting-aarch64.wat b/tests/disas/branch-hinting-aarch64.wat new file mode 100644 index 000000000000..2cc2cf5a283e --- /dev/null +++ b/tests/disas/branch-hinting-aarch64.wat @@ -0,0 +1,51 @@ +;;! target = "aarch64" +;;! test = "compile" +;;! flags = "-Wbranch-hinting" + +;; aarch64 honors branch hints too: the cold path is laid out after the hot +;; path in the generated machine code (the branch-hinting optimization lives in +;; target-independent Cranelift code, so it applies on every backend). + +(module + ;; condition likely false: the then-block is cold. + (func $if_unlikely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 10 + else + i32.const 20 + end + ) + + ;; condition likely true: the fallthrough is cold. + (func $br_if_likely (param i32) (result i32) + (block $b (result i32) + i32.const 10 + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $b + drop + i32.const 20 + ) + ) +) +;; wasm[0]::function[0]::if_unlikely: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; cbnz w4, #0x18 +;; c: mov w2, #0x14 +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 18: mov w2, #0xa +;; 1c: b #0x10 +;; +;; wasm[0]::function[1]::br_if_likely: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; mov w2, #0xa +;; cbz w4, #0x38 +;; 30: ldp x29, x30, [sp], #0x10 +;; ret +;; 38: mov w2, #0x14 +;; 3c: b #0x30 diff --git a/tests/disas/branch-hinting-disabled.wat b/tests/disas/branch-hinting-disabled.wat new file mode 100644 index 000000000000..8d569a8ec8e7 --- /dev/null +++ b/tests/disas/branch-hinting-disabled.wat @@ -0,0 +1,73 @@ +;;! target = "x86_64" +;;! test = "optimize" + +;; Same hinted module as branch-hinting.wat, but WITHOUT `-Wbranch-hinting`: +;; with the knob off (the default) the `metadata.code.branch_hint` section is +;; ignored and no block is marked `cold`. This pins the no-regression behavior. + +(module + (func $if_unlikely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 1 + else + i32.const 2 + end + ) + + (func $br_if_likely (param i32) (result i32) + (block $target + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $target + i32.const 1 + return + ) + i32.const 2 + ) +) +;; function u0:0(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0043 brif v2, block2, block4 +;; +;; block2: +;; @0045 v5 = iconst.i32 1 +;; @0047 jump block3(v5) ; v5 = 1 +;; +;; block4: +;; @0048 v6 = iconst.i32 2 +;; @004a jump block3(v6) ; v6 = 2 +;; +;; block3(v4: i32): +;; @004b jump block1(v4) +;; +;; block1(v3: i32): +;; @004b return v3 +;; } +;; +;; function u0:1(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0052 brif v2, block2, block3 +;; +;; block3: +;; @0054 v4 = iconst.i32 1 +;; @0056 return v4 ; v4 = 1 +;; +;; block2: +;; @005a jump block1 +;; +;; block1: +;; @0058 v5 = iconst.i32 2 +;; @005a return v5 ; v5 = 2 +;; } diff --git a/tests/disas/branch-hinting.wat b/tests/disas/branch-hinting.wat new file mode 100644 index 000000000000..8f06c33031bd --- /dev/null +++ b/tests/disas/branch-hinting.wat @@ -0,0 +1,190 @@ +;;! target = "x86_64" +;;! test = "optimize" +;;! flags = "-Wbranch-hinting" + +;; Branch hints from the `metadata.code.branch_hint` custom section mark blocks +;; cold during compilation: `\00` = condition likely false, `\01` = likely true. +;; +;; The annotation must immediately precede the `br_if`/`if` keyword: before a +;; folded instruction the encoder attaches the hint to the first nested +;; instruction (the condition) rather than the branch. + +(module + (global $g (mut i32) (i32.const 0)) + + ;; condition likely false: branch target is cold. + (func $br_if_unlikely (param i32) (result i32) + (block $target + local.get 0 + (@metadata.code.branch_hint "\00") + br_if $target + i32.const 1 + return + ) + i32.const 2 + ) + + ;; condition likely true: fallthrough is cold. + (func $br_if_likely (param i32) (result i32) + (block $target + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $target + i32.const 1 + return + ) + i32.const 2 + ) + + ;; condition likely false: then-block is cold. + (func $if_unlikely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 1 + else + i32.const 2 + end + ) + + ;; condition likely true: else-block is cold. + (func $if_likely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\01") + if (result i32) + i32.const 1 + else + i32.const 2 + end + ) + + ;; condition likely true with a lazily allocated else (void if/else): the + ;; else-block is still cold (the hint is deferred to the `else`). + (func $if_void_likely (param i32) + local.get 0 + (@metadata.code.branch_hint "\01") + if + i32.const 1 + global.set $g + else + i32.const 2 + global.set $g + end + ) +) +;; function u0:0(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0063 brif v2, block2, block3 +;; +;; block3: +;; @0065 v4 = iconst.i32 1 +;; @0067 return v4 ; v4 = 1 +;; +;; block2 cold: +;; @006b jump block1 +;; +;; block1: +;; @0069 v5 = iconst.i32 2 +;; @006b return v5 ; v5 = 2 +;; } +;; +;; function u0:1(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0072 brif v2, block2, block3 +;; +;; block3 cold: +;; @0074 v4 = iconst.i32 1 +;; @0076 return v4 ; v4 = 1 +;; +;; block2: +;; @007a jump block1 +;; +;; block1: +;; @0078 v5 = iconst.i32 2 +;; @007a return v5 ; v5 = 2 +;; } +;; +;; function u0:2(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @007f brif v2, block2, block4 +;; +;; block2 cold: +;; @0081 v5 = iconst.i32 1 +;; @0083 jump block3(v5) ; v5 = 1 +;; +;; block4: +;; @0084 v6 = iconst.i32 2 +;; @0086 jump block3(v6) ; v6 = 2 +;; +;; block3(v4: i32): +;; @0087 jump block1(v4) +;; +;; block1(v3: i32): +;; @0087 return v3 +;; } +;; +;; function u0:3(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @008c brif v2, block2, block4 +;; +;; block2: +;; @008e v5 = iconst.i32 1 +;; @0090 jump block3(v5) ; v5 = 1 +;; +;; block4 cold: +;; @0091 v6 = iconst.i32 2 +;; @0093 jump block3(v6) ; v6 = 2 +;; +;; block3(v4: i32): +;; @0094 jump block1(v4) +;; +;; block1(v3: i32): +;; @0094 return v3 +;; } +;; +;; function u0:4(i64 vmctx, i64, i32) tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+24 +;; gv3 = vmctx +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0099 brif v2, block2, block4 +;; +;; block2: +;; @009b v3 = iconst.i32 1 +;; @009d store notrap aligned table v3, v0+48 ; v3 = 1 +;; @009f jump block3 +;; +;; block4 cold: +;; @00a0 v5 = iconst.i32 2 +;; @00a2 store notrap aligned table v5, v0+48 ; v5 = 2 +;; @00a4 jump block3 +;; +;; block3: +;; @00a5 jump block1 +;; +;; block1: +;; @00a5 return +;; } From d6e78dfa42f34339ec7d84aa905a132c36e33e2e Mon Sep 17 00:00:00 2001 From: FUJI Goro Date: Sat, 23 May 2026 08:46:13 +0900 Subject: [PATCH 2/6] Keep the first of duplicate branch-hint function entries Review feedback on #13459: a malformed `metadata.code.branch_hint` section could list the same function twice, and `HashMap::insert` would silently drop the earlier entry. Use `entry().or_insert` so duplicates are handled deterministically (first occurrence wins). Assisted-by: Claude Code:claude-opus-4-7 --- crates/environ/src/compile/module_environ.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs index 784a32a4848c..c541204e1206 100644 --- a/crates/environ/src/compile/module_environ.rs +++ b/crates/environ/src/compile/module_environ.rs @@ -789,9 +789,14 @@ and for re-adding support for interface types you can see this issue: }) .collect::>(); if !hints.is_empty() { + // A well-formed section lists each function at most once; + // if a malformed one repeats a function, keep the first + // entry deterministically rather than silently + // overwriting it. self.result .branch_hints - .insert(FuncIndex::from_u32(func.func), hints); + .entry(FuncIndex::from_u32(func.func)) + .or_insert(hints); } } } From c4503727646d47b8c9a43ba19a2df32753fb7305 Mon Sep 17 00:00:00 2001 From: FUJI Goro Date: Sun, 24 May 2026 16:27:23 +0900 Subject: [PATCH 3/6] Refine branch hinting: lazy decode and review fixes - Decode the branch-hint section lazily via per-function readers instead of eagerly decoding and heap-allocating every hint - Discard malformed sections explicitly; keep first of duplicate entries - take_branch_hint yields BranchHint; init hints in FuncEnvironment::new - Move the runtime test to misc_testsuite/*.wast; add a malformed-section test - Add a fuzzing knob; tidy docs and comments Assisted-by: Claude Code:claude-opus-4-7 --- crates/cranelift/src/compiler.rs | 28 +++--- crates/cranelift/src/func_environ.rs | 92 +++++++++++-------- .../src/translate/code_translator.rs | 14 +-- crates/environ/src/compile/module_environ.rs | 68 +++++++------- crates/fuzzing/src/generators/config.rs | 7 ++ crates/test-util/src/wasmtime_wast.rs | 3 + crates/test-util/src/wast.rs | 1 + docs/stability-wasm-proposals.md | 9 +- tests/all/branch_hinting.rs | 71 -------------- tests/all/main.rs | 1 - tests/disas/branch-hinting-disabled.wat | 6 +- .../branch-hinting-invalid.wast | 22 +++++ tests/misc_testsuite/branch-hinting.wast | 36 ++++++++ 13 files changed, 182 insertions(+), 176 deletions(-) delete mode 100644 tests/all/branch_hinting.rs create mode 100644 tests/misc_testsuite/branch-hinting-invalid.wast create mode 100644 tests/misc_testsuite/branch-hinting.wast diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 10e7c6ddcd10..18205125c83f 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -262,7 +262,20 @@ impl wasmtime_environ::Compiler for Compiler { context.func.collect_debug_info(); } - let mut func_env = FuncEnvironment::new(self, translation, types, wasm_func_ty, key); + // Branch hints are keyed by function-body-relative offset, so the body's + // module-relative start is needed to convert source locations later. + let FunctionBodyData { validator, body } = input; + let func_body_offset = body.get_binary_reader().original_position(); + + let mut func_env = FuncEnvironment::new( + self, + translation, + types, + wasm_func_ty, + key, + func_index, + func_body_offset, + ); // The `stack_limit` global value below is the implementation of stack // overflow checks in Wasmtime. @@ -318,19 +331,6 @@ impl wasmtime_environ::Compiler for Compiler { func_env.stack_limit_at_function_entry = Some(stack_limit); } } - let FunctionBodyData { validator, body } = input; - - // Branch hints are keyed by function-body-relative offset, so record the - // body's module-relative start to convert source locations later. - func_env.set_branch_hints( - translation - .branch_hints - .get(&func_index) - .map(|hints| &hints[..]) - .unwrap_or(&[]), - body.get_binary_reader().original_position(), - ); - let mut validator = validator.into_validator(mem::take(&mut compiler.cx.validator_allocations)); compiler.cx.func_translator.translate_body( diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index ba2a6641fa71..4f9340b5fac4 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -26,10 +26,12 @@ use cranelift_frontend::Variable; use cranelift_frontend::{FuncInstBuilder, FunctionBuilder}; use smallvec::{SmallVec, smallvec}; use std::mem; -use wasmparser::{FuncValidator, Operator, WasmFeatures, WasmModuleResources}; +use wasmparser::{ + BranchHint, FuncValidator, Operator, SectionLimitedIntoIter, WasmFeatures, WasmModuleResources, +}; use wasmtime_core::math::f64_cvt_to_int_bounds; use wasmtime_environ::{ - BranchHint, BuiltinFunctionIndex, ComponentPC, DataIndex, DefinedFuncIndex, ElemIndex, + BuiltinFunctionIndex, ComponentPC, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex, FrameStateSlotBuilder, FrameValType, FuncIndex, FuncKey, GlobalConstValue, GlobalIndex, IndexType, Memory, MemoryIndex, MemoryTunables, Module, ModuleInternedTypeIndex, ModuleTranslation, ModuleTypesBuilder, PtrSize, Table, TableIndex, @@ -233,11 +235,12 @@ pub struct FuncEnvironment<'module_environment> { /// to e.g. record the return-address of a callsite for debuginfo. pub(crate) next_srcloc: ir::SourceLoc, - /// Branch hints for the current function, consumed in program-counter order - /// by `take_branch_hint`. - branch_hints: &'module_environment [BranchHint], - /// Forward cursor into `branch_hints`. - branch_hint_cursor: usize, + /// Lazily-decoded branch hints for the current function, in ascending + /// `func_offset` order (as the proposal requires). `None` once exhausted or + /// when the function carries no hints. + branch_hints: Option>, + /// One-item lookahead into `branch_hints`, consumed by `take_branch_hint`. + peeked_hint: Option, /// Module-relative byte offset of the current function body's start. func_body_offset: usize, } @@ -249,10 +252,19 @@ impl<'module_environment> FuncEnvironment<'module_environment> { types: &'module_environment ModuleTypesBuilder, wasm_func_ty: &'module_environment WasmFuncType, key: FuncKey, + func_index: FuncIndex, + func_body_offset: usize, ) -> Self { let tunables = compiler.tunables(); let builtin_functions = BuiltinFunctions::new(compiler); + // Resolve the lazy branch-hint decoder for this function, if any. + // `func_body_offset` lets `take_branch_hint` convert source locations to + // the function-body-relative offsets the hints use. + let branch_hints = translation + .branch_hints(func_index) + .map(|reader| reader.into_iter()); + // This isn't used during translation, so squash the warning about this // being unused from the compiler. let _ = BuiltinFunctions::raise; @@ -303,46 +315,48 @@ impl<'module_environment> FuncEnvironment<'module_environment> { next_srcloc: ir::SourceLoc::default(), wasm_module_offset: translation.wasm_module_offset, - branch_hints: &[], - branch_hint_cursor: 0, - func_body_offset: 0, + branch_hints, + peeked_hint: None, + func_body_offset, } } - /// Set the branch hints and the module-relative start offset for the - /// function about to be translated. Hints are expected in ascending - /// `func_offset` order (as the proposal requires); `take_branch_hint` - /// simply skips any that are out of order. - pub(crate) fn set_branch_hints( - &mut self, - hints: &'module_environment [BranchHint], - func_body_offset: usize, - ) { - self.branch_hints = hints; - self.branch_hint_cursor = 0; - self.func_body_offset = func_body_offset; - } - /// Consume the branch hint for the instruction at module-relative `offset` - /// (i.e. `builder.srcloc().bits()`), if any; `Some(true)` means likely - /// taken. The cursor only advances, making this O(n) over a function body. - pub(crate) fn take_branch_hint(&mut self, offset: usize) -> Option { - if self.branch_hints.is_empty() { + /// (i.e. `builder.srcloc().bits()`), if any. The lazy decoder only moves + /// forward, making this O(n) over a function body. + pub(crate) fn take_branch_hint(&mut self, offset: usize) -> Option { + // Fast path for the common case of a function with no hints (always so + // when the proposal is disabled): this is called for every `if`/`br_if`. + if self.branch_hints.is_none() { return None; } let rel = u32::try_from(offset.checked_sub(self.func_body_offset)?).ok()?; - // Skip hints that don't line up with this (or a later) branch. - while matches!( - self.branch_hints.get(self.branch_hint_cursor), - Some(h) if h.func_offset < rel, - ) { - self.branch_hint_cursor += 1; + loop { + // Refill the one-item lookahead from the lazy decoder. + if self.peeked_hint.is_none() { + self.peeked_hint = self.next_branch_hint(); + } + let hint = self.peeked_hint?; + if hint.func_offset < rel { + // Hint precedes this branch (or never lined up); drop it. + self.peeked_hint = None; + continue; + } + if hint.func_offset == rel { + self.peeked_hint = None; + return Some(hint); + } + // The next hint is for a later offset; nothing for this branch. + return None; } - let hint = self.branch_hints.get(self.branch_hint_cursor)?; - (hint.func_offset == rel).then(|| { - self.branch_hint_cursor += 1; - hint.taken - }) + } + + /// Decode the next hint for the current function, if any. + fn next_branch_hint(&mut self) -> Option { + // These bytes were already validated when the section was decoded into + // per-function readers, so this re-decode cannot fail; defensively treat + // an unexpected error as the end of the hints. + self.branch_hints.as_mut()?.next()?.ok() } pub(crate) fn pointer_type(&self) -> ir::Type { diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index 850a5fddbc76..8bfced8b5e61 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -335,13 +335,13 @@ pub fn translate_operator( // Mark the unlikely successor cold per the branch hint. A likely // condition makes the else block cold; when it is allocated lazily - // (`NoElse`) defer that to `Operator::Else` via `cold_else`. - let cold_else = match branch_hint { - Some(false) => { + // (`NoElse`) defer that to `Operator::Else` via `else_is_cold`. + let else_is_cold = match branch_hint { + Some(hint) if !hint.taken => { builder.set_cold_block(next_block); false } - Some(true) => match &else_data { + Some(_) => match &else_data { ElseData::WithElse { else_block } => { builder.set_cold_block(*else_block); false @@ -366,7 +366,7 @@ pub fn translate_operator( params.len(), results.len(), *blockty, - cold_else, + else_is_cold, ); } Operator::Else => { @@ -4018,9 +4018,9 @@ fn translate_br_if( let (br_destination, inputs) = translate_br_if_args(relative_depth, env); let next_block = builder.create_block(); - if let Some(taken) = branch_hint { + if let Some(hint) = branch_hint { // Likely taken => the fallthrough is cold, else the branch target is. - builder.set_cold_block(if taken { next_block } else { br_destination }); + builder.set_cold_block(if hint.taken { next_block } else { br_destination }); } canonicalise_brif(builder, val, br_destination, inputs, next_block, &[]); diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs index c541204e1206..edc9827d773a 100644 --- a/crates/environ/src/compile/module_environ.rs +++ b/crates/environ/src/compile/module_environ.rs @@ -123,21 +123,19 @@ pub struct ModuleTranslation<'data> { /// validation process. types: Option, - /// Branch hints parsed from the `metadata.code.branch_hint` custom section, + /// Per-function readers into the `metadata.code.branch_hint` custom section, /// keyed by module-level function index. Only populated when - /// [`Tunables::branch_hinting`] is enabled. - pub branch_hints: HashMap>, + /// [`Tunables::branch_hinting`] is enabled. The hints are decoded lazily + /// during compilation, so this holds the section's per-function sub-readers + /// rather than fully-decoded hints; access them via + /// [`ModuleTranslation::branch_hints`]. + branch_hints: HashMap>, } -/// A single branch hint from the `metadata.code.branch_hint` custom section +/// Lazy decoder over the branch hints attached to a single function in the +/// `metadata.code.branch_hint` custom section /// ([branch-hinting proposal](https://github.com/WebAssembly/branch-hinting)). -#[derive(Debug, Copy, Clone)] -pub struct BranchHint { - /// Byte offset of the hinted `br_if`/`if` from the start of the function body. - pub func_offset: u32, - /// Whether the branch's condition is hinted to be true. - pub taken: bool, -} +pub type BranchHintReader<'a> = wasmparser::SectionLimited<'a, wasmparser::BranchHint>; impl<'data> ModuleTranslation<'data> { /// Create a new translation for the module with the given index. @@ -164,6 +162,13 @@ impl<'data> ModuleTranslation<'data> { } } + /// Returns a lazy decoder over the branch hints for `func`, if the + /// `metadata.code.branch_hint` section attached any. Hints are decoded on + /// demand during compilation rather than eagerly during parsing. + pub fn branch_hints(&self, func: FuncIndex) -> Option> { + self.branch_hints.get(&func).cloned() + } + /// Returns a reference to the type information of the current module. pub fn get_types(&self) -> &Types { self.types @@ -774,30 +779,23 @@ and for re-adding support for interface types you can see this issue: } } KnownCustom::BranchHints(reader) if self.tunables.branch_hinting => { - // Compilation relies on the proposal's guarantee that hints are - // in ascending `func_offset` order; we trust it rather than - // re-sort (validating malformed sections is not yet done). Hints - // are advisory, so skip entries that fail to parse. - for func in reader.into_iter().flatten() { - let hints = func - .hints - .into_iter() - .flatten() - .map(|h| BranchHint { - func_offset: h.func_offset, - taken: h.taken, - }) - .collect::>(); - if !hints.is_empty() { - // A well-formed section lists each function at most once; - // if a malformed one repeats a function, keep the first - // entry deterministically rather than silently - // overwriting it. - self.result - .branch_hints - .entry(FuncIndex::from_u32(func.func)) - .or_insert(hints); - } + // Branch hints are advisory and this section is never validated; + // it is decoded lazily during compilation, so record only the + // per-function sub-readers here. Discard the whole section if any + // entry is malformed rather than applying it partially. + let mut hints = HashMap::new(); + let result: wasmparser::Result<()> = reader.into_iter().try_for_each(|func| { + let func = func?; + // A well-formed section lists each function at most once; keep + // the first entry deterministically if it repeats. + hints + .entry(FuncIndex::from_u32(func.func)) + .or_insert(func.hints); + Ok(()) + }); + match result { + Ok(()) => self.result.branch_hints = hints, + Err(e) => log::warn!("failed to parse branch-hint section {e:?}"), } } _ => { diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index ce43cffa3bef..e61fac35b9cb 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -138,6 +138,7 @@ impl Config { tail_call, extended_const, wide_arithmetic, + branch_hinting, component_model_async, component_model_more_async_builtins, component_model_async_stackful, @@ -177,6 +178,7 @@ impl Config { component_model_fixed_length_lists.unwrap_or(false); self.module_config.component_model_implements = component_model_implements.unwrap_or(false); self.module_config.stack_switching = stack_switching.unwrap_or(false); + self.wasmtime.branch_hinting = branch_hinting.unwrap_or(false); // Enable/disable proposals that wasm-smith has knobs for which will be // read when creating `wasmtime::Config`. @@ -346,6 +348,7 @@ impl Config { cfg.wasm.shared_everything_threads = Some(self.module_config.config.shared_everything_threads_enabled); cfg.wasm.wide_arithmetic = Some(self.module_config.config.wide_arithmetic_enabled); + cfg.wasm.branch_hinting = Some(self.wasmtime.branch_hinting); cfg.wasm.exceptions = Some(self.module_config.config.exceptions_enabled); cfg.wasm.stack_switching = Some(self.module_config.stack_switching); cfg.wasm.shared_memory = Some(self.module_config.shared_memory); @@ -608,6 +611,10 @@ pub struct WasmtimeConfig { table_lazy_init: bool, metadata_for_internal_asserts: bool, metadata_for_gc_heap_corruption: bool, + /// Whether the branch-hinting proposal is enabled. wasm-smith does not emit + /// `metadata.code.branch_hint` sections, so for generated modules this only + /// toggles the (otherwise no-op) parsing path. + branch_hinting: bool, /// Configuration for whether wasm is invoked in an async fashion and how /// it's cooperatively time-sliced. diff --git a/crates/test-util/src/wasmtime_wast.rs b/crates/test-util/src/wasmtime_wast.rs index cf7a3bca6f9c..aa0bc5aa77e7 100644 --- a/crates/test-util/src/wasmtime_wast.rs +++ b/crates/test-util/src/wasmtime_wast.rs @@ -39,6 +39,7 @@ pub fn apply_test_config(config: &mut Config, test_config: &wast::TestConfig) { tail_call, extended_const, wide_arithmetic, + branch_hinting, component_model_async, component_model_more_async_builtins, component_model_async_stackful, @@ -72,6 +73,7 @@ pub fn apply_test_config(config: &mut Config, test_config: &wast::TestConfig) { let tail_call = tail_call.unwrap_or(false); let extended_const = extended_const.unwrap_or(false); let wide_arithmetic = wide_arithmetic.unwrap_or(false); + let branch_hinting = branch_hinting.unwrap_or(false); let component_model_async = component_model_async.unwrap_or(false); let component_model_more_async_builtins = component_model_more_async_builtins.unwrap_or(false); let component_model_async_stackful = component_model_async_stackful.unwrap_or(false); @@ -115,6 +117,7 @@ pub fn apply_test_config(config: &mut Config, test_config: &wast::TestConfig) { .wasm_custom_page_sizes(custom_page_sizes) .wasm_extended_const(extended_const) .wasm_wide_arithmetic(wide_arithmetic) + .wasm_branch_hinting(branch_hinting) .wasm_component_model_async(component_model_async) .wasm_component_model_more_async_builtins(component_model_more_async_builtins) .wasm_component_model_async_stackful(component_model_async_stackful) diff --git a/crates/test-util/src/wast.rs b/crates/test-util/src/wast.rs index cc394ee8cf5c..ef71c79ee43e 100644 --- a/crates/test-util/src/wast.rs +++ b/crates/test-util/src/wast.rs @@ -273,6 +273,7 @@ macro_rules! foreach_config_option { tail_call extended_const wide_arithmetic + branch_hinting hogs_memory nan_canonicalization component_model_async diff --git a/docs/stability-wasm-proposals.md b/docs/stability-wasm-proposals.md index 97db45202903..4ce9539c260e 100644 --- a/docs/stability-wasm-proposals.md +++ b/docs/stability-wasm-proposals.md @@ -74,17 +74,14 @@ The emoji legend is: | Proposal | Phase 4 | Tests | Finished | Fuzzed | API | C API | |-----------------------------|---------|-------|----------|--------|-----|-------| -| [`branch-hinting`] [^12] | ❌ | ❌ | 🚧 | ❌ | ✅ | ✅ | +| [`branch-hinting`] [^12] | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | | [`stack-switching`] [^11] | ❌ | 🚧 | 🚧 | ❌ | ❌ | ❌ | [^11]: The stack-switching proposal is a work-in-progress being tracked at [#9465](https://github.com/bytecodealliance/wasmtime/issues/9465). Currently the implementation is only for x86\_64 Linux. -[^12]: Branch hinting parses the `metadata.code.branch_hint` custom section and - uses it to mark cold blocks during Cranelift compilation. It is disabled by - default (`Config::wasm_branch_hinting`) until it has been fuzzed; spec-test - enablement and validation of malformed sections are still pending. Tracked - at [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463). +[^12]: Disabled by default (`Config::wasm_branch_hinting`) pending fuzzing; + tracked at [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463). ## Unimplemented proposals diff --git a/tests/all/branch_hinting.rs b/tests/all/branch_hinting.rs deleted file mode 100644 index 56dea4e8775a..000000000000 --- a/tests/all/branch_hinting.rs +++ /dev/null @@ -1,71 +0,0 @@ -//! Tests for the WebAssembly branch-hinting proposal. The spec conformance -//! tests live in this repo at `tests/spec_testsuite/custom/branch_hint.wast`; -//! per that spec hints may only be attached to `br_if` and `if` instructions. -//! Proposal: . - -use wasmtime::*; - -// Both functions return 10 when their condition is true (arg != 0) and 20 -// otherwise. The `(@metadata.code.branch_hint ...)` annotation must immediately -// precede the `if`/`br_if` it applies to. -const MODULE: &str = r#" -(module - (func (export "via_if") (param i32) (result i32) - local.get 0 - (@metadata.code.branch_hint "\00") - if (result i32) - i32.const 10 - else - i32.const 20 - end) - - (func (export "via_br_if") (param i32) (result i32) - (block $b (result i32) - i32.const 10 - local.get 0 - (@metadata.code.branch_hint "\01") - br_if $b - drop - i32.const 20))) -"#; - -fn results(branch_hinting: Option) -> Result> { - let mut config = Config::new(); - if let Some(enable) = branch_hinting { - config.wasm_branch_hinting(enable); - } - let engine = Engine::new(&config)?; - let module = Module::new(&engine, MODULE)?; - let mut store = Store::new(&engine, ()); - let instance = Instance::new(&mut store, &module, &[])?; - let via_if = instance.get_typed_func::(&mut store, "via_if")?; - let via_br_if = instance.get_typed_func::(&mut store, "via_br_if")?; - - let mut out = Vec::new(); - for arg in [0, 1, 7, -3] { - out.push(( - arg, - via_if.call(&mut store, arg)?, - via_br_if.call(&mut store, arg)?, - )); - } - Ok(out) -} - -// A module carrying branch hints must compile and run identically whether the -// proposal is enabled, explicitly disabled, or left at its (disabled) default: -// hints are advisory and never change semantics. -#[test] -#[cfg_attr(miri, ignore)] -fn branch_hints_are_semantically_neutral() -> Result<()> { - let enabled = results(Some(true))?; - assert_eq!(enabled, results(Some(false))?); - assert_eq!(enabled, results(None)?); - - for (arg, via_if, via_br_if) in enabled { - let expected = if arg != 0 { 10 } else { 20 }; - assert_eq!(via_if, expected, "via_if({arg})"); - assert_eq!(via_br_if, expected, "via_br_if({arg})"); - } - Ok(()) -} diff --git a/tests/all/main.rs b/tests/all/main.rs index 108a522058c9..ec61178b3478 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -4,7 +4,6 @@ use wasmtime::Result; mod arrays; mod async_functions; -mod branch_hinting; mod call_hook; mod cli_tests; mod compile_time_builtins; diff --git a/tests/disas/branch-hinting-disabled.wat b/tests/disas/branch-hinting-disabled.wat index 8d569a8ec8e7..07d6a1b78f1f 100644 --- a/tests/disas/branch-hinting-disabled.wat +++ b/tests/disas/branch-hinting-disabled.wat @@ -1,9 +1,9 @@ ;;! target = "x86_64" ;;! test = "optimize" +;;! flags = "-Wbranch-hinting=n" -;; Same hinted module as branch-hinting.wat, but WITHOUT `-Wbranch-hinting`: -;; with the knob off (the default) the `metadata.code.branch_hint` section is -;; ignored and no block is marked `cold`. This pins the no-regression behavior. +;; With branch hinting disabled, codegen ignores the `metadata.code.branch_hint` +;; custom section entirely: no block is marked `cold`. (module (func $if_unlikely (param i32) (result i32) diff --git a/tests/misc_testsuite/branch-hinting-invalid.wast b/tests/misc_testsuite/branch-hinting-invalid.wast new file mode 100644 index 000000000000..d8a0a9ef95a7 --- /dev/null +++ b/tests/misc_testsuite/branch-hinting-invalid.wast @@ -0,0 +1,22 @@ +;;! branch_hinting = true + +;; A malformed `metadata.code.branch_hint` section must be ignored, not rejected: +;; the section is advisory and unvalidated, so a decode error discards it and +;; compilation still succeeds. Here the hint's reserved length byte is `\02` +;; instead of the required `\01`, so decoding the section fails. +;; +;; Section bytes: \01 = one function; \00 = func 0; \01 = one hint; +;; \00 = func_offset 0; \02 = (invalid) hint length byte. + +(module + (@custom "metadata.code.branch_hint" (after code) "\01\00\01\00\02") + (func (export "f") (param i32) (result i32) + local.get 0 + if (result i32) + i32.const 10 + else + i32.const 20 + end)) + +(assert_return (invoke "f" (i32.const 0)) (i32.const 20)) +(assert_return (invoke "f" (i32.const 1)) (i32.const 10)) diff --git a/tests/misc_testsuite/branch-hinting.wast b/tests/misc_testsuite/branch-hinting.wast new file mode 100644 index 000000000000..054d1eba9077 --- /dev/null +++ b/tests/misc_testsuite/branch-hinting.wast @@ -0,0 +1,36 @@ +;;! branch_hinting = true + +;; Branch hints are advisory: with the proposal enabled a hinted module must +;; still produce the same results it would without hints. Both functions return +;; 10 when their argument is nonzero and 20 otherwise. The +;; `(@metadata.code.branch_hint ...)` annotation must immediately precede the +;; `if`/`br_if` it applies to. + +(module + (func (export "via_if") (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 10 + else + i32.const 20 + end) + + (func (export "via_br_if") (param i32) (result i32) + (block $b (result i32) + i32.const 10 + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $b + drop + i32.const 20))) + +(assert_return (invoke "via_if" (i32.const 0)) (i32.const 20)) +(assert_return (invoke "via_if" (i32.const 1)) (i32.const 10)) +(assert_return (invoke "via_if" (i32.const 7)) (i32.const 10)) +(assert_return (invoke "via_if" (i32.const -3)) (i32.const 10)) + +(assert_return (invoke "via_br_if" (i32.const 0)) (i32.const 20)) +(assert_return (invoke "via_br_if" (i32.const 1)) (i32.const 10)) +(assert_return (invoke "via_br_if" (i32.const 7)) (i32.const 10)) +(assert_return (invoke "via_br_if" (i32.const -3)) (i32.const 10)) From 866d4a5a7a10659b30c0aec6a88b56373d5f6b0c Mon Sep 17 00:00:00 2001 From: FUJI Goro Date: Thu, 28 May 2026 15:18:47 +0900 Subject: [PATCH 4/6] Drive branch-hint lookahead with a Peekable iterator Replaces the hand-rolled one-item lookahead (a `peeked_hint` field plus a `next_branch_hint` helper) with `Iterator::peekable`, so the cursor's peek/advance is expressed directly by the standard library. Assisted-by: Claude Code:claude-opus-4-7 --- crates/cranelift/src/func_environ.rs | 39 +++++++------------ .../src/translate/code_translator.rs | 6 ++- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 4f9340b5fac4..e5d2d47758c3 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -25,6 +25,7 @@ use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap}; use cranelift_frontend::Variable; use cranelift_frontend::{FuncInstBuilder, FunctionBuilder}; use smallvec::{SmallVec, smallvec}; +use std::iter::Peekable; use std::mem; use wasmparser::{ BranchHint, FuncValidator, Operator, SectionLimitedIntoIter, WasmFeatures, WasmModuleResources, @@ -236,11 +237,9 @@ pub struct FuncEnvironment<'module_environment> { pub(crate) next_srcloc: ir::SourceLoc, /// Lazily-decoded branch hints for the current function, in ascending - /// `func_offset` order (as the proposal requires). `None` once exhausted or - /// when the function carries no hints. - branch_hints: Option>, - /// One-item lookahead into `branch_hints`, consumed by `take_branch_hint`. - peeked_hint: Option, + /// `func_offset` order (as the proposal requires). `None` when the function + /// carries no hints. + branch_hints: Option>>, /// Module-relative byte offset of the current function body's start. func_body_offset: usize, } @@ -263,7 +262,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { // the function-body-relative offsets the hints use. let branch_hints = translation .branch_hints(func_index) - .map(|reader| reader.into_iter()); + .map(|reader| reader.into_iter().peekable()); // This isn't used during translation, so squash the warning about this // being unused from the compiler. @@ -316,7 +315,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> { wasm_module_offset: translation.wasm_module_offset, branch_hints, - peeked_hint: None, func_body_offset, } } @@ -325,25 +323,22 @@ impl<'module_environment> FuncEnvironment<'module_environment> { /// (i.e. `builder.srcloc().bits()`), if any. The lazy decoder only moves /// forward, making this O(n) over a function body. pub(crate) fn take_branch_hint(&mut self, offset: usize) -> Option { - // Fast path for the common case of a function with no hints (always so + // `as_mut()?` is the fast path for a function with no hints (always so // when the proposal is disabled): this is called for every `if`/`br_if`. - if self.branch_hints.is_none() { - return None; - } + let hints = self.branch_hints.as_mut()?; let rel = u32::try_from(offset.checked_sub(self.func_body_offset)?).ok()?; loop { - // Refill the one-item lookahead from the lazy decoder. - if self.peeked_hint.is_none() { - self.peeked_hint = self.next_branch_hint(); - } - let hint = self.peeked_hint?; + // The hint bytes were already validated when the section was decoded + // into per-function readers, so a decode error here is unexpected; + // defensively treat it (like exhaustion) as the end of the hints. + let hint = *hints.peek()?.as_ref().ok()?; if hint.func_offset < rel { // Hint precedes this branch (or never lined up); drop it. - self.peeked_hint = None; + hints.next(); continue; } if hint.func_offset == rel { - self.peeked_hint = None; + hints.next(); return Some(hint); } // The next hint is for a later offset; nothing for this branch. @@ -351,14 +346,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> { } } - /// Decode the next hint for the current function, if any. - fn next_branch_hint(&mut self) -> Option { - // These bytes were already validated when the section was decoded into - // per-function readers, so this re-decode cannot fail; defensively treat - // an unexpected error as the end of the hints. - self.branch_hints.as_mut()?.next()?.ok() - } - pub(crate) fn pointer_type(&self) -> ir::Type { self.isa.pointer_type() } diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index 8bfced8b5e61..c6a0e414d68e 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -4020,7 +4020,11 @@ fn translate_br_if( if let Some(hint) = branch_hint { // Likely taken => the fallthrough is cold, else the branch target is. - builder.set_cold_block(if hint.taken { next_block } else { br_destination }); + builder.set_cold_block(if hint.taken { + next_block + } else { + br_destination + }); } canonicalise_brif(builder, val, br_destination, inputs, next_block, &[]); From 7c5b1f56691dd5d6dd1c53627f96897e5188bcf9 Mon Sep 17 00:00:00 2001 From: FUJI Goro Date: Thu, 28 May 2026 16:02:03 +0900 Subject: [PATCH 5/6] Cover branch-hint cold-block layout on the Pulley backend Pins the Pulley bytecode emitted for hinted `if`/`br_if`, demonstrating that the target-independent cold-block layout reaches the interpreter backend just as it does the native ones. Assisted-by: Claude Code:claude-opus-4-7 --- tests/disas/branch-hinting-pulley.wat | 49 +++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/disas/branch-hinting-pulley.wat diff --git a/tests/disas/branch-hinting-pulley.wat b/tests/disas/branch-hinting-pulley.wat new file mode 100644 index 000000000000..99e71da730b7 --- /dev/null +++ b/tests/disas/branch-hinting-pulley.wat @@ -0,0 +1,49 @@ +;;! target = "pulley64" +;;! test = "compile" +;;! flags = "-Wbranch-hinting" + +;; Pulley honors branch hints too: the cold path is laid out after the hot path +;; in the generated bytecode (the branch-hinting optimization lives in +;; target-independent Cranelift code, so it applies on every backend). + +(module + ;; condition likely false: the then-block is cold. + (func $if_unlikely (param i32) (result i32) + local.get 0 + (@metadata.code.branch_hint "\00") + if (result i32) + i32.const 10 + else + i32.const 20 + end + ) + + ;; condition likely true: the fallthrough is cold. + (func $br_if_likely (param i32) (result i32) + (block $b (result i32) + i32.const 10 + local.get 0 + (@metadata.code.branch_hint "\01") + br_if $b + drop + i32.const 20 + ) + ) +) +;; wasm[0]::function[0]::if_unlikely: +;; push_frame +;; br_if32 x2, 0xb // target = 0xc +;; 7: xconst8 x0, 20 +;; pop_frame +;; ret +;; c: xconst8 x0, 10 +;; f: jump -0x5 // target = 0xa +;; +;; wasm[0]::function[1]::br_if_likely: +;; push_frame +;; xconst8 x0, 10 +;; br_if_not32 x2, 0x8 // target = 0x20 +;; 1e: pop_frame +;; ret +;; 20: xconst8 x0, 20 +;; 23: jump -0x5 // target = 0x1e From dc3581dca516f78e89b9a6e91f877c995e35ce62 Mon Sep 17 00:00:00 2001 From: FUJI Goro Date: Thu, 28 May 2026 22:03:24 +0900 Subject: [PATCH 6/6] Tighten branch-hint comments to the non-obvious Drops restated doc prose (the per-function reader is described once, on the `BranchHintReader` alias) and trims the running narration in the hint cursor down to the parts a reader cannot infer from the code. Assisted-by: Claude Code:claude-opus-4-7 --- crates/cranelift/src/func_environ.rs | 16 ++++++---------- crates/environ/src/compile/module_environ.rs | 13 ++++--------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 9efdd7532725..50a7452e9fb5 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -268,10 +268,8 @@ impl<'module_environment> FuncEnvironment<'module_environment> { let tunables = compiler.tunables(); let builtin_functions = BuiltinFunctions::new(compiler); - // Resolve the lazy branch-hint decoder for this function, if any. - // `func_body_offset` lets `take_branch_hint` convert source locations to - // the function-body-relative offsets the hints use. Synthesized functions - // without a wasm body (e.g. module startup) pass `None` and get no hints. + // Synthesized functions without a wasm body (e.g. module startup) pass + // `None`, yielding no hints. let branch_hints = func_index .and_then(|func_index| translation.branch_hints(func_index)) .map(|reader| reader.into_iter().peekable()); @@ -338,17 +336,15 @@ impl<'module_environment> FuncEnvironment<'module_environment> { /// (i.e. `builder.srcloc().bits()`), if any. The lazy decoder only moves /// forward, making this O(n) over a function body. pub(crate) fn take_branch_hint(&mut self, offset: usize) -> Option { - // `as_mut()?` is the fast path for a function with no hints (always so - // when the proposal is disabled): this is called for every `if`/`br_if`. + // Fast path: no hints (always so when the proposal is off), and this + // runs for every `if`/`br_if`. let hints = self.branch_hints.as_mut()?; let rel = u32::try_from(offset.checked_sub(self.func_body_offset)?).ok()?; loop { - // The hint bytes were already validated when the section was decoded - // into per-function readers, so a decode error here is unexpected; - // defensively treat it (like exhaustion) as the end of the hints. + // Hint bytes were validated when the section was decoded, so an error + // here is unexpected; treat it like exhaustion (end of hints). let hint = *hints.peek()?.as_ref().ok()?; if hint.func_offset < rel { - // Hint precedes this branch (or never lined up); drop it. hints.next(); continue; } diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs index aa736cc51971..542181e55fd6 100644 --- a/crates/environ/src/compile/module_environ.rs +++ b/crates/environ/src/compile/module_environ.rs @@ -116,12 +116,9 @@ pub struct ModuleTranslation<'data> { /// validation process. types: Option, - /// Per-function readers into the `metadata.code.branch_hint` custom section, - /// keyed by module-level function index. Only populated when - /// [`Tunables::branch_hinting`] is enabled. The hints are decoded lazily - /// during compilation, so this holds the section's per-function sub-readers - /// rather than fully-decoded hints; access them via - /// [`ModuleTranslation::branch_hints`]. + /// Per-function [`BranchHintReader`]s from the `metadata.code.branch_hint` + /// section, keyed by function index. Populated only when + /// [`Tunables::branch_hinting`] is enabled. branch_hints: HashMap>, /// The WebAssembly `start` function, if defined. @@ -214,9 +211,7 @@ impl<'data> ModuleTranslation<'data> { } } - /// Returns a lazy decoder over the branch hints for `func`, if the - /// `metadata.code.branch_hint` section attached any. Hints are decoded on - /// demand during compilation rather than eagerly during parsing. + /// Returns the [`BranchHintReader`] for `func`, if the section attached any. pub fn branch_hints(&self, func: FuncIndex) -> Option> { self.branch_hints.get(&func).cloned() }