From ae177242232960d6398e8c8925514cfab8862c66 Mon Sep 17 00:00:00 2001 From: Sephyi Date: Sun, 19 Apr 2026 18:58:43 +0200 Subject: [PATCH] test(fuzz): add signature extraction fuzz targets for non-Rust languages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the signature-extraction fuzz coverage from Rust-only to all ten supported grammars (Rust, TypeScript, JavaScript, Python, Go, Java, C, C++, Ruby, C#). A new unified `fuzz_signature_multilang` target dispatches on `data[0] % 10` to pick a language, then feeds the remaining bytes to the matching `extract_*_signature` helper. This mirrors the byte-dispatch pattern already used by `fuzz_classify_span` and keeps boilerplate minimal. To expose the dispatcher, `lib.rs` grows one public wrapper per language, each delegating to a small private `extract_signature_for_language(source, language)` helper that centralises the `Parser::new() -> set_language -> parse -> root.child(0) -> AnalyzerService::extract_signature` pipeline. Each wrapper is gated by its language feature so the crate still builds with arbitrary subsets of `lang-*` features. `fuzz/Cargo.toml` now pulls `commitbee` with `default-features = false` plus every `lang-*` feature, and registers the new `fuzz_signature_multilang` binary. Turning off default features also drops the keyring transitive dependency from the fuzz build, which is pure build-time savings for a workload that never touches secure storage. Verified via `cargo check --manifest-path fuzz/Cargo.toml` plus the standard `cargo fmt --check`, `cargo clippy --all-targets --all-features -- -D warnings`, and `cargo test --all-targets`. The fuzzer itself does not need to run to completion — the guarantee is "never panic on any input," and `cargo-fuzz` will exercise that as part of the normal fuzzing workflow. Closes audit entry D-047 from #3. 
--- fuzz/Cargo.lock | 43 +---------- fuzz/Cargo.toml | 18 ++++- fuzz/fuzz_targets/fuzz_signature_multilang.rs | 65 ++++++++++++++++ src/lib.rs | 74 ++++++++++++++++--- 4 files changed, 149 insertions(+), 51 deletions(-) create mode 100644 fuzz/fuzz_targets/fuzz_signature_multilang.rs diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index a744e56..97f13ad 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -388,7 +388,6 @@ dependencies = [ "gix", "globset", "indicatif", - "keyring", "miette", "rayon", "regex", @@ -1944,21 +1943,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "keyring" -version = "3.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eebcc3aff044e5944a8fbaf69eb277d11986064cba30c468730e8b9909fb551c" -dependencies = [ - "byteorder", - "linux-keyutils", - "log", - "security-framework 2.11.1", - "security-framework 3.7.0", - "windows-sys 0.60.2", - "zeroize", -] - [[package]] name = "lazy_static" version = "1.5.0" @@ -1999,16 +1983,6 @@ dependencies = [ "redox_syscall 0.7.3", ] -[[package]] -name = "linux-keyutils" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83270a18e9f90d0707c41e9f35efada77b64c0e6f3f1810e71c8368a864d5590" -dependencies = [ - "bitflags", - "libc", -] - [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -2732,7 +2706,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.7.0", + "security-framework", ] [[package]] @@ -2760,7 +2734,7 @@ dependencies = [ "rustls-native-certs", "rustls-platform-verifier-android", "rustls-webpki", - "security-framework 3.7.0", + "security-framework", "security-framework-sys", "webpki-root-certs", "windows-sys 0.61.2", @@ -2824,19 +2798,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "security-framework" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" -dependencies = [ - "bitflags", - "core-foundation 0.9.4", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - [[package]] name = "security-framework" version = "3.7.0" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 84a3325..f882198 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -13,7 +13,18 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4" -commitbee = { path = "..", features = ["lang-rust"] } +commitbee = { path = "..", default-features = false, features = [ + "lang-rust", + "lang-typescript", + "lang-javascript", + "lang-python", + "lang-go", + "lang-java", + "lang-c", + "lang-cpp", + "lang-ruby", + "lang-csharp", +] } [[bin]] name = "fuzz_sanitizer" @@ -35,6 +46,11 @@ name = "fuzz_signature" path = "fuzz_targets/fuzz_signature.rs" doc = false +[[bin]] +name = "fuzz_signature_multilang" +path = "fuzz_targets/fuzz_signature_multilang.rs" +doc = false + [[bin]] name = "fuzz_classify_span" path = "fuzz_targets/fuzz_classify_span.rs" diff --git a/fuzz/fuzz_targets/fuzz_signature_multilang.rs b/fuzz/fuzz_targets/fuzz_signature_multilang.rs new file mode 100644 index 0000000..954c5a5 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_signature_multilang.rs @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: 2026 Sephyi +// +// SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Commercial + +#![no_main] + +use libfuzzer_sys::fuzz_target; + +// Dispatches the remaining input to a language-specific signature extractor +// based on the first byte (`data[0] % 10`). Each `extract_*_signature` +// function must never panic on any input — this fuzzer only asserts that. 
+// +// Language map (matches CommitBee's supported grammars): +// 0 -> Rust +// 1 -> TypeScript +// 2 -> JavaScript +// 3 -> Python +// 4 -> Go +// 5 -> Java +// 6 -> C +// 7 -> C++ +// 8 -> Ruby +// 9 -> C# +fuzz_target!(|data: &[u8]| { + if data.is_empty() { + return; + } + let selector = data[0] % 10; + let Ok(source) = std::str::from_utf8(&data[1..]) else { + return; + }; + match selector { + 0 => { + let _ = commitbee::extract_rust_signature(source); + } + 1 => { + let _ = commitbee::extract_typescript_signature(source); + } + 2 => { + let _ = commitbee::extract_javascript_signature(source); + } + 3 => { + let _ = commitbee::extract_python_signature(source); + } + 4 => { + let _ = commitbee::extract_go_signature(source); + } + 5 => { + let _ = commitbee::extract_java_signature(source); + } + 6 => { + let _ = commitbee::extract_c_signature(source); + } + 7 => { + let _ = commitbee::extract_cpp_signature(source); + } + 8 => { + let _ = commitbee::extract_ruby_signature(source); + } + 9 => { + let _ = commitbee::extract_csharp_signature(source); + } + _ => unreachable!("selector is `% 10`"), + } +}); diff --git a/src/lib.rs b/src/lib.rs index 8720083..cc984ac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,18 +56,14 @@ pub fn parse_diff_hunks(diff: &str) -> Vec { services::analyzer::DiffHunk::parse_from_diff(diff) } -/// Extract signature from Rust source code for fuzz target access. +/// Extract signature from source code using the given tree-sitter language. /// -/// Parses the source with tree-sitter Rust, finds the first top-level definition, -/// and extracts its signature. Must never panic on any input. -#[cfg(feature = "lang-rust")] -pub fn extract_rust_signature(source: &str) -> Option { +/// Parses the source, finds the first top-level definition, and extracts its +/// signature. Must never panic on any input. 
+fn extract_signature_for_language(source: &str, language: tree_sitter::Language) -> Option { use tree_sitter::Parser; let mut parser = Parser::new(); - if parser - .set_language(&tree_sitter_rust::LANGUAGE.into()) - .is_err() - { + if parser.set_language(&language).is_err() { return None; } let tree = parser.parse(source, None)?; @@ -76,6 +72,66 @@ pub fn extract_rust_signature(source: &str) -> Option { services::analyzer::AnalyzerService::extract_signature(first_child, source) } +/// Extract signature from Rust source code for fuzz target access. +#[cfg(feature = "lang-rust")] +pub fn extract_rust_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_rust::LANGUAGE.into()) +} + +/// Extract signature from TypeScript source code for fuzz target access. +#[cfg(feature = "lang-typescript")] +pub fn extract_typescript_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()) +} + +/// Extract signature from JavaScript source code for fuzz target access. +#[cfg(feature = "lang-javascript")] +pub fn extract_javascript_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_javascript::LANGUAGE.into()) +} + +/// Extract signature from Python source code for fuzz target access. +#[cfg(feature = "lang-python")] +pub fn extract_python_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_python::LANGUAGE.into()) +} + +/// Extract signature from Go source code for fuzz target access. +#[cfg(feature = "lang-go")] +pub fn extract_go_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_go::LANGUAGE.into()) +} + +/// Extract signature from Java source code for fuzz target access. 
+#[cfg(feature = "lang-java")] +pub fn extract_java_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_java::LANGUAGE.into()) +} + +/// Extract signature from C source code for fuzz target access. +#[cfg(feature = "lang-c")] +pub fn extract_c_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_c::LANGUAGE.into()) +} + +/// Extract signature from C++ source code for fuzz target access. +#[cfg(feature = "lang-cpp")] +pub fn extract_cpp_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_cpp::LANGUAGE.into()) +} + +/// Extract signature from Ruby source code for fuzz target access. +#[cfg(feature = "lang-ruby")] +pub fn extract_ruby_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_ruby::LANGUAGE.into()) +} + +/// Extract signature from C# source code for fuzz target access. +#[cfg(feature = "lang-csharp")] +pub fn extract_csharp_signature(source: &str) -> Option { + extract_signature_for_language(source, tree_sitter_c_sharp::LANGUAGE.into()) +} + /// Classify whether a diff span contains whitespace-only changes for fuzz target access. /// /// Wrapper around `ContextBuilder::classify_span_change`. Must never panic on any input.