Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 2 additions & 41 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 17 additions & 1 deletion fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,18 @@ cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4"
commitbee = { path = "..", features = ["lang-rust"] }
commitbee = { path = "..", default-features = false, features = [
"lang-rust",
"lang-typescript",
"lang-javascript",
"lang-python",
"lang-go",
"lang-java",
"lang-c",
"lang-cpp",
"lang-ruby",
"lang-csharp",
] }

[[bin]]
name = "fuzz_sanitizer"
Expand All @@ -35,6 +46,11 @@ name = "fuzz_signature"
path = "fuzz_targets/fuzz_signature.rs"
doc = false

[[bin]]
name = "fuzz_signature_multilang"
path = "fuzz_targets/fuzz_signature_multilang.rs"
doc = false

[[bin]]
name = "fuzz_classify_span"
path = "fuzz_targets/fuzz_classify_span.rs"
Expand Down
65 changes: 65 additions & 0 deletions fuzz/fuzz_targets/fuzz_signature_multilang.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// SPDX-FileCopyrightText: 2026 Sephyi <me@sephy.io>
//
// SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Commercial

#![no_main]

use libfuzzer_sys::fuzz_target;

// Dispatches the remaining input to a language-specific signature extractor
// based on the first byte (`data[0] % 10`). Each `extract_*_signature`
// function must never panic on any input — this fuzzer only asserts that.
//
// Language map (matches CommitBee's supported grammars):
// 0 -> Rust
// 1 -> TypeScript
// 2 -> JavaScript
// 3 -> Python
// 4 -> Go
// 5 -> Java
// 6 -> C
// 7 -> C++
// 8 -> Ruby
// 9 -> C#
fuzz_target!(|data: &[u8]| {
if data.is_empty() {
return;
}
let selector = data[0] % 10;
let Ok(source) = std::str::from_utf8(&data[1..]) else {
return;
};
match selector {
0 => {
let _ = commitbee::extract_rust_signature(source);
}
1 => {
let _ = commitbee::extract_typescript_signature(source);
}
2 => {
let _ = commitbee::extract_javascript_signature(source);
}
3 => {
let _ = commitbee::extract_python_signature(source);
}
4 => {
let _ = commitbee::extract_go_signature(source);
}
5 => {
let _ = commitbee::extract_java_signature(source);
}
6 => {
let _ = commitbee::extract_c_signature(source);
}
7 => {
let _ = commitbee::extract_cpp_signature(source);
}
8 => {
let _ = commitbee::extract_ruby_signature(source);
}
9 => {
let _ = commitbee::extract_csharp_signature(source);
Comment on lines +29 to +61
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fuzzer drops any input where `data[1..]` is not valid UTF-8, which can significantly reduce fuzz coverage since many byte sequences will exit early. To exercise the parsers more thoroughly, consider a lossy conversion such as `String::from_utf8_lossy(&data[1..])`, which replaces invalid sequences with U+FFFD instead of rejecting the input (or switch the target input type to `(&str, u8)` via a structured fuzzer input), so every input still drives the extractor while staying within the `&str` API contract.

Suggested change
let Ok(source) = std::str::from_utf8(&data[1..]) else {
return;
};
match selector {
0 => {
let _ = commitbee::extract_rust_signature(source);
}
1 => {
let _ = commitbee::extract_typescript_signature(source);
}
2 => {
let _ = commitbee::extract_javascript_signature(source);
}
3 => {
let _ = commitbee::extract_python_signature(source);
}
4 => {
let _ = commitbee::extract_go_signature(source);
}
5 => {
let _ = commitbee::extract_java_signature(source);
}
6 => {
let _ = commitbee::extract_c_signature(source);
}
7 => {
let _ = commitbee::extract_cpp_signature(source);
}
8 => {
let _ = commitbee::extract_ruby_signature(source);
}
9 => {
let _ = commitbee::extract_csharp_signature(source);
let source = String::from_utf8_lossy(&data[1..]);
match selector {
0 => {
let _ = commitbee::extract_rust_signature(source.as_ref());
}
1 => {
let _ = commitbee::extract_typescript_signature(source.as_ref());
}
2 => {
let _ = commitbee::extract_javascript_signature(source.as_ref());
}
3 => {
let _ = commitbee::extract_python_signature(source.as_ref());
}
4 => {
let _ = commitbee::extract_go_signature(source.as_ref());
}
5 => {
let _ = commitbee::extract_java_signature(source.as_ref());
}
6 => {
let _ = commitbee::extract_c_signature(source.as_ref());
}
7 => {
let _ = commitbee::extract_cpp_signature(source.as_ref());
}
8 => {
let _ = commitbee::extract_ruby_signature(source.as_ref());
}
9 => {
let _ = commitbee::extract_csharp_signature(source.as_ref());

Copilot uses AI. Check for mistakes.
}
_ => unreachable!("selector is `% 10`"),
}
});
74 changes: 65 additions & 9 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,14 @@ pub fn parse_diff_hunks(diff: &str) -> Vec<services::analyzer::DiffHunk> {
services::analyzer::DiffHunk::parse_from_diff(diff)
}

/// Extract signature from Rust source code for fuzz target access.
/// Extract signature from source code using the given tree-sitter language.
///
/// Parses the source with tree-sitter Rust, finds the first top-level definition,
/// and extracts its signature. Must never panic on any input.
#[cfg(feature = "lang-rust")]
pub fn extract_rust_signature(source: &str) -> Option<String> {
/// Parses the source, finds the first top-level definition, and extracts its
/// signature. Must never panic on any input.
fn extract_signature_for_language(source: &str, language: tree_sitter::Language) -> Option<String> {
use tree_sitter::Parser;
let mut parser = Parser::new();
if parser
.set_language(&tree_sitter_rust::LANGUAGE.into())
.is_err()
{
if parser.set_language(&language).is_err() {
return None;
}
let tree = parser.parse(source, None)?;
Expand All @@ -76,6 +72,66 @@ pub fn extract_rust_signature(source: &str) -> Option<String> {
services::analyzer::AnalyzerService::extract_signature(first_child, source)
}
Comment on lines 59 to 73
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extract_signature_for_language is always compiled, but it’s only referenced from the language-specific wrappers that are all #[cfg(feature = "lang-*")]. In a --no-default-features build with no language features enabled, this helper becomes unused and will trigger a dead_code warning. Consider gating the helper with the same cfg(any(feature = "lang-rust", ...)) set, or explicitly allowing dead_code with a short rationale so minimal-feature builds stay warning-free.

Copilot uses AI. Check for mistakes.

/// Fuzz-facing entry point that extracts a signature from Rust source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Rust grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-rust` feature is enabled.
#[cfg(feature = "lang-rust")]
pub fn extract_rust_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_rust::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}

/// Fuzz-facing entry point that extracts a signature from TypeScript source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter TypeScript grammar (`LANGUAGE_TYPESCRIPT`) and returns that
/// helper's result unchanged. Only compiled when the `lang-typescript`
/// feature is enabled.
#[cfg(feature = "lang-typescript")]
pub fn extract_typescript_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into();
    extract_signature_for_language(source, grammar)
}

/// Fuzz-facing entry point that extracts a signature from JavaScript source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter JavaScript grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-javascript` feature is enabled.
#[cfg(feature = "lang-javascript")]
pub fn extract_javascript_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_javascript::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}

/// Fuzz-facing entry point that extracts a signature from Python source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Python grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-python` feature is enabled.
#[cfg(feature = "lang-python")]
pub fn extract_python_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}

/// Fuzz-facing entry point that extracts a signature from Go source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Go grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-go` feature is enabled.
#[cfg(feature = "lang-go")]
pub fn extract_go_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_go::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}

/// Fuzz-facing entry point that extracts a signature from Java source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Java grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-java` feature is enabled.
#[cfg(feature = "lang-java")]
pub fn extract_java_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_java::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}

/// Fuzz-facing entry point that extracts a signature from C source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter C grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-c` feature is enabled.
#[cfg(feature = "lang-c")]
pub fn extract_c_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}

/// Fuzz-facing entry point that extracts a signature from C++ source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter C++ grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-cpp` feature is enabled.
#[cfg(feature = "lang-cpp")]
pub fn extract_cpp_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_cpp::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}

/// Fuzz-facing entry point that extracts a signature from Ruby source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Ruby grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-ruby` feature is enabled.
#[cfg(feature = "lang-ruby")]
pub fn extract_ruby_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}

/// Fuzz-facing entry point that extracts a signature from C# source text.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter C# grammar (`tree_sitter_c_sharp`) and returns that helper's
/// result unchanged. Only compiled when the `lang-csharp` feature is enabled.
#[cfg(feature = "lang-csharp")]
pub fn extract_csharp_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_c_sharp::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}

/// Classify whether a diff span contains whitespace-only changes for fuzz target access.
///
/// Wrapper around `ContextBuilder::classify_span_change`. Must never panic on any input.
Expand Down
Loading