-
-
Notifications
You must be signed in to change notification settings - Fork 1
D-047: test(fuzz): signature extraction fuzzers for non-Rust languages #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: development
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| // SPDX-FileCopyrightText: 2026 Sephyi <me@sephy.io> | ||
| // | ||
| // SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Commercial | ||
|
|
||
| #![no_main] | ||
|
|
||
| use libfuzzer_sys::fuzz_target; | ||
|
|
||
| // Dispatches the remaining input to a language-specific signature extractor | ||
| // based on the first byte (`data[0] % 10`). Each `extract_*_signature` | ||
| // function must never panic on any input — this fuzzer only asserts that. | ||
| // | ||
| // Language map (matches CommitBee's supported grammars): | ||
| // 0 -> Rust | ||
| // 1 -> TypeScript | ||
| // 2 -> JavaScript | ||
| // 3 -> Python | ||
| // 4 -> Go | ||
| // 5 -> Java | ||
| // 6 -> C | ||
| // 7 -> C++ | ||
| // 8 -> Ruby | ||
| // 9 -> C# | ||
| fuzz_target!(|data: &[u8]| { | ||
| if data.is_empty() { | ||
| return; | ||
| } | ||
| let selector = data[0] % 10; | ||
| let Ok(source) = std::str::from_utf8(&data[1..]) else { | ||
| return; | ||
| }; | ||
| match selector { | ||
| 0 => { | ||
| let _ = commitbee::extract_rust_signature(source); | ||
| } | ||
| 1 => { | ||
| let _ = commitbee::extract_typescript_signature(source); | ||
| } | ||
| 2 => { | ||
| let _ = commitbee::extract_javascript_signature(source); | ||
| } | ||
| 3 => { | ||
| let _ = commitbee::extract_python_signature(source); | ||
| } | ||
| 4 => { | ||
| let _ = commitbee::extract_go_signature(source); | ||
| } | ||
| 5 => { | ||
| let _ = commitbee::extract_java_signature(source); | ||
| } | ||
| 6 => { | ||
| let _ = commitbee::extract_c_signature(source); | ||
| } | ||
| 7 => { | ||
| let _ = commitbee::extract_cpp_signature(source); | ||
| } | ||
| 8 => { | ||
| let _ = commitbee::extract_ruby_signature(source); | ||
| } | ||
| 9 => { | ||
| let _ = commitbee::extract_csharp_signature(source); | ||
| } | ||
| _ => unreachable!("selector is `% 10`"), | ||
| } | ||
| }); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,18 +56,14 @@ pub fn parse_diff_hunks(diff: &str) -> Vec<services::analyzer::DiffHunk> { | |
| services::analyzer::DiffHunk::parse_from_diff(diff) | ||
| } | ||
|
|
||
| /// Extract signature from Rust source code for fuzz target access. | ||
| /// Extract signature from source code using the given tree-sitter language. | ||
| /// | ||
| /// Parses the source with tree-sitter Rust, finds the first top-level definition, | ||
| /// and extracts its signature. Must never panic on any input. | ||
| #[cfg(feature = "lang-rust")] | ||
| pub fn extract_rust_signature(source: &str) -> Option<String> { | ||
| /// Parses the source, finds the first top-level definition, and extracts its | ||
| /// signature. Must never panic on any input. | ||
| fn extract_signature_for_language(source: &str, language: tree_sitter::Language) -> Option<String> { | ||
| use tree_sitter::Parser; | ||
| let mut parser = Parser::new(); | ||
| if parser | ||
| .set_language(&tree_sitter_rust::LANGUAGE.into()) | ||
| .is_err() | ||
| { | ||
| if parser.set_language(&language).is_err() { | ||
| return None; | ||
| } | ||
| let tree = parser.parse(source, None)?; | ||
|
|
@@ -76,6 +72,66 @@ pub fn extract_rust_signature(source: &str) -> Option<String> { | |
| services::analyzer::AnalyzerService::extract_signature(first_child, source) | ||
| } | ||
|
Comment on lines 59 to 73
|
||
|
|
||
/// Fuzz-target entry point: extract a signature from Rust source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Rust grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-rust` feature is enabled.
#[cfg(feature = "lang-rust")]
pub fn extract_rust_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_rust::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
/// Fuzz-target entry point: extract a signature from TypeScript source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter TypeScript grammar (`LANGUAGE_TYPESCRIPT`) and returns that
/// helper's result unchanged. Only compiled when the `lang-typescript`
/// feature is enabled.
#[cfg(feature = "lang-typescript")]
pub fn extract_typescript_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
/// Fuzz-target entry point: extract a signature from JavaScript source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter JavaScript grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-javascript` feature is enabled.
#[cfg(feature = "lang-javascript")]
pub fn extract_javascript_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_javascript::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
/// Fuzz-target entry point: extract a signature from Python source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Python grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-python` feature is enabled.
#[cfg(feature = "lang-python")]
pub fn extract_python_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
/// Fuzz-target entry point: extract a signature from Go source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Go grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-go` feature is enabled.
#[cfg(feature = "lang-go")]
pub fn extract_go_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_go::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
/// Fuzz-target entry point: extract a signature from Java source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Java grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-java` feature is enabled.
#[cfg(feature = "lang-java")]
pub fn extract_java_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_java::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
/// Fuzz-target entry point: extract a signature from C source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter C grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-c` feature is enabled.
#[cfg(feature = "lang-c")]
pub fn extract_c_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
/// Fuzz-target entry point: extract a signature from C++ source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter C++ grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-cpp` feature is enabled.
#[cfg(feature = "lang-cpp")]
pub fn extract_cpp_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_cpp::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
/// Fuzz-target entry point: extract a signature from Ruby source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter Ruby grammar and returns that helper's result unchanged.
/// Only compiled when the `lang-ruby` feature is enabled.
#[cfg(feature = "lang-ruby")]
pub fn extract_ruby_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
/// Fuzz-target entry point: extract a signature from C# source code.
///
/// Hands `source` to `extract_signature_for_language` together with the
/// tree-sitter C# grammar (`tree_sitter_c_sharp`) and returns that helper's
/// result unchanged. Only compiled when the `lang-csharp` feature is enabled.
#[cfg(feature = "lang-csharp")]
pub fn extract_csharp_signature(source: &str) -> Option<String> {
    let grammar: tree_sitter::Language = tree_sitter_c_sharp::LANGUAGE.into();
    extract_signature_for_language(source, grammar)
}
|
|
||
| /// Classify whether a diff span contains whitespace-only changes for fuzz target access. | ||
| /// | ||
| /// Wrapper around `ContextBuilder::classify_span_change`. Must never panic on any input. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The fuzzer drops any input where `data[1..]` is not valid UTF-8, which can significantly reduce fuzz coverage since many byte sequences will exit early. To exercise the parsers more thoroughly, consider a conversion that always yields a `&str`, such as `String::from_utf8_lossy(&data[1..])` (or switching the target input type to `(&str, u8)` via a structured fuzzer input), so every input still drives the extractor while staying within the `&str` API contract.