Skip to content

Commit ccbf3cf

Browse files
buger and claude authored
fix: quoted search falls back to BM25 and returns unrelated files (#527) (#528)
* fix: skip filename matching and BM25 ranking for quoted exact queries (#527)

  Quoted queries like '"cleanupScopeMappings"' set exact:true on individual AST terms but not the global exact flag. Two code paths only checked the global flag, causing filename matching to tokenize the query into subwords ("cleanup", "scope", "map") and BM25 ranking to boost unrelated files.

  Now check both the global exact flag and is_exact_search(ast) to properly detect quoted queries.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: resolve flaky test_tree_cache_invalidation by removing exact size assertion

  The test asserted get_cache_size() == 1, but the tree cache is a process-wide global static. Other tests running in parallel can add entries even though this test holds a local mutex. Replaced with an is_in_cache() check, which validates the correct behavior without being sensitive to parallel test interference.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 2cb7a2c commit ccbf3cf

File tree

4 files changed: +125 −7 lines changed

src/language/tree_cache_tests.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,8 +151,9 @@ fn test_tree_cache_invalidation() {
151151
// But the content is different, so the byte positions should differ
152152
assert_ne!(tree1.root_node().end_byte(), tree2.root_node().end_byte());
153153

154-
// Check that the cache still has one entry (the updated one)
155-
assert_eq!(tree_cache::get_cache_size(), 1);
154+
// Check that the file is still in the cache (updated entry)
155+
// Note: we don't check exact cache size because other tests running in parallel
156+
// may add entries to the global cache even though this test holds the local mutex.
156157
assert!(tree_cache::is_in_cache("test_file2.rs"));
157158
}
158159

src/search/query.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,7 @@ fn update_ast_exact(expr: &mut elastic_query::Expr) {
271271
}
272272

273273
/// Helper function to check if the AST represents an exact search
274-
fn is_exact_search(expr: &elastic_query::Expr) -> bool {
274+
pub fn is_exact_search(expr: &elastic_query::Expr) -> bool {
275275
match expr {
276276
elastic_query::Expr::Term { exact, .. } => *exact,
277277
elastic_query::Expr::And(left, right) => is_exact_search(left) && is_exact_search(right),

src/search/search_runner.rs

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -508,8 +508,12 @@ pub fn perform_probe(options: &SearchOptions) -> Result<LimitedSearchResults> {
508508
let mut all_files = file_term_map.keys().cloned().collect::<HashSet<_>>();
509509

510510
// Add filename matches if enabled
511+
// Skip filename matching for exact searches (--exact flag) and when all AST terms
512+
// are exact (e.g., quoted queries like "cleanupScopeMappings"). Filename matching
513+
// tokenizes terms into subwords which creates false positives for exact queries. (#527)
511514
let fm_start = Instant::now();
512-
if include_filenames && !exact {
515+
let ast_all_exact = crate::search::query::is_exact_search(&plan.ast);
516+
if include_filenames && !exact && !ast_all_exact {
513517
if debug_mode {
514518
println!("DEBUG: Starting filename matching...");
515519
}
@@ -1352,17 +1356,18 @@ pub fn perform_probe(options: &SearchOptions) -> Result<LimitedSearchResults> {
13521356
format_duration(remaining_time)
13531357
);
13541358
}
1355-
// Rank results (skip if exact flag is set)
1359+
// Rank results (skip if exact flag is set or all AST terms are exact like quoted queries)
13561360
let rr_start = Instant::now();
1361+
let skip_ranking = *exact || ast_all_exact;
13571362
if debug_mode {
1358-
if *exact {
1363+
if skip_ranking {
13591364
println!("DEBUG: Skipping result ranking due to exact flag being set");
13601365
} else {
13611366
println!("DEBUG: Starting result ranking...");
13621367
}
13631368
}
13641369

1365-
if !*exact {
1370+
if !skip_ranking {
13661371
// Only perform ranking when it is not skipped (no exact flag and not an all-exact quoted query)
13671372
rank_search_results(&mut final_results, queries, reranker, *question);
13681373

tests/integration_tests.rs

Lines changed: 112 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -853,3 +853,115 @@ fn test_skipped_files_with_match_counts() {
853853
"Total results + skipped should be at least 2"
854854
);
855855
}
856+
857+
/// Issue #527: Quoted search on a directory should behave like exact/literal search.
858+
/// Previously, quoted queries like '"cleanupScopeMappings"' ran filename matching
859+
/// and BM25 ranking with tokenized subwords, causing unrelated files containing
860+
/// "cleanup", "scope", or "map" individually to appear in results.
861+
#[test]
862+
fn test_quoted_search_excludes_unrelated_files() {
863+
let temp_dir = TempDir::new().expect("Failed to create temp dir");
864+
865+
// File that DOES contain the exact camelCase symbol
866+
let src_dir = temp_dir.path().join("model");
867+
fs::create_dir(&src_dir).expect("Failed to create model dir");
868+
let mut target_file = File::create(src_dir.join("products.go")).unwrap();
869+
target_file
870+
.write_all(
871+
b"package products\n\n\
872+
func cleanupScopeMappings(tx interface{}, newApis []string, oldApis []string) error {\n\
873+
\treturn nil\n\
874+
}\n",
875+
)
876+
.unwrap();
877+
878+
// File that does NOT contain the symbol but has subwords: cleanup, scope, mapping
879+
let app_dir = temp_dir.path().join("app");
880+
fs::create_dir(&app_dir).expect("Failed to create app dir");
881+
let mut unrelated_file = File::create(app_dir.join("about.go")).unwrap();
882+
unrelated_file
883+
.write_all(
884+
b"package about\n\n\
885+
// GetVersion returns the cleanup version info for scope mappings\n\
886+
func GetVersion() string {\n\
887+
\treturn \"1.0.0\"\n\
888+
}\n\n\
889+
// GetStatus returns the status of the scope cleanup mapping service\n\
890+
func GetStatus() string {\n\
891+
\treturn \"running\"\n\
892+
}\n",
893+
)
894+
.unwrap();
895+
896+
// Another unrelated file with no matching subwords at all
897+
let mut other_file = File::create(app_dir.join("users.go")).unwrap();
898+
other_file
899+
.write_all(
900+
b"package users\n\n\
901+
func GetUser(id string) (interface{}, error) {\n\
902+
\treturn nil, nil\n\
903+
}\n",
904+
)
905+
.unwrap();
906+
907+
let custom_ignores: Vec<String> = vec![];
908+
909+
// Quoted query — should only return products.go
910+
let queries = vec!["\"cleanupScopeMappings\"".to_string()];
911+
let options = SearchOptions {
912+
path: temp_dir.path(),
913+
queries: &queries,
914+
files_only: false,
915+
custom_ignores: &custom_ignores,
916+
exclude_filenames: false,
917+
language: None,
918+
reranker: "hybrid",
919+
frequency_search: false,
920+
max_results: None,
921+
max_bytes: None,
922+
max_tokens: None,
923+
allow_tests: true,
924+
no_merge: false,
925+
merge_threshold: None,
926+
dry_run: false,
927+
session: None,
928+
timeout: 30,
929+
question: None,
930+
exact: false, // NOT using --exact flag, just quoted query
931+
no_gitignore: true,
932+
lsp: false,
933+
};
934+
935+
let search_result = perform_probe(&options).expect("Search should succeed");
936+
937+
println!(
938+
"Quoted search returned {} results:",
939+
search_result.results.len()
940+
);
941+
for r in &search_result.results {
942+
println!(" File: {} Lines: {:?}", r.file, r.lines);
943+
}
944+
945+
// All results should be from products.go (the file containing the exact symbol)
946+
assert!(
947+
!search_result.results.is_empty(),
948+
"Quoted search should find at least one result"
949+
);
950+
for r in &search_result.results {
951+
assert!(
952+
r.file.contains("products.go"),
953+
"Quoted search should only return files containing the exact symbol, got: {}",
954+
r.file
955+
);
956+
}
957+
958+
// Specifically: about.go should NOT appear (it only has subwords, not the full symbol)
959+
let has_about = search_result
960+
.results
961+
.iter()
962+
.any(|r| r.file.contains("about.go"));
963+
assert!(
964+
!has_about,
965+
"about.go should not appear in quoted search results — it doesn't contain 'cleanupScopeMappings'"
966+
);
967+
}

0 commit comments

Comments
 (0)