/* * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 * * Reproducer for https://github.com/rapidsai/cuvs-lucene/issues/93 * cuvsCagraSearch returned 0 (Reason=cudaErrorInvalidValue:invalid argument) * * ROOT CAUSE: * `cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)` * is not thread-safe. It sets a CUDA-context-wide attribute. When two threads call it * concurrently with different smem_size values, the following race occurs: * 1. Thread A sets max-dynamic-shared-mem to SIZE_A (larger). * 2. Thread B overwrites it with SIZE_B (smaller). * 3. Thread A launches its kernel requesting SIZE_A of shared memory, * but the CUDA context now only allows SIZE_B → cudaErrorInvalidValue. * * HOW IT MANIFESTS IN cuvs-lucene: * Lucene's TaskExecutor dispatches per-segment CAGRA searches to a thread pool. * Each segment has a different number of vectors (e.g. 25, 26, 47), leading to * different graph degrees after reduction, and therefore different smem_size values * in the single-CTA search kernel. The concurrent cudaFuncSetAttribute calls race. * * REPRODUCTION STRATEGY: * Build CAGRA indices with different dataset sizes (different graph degrees), * then search them concurrently from separate threads, each with its own raft::resources. * This mirrors the cuvs-lucene setup where each thread gets a ThreadLocal CuVSResources. */ #include #include #include #include #include #include #include #include #include #include #include #include #include namespace cuvs::neighbors::cagra { // NOLINTNEXTLINE(readability-identifier-naming) TEST(Issue93Reproducer, ConcurrentSearchDifferentGraphDegrees) { raft::resources handle; raft::random::RngState rng(6181234567890123459ULL); // Dataset sizes from REPRODUCER.md warnings (different sizes → different graph degrees). // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers) std::vector dataset_sizes = {25, 26, 47, 25}; constexpr int dim = 64; constexpr int top_k = 10; // Build indices on the main thread. std::vector> indices; for (int n_rows : dataset_sizes) { auto database = raft::make_device_matrix(handle, n_rows, dim); raft::random::uniform( handle, rng, database.data_handle(), n_rows * dim, -1.0F, 1.0F); // NOLINT cagra::index_params ip; ip.metric = cuvs::distance::DistanceType::L2Expanded; ip.intermediate_graph_degree = 128; // NOLINT ip.graph_degree = 64; // NOLINT ip.graph_build_params = graph_build_params::nn_descent_params(ip.intermediate_graph_degree, ip.metric); indices.push_back(cagra::build(handle, ip, raft::make_const_mdspan(database.view()))); } raft::resource::sync_stream(handle); // Search concurrently from multiple threads until the first failure. const int num_threads = static_cast(indices.size()); std::mutex error_mutex; std::string first_error; // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers) for (int iter = 0; iter < 50 && first_error.empty(); ++iter) { std::vector threads; for (int t = 0; t < num_threads; ++t) { threads.emplace_back([&, t, iter]() { raft::resources thread_handle; raft::random::RngState thread_rng(42ULL + static_cast(t) + static_cast(iter) * 1000ULL); try { auto query = raft::make_device_matrix(thread_handle, 1, dim); raft::random::uniform(thread_handle, thread_rng, query.data_handle(), dim, -1.0F, 1.0F); // Match cuvs-lucene params: Java's Panama zero-initializes the struct, // and SINGLE_CTA = 0 in the enum, so algo is SINGLE_CTA. cagra::search_params sp; sp.itopk_size = top_k; sp.search_width = 1; sp.algo = search_algo::SINGLE_CTA; auto neighbors = raft::make_device_matrix(thread_handle, 1, top_k); auto distances = raft::make_device_matrix(thread_handle, 1, top_k); cagra::search(thread_handle, sp, indices[static_cast(t)], raft::make_const_mdspan(query.view()), neighbors.view(), distances.view()); raft::resource::sync_stream(thread_handle); } catch (const std::exception& e) { std::lock_guard lock(error_mutex); if (first_error.empty()) { first_error = e.what(); } } }); } for (auto& th : threads) { th.join(); } } ASSERT_TRUE(first_error.empty()) << first_error; } } // namespace cuvs::neighbors::cagra