-
Notifications
You must be signed in to change notification settings - Fork 160
Spectral Embedding #871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
rapids-bot
merged 65 commits into
rapidsai:branch-25.08
from
aamijar:spectral-embedding
Jul 22, 2025
Merged
Spectral Embedding #871
Changes from all commits
Commits
Show all changes
65 commits
Select commit
Hold shift + click to select a range
7c0c136
init spectral embedding
aamijar 123878b
remove unused
aamijar f5d6915
update license year
aamijar d181676
return 0
aamijar de5d56f
use raft matrix_vector_op
aamijar 0f18735
add gtests
aamijar a90f2c7
Merge branch 'branch-25.06' into spectral-embedding
aamijar d66d0d3
remove _types.hpp
aamijar e0508c0
refactor function signature
aamijar a382dff
use unary_op and fill
aamijar 01376e4
remove unused
aamijar 0a345e2
Merge branch 'branch-25.06' into spectral-embedding
aamijar 7654dd0
Merge branch 'branch-25.06' into spectral-embedding
aamijar c08c157
rename function
aamijar 514adbb
rename namespace
aamijar 8123767
doxygen format
aamijar 70b5c09
remove unused
aamijar 6511837
Merge branch 'branch-25.06' into spectral-embedding
aamijar 272d923
Merge branch 'branch-25.06' into spectral-embedding
aamijar 1b9be33
Merge branch 'branch-25.08' into spectral-embedding
aamijar 1be33fd
Merge branch 'branch-25.08' into spectral-embedding
aamijar 38e8ca4
Merge branch 'branch-25.08' into spectral-embedding
aamijar 397810f
migrate to new coo_matrix types
aamijar fedd83c
format fix
aamijar 97b0567
remove unused
aamijar ab1d59d
remove references to old COO api
aamijar 55413cd
Merge branch 'branch-25.08' into spectral-embedding
aamijar 4a0f362
remove unused
aamijar c4c0922
remove unused
aamijar aedddb6
Merge branch 'branch-25.08' into spectral-embedding
aamijar 1f88328
fix
aamijar 3fce304
fix
aamijar 560b96e
remove unused
aamijar c5075b5
remove unused
aamijar 8e786d4
Merge branch 'branch-25.08' into spectral-embedding
aamijar 3ba4dbd
rerun CI
aamijar e74218c
refactor directory structure
aamijar 4e3b7ad
Merge branch 'branch-25.08' into spectral-embedding
aamijar 182cbeb
remove unused
aamijar 0469e80
Merge branch 'branch-25.08' into spectral-embedding
aamijar 316b1d5
rerun CI
aamijar 3a348a7
reference todo issue
aamijar 24b8bf3
change function to void
aamijar ba6e95d
Merge branch 'branch-25.08' into spectral-embedding
aamijar 030729b
clean transform function
aamijar 88448ba
remove sync_stream
aamijar bf5e26c
refactor includes
aamijar 34d9a07
tpb 128
aamijar 3ecdc8b
clean up
aamijar 5d5af15
clean up
aamijar 9f5c91d
clean up
aamijar cba9e80
clean up
aamijar e6e3451
clean up
aamijar bfedab5
clean up
aamijar c66f1b2
clean up
aamijar e521aa4
clean up
aamijar 645eef9
remove unused
aamijar afc13bb
break into functions
aamijar a838e1e
clean up
aamijar aa99c7c
clean up
aamijar 8dd33ca
Merge branch 'branch-25.08' into spectral-embedding
aamijar 49916cf
add new functions to hpp
aamijar da7de07
remove unused
aamijar c61910f
Merge branch 'branch-25.08' into spectral-embedding
aamijar 82409f0
remove functions from public api
aamijar File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| /* | ||
| * Copyright (c) 2025, NVIDIA CORPORATION. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <raft/core/device_coo_matrix.hpp> | ||
| #include <raft/core/device_csr_matrix.hpp> | ||
| #include <raft/core/device_mdspan.hpp> | ||
| #include <raft/core/resources.hpp> | ||
|
|
||
| namespace cuvs::preprocessing::spectral_embedding { | ||
|
|
||
| /** | ||
| * @brief Parameters for spectral embedding algorithm | ||
| * | ||
| * None of the members has a default initializer, so every field should be set explicitly | ||
| * before the struct is passed to transform(). | ||
| */ | ||
| struct params { | ||
| /** | ||
| * @brief The number of components to reduce the data to. | ||
| * | ||
| * Also used as the number of eigenpairs requested from the Lanczos solver. | ||
| */ | ||
| int n_components; | ||
|
|
||
||
| /** | ||
| * @brief The number of neighbors to use for the nearest neighbors graph. | ||
| * | ||
| * Used as k in the brute-force nearest neighbors search over the dataset. | ||
| */ | ||
| int n_neighbors; | ||
|
|
||
||
| /** | ||
| * @brief Whether to normalize the Laplacian matrix. | ||
| * | ||
| * When true, the computed eigenvectors are additionally divided element-wise by the | ||
| * diagonal vector that was passed to the normalization routine. | ||
| */ | ||
| bool norm_laplacian; | ||
|
|
||
||
| /** | ||
| * @brief Whether to drop the first eigenvector. | ||
| * | ||
| * When true, one column fewer than n_components is gathered into the output embedding. | ||
| */ | ||
| bool drop_first; | ||
|
|
||
||
| /** @brief Random seed for reproducibility; forwarded to the Lanczos solver configuration. */ | ||
| uint64_t seed; | ||
| }; | ||
|
|
||
| /** | ||
| * @brief Compute a spectral embedding of a dense dataset. | ||
| * | ||
| * The implementation builds a symmetrized k-nearest-neighbor connectivity graph of the | ||
| * dataset, forms its graph Laplacian, computes eigenvectors with a Lanczos solver and | ||
| * gathers the selected eigenvectors into `embedding`. | ||
| * | ||
| * @param[in] handle raft resources used for all device work | ||
| * @param[in] config algorithm parameters, see params | ||
| * @param[in] dataset row-major input matrix; extent(0) is taken as the number of samples | ||
| * @param[out] embedding column-major output matrix. | ||
| * NOTE(review): appears to require n_samples rows and n_components columns | ||
| * (n_components - 1 when drop_first is set) -- confirm against the caller. | ||
| */ | ||
| void transform(raft::resources const& handle, | ||
| params config, | ||
| raft::device_matrix_view<float, int, raft::row_major> dataset, | ||
| raft::device_matrix_view<float, int, raft::col_major> embedding); | ||
|
|
||
| } // namespace cuvs::preprocessing::spectral_embedding |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,260 @@ | ||
| /* | ||
| * Copyright (c) 2025, NVIDIA CORPORATION. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #include <cuvs/neighbors/brute_force.hpp> | ||
| #include <cuvs/preprocessing/spectral_embedding.hpp> | ||
|
|
||
| #include <raft/core/device_coo_matrix.hpp> | ||
| #include <raft/core/device_mdspan.hpp> | ||
| #include <raft/core/error.hpp> | ||
| #include <raft/core/handle.hpp> | ||
| #include <raft/core/resources.hpp> | ||
| #include <raft/linalg/matrix_vector_op.cuh> | ||
| #include <raft/matrix/gather.cuh> | ||
| #include <raft/matrix/init.cuh> | ||
| #include <raft/sparse/coo.hpp> | ||
| #include <raft/sparse/linalg/laplacian.cuh> | ||
| #include <raft/sparse/linalg/symmetrize.cuh> | ||
| #include <raft/sparse/op/filter.cuh> | ||
| #include <raft/sparse/solver/lanczos.cuh> | ||
| #include <raft/sparse/solver/lanczos_types.hpp> | ||
| #include <raft/util/cudart_utils.hpp> | ||
| #include <raft/util/integer_utils.hpp> | ||
|
|
||
| #include <thrust/sequence.h> | ||
| #include <thrust/tabulate.h> | ||
|
|
||
| #include <limits> | ||
| #include <utility> | ||
|
|
||
| namespace cuvs::preprocessing::spectral_embedding { | ||
|
|
||
| /** | ||
| * @brief Build the symmetric k-nearest-neighbor connectivity graph of `dataset`. | ||
| * | ||
| * A brute-force kNN search of the dataset against itself yields n_samples * n_neighbors | ||
| * (row, neighbor) pairs. Every pair is given the weight 1.0f, the resulting COO matrix is | ||
| * symmetrized with 0.5f * (a + b), sorted, and written to `connectivity_graph` with | ||
| * zero-valued entries removed. | ||
| * | ||
| * @param[in] handle raft resources supplying the CUDA stream and thrust policy | ||
| * @param[in] spectral_embedding_config only `n_neighbors` is read by this function | ||
| * @param[in] dataset row-major input; extent(0) is taken as the number of samples | ||
| * @param[in] embedding not used by this function; kept so the signature stays unchanged | ||
| * @param[out] connectivity_graph receives the symmetrized, zero-filtered graph | ||
| */ | ||
| void create_connectivity_graph(raft::resources const& handle, | ||
|                                params spectral_embedding_config, | ||
|                                raft::device_matrix_view<float, int, raft::row_major> dataset, | ||
|                                raft::device_matrix_view<float, int, raft::col_major> embedding, | ||
|                                raft::device_coo_matrix<float, int, int, int>& connectivity_graph) | ||
| { | ||
|   const int n_samples = dataset.extent(0); | ||
|   const int k_search  = spectral_embedding_config.n_neighbors; | ||
|
|
||
|   // Widen before multiplying: evaluated in int, n_samples * k_search can overflow before the | ||
|   // conversion to size_t takes place. | ||
|   const size_t nnz = static_cast<size_t>(n_samples) * static_cast<size_t>(k_search); | ||
|
|
||
|   // The COO structures below count their non-zeros with int, so anything larger cannot be | ||
|   // represented and is rejected up front instead of being silently truncated. | ||
|   RAFT_EXPECTS(nnz <= static_cast<size_t>(std::numeric_limits<int>::max()), | ||
|                "n_samples * n_neighbors must fit in a 32-bit signed integer"); | ||
|
|
||
|   auto stream = raft::resource::get_cuda_stream(handle); | ||
|
|
||
|   cuvs::neighbors::brute_force::search_params search_params; | ||
|   cuvs::neighbors::brute_force::index_params index_params; | ||
|   index_params.metric = cuvs::distance::DistanceType::L2SqrtExpanded; | ||
|
|
||
|   auto d_indices   = raft::make_device_matrix<int64_t>(handle, n_samples, k_search); | ||
|   auto d_distances = raft::make_device_matrix<float>(handle, n_samples, k_search); | ||
|
|
||
|   auto index = | ||
|     cuvs::neighbors::brute_force::build(handle, index_params, raft::make_const_mdspan(dataset)); | ||
|
|
||
|   cuvs::neighbors::brute_force::search( | ||
|     handle, search_params, index, dataset, d_indices.view(), d_distances.view()); | ||
|
|
||
|   auto knn_rows = raft::make_device_vector<int>(handle, nnz); | ||
|   auto knn_cols = raft::make_device_vector<int>(handle, nnz); | ||
|
|
||
|   // Column indices: the int64 neighbor ids narrowed to the int index type of the COO matrix. | ||
|   raft::linalg::unary_op( | ||
|     handle, raft::make_const_mdspan(d_indices.view()), knn_cols.view(), [] __device__(int64_t x) { | ||
|       return static_cast<int>(x); | ||
|     }); | ||
|
|
||
|   // Row indices: flat entry idx belongs to query row idx / k_search. | ||
|   thrust::tabulate(raft::resource::get_thrust_policy(handle), | ||
|                    knn_rows.data_handle(), | ||
|                    knn_rows.data_handle() + nnz, | ||
|                    [k_search] __device__(int idx) { return idx / k_search; }); | ||
|
|
||
|   // set all distances to 1.0f (connectivity KNN graph) | ||
|   raft::matrix::fill(handle, raft::make_device_vector_view(d_distances.data_handle(), nnz), 1.0f); | ||
|
|
||
|   auto coo_matrix_view = raft::make_device_coo_matrix_view<const float, int, int, int>( | ||
|     d_distances.data_handle(), | ||
|     raft::make_device_coordinate_structure_view<int, int, int>( | ||
|       knn_rows.data_handle(), knn_cols.data_handle(), n_samples, n_samples, nnz)); | ||
|
|
||
|   auto sym_coo1_matrix = | ||
|     raft::make_device_coo_matrix<float, int, int, int>(handle, n_samples, n_samples); | ||
|   raft::sparse::linalg::coo_symmetrize<128, float, int, int>( | ||
|     handle, coo_matrix_view, sym_coo1_matrix, [] __device__(int row, int col, float a, float b) { | ||
|       return 0.5f * (a + b); | ||
|     }); | ||
|
|
||
|   raft::sparse::op::coo_sort<float>(n_samples, | ||
|                                     n_samples, | ||
|                                     sym_coo1_matrix.structure_view().get_nnz(), | ||
|                                     sym_coo1_matrix.structure_view().get_rows().data(), | ||
|                                     sym_coo1_matrix.structure_view().get_cols().data(), | ||
|                                     sym_coo1_matrix.get_elements().data(), | ||
|                                     stream); | ||
|
|
||
|   // Copy into the caller's matrix, dropping entries whose value equals 0.0f. | ||
|   raft::sparse::op::coo_remove_scalar<128, float, int, int>( | ||
|     handle, | ||
|     raft::make_device_coo_matrix_view<const float, int, int, int>( | ||
|       sym_coo1_matrix.get_elements().data(), sym_coo1_matrix.structure_view()), | ||
|     raft::make_host_scalar<float>(0.0f).view(), | ||
|     connectivity_graph); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Sort a square COO matrix in place and expose it as a CSR view. | ||
| * | ||
| * The returned view does not own anything: its values and column indices alias the storage | ||
| * of `sym_coo_matrix` and its row offsets alias `sym_coo_row_ind`, so both must outlive the | ||
| * view. | ||
| * | ||
| * @param[in] handle raft resources supplying the CUDA stream | ||
| * @param[in] n_samples number of rows and columns of the matrix | ||
| * @param[out] sym_coo_row_ind receives the CSR row offsets; its last element is set to nnz | ||
| * @param[in,out] sym_coo_matrix COO matrix, sorted in place | ||
| * @return CSR view over the sorted data | ||
| */ | ||
| raft::device_csr_matrix_view<float, int, int, int> coo_to_csr_matrix( | ||
|   raft::resources const& handle, | ||
|   const int n_samples, | ||
|   raft::device_vector_view<int> sym_coo_row_ind, | ||
|   raft::device_coo_matrix<float, int, int, int>& sym_coo_matrix) | ||
| { | ||
|   auto stream = raft::resource::get_cuda_stream(handle); | ||
|
|
||
|   auto structure    = sym_coo_matrix.structure_view(); | ||
|   auto* values      = sym_coo_matrix.get_elements().data(); | ||
|   auto* row_indices = structure.get_rows().data(); | ||
|   auto* col_indices = structure.get_cols().data(); | ||
|   auto* row_offsets = sym_coo_row_ind.data_handle(); | ||
|   auto n_nonzeros   = structure.get_nnz(); | ||
|
|
||
|   raft::sparse::op::coo_sort<float>( | ||
|     n_samples, n_samples, n_nonzeros, row_indices, col_indices, values, stream); | ||
|
|
||
|   raft::sparse::convert::sorted_coo_to_csr( | ||
|     row_indices, n_nonzeros, row_offsets, n_samples, stream); | ||
|
|
||
|   // The last row offset is written separately from the host-side nnz count. | ||
|   // NOTE(review): the source is a host local; this relies on raft::copy having consumed it | ||
|   // before the function returns -- confirm. | ||
|   raft::copy(row_offsets + sym_coo_row_ind.size() - 1, &n_nonzeros, 1, stream); | ||
|
|
||
|   return raft::make_device_csr_matrix_view<float, int, int, int>( | ||
|     values, | ||
|     raft::make_device_compressed_structure_view<int, int, int>( | ||
|       row_offsets, col_indices, n_samples, n_samples, n_nonzeros)); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Compute the graph Laplacian of a CSR matrix and negate its stored values. | ||
| * | ||
| * Depending on `norm_laplacian` either the normalized or the plain graph Laplacian is | ||
| * computed; afterwards every stored element x is replaced by -x in place. | ||
| * NOTE(review): the negation presumably pairs with the LANCZOS_WHICH::LA setting used by the | ||
| * eigensolver later in this file -- confirm. | ||
| * | ||
| * @param[in] handle raft resources | ||
| * @param[in] spectral_embedding_config only `norm_laplacian` is read by this function | ||
| * @param[in] csr_matrix_view matrix whose Laplacian is computed | ||
| * @param[in,out] diagonal handed to the normalized Laplacian routine; not touched on the | ||
| * non-normalized path | ||
| * @return the owning, negated Laplacian matrix | ||
| */ | ||
| raft::device_csr_matrix<float, int, int, int> create_laplacian( | ||
|   raft::resources const& handle, | ||
|   params spectral_embedding_config, | ||
|   raft::device_csr_matrix_view<float, int, int, int> csr_matrix_view, | ||
|   raft::device_vector_view<float, int> diagonal) | ||
| { | ||
|   const bool use_normalized = spectral_embedding_config.norm_laplacian; | ||
|
|
||
|   auto negated_laplacian = | ||
|     use_normalized ? raft::sparse::linalg::laplacian_normalized(handle, csr_matrix_view, diagonal) | ||
|                    : raft::sparse::linalg::compute_graph_laplacian(handle, csr_matrix_view); | ||
|
|
||
|   // Flat view over the stored values so they can be transformed in place. | ||
|   auto stored_values = | ||
|     raft::make_device_vector_view<float, int>(negated_laplacian.get_elements().data(), | ||
|                                               negated_laplacian.structure_view().get_nnz()); | ||
|
|
||
|   raft::linalg::unary_op(handle, | ||
|                          raft::make_const_mdspan(stored_values), | ||
|                          stored_values, | ||
|                          [] __device__(float value) { return -value; }); | ||
|
|
||
|   return negated_laplacian; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Run the Lanczos solver on the Laplacian and write the selected eigenvectors to | ||
| * `embedding`. | ||
| * | ||
| * `n_components` eigenpairs are requested. When `norm_laplacian` is set the eigenvectors are | ||
| * divided element-wise by `diagonal`. The eigenvector columns are then copied to `embedding` | ||
| * in reversed order; when `drop_first` is set only n_components - 1 columns are copied. | ||
| * | ||
| * @param[in] handle raft resources; all device work is issued through it | ||
| * @param[in] spectral_embedding_config n_components, norm_laplacian, drop_first and seed are read | ||
| * @param[in] n_samples number of rows of the eigenvector matrix | ||
| * @param[in] laplacian matrix handed to the solver (taken by value) | ||
| * @param[in] diagonal divisor applied to the eigenvectors when norm_laplacian is set | ||
| * @param[out] embedding column-major destination of the gathered eigenvectors | ||
| */ | ||
| void compute_eigenpairs(raft::resources const& handle, | ||
|                         params spectral_embedding_config, | ||
|                         const int n_samples, | ||
|                         raft::device_csr_matrix<float, int, int, int> laplacian, | ||
|                         raft::device_vector_view<float, int> diagonal, | ||
|                         raft::device_matrix_view<float, int, raft::col_major> embedding) | ||
| { | ||
|   auto config           = raft::sparse::solver::lanczos_solver_config<float>(); | ||
|   config.n_components   = spectral_embedding_config.n_components; | ||
|   config.max_iterations = 1000; | ||
|   config.ncv            = std::min(n_samples, std::max(2 * config.n_components + 1, 20)); | ||
|   config.tolerance      = 1e-5; | ||
|   config.which          = raft::sparse::solver::LANCZOS_WHICH::LA; | ||
|   config.seed           = spectral_embedding_config.seed; | ||
|
|
||
|   auto eigenvalues = | ||
|     raft::make_device_vector<float, int, raft::col_major>(handle, config.n_components); | ||
|   auto eigenvectors = | ||
|     raft::make_device_matrix<float, int, raft::col_major>(handle, n_samples, config.n_components); | ||
|
|
||
|   raft::sparse::solver::lanczos_compute_smallest_eigenvectors<int, float>( | ||
|     handle, | ||
|     config, | ||
|     raft::make_device_csr_matrix_view<float, int, int, int>(laplacian.get_elements().data(), | ||
|                                                             laplacian.structure_view()), | ||
|     std::nullopt, | ||
|     eigenvalues.view(), | ||
|     eigenvectors.view()); | ||
|
|
||
|   if (spectral_embedding_config.norm_laplacian) { | ||
|     raft::linalg::matrix_vector_op<raft::Apply::ALONG_COLUMNS>( | ||
|       handle, | ||
|       raft::make_const_mdspan(eigenvectors.view()),  // input matrix view | ||
|       raft::make_const_mdspan(diagonal),             // input vector view | ||
|       eigenvectors.view(),                           // output matrix view (in-place) | ||
|       [] __device__(float elem, float diag) { return elem / diag; }); | ||
|   } | ||
|
|
||
|   // Number of eigenvector columns that end up in the embedding. Kept in its own variable so | ||
|   // the solver configuration is not repurposed after the solve. | ||
|   const int n_output_cols = | ||
|     spectral_embedding_config.drop_first ? config.n_components - 1 : config.n_components; | ||
|
|
||
|   // Create a sequence of reversed column indices | ||
|   auto col_indices = raft::make_device_vector<int>(handle, n_output_cols); | ||
|
|
||
|   // Use the handle's thrust policy rather than thrust::device so the fill runs through the | ||
|   // same resources as the gather below that reads col_indices. | ||
|   // TODO: https://github.com/rapidsai/raft/issues/2661 | ||
|   thrust::sequence(raft::resource::get_thrust_policy(handle), | ||
|                    col_indices.data_handle(), | ||
|                    col_indices.data_handle() + n_output_cols, | ||
|                    n_output_cols - 1,  // Start from the last column index | ||
|                    -1                  // Decrement (move backward) | ||
|   ); | ||
|
|
||
|   // Create row-major views of the column-major matrices | ||
|   // This is just a view re-interpretation, no data movement | ||
|   auto eigenvectors_row_view = raft::make_device_matrix_view<float, int, raft::row_major>( | ||
|     eigenvectors.data_handle(), | ||
|     eigenvectors.extent(1),  // Swap dimensions for the view | ||
|     eigenvectors.extent(0)); | ||
|
|
||
|   auto embedding_row_view = raft::make_device_matrix_view<float, int, raft::row_major>( | ||
|     embedding.data_handle(), | ||
|     embedding.extent(1),  // Swap dimensions for the view | ||
|     embedding.extent(0)); | ||
|
|
||
|   raft::matrix::gather<float, int, int>( | ||
|     handle, | ||
|     raft::make_const_mdspan(eigenvectors_row_view),  // Source matrix (as row-major view) | ||
|     raft::make_const_mdspan(col_indices.view()),     // Column indices to gather | ||
|     embedding_row_view                               // Destination matrix (as row-major view) | ||
|   ); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Compute a spectral embedding of `dataset` and write it to `embedding`. | ||
| * | ||
| * Pipeline: build the symmetrized kNN connectivity graph, convert it to CSR, build the | ||
| * (optionally normalized) Laplacian, then compute its eigenvectors and gather them into | ||
| * `embedding`. | ||
| * | ||
| * @param[in] handle raft resources used for every step | ||
| * @param[in] spectral_embedding_config algorithm parameters | ||
| * @param[in] dataset row-major input; extent(0) is the number of samples | ||
| * @param[out] embedding column-major output matrix | ||
| */ | ||
| void transform(raft::resources const& handle, | ||
|                params spectral_embedding_config, | ||
|                raft::device_matrix_view<float, int, raft::row_major> dataset, | ||
|                raft::device_matrix_view<float, int, raft::col_major> embedding) | ||
| { | ||
|   const int n_samples = dataset.extent(0); | ||
|
|
||
|   auto sym_coo_matrix = | ||
|     raft::make_device_coo_matrix<float, int, int, int>(handle, n_samples, n_samples); | ||
|   auto sym_coo_row_ind = raft::make_device_vector<int>(handle, n_samples + 1); | ||
|   auto diagonal        = raft::make_device_vector<float, int>(handle, n_samples); | ||
|
|
||
|   create_connectivity_graph(handle, spectral_embedding_config, dataset, embedding, sym_coo_matrix); | ||
|
|
||
|   // The CSR view aliases sym_coo_matrix and sym_coo_row_ind, which stay alive until return. | ||
|   auto csr_matrix_view = | ||
|     coo_to_csr_matrix(handle, n_samples, sym_coo_row_ind.view(), sym_coo_matrix); | ||
|
|
||
|   auto laplacian = | ||
|     create_laplacian(handle, spectral_embedding_config, csr_matrix_view, diagonal.view()); | ||
|
|
||
|   // compute_eigenpairs takes the Laplacian by value and this is its last use, so hand the | ||
|   // owning matrix over with a move instead of copying it. | ||
|   compute_eigenpairs(handle, | ||
|                      spectral_embedding_config, | ||
|                      n_samples, | ||
|                      std::move(laplacian), | ||
|                      diagonal.view(), | ||
|                      embedding); | ||
| } | ||
|
|
||
| } // namespace cuvs::preprocessing::spectral_embedding | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.