Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
297fe51
Java: New off-heap Dataset support for CAGRA and Brute Force
May 15, 2025
fa1e2e4
Merge branch 'branch-25.06' into ishan/new-dataset-method
chatman May 15, 2025
9241eed
Merge branch 'branch-25.06' into ishan/new-dataset-method
chatman May 23, 2025
27a15f6
Merge branch 'branch-25.06' into ishan/new-dataset-method
chatman May 27, 2025
f4ac8b5
Merge branch 'branch-25.06' into ishan/new-dataset-method
narangvivek10 May 27, 2025
d6193a0
Review feedback incorporated
May 27, 2025
7a75d9a
Review feedback incorporated
May 27, 2025
9ee1e4f
style-check fix
narangvivek10 May 27, 2025
208bbaf
Merge branch 'branch-25.06' into ishan/new-dataset-method
narangvivek10 May 27, 2025
40c65df
check if arena is alive before attempting to close
narangvivek10 May 27, 2025
fb01145
Merge branch 'branch-25.06' into ishan/new-dataset-method
narangvivek10 May 27, 2025
ff0e08f
Merge branch 'branch-25.06' into ishan/new-dataset-method
chatman May 27, 2025
25d7a7b
Running every randomized test twice, with either of Dataset and float…
May 27, 2025
f29ebc1
Fix indentation
chatman May 27, 2025
5373e5d
Merge branch 'branch-25.06' into ishan/new-dataset-method
chatman May 27, 2025
06e3f67
Merge branch 'branch-25.06' into ishan/new-dataset-method
narangvivek10 May 27, 2025
30e7546
add since tag in javadoc
narangvivek10 May 28, 2025
70177ef
Merge branch 'branch-25.06' into ishan/new-dataset-method
narangvivek10 May 28, 2025
6148cff
Merge branch 'branch-25.06' into ishan/new-dataset-method
narangvivek10 May 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,21 @@ interface Builder {
*/
Builder from(InputStream inputStream);

/**
* Sets the dataset vectors for building the {@link BruteForceIndex}.
*
* <p>The vectors are supplied as an on-heap array; see
* {@link #withDataset(Dataset)} for the {@link Dataset}-based alternative.
*
* <p>NOTE(review): the index implementation in this change takes the row
* count from {@code vectors.length} and the column count from the first row
* only, so all rows are presumably expected to have the same length - confirm.
*
* @param vectors a two-dimensional float array, one row per vector
* @return an instance of this Builder
*/
Builder withDataset(float[][] vectors);

/**
* Sets the dataset for building the {@link BruteForceIndex}.
*
* @param dataset a two-dimensional float array
* @param dataset a {@link Dataset} object containing the vectors
* @return an instance of this Builder
*/
Builder withDataset(float[][] dataset);
Builder withDataset(Dataset dataset);

/**
* Builds and returns an instance of {@link BruteForceIndex}.
Expand Down
13 changes: 11 additions & 2 deletions java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.nio.file.Path;
import java.util.Objects;

import com.nvidia.cuvs.BruteForceIndex.Builder;
import com.nvidia.cuvs.spi.CuVSProvider;

/**
Expand Down Expand Up @@ -183,13 +184,21 @@ interface Builder {
*/
Builder from(InputStream inputStream);

/**
* Sets the dataset vectors for building the {@link CagraIndex}.
*
* <p>The vectors are supplied as an on-heap array; see
* {@link #withDataset(Dataset)} for the {@link Dataset}-based alternative.
* The builder implementation in this change throws an
* {@link IllegalArgumentException} from {@code build()} when both a
* {@code float[][]} and a {@link Dataset} have been set.
*
* <p>NOTE(review): the index implementation takes the column count from the
* first row only, so all rows are presumably expected to have the same
* length - confirm.
*
* @param vectors a two-dimensional float array, one row per vector
* @return an instance of this Builder
*/
Builder withDataset(float[][] vectors);

/**
* Sets the dataset for building the {@link CagraIndex}.
*
* @param dataset a two-dimensional float array
* @param dataset a {@link Dataset} object containing the vectors
* @return an instance of this Builder
*/
Builder withDataset(float[][] dataset);
Builder withDataset(Dataset dataset);

/**
* Registers an instance of configured {@link CagraIndexParams} with this
Expand Down
62 changes: 62 additions & 0 deletions java/cuvs-java/src/main/java/com/nvidia/cuvs/Dataset.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.cuvs;

import com.nvidia.cuvs.spi.CuVSProvider;

/**
 * Container for the vectors that an index is constructed from.
 *
 * <p>It lets a caller place vectors directly into native memory rather than
 * first loading the complete set onto the Java heap (for example as a
 * {@code float[][]}).
 *
 * @since 25.06
 */
public interface Dataset extends AutoCloseable {

  /**
   * Obtains a new dataset from the current {@link CuVSProvider}.
   *
   * @param size       number of vectors in the dataset
   * @param dimensions number of elements in each vector of the dataset
   * @return a newly created {@link Dataset}
   */
  static Dataset create(int size, int dimensions) {
    var provider = CuVSProvider.provider();
    return provider.newDataset(size, dimensions);
  }

  /**
   * Adds one vector to this dataset.
   *
   * @param vector a float array with as many elements as the dataset has
   *               dimensions
   */
  void addVector(float[] vector);

  /**
   * Returns the size of this dataset.
   *
   * @return the size of the dataset
   */
  int size();

  /**
   * Returns the dimensionality of the vectors in this dataset.
   *
   * @return the dimensions of the vectors in the dataset
   */
  int dimensions();
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.nvidia.cuvs.BruteForceIndex;
import com.nvidia.cuvs.CagraIndex;
import com.nvidia.cuvs.CuVSResources;
import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.HnswIndex;

import java.nio.file.Path;
Expand Down Expand Up @@ -50,6 +51,9 @@ default Path nativeLibraryPath() {
CuVSResources newCuVSResources(Path tempDirectory)
throws Throwable;

/**
* Creates a new {@link Dataset} instance.
*
* @param size number of vectors in the dataset
* @param dimensions number of elements in each vector of the dataset
* @return the new {@link Dataset}
* @throws UnsupportedOperationException if this provider cannot create datasets
*/
Dataset newDataset(int size, int dimensions) throws UnsupportedOperationException;

/** Creates a new BruteForceIndex Builder. */
BruteForceIndex.Builder newBruteForceIndexBuilder(CuVSResources cuVSResources)
throws UnsupportedOperationException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.nvidia.cuvs.BruteForceIndex;
import com.nvidia.cuvs.CagraIndex;
import com.nvidia.cuvs.CuVSResources;
import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.HnswIndex;

import java.nio.file.Path;
Expand Down Expand Up @@ -47,4 +48,9 @@ public CagraIndex.Builder newCagraIndexBuilder(CuVSResources cuVSResources) {
public HnswIndex.Builder newHnswIndexBuilder(CuVSResources cuVSResources) {
throw new UnsupportedOperationException();
}

/**
 * Always fails: this provider does not support creating datasets.
 *
 * <p>The {@code throws} clause is omitted because
 * {@link UnsupportedOperationException} is unchecked; this matches the other
 * overrides in this class.
 *
 * @param size       requested number of vectors (unused)
 * @param dimensions requested number of elements per vector (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Dataset newDataset(int size, int dimensions) {
  throw new UnsupportedOperationException();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import com.nvidia.cuvs.BruteForceIndexParams;
import com.nvidia.cuvs.BruteForceQuery;
import com.nvidia.cuvs.CuVSResources;
import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.SearchResults;
import com.nvidia.cuvs.internal.common.Util;
import com.nvidia.cuvs.internal.panama.cuvsBruteForceIndex;
Expand Down Expand Up @@ -71,7 +72,8 @@ public class BruteForceIndexImpl implements BruteForceIndex{
private static final MethodHandle deserializeMethodHandle = downcallHandle("deserialize_brute_force_index",
FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, ADDRESS, ADDRESS));

private final float[][] dataset;
private final float[][] vectors;
private final Dataset dataset;
private final CuVSResourcesImpl resources;
private final IndexReference bruteForceIndexReference;
private final BruteForceIndexParams bruteForceIndexParams;
Expand All @@ -86,8 +88,10 @@ public class BruteForceIndexImpl implements BruteForceIndex{
* @param bruteForceIndexParams an instance of {@link BruteForceIndexParams}
* holding the index parameters
*/
private BruteForceIndexImpl(float[][] dataset, CuVSResourcesImpl resources, BruteForceIndexParams bruteForceIndexParams)
private BruteForceIndexImpl(float[][] vectors, Dataset dataset, CuVSResourcesImpl resources,
BruteForceIndexParams bruteForceIndexParams)
throws Throwable {
this.vectors = vectors;
this.dataset = dataset;
this.resources = resources;
this.bruteForceIndexParams = bruteForceIndexParams;
Expand All @@ -102,6 +106,7 @@ private BruteForceIndexImpl(float[][] dataset, CuVSResourcesImpl resources, Brut
*/
private BruteForceIndexImpl(InputStream inputStream, CuVSResourcesImpl resources) throws Throwable {
this.bruteForceIndexParams = null;
this.vectors = null;
this.dataset = null;
this.resources = resources;
this.bruteForceIndexReference = deserialize(inputStream);
Expand All @@ -127,6 +132,7 @@ public void destroyIndex() throws Throwable {
} finally {
destroyed = true;
}
if (dataset != null) dataset.close();
}

/**
Expand All @@ -137,10 +143,11 @@ public void destroyIndex() throws Throwable {
* index
*/
private IndexReference build() throws Throwable {
long rows = dataset.length;
long cols = rows > 0 ? dataset[0].length : 0;
long rows = dataset != null? dataset.size(): vectors.length;
long cols = dataset != null? dataset.dimensions(): (rows > 0 ? vectors[0].length : 0);

MemorySegment dataSeg = Util.buildMemorySegment(resources.getArena(), dataset);
MemorySegment dataSeg = dataset != null? ((DatasetImpl) dataset).seg:
Util.buildMemorySegment(resources.getArena(), vectors);
try (var localArena = Arena.ofConfined()) {
MemorySegment returnValue = localArena.allocate(C_INT);
MemorySegment indexSeg = (MemorySegment) indexMethodHandle.invokeExact(
Expand Down Expand Up @@ -284,7 +291,8 @@ public static BruteForceIndex.Builder newBuilder(CuVSResources cuvsResources) {
*/
public static class Builder implements BruteForceIndex.Builder {

private float[][] dataset;
private float[][] vectors;
private Dataset dataset;
Comment on lines +294 to +295
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we foresee deprecating the vectors interface in the future, in favour of Dataset?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't plan for it. I think many users might find the float[][] easier to use, but those who are conscious of performance might want to try the other. Do you suggest we deprecate the float[][]?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very late here, but did you consider writing a second DatasetImpl to hold the float[][]? This way you would not need to keep around two fields, one of which is always null, and branch between them with an `if` every time in the various Index implementations. It would be easier to maintain.

I can draft a quick PR to show what I mean if you are interested.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, externally you can provide data either as a float[][] or a Dataset, but internally it will just be a Dataset.

private final CuVSResourcesImpl cuvsResources;
private BruteForceIndexParams bruteForceIndexParams;
private InputStream inputStream;
Expand Down Expand Up @@ -327,11 +335,23 @@ public Builder from(InputStream inputStream) {
/**
* Sets the dataset for building the {@link BruteForceIndex}.
*
* @param dataset a two-dimensional float array
* @param vectors a two-dimensional float array
* @return an instance of this Builder
*/
@Override
public Builder withDataset(float[][] dataset) {
public Builder withDataset(float[][] vectors) {
this.vectors = vectors;
return this;
}

/**
* Sets the dataset for building the {@link BruteForceIndex}.
*
* <p>Only stores the reference on this builder; the dataset is handed to the
* index when {@code build()} is called.
*
* @param dataset a {@link Dataset} object containing the vectors
* @return an instance of this Builder
*/
@Override
public Builder withDataset(Dataset dataset) {
this.dataset = dataset;
return this;
}
Expand All @@ -346,7 +366,7 @@ public BruteForceIndexImpl build() throws Throwable {
if (inputStream != null) {
return new BruteForceIndexImpl(inputStream, cuvsResources);
} else {
return new BruteForceIndexImpl(dataset, cuvsResources, bruteForceIndexParams);
return new BruteForceIndexImpl(vectors, dataset, cuvsResources, bruteForceIndexParams);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import com.nvidia.cuvs.CagraQuery;
import com.nvidia.cuvs.CagraSearchParams;
import com.nvidia.cuvs.CuVSResources;
import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.SearchResults;
import com.nvidia.cuvs.internal.common.Util;
import com.nvidia.cuvs.internal.panama.cuvsCagraCompressionParams;
Expand Down Expand Up @@ -87,7 +88,8 @@ public class CagraIndexImpl implements CagraIndex {
private static final MethodHandle serializeCAGRAIndexToHNSWMethodHandle = downcallHandle("serialize_cagra_index_to_hnsw",
FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, ADDRESS, ADDRESS));

private final float[][] dataset;
private final float[][] vectors;
private final Dataset dataset;
private final CuVSResourcesImpl resources;
private final CagraIndexParams cagraIndexParameters;
private final CagraCompressionParams cagraCompressionParams;
Expand All @@ -104,10 +106,11 @@ public class CagraIndexImpl implements CagraIndex {
* @param dataset the dataset for indexing
* @param resources an instance of {@link CuVSResources}
*/
private CagraIndexImpl(CagraIndexParams indexParameters, CagraCompressionParams cagraCompressionParams, float[][] dataset,
CuVSResourcesImpl resources) throws Throwable {
private CagraIndexImpl(CagraIndexParams indexParameters, CagraCompressionParams cagraCompressionParams, float[][] vectors,
Dataset dataset, CuVSResourcesImpl resources) throws Throwable {
this.cagraIndexParameters = indexParameters;
this.cagraCompressionParams = cagraCompressionParams;
this.vectors = vectors;
this.dataset = dataset;
this.resources = resources;
this.cagraIndexReference = build();
Expand All @@ -122,6 +125,7 @@ private CagraIndexImpl(CagraIndexParams indexParameters, CagraCompressionParams
private CagraIndexImpl(InputStream inputStream, CuVSResourcesImpl resources) throws Throwable {
this.cagraIndexParameters = null;
this.cagraCompressionParams = null;
this.vectors = null;
this.dataset = null;
this.resources = resources;
this.cagraIndexReference = deserialize(inputStream);
Expand All @@ -146,6 +150,7 @@ public void destroyIndex() throws Throwable {
} finally {
destroyed = true;
}
if (dataset != null) dataset.close();
}

/**
Expand All @@ -156,8 +161,8 @@ public void destroyIndex() throws Throwable {
* index
*/
private IndexReference build() throws Throwable {
long rows = dataset.length;
long cols = rows > 0 ? dataset[0].length : 0;
long rows = dataset != null? dataset.size(): vectors.length;
long cols = dataset != null? dataset.dimensions(): (rows > 0 ? vectors[0].length : 0);

MemorySegment indexParamsMemorySegment = cagraIndexParameters != null
? segmentFromIndexParams(cagraIndexParameters)
Expand All @@ -169,7 +174,8 @@ private IndexReference build() throws Throwable {
? segmentFromCompressionParams(cagraCompressionParams)
: MemorySegment.NULL;

MemorySegment dataSeg = Util.buildMemorySegment(resources.getArena(), dataset);
MemorySegment dataSeg = dataset != null? ((DatasetImpl) dataset).seg:
Util.buildMemorySegment(resources.getArena(), vectors);

try (var localArena = Arena.ofConfined()) {
MemorySegment returnValue = localArena.allocate(C_INT);
Expand Down Expand Up @@ -470,7 +476,8 @@ public static CagraIndex.Builder newBuilder(CuVSResources cuvsResources) {
*/
public static class Builder implements CagraIndex.Builder{

private float[][] dataset;
private float[][] vectors;
private Dataset dataset;
private CagraIndexParams cagraIndexParams;
private CagraCompressionParams cagraCompressionParams;
private CuVSResourcesImpl cuvsResources;
Expand All @@ -487,7 +494,13 @@ public Builder from(InputStream inputStream) {
}

@Override
public Builder withDataset(float[][] dataset) {
public Builder withDataset(float[][] vectors) {
this.vectors = vectors;
return this;
}

/**
* Sets the dataset for building the {@link CagraIndex}.
*
* <p>Only stores the reference on this builder. {@code build()} rejects a
* builder on which both a {@code float[][]} and a {@link Dataset} were set.
*
* @param dataset a {@link Dataset} object containing the vectors
* @return an instance of this Builder
*/
@Override
public Builder withDataset(Dataset dataset) {
this.dataset = dataset;
return this;
}
Expand All @@ -509,7 +522,10 @@ public CagraIndexImpl build() throws Throwable {
if (inputStream != null) {
return new CagraIndexImpl(inputStream, cuvsResources);
} else {
return new CagraIndexImpl(cagraIndexParams, cagraCompressionParams, dataset, cuvsResources);
if (vectors != null && dataset != null) {
throw new IllegalArgumentException("Please specify only one type of dataset (a float[] or a Dataset instance)");
}
return new CagraIndexImpl(cagraIndexParams, cagraCompressionParams, vectors, dataset, cuvsResources);
}
}
}
Expand Down
Loading