Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding persistent kernel params to cagra python #583

Open
wants to merge 5 commits into
base: branch-25.02
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions cpp/include/cuvs/neighbors/cagra.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,30 @@ struct cuvsCagraSearchParams {
uint32_t num_random_samplings;
/** Bit mask used for initial random seed node selection. */
uint64_t rand_xor_mask;

/** Whether to use the persistent version of the kernel (only SINGLE_CTA is supported at the moment). */
bool persistent;
/** Persistent kernel: time in seconds before the kernel stops if no requests received. */
float persistent_lifetime;
/**
* Set the fraction of maximum grid size used by persistent kernel.
* Value 1.0 means the kernel grid size is maximum possible for the selected device.
* The value must be greater than 0.0 and not greater than 1.0.
*
* One may need to run other kernels alongside this persistent kernel. This parameter can
* be used to reduce the grid size of the persistent kernel to leave a few SMs idle.
* Note: running any other work on the GPU alongside the persistent kernel makes the setup
* fragile.
* - Running another kernel in another thread usually works, but progress is not guaranteed
* - Any CUDA allocations block the context (this issue may be obscured by using pools)
* - Memory copies to non-pinned host memory may block the context
*
* Even when we know there are no other kernels working at the same time, setting
* persistent_device_usage to 1.0 surprisingly sometimes hurts performance. Proceed with care.
* If you suspect this is an issue, you can reduce this number to ~0.9 without a significant
* impact on the throughput.
*/
float persistent_device_usage;
};

typedef struct cuvsCagraSearchParams* cuvsCagraSearchParams_t;
Expand Down
3 changes: 3 additions & 0 deletions python/cuvs/cuvs/neighbors/cagra/cagra.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil:
float hashmap_max_fill_rate
uint32_t num_random_samplings
uint64_t rand_xor_mask
bool persistent
float persistent_lifetime
float persistent_device_usage

ctypedef struct cuvsCagraIndex:
uintptr_t addr
Expand Down
27 changes: 24 additions & 3 deletions python/cuvs/cuvs/neighbors/cagra/cagra.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def build(IndexParams index_params, dataset, resources=None):
----------
index_params : IndexParams object
dataset : CUDA array interface compliant matrix shape (n_samples, dim)
Supported dtype [float, int8, uint8]
Supported dtype [float, int8, uint8]
{resources_docstring}

Returns
Expand Down Expand Up @@ -363,6 +363,7 @@ cdef class SearchParams:
more.
rand_xor_mask: int, default = 0x128394
Bit mask used for initial random seed node selection.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a description of those new parameters to the docstring

"""

cdef cuvsCagraSearchParams params
Expand All @@ -380,7 +381,10 @@ cdef class SearchParams:
hashmap_min_bitlen=0,
hashmap_max_fill_rate=0.5,
num_random_samplings=1,
rand_xor_mask=0x128394):
rand_xor_mask=0x128394,
persistent=False,
persistent_lifetime=2,
persistent_device_usage=1.0):
self.params.max_queries = max_queries
self.params.itopk_size = itopk_size
self.params.max_iterations = max_iterations
Expand Down Expand Up @@ -413,14 +417,19 @@ cdef class SearchParams:
self.params.num_random_samplings = num_random_samplings
self.params.rand_xor_mask = rand_xor_mask

self.params.persistent = persistent
self.params.persistent_lifetime = persistent_lifetime
self.params.persistent_device_usage = persistent_device_usage

def __repr__(self):
attr_str = [attr + "=" + str(getattr(self, attr))
for attr in [
"max_queries", "itopk_size", "max_iterations", "algo",
"team_size", "search_width", "min_iterations",
"thread_block_size", "hashmap_mode",
"hashmap_min_bitlen", "hashmap_max_fill_rate",
"num_random_samplings", "rand_xor_mask"]]
"num_random_samplings", "rand_xor_mask", "persistent",
"persistent_lifetime", "persistent_device_usage"]]
return "SearchParams(type=CAGRA, " + (", ".join(attr_str)) + ")"

@property
Expand Down Expand Up @@ -475,6 +484,18 @@ cdef class SearchParams:
def rand_xor_mask(self):
return self.params.rand_xor_mask

@property
def persistent(self):
    """Whether the persistent version of the search kernel is used.

    Read-only view of the ``persistent`` field of the wrapped
    ``cuvsCagraSearchParams`` struct.
    """
    return self.params.persistent

@property
def persistent_lifetime(self):
    """Time in seconds before the persistent kernel stops if no requests
    are received.

    Read-only view of the ``persistent_lifetime`` field of the wrapped
    ``cuvsCagraSearchParams`` struct.
    """
    return self.params.persistent_lifetime

@property
def persistent_device_usage(self):
    """Fraction of the maximum grid size used by the persistent kernel.

    Read-only view of the ``persistent_device_usage`` field of the wrapped
    ``cuvsCagraSearchParams`` struct. The C header states the value must
    be greater than 0.0 and not greater than 1.0; no check is done here.
    """
    return self.params.persistent_device_usage


@auto_sync_resources
@auto_convert_output
Expand Down
10 changes: 10 additions & 0 deletions python/cuvs/cuvs/test/test_cagra.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,13 @@ def test_cagra_vpq_compression():
run_cagra_build_search_test(
n_cols=dim, compression=cagra.CompressionParams(pq_dim=dim / pq_len)
)


def test_cagra_persistent_search():
    """Run the shared build/search helper with persistent-kernel params."""
    # Non-default values for all three persistent-kernel options, so the
    # test does not silently fall back to the SearchParams defaults.
    persistent_search_params = dict(
        persistent=True,
        persistent_lifetime=10,
        persistent_device_usage=0.5,
    )
    run_cagra_build_search_test(search_params=persistent_search_params)
Loading