rapidsai · cjnolet · Jan 16, 2025 · Jan 16, 2025 · Jan 16, 2025 · Jan 16, 2025
@@ -214,6 +214,30 @@ struct cuvsCagraSearchParams {
   uint32_t num_random_samplings;
   /** Bit mask used for initial random seed node selection. */
   uint64_t rand_xor_mask;
+
+  /** Whether to use the persistent version of the kernel (only SINGLE_CTA is supported for now) */
+  bool persistent;
+  /** Persistent kernel: time in seconds before the kernel stops if no requests received. */
+  float persistent_lifetime;
+  /**
+   * Set the fraction of maximum grid size used by persistent kernel.
+   * Value 1.0 means the kernel grid size is maximum possible for the selected device.
+   * The value must be greater than 0.0 and not greater than 1.0.
+   *
+   * One may need to run other kernels alongside this persistent kernel. This parameter can
+   * be used to reduce the grid size of the persistent kernel to leave a few SMs idle.
+   * Note: running any other work on the GPU alongside the persistent kernel makes the setup
+   * fragile.
+   *   - Running another kernel in another thread usually works, but progress is not guaranteed
+   *   - Any CUDA allocations block the context (this issue may be obscured by using pools)
+   *   - Memory copies to non-pinned host memory may block the context
+   *
+   * Even when we know there are no other kernels working at the same time, setting
+   * persistent_device_usage to 1.0 surprisingly sometimes hurts performance. Proceed with care.
+   * If you suspect this is an issue, you can reduce this number to ~0.9 without a significant
+   * impact on the throughput.
+   */
+  float persistent_device_usage;
 };
 
 typedef struct cuvsCagraSearchParams* cuvsCagraSearchParams_t;

@@ -82,6 +82,9 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil:
         float hashmap_max_fill_rate
         uint32_t num_random_samplings
         uint64_t rand_xor_mask
+        bool persistent
+        float persistent_lifetime
+        float persistent_device_usage
 
     ctypedef struct cuvsCagraIndex:
         uintptr_t addr

@@ -256,7 +256,7 @@ def build(IndexParams index_params, dataset, resources=None):
     ----------
     index_params : IndexParams object
     dataset : CUDA array interface compliant matrix shape (n_samples, dim)
-        Supported dtype [float, int8, uint8]
+              Supported dtype [float, int8, uint8]
     {resources_docstring}
 
     Returns
@@ -363,6 +363,7 @@ cdef class SearchParams:
         more.
     rand_xor_mask: int, default = 0x128394
         Bit mask used for initial random seed node selection.
+
     """
 
     cdef cuvsCagraSearchParams params
@@ -380,7 +381,10 @@ cdef class SearchParams:
                  hashmap_min_bitlen=0,
                  hashmap_max_fill_rate=0.5,
                  num_random_samplings=1,
-                 rand_xor_mask=0x128394):
+                 rand_xor_mask=0x128394,
+                 persistent=False,
+                 persistent_lifetime=2,
+                 persistent_device_usage=1.0):
         self.params.max_queries = max_queries
         self.params.itopk_size = itopk_size
         self.params.max_iterations = max_iterations
@@ -413,14 +417,19 @@ cdef class SearchParams:
         self.params.num_random_samplings = num_random_samplings
         self.params.rand_xor_mask = rand_xor_mask
 
+        self.params.persistent = persistent
+        self.params.persistent_lifetime = persistent_lifetime
+        self.params.persistent_device_usage = persistent_device_usage
+
     def __repr__(self):
         attr_str = [attr + "=" + str(getattr(self, attr))
                     for attr in [
                         "max_queries", "itopk_size", "max_iterations", "algo",
                         "team_size", "search_width", "min_iterations",
                         "thread_block_size", "hashmap_mode",
                         "hashmap_min_bitlen", "hashmap_max_fill_rate",
-                        "num_random_samplings", "rand_xor_mask"]]
+                        "num_random_samplings", "rand_xor_mask", "persistent",
+                        "persistent_lifetime", "persistent_device_usage"]]
         return "SearchParams(type=CAGRA, " + (", ".join(attr_str)) + ")"
 
     @property
@@ -475,6 +484,18 @@ cdef class SearchParams:
     def rand_xor_mask(self):
         return self.params.rand_xor_mask
 
+    @property
+    def persistent(self):
+        return self.params.persistent
+
+    @property
+    def persistent_lifetime(self):
+        return self.params.persistent_lifetime
+
+    @property
+    def persistent_device_usage(self):
+        return self.params.persistent_device_usage
+
 
 @auto_sync_resources
 @auto_convert_output

@@ -187,3 +187,13 @@ def test_cagra_vpq_compression():
     run_cagra_build_search_test(
         n_cols=dim, compression=cagra.CompressionParams(pq_dim=dim / pq_len)
     )
+
+
+def test_cagra_persistent_search():
+    run_cagra_build_search_test(
+        search_params={
+            "persistent": True,
+            "persistent_lifetime": 10,
+            "persistent_device_usage": 0.5,
+        }
+    )