Skip to content

Commit 81936e1

Browse files
committed
Add --use-constant-memory-space flag, off by default
1 parent 6a3b08b commit 81936e1

File tree

2 files changed

+54
-9
lines changed

2 files changed

+54
-9
lines changed

crates/cuda_builder/src/lib.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,18 @@ pub struct CudaBuilder {
130130
///
131131
/// `true` by default.
132132
pub override_libm: bool,
133+
/// If `true`, the codegen will attempt to place `static` variables in CUDA's
134+
/// constant memory, which is fast but limited in size (~64KB total across all
135+
/// statics). The codegen keeps any single oversized static out of constant memory, but it does not
136+
/// track cumulative size. Exceeding the limit may cause `IllegalAddress` runtime
137+
/// errors (CUDA error code: `700`).
138+
///
139+
/// The default is `false`, which places all statics in global memory. This avoids
140+
/// such errors but may reduce performance and use more global memory.
141+
///
142+
/// Future versions may support smarter placement and user-controlled
143+
/// packing/spilling strategies.
144+
pub use_constant_memory_space: bool,
133145
/// Whether to generate any debug info and what level of info to generate.
134146
pub debug: DebugInfo,
135147
/// Additional arguments passed to cargo during `cargo build`.
@@ -155,6 +167,7 @@ impl CudaBuilder {
155167
emit: None,
156168
optix: false,
157169
override_libm: true,
170+
use_constant_memory_space: false,
158171
debug: DebugInfo::None,
159172
build_args: vec![],
160173
final_module_path: None,
@@ -284,6 +297,22 @@ impl CudaBuilder {
284297
self
285298
}
286299

300+
/// If `true`, the codegen will attempt to place `static` variables in CUDA's
301+
/// constant memory, which is fast but limited in size (~64KB total across all
302+
/// statics). The codegen keeps any single oversized static out of constant memory, but it does not
303+
/// track cumulative size. Exceeding the limit may cause `IllegalAddress` runtime
304+
/// errors (CUDA error code: `700`).
305+
///
306+
/// If `false`, all statics are placed in global memory. This avoids such errors but
307+
/// may reduce performance and use more global memory.
308+
///
309+
/// Future versions may support smarter placement and user-controlled
310+
/// packing/spilling strategies.
311+
pub fn use_constant_memory_space(mut self, use_constant_memory_space: bool) -> Self {
312+
self.use_constant_memory_space = use_constant_memory_space;
313+
self
314+
}
315+
287316
/// An optional path where to dump LLVM IR of the final output the codegen will feed to libnvvm. Usually
288317
/// used for debugging.
289318
pub fn final_module_path(mut self, path: impl AsRef<Path>) -> Self {

crates/rustc_codegen_nvvm/src/context.rs

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -271,17 +271,30 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
271271
}
272272

273273
if !is_mutable && self.type_is_freeze(ty) {
274-
let layout = self.layout_of(ty);
275-
if layout.size.bytes() > CONSTANT_MEMORY_SIZE_LIMIT_BYTES {
276-
self.tcx.sess.dcx().warn(format!(
277-
"static `{}` exceeds the constant-memory limit; placing in global memory (performance may be reduced)",
278-
instance
279-
));
280-
// Global memory
274+
if !self.codegen_args.use_constant_memory_space {
275+
// We aren't using constant memory, so put the instance in global memory.
281276
AddressSpace(1)
282277
} else {
283-
// Constant memory
284-
AddressSpace(4)
278+
// We are using constant memory, see if the instance will fit.
279+
//
280+
// FIXME(@LegNeato) ideally we keep track of what we have put into
281+
// constant memory and when it is filled up spill instead of only
282+
// spilling when a static is big. We'll probably want some packing
283+
// strategy controlled by the user...for example, if you have one large
284+
// static and many small ones, you might want the small ones to all be
285+
// in constant memory or just the big one depending on your workload.
286+
let layout = self.layout_of(ty);
287+
if layout.size.bytes() > CONSTANT_MEMORY_SIZE_LIMIT_BYTES {
288+
self.tcx.sess.dcx().warn(format!(
289+
"static `{}` exceeds the constant memory limit; placing in global memory (performance may be reduced)",
290+
instance
291+
));
292+
// Place instance in global memory if it is too big for constant memory.
293+
AddressSpace(1)
294+
} else {
295+
// Place instance in constant memory if it fits.
296+
AddressSpace(4)
297+
}
285298
}
286299
} else {
287300
AddressSpace::DATA
@@ -534,6 +547,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
534547
pub struct CodegenArgs {
535548
pub nvvm_options: Vec<NvvmOption>,
536549
pub override_libm: bool,
550+
pub use_constant_memory_space: bool,
537551
pub final_module_path: Option<PathBuf>,
538552
}
539553

@@ -552,6 +566,8 @@ impl CodegenArgs {
552566
cg_args.nvvm_options.push(flag);
553567
} else if arg == "--override-libm" {
554568
cg_args.override_libm = true;
569+
} else if arg == "--use-constant-memory-space" {
570+
cg_args.use_constant_memory_space = true;
555571
} else if arg == "--final-module-path" {
556572
cg_args.final_module_path = Some(PathBuf::from(
557573
args.get(idx + 1).expect("No path for --final-module-path"),

0 commit comments

Comments
 (0)