Skip to content

Commit e72a113

Browse files
committed
Fix read_volatile intrinsic
Previously, the intrinsic would always generate PTX to load only a 1-bit integer (`i1`) from the provided pointer, regardless of the actual size of `T`. The alignment specified for the load was also based on the pointer's alignment, not the pointee's alignment. This commit changes the intrinsic to: 1. Load the full `llvm_type_of(T)` instead of `i1`. 2. Use the correct alignment of `T` for the load instruction. 3. Remove an incorrect pointer cast that was based on the return ABI of `T` rather than the type of the memory being read. Possibly fixes Rust-GPU#208.
1 parent 3b3e049 commit e72a113

File tree

1 file changed

+26
-9
lines changed

1 file changed

+26
-9
lines changed

crates/rustc_codegen_nvvm/src/intrinsic.rs

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -295,22 +295,39 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
295295
}
296296
}
297297
sym::volatile_load | sym::unaligned_volatile_load => {
298-
let tp_ty = fn_args.type_at(0);
299-
let mut ptr = args[0].immediate();
300-
if let PassMode::Cast { cast: ty, .. } = &fn_abi.ret.mode {
301-
ptr = self.pointercast(ptr, self.type_ptr_to(ty.llvm_type(self)));
302-
}
303-
let load = self.volatile_load(self.type_i1(), ptr);
298+
// The `*const T` or `*mut T` operand.
299+
let ptr_operand = &args[0];
300+
let src_ptr_llval = ptr_operand.immediate();
301+
302+
// Determine the type T (the pointee type) and its LLVM representation.
303+
// `ptr_operand.layout.ty` is the Rust type `*const T` (or `*mut T`). We
304+
// need the layout of `T`.
305+
let layout_of_pointee = ptr_operand
306+
.layout
307+
.pointee_info_at(self, Size::ZERO)
308+
.expect(
309+
"volatile_load input pointer is not a pointer type or has no pointee info",
310+
)
311+
.layout;
312+
let llvm_ty_of_pointee = layout_of_pointee.llvm_type(self);
313+
314+
// Call volatile_load with the correct LLVM type of T. The
315+
// `volatile_load` does a pointercast so we do not need to do it here.
316+
let loaded_llval = self.volatile_load(llvm_ty_of_pointee, src_ptr_llval);
317+
318+
// Set alignment for the LLVM load instruction based on the alignment of
319+
// `T`.
304320
let align = if name == sym::unaligned_volatile_load {
305321
1
306322
} else {
307-
self.align_of(tp_ty).bytes() as u32
323+
layout_of_pointee.align.abi.bytes() as u32
308324
};
309325
unsafe {
310-
llvm::LLVMSetAlignment(load, align);
326+
llvm::LLVMSetAlignment(loaded_llval, align);
311327
}
328+
312329
if !result.layout.is_zst() {
313-
self.store_to_place(load, result.val);
330+
self.store_to_place(loaded_llval, result.val);
314331
}
315332
return Ok(());
316333
}

0 commit comments

Comments
 (0)