Skip to content

Commit 76d5d54

Browse files
jdoerferttstellar
authored andcommitted
Avoid use of stack allocations in asynchronous calls
NOTE: This is an adaption of the original patch to be applicable to the LLVM 12 release branch. Logic is the same though. As reported by Guilherme Valarini [0], we used to pass stack allocations to calls that can nowadays be asynchronous. This is arguably a problem and it will inevitably result in UB. To remedy the situation we allocate the locations as part of the AsyncInfoTy object. The lifetime of that object matches what we need for now. If the synchronization is not tied to the AsyncInfoTy object anymore we might need to have a different buffer construct in global space. This should be back-ported to LLVM 12 but needs slight modifications as it is based on refactoring patches we do not need to backport. [0] https://lists.llvm.org/pipermail/openmp-dev/2021-February/003867.html Differential Revision: https://reviews.llvm.org/D96667
1 parent ee7eaf8 commit 76d5d54

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

openmp/libomptarget/include/omptarget.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#ifndef _OMPTARGET_H_
1515
#define _OMPTARGET_H_
1616

17+
#include <deque>
18+
#include <stddef.h>
1719
#include <stdint.h>
1820
#include <stddef.h>
1921

@@ -119,10 +121,18 @@ struct __tgt_target_table {
119121
/// This struct contains information exchanged between different asynchronous
120122
/// operations for device-dependent optimization and potential synchronization
121123
struct __tgt_async_info {
124+
/// Locations we used in (potentially) asynchronous calls which should live
125+
/// as long as this AsyncInfoTy object.
126+
std::deque<void *> BufferLocations;
127+
122128
// A pointer to a queue-like structure where offloading operations are issued.
123129
// We assume to use this structure to do synchronization. In CUDA backend, it
124130
// is CUstream.
125131
void *Queue = nullptr;
132+
133+
/// Return a void* reference with a lifetime that is at least as long as this
134+
/// AsyncInfoTy object. The location can be used as intermediate buffer.
135+
void *&getVoidPtrLocation();
126136
};
127137

128138
/// This struct is a record of non-contiguous information

openmp/libomptarget/src/omptarget.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@
1818
#include <cassert>
1919
#include <vector>
2020

21+
/// Return a void* reference with a lifetime that is at least as long as this
22+
/// AsyncInfoTy object. The location can be used as intermediate buffer.
23+
void *&__tgt_async_info::getVoidPtrLocation() {
24+
BufferLocations.push_back(nullptr);
25+
return BufferLocations.back();
26+
}
27+
2128
/* All begin addresses for partially mapped structs must be 8-aligned in order
2229
* to ensure proper alignment of members. E.g.
2330
*
@@ -415,7 +422,8 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
415422
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
416423
DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
417424
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
418-
void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
425+
void *&TgtPtrBase = async_info_ptr->getVoidPtrLocation();
426+
TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
419427
int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
420428
sizeof(void *), async_info_ptr);
421429
if (rt != OFFLOAD_SUCCESS) {
@@ -1122,8 +1130,9 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
11221130
DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
11231131
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
11241132
void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
1125-
void *PointerTgtPtrBegin = Device.getTgtPtrBegin(
1126-
HstPtrVal, ArgSizes[I], IsLast, false, IsHostPtr);
1133+
void *&PointerTgtPtrBegin = AsyncInfo->getVoidPtrLocation();
1134+
PointerTgtPtrBegin = Device.getTgtPtrBegin(HstPtrVal, ArgSizes[I],
1135+
IsLast, false, IsHostPtr);
11271136
if (!PointerTgtPtrBegin) {
11281137
DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
11291138
DPxPTR(HstPtrVal));

0 commit comments

Comments
 (0)