Skip to content

Commit c6f9d6d

Browse files
cuviper and JonPsson authored
[SystemZ] Assign the full space for promoted and split outgoing args. (#95)
When a large "irregular" (e.g. i96) integer call argument is converted to indirect, its 64-bit parts are stored to the stack. Prior to this patch, the full stack space (e.g. i128) was not allocated, but rather just the exact space of the original type. This caused neighboring values on the stack to be overwritten.

Thanks to Josh Stone for reporting this.

Review: Ulrich Weigand

Fixes https://bugs.llvm.org/show_bug.cgi?id=49322

Differential Revision: https://reviews.llvm.org/D97514

(cherry picked from commit 52bbbf4)

Co-authored-by: Jonas Paulsson <[email protected]>
1 parent 96ae895 commit c6f9d6d

File tree

2 files changed

+72
-4
lines changed

2 files changed

+72
-4
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1543,6 +1543,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
15431543
bool IsVarArg = CLI.IsVarArg;
15441544
MachineFunction &MF = DAG.getMachineFunction();
15451545
EVT PtrVT = getPointerTy(MF.getDataLayout());
1546+
LLVMContext &Ctx = *DAG.getContext();
15461547

15471548
// Detect unsupported vector argument and return types.
15481549
if (Subtarget.hasVector()) {
@@ -1552,7 +1553,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
15521553

15531554
// Analyze the operands of the call, assigning locations to each operand.
15541555
SmallVector<CCValAssign, 16> ArgLocs;
1555-
SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1556+
SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
15561557
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
15571558

15581559
// We don't support GuaranteedTailCallOpt, only automatically-detected
@@ -1577,14 +1578,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
15771578

15781579
if (VA.getLocInfo() == CCValAssign::Indirect) {
15791580
// Store the argument in a stack slot and pass its address.
1580-
SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
1581+
unsigned ArgIndex = Outs[I].OrigArgIndex;
1582+
EVT SlotVT;
1583+
if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1584+
// Allocate the full stack space for a promoted (and split) argument.
1585+
Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1586+
EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1587+
MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1588+
unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1589+
SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1590+
} else {
1591+
SlotVT = Outs[I].ArgVT;
1592+
}
1593+
SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
15811594
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
15821595
MemOpChains.push_back(
15831596
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
15841597
MachinePointerInfo::getFixedStack(MF, FI)));
15851598
// If the original argument was split (e.g. i128), we need
15861599
// to store all parts of it here (and pass just one address).
1587-
unsigned ArgIndex = Outs[I].OrigArgIndex;
15881600
assert (Outs[I].PartOffset == 0);
15891601
while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
15901602
SDValue PartValue = OutVals[I + 1];
@@ -1594,6 +1606,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
15941606
MemOpChains.push_back(
15951607
DAG.getStore(Chain, DL, PartValue, Address,
15961608
MachinePointerInfo::getFixedStack(MF, FI)));
1609+
assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1610+
SlotVT.getStoreSize()) && "Not enough space for argument part!");
15971611
++I;
15981612
}
15991613
ArgValue = SpillSlot;
@@ -1687,7 +1701,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
16871701

16881702
// Assign locations to each value returned by this call.
16891703
SmallVector<CCValAssign, 16> RetLocs;
1690-
CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1704+
CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
16911705
RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
16921706

16931707
// Copy all of the result registers out of their specified physreg.

llvm/test/CodeGen/SystemZ/args-11.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; Test outgoing promoted arguments that are split (and passed by reference).
3+
;
4+
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
5+
6+
; The i96 arg is promoted to i128 and should get the full stack space.
7+
declare void @fn1(i96)
8+
define i32 @fn2() {
9+
; CHECK-LABEL: fn2:
10+
; CHECK: # %bb.0:
11+
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
12+
; CHECK-NEXT: .cfi_offset %r14, -48
13+
; CHECK-NEXT: .cfi_offset %r15, -40
14+
; CHECK-NEXT: aghi %r15, -184
15+
; CHECK-NEXT: .cfi_def_cfa_offset 344
16+
; CHECK-NEXT: mvhi 180(%r15), -1
17+
; CHECK-NEXT: mvghi 168(%r15), 0
18+
; CHECK-NEXT: la %r2, 160(%r15)
19+
; CHECK-NEXT: mvghi 160(%r15), 0
20+
; CHECK-NEXT: brasl %r14, fn1@PLT
21+
; CHECK-NEXT: l %r2, 180(%r15)
22+
; CHECK-NEXT: lmg %r14, %r15, 296(%r15)
23+
; CHECK-NEXT: br %r14
24+
%1 = alloca i32
25+
store i32 -1, i32* %1
26+
call void @fn1(i96 0)
27+
%2 = load i32, i32* %1
28+
ret i32 %2
29+
}
30+
31+
declare void @fn3(i136)
32+
define i32 @fn4() {
33+
; CHECK-LABEL: fn4:
34+
; CHECK: # %bb.0:
35+
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
36+
; CHECK-NEXT: .cfi_offset %r14, -48
37+
; CHECK-NEXT: .cfi_offset %r15, -40
38+
; CHECK-NEXT: aghi %r15, -192
39+
; CHECK-NEXT: .cfi_def_cfa_offset 352
40+
; CHECK-NEXT: mvhi 188(%r15), -1
41+
; CHECK-NEXT: mvghi 176(%r15), 0
42+
; CHECK-NEXT: mvghi 168(%r15), 0
43+
; CHECK-NEXT: la %r2, 160(%r15)
44+
; CHECK-NEXT: mvghi 160(%r15), 0
45+
; CHECK-NEXT: brasl %r14, fn3@PLT
46+
; CHECK-NEXT: l %r2, 188(%r15)
47+
; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
48+
; CHECK-NEXT: br %r14
49+
%1 = alloca i32
50+
store i32 -1, i32* %1
51+
call void @fn3(i136 0)
52+
%2 = load i32, i32* %1
53+
ret i32 %2
54+
}

0 commit comments

Comments
 (0)