Skip to content

Commit 1bfd444

Browse files
authored
[DAGCombiner] Fold and/or of NaN SETCC (#135645)
Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine, but adding it here as well can allow for additional folding if more logical operations are exposed.
1 parent af28c9c commit 1bfd444

File tree

3 files changed

+108
-0
lines changed

3 files changed

+108
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6448,6 +6448,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
64486448
}
64496449
}
64506450

6451+
if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6452+
LHS0.getValueType() == RHS0.getValueType() &&
6453+
((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6454+
(LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6455+
return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6456+
64516457
if (TargetPreference == AndOrSETCCFoldKind::None)
64526458
return SDValue();
64536459

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
4+
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
define i1 @and_ord(float %a, float %b) {
8+
; CHECK-LABEL: and_ord(
9+
; CHECK: {
10+
; CHECK-NEXT: .reg .pred %p<2>;
11+
; CHECK-NEXT: .reg .b32 %r<2>;
12+
; CHECK-NEXT: .reg .f32 %f<3>;
13+
; CHECK-EMPTY:
14+
; CHECK-NEXT: // %bb.0:
15+
; CHECK-NEXT: ld.param.f32 %f1, [and_ord_param_0];
16+
; CHECK-NEXT: ld.param.f32 %f2, [and_ord_param_1];
17+
; CHECK-NEXT: setp.num.f32 %p1, %f1, %f2;
18+
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
19+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
20+
; CHECK-NEXT: ret;
21+
%c = fcmp ord float %a, 0.0
22+
%d = fcmp ord float %b, 0.0
23+
%e = and i1 %c, %d
24+
ret i1 %e
25+
}
26+
27+
define i1 @or_uno(float %a, float %b) {
28+
; CHECK-LABEL: or_uno(
29+
; CHECK: {
30+
; CHECK-NEXT: .reg .pred %p<2>;
31+
; CHECK-NEXT: .reg .b32 %r<2>;
32+
; CHECK-NEXT: .reg .f32 %f<3>;
33+
; CHECK-EMPTY:
34+
; CHECK-NEXT: // %bb.0:
35+
; CHECK-NEXT: ld.param.f32 %f1, [or_uno_param_0];
36+
; CHECK-NEXT: ld.param.f32 %f2, [or_uno_param_1];
37+
; CHECK-NEXT: setp.nan.f32 %p1, %f1, %f2;
38+
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
39+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
40+
; CHECK-NEXT: ret;
41+
%c = fcmp uno float %a, 0.0
42+
%d = fcmp uno float %b, 0.0
43+
%e = or i1 %c, %d
44+
ret i1 %e
45+
}

llvm/test/CodeGen/X86/and-or-setcc.ll

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
4+
5+
define i1 @and_ord(float %a, float %b) {
6+
; X86-LABEL: and_ord:
7+
; X86: # %bb.0:
8+
; X86-NEXT: flds {{[0-9]+}}(%esp)
9+
; X86-NEXT: flds {{[0-9]+}}(%esp)
10+
; X86-NEXT: fucompp
11+
; X86-NEXT: fnstsw %ax
12+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
13+
; X86-NEXT: sahf
14+
; X86-NEXT: setnp %al
15+
; X86-NEXT: retl
16+
;
17+
; X64-LABEL: and_ord:
18+
; X64: # %bb.0:
19+
; X64-NEXT: xorps %xmm2, %xmm2
20+
; X64-NEXT: cmpordps %xmm2, %xmm1
21+
; X64-NEXT: cmpordps %xmm2, %xmm0
22+
; X64-NEXT: andps %xmm1, %xmm0
23+
; X64-NEXT: movd %xmm0, %eax
24+
; X64-NEXT: # kill: def $al killed $al killed $eax
25+
; X64-NEXT: retq
26+
%c = fcmp ord float %a, 0.0
27+
%d = fcmp ord float %b, 0.0
28+
%e = and i1 %c, %d
29+
ret i1 %e
30+
}
31+
32+
define i1 @or_uno(float %a, float %b) {
33+
; X86-LABEL: or_uno:
34+
; X86: # %bb.0:
35+
; X86-NEXT: flds {{[0-9]+}}(%esp)
36+
; X86-NEXT: flds {{[0-9]+}}(%esp)
37+
; X86-NEXT: fucompp
38+
; X86-NEXT: fnstsw %ax
39+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
40+
; X86-NEXT: sahf
41+
; X86-NEXT: setp %al
42+
; X86-NEXT: retl
43+
;
44+
; X64-LABEL: or_uno:
45+
; X64: # %bb.0:
46+
; X64-NEXT: xorps %xmm2, %xmm2
47+
; X64-NEXT: cmpunordps %xmm2, %xmm1
48+
; X64-NEXT: cmpunordps %xmm2, %xmm0
49+
; X64-NEXT: orps %xmm1, %xmm0
50+
; X64-NEXT: movd %xmm0, %eax
51+
; X64-NEXT: # kill: def $al killed $al killed $eax
52+
; X64-NEXT: retq
53+
%c = fcmp uno float %a, 0.0
54+
%d = fcmp uno float %b, 0.0
55+
%e = or i1 %c, %d
56+
ret i1 %e
57+
}

0 commit comments

Comments
 (0)