Description
Consider matching on an enum where multiple variants should map to the same expression. Those variants can be explicitly spelled out in one or more match arms, or they can be grouped with the wildcard pattern `_` instead.
In some cases, spelling out the variants (ideally in a single arm with a pattern like `Variant1 | Variant2`) can generate smaller LLVM IR compared to the wildcard version, and less frequently can generate better machine code. One way this can happen is when another variant in a different match arm has a nontrivial `Drop` but the groupable variants don't. With the wildcard, the optimized MIR will still try to drop the whole enum or check for the variants that need `Drop`, potentially leading to larger LLVM IR that takes longer for LLVM to process or results in worse machine code.
Here is a simple example, a function that converts a container with multiple full and empty variants to an `Option`:
/// Example container for `into_option`: two payload-free variants and two
/// variants that each own a `String`.
pub enum Container {
// Payload-free variants.
Empty1,
Empty2,
// Variants that own a `String`.
Full1(String),
Full2(String),
}
/// Converts a `Container` into an `Option<String>`: either `Full*` variant
/// yields `Some` with its string, either `Empty*` variant yields `None`.
pub fn into_option(container: Container) -> Option<String> {
match container {
Container::Full1(s) | Container::Full2(s) => Some(s),
// "Spelled out" form: the payload-free variants are listed explicitly.
Container::Empty1 | Container::Empty2 => None,
// "Wildcard" form being compared against; swap it in for the arm above:
// _ => None,
}
}
Both approaches generate similar machine code with `opt-level=3`, but the initial LLVM IR is significantly larger with the wildcard.
LLVM IR with variants spelled out
rustc -C opt-level=3 -C no-prepopulate-passes --target x86_64-unknown-linux-gnu --emit=llvm-ir
`into_option` takes 42 lines and the .ll file is 3.01 KiB total.
; into_option, variants spelled out (unoptimized IR, -C no-prepopulate-passes).
; %_0 is the 24-byte sret return slot; %container is the 32-byte argument and is
; marked readonly. No drop_in_place call appears anywhere in this version.
define dso_local void @into_option(ptr dead_on_unwind noalias nocapture noundef writable sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(32) %container) unnamed_addr #0 {
start:
%_4 = alloca [24 x i8], align 8
%s = alloca [24 x i8], align 8
; Load the first i64 of %container (presumably the enum discriminant) and
; dispatch on it. Values 0 and 1 share a target (presumably Empty1/Empty2 by
; declaration order); every value outside 0..=3 is marked unreachable.
%_2 = load i64, ptr %container, align 8, !range !4, !noundef !5
switch i64 %_2, label %bb1 [
i64 0, label %bb2
i64 1, label %bb2
i64 2, label %bb4
i64 3, label %bb3
]
bb1: ; preds = %start
unreachable
; Result for values 0 and 1: a single i64 is written to the return slot
; (presumably the niche value that encodes None).
bb2: ; preds = %start, %start
store i64 -9223372036854775808, ptr %_0, align 8
br label %bb6
; Value 2: copy the 24 bytes at offset 8 of %container into %s.
bb4: ; preds = %start
call void @llvm.lifetime.start.p0(i64 24, ptr %s)
%0 = getelementptr inbounds i8, ptr %container, i64 8
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 %0, i64 24, i1 false)
br label %bb5
; Value 3: same copy as bb4.
bb3: ; preds = %start
call void @llvm.lifetime.start.p0(i64 24, ptr %s)
%1 = getelementptr inbounds i8, ptr %container, i64 8
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 %1, i64 24, i1 false)
br label %bb5
bb6: ; preds = %bb5, %bb2
ret void
; Shared tail for values 2 and 3: %s -> %_4 -> %_0 (the return slot).
bb5: ; preds = %bb3, %bb4
call void @llvm.lifetime.start.p0(i64 24, ptr %_4)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_4, ptr align 8 %s, i64 24, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_0, ptr align 8 %_4, i64 24, i1 false)
call void @llvm.lifetime.end.p0(i64 24, ptr %_4)
call void @llvm.lifetime.end.p0(i64 24, ptr %s)
br label %bb6
}
LLVM IR with wildcard
rustc -C opt-level=3 -C no-prepopulate-passes --target x86_64-unknown-linux-gnu --emit=llvm-ir
`into_option` takes 70 lines and the .ll file is 20.73 KiB total (though that's mostly library functions that a bigger project would call from multiple places).
; into_option, wildcard arm (unoptimized IR, -C no-prepopulate-passes).
; Differences visible against the spelled-out version: %container is not marked
; readonly, there is an extra 8-byte slot %_5, and every arm ends by re-loading
; the first i64 of %container and switching to blocks that call
; drop_in_place<String> on the payload at offset 8.
define dso_local void @into_option(ptr dead_on_unwind noalias nocapture noundef writable sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef align 8 dereferenceable(32) %container) unnamed_addr #0 {
start:
%_5 = alloca [8 x i8], align 8
%_4 = alloca [24 x i8], align 8
%s = alloca [24 x i8], align 8
; Only values 2 and 3 are tested; everything else takes the default edge to bb1.
%_2 = load i64, ptr %container, align 8, !range !8, !noundef !4
switch i64 %_2, label %bb1 [
i64 2, label %bb3
i64 3, label %bb2
]
; Default (wildcard) arm: writes one i64 to the return slot (presumably the
; niche value that encodes None), then re-checks for values 2 and 3 even though
; this block is only reached when %_2 was neither.
bb1: ; preds = %start
store i64 -9223372036854775808, ptr %_0, align 8
%0 = load i64, ptr %container, align 8, !range !8, !noundef !4
store i64 %0, ptr %_5, align 8
%1 = load i64, ptr %_5, align 8, !noundef !4
switch i64 %1, label %bb4 [
i64 2, label %bb5
i64 3, label %bb6
]
; Value 2: copy the 24-byte payload out to the return slot, then re-check the
; first i64. 2 goes straight to the return; 3 would go to a drop block.
bb3: ; preds = %start
call void @llvm.lifetime.start.p0(i64 24, ptr %s)
%2 = getelementptr inbounds i8, ptr %container, i64 8
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 %2, i64 24, i1 false)
call void @llvm.lifetime.start.p0(i64 24, ptr %_4)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_4, ptr align 8 %s, i64 24, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_0, ptr align 8 %_4, i64 24, i1 false)
call void @llvm.lifetime.end.p0(i64 24, ptr %_4)
call void @llvm.lifetime.end.p0(i64 24, ptr %s)
%3 = load i64, ptr %container, align 8, !range !8, !noundef !4
store i64 %3, ptr %_5, align 8
%4 = load i64, ptr %_5, align 8, !noundef !4
switch i64 %4, label %bb4 [
i64 2, label %bb4
i64 3, label %bb6
]
; Value 3: mirror image of bb3. 3 goes straight to the return; 2 would go to a
; drop block.
bb2: ; preds = %start
call void @llvm.lifetime.start.p0(i64 24, ptr %s)
%5 = getelementptr inbounds i8, ptr %container, i64 8
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 %5, i64 24, i1 false)
call void @llvm.lifetime.start.p0(i64 24, ptr %_4)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_4, ptr align 8 %s, i64 24, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_0, ptr align 8 %_4, i64 24, i1 false)
call void @llvm.lifetime.end.p0(i64 24, ptr %_4)
call void @llvm.lifetime.end.p0(i64 24, ptr %s)
%6 = load i64, ptr %container, align 8, !range !8, !noundef !4
store i64 %6, ptr %_5, align 8
%7 = load i64, ptr %_5, align 8, !noundef !4
switch i64 %7, label %bb4 [
i64 2, label %bb5
i64 3, label %bb4
]
bb4: ; preds = %bb6, %bb5, %bb1, %bb2, %bb2, %bb3, %bb3
ret void
; Drop block targeted by the "value 3" cases of the re-checks in bb1 and bb3.
bb6: ; preds = %bb1, %bb3
%8 = getelementptr inbounds i8, ptr %container, i64 8
; call core::ptr::drop_in_place<alloc::string::String>
call void @"_ZN4core3ptr42drop_in_place$LT$alloc..string..String$GT$17hb544879c41d56b0bE"(ptr noalias noundef align 8 dereferenceable(24) %8)
br label %bb4
; Drop block targeted by the "value 2" cases of the re-checks in bb1 and bb2.
bb5: ; preds = %bb1, %bb2
%9 = getelementptr inbounds i8, ptr %container, i64 8
; call core::ptr::drop_in_place<alloc::string::String>
call void @"_ZN4core3ptr42drop_in_place$LT$alloc..string..String$GT$17hb544879c41d56b0bE"(ptr noalias noundef align 8 dereferenceable(24) %9)
br label %bb4
}
Another example is a `take` function for a container that panics if it's "locked":
/// Example container for `take`: two payload-free variants and one variant
/// that owns a `String`.
pub enum Container {
// Payload-free variants.
Empty,
Locked,
// Variant that owns a `String`.
Full(String),
}
/// Takes the string out of `container`, leaving `Container::Empty` behind.
///
/// Returns `None` for `Empty` and `Some(content)` for `Full`.
///
/// # Panics
///
/// Panics if the container is `Locked`.
pub fn take(container: &mut Container) -> Option<String> {
match container {
Container::Empty => None,
Container::Locked => panic!("you tried to break into the container!"),
// The outer arm has already established `Full`, so the inner match on the
// replaced value can only see `Full`; the remaining arm is unreachable.
Container::Full(_) => match std::mem::replace(container, Container::Empty) {
Container::Full(content) => Some(content),
// "Spelled out" form: the payload-free variants are listed explicitly.
Container::Empty | Container::Locked => unreachable!(),
// "Wildcard" form being compared against; swap it in for the arm above:
// _ => unreachable!(),
},
}
}
This time, in addition to larger LLVM IR, the final machine code is larger and contains an unnecessary unreachable check when using `panic_unwind`. (With `panic_abort`, the LLVM IR is very similar in both cases. The wildcard machine code still contains the unreachable check, but that's a different issue that seems more suited to the LLVM side.)
(Note that if `Locked` becomes `Locked(String)` so that it needs `Drop`, the machine code will contain the unreachable check no matter how you write the match arms when using `panic_unwind`. That might be better characterized as an LLVM improvement, but I'm mentioning it because an LLVM opt improvement fixing that might also fix the case above without any MIR opt changes. I haven't looked into it though.)
x86 asm with variants spelled out
# take(), inner match with Empty/Locked spelled out (x86-64, AT&T syntax).
# %rdi is used as the destination for the returned value and %rsi as the
# pointer to the container (presumably sret pointer and `container` argument).
take:
movabsq $-9223372036854775808, %rcx
movq (%rsi), %rax
# %rdx = min(first word ^ 0x8000000000000000, 2), unsigned; this is the
# 3-way selector tested below (presumably the niche-decoded variant index).
movq %rax, %r8
xorq %rcx, %r8
cmpq $2, %r8
movl $2, %edx
cmovbq %r8, %rdx
testq %rdx, %rdx
je .LBB0_1
cmpq $2, %rdx
jne .LBB0_5
# Selector == 2: copy bytes 8..24 to the result, overwrite the container's
# first word with %rcx, and store the original first word into the result.
# There is no further check on this path.
movups 8(%rsi), %xmm0
movups %xmm0, 8(%rdi)
movq %rcx, (%rsi)
movq %rax, (%rdi)
movq %rdi, %rax
retq
# Selector == 0: the result's first word is set to %rcx (presumably the None
# encoding).
.LBB0_1:
movq %rcx, %rax
movq %rax, (%rdi)
movq %rdi, %rax
retq
# Selector == 1: call begin_panic with the 38-byte message at .0.
.LBB0_5:
pushq %rax
leaq .Lanon.600d6af0f320a021dc494cfa2daca569.0(%rip), %rdi
leaq .Lanon.600d6af0f320a021dc494cfa2daca569.2(%rip), %rdx
movl $38, %esi
callq *_ZN3std9panicking11begin_panic17h60e0c77c4eaa68d9E@GOTPCREL(%rip)
# Read-only data: the panic message, the file name, and a record that points at
# the file name (presumably the panic source location).
.Lanon.600d6af0f320a021dc494cfa2daca569.0:
.ascii "you tried to break into the container!"
.Lanon.600d6af0f320a021dc494cfa2daca569.1:
.ascii "main.rs"
.Lanon.600d6af0f320a021dc494cfa2daca569.2:
.quad .Lanon.600d6af0f320a021dc494cfa2daca569.1
.asciz "\007\000\000\000\000\000\000\000\"\000\000\000\036\000\000"
x86 asm with wildcard
# take(), inner match with the wildcard arm (x86-64, AT&T syntax).
# Same shape as the spelled-out version, plus an extra comparison on the
# selector == 2 path that branches to a second panic (.LBB0_6).
take:
pushq %rax
movabsq $-9223372036854775808, %rcx
movq (%rsi), %rax
# %rdx = min(first word ^ 0x8000000000000000, 2), unsigned; this is the
# 3-way selector tested below (presumably the niche-decoded variant index).
movq %rax, %r8
xorq %rcx, %r8
cmpq $2, %r8
movl $2, %edx
cmovbq %r8, %rdx
testq %rdx, %rdx
je .LBB0_4
cmpq $2, %rdx
jne .LBB0_7
# Selector == 2: load the payload words and overwrite the container's first
# word with %rcx.
movq 8(%rsi), %rdx
movq 16(%rsi), %r8
movq %rcx, (%rsi)
# Extra check absent from the spelled-out version: signed compare of the
# original first word against 0x8000000000000001. Only the two values the xor
# above maps to selectors 0 and 1 satisfy it, and both were already branched
# away, so this is the redundant "unreachable" check.
incq %rcx
cmpq %rcx, %rax
jle .LBB0_6
movq %rax, (%rdi)
movq %rdx, 8(%rdi)
movq %r8, 16(%rdi)
movq %rdi, %rax
popq %rcx
retq
# Selector == 0: the result's first word is set to %rcx (presumably the None
# encoding).
.LBB0_4:
movq %rcx, (%rdi)
movq %rdi, %rax
popq %rcx
retq
# Selector == 1: call begin_panic with the 38-byte message at .0.
.LBB0_7:
leaq .Lanon.600d6af0f320a021dc494cfa2daca569.0(%rip), %rdi
leaq .Lanon.600d6af0f320a021dc494cfa2daca569.2(%rip), %rdx
movl $38, %esi
callq *_ZN3std9panicking11begin_panic17h60e0c77c4eaa68d9E@GOTPCREL(%rip)
# Target of the extra check: core::panicking::panic with the 40-byte
# "entered unreachable code" message at .3.
.LBB0_6:
leaq .Lanon.600d6af0f320a021dc494cfa2daca569.3(%rip), %rdi
leaq .Lanon.600d6af0f320a021dc494cfa2daca569.4(%rip), %rdx
movl $40, %esi
callq *_ZN4core9panicking5panic17h48a7e1f3665210c6E@GOTPCREL(%rip)
# Read-only data: both panic messages, the file name, and two records that
# point at the file name (presumably the panic source locations).
.Lanon.600d6af0f320a021dc494cfa2daca569.0:
.ascii "you tried to break into the container!"
.Lanon.600d6af0f320a021dc494cfa2daca569.1:
.ascii "main.rs"
.Lanon.600d6af0f320a021dc494cfa2daca569.2:
.quad .Lanon.600d6af0f320a021dc494cfa2daca569.1
.asciz "\007\000\000\000\000\000\000\000\"\000\000\000\036\000\000"
.Lanon.600d6af0f320a021dc494cfa2daca569.3:
.ascii "internal error: entered unreachable code"
.Lanon.600d6af0f320a021dc494cfa2daca569.4:
.quad .Lanon.600d6af0f320a021dc494cfa2daca569.1
.asciz "\007\000\000\000\000\000\000\000&\000\000\000\022\000\000"