Skip to content

Drop is called for match wildcards even when all matched variants have no Drop #142705

@ashivaram23

Description

@ashivaram23

Consider matching on an enum where multiple variants should map to the same expression. Those variants can be explicitly spelled out in one or more match arms, or they can be grouped with the wildcard pattern _ instead.

In some cases, spelling out the variants (ideally in a single arm with a pattern like Variant1 | Variant2) can generate smaller LLVM IR compared to the wildcard version, and less frequently can generate better machine code. One way this can happen is when another variant in a different match arm has a nontrivial Drop but the groupable variants don't. The optimized MIR will still try to drop the whole enum or check for the variants that need Drop, potentially leading to larger LLVM IR that takes longer for LLVM to process or results in worse machine code.


Here is a simple example, a function that converts a container with multiple full and empty variants to an Option:

/// Example container with two payload-free variants and two variants that
/// each own a `String` (and therefore need drop glue).
pub enum Container {
    /// First payload-free variant.
    Empty1,
    /// Second payload-free variant.
    Empty2,
    /// First variant owning a `String`.
    Full1(String),
    /// Second variant owning a `String`.
    Full2(String),
}

/// Converts a `Container` into an `Option<String>`.
///
/// Returns `Some` with the owned `String` for `Full1`/`Full2`, and `None`
/// for `Empty1`/`Empty2`.
pub fn into_option(container: Container) -> Option<String> {
    match container {
        // Either payload-carrying variant hands its `String` to the caller.
        Container::Full1(s) | Container::Full2(s) => Some(s),
        // Payload-free variants spelled out explicitly; this is the form
        // whose LLVM IR contains no `drop_in_place` call.
        Container::Empty1 | Container::Empty2 => None,
        // Wildcard alternative to the arm above; swapping it in is what makes
        // the LLVM IR re-check the discriminant and call
        // `drop_in_place::<String>`:
        // _ => None,
    }
}

Both approaches generate similar machine code with opt-level=3, but the initial LLVM IR is significantly larger with the wildcard.

LLVM IR with variants spelled out

rustc -C opt-level=3 -C no-prepopulate-passes --target x86_64-unknown-linux-gnu --emit=llvm-ir

into_option takes 42 lines and the .ll file is 3.01 KiB total.

; Unoptimized LLVM IR for `into_option` with every variant spelled out.
; No drop_in_place call appears anywhere in this function.
define dso_local void @into_option(ptr dead_on_unwind noalias nocapture noundef writable sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(32) %container) unnamed_addr #0 {
start:
  %_4 = alloca [24 x i8], align 8
  %s = alloca [24 x i8], align 8
  ; %_2 is the first 8 bytes of %container, presumably the enum discriminant.
  %_2 = load i64, ptr %container, align 8, !range !4, !noundef !5
  ; All four values are listed explicitly, so the default target (bb1) is unreachable.
  switch i64 %_2, label %bb1 [
    i64 0, label %bb2
    i64 1, label %bb2
    i64 2, label %bb4
    i64 3, label %bb3
  ]

bb1:                                              ; preds = %start
  unreachable

; Values 0 and 1 (presumably Empty1/Empty2): write a single constant into the
; return slot, presumably the encoding of None, and return.
bb2:                                              ; preds = %start, %start
  store i64 -9223372036854775808, ptr %_0, align 8
  br label %bb6

; Value 2: copy the 24-byte payload at offset 8 of %container into %s.
bb4:                                              ; preds = %start
  call void @llvm.lifetime.start.p0(i64 24, ptr %s)
  %0 = getelementptr inbounds i8, ptr %container, i64 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 %0, i64 24, i1 false)
  br label %bb5

; Value 3: same payload copy as bb4.
bb3:                                              ; preds = %start
  call void @llvm.lifetime.start.p0(i64 24, ptr %s)
  %1 = getelementptr inbounds i8, ptr %container, i64 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 %1, i64 24, i1 false)
  br label %bb5

bb6:                                              ; preds = %bb5, %bb2
  ret void

; Shared tail of both payload arms: move %s through %_4 into the return slot %_0.
bb5:                                              ; preds = %bb3, %bb4
  call void @llvm.lifetime.start.p0(i64 24, ptr %_4)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_4, ptr align 8 %s, i64 24, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_0, ptr align 8 %_4, i64 24, i1 false)
  call void @llvm.lifetime.end.p0(i64 24, ptr %_4)
  call void @llvm.lifetime.end.p0(i64 24, ptr %s)
  br label %bb6
}
LLVM IR with wildcard

rustc -C opt-level=3 -C no-prepopulate-passes --target x86_64-unknown-linux-gnu --emit=llvm-ir

into_option takes 70 lines and the .ll file is 20.73 KiB total (though that's mostly library functions that a bigger project would call from multiple places).

; Unoptimized LLVM IR for `into_option` with the wildcard arm.
; Every arm ends by re-loading the discriminant and switching on it again,
; with edges into bb5/bb6, which call drop_in_place<String> on the payload.
define dso_local void @into_option(ptr dead_on_unwind noalias nocapture noundef writable sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef align 8 dereferenceable(32) %container) unnamed_addr #0 {
start:
  %_5 = alloca [8 x i8], align 8
  %_4 = alloca [24 x i8], align 8
  %s = alloca [24 x i8], align 8
  ; %_2 is the first 8 bytes of %container, presumably the enum discriminant.
  %_2 = load i64, ptr %container, align 8, !range !8, !noundef !4
  ; Only values 2 and 3 are listed; everything else takes the default edge to bb1.
  switch i64 %_2, label %bb1 [
    i64 2, label %bb3
    i64 3, label %bb2
  ]

; Default target, i.e. the wildcard arm: writes one constant into the return
; slot, then re-reads the discriminant and may branch to a drop block.
; NOTE(review): this path is only entered when the value is neither 2 nor 3,
; so both drop edges look dead here — confirm.
bb1:                                              ; preds = %start
  store i64 -9223372036854775808, ptr %_0, align 8
  %0 = load i64, ptr %container, align 8, !range !8, !noundef !4
  store i64 %0, ptr %_5, align 8
  %1 = load i64, ptr %_5, align 8, !noundef !4
  switch i64 %1, label %bb4 [
    i64 2, label %bb5
    i64 3, label %bb6
  ]

; Value 2: move the 24-byte payload into the return slot, then re-check the
; discriminant; value 3 would lead to the drop in bb6.
; NOTE(review): entered with value 2, so the `i64 3` edge looks dead — confirm.
bb3:                                              ; preds = %start
  call void @llvm.lifetime.start.p0(i64 24, ptr %s)
  %2 = getelementptr inbounds i8, ptr %container, i64 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 %2, i64 24, i1 false)
  call void @llvm.lifetime.start.p0(i64 24, ptr %_4)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_4, ptr align 8 %s, i64 24, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_0, ptr align 8 %_4, i64 24, i1 false)
  call void @llvm.lifetime.end.p0(i64 24, ptr %_4)
  call void @llvm.lifetime.end.p0(i64 24, ptr %s)
  %3 = load i64, ptr %container, align 8, !range !8, !noundef !4
  store i64 %3, ptr %_5, align 8
  %4 = load i64, ptr %_5, align 8, !noundef !4
  switch i64 %4, label %bb4 [
    i64 2, label %bb4
    i64 3, label %bb6
  ]

; Value 3: mirror image of bb3; here value 2 would lead to the drop in bb5.
; NOTE(review): entered with value 3, so the `i64 2` edge looks dead — confirm.
bb2:                                              ; preds = %start
  call void @llvm.lifetime.start.p0(i64 24, ptr %s)
  %5 = getelementptr inbounds i8, ptr %container, i64 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 %5, i64 24, i1 false)
  call void @llvm.lifetime.start.p0(i64 24, ptr %_4)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_4, ptr align 8 %s, i64 24, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_0, ptr align 8 %_4, i64 24, i1 false)
  call void @llvm.lifetime.end.p0(i64 24, ptr %_4)
  call void @llvm.lifetime.end.p0(i64 24, ptr %s)
  %6 = load i64, ptr %container, align 8, !range !8, !noundef !4
  store i64 %6, ptr %_5, align 8
  %7 = load i64, ptr %_5, align 8, !noundef !4
  switch i64 %7, label %bb4 [
    i64 2, label %bb5
    i64 3, label %bb4
  ]

bb4:                                              ; preds = %bb6, %bb5, %bb1, %bb2, %bb2, %bb3, %bb3
  ret void

; Drop block for the String payload at offset 8 of %container (reached on value 3).
bb6:                                              ; preds = %bb1, %bb3
  %8 = getelementptr inbounds i8, ptr %container, i64 8
; call core::ptr::drop_in_place<alloc::string::String>
  call void @"_ZN4core3ptr42drop_in_place$LT$alloc..string..String$GT$17hb544879c41d56b0bE"(ptr noalias noundef align 8 dereferenceable(24) %8)
  br label %bb4

; Drop block for the String payload at offset 8 of %container (reached on value 2).
bb5:                                              ; preds = %bb1, %bb2
  %9 = getelementptr inbounds i8, ptr %container, i64 8
; call core::ptr::drop_in_place<alloc::string::String>
  call void @"_ZN4core3ptr42drop_in_place$LT$alloc..string..String$GT$17hb544879c41d56b0bE"(ptr noalias noundef align 8 dereferenceable(24) %9)
  br label %bb4
}

Another example is a take function for a container that panics if it's "locked":

/// Example container for the `take` reproduction: two payload-free states
/// and one state that owns a `String`.
pub enum Container {
    /// Nothing stored; `take` returns `None` for it.
    Empty,
    /// Payload-free state for which `take` panics.
    Locked,
    /// Holds the `String` that `take` hands out.
    Full(String),
}

/// Takes the `String` out of a `Full` container, leaving `Empty` in its place.
///
/// Returns `None` when the container is `Empty`.
///
/// # Panics
///
/// Panics with "you tried to break into the container!" when the container
/// is `Locked`.
pub fn take(container: &mut Container) -> Option<String> {
    match container {
        Container::Empty => None,
        Container::Locked => panic!("you tried to break into the container!"),
        // Swap `Empty` into place and match on the value that was there before.
        Container::Full(_) => match std::mem::replace(container, Container::Empty) {
            Container::Full(content) => Some(content),
            // The outer arm already matched `Full`, so these variants cannot
            // occur; they are spelled out rather than grouped under `_`.
            Container::Empty | Container::Locked => unreachable!(),
            // Wildcard alternative to the arm above; with it the generated
            // code keeps an `unreachable!()` panic path:
            // _ => unreachable!(),
        },
    }
}

This time, in addition to producing larger LLVM IR, the wildcard version's final machine code is larger and contains an unnecessary unreachable check when using panic_unwind. (With panic_abort, the LLVM IR is very similar in both cases. The wildcard machine code still contains the unreachable check, but that's a different issue that seems more suited to the LLVM side.)

(Note that if Locked becomes Locked(String) so that it needs Drop, the machine code will contain the unreachable check no matter how you write the match arms when using panic_unwind. That might be better characterized as an LLVM improvement, but I'm mentioning it because an LLVM opt improvement fixing that might also fix the case above without any MIR opt changes. I haven't looked into it though.)

x86 asm with variants spelled out
# `take` with the variants spelled out: the only panic path is .LBB0_5
# (the 38-byte "you tried to break into the container!" message).
take:
    movabsq $-9223372036854775808, %rcx
    movq (%rsi), %rax
    movq %rax, %r8
    xorq %rcx, %r8
    cmpq $2, %r8
    movl $2, %edx
    cmovbq %r8, %rdx
    # %rdx now holds 0, 1 or 2; presumably the decoded variant index — confirm.
    testq %rdx, %rdx
    je .LBB0_1
    cmpq $2, %rdx
    jne .LBB0_5
    # Index 2: copy the three words out to (%rdi) and overwrite the first
    # word of (%rsi) with the constant in %rcx.
    movups 8(%rsi), %xmm0
    movups %xmm0, 8(%rdi)
    movq %rcx, (%rsi)
    movq %rax, (%rdi)
    movq %rdi, %rax
    retq
# Index 0: store only the constant into the result and return.
.LBB0_1:
    movq %rcx, %rax
    movq %rax, (%rdi)
    movq %rdi, %rax
    retq
# Index 1: call begin_panic with message .0 (length 38).
.LBB0_5:
    pushq %rax
    leaq .Lanon.600d6af0f320a021dc494cfa2daca569.0(%rip), %rdi
    leaq .Lanon.600d6af0f320a021dc494cfa2daca569.2(%rip), %rdx
    movl $38, %esi
    callq *_ZN3std9panicking11begin_panic17h60e0c77c4eaa68d9E@GOTPCREL(%rip)

.Lanon.600d6af0f320a021dc494cfa2daca569.0:
    .ascii "you tried to break into the container!"

.Lanon.600d6af0f320a021dc494cfa2daca569.1:
    .ascii "main.rs"

.Lanon.600d6af0f320a021dc494cfa2daca569.2:
    .quad .Lanon.600d6af0f320a021dc494cfa2daca569.1
    .asciz "\007\000\000\000\000\000\000\000\"\000\000\000\036\000\000"
x86 asm with wildcard
# `take` with the wildcard arm: besides the begin_panic path (.LBB0_7) there
# is a second panic path (.LBB0_6) carrying the 40-byte
# "internal error: entered unreachable code" message.
take:
    pushq %rax
    movabsq $-9223372036854775808, %rcx
    movq (%rsi), %rax
    movq %rax, %r8
    xorq %rcx, %r8
    cmpq $2, %r8
    movl $2, %edx
    cmovbq %r8, %rdx
    # %rdx now holds 0, 1 or 2; presumably the decoded variant index — confirm.
    testq %rdx, %rdx
    je .LBB0_4
    cmpq $2, %rdx
    jne .LBB0_7
    movq 8(%rsi), %rdx
    movq 16(%rsi), %r8
    movq %rcx, (%rsi)
    # Extra compare-and-branch on the old first word (%rax) that guards the
    # unreachable!() panic at .LBB0_6.
    incq %rcx
    cmpq %rcx, %rax
    jle .LBB0_6
    movq %rax, (%rdi)
    movq %rdx, 8(%rdi)
    movq %r8, 16(%rdi)
    movq %rdi, %rax
    popq %rcx
    retq
# Index 0: store only the constant into the result and return.
.LBB0_4:
    movq %rcx, (%rdi)
    movq %rdi, %rax
    popq %rcx
    retq
# Index 1: call begin_panic with message .0 (length 38).
.LBB0_7:
    leaq .Lanon.600d6af0f320a021dc494cfa2daca569.0(%rip), %rdi
    leaq .Lanon.600d6af0f320a021dc494cfa2daca569.2(%rip), %rdx
    movl $38, %esi
    callq *_ZN3std9panicking11begin_panic17h60e0c77c4eaa68d9E@GOTPCREL(%rip)
# unreachable!() path: call core::panicking::panic with message .3 (length 40).
.LBB0_6:
    leaq .Lanon.600d6af0f320a021dc494cfa2daca569.3(%rip), %rdi
    leaq .Lanon.600d6af0f320a021dc494cfa2daca569.4(%rip), %rdx
    movl $40, %esi
    callq *_ZN4core9panicking5panic17h48a7e1f3665210c6E@GOTPCREL(%rip)

.Lanon.600d6af0f320a021dc494cfa2daca569.0:
    .ascii "you tried to break into the container!"

.Lanon.600d6af0f320a021dc494cfa2daca569.1:
    .ascii "main.rs"

.Lanon.600d6af0f320a021dc494cfa2daca569.2:
    .quad .Lanon.600d6af0f320a021dc494cfa2daca569.1
    .asciz "\007\000\000\000\000\000\000\000\"\000\000\000\036\000\000"

.Lanon.600d6af0f320a021dc494cfa2daca569.3:
    .ascii "internal error: entered unreachable code"

.Lanon.600d6af0f320a021dc494cfa2daca569.4:
    .quad .Lanon.600d6af0f320a021dc494cfa2daca569.1
    .asciz "\007\000\000\000\000\000\000\000&\000\000\000\022\000\000"

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-patterns (Relating to patterns and pattern matching); C-optimization (Category: An issue highlighting optimization opportunities or PRs implementing such); T-compiler (Relevant to the compiler team, which will review and decide on the PR/issue.); needs-triage (This issue may need triage. Remove it if it has been sufficiently triaged.)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions