Inefficient codegen for non-concrete varargs

The following example generates bad code on recent Julia releases (including master):

```julia
function foo(args...)
    Base.pointerset(args[1], 1, 1, 1)
    return
end

code_llvm(foo, Tuple{Ptr{Int}, Type{Int}})
```

```llvm
define void @julia_foo_11773(i64, %jl_value_t addrspace(10)* nonnull) #0 {
top:
  %2 = alloca %jl_value_t addrspace(10)*, i32 3
  %gcframe = alloca %jl_value_t addrspace(10)*, i32 3, align 16
...
  %19 = call nonnull %jl_value_t addrspace(10)* @jl_f_tuple(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* null to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %2, i32 2)
...
  %24 = call nonnull %jl_value_t addrspace(10)* @jl_f_getfield(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* null to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %2, i32 3)
...
  %25 = bitcast %jl_value_t addrspace(10)* %24 to i64* addrspace(10)*
  %26 = load i64*, i64* addrspace(10)* %25, align 8
  store i64 1, i64* %26, align 1
...
  ret void
}
```

Not passing a `Type{Int}` (or any other non-concrete argument), or getting rid of the varargs, results in the expected clean code.

---

This doesn't look like an inference/specialization problem to me (i.e. not https://github.com/JuliaLang/julia/issues/34365):

```julia
julia> first(code_typed(foo, Tuple{Ptr{Int}, Type{Int}})[])
CodeInfo(
1 ─ %1 = Base.pointerset::Core.Compiler.Const(Core.Intrinsics.pointerset, false)
│   %2 = Base.getfield(args, 1, true)::Ptr{Int64}
│        (%1)(%2, 1, 1, 1)::Ptr{Int64}
└──      return
)

julia> first(code_typed(foo, Tuple{Ptr{Int}, Type{Int}})[]).parent.specTypes
Tuple{typeof(foo),Ptr{Int64},Type{Int64}}
```

Instead, the tuple is constructed by the code path that handles specsig functions whose varargs tuple type is non-concrete: https://github.com/JuliaLang/julia/blob/0549bf1ce39071f22cb7c8065d99e8215714c063/src/codegen.cpp#L6177-L6202

---

This penalizes a lot of Cassette-based code, where splatting is used heavily for _every_ function call (`overdub(ctx, args...)`), so we get plenty of calls to `jl_f_tuple` and `jl_f_getfield` as soon as a non-concrete argument is passed (as observed in https://github.com/JuliaGPU/CUDAnative.jl/pull/334, where these calls break compilation).

	// step 10. allocate rest argument
	CallInst *restTuple = NULL;
	if (va && ctx.vaSlot != -1) {
	jl_varinfo_t &vi = ctx.slots[ctx.vaSlot];
	if (vi.value.constant || !vi.used) {
	assert(vi.boxroot == NULL);
	}
	else if (specsig) {
	ctx.nvargs = jl_nparams(lam->specTypes) - nreq;
	jl_cgval_t *vargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * ctx.nvargs);
	for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) {
	jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
	bool isboxed = deserves_argbox(argType);
	Type *llvmArgType = isboxed ? T_prjlvalue : julia_type_to_llvm(argType);
	vargs[i - nreq] = get_specsig_arg(argType, llvmArgType, isboxed);
	}
	if (jl_is_concrete_type(vi.value.typ)) {
	jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs);
	// FIXME: this may assert since the type of vi might not be isbits here
	emit_varinfo_assign(ctx, vi, tuple);
	} else {
	Value *vtpl = emit_jlcall(ctx, prepare_call(jltuple_func), maybe_decay_untracked(V_null),
	vargs, ctx.nvargs, JLCALL_F_CC);
	jl_cgval_t tuple = mark_julia_type(ctx, vtpl, true, vi.value.typ);
	emit_varinfo_assign(ctx, vi, tuple);
	}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Inefficient codegen for non-concrete varargs #34459

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Uh oh!

Inefficient codegen for non-concrete varargs #34459

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions