From 8c0f53751d882f10f223cb574bc44e4183b003ef Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 27 Apr 2026 09:40:05 +0200 Subject: [PATCH 1/2] [X86][FastISel] Restore support for struct returns After #180322, X86 FastISel forces SDAG fallback for any call with a struct return. This caused major compile-time regressions for debug builds in Rust, where struct returns are very common. The type legality check should work on the de-aggregated types, not on the return type directly. --- llvm/lib/Target/X86/X86FastISel.cpp | 29 ++++++---- llvm/test/CodeGen/X86/fast-isel-struct-ret.ll | 58 +++++++++++++++++++ 2 files changed, 75 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/X86/fast-isel-struct-ret.ll diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index d36a9581a3638..f91ef4abbdf27 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -21,6 +21,7 @@ #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -3208,18 +3209,22 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { // the value from FuncInfo.ValueMap. // However, i1 is promoted to i8 and return i8 defined by ABI, so FastISel can // lower it without switching to DAGISel. - MVT RetVT = MVT::Other; - if (!isTypeLegal(CLI.RetTy, RetVT) && !CLI.RetTy->isVoidTy()) { - if (RetVT == MVT::Other) - return false; // Unknown type, let DAG ISel handle it. - - // RetVT is not MVT::Other, it must be simple now. It is something rely on - // the logic of isTypeLegal(). 
- MVT ABIVT = TLI.getRegisterTypeForCallingConv(CLI.RetTy->getContext(), - CLI.CallConv, RetVT); - MVT RegVT = TLI.getRegisterType(CLI.RetTy->getContext(), RetVT); - if (ABIVT != RegVT) - return false; + SmallVector<Type *> RetTys; + ComputeValueTypes(DL, CLI.RetTy, RetTys); + for (Type *RetTy : RetTys) { + MVT RetVT = MVT::Other; + if (!isTypeLegal(RetTy, RetVT)) { + if (RetVT == MVT::Other) + return false; // Unknown type, let DAG ISel handle it. + + // RetVT is not MVT::Other, it must be simple now. It is something rely on + // the logic of isTypeLegal(). + MVT ABIVT = TLI.getRegisterTypeForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, RetVT); + MVT RegVT = TLI.getRegisterType(CLI.RetTy->getContext(), RetVT); + if (ABIVT != RegVT) + return false; + } } // Call / invoke instructions with NoCfCheck attribute require special diff --git a/llvm/test/CodeGen/X86/fast-isel-struct-ret.ll b/llvm/test/CodeGen/X86/fast-isel-struct-ret.ll new file mode 100644 index 0000000000000..34798ef5abe1f --- /dev/null +++ b/llvm/test/CodeGen/X86/fast-isel-struct-ret.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -fast-isel -fast-isel-abort=3 < %s | FileCheck %s + +declare { i32, i32 } @get_i32s() + +define i32 @call_get_i32s() nounwind { +; CHECK-LABEL: call_get_i32s: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq get_i32s@PLT +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + %res = call { i32, i32 } @get_i32s() + %res.0 = extractvalue { i32, i32 } %res, 0 + %res.1 = extractvalue { i32, i32 } %res, 1 + %add = add i32 %res.0, %res.1 + ret i32 %add +} + +declare { ptr, ptr } @get_ptrs() + +define i64 @call_get_ptrs() nounwind { +; CHECK-LABEL: call_get_ptrs: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq get_ptrs@PLT +; CHECK-NEXT: subq %rdx, %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + %res = call 
{ ptr, ptr } @get_ptrs() + %res.0 = extractvalue { ptr, ptr } %res, 0 + %res.1 = extractvalue { ptr, ptr } %res, 1 + %res.0.addr = ptrtoaddr ptr %res.0 to i64 + %res.1.addr = ptrtoaddr ptr %res.1 to i64 + %sub = sub i64 %res.0.addr, %res.1.addr + ret i64 %sub +} + +declare { i64, i1 } @get_i64_and_bool() + +define i64 @call_get_i64_and_bool() nounwind { +; CHECK-LABEL: call_get_i64_and_bool: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq get_i64_and_bool@PLT +; CHECK-NEXT: andb $1, %dl +; CHECK-NEXT: movzbl %dl, %ecx +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + %res = call { i64, i1 } @get_i64_and_bool() + %res.0 = extractvalue { i64, i1 } %res, 0 + %res.1 = extractvalue { i64, i1 } %res, 1 + %res.1.ext = zext i1 %res.1 to i64 + %add = add i64 %res.0, %res.1.ext + ret i64 %add +} From f78d96323106c351eb16d56de5d92e957f9ac3d4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 29 Apr 2026 10:25:21 +0200 Subject: [PATCH 2/2] Add test with bfloat struct return --- llvm/test/CodeGen/X86/bf16-fast-isel.ll | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/llvm/test/CodeGen/X86/bf16-fast-isel.ll b/llvm/test/CodeGen/X86/bf16-fast-isel.ll index 812ffc3ab5f19..4259b811bbfe9 100644 --- a/llvm/test/CodeGen/X86/bf16-fast-isel.ll +++ b/llvm/test/CodeGen/X86/bf16-fast-isel.ll @@ -116,6 +116,36 @@ entry: ret i8 %call2 } +declare { bfloat, bfloat } @get_bfloats() +declare void @take_bfloats({ bfloat, bfloat }) + +define void @call_get_bfloats() nounwind { +; CHECK-LABEL: call_get_bfloats: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq get_bfloats@PLT +; CHECK-NEXT: pextrw $0, %xmm1, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: movl %eax, (%rsp) # 4-byte Spill +; CHECK-NEXT: pextrw $0, %xmm0, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: callq __truncsfbf2@PLT +; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; 
CHECK-NEXT: movss (%rsp), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: callq __truncsfbf2@PLT +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: callq take_bfloats@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %res = call { bfloat, bfloat } @get_bfloats() + call void @take_bfloats({ bfloat, bfloat } %res) + ret void +} + declare bfloat @foo(ptr %f) declare zeroext i8 @bar(bfloat) declare fastcc bfloat @foo_fast(ptr %f)