Skip to content

Commit 31f66d2

Browse files
Copilotmangod9jkotasCopilotjakobbotsch
authored
Save all volatile argument registers in GC probe hijack frames on AMD64 and ARM64 (#126848)
- [x] Save volatile argument registers (RDX/R8/R9) in Windows AMD64 GC probe hijack frames - [x] Save volatile argument registers (RSI/RDI/R8/R9) in Unix AMD64 GC probe hijack frames - [x] Use CFI-aware push_register/pop_register macros for proper unwind directives - [x] Save volatile argument registers (x3-x7) in ARM64 GC probe hijack frames - [x] Update RBM_INTERFACELOOKUP_FOR_SLOT_TRASH to trash all non-argument volatile registers - [x] Save all FP argument registers in GC probe hijack frames - [x] Merge main and apply targeted JIT PHI jump threading fix for #126976 - [x] Add project exclusions for arm64 Windows tests - [x] Fix JITDUMP formatting in redundantbranchopts.cpp for jitformat checker --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: mangod9 <61718172+mangod9@users.noreply.github.com> Co-authored-by: Jan Kotas <jkotas@microsoft.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> Co-authored-by: jkotas <6668460+jkotas@users.noreply.github.com> Co-authored-by: jakobbotsch <7887810+jakobbotsch@users.noreply.github.com> Co-authored-by: Michal Strehovský <MichalStrehovsky@users.noreply.github.com>
1 parent 6eab9a7 commit 31f66d2

File tree

11 files changed

+271
-139
lines changed

11 files changed

+271
-139
lines changed

src/coreclr/jit/redundantbranchopts.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1762,9 +1762,9 @@ Compiler::JumpThreadCheckResult Compiler::optJumpThreadCheck(BasicBlock* const b
17621762
//
17631763
if (ssaVarDsc->HasGlobalUse())
17641764
{
1765-
JITDUMP(FMT_BB " has global phi for V%02u.%u; deferring jump threading pending use analysis\n",
1766-
block->bbNum, lclNum, ssaNum);
1767-
hasGlobalPhiUses = true;
1765+
JITDUMP(FMT_BB " has global phi for V%02u.%u; no phi-based threading\n", block->bbNum, lclNum,
1766+
ssaNum);
1767+
return JumpThreadCheckResult::CannotThread;
17681768
}
17691769
}
17701770

src/coreclr/jit/targetamd64.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@
540540
// The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
541541
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
542542

543-
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_RAX | RBM_R10 | RBM_R11)
543+
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS | RBM_FLTARG_REGS))
544544

545545
#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX))
546546
#define RBM_VALIDATE_INDIRECT_CALL_TRASH_ALL (RBM_INT_CALLEE_TRASH_ALL & ~(RBM_R10 | RBM_RCX))

src/coreclr/jit/targetarm64.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@
263263
// The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
264264
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
265265

266-
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_R12 | RBM_R13 | RBM_R14 | RBM_R15)
266+
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS | RBM_FLTARG_REGS))
267267
#define RBM_INTERFACELOOKUP_FOR_SLOT_RETURN RBM_R15
268268
#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R15))
269269
#define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15

src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,9 @@ PTFF_SAVE_ALL_PRESERVED equ 000000F7h ;; NOTE: RBP is not included in this set
359359
PTFF_SAVE_RSP equ 00008000h
360360
PTFF_SAVE_RAX equ 00000100h ;; RAX is saved in hijack handler - in case it contains a GC ref
361361
PTFF_SAVE_RCX equ 00000200h ;; RCX is saved in hijack handler - in case it contains a GC ref
362+
PTFF_SAVE_RDX equ 00000400h ;; RDX is saved in hijack handler - in case it contains a GC ref
363+
PTFF_SAVE_R8 equ 00000800h ;; R8 is saved in hijack handler - in case it contains a GC ref
364+
PTFF_SAVE_R9 equ 00001000h ;; R9 is saved in hijack handler - in case it contains a GC ref
362365
PTFF_SAVE_ALL_SCRATCH equ 00007F00h
363366
PTFF_THREAD_HIJACK equ 00100000h ;; indicates that this is a frame for a hijacked call
364367

src/coreclr/nativeaot/Runtime/amd64/GcProbe.S

Lines changed: 95 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -6,42 +6,53 @@
66
#include <unixasmmacros.inc>
77

88
//
9-
// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX/RCX/RDX and accepts the register
10-
// bitmask in R8
9+
// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves volatile argument registers
10+
// and accepts the register bitmask
1111
//
1212
// On entry:
1313
// - BITMASK: bitmask describing pushes, may be volatile register or constant value
1414
// - RAX: managed function return value, may be an object or byref
15+
// - RSI, RDI, RCX, RDX, R8, R9: may contain objects or byrefs at the hijack point
1516
// - preserved regs: need to stay preserved, may contain objects or byrefs
1617
//
1718
// INVARIANTS
1819
// - The macro assumes it is called from a prolog, prior to a frame pointer being setup.
1920
// - All preserved registers remain unchanged from their values in managed code.
2021
//
2122
.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK
23+
push_register r9 // save R9, it might contain an objectref
24+
push_register r8 // save R8, it might contain an objectref
2225
push_register rdx // save RDX, it might contain an objectref
2326
push_register rcx // save RCX, it might contain an objectref (async continuation)
2427
push_register rax // save RAX, it might contain an objectref
25-
lea \trashReg, [rsp + 0x20]
28+
lea \trashReg, [rsp + 0x30]
2629
push_register \trashReg // save caller`s RSP
2730
push_nonvol_reg r15 // save preserved registers
2831
push_nonvol_reg r14 // ..
2932
push_nonvol_reg r13 // ..
3033
push_nonvol_reg r12 // ..
34+
push_register rdi // save RDI, volatile on Unix, might contain an objectref
35+
push_register rsi // save RSI, volatile on Unix, might contain an objectref
3136
push_nonvol_reg rbx // ..
3237
push_register \BITMASK // save the register bitmask passed in by caller
3338
push_register \threadReg // Thread * (unused by stackwalker)
3439
push_nonvol_reg rbp // save caller`s RBP
35-
mov \trashReg, [rsp + 12*8] // Find the return address
40+
mov \trashReg, [rsp + 16*8] // Find the return address
3641
push_register \trashReg // save m_RIP
3742
lea \trashReg, [rsp + 0] // trashReg == address of frame
3843

39-
// allocate space for xmm0, xmm1 and alignment
40-
alloc_stack 0x20 + 0
44+
// allocate space for xmm0..xmm7 (FP argument registers)
45+
alloc_stack 0x80 + 0
4146

42-
// save xmm0 and xmm1 in case they are used as return values
43-
movdqa [rsp + 0x10], xmm0
44-
movdqa [rsp + 0] , xmm1
47+
// save FP argument registers in case they contain live values at the hijack point
48+
movdqa [rsp + 0x70], xmm0
49+
movdqa [rsp + 0x60], xmm1
50+
movdqa [rsp + 0x50], xmm2
51+
movdqa [rsp + 0x40], xmm3
52+
movdqa [rsp + 0x30], xmm4
53+
movdqa [rsp + 0x20], xmm5
54+
movdqa [rsp + 0x10], xmm6
55+
movdqa [rsp + 0x00], xmm7
4556

4657
// link the frame into the Thread
4758
mov [\threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame], \trashReg
@@ -52,21 +63,31 @@
5263
// registers and return value to their values from before the probe was called (while also updating any
5364
// object refs or byrefs).
5465
.macro POP_PROBE_FRAME
55-
movdqa xmm1, [rsp + 0]
56-
movdqa xmm0, [rsp + 0x10]
57-
add rsp, 0x20 + 8 // skip xmm0, xmm1 and discard RIP
58-
pop rbp
59-
pop rax // discard Thread*
60-
pop rax // discard BITMASK
61-
pop rbx
62-
pop r12
63-
pop r13
64-
pop r14
65-
pop r15
66-
pop rax // discard caller RSP
67-
pop rax
68-
pop rcx
69-
pop rdx
66+
movdqa xmm7, [rsp + 0x00]
67+
movdqa xmm6, [rsp + 0x10]
68+
movdqa xmm5, [rsp + 0x20]
69+
movdqa xmm4, [rsp + 0x30]
70+
movdqa xmm3, [rsp + 0x40]
71+
movdqa xmm2, [rsp + 0x50]
72+
movdqa xmm1, [rsp + 0x60]
73+
movdqa xmm0, [rsp + 0x70]
74+
free_stack 0x80 + 8 // skip xmm0..xmm7 and discard RIP
75+
pop_nonvol_reg rbp
76+
pop_register rax // discard Thread*
77+
pop_register rax // discard BITMASK
78+
pop_nonvol_reg rbx
79+
pop_register rsi
80+
pop_register rdi
81+
pop_nonvol_reg r12
82+
pop_nonvol_reg r13
83+
pop_nonvol_reg r14
84+
pop_nonvol_reg r15
85+
pop_register rax // discard caller RSP
86+
pop_register rax
87+
pop_register rcx
88+
pop_register rdx
89+
pop_register r8
90+
pop_register r9
7091
.endm
7192

7293
//
@@ -78,38 +99,69 @@
7899
//
79100
// Register state on exit:
80101
// R11: thread pointer
81-
// RAX, RCX, RDX preserved, other volatile regs trashed
102+
// RAX, RCX, RDX, RSI, RDI, R8, R9, xmm0-xmm7 preserved, R10 trashed
82103
//
83104
.macro FixupHijackedCallstack
84-
// preserve RAX, RDX as they may contain return values
85-
push rax
86-
push rdx
105+
// preserve volatile argument registers across INLINE_GETTHREAD
106+
push_register rax
107+
push_register rdx
87108

88109
// preserve RCX as it may contain async continuation return value
89-
push rcx
90-
91-
// align stack
92-
sub rsp, 0x8
110+
push_register rcx
111+
112+
// preserve RSI, RDI, R8 and R9 as they may contain GC refs
113+
push_register rsi
114+
push_register rdi
115+
push_register r8
116+
push_register r9
117+
118+
// allocate space for xmm0..xmm7 + alignment (0x80 for xmm regs + 0x8 for 16-byte alignment)
119+
alloc_stack 0x88
120+
121+
// save FP argument registers that would be clobbered by INLINE_GETTHREAD call
122+
movdqa [rsp + 0x70], xmm0
123+
movdqa [rsp + 0x60], xmm1
124+
movdqa [rsp + 0x50], xmm2
125+
movdqa [rsp + 0x40], xmm3
126+
movdqa [rsp + 0x30], xmm4
127+
movdqa [rsp + 0x20], xmm5
128+
movdqa [rsp + 0x10], xmm6
129+
movdqa [rsp + 0x00], xmm7
93130

94131
// rax = GetThread(), makes nested calls
95132
INLINE_GETTHREAD
96133
mov r11, rax
97134

98-
add rsp, 0x8
135+
// restore FP argument registers
136+
movdqa xmm7, [rsp + 0x00]
137+
movdqa xmm6, [rsp + 0x10]
138+
movdqa xmm5, [rsp + 0x20]
139+
movdqa xmm4, [rsp + 0x30]
140+
movdqa xmm3, [rsp + 0x40]
141+
movdqa xmm2, [rsp + 0x50]
142+
movdqa xmm1, [rsp + 0x60]
143+
movdqa xmm0, [rsp + 0x70]
144+
145+
free_stack 0x88
146+
147+
pop_register r9
148+
pop_register r8
149+
pop_register rdi
150+
pop_register rsi
99151

100-
pop rcx
152+
pop_register rcx
101153

102-
pop rdx
103-
pop rax
154+
pop_register rdx
155+
pop_register rax
104156

105157
// Fix the stack by pushing the original return address
106-
mov r8, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
107-
push r8
158+
mov r10, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
159+
push r10
108160

109161
// Clear hijack state
110-
xor r8, r8
111-
mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r8
112-
mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r8
162+
xor r10, r10
163+
mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r10
164+
mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r10
113165
.endm
114166

115167
//
@@ -124,12 +176,12 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
124176
ret
125177

126178
LOCAL_LABEL(WaitForGC):
127-
mov r8d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_THREAD_HIJACK
179+
mov r10d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RSI + PTFF_SAVE_RDI + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_SAVE_R8 + PTFF_SAVE_R9 + PTFF_THREAD_HIJACK
128180
jmp C_FUNC(RhpWaitForGC)
129181
NESTED_END RhpGcProbeHijack, _TEXT
130182

131183
NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
132-
PUSH_PROBE_FRAME r11, rax, r8
184+
PUSH_PROBE_FRAME r11, rax, r10
133185
END_PROLOGUE
134186

135187
mov rbx, r11

0 commit comments

Comments
 (0)