Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/targetamd64.h
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@
// The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH

#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_RAX | RBM_R10 | RBM_R11)
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS | RBM_FLTARG_REGS))

#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX))
#define RBM_VALIDATE_INDIRECT_CALL_TRASH_ALL (RBM_INT_CALLEE_TRASH_ALL & ~(RBM_R10 | RBM_RCX))
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/targetarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@
// The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH

#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_R12 | RBM_R13 | RBM_R14 | RBM_R15)
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS | RBM_FLTARG_REGS))
#define RBM_INTERFACELOOKUP_FOR_SLOT_RETURN RBM_R15
#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R15))
#define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@ PTFF_SAVE_ALL_PRESERVED equ 000000F7h ;; NOTE: RBP is not included in this set
PTFF_SAVE_RSP equ 00008000h
PTFF_SAVE_RAX equ 00000100h ;; RAX is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_RCX equ 00000200h ;; RCX is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_RDX equ 00000400h ;; RDX is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_R8 equ 00000800h ;; R8 is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_R9 equ 00001000h ;; R9 is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_ALL_SCRATCH equ 00007F00h
PTFF_THREAD_HIJACK equ 00100000h ;; indicates that this is a frame for a hijacked call

Expand Down
138 changes: 95 additions & 43 deletions src/coreclr/nativeaot/Runtime/amd64/GcProbe.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,53 @@
#include <unixasmmacros.inc>

//
// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX/RCX/RDX and accepts the register
// bitmask in R8
// This macro is very similar to PUSH_COOP_PINVOKE_FRAME, but it also saves RAX, the volatile integer
// argument registers and xmm0..xmm7, and it accepts the register bitmask as a macro argument
//
// On entry:
// - BITMASK: bitmask describing pushes, may be a volatile register or a constant value
// - RAX: managed function return value, may be an object or byref
// - RSI, RDI, RCX, RDX, R8, R9: may contain objects or byrefs at the hijack point
// - preserved regs: need to stay preserved, may contain objects or byrefs
//
// INVARIANTS
// - The macro assumes it is called from a prolog, prior to a frame pointer being set up.
// - All preserved registers remain unchanged from their values in managed code.
//
.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK
push_register r9 // save R9, it might contain an objectref
push_register r8 // save R8, it might contain an objectref
push_register rdx // save RDX, it might contain an objectref
push_register rcx // save RCX, it might contain an objectref (async continuation)
push_register rax // save RAX, it might contain an objectref
lea \trashReg, [rsp + 0x20]
lea \trashReg, [rsp + 0x30]
push_register \trashReg // save caller`s RSP
push_nonvol_reg r15 // save preserved registers
push_nonvol_reg r14 // ..
push_nonvol_reg r13 // ..
push_nonvol_reg r12 // ..
push_register rdi // save RDI, volatile on Unix, might contain an objectref
push_register rsi // save RSI, volatile on Unix, might contain an objectref
push_nonvol_reg rbx // ..
push_register \BITMASK // save the register bitmask passed in by caller
push_register \threadReg // Thread * (unused by stackwalker)
push_nonvol_reg rbp // save caller`s RBP
mov \trashReg, [rsp + 12*8] // Find the return address
mov \trashReg, [rsp + 16*8] // Find the return address
push_register \trashReg // save m_RIP
lea \trashReg, [rsp + 0] // trashReg == address of frame

// allocate space for xmm0, xmm1 and alignment
alloc_stack 0x20 + 0
// allocate space for xmm0..xmm7 (FP argument registers)
alloc_stack 0x80 + 0

// save xmm0 and xmm1 in case they are used as return values
movdqa [rsp + 0x10], xmm0
movdqa [rsp + 0] , xmm1
// save FP argument registers in case they contain live values at the hijack point
movdqa [rsp + 0x70], xmm0
movdqa [rsp + 0x60], xmm1
movdqa [rsp + 0x50], xmm2
movdqa [rsp + 0x40], xmm3
movdqa [rsp + 0x30], xmm4
movdqa [rsp + 0x20], xmm5
movdqa [rsp + 0x10], xmm6
movdqa [rsp + 0x00], xmm7

// link the frame into the Thread
mov [\threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame], \trashReg
Expand All @@ -52,21 +63,31 @@
// registers and return value to their values from before the probe was called (while also updating any
// object refs or byrefs).
.macro POP_PROBE_FRAME
movdqa xmm1, [rsp + 0]
movdqa xmm0, [rsp + 0x10]
add rsp, 0x20 + 8 // skip xmm0, xmm1 and discard RIP
pop rbp
pop rax // discard Thread*
pop rax // discard BITMASK
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rax // discard caller RSP
pop rax
pop rcx
pop rdx
movdqa xmm7, [rsp + 0x00]
movdqa xmm6, [rsp + 0x10]
movdqa xmm5, [rsp + 0x20]
movdqa xmm4, [rsp + 0x30]
movdqa xmm3, [rsp + 0x40]
movdqa xmm2, [rsp + 0x50]
movdqa xmm1, [rsp + 0x60]
movdqa xmm0, [rsp + 0x70]
free_stack 0x80 + 8 // skip xmm0..xmm7 and discard RIP
pop_nonvol_reg rbp
pop_register rax // discard Thread*
pop_register rax // discard BITMASK
pop_nonvol_reg rbx
pop_register rsi
pop_register rdi
pop_nonvol_reg r12
pop_nonvol_reg r13
pop_nonvol_reg r14
pop_nonvol_reg r15
pop_register rax // discard caller RSP
pop_register rax
pop_register rcx
pop_register rdx
pop_register r8
pop_register r9
.endm

//
Expand All @@ -78,38 +99,69 @@
//
// Register state on exit:
// R11: thread pointer
// RAX, RCX, RDX preserved, other volatile regs trashed
// RAX, RCX, RDX, RSI, RDI, R8, R9, xmm0-xmm7 preserved, R10 trashed
//
.macro FixupHijackedCallstack
// preserve RAX, RDX as they may contain return values
push rax
push rdx
// preserve RAX (return value) and RDX (return value or argument) across INLINE_GETTHREAD
push_register rax
push_register rdx

// preserve RCX as it may contain async continuation return value
push rcx

// align stack
sub rsp, 0x8
push_register rcx

// preserve RSI, RDI, R8 and R9 as they may contain GC refs
push_register rsi
push_register rdi
push_register r8
push_register r9

// allocate space for xmm0..xmm7 + alignment (0x80 for xmm regs + 0x8 for 16-byte alignment)
alloc_stack 0x88

// save FP argument registers that would be clobbered by INLINE_GETTHREAD call
movdqa [rsp + 0x70], xmm0
movdqa [rsp + 0x60], xmm1
movdqa [rsp + 0x50], xmm2
movdqa [rsp + 0x40], xmm3
movdqa [rsp + 0x30], xmm4
movdqa [rsp + 0x20], xmm5
movdqa [rsp + 0x10], xmm6
movdqa [rsp + 0x00], xmm7

// rax = GetThread(), makes nested calls
INLINE_GETTHREAD
mov r11, rax

add rsp, 0x8
// restore FP argument registers
movdqa xmm7, [rsp + 0x00]
movdqa xmm6, [rsp + 0x10]
movdqa xmm5, [rsp + 0x20]
movdqa xmm4, [rsp + 0x30]
movdqa xmm3, [rsp + 0x40]
movdqa xmm2, [rsp + 0x50]
movdqa xmm1, [rsp + 0x60]
movdqa xmm0, [rsp + 0x70]

free_stack 0x88

pop_register r9
pop_register r8
pop_register rdi
pop_register rsi

pop rcx
pop_register rcx

pop rdx
pop rax
pop_register rdx
pop_register rax

// Fix the stack by pushing the original return address
mov r8, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r8
mov r10, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r10

// Clear hijack state
xor r8, r8
mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r8
mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r8
xor r10, r10
mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r10
mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r10
.endm

//
Expand All @@ -124,12 +176,12 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
ret

LOCAL_LABEL(WaitForGC):
mov r8d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_THREAD_HIJACK
mov r10d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RSI + PTFF_SAVE_RDI + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_SAVE_R8 + PTFF_SAVE_R9 + PTFF_THREAD_HIJACK
jmp C_FUNC(RhpWaitForGC)
NESTED_END RhpGcProbeHijack, _TEXT

NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
PUSH_PROBE_FRAME r11, rax, r8
PUSH_PROBE_FRAME r11, rax, r10
END_PROLOGUE

mov rbx, r11
Expand Down
67 changes: 40 additions & 27 deletions src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
include AsmMacros.inc

;;
;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX/RCX and accepts
;; the register bitmask
;; This macro is very similar to PUSH_COOP_PINVOKE_FRAME, but it also saves RAX, the volatile integer
;; argument registers (RCX, RDX, R8, R9) and xmm0..xmm3, and it accepts the register bitmask
;;
;; On entry:
;; - BITMASK: bitmask describing pushes, a volatile register
;; - RAX: managed function return value, may be an object or byref
;; - RCX: managed function return value (async continuation), may be an object
;; - RDX, R8, R9: may contain objects or byrefs at the hijack point
;; - preserved regs: need to stay preserved, may contain objects or byrefs
;;
;; INVARIANTS
Expand All @@ -19,9 +20,12 @@ include AsmMacros.inc
;;
PUSH_PROBE_FRAME macro threadReg, trashReg, BITMASK

push_vol_reg r9 ; save R9, it might contain an objectref
push_vol_reg r8 ; save R8, it might contain an objectref
push_vol_reg rdx ; save RDX, it might contain an objectref
push_vol_reg rcx ; save RCX, it might contain an objectref (async continuation)
push_vol_reg rax ; save RAX, it might contain an objectref
lea trashReg, [rsp + 18h]
lea trashReg, [rsp + 30h]
push_vol_reg trashReg ; save caller's RSP
push_nonvol_reg r15 ; save preserved registers
push_nonvol_reg r14 ; ..
Expand All @@ -33,15 +37,18 @@ PUSH_PROBE_FRAME macro threadReg, trashReg, BITMASK
push_vol_reg BITMASK ; save the register bitmask passed in by caller
push_vol_reg threadReg ; Thread * (unused by stackwalker)
push_nonvol_reg rbp ; save caller's RBP
mov trashReg, [rsp + 13*8] ; Find the return address
mov trashReg, [rsp + 16*8] ; Find the return address
push_vol_reg trashReg ; save m_RIP
lea trashReg, [rsp + 0] ; trashReg == address of frame

;; allocate scratch space and any required alignment
alloc_stack 20h + 10h + 8
;; allocate scratch space (20h home space + 40h for xmm0..xmm3)
alloc_stack 20h + 40h

;; save xmm0 in case it's being used as a return value
movdqa [rsp + 20h], xmm0
;; save xmm argument registers in case they contain live values at the hijack point
movdqa [rsp + 20h + 00h], xmm0
movdqa [rsp + 20h + 10h], xmm1
movdqa [rsp + 20h + 20h], xmm2
movdqa [rsp + 20h + 30h], xmm3

;; link the frame into the Thread
mov [threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame], trashReg
Expand All @@ -53,8 +60,11 @@ endm
;; object refs or byrefs).
;;
POP_PROBE_FRAME macro
movdqa xmm0, [rsp + 20h]
add rsp, 20h + 10h + 8 + 8 ; deallocate stack and discard saved m_RIP
movdqa xmm0, [rsp + 20h + 00h]
movdqa xmm1, [rsp + 20h + 10h]
movdqa xmm2, [rsp + 20h + 20h]
movdqa xmm3, [rsp + 20h + 30h]
add rsp, 20h + 40h + 8 ; deallocate scratch space and discard saved m_RIP
pop rbp
pop rax ; discard Thread*
pop rax ; discard BITMASK
Expand All @@ -68,6 +78,9 @@ POP_PROBE_FRAME macro
pop rax ; discard caller RSP
pop rax
pop rcx
pop rdx
pop r8
pop r9
endm

;;
Expand All @@ -78,21 +91,21 @@ endm
;; All registers correct for return to the original return address.
;;
;; Register state on exit:
;; RDX: thread pointer
;; RAX/RCX: preserved, other volatile regs trashed
;; R10: thread pointer
;; RAX/RCX/RDX/R8/R9: preserved, R11 trashed
;;
FixupHijackedCallstack macro
;; rdx <- GetThread(), TRASHES r8
INLINE_GETTHREAD rdx, r8
;; r10 <- GetThread(), TRASHES r11
INLINE_GETTHREAD r10, r11

;; Fix the stack by pushing the original return address
mov r8, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r8
mov r11, [r10 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r11

;; Clear hijack state
xor r8, r8
mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r8
mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], r8
xor r11, r11
mov [r10 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r11
mov [r10 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r11
endm

;;
Expand All @@ -106,15 +119,15 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT
jnz @f
ret
@@:
mov r8d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_THREAD_HIJACK
mov r11d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_SAVE_R8 + PTFF_SAVE_R9 + PTFF_THREAD_HIJACK
jmp RhpWaitForGC
NESTED_END RhpGcProbeHijack, _TEXT

NESTED_ENTRY RhpWaitForGC, _TEXT
PUSH_PROBE_FRAME rdx, rax, r8
PUSH_PROBE_FRAME r10, rax, r11
END_PROLOGUE

mov rbx, rdx
mov rbx, r10
mov rcx, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame]
call RhpWaitForGC2

Expand Down Expand Up @@ -147,7 +160,7 @@ ifdef FEATURE_GC_STRESS
;;
LEAF_ENTRY RhpGcStressHijack, _TEXT
FixupHijackedCallstack
mov r8d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX
mov r11d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_SAVE_R8 + PTFF_SAVE_R9
jmp RhpGcStressProbe
LEAF_END RhpGcStressHijack, _TEXT

Expand All @@ -157,15 +170,15 @@ LEAF_END RhpGcStressHijack, _TEXT
;; This worker performs the GC Stress work and returns to the original return address.
;;
;; Register state on entry:
;; RDX: thread pointer
;; R8: register bitmask
;; R10: thread pointer
;; R11: register bitmask
;;
;; Register state on exit:
;; Scratch registers, except for RAX/RCX, have been trashed
;; Scratch registers, except for RAX/RCX/RDX/R8/R9, have been trashed
;; All other registers restored as they were when the hijack was first reached.
;;
NESTED_ENTRY RhpGcStressProbe, _TEXT
PUSH_PROBE_FRAME rdx, rax, r8
PUSH_PROBE_FRAME r10, rax, r11
END_PROLOGUE

call RhpStressGc
Expand Down
Loading