Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/targetamd64.h
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@
// The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH

#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_RAX | RBM_R10 | RBM_R11)
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_INT_CALLEE_TRASH_INIT & ~RBM_ARG_REGS)
Comment thread
jkotas marked this conversation as resolved.
Outdated

#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX))
#define RBM_VALIDATE_INDIRECT_CALL_TRASH_ALL (RBM_INT_CALLEE_TRASH_ALL & ~(RBM_R10 | RBM_RCX))
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/targetarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@
// The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH

#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_R12 | RBM_R13 | RBM_R14 | RBM_R15)
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_INT_CALLEE_TRASH & ~RBM_ARG_REGS)
#define RBM_INTERFACELOOKUP_FOR_SLOT_RETURN RBM_R15
#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R15))
#define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@ PTFF_SAVE_ALL_PRESERVED equ 000000F7h ;; NOTE: RBP is not included in this set
PTFF_SAVE_RSP equ 00008000h
PTFF_SAVE_RAX equ 00000100h ;; RAX is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_RCX equ 00000200h ;; RCX is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_RDX equ 00000400h ;; RDX is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_R8 equ 00000800h ;; R8 is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_R9 equ 00001000h ;; R9 is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_ALL_SCRATCH equ 00007F00h
PTFF_THREAD_HIJACK equ 00100000h ;; indicates that this is a frame for a hijacked call

Expand Down
138 changes: 95 additions & 43 deletions src/coreclr/nativeaot/Runtime/amd64/GcProbe.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,53 @@
#include <unixasmmacros.inc>

//
// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX/RCX/RDX and accepts the register
// bitmask in R8
// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves volatile argument registers
// and accepts the register bitmask
//
// On entry:
// - BITMASK: bitmask describing pushes, may be a volatile register or a constant value
// - RAX: managed function return value, may be an object or byref
// - RSI, RDI, RCX, RDX, R8, R9: may contain objects or byrefs at the hijack point
// - preserved regs: need to stay preserved, may contain objects or byrefs
//
// INVARIANTS
// - The macro assumes it is called from a prolog, prior to a frame pointer being set up.
// - All preserved registers remain unchanged from their values in managed code.
//
.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK
push_register r9 // save R9, it might contain an objectref
Comment thread
jkotas marked this conversation as resolved.
push_register r8 // save R8, it might contain an objectref
push_register rdx // save RDX, it might contain an objectref
push_register rcx // save RCX, it might contain an objectref (async continuation)
push_register rax // save RAX, it might contain an objectref
lea \trashReg, [rsp + 0x20]
lea \trashReg, [rsp + 0x30]
push_register \trashReg // save caller`s RSP
push_nonvol_reg r15 // save preserved registers
push_nonvol_reg r14 // ..
push_nonvol_reg r13 // ..
push_nonvol_reg r12 // ..
push_register rdi // save RDI, volatile on Unix, might contain an objectref
push_register rsi // save RSI, volatile on Unix, might contain an objectref
push_nonvol_reg rbx // ..
push_register \BITMASK // save the register bitmask passed in by caller
push_register \threadReg // Thread * (unused by stackwalker)
push_nonvol_reg rbp // save caller`s RBP
mov \trashReg, [rsp + 12*8] // Find the return address
mov \trashReg, [rsp + 16*8] // Find the return address
push_register \trashReg // save m_RIP
lea \trashReg, [rsp + 0] // trashReg == address of frame

// allocate space for xmm0, xmm1 and alignment
alloc_stack 0x20 + 0
// allocate space for xmm0..xmm7 (FP argument registers)
alloc_stack 0x80 + 0

// save xmm0 and xmm1 in case they are used as return values
movdqa [rsp + 0x10], xmm0
movdqa [rsp + 0] , xmm1
// save FP argument registers in case they contain live values at the hijack point
movdqa [rsp + 0x70], xmm0
movdqa [rsp + 0x60], xmm1
movdqa [rsp + 0x50], xmm2
movdqa [rsp + 0x40], xmm3
movdqa [rsp + 0x30], xmm4
movdqa [rsp + 0x20], xmm5
movdqa [rsp + 0x10], xmm6
movdqa [rsp + 0x00], xmm7

// link the frame into the Thread
mov [\threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame], \trashReg
Expand All @@ -52,21 +63,31 @@
// registers and return value to their values from before the probe was called (while also updating any
// object refs or byrefs).
.macro POP_PROBE_FRAME
movdqa xmm1, [rsp + 0]
movdqa xmm0, [rsp + 0x10]
add rsp, 0x20 + 8 // skip xmm0, xmm1 and discard RIP
pop rbp
pop rax // discard Thread*
pop rax // discard BITMASK
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rax // discard caller RSP
pop rax
pop rcx
pop rdx
movdqa xmm7, [rsp + 0x00]
movdqa xmm6, [rsp + 0x10]
movdqa xmm5, [rsp + 0x20]
movdqa xmm4, [rsp + 0x30]
movdqa xmm3, [rsp + 0x40]
movdqa xmm2, [rsp + 0x50]
movdqa xmm1, [rsp + 0x60]
movdqa xmm0, [rsp + 0x70]
free_stack 0x80 + 8 // skip xmm0..xmm7 and discard RIP
pop_nonvol_reg rbp
pop_register rax // discard Thread*
pop_register rax // discard BITMASK
pop_nonvol_reg rbx
pop_register rsi
pop_register rdi
pop_nonvol_reg r12
pop_nonvol_reg r13
pop_nonvol_reg r14
pop_nonvol_reg r15
pop_register rax // discard caller RSP
pop_register rax
pop_register rcx
pop_register rdx
pop_register r8
pop_register r9
.endm

//
Expand All @@ -78,38 +99,69 @@
//
// Register state on exit:
// R11: thread pointer
// RAX, RCX, RDX preserved, other volatile regs trashed
// RAX, RCX, RDX, RSI, RDI, R8, R9, xmm0-xmm7 preserved, R10 trashed
//
.macro FixupHijackedCallstack
// preserve RAX, RDX as they may contain return values
push rax
push rdx
// preserve volatile argument registers across INLINE_GETTHREAD
push_register rax
push_register rdx

// preserve RCX as it may contain async continuation return value
push rcx

// align stack
sub rsp, 0x8
push_register rcx

// preserve RSI, RDI, R8 and R9 as they may contain GC refs
push_register rsi
push_register rdi
push_register r8
push_register r9

// allocate space for xmm0..xmm7 + alignment (0x80 for xmm regs + 0x8 for 16-byte alignment)
alloc_stack 0x88

// save FP argument registers that would be clobbered by INLINE_GETTHREAD call
movdqa [rsp + 0x70], xmm0
movdqa [rsp + 0x60], xmm1
movdqa [rsp + 0x50], xmm2
movdqa [rsp + 0x40], xmm3
movdqa [rsp + 0x30], xmm4
movdqa [rsp + 0x20], xmm5
movdqa [rsp + 0x10], xmm6
movdqa [rsp + 0x00], xmm7

// rax = GetThread(), makes nested calls
INLINE_GETTHREAD
mov r11, rax

add rsp, 0x8
// restore FP argument registers
movdqa xmm7, [rsp + 0x00]
movdqa xmm6, [rsp + 0x10]
movdqa xmm5, [rsp + 0x20]
movdqa xmm4, [rsp + 0x30]
movdqa xmm3, [rsp + 0x40]
movdqa xmm2, [rsp + 0x50]
movdqa xmm1, [rsp + 0x60]
movdqa xmm0, [rsp + 0x70]

free_stack 0x88

pop_register r9
pop_register r8
pop_register rdi
pop_register rsi

pop rcx
pop_register rcx

pop rdx
pop rax
pop_register rdx
pop_register rax

// Fix the stack by pushing the original return address
mov r8, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r8
mov r10, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r10

// Clear hijack state
xor r8, r8
mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r8
mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r8
xor r10, r10
mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r10
mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r10
.endm

//
Expand All @@ -124,12 +176,12 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
ret

LOCAL_LABEL(WaitForGC):
mov r8d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_THREAD_HIJACK
mov r10d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RSI + PTFF_SAVE_RDI + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_SAVE_R8 + PTFF_SAVE_R9 + PTFF_THREAD_HIJACK
jmp C_FUNC(RhpWaitForGC)
NESTED_END RhpGcProbeHijack, _TEXT

NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
PUSH_PROBE_FRAME r11, rax, r8
PUSH_PROBE_FRAME r11, rax, r10
END_PROLOGUE

mov rbx, r11
Expand Down
67 changes: 40 additions & 27 deletions src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
include AsmMacros.inc

;;
;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX/RCX and accepts
;; the register bitmask
;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves volatile argument registers
;; and accepts the register bitmask
;;
;; On entry:
;; - BITMASK: bitmask describing pushes, a volatile register
;; - RAX: managed function return value, may be an object or byref
;; - RCX: managed function return value (async continuation), may be an object
;; - RDX, R8, R9: may contain objects or byrefs at the hijack point
;; - preserved regs: need to stay preserved, may contain objects or byrefs
;;
;; INVARIANTS
Expand All @@ -19,9 +20,12 @@ include AsmMacros.inc
;;
PUSH_PROBE_FRAME macro threadReg, trashReg, BITMASK

push_vol_reg r9 ; save R9, it might contain an objectref
push_vol_reg r8 ; save R8, it might contain an objectref
push_vol_reg rdx ; save RDX, it might contain an objectref
push_vol_reg rcx ; save RCX, it might contain an objectref (async continuation)
push_vol_reg rax ; save RAX, it might contain an objectref
lea trashReg, [rsp + 18h]
lea trashReg, [rsp + 30h]
push_vol_reg trashReg ; save caller's RSP
push_nonvol_reg r15 ; save preserved registers
push_nonvol_reg r14 ; ..
Expand All @@ -33,15 +37,18 @@ PUSH_PROBE_FRAME macro threadReg, trashReg, BITMASK
push_vol_reg BITMASK ; save the register bitmask passed in by caller
push_vol_reg threadReg ; Thread * (unused by stackwalker)
push_nonvol_reg rbp ; save caller's RBP
mov trashReg, [rsp + 13*8] ; Find the return address
mov trashReg, [rsp + 16*8] ; Find the return address
push_vol_reg trashReg ; save m_RIP
lea trashReg, [rsp + 0] ; trashReg == address of frame

;; allocate scratch space and any required alignment
alloc_stack 20h + 10h + 8
;; allocate scratch space (20h home space + 40h for xmm0..xmm3)
alloc_stack 20h + 40h

;; save xmm0 in case it's being used as a return value
movdqa [rsp + 20h], xmm0
;; save xmm argument registers in case they contain live values at the hijack point
movdqa [rsp + 20h + 00h], xmm0
movdqa [rsp + 20h + 10h], xmm1
movdqa [rsp + 20h + 20h], xmm2
movdqa [rsp + 20h + 30h], xmm3

;; link the frame into the Thread
mov [threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame], trashReg
Expand All @@ -53,8 +60,11 @@ endm
;; object refs or byrefs).
;;
POP_PROBE_FRAME macro
movdqa xmm0, [rsp + 20h]
add rsp, 20h + 10h + 8 + 8 ; deallocate stack and discard saved m_RIP
movdqa xmm0, [rsp + 20h + 00h]
movdqa xmm1, [rsp + 20h + 10h]
movdqa xmm2, [rsp + 20h + 20h]
movdqa xmm3, [rsp + 20h + 30h]
add rsp, 20h + 40h + 8 ; deallocate scratch space and discard saved m_RIP
pop rbp
pop rax ; discard Thread*
pop rax ; discard BITMASK
Expand All @@ -68,6 +78,9 @@ POP_PROBE_FRAME macro
pop rax ; discard caller RSP
pop rax
pop rcx
pop rdx
pop r8
pop r9
endm

;;
Expand All @@ -78,21 +91,21 @@ endm
;; All registers correct for return to the original return address.
;;
;; Register state on exit:
;; RDX: thread pointer
;; RAX/RCX: preserved, other volatile regs trashed
;; R10: thread pointer
;; RAX/RCX/RDX/R8/R9: preserved, R11 trashed
;;
FixupHijackedCallstack macro
;; rdx <- GetThread(), TRASHES r8
INLINE_GETTHREAD rdx, r8
;; r10 <- GetThread(), TRASHES r11
INLINE_GETTHREAD r10, r11

;; Fix the stack by pushing the original return address
mov r8, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r8
mov r11, [r10 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r11

;; Clear hijack state
xor r8, r8
mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r8
mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], r8
xor r11, r11
mov [r10 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r11
mov [r10 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r11
endm

;;
Expand All @@ -106,15 +119,15 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT
jnz @f
ret
@@:
mov r8d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_THREAD_HIJACK
mov r11d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_SAVE_R8 + PTFF_SAVE_R9 + PTFF_THREAD_HIJACK
jmp RhpWaitForGC
NESTED_END RhpGcProbeHijack, _TEXT

NESTED_ENTRY RhpWaitForGC, _TEXT
PUSH_PROBE_FRAME rdx, rax, r8
PUSH_PROBE_FRAME r10, rax, r11
END_PROLOGUE

mov rbx, rdx
mov rbx, r10
mov rcx, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame]
call RhpWaitForGC2

Expand Down Expand Up @@ -147,7 +160,7 @@ ifdef FEATURE_GC_STRESS
;;
LEAF_ENTRY RhpGcStressHijack, _TEXT
FixupHijackedCallstack
mov r8d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX
mov r11d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_SAVE_R8 + PTFF_SAVE_R9
jmp RhpGcStressProbe
LEAF_END RhpGcStressHijack, _TEXT

Expand All @@ -157,15 +170,15 @@ LEAF_END RhpGcStressHijack, _TEXT
;; This worker performs the GC Stress work and returns to the original return address.
;;
;; Register state on entry:
;; RDX: thread pointer
;; R8: register bitmask
;; R10: thread pointer
;; R11: register bitmask
;;
;; Register state on exit:
;; Scratch registers, except for RAX/RCX, have been trashed
;; Scratch registers, except for RAX/RCX/RDX/R8/R9, have been trashed
;; All other registers restored as they were when the hijack was first reached.
;;
NESTED_ENTRY RhpGcStressProbe, _TEXT
PUSH_PROBE_FRAME rdx, rax, r8
PUSH_PROBE_FRAME r10, rax, r11
END_PROLOGUE

call RhpStressGc
Expand Down
Loading
Loading