Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/coreclr/jit/redundantbranchopts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1762,9 +1762,9 @@ Compiler::JumpThreadCheckResult Compiler::optJumpThreadCheck(BasicBlock* const b
//
if (ssaVarDsc->HasGlobalUse())
{
JITDUMP(FMT_BB " has global phi for V%02u.%u; deferring jump threading pending use analysis\n",
JITDUMP(FMT_BB " has global phi for V%02u.%u; no phi-based threading\n",
Comment thread
MichalStrehovsky marked this conversation as resolved.
Outdated
block->bbNum, lclNum, ssaNum);
hasGlobalPhiUses = true;
return JumpThreadCheckResult::CannotThread;
}
Comment thread
MichalStrehovsky marked this conversation as resolved.
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/targetamd64.h
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@
// The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH

#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_RAX | RBM_R10 | RBM_R11)
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS | RBM_FLTARG_REGS))

#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX))
#define RBM_VALIDATE_INDIRECT_CALL_TRASH_ALL (RBM_INT_CALLEE_TRASH_ALL & ~(RBM_R10 | RBM_RCX))
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/targetarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@
// The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH

#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_R12 | RBM_R13 | RBM_R14 | RBM_R15)
#define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS | RBM_FLTARG_REGS))
#define RBM_INTERFACELOOKUP_FOR_SLOT_RETURN RBM_R15
#define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R15))
#define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@ PTFF_SAVE_ALL_PRESERVED equ 000000F7h ;; NOTE: RBP is not included in this set
PTFF_SAVE_RSP equ 00008000h
PTFF_SAVE_RAX equ 00000100h ;; RAX is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_RCX equ 00000200h ;; RCX is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_RDX equ 00000400h ;; RDX is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_R8 equ 00000800h ;; R8 is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_R9 equ 00001000h ;; R9 is saved in hijack handler - in case it contains a GC ref
PTFF_SAVE_ALL_SCRATCH equ 00007F00h
PTFF_THREAD_HIJACK equ 00100000h ;; indicates that this is a frame for a hijacked call

Expand Down
138 changes: 95 additions & 43 deletions src/coreclr/nativeaot/Runtime/amd64/GcProbe.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,53 @@
#include <unixasmmacros.inc>

//
// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX/RCX/RDX and accepts the register
// bitmask in R8
// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves volatile argument registers
// and accepts the register bitmask
//
// On entry:
// - BITMASK: bitmask describing pushes, may be volatile register or constant value
// - RAX: managed function return value, may be an object or byref
// - RSI, RDI, RCX, RDX, R8, R9: may contain objects or byrefs at the hijack point
// - preserved regs: need to stay preserved, may contain objects or byrefs
//
// INVARIANTS
// - The macro assumes it is called from a prolog, prior to a frame pointer being set up.
// - All preserved registers remain unchanged from their values in managed code.
//
.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK
push_register r9 // save R9, it might contain an objectref
Comment thread
jkotas marked this conversation as resolved.
push_register r8 // save R8, it might contain an objectref
push_register rdx // save RDX, it might contain an objectref
push_register rcx // save RCX, it might contain an objectref (async continuation)
push_register rax // save RAX, it might contain an objectref
lea \trashReg, [rsp + 0x20]
lea \trashReg, [rsp + 0x30]
push_register \trashReg // save caller`s RSP
push_nonvol_reg r15 // save preserved registers
push_nonvol_reg r14 // ..
push_nonvol_reg r13 // ..
push_nonvol_reg r12 // ..
push_register rdi // save RDI, volatile on Unix, might contain an objectref
push_register rsi // save RSI, volatile on Unix, might contain an objectref
push_nonvol_reg rbx // ..
push_register \BITMASK // save the register bitmask passed in by caller
push_register \threadReg // Thread * (unused by stackwalker)
push_nonvol_reg rbp // save caller`s RBP
mov \trashReg, [rsp + 12*8] // Find the return address
mov \trashReg, [rsp + 16*8] // Find the return address
push_register \trashReg // save m_RIP
lea \trashReg, [rsp + 0] // trashReg == address of frame

// allocate space for xmm0, xmm1 and alignment
alloc_stack 0x20 + 0
// allocate space for xmm0..xmm7 (FP argument registers)
alloc_stack 0x80 + 0

// save xmm0 and xmm1 in case they are used as return values
movdqa [rsp + 0x10], xmm0
movdqa [rsp + 0] , xmm1
// save FP argument registers in case they contain live values at the hijack point
movdqa [rsp + 0x70], xmm0
movdqa [rsp + 0x60], xmm1
movdqa [rsp + 0x50], xmm2
movdqa [rsp + 0x40], xmm3
movdqa [rsp + 0x30], xmm4
movdqa [rsp + 0x20], xmm5
movdqa [rsp + 0x10], xmm6
movdqa [rsp + 0x00], xmm7

// link the frame into the Thread
mov [\threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame], \trashReg
Expand All @@ -52,21 +63,31 @@
// registers and return value to their values from before the probe was called (while also updating any
// object refs or byrefs).
.macro POP_PROBE_FRAME
movdqa xmm1, [rsp + 0]
movdqa xmm0, [rsp + 0x10]
add rsp, 0x20 + 8 // skip xmm0, xmm1 and discard RIP
pop rbp
pop rax // discard Thread*
pop rax // discard BITMASK
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rax // discard caller RSP
pop rax
pop rcx
pop rdx
movdqa xmm7, [rsp + 0x00]
movdqa xmm6, [rsp + 0x10]
movdqa xmm5, [rsp + 0x20]
movdqa xmm4, [rsp + 0x30]
movdqa xmm3, [rsp + 0x40]
movdqa xmm2, [rsp + 0x50]
movdqa xmm1, [rsp + 0x60]
movdqa xmm0, [rsp + 0x70]
free_stack 0x80 + 8 // skip xmm0..xmm7 and discard RIP
pop_nonvol_reg rbp
pop_register rax // discard Thread*
pop_register rax // discard BITMASK
pop_nonvol_reg rbx
pop_register rsi
pop_register rdi
pop_nonvol_reg r12
pop_nonvol_reg r13
pop_nonvol_reg r14
pop_nonvol_reg r15
pop_register rax // discard caller RSP
pop_register rax
pop_register rcx
pop_register rdx
pop_register r8
pop_register r9
.endm

//
Expand All @@ -78,38 +99,69 @@
//
// Register state on exit:
// R11: thread pointer
// RAX, RCX, RDX preserved, other volatile regs trashed
// RAX, RCX, RDX, RSI, RDI, R8, R9, xmm0-xmm7 preserved, R10 trashed
//
.macro FixupHijackedCallstack
// preserve RAX, RDX as they may contain return values
push rax
push rdx
// preserve RAX (managed return value) and RDX across INLINE_GETTHREAD; both may hold live values
push_register rax
push_register rdx

// preserve RCX as it may contain async continuation return value
push rcx

// align stack
sub rsp, 0x8
push_register rcx

// preserve RSI, RDI, R8 and R9 as they may contain GC refs
push_register rsi
push_register rdi
push_register r8
push_register r9

// allocate space for xmm0..xmm7 + alignment (0x80 for xmm regs + 0x8 for 16-byte alignment)
alloc_stack 0x88

// save FP argument registers, which may be clobbered by the nested calls that INLINE_GETTHREAD makes
movdqa [rsp + 0x70], xmm0
movdqa [rsp + 0x60], xmm1
movdqa [rsp + 0x50], xmm2
movdqa [rsp + 0x40], xmm3
movdqa [rsp + 0x30], xmm4
movdqa [rsp + 0x20], xmm5
movdqa [rsp + 0x10], xmm6
movdqa [rsp + 0x00], xmm7

// rax = GetThread(), makes nested calls
INLINE_GETTHREAD
mov r11, rax

add rsp, 0x8
// restore FP argument registers
movdqa xmm7, [rsp + 0x00]
movdqa xmm6, [rsp + 0x10]
movdqa xmm5, [rsp + 0x20]
movdqa xmm4, [rsp + 0x30]
movdqa xmm3, [rsp + 0x40]
movdqa xmm2, [rsp + 0x50]
movdqa xmm1, [rsp + 0x60]
movdqa xmm0, [rsp + 0x70]

free_stack 0x88

pop_register r9
pop_register r8
pop_register rdi
pop_register rsi

pop rcx
pop_register rcx

pop rdx
pop rax
pop_register rdx
pop_register rax

// Fix the stack by pushing the original return address
mov r8, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r8
mov r10, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress]
push r10

// Clear hijack state
xor r8, r8
mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r8
mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r8
xor r10, r10
mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r10
mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r10
.endm

//
Expand All @@ -124,12 +176,12 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
ret

LOCAL_LABEL(WaitForGC):
mov r8d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_THREAD_HIJACK
mov r10d, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RSI + PTFF_SAVE_RDI + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_SAVE_R8 + PTFF_SAVE_R9 + PTFF_THREAD_HIJACK
jmp C_FUNC(RhpWaitForGC)
NESTED_END RhpGcProbeHijack, _TEXT

NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
PUSH_PROBE_FRAME r11, rax, r8
PUSH_PROBE_FRAME r11, rax, r10
END_PROLOGUE

mov rbx, r11
Expand Down
Loading
Loading