66#include <unixasmmacros. inc >
77
88//
9- // See PUSH_COOP_PINVOKE_FRAME , this macro is very similar , but also saves RAX / RCX / RDX and accepts the register
10- // bitmask in R8
9+ // See PUSH_COOP_PINVOKE_FRAME , this macro is very similar , but also saves volatile argument registers
10+ // and accepts the register bitmask
1111//
1212// On entry:
1313// - BITMASK: bitmask describing pushes , may be volatile register or constant value
1414// - RAX : managed function return value , may be an object or byref
15+ // - RSI , RDI , RCX , RDX , R8 , R9 : may contain objects or byrefs at the hijack point
1516// - preserved regs: need to stay preserved , may contain objects or byrefs
1617//
1718// INVARIANTS
1819// - The macro assumes it is called from a prolog , prior to a frame pointer being setup.
1920// - All preserved registers remain unchanged from their values in managed code.
2021//
2122.macro PUSH_PROBE_FRAME threadReg , trashReg , BITMASK
23+ push_register r9 // save R9 , it might contain an objectref
24+ push_register r8 // save R8 , it might contain an objectref
2225 push_register rdx // save RDX , it might contain an objectref
2326 push_register rcx // save RCX , it might contain an objectref (async continuation)
2427 push_register rax // save RAX , it might contain an objectref
25- lea \trashReg , [ rsp + 0x20 ]
28+ lea \trashReg , [ rsp + 0x30 ]
2629 push_register \trashReg // save caller`s RSP
2730 push_nonvol_reg r15 // save preserved registers
2831 push_nonvol_reg r14 // ..
2932 push_nonvol_reg r13 // ..
3033 push_nonvol_reg r12 // ..
34+ push_register rdi // save RDI , volatile on Unix , might contain an objectref
35+ push_register rsi // save RSI , volatile on Unix , might contain an objectref
3136 push_nonvol_reg rbx // ..
3237 push_register \BITMASK // save the register bitmask passed in by caller
3338 push_register \threadReg // Thread * (unused by stackwalker)
3439 push_nonvol_reg rbp // save caller`s RBP
35- mov \trashReg , [ rsp + 12 * 8 ] // Find the return address
40+ mov \trashReg , [ rsp + 16 * 8 ] // Find the return address
3641 push_register \trashReg // save m_RIP
3742 lea \trashReg , [ rsp + 0 ] // trashReg == address of frame
3843
39- // allocate space for xmm0 , xmm1 and alignment
40- alloc_stack 0x20 + 0
44+ // allocate space for xmm0 .. xmm7 (FP argument registers)
45+ alloc_stack 0x80 + 0
4146
42- // save xmm0 and xmm1 in case they are used as return values
43- movdqa [ rsp + 0x10 ], xmm0
44- movdqa [ rsp + 0 ] , xmm1
47+ // save FP argument registers in case they contain live values at the hijack point
48+ movdqa [ rsp + 0x70 ], xmm0
49+ movdqa [ rsp + 0x60 ], xmm1
50+ movdqa [ rsp + 0x50 ], xmm2
51+ movdqa [ rsp + 0x40 ], xmm3
52+ movdqa [ rsp + 0x30 ], xmm4
53+ movdqa [ rsp + 0x20 ], xmm5
54+ movdqa [ rsp + 0x10 ], xmm6
55+ movdqa [ rsp + 0x00 ], xmm7
4556
4657 // link the frame into the Thread
4758 mov [ \threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame ], \trashReg
5263// registers and return value to their values from before the probe was called (while also updating any
5364// object refs or byrefs).
5465.macro POP_PROBE_FRAME
55- movdqa xmm1 , [ rsp + 0 ]
56- movdqa xmm0 , [ rsp + 0x10 ]
57- add rsp , 0x20 + 8 // skip xmm0 , xmm1 and discard RIP
58- pop rbp
59- pop rax // discard Thread *
60- pop rax // discard BITMASK
61- pop rbx
62- pop r12
63- pop r13
64- pop r14
65- pop r15
66- pop rax // discard caller RSP
67- pop rax
68- pop rcx
69- pop rdx
66+ movdqa xmm7 , [ rsp + 0x00 ]
67+ movdqa xmm6 , [ rsp + 0x10 ]
68+ movdqa xmm5 , [ rsp + 0x20 ]
69+ movdqa xmm4 , [ rsp + 0x30 ]
70+ movdqa xmm3 , [ rsp + 0x40 ]
71+ movdqa xmm2 , [ rsp + 0x50 ]
72+ movdqa xmm1 , [ rsp + 0x60 ]
73+ movdqa xmm0 , [ rsp + 0x70 ]
74+ free_stack 0x80 + 8 // skip xmm0 .. xmm7 and discard RIP
75+ pop_nonvol_reg rbp
76+ pop_register rax // discard Thread *
77+ pop_register rax // discard BITMASK
78+ pop_nonvol_reg rbx
79+ pop_register rsi
80+ pop_register rdi
81+ pop_nonvol_reg r12
82+ pop_nonvol_reg r13
83+ pop_nonvol_reg r14
84+ pop_nonvol_reg r15
85+ pop_register rax // discard caller RSP
86+ pop_register rax
87+ pop_register rcx
88+ pop_register rdx
89+ pop_register r8
90+ pop_register r9
7091.endm
7192
7293//
7899//
79100// Register state on exit:
80101// R11 : thread pointer
81- // RAX , RCX , RDX preserved , other volatile regs trashed
102+ // RAX , RCX , RDX , RSI , RDI , R8 , R9 , xmm0 - xmm7 preserved , R10 trashed
82103//
83104.macro FixupHijackedCallstack
84- // preserve RAX , RDX as they may contain return values
85- push rax
86- push rdx
105+ // preserve volatile argument registers across INLINE_GETTHREAD
106+ push_register rax
107+ push_register rdx
87108
88109 // preserve RCX as it may contain async continuation return value
89- push rcx
90-
91- // align stack
92- sub rsp , 0x8
110+ push_register rcx
111+
112+ // preserve RSI , RDI , R8 and R9 as they may contain GC refs
113+ push_register rsi
114+ push_register rdi
115+ push_register r8
116+ push_register r9
117+
118+ // allocate space for xmm0 .. xmm7 + alignment ( 0x80 for xmm regs + 0x8 for 16 - byte alignment)
119+ alloc_stack 0x88
120+
121+ // save FP argument registers th at would be clobbered by INLINE_GETTHREAD call
122+ movdqa [ rsp + 0x70 ], xmm0
123+ movdqa [ rsp + 0x60 ], xmm1
124+ movdqa [ rsp + 0x50 ], xmm2
125+ movdqa [ rsp + 0x40 ], xmm3
126+ movdqa [ rsp + 0x30 ], xmm4
127+ movdqa [ rsp + 0x20 ], xmm5
128+ movdqa [ rsp + 0x10 ], xmm6
129+ movdqa [ rsp + 0x00 ], xmm7
93130
94131 // rax = GetThread() , makes nested calls
95132 INLINE_GETTHREAD
96133 mov r11 , rax
97134
98- add rsp , 0x8
135+ // restore FP argument registers
136+ movdqa xmm7 , [ rsp + 0x00 ]
137+ movdqa xmm6 , [ rsp + 0x10 ]
138+ movdqa xmm5 , [ rsp + 0x20 ]
139+ movdqa xmm4 , [ rsp + 0x30 ]
140+ movdqa xmm3 , [ rsp + 0x40 ]
141+ movdqa xmm2 , [ rsp + 0x50 ]
142+ movdqa xmm1 , [ rsp + 0x60 ]
143+ movdqa xmm0 , [ rsp + 0x70 ]
144+
145+ free_stack 0x88
146+
147+ pop_register r9
148+ pop_register r8
149+ pop_register rdi
150+ pop_register rsi
99151
100- pop rcx
152+ pop_register rcx
101153
102- pop rdx
103- pop rax
154+ pop_register rdx
155+ pop_register rax
104156
105157 // Fix the stack by pushing the original return address
106- mov r8 , [ r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress ]
107- push r8
158+ mov r10 , [ r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress ]
159+ push r10
108160
109161 // Clear hijack state
110- xor r8 , r8
111- mov [ r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation ], r8
112- mov [ r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress ], r8
162+ xor r10 , r10
163+ mov [ r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation ], r10
164+ mov [ r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress ], r10
113165.endm
114166
115167//
@@ -124,12 +176,12 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
124176 ret
125177
126178LOCAL_LABEL(WaitForGC):
127- mov r8d , DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_THREAD_HIJACK
179+ mov r10d , DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RSI + PTFF_SAVE_RDI + PTFF_SAVE_RAX + PTFF_SAVE_RCX + PTFF_SAVE_RDX + PTFF_SAVE_R8 + PTFF_SAVE_R9 + PTFF_THREAD_HIJACK
128180 jmp C_FUNC(RhpWaitForGC)
129181NESTED_END RhpGcProbeHijack , _TEXT
130182
131183NESTED_ENTRY RhpWaitForGC , _TEXT , NoHandler
132- PUSH_PROBE_FRAME r11 , rax , r8
184+ PUSH_PROBE_FRAME r11 , rax , r10
133185 END_PROLOGUE
134186
135187 mov rbx , r11
0 commit comments