summaryrefslogtreecommitdiff
path: root/vstdlib/coroutine_win64.masm
diff options
context:
space:
mode:
Diffstat (limited to 'vstdlib/coroutine_win64.masm')
-rw-r--r--vstdlib/coroutine_win64.masm175
1 files changed, 175 insertions, 0 deletions
diff --git a/vstdlib/coroutine_win64.masm b/vstdlib/coroutine_win64.masm
new file mode 100644
index 0000000..764ebdd
--- /dev/null
+++ b/vstdlib/coroutine_win64.masm
@@ -0,0 +1,175 @@
+option casemap:none
+
+.CODE
+
+; import Coroutine_Finish with its mangled Microsoft Visual C++ name
+?Coroutine_Finish@@YAXXZ PROTO
+
+; extern "C" void SaveNonVolatileRegs( uintptr_t regs[8] );
+;
+; Copies the nonvolatile general-purpose registers (all except rsp) into the
+; caller-supplied array, one qword per register.
+; In:    rcx = regs, room for 8 qwords (64 bytes)
+; Out:   regs[0..7] = rbx, rbp, rsi, rdi, r12, r13, r14, r15
+; Clobb: nothing (only register-to-memory moves)
+SaveNonVolatileRegs PROC FRAME
+    .endprolog                          ; empty prolog: no stack use, no unwind codes
+    mov     [rcx + 00h], rbx            ; regs[0]
+    mov     [rcx + 08h], rbp            ; regs[1]
+    mov     [rcx + 10h], rsi            ; regs[2]
+    mov     [rcx + 18h], rdi            ; regs[3]
+    mov     [rcx + 20h], r12            ; regs[4]
+    mov     [rcx + 28h], r13            ; regs[5]
+    mov     [rcx + 30h], r14            ; regs[6]
+    mov     [rcx + 38h], r15            ; regs[7]
+    ret
+SaveNonVolatileRegs ENDP
+
+; extern "C" void NORETURN Coroutine_Launch_ASM( byte **ppStackHigh, uintptr_t **ppLaunchParentFramePtr, void (*pfnExec)( void* ), void *pvParam )
+; Per Win64 ABI, incoming params are rcx, rdx, r8, r9. initial stack pointer is half-aligned due to return address
+;
+; In:   rcx = ppStackHigh             (receives the address just above our return address)
+;       rdx = ppLaunchParentFramePtr  (receives the address of the saved nonvolatile register block)
+;       r8  = pfnExec
+;       r9  = pvParam
+; Out:  does not return; calls pfnExec( pvParam ) and then Coroutine_Finish()
+;
+; Frame after the prolog, as offsets from rsp:
+;   00h-1Fh  shadow space for the calls made from here
+;   20h-27h  padding that brings rsp to a 16-byte boundary
+;   28h-67h  saved rbx, rbp, rsi, rdi, r12, r13, r14, r15 (same order as SaveNonVolatileRegs)
+;   68h      return address
+;   70h      first byte above the return address (value stored to *ppStackHigh)
+Coroutine_Launch_ASM PROC FRAME
+    ; x64 prolog and prolog description macros:
+
+    ; save caller's nonvolatile registers (pushed in reverse order to match SaveNonVolatileRegs)
+    ; so that we can slam new values in later to trick the x64 callstack unwind procedure
+    push    r15
+    .pushreg r15
+    push    r14
+    .pushreg r14
+    push    r13
+    .pushreg r13
+    push    r12
+    .pushreg r12
+    push    rdi
+    .pushreg rdi
+    push    rsi
+    .pushreg rsi
+    push    rbp
+    .pushreg rbp
+    push    rbx
+    .pushreg rbx
+
+    ; stack-allocate Win64 function call shadow space for calls to pfnExec and Coroutine_Finish,
+    ; plus 8 additional bytes to align the stack frame properly (comes in off by 8)
+    sub     rsp, 28h
+    .allocstack 28h
+
+    .endprolog
+
+    ; compute top of stack for coroutine: 40 bytes for stack, 64 for saved regs, 8 for return address
+    ; (we do not bother including the additional unused 32 byte shadow space we own above that)
+    lea     rax, [rsp+70h]
+    mov     qword ptr [rcx], rax
+
+    ; save off the address of our saved regs so that we can memcpy over them later and trick
+    ; the x64 stack unwind logic into walking up to a different Internal_Coroutine_Continue
+    lea     rax, [rsp+28h]
+    mov     qword ptr [rdx], rax
+
+    ; call pfnExec(pvParam)
+    mov     rcx, r9
+    call    r8
+
+    ; call Coroutine_Finish - does not return
+    call    ?Coroutine_Finish@@YAXXZ
+
+    ; Keep the return address pushed by the call above inside this PROC, so that a stack
+    ; walk resolves it to Coroutine_Launch_ASM rather than to whatever follows, and trap
+    ; instead of running off the end should Coroutine_Finish ever return.
+    int     3
+
+Coroutine_Launch_ASM ENDP
+
+
+
+
+; Needs to match definition found in setjmp.h
+; Field offsets are noted on each line; they agree with the [rcx+NNh] offsets in the
+; __intrinsic_setjmp() reference listing further down. Total size is 100h bytes.
+_JUMP_BUFFER STRUCT
+ m_Frame QWORD ? ; 00h
+ m_Rbx QWORD ? ; 08h
+ m_Rsp QWORD ? ; 10h
+ m_Rbp QWORD ? ; 18h
+ m_Rsi QWORD ? ; 20h
+ m_Rdi QWORD ? ; 28h
+ m_R12 QWORD ? ; 30h
+ m_R13 QWORD ? ; 38h
+ m_R14 QWORD ? ; 40h
+ m_R15 QWORD ? ; 48h
+ m_Rip QWORD ? ; 50h
+ m_MxCsr DWORD ? ; 58h
+ m_FpCsr WORD ? ; 5Ch
+ m_Spare WORD ? ; 5Eh - fills out to the 16-byte boundary at 60h
+ m_Xmm6 XMMWORD ? ; 60h
+ m_Xmm7 XMMWORD ? ; 70h
+ m_Xmm8 XMMWORD ? ; 80h
+ m_Xmm9 XMMWORD ? ; 90h
+ m_Xmm10 XMMWORD ? ; 0A0h
+ m_Xmm11 XMMWORD ? ; 0B0h
+ m_Xmm12 XMMWORD ? ; 0C0h
+ m_Xmm13 XMMWORD ? ; 0D0h
+ m_Xmm14 XMMWORD ? ; 0E0h
+ m_Xmm15 XMMWORD ? ; 0F0h
+_JUMP_BUFFER ENDS
+
+
+;This is the reference asm for __intrinsic_setjmp() in VS2015
+;mov qword ptr [rcx],rdx ; intrinsic call site does "mov rdx,rbp" followed by "add rdx,0FFFFFFFFFFFFFFC0h", looks like a nonstandard abi
+;mov qword ptr [rcx+8],rbx
+;mov qword ptr [rcx+18h],rbp
+;mov qword ptr [rcx+20h],rsi
+;mov qword ptr [rcx+28h],rdi
+;mov qword ptr [rcx+30h],r12
+;mov qword ptr [rcx+38h],r13
+;mov qword ptr [rcx+40h],r14
+;mov qword ptr [rcx+48h],r15
+;lea r8,[rsp+8] ; rsp set to post-return address
+;mov qword ptr [rcx+10h],r8
+;mov r8,qword ptr [rsp]
+;mov qword ptr [rcx+50h],r8
+;stmxcsr dword ptr [rcx+58h]
+;fnstcw word ptr [rcx+5Ch]
+;movdqa xmmword ptr [rcx+60h],xmm6
+;movdqa xmmword ptr [rcx+70h],xmm7
+;movdqa xmmword ptr [rcx+80h],xmm8
+;movdqa xmmword ptr [rcx+90h],xmm9
+;movdqa xmmword ptr [rcx+0A0h],xmm10
+;movdqa xmmword ptr [rcx+0B0h],xmm11
+;movdqa xmmword ptr [rcx+0C0h],xmm12
+;movdqa xmmword ptr [rcx+0D0h],xmm13
+;movdqa xmmword ptr [rcx+0E0h],xmm14
+;movdqa xmmword ptr [rcx+0F0h],xmm15
+;xor eax,eax
+;ret
+
+
+; extern "C" void NORETURN Coroutine_LongJmp_Unchecked( jmp_buf buf, int nResult )
+; Per Win64 ABI, incoming params are rcx, rdx, r8, r9. initial stack pointer is half-aligned due to return address
+;
+; In:   rcx = buf, laid out as _JUMP_BUFFER
+;       edx = nResult
+; Out:  never returns to its caller. Execution continues at buf.m_Rip with eax = nResult,
+;       rsp and the nonvolatile integer / xmm6-xmm15 registers reloaded from buf, and
+;       MXCSR and the x87 control word reloaded as well.
+; Note: nResult is passed through unchanged (no remapping of 0) and buf is not validated.
+Coroutine_LongJmp_Unchecked PROC
+    ; load nResult into result from initial setjmp()
+    mov     eax, edx                            ; 32-bit write already zero-extends into rax
+
+    ; restore to setjmp() caller state
+    mov     rdx, [rcx]._JUMP_BUFFER.m_Frame     ; appears to be an error checking value of (_JUMP_BUFFER.m_Rbp + 0FFFFFFFFFFFFFFC0h) passed non-standardly through rdx to setjmp()
+    mov     rbx, [rcx]._JUMP_BUFFER.m_Rbx
+    mov     rsp, [rcx]._JUMP_BUFFER.m_Rsp       ; buf is still addressed through rcx, so switching stacks here is fine
+    mov     rbp, [rcx]._JUMP_BUFFER.m_Rbp
+    mov     rsi, [rcx]._JUMP_BUFFER.m_Rsi
+    mov     rdi, [rcx]._JUMP_BUFFER.m_Rdi
+    mov     r12, [rcx]._JUMP_BUFFER.m_R12
+    mov     r13, [rcx]._JUMP_BUFFER.m_R13
+    mov     r14, [rcx]._JUMP_BUFFER.m_R14
+    mov     r15, [rcx]._JUMP_BUFFER.m_R15
+    mov     r10, [rcx]._JUMP_BUFFER.m_Rip       ; resume address, held in volatile r10 for the final jmp
+    ldmxcsr [rcx]._JUMP_BUFFER.m_MxCsr
+    fldcw   [rcx]._JUMP_BUFFER.m_FpCsr
+    ; [rcx]._JUMP_BUFFER.m_Spare is not restored
+    movaps  xmm6, [rcx]._JUMP_BUFFER.m_Xmm6
+    movaps  xmm7, [rcx]._JUMP_BUFFER.m_Xmm7
+    movaps  xmm8, [rcx]._JUMP_BUFFER.m_Xmm8
+    movaps  xmm9, [rcx]._JUMP_BUFFER.m_Xmm9
+    movaps  xmm10, [rcx]._JUMP_BUFFER.m_Xmm10
+    movaps  xmm11, [rcx]._JUMP_BUFFER.m_Xmm11
+    movaps  xmm12, [rcx]._JUMP_BUFFER.m_Xmm12
+    movaps  xmm13, [rcx]._JUMP_BUFFER.m_Xmm13
+    movaps  xmm14, [rcx]._JUMP_BUFFER.m_Xmm14
+    movaps  xmm15, [rcx]._JUMP_BUFFER.m_Xmm15
+
+    ; jmp instead of ret to _JUMP_BUFFER.m_Rip because setjmp() already set the _JUMP_BUFFER.m_Rsp to the post-return state.
+    ; No hand-emitted REX byte: a near indirect jmp in 64-bit mode always uses a 64-bit target,
+    ; and the assembler already emits the REX prefix that r10 requires.
+    jmp     r10
+Coroutine_LongJmp_Unchecked ENDP
+
+
+_TEXT ENDS
+END