diff options
Diffstat (limited to 'vstdlib/coroutine_win64.masm')
| -rw-r--r-- | vstdlib/coroutine_win64.masm | 175 |
1 files changed, 175 insertions, 0 deletions
; vstdlib/coroutine_win64.masm
;
; Win64 coroutine support routines.
; Syntax: MASM (ml64), Intel operand order.
; ABI:    Microsoft x64 - integer args in rcx, rdx, r8, r9; the caller owns
;         32 bytes of shadow space; rsp is 16-byte aligned at every call.

option casemap:none

.CODE

; import Coroutine_Finish with its mangled Microsoft Visual C++ name
?Coroutine_Finish@@YAXXZ PROTO

;-----------------------------------------------------------------------
; extern "C" void SaveNonVolatileRegs( uintptr_t regs[8] );
; In:    rcx = regs
; Out:   regs[0..7] = rbx, rbp, rsi, rdi, r12, r13, r14, r15 (in that order)
; Clobb: none
; Note:  the store order matches the layout of the registers pushed by
;        Coroutine_Launch_ASM (lowest address first).
;-----------------------------------------------------------------------
SaveNonVolatileRegs PROC FRAME
    .endprolog
    mov     qword ptr [rcx],    rbx
    mov     qword ptr [rcx+8],  rbp
    mov     qword ptr [rcx+16], rsi
    mov     qword ptr [rcx+24], rdi
    mov     qword ptr [rcx+32], r12
    mov     qword ptr [rcx+40], r13
    mov     qword ptr [rcx+48], r14
    mov     qword ptr [rcx+56], r15
    ret
SaveNonVolatileRegs ENDP

; Frame geometry of Coroutine_Launch_ASM, measured from rsp after the prolog.
LAUNCH_LOCAL_BYTES      EQU 28h     ; 32 bytes shadow space + 8 bytes alignment padding
LAUNCH_SAVED_REG_BYTES  EQU 40h     ; 8 pushed nonvolatile registers * 8 bytes
LAUNCH_RETADDR_BYTES    EQU 8       ; return address pushed by our caller's call

;-----------------------------------------------------------------------
; extern "C" void NORETURN Coroutine_Launch_ASM( byte **ppStackHigh, uintptr_t **ppLaunchParentFramePtr, void (*pfnExec)( void* ), void *pvParam )
; Per Win64 ABI, incoming params are rcx, rdx, r8, r9. initial stack pointer is half-aligned due to return address
; In:    rcx = ppStackHigh, rdx = ppLaunchParentFramePtr, r8 = pfnExec, r9 = pvParam
; Out:   *ppStackHigh            = address just above our return address
;        *ppLaunchParentFramePtr = address of the saved-register block below
; Does not return: calls pfnExec( pvParam ), then Coroutine_Finish().
;
; Stack layout after the prolog:
;   [rsp+00h .. rsp+27h]  shadow space for callees + 8 bytes padding
;   [rsp+28h .. rsp+67h]  saved rbx, rbp, rsi, rdi, r12, r13, r14, r15
;   [rsp+68h]             return address
;   [rsp+70h]             first byte above the return address
;-----------------------------------------------------------------------
Coroutine_Launch_ASM PROC FRAME
    ; x64 prolog and prolog description macros:

    ; save caller's nonvolatile registers (pushed in reverse order to match SaveNonVolatileRegs)
    ; so that we can slam new values in later to trick the x64 callstack unwind procedure
    push    r15
    .pushreg r15
    push    r14
    .pushreg r14
    push    r13
    .pushreg r13
    push    r12
    .pushreg r12
    push    rdi
    .pushreg rdi
    push    rsi
    .pushreg rsi
    push    rbp
    .pushreg rbp
    push    rbx
    .pushreg rbx

    ; stack-allocate Win64 function call shadow space for calls to pfnExec and Coroutine_Finish,
    ; plus 8 additional bytes to align the stack frame properly (comes in off by 8)
    sub     rsp, LAUNCH_LOCAL_BYTES
    .allocstack LAUNCH_LOCAL_BYTES

    .endprolog

    ; compute top of stack for coroutine: 40 bytes for stack, 64 for saved regs, 8 for return address
    ; (we do not bother including the additional unused 32 byte shadow space we own above that)
    lea     rax, [rsp + LAUNCH_LOCAL_BYTES + LAUNCH_SAVED_REG_BYTES + LAUNCH_RETADDR_BYTES]
    mov     qword ptr [rcx], rax

    ; save off the address of our saved regs so that we can memcpy over them later and trick
    ; the x64 stack unwind logic into walking up to a different Internal_Coroutine_Continue
    lea     rax, [rsp + LAUNCH_LOCAL_BYTES]
    mov     qword ptr [rdx], rax

    ; call pfnExec(pvParam)
    mov     rcx, r9
    call    r8

    ; call Coroutine_Finish - does not return
    call    ?Coroutine_Finish@@YAXXZ

    ; Never executed. Without an instruction here, the return address pushed by the
    ; call above would be the first byte past this procedure, so a lookup of unwind
    ; data by return address would resolve to whatever code follows instead of to
    ; this PROC FRAME. The breakpoint also traps if Coroutine_Finish ever returns.
    int     3

Coroutine_Launch_ASM ENDP




; Needs to match definition found in setjmp.h
_JUMP_BUFFER STRUCT
    m_Frame QWORD ?
    m_Rbx   QWORD ?
    m_Rsp   QWORD ?
    m_Rbp   QWORD ?
    m_Rsi   QWORD ?
    m_Rdi   QWORD ?
    m_R12   QWORD ?
    m_R13   QWORD ?
    m_R14   QWORD ?
    m_R15   QWORD ?
    m_Rip   QWORD ?
    m_MxCsr DWORD ?
    m_FpCsr WORD ?
    m_Spare WORD ?
    m_Xmm6  XMMWORD ?
    m_Xmm7  XMMWORD ?
    m_Xmm8  XMMWORD ?
    m_Xmm9  XMMWORD ?
    m_Xmm10 XMMWORD ?
    m_Xmm11 XMMWORD ?
    m_Xmm12 XMMWORD ?
    m_Xmm13 XMMWORD ?
    m_Xmm14 XMMWORD ?
    m_Xmm15 XMMWORD ?
_JUMP_BUFFER ENDS


;This is the reference asm for __intrinsic_setjmp() in VS2015
;mov qword ptr [rcx],rdx ; intrinsic call site does "mov rdx,rbp" followed by "add rdx,0FFFFFFFFFFFFFFC0h", looks like a nonstandard abi
;mov qword ptr [rcx+8],rbx
;mov qword ptr [rcx+18h],rbp
;mov qword ptr [rcx+20h],rsi
;mov qword ptr [rcx+28h],rdi
;mov qword ptr [rcx+30h],r12
;mov qword ptr [rcx+38h],r13
;mov qword ptr [rcx+40h],r14
;mov qword ptr [rcx+48h],r15
;lea r8,[rsp+8] ; rsp set to post-return address
;mov qword ptr [rcx+10h],r8
;mov r8,qword ptr [rsp]
;mov qword ptr [rcx+50h],r8
;stmxcsr dword ptr [rcx+58h]
;fnstcw word ptr [rcx+5Ch]
;movdqa xmmword ptr [rcx+60h],xmm6
;movdqa xmmword ptr [rcx+70h],xmm7
;movdqa xmmword ptr [rcx+80h],xmm8
;movdqa xmmword ptr [rcx+90h],xmm9
;movdqa xmmword ptr [rcx+0A0h],xmm10
;movdqa xmmword ptr [rcx+0B0h],xmm11
;movdqa xmmword ptr [rcx+0C0h],xmm12
;movdqa xmmword ptr [rcx+0D0h],xmm13
;movdqa xmmword ptr [rcx+0E0h],xmm14
;movdqa xmmword ptr [rcx+0F0h],xmm15
;xor eax,eax
;ret


;-----------------------------------------------------------------------
; extern "C" void NORETURN Coroutine_LongJmp_Unchecked( jmp_buf buf, int nResult )
; Per Win64 ABI, incoming params are rcx, rdx, r8, r9. initial stack pointer is half-aligned due to return address
; In:    rcx = buf (laid out as _JUMP_BUFFER; the movaps loads below fault
;              unless the XMM fields are 16-byte aligned)
;        edx = nResult
; Out:   control transfers to buf.m_Rip with rax = zero-extended nResult and
;        rbx, rsp, rbp, rsi, rdi, r12-r15, MXCSR, the x87 control word and
;        xmm6-xmm15 reloaded from buf.
; Clobb: rax, rdx, r10
; Note:  nResult is passed through as-is; no check or adjustment is applied.
;-----------------------------------------------------------------------
Coroutine_LongJmp_Unchecked PROC
    ;load nResult into result from initial setjmp()
    mov     eax, edx                            ; 32-bit write zero-extends into rax

    ;restore to setjmp() caller state
    mov     rdx, [rcx]._JUMP_BUFFER.m_Frame     ; appears to be an error checking value of (_JUMP_BUFFER.m_Rbp + 0FFFFFFFFFFFFFFC0h) passed non-standardly through rdx to setjmp()
    mov     rbx, [rcx]._JUMP_BUFFER.m_Rbx
    mov     rsp, [rcx]._JUMP_BUFFER.m_Rsp
    mov     rbp, [rcx]._JUMP_BUFFER.m_Rbp
    mov     rsi, [rcx]._JUMP_BUFFER.m_Rsi
    mov     rdi, [rcx]._JUMP_BUFFER.m_Rdi
    mov     r12, [rcx]._JUMP_BUFFER.m_R12
    mov     r13, [rcx]._JUMP_BUFFER.m_R13
    mov     r14, [rcx]._JUMP_BUFFER.m_R14
    mov     r15, [rcx]._JUMP_BUFFER.m_R15
    mov     r10, [rcx]._JUMP_BUFFER.m_Rip       ; store return address in r10 for return
    ldmxcsr [rcx]._JUMP_BUFFER.m_MxCsr
    fldcw   [rcx]._JUMP_BUFFER.m_FpCsr
    ;[rcx]._JUMP_BUFFER.m_Spare
    movaps  xmm6,  [rcx]._JUMP_BUFFER.m_Xmm6
    movaps  xmm7,  [rcx]._JUMP_BUFFER.m_Xmm7
    movaps  xmm8,  [rcx]._JUMP_BUFFER.m_Xmm8
    movaps  xmm9,  [rcx]._JUMP_BUFFER.m_Xmm9
    movaps  xmm10, [rcx]._JUMP_BUFFER.m_Xmm10
    movaps  xmm11, [rcx]._JUMP_BUFFER.m_Xmm11
    movaps  xmm12, [rcx]._JUMP_BUFFER.m_Xmm12
    movaps  xmm13, [rcx]._JUMP_BUFFER.m_Xmm13
    movaps  xmm14, [rcx]._JUMP_BUFFER.m_Xmm14
    movaps  xmm15, [rcx]._JUMP_BUFFER.m_Xmm15

    ;jmp instead of ret to _JUMP_BUFFER.m_Rip because setjmp() already set the _JUMP_BUFFER.m_Rsp to the post-return state
    ; No hand-emitted REX.W byte: a near indirect jmp always uses a 64-bit target in
    ; 64-bit mode, and "jmp r10" already carries its own REX prefix (for r10), so an
    ; extra 48h in front of it would be a second, ignored REX prefix.
    jmp     r10
Coroutine_LongJmp_Unchecked ENDP


_TEXT ENDS
END