diff options
author | Thilo Schulz <arny@ats.s.bawue.de> | 2011-06-13 09:56:39 +0000 |
---|---|---|
committer | Tim Angus <tim@ngus.net> | 2013-01-09 23:15:55 +0000 |
commit | 6a71409a0622050f9a682d4e3b02419c444febe5 (patch) | |
tree | 7766ff71304d04c6e42de7dd7d48ed7e7e0fac59 /src/asm | |
parent | b15804d39f71e9be202818288726777d1ca8ac09 (diff) |
- Add MASM assembler files for MSVC x64 projects to support vm_x86 in x64 mode - Clean up ftol()/snapvector() mess - Make use of SSE instructions for ftol()/snapvector() if available - Move ftol/snapvector from pure assembler to inline assembler; this adds x86_64 support and improves support for different calling conventions - Set FPU control word at program startup to get consistent behaviour on all platforms
Diffstat (limited to 'src/asm')
-rw-r--r-- | src/asm/ftola.asm | 90 | ||||
-rw-r--r-- | src/asm/ftola.s | 157 | ||||
-rw-r--r-- | src/asm/snapvector.asm | 107 | ||||
-rw-r--r-- | src/asm/snapvectora.s | 103 | ||||
-rw-r--r-- | src/asm/vm_x86_64.asm | 76 |
5 files changed, 273 insertions, 260 deletions
diff --git a/src/asm/ftola.asm b/src/asm/ftola.asm
new file mode 100644
index 00000000..370c12d8
--- /dev/null
+++ b/src/asm/ftola.asm
@@ -0,0 +1,90 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+;
+; This file is part of Quake III Arena source code.
+;
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+;
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+; ===========================================================================
+
+; MASM float -> int conversion helpers (qftol*), built on SSE or the x87 FPU.
+; Calling convention assumed: __cdecl on x86, __fastcall on x64.
+;
+; qftol* take the float as an ordinary argument (xmm0 on x64, [esp + 4] on x86).
+; qvmftol* read the float from [edi + ebx * 4] ([rdi + rbx * 4] on x64).
+; All variants return the integer in eax. The SSE variants truncate toward
+; zero (cvttss2si); the x87 variants round per the current FPU control word.
+
+IFNDEF idx64
+.model flat, c
+ENDIF
+
+.code
+
+IFDEF idx64
+; ---- x64: SSE only ----
+
+  qftolsse PROC
+    cvttss2si eax, xmm0
+    ret
+  qftolsse ENDP
+
+  qvmftolsse PROC
+    movss xmm0, dword ptr [rdi + rbx * 4]
+    cvttss2si eax, xmm0
+    ret
+  qvmftolsse ENDP
+
+ELSE
+; ---- x86: x87 FPU ----
+;
+; The FPU control word is not switched around the conversion: it is set
+; once with _controlfp at startup, so fistp follows that setting. (A
+; save/load/restore sequence using a 0F7Fh control word was left disabled.)
+
+  ; Convert the float at "slot" in place and copy the result to eax.
+  ; The memory at "slot" is overwritten with the integer value.
+  x87ftol macro slot
+    fld dword ptr slot
+    fistp dword ptr slot
+    mov eax, slot
+  endm
+
+  qftolx87 PROC
+    x87ftol [esp + 4]
+    ret
+  qftolx87 ENDP
+
+  qvmftolx87 PROC
+    x87ftol [edi + ebx * 4]
+    ret
+  qvmftolx87 ENDP
+
+; ---- x86: SSE ----
+
+  qftolsse PROC
+    movss xmm0, dword ptr [esp + 4]
+    cvttss2si eax, xmm0
+    ret
+  qftolsse ENDP
+
+  qvmftolsse PROC
+    movss xmm0, dword ptr [edi + ebx * 4]
+    cvttss2si eax, xmm0
+    ret
+  qvmftolsse ENDP
+
+ENDIF
+
+end
diff --git a/src/asm/ftola.s b/src/asm/ftola.s
deleted file mode 100644
index ca3d8626..00000000
--- a/src/asm/ftola.s
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
-
-Quake III Arena source code is distributed in the hope that it will be
-useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with Quake III Arena source code; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-===========================================================================
-*/
-
-//
-// qftol -- fast floating point to long conversion.
-//
-
-// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus
-// <tim@ngus.net>
-
-#include "qasm.h"
-
-.data
-
-temp: .single 0.0
-fpucw: .long 0
-
-// Precision Control Field , 2 bits / 0x0300
-// PC24 0x0000 Single precision (24 bits).
-// PC53 0x0200 Double precision (53 bits).
-// PC64 0x0300 Extended precision (64 bits).
-
-// Rounding Control Field, 2 bits / 0x0C00
-// RCN 0x0000 Rounding to nearest (even).
-// RCD 0x0400 Rounding down (directed, minus). -// RCU 0x0800 Rounding up (directed plus). -// RC0 0x0C00 Rounding towards zero (chop mode). - - -// rounding towards nearest (even) -cw027F: .long 0x027F -cw037F: .long 0x037F - -// rounding towards zero (chop mode) -cw0E7F: .long 0x0E7F -cw0F7F: .long 0x0F7F - - -.text - -// -// int qftol( void ) - default control word -// - -.globl C(qftol) - -C(qftol): - fistpl temp - movl temp,%eax - ret - - -// -// int qftol027F( void ) - DirectX FPU -// - -.globl C(qftol027F) - -C(qftol027F): - fnstcw fpucw - fldcw cw027F - fistpl temp - fldcw fpucw - movl temp,%eax - ret - -// -// int qftol037F( void ) - Linux FPU -// - -.globl C(qftol037F) - -C(qftol037F): - fnstcw fpucw - fldcw cw037F - fistpl temp - fldcw fpucw - movl temp,%eax - ret - - -// -// int qftol0F7F( void ) - ANSI -// - -.globl C(qftol0F7F) - -C(qftol0F7F): - fnstcw fpucw - fldcw cw0F7F - fistpl temp - fldcw fpucw - movl temp,%eax - ret - -// -// int qftol0E7F( void ) -// - -.globl C(qftol0E7F) - -C(qftol0E7F): - fnstcw fpucw - fldcw cw0E7F - fistpl temp - fldcw fpucw - movl temp,%eax - ret - - - -// -// long Q_ftol( float q ) -// - -.globl C(Q_ftol) - -C(Q_ftol): - flds 4(%esp) - fistpl temp - movl temp,%eax - ret - - -// -// long qftol0F7F( float q ) - Linux FPU -// - -.globl C(Q_ftol0F7F) - -C(Q_ftol0F7F): - fnstcw fpucw - flds 4(%esp) - fldcw cw0F7F - fistpl temp - fldcw fpucw - movl temp,%eax - ret diff --git a/src/asm/snapvector.asm b/src/asm/snapvector.asm new file mode 100644 index 00000000..87c77372 --- /dev/null +++ b/src/asm/snapvector.asm @@ -0,0 +1,107 @@ +; =========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +; +; This file is part of Quake III Arena source code. 
+;
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+;
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+; ===========================================================================
+
+; MASM version of the snapvector function using SSE or the x87 FPU.
+; Assumes __cdecl on x86 and __fastcall on x64.
+;
+; function prototype:
+; void qsnapvector(vec3_t vec) -- rounds vec[0..2] to the nearest integer, in place
+
+IFNDEF idx64
+.model flat, c
+ENDIF
+
+.data
+
+  ALIGN 16
+  ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h ; maskmovdqu byte mask: store the first three dwords only
+  ssecw DWORD 00001F80h ; MXCSR value: round to nearest, all exceptions masked
+
+IFNDEF idx64
+  fpucw WORD 037Fh ; x87 control word: round to nearest, extended precision, exceptions masked
+ENDIF
+
+.code
+
+IFDEF idx64
+; qsnapvector using SSE; the vec pointer arrives in rcx
+; The 16-byte load reads 4 bytes past the three floats; only 12 bytes are written back.
+  qsnapvectorsse PROC
+    sub rsp, 4
+    stmxcsr [rsp] ; save SSE control word
+    ldmxcsr ssecw ; set to round nearest
+
+    push rdi ; rdi is callee-saved and is overwritten below
+    mov rdi, rcx ; maskmovdqu uses rdi as implicit memory operand
+    movaps xmm1, ssemask ; initialize the mask register for maskmovdqu
+    movups xmm0, [rdi] ; here is stored our vector. Read 4 values in one go
+    cvtps2dq xmm0, xmm0 ; convert 4 single fp to int
+    cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
+    maskmovdqu xmm0, xmm1 ; write 3 values back to memory
+    pop rdi
+
+    ldmxcsr [rsp] ; restore sse control word to old value
+    add rsp, 4
+    ret
+  qsnapvectorsse ENDP
+
+ELSE
+; qsnapvector using SSE; the vec pointer is the first stack argument
+  qsnapvectorsse PROC
+    sub esp, 4
+    stmxcsr [esp] ; save SSE control word
+    ldmxcsr ssecw ; set to round nearest
+
+    push edi ; edi is overwritten below
+    mov edi, dword ptr 12[esp] ; vec: skip saved edi, saved MXCSR and return address. maskmovdqu uses edi as implicit memory operand
+    movaps xmm1, ssemask ; initialize the mask register for maskmovdqu
+    movups xmm0, [edi] ; here is stored our vector. Read 4 values in one go
+    cvtps2dq xmm0, xmm0 ; convert 4 single fp to int
+    cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
+    maskmovdqu xmm0, xmm1 ; write 3 values back to memory
+    pop edi
+
+    ldmxcsr [esp] ; restore sse control word to old value
+    add esp, 4
+    ret
+  qsnapvectorsse ENDP
+
+  qroundx87 macro src
+    fld dword ptr src
+    fistp dword ptr src ; to integer, using the current x87 rounding mode
+    fild dword ptr src
+    fstp dword ptr src ; and back to single precision float
+  endm
+
+  qsnapvectorx87 PROC
+    mov eax, dword ptr 4[esp] ; eax = vec (first cdecl argument)
+    sub esp, 2
+    fnstcw word ptr [esp] ; save the caller's FPU control word
+    fldcw fpucw ; switch to round-to-nearest
+    qroundx87 [eax]
+    qroundx87 4[eax]
+    qroundx87 8[eax]
+    fldcw [esp] ; restore the caller's control word
+    add esp, 2
+    ret ; explicit return: ENDP does not generate one
+  qsnapvectorx87 ENDP
+ENDIF
+
+end
diff --git a/src/asm/snapvectora.s b/src/asm/snapvectora.s
deleted file mode 100644
index bc10c757..00000000
--- a/src/asm/snapvectora.s
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
- -Quake III Arena source code is distributed in the hope that it will be -useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Quake III Arena source code; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -=========================================================================== -*/ - -// -// Sys_SnapVector NASM code (Andrew Henderson) -// See win32/win_shared.c for the Win32 equivalent -// This code is provided to ensure that the -// rounding behavior (and, if necessary, the -// precision) of DLL and QVM code are identical -// e.g. for network-visible operations. -// See ftol.nasm for operations on a single float, -// as used in compiled VM and DLL code that does -// not use this system trap. -// - -// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus -// <tim@ngus.net> - -#include "qasm.h" - -#if id386 -.data - -fpucw: .long 0 -cw037F: .long 0x037F - -.text - -// void Sys_SnapVector( float *v ) -.globl C(Sys_SnapVector) -C(Sys_SnapVector): - pushl %eax - pushl %ebp - movl %esp,%ebp - - fnstcw fpucw - movl 12(%ebp),%eax - fldcw cw037F - flds (%eax) - fistpl (%eax) - fildl (%eax) - fstps (%eax) - flds 4(%eax) - fistpl 4(%eax) - fildl 4(%eax) - fstps 4(%eax) - flds 8(%eax) - fistpl 8(%eax) - fildl 8(%eax) - fstps 8(%eax) - fldcw fpucw - - popl %ebp - popl %eax - ret - -// void Sys_SnapVectorCW( float *v, unsigned short int cw ) -.globl C(Sys_SnapVectorCW) -C(Sys_SnapVectorCW): - pushl %eax - pushl %ebp - movl %esp,%ebp - - fnstcw fpucw - movl 12(%ebp),%eax - fldcw 16(%ebp) - flds (%eax) - fistpl (%eax) - fildl (%eax) - fstps (%eax) - flds 4(%eax) - fistpl 4(%eax) - fildl 4(%eax) - fstps 4(%eax) - flds 8(%eax) - fistpl 8(%eax) - fildl 8(%eax) - fstps 8(%eax) - fldcw fpucw - - popl 
%ebp - popl %eax - ret -#endif diff --git a/src/asm/vm_x86_64.asm b/src/asm/vm_x86_64.asm new file mode 100644 index 00000000..030b6987 --- /dev/null +++ b/src/asm/vm_x86_64.asm @@ -0,0 +1,76 @@ +; =========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +; +; This file is part of Quake III Arena source code. +; +; Quake III Arena source code is free software; you can redistribute it +; and/or modify it under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of the License, +; or (at your option) any later version. +; +; Quake III Arena source code is distributed in the hope that it will be +; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with Quake III Arena source code; if not, write to the Free Software +; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +; =========================================================================== + +; Call wrapper for vm_x86 when built with MSVC in 64 bit mode, +; since MSVC does not support inline x64 assembler code anymore. 
+;
+; Assumes the Microsoft x64 (__fastcall) calling convention.
+
+DoSyscall PROTO
+
+.code
+
+; Call to static void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg)
+; In: eax = syscallNum, esi = programStack, rdi = opStackBase, bl = opStackOfs, rcx = arg
+qsyscall64 PROC
+    sub rsp, 28h ; 20h bytes of shadow space + 8 for the stack argument; rsp is meant to be 16-byte aligned after this
+    mov qword ptr [rsp + 20h], rcx ; arg: 5th parameter goes on the stack (store it before rcx is reused)
+    mov ecx, eax ; syscallNum
+    mov edx, esi ; programStack
+    mov r8, rdi ; opStackBase
+    mov r9b, bl ; opStackOfs
+    mov rax, DoSyscall ; address of DoSyscall
+    call rax
+    add rsp, 28h
+    ret
+qsyscall64 ENDP
+
+
+; Enter the compiled VM code after loading the registers it expects.
+; Prototype:
+; uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
+; Register setup: esi = *programStack, rdi = opStack, rbx = 0 (opStackOfs); r8 and r9 are passed through.
+qvmcall64 PROC
+    push rsi ; non-volatile registers used below
+    push rdi
+    push rbx
+    ; keep the programStack pointer so the updated value can be written back for the caller
+    push rcx
+
+    mov esi, dword ptr [rcx] ; programStack
+    mov rdi, rdx ; opStack
+    xor ebx, ebx ; opStackOfs starts at 0 (a 32-bit write clears all of rbx)
+
+    ; r8 and r9 already hold the right values thanks to __fastcall
+    call qword ptr [r8] ; instructionPointers[0] is also the entry point
+
+    pop rcx
+
+    mov al, bl ; return opStack offset
+    mov dword ptr [rcx], esi ; write back the programStack value
+
+    pop rbx
+    pop rdi
+    pop rsi
+
+    ret
+qvmcall64 ENDP
+
+end
|