summaryrefslogtreecommitdiff
path: root/src/asm
diff options
context:
space:
mode:
authorThilo Schulz <arny@ats.s.bawue.de>2011-06-13 09:56:39 +0000
committerTim Angus <tim@ngus.net>2013-01-09 23:15:55 +0000
commit6a71409a0622050f9a682d4e3b02419c444febe5 (patch)
tree7766ff71304d04c6e42de7dd7d48ed7e7e0fac59 /src/asm
parentb15804d39f71e9be202818288726777d1ca8ac09 (diff)
- Add MASM assembler files for MSVC x64 projects to support vm_x86 in x64 mode - Clean up ftol()/snapvector() mess - Make use of SSE instructions for ftol()/snapvector() if available - move ftol/snapvector pure assembler to inline assembler, this will add x86_64 and improve support for different calling conventions - Set FPU control word at program startup to get consistent behaviour on all platforms
Diffstat (limited to 'src/asm')
-rw-r--r--src/asm/ftola.asm90
-rw-r--r--src/asm/ftola.s157
-rw-r--r--src/asm/snapvector.asm107
-rw-r--r--src/asm/snapvectora.s103
-rw-r--r--src/asm/vm_x86_64.asm76
5 files changed, 273 insertions, 260 deletions
diff --git a/src/asm/ftola.asm b/src/asm/ftola.asm
new file mode 100644
index 00000000..370c12d8
--- /dev/null
+++ b/src/asm/ftola.asm
@@ -0,0 +1,90 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+;
+; This file is part of Quake III Arena source code.
+;
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+;
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+; ===========================================================================
+
+; MASM ftol conversion functions using SSE or FPU
+; assume __cdecl calling convention is being used for x86, __fastcall for x64
+
+IFNDEF idx64
+; 32-bit build only: ml starts out in 8086 mode, so the CPU level and the SSE
+; instruction set must be enabled before the flat memory model is selected and
+; before any movss/cvttss2si instruction is assembled.
+; These directives are 32-bit MASM only, hence the idx64 guard.
+.686p
+.xmm
+.model flat, c
+ENDIF
+
+; .data
+
+; ifndef idx64
+; fpucw WORD 0F7Fh
+; endif
+
+.code
+
+IFDEF idx64
+; qftol using SSE
+
+; qftolsse (x64 build)
+; In:  xmm0 = single-precision float
+; Out: eax  = the value converted to a 32-bit integer
+; cvttss2si always truncates toward zero, whatever rounding mode MXCSR holds.
+ qftolsse PROC
+     cvttss2si eax, xmm0
+     ret
+ qftolsse ENDP
+
+; qvmftolsse (x64 build)
+; Register-based entry point: the float is read from [rdi + rbx * 4] instead of
+; being passed as an argument (vm_x86_64.asm loads rdi with opStack and rbx
+; with opStackOfs before entering VM code).
+; Out: eax = the value truncated toward zero; xmm0 is overwritten.
+ qvmftolsse PROC
+     movss xmm0, dword ptr [rdi + rbx * 4]    ; fetch the float from the indexed slot
+     cvttss2si eax, xmm0                      ; truncating float -> int32
+     ret
+ qvmftolsse ENDP
+
+ELSE
+; qftol using FPU
+
+; qftolx87m <slot>
+; Shared body of qftolx87 and qvmftolx87. <slot> is a memory operand holding a
+; single-precision float; it is converted in place to a 32-bit integer, the
+; integer is returned in eax, and the macro itself emits the closing ret.
+; fistp rounds according to the current x87 control word.
+;
+; Saving and restoring the control word around the conversion is deliberately
+; disabled because fpucw is set with _controlfp at startup. The disabled
+; sequence, kept for reference:
+;     sub esp, 2
+;     fnstcw word ptr [esp]
+;     fldcw fpucw
+;     ... conversion ...
+;     fldcw [esp]
+;     add esp, 2
+ qftolx87m macro slot
+     fld dword ptr slot       ; push the float onto the x87 stack
+     fistp dword ptr slot     ; pop it back as an integer, overwriting the float
+     mov eax, slot            ; return value
+     ret
+ endm
+
+; qftolx87 (x86, __cdecl)
+; Converts the float argument found at [esp + 4] (first slot above the return
+; address) to an integer in eax using the x87 FPU. The ret comes from the
+; qftolx87m expansion.
+ qftolx87 PROC
+; if the control-word save inside qftolx87m (sub esp, 2) is ever re-enabled,
+; the operand has to become [esp + 6]
+     qftolx87m [esp + 4]
+ qftolx87 ENDP
+
+; qvmftolx87 (x86)
+; Register-based variant of qftolx87: the float is read from, and its integer
+; form written back to, [edi + ebx * 4]; the integer is also returned in eax.
+; NOTE(review): edi/ebx are presumably the VM's opStack base and index, as in
+; the x64 wrapper -- confirm against the x86 code generator.
+; The ret comes from the qftolx87m expansion.
+ qvmftolx87 PROC
+     qftolx87m [edi + ebx * 4]
+ qvmftolx87 ENDP
+
+; qftol using SSE
+; qftolsse (x86, __cdecl)
+; In:  [esp + 4] = single-precision float argument
+; Out: eax = the value truncated toward zero; xmm0 is overwritten.
+ qftolsse PROC
+     movss xmm0, dword ptr [esp + 4]    ; load the stack argument
+     cvttss2si eax, xmm0                ; truncating float -> int32
+     ret
+ qftolsse ENDP
+
+; qvmftolsse (x86)
+; Register-based variant of qftolsse: the float is read from [edi + ebx * 4].
+; NOTE(review): edi/ebx are presumably the VM's opStack base and index, as in
+; the x64 wrapper -- confirm against the x86 code generator.
+; Out: eax = the value truncated toward zero; xmm0 is overwritten.
+ qvmftolsse PROC
+     movss xmm0, dword ptr [edi + ebx * 4]    ; fetch the float from the indexed slot
+     cvttss2si eax, xmm0                      ; truncating float -> int32
+     ret
+ qvmftolsse ENDP
+ENDIF
+
+end
diff --git a/src/asm/ftola.s b/src/asm/ftola.s
deleted file mode 100644
index ca3d8626..00000000
--- a/src/asm/ftola.s
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
-
-Quake III Arena source code is distributed in the hope that it will be
-useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with Quake III Arena source code; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-===========================================================================
-*/
-
-//
-// qftol -- fast floating point to long conversion.
-//
-
-// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus
-// <tim@ngus.net>
-
-#include "qasm.h"
-
-.data
-
-temp: .single 0.0
-fpucw: .long 0
-
-// Precision Control Field , 2 bits / 0x0300
-// PC24 0x0000 Single precision (24 bits).
-// PC53 0x0200 Double precision (53 bits).
-// PC64 0x0300 Extended precision (64 bits).
-
-// Rounding Control Field, 2 bits / 0x0C00
-// RCN 0x0000 Rounding to nearest (even).
-// RCD 0x0400 Rounding down (directed, minus).
-// RCU 0x0800 Rounding up (directed plus).
-// RC0 0x0C00 Rounding towards zero (chop mode).
-
-
-// rounding towards nearest (even)
-cw027F: .long 0x027F
-cw037F: .long 0x037F
-
-// rounding towards zero (chop mode)
-cw0E7F: .long 0x0E7F
-cw0F7F: .long 0x0F7F
-
-
-.text
-
-//
-// int qftol( void ) - default control word
-//
-
-.globl C(qftol)
-
-C(qftol):
- fistpl temp
- movl temp,%eax
- ret
-
-
-//
-// int qftol027F( void ) - DirectX FPU
-//
-
-.globl C(qftol027F)
-
-C(qftol027F):
- fnstcw fpucw
- fldcw cw027F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
-
-//
-// int qftol037F( void ) - Linux FPU
-//
-
-.globl C(qftol037F)
-
-C(qftol037F):
- fnstcw fpucw
- fldcw cw037F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
-
-
-//
-// int qftol0F7F( void ) - ANSI
-//
-
-.globl C(qftol0F7F)
-
-C(qftol0F7F):
- fnstcw fpucw
- fldcw cw0F7F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
-
-//
-// int qftol0E7F( void )
-//
-
-.globl C(qftol0E7F)
-
-C(qftol0E7F):
- fnstcw fpucw
- fldcw cw0E7F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
-
-
-
-//
-// long Q_ftol( float q )
-//
-
-.globl C(Q_ftol)
-
-C(Q_ftol):
- flds 4(%esp)
- fistpl temp
- movl temp,%eax
- ret
-
-
-//
-// long qftol0F7F( float q ) - Linux FPU
-//
-
-.globl C(Q_ftol0F7F)
-
-C(Q_ftol0F7F):
- fnstcw fpucw
- flds 4(%esp)
- fldcw cw0F7F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
diff --git a/src/asm/snapvector.asm b/src/asm/snapvector.asm
new file mode 100644
index 00000000..87c77372
--- /dev/null
+++ b/src/asm/snapvector.asm
@@ -0,0 +1,107 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+;
+; This file is part of Quake III Arena source code.
+;
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+;
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+; ===========================================================================
+
+; MASM version of snapvector conversion function using SSE or FPU
+; assume __cdecl calling convention is being used for x86, __fastcall for x64
+;
+; function prototype:
+; void qsnapvector(vec3_t vec)
+
+IFNDEF idx64
+; 32-bit build only: ml starts out in 8086 mode, so the CPU level and the SSE
+; instruction set must be enabled before the flat memory model is selected and
+; before any SSE/SSE2 instruction below is assembled.
+; These directives are 32-bit MASM only, hence the idx64 guard.
+.686p
+.xmm
+.model flat, c
+ENDIF
+
+.data
+
+; ssemask is loaded with movaps, so it must sit on a 16-byte boundary.
+ ALIGN 16
+; Byte-select mask for maskmovdqu: the first three dwords are stored, the
+; fourth is left untouched.
+ ssemask DWORD 3 DUP (0FFFFFFFFh), 00000000h
+; MXCSR value loaded while snapping: rounding control = nearest, all SSE
+; exceptions masked.
+ ssecw DWORD 00001F80h
+
+IFNDEF idx64
+; x87 control word loaded by qsnapvectorx87: precision control 0300h
+; (extended), rounding control 0000h (nearest), exceptions masked.
+ fpucw WORD 037Fh
+ENDIF
+
+.code
+
+IFDEF idx64
+; qsnapvector using SSE
+
+; void qsnapvectorsse(vec3_t vec) -- x64 build, vec arrives in rcx
+; Rounds the three floats of vec to the nearest integer value, in place.
+;
+; This is a true leaf function: rsp and all non-volatile registers are left
+; alone, so no unwind data is required. The caller's MXCSR is parked in the
+; home (shadow) slot that the x64 calling convention reserves for rcx at
+; [rsp + 8].
+; Exactly 12 bytes are read and 12 bytes are written, so nothing beyond the
+; vector is touched.
+; Clobbers: xmm0, xmm1 (both volatile).
+ qsnapvectorsse PROC
+     stmxcsr dword ptr [rsp + 8]        ; save SSE control word in rcx's home slot
+     ldmxcsr ssecw                      ; set to round nearest
+
+     movlps xmm0, qword ptr [rcx]       ; xmm0[0..1] = vec[0], vec[1]
+     movss xmm1, dword ptr [rcx + 8]    ; xmm1 = { vec[2], 0, 0, 0 }
+     movlhps xmm0, xmm1                 ; xmm0 = { vec[0], vec[1], vec[2], 0 }
+     cvtps2dq xmm0, xmm0                ; convert 4 single fp to int
+     cvtdq2ps xmm0, xmm0                ; convert 4 int to single fp
+     movlps qword ptr [rcx], xmm0       ; write back vec[0], vec[1]
+     movhlps xmm0, xmm0                 ; bring the third lane down to lane 0
+     movss dword ptr [rcx + 8], xmm0    ; write back vec[2]
+
+     ldmxcsr dword ptr [rsp + 8]        ; restore SSE control word to old value
+     ret
+ qsnapvectorsse ENDP
+
+ELSE
+
+; void qsnapvectorsse(vec3_t vec) -- x86 build, __cdecl, vec at [esp + 4]
+; Rounds the three floats of vec to the nearest integer value, in place.
+;
+; Exactly 12 bytes are read and 12 bytes are written, so nothing beyond the
+; vector is touched, and the stores are ordinary cached stores.
+; Clobbers: eax, xmm0, xmm1 (all caller-saved).
+ qsnapvectorsse PROC
+     mov eax, dword ptr [esp + 4]       ; eax = vec (read before esp moves)
+     sub esp, 4
+     stmxcsr dword ptr [esp]            ; save SSE control word
+     ldmxcsr ssecw                      ; set to round nearest
+
+     movlps xmm0, qword ptr [eax]       ; xmm0[0..1] = vec[0], vec[1]
+     movss xmm1, dword ptr [eax + 8]    ; xmm1 = { vec[2], 0, 0, 0 }
+     movlhps xmm0, xmm1                 ; xmm0 = { vec[0], vec[1], vec[2], 0 }
+     cvtps2dq xmm0, xmm0                ; convert 4 single fp to int
+     cvtdq2ps xmm0, xmm0                ; convert 4 int to single fp
+     movlps qword ptr [eax], xmm0       ; write back vec[0], vec[1]
+     movhlps xmm0, xmm0                 ; bring the third lane down to lane 0
+     movss dword ptr [eax + 8], xmm0    ; write back vec[2]
+
+     ldmxcsr dword ptr [esp]            ; restore sse control word to old value
+     add esp, 4
+     ret
+ qsnapvectorsse ENDP
+
+; qroundx87 <slot>
+; Rounds the single-precision float stored at <slot> to an integer value, in
+; place, by passing it through an integer store and reload. Rounding follows
+; the current x87 control word; the x87 stack is balanced on exit.
+ qroundx87 macro slot
+     fld dword ptr slot      ; float  -> st(0)
+     fistp dword ptr slot    ; st(0)  -> int32 in memory (this step rounds)
+     fild dword ptr slot     ; int32  -> st(0)
+     fstp dword ptr slot     ; st(0)  -> float in memory
+ endm
+
+; void qsnapvectorx87(vec3_t vec) -- x86 build, __cdecl, vec at [esp + 4]
+; Rounds the three floats of vec in place using the x87 FPU. The caller's
+; control word is saved on the stack, replaced by fpucw for the conversion
+; and restored before returning.
+; Clobbers: eax.
+ qsnapvectorx87 PROC
+     mov eax, dword ptr 4[esp]    ; eax = vec (read before esp moves)
+     sub esp, 2                   ; room for the saved control word
+     fnstcw word ptr [esp]        ; save FPU control word
+     fldcw fpucw                  ; load the rounding mode used for snapping
+     qroundx87 [eax]
+     qroundx87 4[eax]
+     qroundx87 8[eax]
+     fldcw word ptr [esp]         ; restore FPU control word
+     add esp, 2
+     ret                          ; was missing: execution ran on past ENDP
+ qsnapvectorx87 ENDP
+
+ENDIF
+
+end
diff --git a/src/asm/snapvectora.s b/src/asm/snapvectora.s
deleted file mode 100644
index bc10c757..00000000
--- a/src/asm/snapvectora.s
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
-
-Quake III Arena source code is distributed in the hope that it will be
-useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with Quake III Arena source code; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-===========================================================================
-*/
-
-//
-// Sys_SnapVector NASM code (Andrew Henderson)
-// See win32/win_shared.c for the Win32 equivalent
-// This code is provided to ensure that the
-// rounding behavior (and, if necessary, the
-// precision) of DLL and QVM code are identical
-// e.g. for network-visible operations.
-// See ftol.nasm for operations on a single float,
-// as used in compiled VM and DLL code that does
-// not use this system trap.
-//
-
-// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus
-// <tim@ngus.net>
-
-#include "qasm.h"
-
-#if id386
-.data
-
-fpucw: .long 0
-cw037F: .long 0x037F
-
-.text
-
-// void Sys_SnapVector( float *v )
-.globl C(Sys_SnapVector)
-C(Sys_SnapVector):
- pushl %eax
- pushl %ebp
- movl %esp,%ebp
-
- fnstcw fpucw
- movl 12(%ebp),%eax
- fldcw cw037F
- flds (%eax)
- fistpl (%eax)
- fildl (%eax)
- fstps (%eax)
- flds 4(%eax)
- fistpl 4(%eax)
- fildl 4(%eax)
- fstps 4(%eax)
- flds 8(%eax)
- fistpl 8(%eax)
- fildl 8(%eax)
- fstps 8(%eax)
- fldcw fpucw
-
- popl %ebp
- popl %eax
- ret
-
-// void Sys_SnapVectorCW( float *v, unsigned short int cw )
-.globl C(Sys_SnapVectorCW)
-C(Sys_SnapVectorCW):
- pushl %eax
- pushl %ebp
- movl %esp,%ebp
-
- fnstcw fpucw
- movl 12(%ebp),%eax
- fldcw 16(%ebp)
- flds (%eax)
- fistpl (%eax)
- fildl (%eax)
- fstps (%eax)
- flds 4(%eax)
- fistpl 4(%eax)
- fildl 4(%eax)
- fstps 4(%eax)
- flds 8(%eax)
- fistpl 8(%eax)
- fildl 8(%eax)
- fstps 8(%eax)
- fldcw fpucw
-
- popl %ebp
- popl %eax
- ret
-#endif
diff --git a/src/asm/vm_x86_64.asm b/src/asm/vm_x86_64.asm
new file mode 100644
index 00000000..030b6987
--- /dev/null
+++ b/src/asm/vm_x86_64.asm
@@ -0,0 +1,76 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+;
+; This file is part of Quake III Arena source code.
+;
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+;
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+; ===========================================================================
+
+; Call wrapper for vm_x86 when built with MSVC in 64 bit mode,
+; since MSVC does not support inline x64 assembler code anymore.
+;
+; assumes __fastcall calling convention
+
+DoSyscall PROTO
+
+.code
+
+; Call to static void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg)
+;
+; Adapter between the VM's register layout and the C function: this is not a
+; normal __fastcall entry point, its inputs are taken from fixed registers.
+; In:  eax = syscallNum
+;      esi = programStack
+;      rdi = opStackBase
+;      bl  = opStackOfs
+;      rcx = arg
+; Overwrites rax, rcx, rdx, r8 and r9 while marshalling; anything DoSyscall
+; leaves in rax is passed through unchanged.
+
+qsyscall64 PROC
+ sub rsp, 28h ; 20h shadow space + 8 for the 5th argument; with rsp = 8 mod 16 on entry, rsp is 16-byte aligned after this
+ mov qword ptr [rsp + 20h], rcx ; 5th parameter "arg" is passed on stack
+ mov r9b, bl ; opStackOfs
+ mov r8, rdi ; opStackBase
+ mov edx, esi ; programStack
+ mov ecx, eax ; syscallNum (must be copied before rax is reused below)
+ mov rax, DoSyscall ; store call address of DoSyscall in rax
+ call rax
+ add rsp, 28h
+ ret
+qsyscall64 ENDP
+
+
+; Call to compiled code after setting up the register environment for the VM
+; prototype:
+; uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
+;
+; In (__fastcall): rcx = programStack, rdx = opStack,
+;                  r8  = instructionPointers, r9 = dataBase
+; Register environment handed to the compiled code:
+;      esi = *programStack, rdi = opStack, rbx = 0 (opStackOfs),
+;      r8 and r9 unchanged
+; Out: al = bl as left by the compiled code; *programStack = esi as left by
+;      the compiled code.
+
+qvmcall64 PROC
+ push rsi ; push non-volatile registers to stack
+ push rdi
+ push rbx
+ ; need to save pointer in rcx so we can write back the programStack value to caller
+ push rcx
+
+ ; registers r8 and r9 have correct value already thanks to __fastcall
+ xor rbx, rbx ; opStackOfs starts out being 0
+ mov rdi, rdx ; opStack
+ mov esi, dword ptr [rcx] ; programStack
+
+ ; NOTE(review): with an ABI-conformant entry (rsp = 8 mod 16) the four pushes
+ ; above leave rsp at 8 mod 16 at this call, so the callee starts with rsp
+ ; 16-byte aligned; whether the generated code relies on that is not visible
+ ; here -- confirm against the code generator before changing the push count.
+ call qword ptr [r8] ; instructionPointers[0] is also the entry point
+
+ pop rcx
+
+ mov dword ptr [rcx], esi ; write back the programStack value
+ mov al, bl ; return opStack offset
+
+ pop rbx
+ pop rdi
+ pop rsi
+
+ ret
+qvmcall64 ENDP
+
+end