diff options
author | Thilo Schulz <arny@ats.s.bawue.de> | 2011-06-13 09:56:39 +0000 |
---|---|---|
committer | Tim Angus <tim@ngus.net> | 2013-01-09 23:15:55 +0000 |
commit | 6a71409a0622050f9a682d4e3b02419c444febe5 (patch) | |
tree | 7766ff71304d04c6e42de7dd7d48ed7e7e0fac59 | |
parent | b15804d39f71e9be202818288726777d1ca8ac09 (diff) |
- Add MASM assembler files for MSVC x64 projects to support vm_x86 in x64 mode
- Clean up ftol()/snapvector() mess
- Make use of SSE instructions for ftol()/snapvector() if available
- Move ftol/snapvector from pure assembler to inline assembler; this adds x86_64 support and improves support for different calling conventions
- Set FPU control word at program startup to get consistent behaviour on all platforms
-rw-r--r-- | Makefile | 70 | ||||
-rw-r--r-- | src/asm/ftola.asm | 90 | ||||
-rw-r--r-- | src/asm/ftola.s | 157 | ||||
-rw-r--r-- | src/asm/snapvector.asm | 107 | ||||
-rw-r--r-- | src/asm/snapvectora.s | 103 | ||||
-rw-r--r-- | src/asm/vm_x86_64.asm | 76 | ||||
-rw-r--r-- | src/client/cl_cgame.c | 2 | ||||
-rw-r--r-- | src/client/snd_wavelet.c | 2 | ||||
-rw-r--r-- | src/qcommon/common.c | 57 | ||||
-rw-r--r-- | src/qcommon/q_platform.h | 2 | ||||
-rw-r--r-- | src/qcommon/q_shared.h | 53 | ||||
-rw-r--r-- | src/qcommon/vm_x86.c | 99 | ||||
-rw-r--r-- | src/renderer/tr_light.c | 6 | ||||
-rw-r--r-- | src/renderer/tr_local.h | 8 | ||||
-rw-r--r-- | src/renderer/tr_mesh.c | 2 | ||||
-rw-r--r-- | src/renderer/tr_shade.c | 8 | ||||
-rw-r--r-- | src/renderer/tr_shade_calc.c | 25 | ||||
-rw-r--r-- | src/renderer/tr_sky.c | 8 | ||||
-rw-r--r-- | src/server/sv_game.c | 2 | ||||
-rw-r--r-- | src/sys/sys_main.c | 4 | ||||
-rw-r--r-- | src/sys/sys_unix.c | 32 | ||||
-rw-r--r-- | src/sys/sys_win32.c | 70 |
22 files changed, 532 insertions, 451 deletions
@@ -1404,8 +1404,24 @@ ifeq ($(ARCH),x86) Q3OBJ += \ $(B)/client/snd_mixa.o \ $(B)/client/matha.o \ - $(B)/client/snapvectora.o + $(B)/client/snapvector.o \ + $(B)/client/ftola.o endif +ifeq ($(ARCH),x86_64) + Q3OBJ += \ + $(B)/client/snapvector.o \ + $(B)/client/ftola.o +endif +ifeq ($(ARCH),amd64) + Q3OBJ += \ + $(B)/client/snapvector.o \ + $(B)/client/ftola.o +endif +ifeq ($(ARCH),x64) + Q3OBJ += \ + $(B)/client/snapvector.o \ + $(B)/client/ftola.o + endif ifeq ($(USE_VOIP),1) ifeq ($(USE_INTERNAL_SPEEX),1) @@ -1466,8 +1482,7 @@ endif ifeq ($(HAVE_VM_COMPILED),true) ifeq ($(ARCH),x86) Q3OBJ += \ - $(B)/client/vm_x86.o \ - $(B)/client/ftola.o + $(B)/client/vm_x86.o endif ifeq ($(ARCH),x86_64) ifeq ($(USE_OLD_VM64),1) @@ -1476,8 +1491,7 @@ ifeq ($(HAVE_VM_COMPILED),true) $(B)/client/vm_x86_64_assembler.o else Q3OBJ += \ - $(B)/client/vm_x86.o \ - $(B)/client/ftola.o + $(B)/client/vm_x86.o endif endif ifeq ($(ARCH),amd64) @@ -1487,8 +1501,7 @@ ifeq ($(HAVE_VM_COMPILED),true) $(B)/client/vm_x86_64_assembler.o else Q3OBJ += \ - $(B)/client/vm_x86.o \ - $(B)/client/ftola.o + $(B)/client/vm_x86.o endif endif ifeq ($(ARCH),x64) @@ -1498,8 +1511,7 @@ ifeq ($(HAVE_VM_COMPILED),true) $(B)/client/vm_x86_64_assembler.o else Q3OBJ += \ - $(B)/client/vm_x86.o \ - $(B)/client/ftola.o + $(B)/client/vm_x86.o endif endif ifeq ($(ARCH),ppc) @@ -1607,8 +1619,24 @@ Q3DOBJ = \ ifeq ($(ARCH),x86) Q3DOBJ += \ - $(B)/ded/snapvectora.o \ - $(B)/ded/matha.o + $(B)/ded/matha.o \ + $(B)/ded/snapvector.o \ + $(B)/ded/ftola.o +endif +ifeq ($(ARCH),x86_64) + Q3DOBJ += \ + $(B)/ded/snapvector.o \ + $(B)/ded/ftola.o +endif +ifeq ($(ARCH),amd64) + Q3DOBJ += \ + $(B)/ded/snapvector.o \ + $(B)/ded/ftola.o +endif +ifeq ($(ARCH),x64) + Q3DOBJ += \ + $(B)/ded/snapvector.o \ + $(B)/ded/ftola.o endif ifeq ($(USE_INTERNAL_ZLIB),1) @@ -1624,8 +1652,7 @@ endif ifeq ($(HAVE_VM_COMPILED),true) ifeq ($(ARCH),x86) Q3DOBJ += \ - $(B)/ded/vm_x86.o \ - $(B)/ded/ftola.o + $(B)/ded/vm_x86.o endif ifeq 
($(ARCH),x86_64) ifeq ($(USE_OLD_VM64),1) @@ -1634,8 +1661,7 @@ ifeq ($(HAVE_VM_COMPILED),true) $(B)/ded/vm_x86_64_assembler.o else Q3DOBJ += \ - $(B)/ded/vm_x86.o \ - $(B)/ded/ftola.o + $(B)/ded/vm_x86.o endif endif ifeq ($(ARCH),amd64) @@ -1645,8 +1671,7 @@ ifeq ($(HAVE_VM_COMPILED),true) $(B)/ded/vm_x86_64_assembler.o else Q3DOBJ += \ - $(B)/ded/vm_x86.o \ - $(B)/ded/ftola.o + $(B)/ded/vm_x86.o endif endif ifeq ($(ARCH),x64) @@ -1656,8 +1681,7 @@ ifeq ($(HAVE_VM_COMPILED),true) $(B)/ded/vm_x86_64_assembler.o else Q3DOBJ += \ - $(B)/ded/vm_x86.o \ - $(B)/ded/ftola.o + $(B)/ded/vm_x86.o endif endif ifeq ($(ARCH),ppc) @@ -1828,6 +1852,10 @@ $(B)/base/vm/ui.qvm: $(UIVMOBJ) $(UIDIR)/ui_syscalls.asm $(Q3ASM) $(B)/client/%.o: $(ASMDIR)/%.s $(DO_AS) +# k8 so inline assembler knows about SSE +$(B)/client/%.o: $(ASMDIR)/%.c + $(DO_CC) -march=k8 + $(B)/client/%.o: $(CDIR)/%.c $(DO_CC) @@ -1868,6 +1896,10 @@ $(B)/client/%.o: $(SYSDIR)/%.rc $(B)/ded/%.o: $(ASMDIR)/%.s $(DO_AS) +# k8 so inline assembler knows about SSE +$(B)/ded/%.o: $(ASMDIR)/%.c + $(DO_CC) -march=k8 + $(B)/ded/%.o: $(SDIR)/%.c $(DO_DED_CC) diff --git a/src/asm/ftola.asm b/src/asm/ftola.asm new file mode 100644 index 00000000..370c12d8 --- /dev/null +++ b/src/asm/ftola.asm @@ -0,0 +1,90 @@ +; =========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +; +; This file is part of Quake III Arena source code. +; +; Quake III Arena source code is free software; you can redistribute it +; and/or modify it under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of the License, +; or (at your option) any later version. +; +; Quake III Arena source code is distributed in the hope that it will be +; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. 
+; +; You should have received a copy of the GNU General Public License +; along with Quake III Arena source code; if not, write to the Free Software +; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +; =========================================================================== + +; MASM ftol conversion functions using SSE or FPU +; assume __cdecl calling convention is being used for x86, __fastcall for x64 + +IFNDEF idx64 +.model flat, c +ENDIF + +; .data + +; ifndef idx64 +; fpucw WORD 0F7Fh +; endif + +.code + +IFDEF idx64 +; qftol using SSE + + qftolsse PROC + cvttss2si eax, xmm0 + ret + qftolsse ENDP + + qvmftolsse PROC + movss xmm0, dword ptr [rdi + rbx * 4] + cvttss2si eax, xmm0 + ret + qvmftolsse ENDP + +ELSE +; qftol using FPU + + qftolx87m macro src +; not necessary, fpucw is set with _controlfp at startup +; sub esp, 2 +; fnstcw word ptr [esp] +; fldcw fpucw + fld dword ptr src + fistp dword ptr src +; fldcw [esp] + mov eax, src +; add esp, 2 + ret + endm + + qftolx87 PROC +; need this line when storing FPU control word on stack +; qftolx87m [esp + 6] + qftolx87m [esp + 4] + qftolx87 ENDP + + qvmftolx87 PROC + qftolx87m [edi + ebx * 4] + qvmftolx87 ENDP + +; qftol using SSE + qftolsse PROC + movss xmm0, dword ptr [esp + 4] + cvttss2si eax, xmm0 + ret + qftolsse ENDP + + qvmftolsse PROC + movss xmm0, dword ptr [edi + ebx * 4] + cvttss2si eax, xmm0 + ret + qvmftolsse ENDP +ENDIF + +end diff --git a/src/asm/ftola.s b/src/asm/ftola.s deleted file mode 100644 index ca3d8626..00000000 --- a/src/asm/ftola.s +++ /dev/null @@ -1,157 +0,0 @@ -/* -=========================================================================== -Copyright (C) 1999-2005 Id Software, Inc. - -This file is part of Quake III Arena source code. 
- -Quake III Arena source code is free software; you can redistribute it -and/or modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the License, -or (at your option) any later version. - -Quake III Arena source code is distributed in the hope that it will be -useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Quake III Arena source code; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -=========================================================================== -*/ - -// -// qftol -- fast floating point to long conversion. -// - -// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus -// <tim@ngus.net> - -#include "qasm.h" - -.data - -temp: .single 0.0 -fpucw: .long 0 - -// Precision Control Field , 2 bits / 0x0300 -// PC24 0x0000 Single precision (24 bits). -// PC53 0x0200 Double precision (53 bits). -// PC64 0x0300 Extended precision (64 bits). - -// Rounding Control Field, 2 bits / 0x0C00 -// RCN 0x0000 Rounding to nearest (even). -// RCD 0x0400 Rounding down (directed, minus). -// RCU 0x0800 Rounding up (directed plus). -// RC0 0x0C00 Rounding towards zero (chop mode). 
- - -// rounding towards nearest (even) -cw027F: .long 0x027F -cw037F: .long 0x037F - -// rounding towards zero (chop mode) -cw0E7F: .long 0x0E7F -cw0F7F: .long 0x0F7F - - -.text - -// -// int qftol( void ) - default control word -// - -.globl C(qftol) - -C(qftol): - fistpl temp - movl temp,%eax - ret - - -// -// int qftol027F( void ) - DirectX FPU -// - -.globl C(qftol027F) - -C(qftol027F): - fnstcw fpucw - fldcw cw027F - fistpl temp - fldcw fpucw - movl temp,%eax - ret - -// -// int qftol037F( void ) - Linux FPU -// - -.globl C(qftol037F) - -C(qftol037F): - fnstcw fpucw - fldcw cw037F - fistpl temp - fldcw fpucw - movl temp,%eax - ret - - -// -// int qftol0F7F( void ) - ANSI -// - -.globl C(qftol0F7F) - -C(qftol0F7F): - fnstcw fpucw - fldcw cw0F7F - fistpl temp - fldcw fpucw - movl temp,%eax - ret - -// -// int qftol0E7F( void ) -// - -.globl C(qftol0E7F) - -C(qftol0E7F): - fnstcw fpucw - fldcw cw0E7F - fistpl temp - fldcw fpucw - movl temp,%eax - ret - - - -// -// long Q_ftol( float q ) -// - -.globl C(Q_ftol) - -C(Q_ftol): - flds 4(%esp) - fistpl temp - movl temp,%eax - ret - - -// -// long qftol0F7F( float q ) - Linux FPU -// - -.globl C(Q_ftol0F7F) - -C(Q_ftol0F7F): - fnstcw fpucw - flds 4(%esp) - fldcw cw0F7F - fistpl temp - fldcw fpucw - movl temp,%eax - ret diff --git a/src/asm/snapvector.asm b/src/asm/snapvector.asm new file mode 100644 index 00000000..87c77372 --- /dev/null +++ b/src/asm/snapvector.asm @@ -0,0 +1,107 @@ +; =========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +; +; This file is part of Quake III Arena source code. +; +; Quake III Arena source code is free software; you can redistribute it +; and/or modify it under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of the License, +; or (at your option) any later version. 
+; +; Quake III Arena source code is distributed in the hope that it will be +; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with Quake III Arena source code; if not, write to the Free Software +; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +; =========================================================================== + +; MASM version of snapvector conversion function using SSE or FPU +; assume __cdecl calling convention is being used for x86, __fastcall for x64 +; +; function prototype: +; void qsnapvector(vec3_t vec) + +IFNDEF idx64 +.model flat, c +ENDIF + +.data + + ALIGN 16 + ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h + ssecw DWORD 00001F80h + +IFNDEF idx64 + fpucw WORD 037Fh +ENDIF + +.code + +IFDEF idx64 +; qsnapvector using SSE + + qsnapvectorsse PROC + sub rsp, 4 + stmxcsr [rsp] ; save SSE control word + ldmxcsr ssecw ; set to round nearest + + push rdi + mov rdi, rcx ; maskmovdqu uses rdi as implicit memory operand + movaps xmm1, ssemask ; initialize the mask register for maskmovdqu + movups xmm0, [rdi] ; here is stored our vector. Read 4 values in one go + cvtps2dq xmm0, xmm0 ; convert 4 single fp to int + cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp + maskmovdqu xmm0, xmm1 ; write 3 values back to memory + pop rdi + + ldmxcsr [rsp] ; restore sse control word to old value + add rsp, 4 + ret + qsnapvectorsse ENDP + +ELSE + + qsnapvectorsse PROC + sub esp, 4 + stmxcsr [esp] ; save SSE control word + ldmxcsr ssecw ; set to round nearest + + push edi + mov edi, dword ptr 12[esp] ; maskmovdqu uses edi as implicit memory operand + movaps xmm1, ssemask ; initialize the mask register for maskmovdqu + movups xmm0, [edi] ; here is stored our vector. 
Read 4 values in one go + cvtps2dq xmm0, xmm0 ; convert 4 single fp to int + cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp + maskmovdqu xmm0, xmm1 ; write 3 values back to memory + pop edi + + ldmxcsr [esp] ; restore sse control word to old value + add esp, 4 + ret + qsnapvectorsse ENDP + + qroundx87 macro src + fld dword ptr src + fistp dword ptr src + fild dword ptr src + fstp dword ptr src + endm + + qsnapvectorx87 PROC + mov eax, dword ptr 4[esp] + sub esp, 2 + fnstcw word ptr [esp] + fldcw fpucw + qroundx87 [eax] + qroundx87 4[eax] + qroundx87 8[eax] + fldcw [esp] + add esp, 2 + qsnapvectorx87 ENDP + +ENDIF + +end diff --git a/src/asm/snapvectora.s b/src/asm/snapvectora.s deleted file mode 100644 index bc10c757..00000000 --- a/src/asm/snapvectora.s +++ /dev/null @@ -1,103 +0,0 @@ -/* -=========================================================================== -Copyright (C) 1999-2005 Id Software, Inc. - -This file is part of Quake III Arena source code. - -Quake III Arena source code is free software; you can redistribute it -and/or modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the License, -or (at your option) any later version. - -Quake III Arena source code is distributed in the hope that it will be -useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with Quake III Arena source code; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -=========================================================================== -*/ - -// -// Sys_SnapVector NASM code (Andrew Henderson) -// See win32/win_shared.c for the Win32 equivalent -// This code is provided to ensure that the -// rounding behavior (and, if necessary, the -// precision) of DLL and QVM code are identical -// e.g. for network-visible operations. -// See ftol.nasm for operations on a single float, -// as used in compiled VM and DLL code that does -// not use this system trap. -// - -// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus -// <tim@ngus.net> - -#include "qasm.h" - -#if id386 -.data - -fpucw: .long 0 -cw037F: .long 0x037F - -.text - -// void Sys_SnapVector( float *v ) -.globl C(Sys_SnapVector) -C(Sys_SnapVector): - pushl %eax - pushl %ebp - movl %esp,%ebp - - fnstcw fpucw - movl 12(%ebp),%eax - fldcw cw037F - flds (%eax) - fistpl (%eax) - fildl (%eax) - fstps (%eax) - flds 4(%eax) - fistpl 4(%eax) - fildl 4(%eax) - fstps 4(%eax) - flds 8(%eax) - fistpl 8(%eax) - fildl 8(%eax) - fstps 8(%eax) - fldcw fpucw - - popl %ebp - popl %eax - ret - -// void Sys_SnapVectorCW( float *v, unsigned short int cw ) -.globl C(Sys_SnapVectorCW) -C(Sys_SnapVectorCW): - pushl %eax - pushl %ebp - movl %esp,%ebp - - fnstcw fpucw - movl 12(%ebp),%eax - fldcw 16(%ebp) - flds (%eax) - fistpl (%eax) - fildl (%eax) - fstps (%eax) - flds 4(%eax) - fistpl 4(%eax) - fildl 4(%eax) - fstps 4(%eax) - flds 8(%eax) - fistpl 8(%eax) - fildl 8(%eax) - fstps 8(%eax) - fldcw fpucw - - popl %ebp - popl %eax - ret -#endif diff --git a/src/asm/vm_x86_64.asm b/src/asm/vm_x86_64.asm new file mode 100644 index 00000000..030b6987 --- /dev/null +++ b/src/asm/vm_x86_64.asm @@ -0,0 +1,76 @@ +; 
=========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +; +; This file is part of Quake III Arena source code. +; +; Quake III Arena source code is free software; you can redistribute it +; and/or modify it under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of the License, +; or (at your option) any later version. +; +; Quake III Arena source code is distributed in the hope that it will be +; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with Quake III Arena source code; if not, write to the Free Software +; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +; =========================================================================== + +; Call wrapper for vm_x86 when built with MSVC in 64 bit mode, +; since MSVC does not support inline x64 assembler code anymore. 
+; +; assumes __fastcall calling convention + +DoSyscall PROTO + +.code + +; Call to static void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg) + +qsyscall64 PROC + sub rsp, 28h ; after this esp will be aligned to 16 byte boundary + mov qword ptr [rsp + 20h], rcx ; 5th parameter "arg" is passed on stack + mov r9b, bl ; opStackOfs + mov r8, rdi ; opStackBase + mov edx, esi ; programStack + mov ecx, eax ; syscallNum + mov rax, DoSyscall ; store call address of DoSyscall in rax + call rax + add rsp, 28h + ret +qsyscall64 ENDP + + +; Call to compiled code after setting up the register environment for the VM +; prototype: +; uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase); + +qvmcall64 PROC + push rsi ; push non-volatile registers to stack + push rdi + push rbx + ; need to save pointer in rcx so we can write back the programData value to caller + push rcx + + ; registers r8 and r9 have correct value already thanx to __fastcall + xor rbx, rbx ; opStackOfs starts out being 0 + mov rdi, rdx ; opStack + mov esi, dword ptr [rcx] ; programStack + + call qword ptr [r8] ; instructionPointers[0] is also the entry point + + pop rcx + + mov dword ptr [rcx], esi ; write back the programStack value + mov al, bl ; return opStack offset + + pop rbx + pop rdi + pop rsi + + ret +qvmcall64 ENDP + +end diff --git a/src/client/cl_cgame.c b/src/client/cl_cgame.c index c1b58f2c..c86eb248 100644 --- a/src/client/cl_cgame.c +++ b/src/client/cl_cgame.c @@ -698,7 +698,7 @@ intptr_t CL_CgameSystemCalls( intptr_t *args ) { case CG_REAL_TIME: return Com_RealTime( VMA(1) ); case CG_SNAPVECTOR: - Sys_SnapVector( VMA(1) ); + Q_SnapVector(VMA(1)); return 0; case CG_CIN_PLAYCINEMATIC: diff --git a/src/client/snd_wavelet.c b/src/client/snd_wavelet.c index e75323c0..8c392cdc 100644 --- a/src/client/snd_wavelet.c +++ b/src/client/snd_wavelet.c @@ -23,8 +23,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, 
Boston, MA 02110-1301 USA #include "snd_local.h" -long myftol( float f ); - #define C0 0.4829629131445341 #define C1 0.8365163037378079 #define C2 0.2241438680420134 diff --git a/src/qcommon/common.c b/src/qcommon/common.c index 069eb922..c5f49ace 100644 --- a/src/qcommon/common.c +++ b/src/qcommon/common.c @@ -87,6 +87,14 @@ cvar_t *com_abnormalExit; cvar_t *com_homepath; cvar_t *com_busyWait; +#if defined(idx64) + void (*Q_VMftol)(void); +#elif defined(id386) + long (QDECL *Q_ftol)(float f); + void (QDECL *Q_VMftol)(void); + void (QDECL *Q_SnapVector)(vec3_t vec); +#endif + // com_speeds times int time_game; int time_frontend; // renderer frontend time @@ -2444,6 +2452,53 @@ static void Com_DetectAltivec(void) /* ================= +Com_DetectSSE +Find out whether we have SSE support for Q_ftol function +================= +*/ + +#if defined(id386) || defined(idx64) + +static void Com_DetectSSE(void) +{ +#ifndef idx64 + cpuFeatures_t feat; + + feat = Sys_GetProcessorFeatures(); + + if(feat & CF_SSE) + { + if(feat & CF_SSE2) + Q_SnapVector = qsnapvectorsse; + else + Q_SnapVector = qsnapvectorx87; + + Q_ftol = qftolsse; +#endif + Q_VMftol = qvmftolsse; + + Com_Printf("Have SSE support\n"); +#ifndef idx64 + } + else + { + Q_ftol = qftolx87; + Q_VMftol = qvmftolx87; + Q_SnapVector = qsnapvectorx87; + + Com_Printf("No SSE support on this machine\n"); + } +#endif +} + +#else + +#define Com_DetectSSE() + +#endif + +/* +================= Com_InitRand Seed the random number generator, if possible with an OS supplied random seed. 
================= @@ -2492,6 +2547,8 @@ void Com_Init( char *commandLine ) { // Swap_Init (); Cbuf_Init (); + Com_DetectSSE(); + // override anything from the config files with command line args Com_StartupVariable( NULL ); diff --git a/src/qcommon/q_platform.h b/src/qcommon/q_platform.h index 79b1c0c8..ddc8039d 100644 --- a/src/qcommon/q_platform.h +++ b/src/qcommon/q_platform.h @@ -88,7 +88,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define OS_STRING "win_mingw64" #endif -#define ID_INLINE inline +#define ID_INLINE __inline #define PATH_SEP '\\' #if defined( __WIN64__ ) diff --git a/src/qcommon/q_shared.h b/src/qcommon/q_shared.h index 2002bcc9..389412b0 100644 --- a/src/qcommon/q_shared.h +++ b/src/qcommon/q_shared.h @@ -419,6 +419,58 @@ extern vec3_t axisDefault[3]; #define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask) +int Q_isnan(float x); + +#ifdef idx64 + extern long qftolsse(float f); + extern void qvmftolsse(void); + extern void qsnapvectorsse(vec3_t vec); + + #define Q_ftol qftolsse + #define Q_SnapVector qsnapvectorsse + + extern void (*Q_VMftol)(void); +#elif defined(id386) + extern long QDECL qftolx87(float f); + extern long QDECL qftolsse(float f); + extern void QDECL qvmftolx87(void); + extern void QDECL qvmftolsse(void); + extern void QDECL qsnapvectorx87(vec3_t vec); + extern void QDECL qsnapvectorsse(vec3_t vec); + + extern long (QDECL *Q_ftol)(float f); + extern void (QDECL *Q_VMftol)(void); + extern void (QDECL *Q_SnapVector)(vec3_t vec); +#else + #define Q_ftol(f) lrintf((f)) + #define Q_SnapVector(vec)\ + do\ + {\ + vec3_t *temp = (vec);\ + \ + (*temp)[0] = round((*temp)[0]);\ + (*temp)[1] = round((*temp)[1]);\ + (*temp)[2] = round((*temp)[2]);\ + } while(0) +#endif +/* +// if your system does not have lrintf() and round() you can try this block. Please also open a bug report at bugzilla.icculus.org +// or write a mail to the ioq3 mailing list. 
+#else + #define Q_ftol(f) ((long) (f)) + #define Q_round(f) do { if((f) < 0) (f) -= 0.5f; else (f) += 0.5f; (f) = Q_ftol((f)); } while(0) + #define Q_SnapVector(vec) \ + do\ + {\ + vec3_t *temp = (vec);\ + \ + Q_round((*temp)[0]);\ + Q_round((*temp)[1]);\ + Q_round((*temp)[2]);\ + } while(0) +#endif +*/ + #if idppc static ID_INLINE float Q_rsqrt( float number ) { @@ -667,7 +719,6 @@ void MatrixMultiply(float in1[3][3], float in2[3][3], float out[3][3]); void VectorMatrixMultiply( const vec3_t p, vec3_t m[ 3 ], vec3_t out ); void AngleVectors( const vec3_t angles, vec3_t forward, vec3_t right, vec3_t up); void PerpendicularVector( vec3_t dst, const vec3_t src ); -int Q_isnan( float x ); void GetPerpendicularViewVector( const vec3_t point, const vec3_t p1, const vec3_t p2, vec3_t up ); diff --git a/src/qcommon/vm_x86.c b/src/qcommon/vm_x86.c index e609bc1e..72225473 100644 --- a/src/qcommon/vm_x86.c +++ b/src/qcommon/vm_x86.c @@ -68,29 +68,6 @@ static int pc = 0; #define FTOL_PTR -#ifdef _MSC_VER - -#if defined( FTOL_PTR ) -int _ftol( float ); -static void *ftolPtr = _ftol; -#endif - -#else // _MSC_VER - -#if defined( FTOL_PTR ) - -int qftol( void ); -int qftol027F( void ); -int qftol037F( void ); -int qftol0E7F( void ); -int qftol0F7F( void ); - - -static void *ftolPtr = qftol0F7F; -#endif // FTOL_PTR - -#endif - static int instruction, pass; static int lastConst = 0; static int oc0, oc1, pop0, pop1; @@ -112,15 +89,17 @@ typedef enum static ELastCommand LastCommand; -static inline int iss8(int32_t v) +static int iss8(int32_t v) { return (SCHAR_MIN <= v && v <= SCHAR_MAX); } -static inline int isu8(uint32_t v) +#if 0 +static int isu8(uint32_t v) { return (v <= UCHAR_MAX); } +#endif static int NextConstant4(void) { @@ -437,30 +416,37 @@ Uses asm to retrieve arguments from registers to work around different calling c ================= */ +#if defined(_MSC_VER) && defined(idx64) + +extern void qsyscall64(void); +extern uint8_t qvmcall64(int *programStack, int *opStack, 
intptr_t *instructionPointers, byte *dataBase); + +// Microsoft does not support inline assembler on x64 platforms. Meh. +void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg) +{ +#else static void DoSyscall(void) { - vm_t *savedVM; - int syscallNum; int programStack; int *opStackBase; - int opStackOfs; + uint8_t opStackOfs; intptr_t arg; +#endif + + vm_t *savedVM; #ifdef _MSC_VER + #ifndef idx64 __asm { mov dword ptr syscallNum, eax mov dword ptr programStack, esi - mov dword ptr opStackOfs, ebx -#ifdef idx64 - mov qword ptr opStackBase, rdi - mov qword ptr arg, rcx -#else + mov byte ptr opStackOfs, bl mov dword ptr opStackBase, edi mov dword ptr arg, ecx -#endif } + #endif #else __asm__ volatile( "" @@ -540,8 +526,13 @@ Call to DoSyscall() int EmitCallDoSyscall(vm_t *vm) { // use edx register to store DoSyscall address +#if defined(_MSC_VER) && defined(idx64) + EmitRexString(0x48, "BA"); // mov edx, qsyscall64 + EmitPtr(qsyscall64); +#else EmitRexString(0x48, "BA"); // mov edx, DoSyscall EmitPtr(DoSyscall); +#endif // Push important registers to stack as we can't really make // any assumptions about calling conventions. 
@@ -1630,9 +1621,8 @@ void VM_Compile(vm_t *vm, vmHeader_t *header) EmitString("DB 1C 9F"); // fistp dword ptr [edi + ebx * 4] #else // FTOL_PTR // call the library conversion function - EmitString("D9 04 9F"); // fld dword ptr [edi + ebx * 4] - EmitRexString(0x48, "BA"); // mov edx, ftolPtr - EmitPtr(ftolPtr); + EmitRexString(0x48, "BA"); // mov edx, Q_VMftol + EmitPtr(Q_VMftol); EmitRexString(0x48, "FF D2"); // call edx EmitCommand(LAST_COMMAND_MOV_STACK_EAX); // mov dword ptr [edi + ebx * 4], eax #endif @@ -1747,12 +1737,12 @@ This function is called directly by the generated code int VM_CallCompiled(vm_t *vm, int *args) { - int stack[OPSTACK_SIZE + 7]; + byte stack[OPSTACK_SIZE * 4 + 15]; void *entryPoint; int programCounter; int programStack, stackOnEntry; byte *image; - int *opStack, *opStackOnEntry; + int *opStack; int opStackOfs; currentVM = vm; @@ -1785,35 +1775,16 @@ int VM_CallCompiled(vm_t *vm, int *args) // off we go into generated code... entryPoint = vm->codeBase + vm->entryOfs; - opStack = opStackOnEntry = PADP(stack, 8); + opStack = PADP(stack, 16); *opStack = 0xDEADBEEF; opStackOfs = 0; #ifdef _MSC_VER + #ifdef idx64 + opStackOfs = qvmcall64(&programStack, opStack, vm->instructionPointers, vm->dataBase); + #else __asm { -#ifdef idx64 - // non-volatile registers according to x64 calling convention - push rsi - push rdi - push rbx - - mov esi, dword ptr programStack - mov rdi, qword ptr opStack - mov ebx, dword ptr opStackOfs - mov r8, qword ptr vm->instructionPointers - mov r9, qword ptr vm->dataBase - - call entryPoint - - mov dword ptr opStackOfs, ebx - mov qword ptr opStack, rdi - mov dword ptr programStack, esi - - pop rbx - pop rdi - pop rsi -#else pushad mov esi, dword ptr programStack @@ -1827,8 +1798,8 @@ int VM_CallCompiled(vm_t *vm, int *args) mov dword ptr programStack, esi popad -#endif } + #endif #elif defined(idx64) __asm__ volatile( "movq %5, %%rax\r\n" @@ -1856,7 +1827,7 @@ int VM_CallCompiled(vm_t *vm, int *args) ); #endif - 
if(opStack != opStackOnEntry || opStackOfs != 1 || *opStack != 0xDEADBEEF) + if(opStackOfs != 1 || *opStack != 0xDEADBEEF) { Com_Error(ERR_DROP, "opStack corrupted in compiled code"); } diff --git a/src/renderer/tr_light.c b/src/renderer/tr_light.c index 18dea318..05aca8b8 100644 --- a/src/renderer/tr_light.c +++ b/src/renderer/tr_light.c @@ -360,9 +360,9 @@ void R_SetupEntityLighting( const trRefdef_t *refdef, trRefEntity_t *ent ) { } // save out the byte packet version - ((byte *)&ent->ambientLightInt)[0] = myftol( ent->ambientLight[0] ); - ((byte *)&ent->ambientLightInt)[1] = myftol( ent->ambientLight[1] ); - ((byte *)&ent->ambientLightInt)[2] = myftol( ent->ambientLight[2] ); + ((byte *)&ent->ambientLightInt)[0] = Q_ftol(ent->ambientLight[0]); + ((byte *)&ent->ambientLightInt)[1] = Q_ftol(ent->ambientLight[1]); + ((byte *)&ent->ambientLightInt)[2] = Q_ftol(ent->ambientLight[2]); ((byte *)&ent->ambientLightInt)[3] = 0xff; // transform the direction to local space diff --git a/src/renderer/tr_local.h b/src/renderer/tr_local.h index ef49e217..1dea4bb3 100644 --- a/src/renderer/tr_local.h +++ b/src/renderer/tr_local.h @@ -35,14 +35,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define GL_INDEX_TYPE GL_UNSIGNED_INT typedef unsigned int glIndex_t; -// fast float to int conversion -#if id386 && !defined(__GNUC__) -long myftol( float f ); -#else -#define myftol(x) ((int)(x)) -#endif - - // everything that is needed by the backend needs // to be double buffered to allow it to run in // parallel on a dual cpu machine diff --git a/src/renderer/tr_mesh.c b/src/renderer/tr_mesh.c index ddcc62f7..446ee836 100644 --- a/src/renderer/tr_mesh.c +++ b/src/renderer/tr_mesh.c @@ -219,7 +219,7 @@ int R_ComputeLOD( trRefEntity_t *ent ) { } flod *= tr.currentModel->numLods; - lod = myftol( flod ); + lod = Q_ftol(flod); if ( lod < 0 ) { diff --git a/src/renderer/tr_shade.c b/src/renderer/tr_shade.c index b40a06c5..f12519b4 100644 --- 
a/src/renderer/tr_shade.c +++ b/src/renderer/tr_shade.c @@ -234,7 +234,7 @@ static void R_BindAnimatedImage( textureBundle_t *bundle ) { // it is necessary to do this messy calc to make sure animations line up // exactly with waveforms of the same frequency - index = myftol( tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE ); + index = Q_ftol(tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE); index >>= FUNCTABLE_SIZE2; if ( index < 0 ) { @@ -690,9 +690,9 @@ static void ProjectDlightTexture_scalar( void ) { } } clipBits[i] = clip; - colors[0] = myftol(floatColor[0] * modulate); - colors[1] = myftol(floatColor[1] * modulate); - colors[2] = myftol(floatColor[2] * modulate); + colors[0] = Q_ftol(floatColor[0] * modulate); + colors[1] = Q_ftol(floatColor[1] * modulate); + colors[2] = Q_ftol(floatColor[2] * modulate); colors[3] = 255; } diff --git a/src/renderer/tr_shade_calc.c b/src/renderer/tr_shade_calc.c index 13fcf0b5..8d02177b 100644 --- a/src/renderer/tr_shade_calc.c +++ b/src/renderer/tr_shade_calc.c @@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #endif -#define WAVEVALUE( table, base, amplitude, phase, freq ) ((base) + table[ myftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude)) +#define WAVEVALUE( table, base, amplitude, phase, freq ) ((base) + table[ Q_ftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude)) static float *TableForFunc( genFunc_t func ) { @@ -700,7 +700,7 @@ void RB_CalcWaveColor( const waveForm_t *wf, unsigned char *dstColors ) glow = 1; } - v = myftol( 255 * glow ); + v = Q_ftol(255 * glow); color[0] = color[1] = color[2] = v; color[3] = 255; v = *(int *)color; @@ -1019,21 +1019,6 @@ void RB_CalcRotateTexCoords( float degsPerSecond, float *st ) } - - - - -#if id386 && !defined(__GNUC__) - -long myftol( float f ) { - static int tmp; - __asm fld f - __asm fistp tmp - __asm mov 
eax, tmp -} - -#endif - /* ** RB_CalcSpecularAlpha ** @@ -1196,19 +1181,19 @@ static void RB_CalcDiffuseColor_scalar( unsigned char *colors ) *(int *)&colors[i*4] = ambientLightInt; continue; } - j = myftol( ambientLight[0] + incoming * directedLight[0] ); + j = Q_ftol(ambientLight[0] + incoming * directedLight[0]); if ( j > 255 ) { j = 255; } colors[i*4+0] = j; - j = myftol( ambientLight[1] + incoming * directedLight[1] ); + j = Q_ftol(ambientLight[1] + incoming * directedLight[1]); if ( j > 255 ) { j = 255; } colors[i*4+1] = j; - j = myftol( ambientLight[2] + incoming * directedLight[2] ); + j = Q_ftol(ambientLight[2] + incoming * directedLight[2]); if ( j > 255 ) { j = 255; } diff --git a/src/renderer/tr_sky.c b/src/renderer/tr_sky.c index 6ab8aa6e..5c7788c0 100644 --- a/src/renderer/tr_sky.c +++ b/src/renderer/tr_sky.c @@ -554,10 +554,10 @@ static void FillCloudBox( const shader_t *shader, int stage ) continue; } - sky_mins_subd[0] = myftol( sky_mins[0][i] * HALF_SKY_SUBDIVISIONS ); - sky_mins_subd[1] = myftol( sky_mins[1][i] * HALF_SKY_SUBDIVISIONS ); - sky_maxs_subd[0] = myftol( sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS ); - sky_maxs_subd[1] = myftol( sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS ); + sky_mins_subd[0] = Q_ftol(sky_mins[0][i] * HALF_SKY_SUBDIVISIONS); + sky_mins_subd[1] = Q_ftol(sky_mins[1][i] * HALF_SKY_SUBDIVISIONS); + sky_maxs_subd[0] = Q_ftol(sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS); + sky_maxs_subd[1] = Q_ftol(sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS); if ( sky_mins_subd[0] < -HALF_SKY_SUBDIVISIONS ) sky_mins_subd[0] = -HALF_SKY_SUBDIVISIONS; diff --git a/src/server/sv_game.c b/src/server/sv_game.c index c81c62ea..4948a9c8 100644 --- a/src/server/sv_game.c +++ b/src/server/sv_game.c @@ -415,7 +415,7 @@ intptr_t SV_GameSystemCalls( intptr_t *args ) { case G_REAL_TIME: return Com_RealTime( VMA(1) ); case G_SNAPVECTOR: - Sys_SnapVector( VMA(1) ); + Q_SnapVector( VMA(1) ); return 0; case G_SEND_GAMESTAT: diff --git a/src/sys/sys_main.c 
b/src/sys/sys_main.c index 46a795eb..07e8e395 100644 --- a/src/sys/sys_main.c +++ b/src/sys/sys_main.c @@ -417,8 +417,8 @@ Used to load a development dll instead of a virtual machine #2 look in fs_basepath ================= */ -void *Sys_LoadDll( const char *name, - intptr_t (**entryPoint)(int, ...), +void * QDECL Sys_LoadDll( const char *name, + intptr_t (QDECL **entryPoint)(int, ...), intptr_t (*systemcalls)(intptr_t, ...) ) { void *libHandle; diff --git a/src/sys/sys_unix.c b/src/sys/sys_unix.c index 4aad8b88..72ca8360 100644 --- a/src/sys/sys_unix.c +++ b/src/sys/sys_unix.c @@ -37,6 +37,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include <pwd.h> #include <libgen.h> #include <fcntl.h> +#include <fenv.h> qboolean stdinIsATTY; @@ -118,31 +119,6 @@ int Sys_Milliseconds (void) return curtime; } -#if !id386 -/* -================== -fastftol -================== -*/ -long fastftol( float f ) -{ - return (long)f; -} - -/* -================== -Sys_SnapVector -================== -*/ -void Sys_SnapVector( float *v ) -{ - v[0] = rint(v[0]); - v[1] = rint(v[1]); - v[2] = rint(v[2]); -} -#endif - - /* ================== Sys_RandomBytes @@ -749,6 +725,12 @@ void Sys_GLimpInit( void ) // NOP } +void Sys_SetFloatEnv(void) +{ + // rounding towards 0 + fesetround(FE_TOWARDZERO); +} + /* ============== Sys_PlatformInit diff --git a/src/sys/sys_win32.c b/src/sys/sys_win32.c index f91b26b1..4fddfdc0 100644 --- a/src/sys/sys_win32.c +++ b/src/sys/sys_win32.c @@ -39,6 +39,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include <wincrypt.h> #include <shlobj.h> #include <psapi.h> +#include <float.h> // Used to determine where to store user-specific files static char homePath[ MAX_OSPATH ] = { 0 }; @@ -47,14 +48,38 @@ static char homePath[ MAX_OSPATH ] = { 0 }; static UINT timerResolution = 0; #endif -#ifdef __WIN64__ -void Sys_SnapVector( float *v ) +/* +================ +Sys_SetFPUCW +Set FPU control word to default value 
+================ +*/ + +#ifndef _RC_CHOP +// mingw doesn't seem to have these defined :( + + #define _MCW_EM 0x0008001fU + #define _MCW_RC 0x00000300U + #define _MCW_PC 0x00030000U + #define _RC_CHOP 0x00000300U + #define _PC_53 0x00010000U + + unsigned int _controlfp(unsigned int new, unsigned int mask); +#endif + +#define FPUCWMASK1 (_MCW_RC | _MCW_EM) +#define FPUCW (_RC_CHOP | _MCW_EM | _PC_53) + +#ifdef idx64 +#define FPUCWMASK (FPUCWMASK1) +#else +#define FPUCWMASK (FPUCWMASK1 | _MCW_PC) +#endif + +void Sys_SetFloatEnv(void) { - v[0] = rint(v[0]); - v[1] = rint(v[1]); - v[2] = rint(v[2]); + _controlfp(FPUCW, FPUCWMASK); } -#endif /* ================ @@ -136,34 +161,6 @@ int Sys_Milliseconds (void) return sys_curtime; } -#ifndef __GNUC__ //see snapvectora.s -/* -================ -Sys_SnapVector -================ -*/ -void Sys_SnapVector( float *v ) -{ - int i; - float f; - - f = *v; - __asm fld f; - __asm fistp i; - *v = i; - v++; - f = *v; - __asm fld f; - __asm fistp i; - *v = i; - v++; - f = *v; - __asm fld f; - __asm fistp i; - *v = i; -} -#endif - /* ================ Sys_RandomBytes @@ -715,9 +712,12 @@ void Sys_PlatformInit( void ) { #ifndef DEDICATED TIMECAPS ptc; - const char *SDL_VIDEODRIVER = getenv( "SDL_VIDEODRIVER" ); +#endif + + Sys_SetFloatEnv(); +#ifndef DEDICATED if( SDL_VIDEODRIVER ) { Com_Printf( "SDL_VIDEODRIVER is externally set to \"%s\", " |