summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThilo Schulz <arny@ats.s.bawue.de>2011-06-13 09:56:39 +0000
committerTim Angus <tim@ngus.net>2013-01-09 23:15:55 +0000
commit6a71409a0622050f9a682d4e3b02419c444febe5 (patch)
tree7766ff71304d04c6e42de7dd7d48ed7e7e0fac59 /src
parentb15804d39f71e9be202818288726777d1ca8ac09 (diff)
- Add MASM assembler files for MSVC x64 projects to support vm_x86 in x64 mode - Clean up ftol()/snapvector() mess - Make use of SSE instructions for ftol()/snapvector() if available - move ftol/snapvector pure assembler to inline assembler, this will add x86_64 and improve support for different calling conventions - Set FPU control word at program startup to get consistent behaviour on all platforms
Diffstat (limited to 'src')
-rw-r--r--src/asm/ftola.asm90
-rw-r--r--src/asm/ftola.s157
-rw-r--r--src/asm/snapvector.asm107
-rw-r--r--src/asm/snapvectora.s103
-rw-r--r--src/asm/vm_x86_64.asm76
-rw-r--r--src/client/cl_cgame.c2
-rw-r--r--src/client/snd_wavelet.c2
-rw-r--r--src/qcommon/common.c57
-rw-r--r--src/qcommon/q_platform.h2
-rw-r--r--src/qcommon/q_shared.h53
-rw-r--r--src/qcommon/vm_x86.c99
-rw-r--r--src/renderer/tr_light.c6
-rw-r--r--src/renderer/tr_local.h8
-rw-r--r--src/renderer/tr_mesh.c2
-rw-r--r--src/renderer/tr_shade.c8
-rw-r--r--src/renderer/tr_shade_calc.c25
-rw-r--r--src/renderer/tr_sky.c8
-rw-r--r--src/server/sv_game.c2
-rw-r--r--src/sys/sys_main.c4
-rw-r--r--src/sys/sys_unix.c32
-rw-r--r--src/sys/sys_win32.c70
21 files changed, 481 insertions, 432 deletions
diff --git a/src/asm/ftola.asm b/src/asm/ftola.asm
new file mode 100644
index 00000000..370c12d8
--- /dev/null
+++ b/src/asm/ftola.asm
@@ -0,0 +1,90 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+;
+; This file is part of Quake III Arena source code.
+;
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+;
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+; ===========================================================================
+
+; MASM ftol conversion functions using SSE or FPU
+; assume __cdecl calling convention is being used for x86, __fastcall for x64
+
+IFNDEF idx64
+.model flat, c
+ENDIF
+
+; .data
+
+; ifndef idx64
+; fpucw WORD 0F7Fh
+; endif
+
+.code
+
+IFDEF idx64
+; qftol using SSE
+
+ qftolsse PROC ; long qftolsse(float f): x64 variant, f is taken from xmm0
+ cvttss2si eax, xmm0 ; convert with truncation (round toward zero), result in eax
+ ret
+ qftolsse ENDP
+
+ qvmftolsse PROC ; void qvmftolsse(void): helper called from VM-generated code
+ movss xmm0, dword ptr [rdi + rbx * 4] ; operand is the float at [rdi + rbx * 4]
+ cvttss2si eax, xmm0 ; truncated integer result is returned in eax
+ ret
+ qvmftolsse ENDP
+
+ELSE
+; qftol using FPU
+
+ qftolx87m macro src ; shared body of the x87 conversion routines; the expansion ends in ret
+; not necessary, fpucw is set with _controlfp at startup
+; sub esp, 2
+; fnstcw word ptr [esp]
+; fldcw fpucw
+ fld dword ptr src ; load the float stored at src
+ fistp dword ptr src ; overwrite src with the integer, rounded per the current FPU control word
+; fldcw [esp]
+ mov eax, src ; return the converted value in eax
+; add esp, 2
+ ret
+ endm
+
+ qftolx87 PROC ; long qftolx87(float f), cdecl: f is at [esp + 4]; the macro expansion supplies the ret
+; need this line when storing FPU control word on stack
+; qftolx87m [esp + 6]
+ qftolx87m [esp + 4]
+ qftolx87 ENDP
+
+ qvmftolx87 PROC ; void qvmftolx87(void): converts the float at [edi + ebx * 4] in place and returns it in eax
+ qftolx87m [edi + ebx * 4]
+ qvmftolx87 ENDP
+
+; qftol using SSE
+ qftolsse PROC ; long qftolsse(float f), cdecl
+ movss xmm0, dword ptr [esp + 4] ; f is the first stack argument
+ cvttss2si eax, xmm0 ; convert with truncation (round toward zero), result in eax
+ ret
+ qftolsse ENDP
+
+ qvmftolsse PROC ; void qvmftolsse(void): helper called from VM-generated code
+ movss xmm0, dword ptr [edi + ebx * 4] ; operand is the float at [edi + ebx * 4]
+ cvttss2si eax, xmm0 ; truncated integer result is returned in eax
+ ret
+ qvmftolsse ENDP
+ENDIF
+
+end
diff --git a/src/asm/ftola.s b/src/asm/ftola.s
deleted file mode 100644
index ca3d8626..00000000
--- a/src/asm/ftola.s
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
-
-Quake III Arena source code is distributed in the hope that it will be
-useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with Quake III Arena source code; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-===========================================================================
-*/
-
-//
-// qftol -- fast floating point to long conversion.
-//
-
-// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus
-// <tim@ngus.net>
-
-#include "qasm.h"
-
-.data
-
-temp: .single 0.0
-fpucw: .long 0
-
-// Precision Control Field , 2 bits / 0x0300
-// PC24 0x0000 Single precision (24 bits).
-// PC53 0x0200 Double precision (53 bits).
-// PC64 0x0300 Extended precision (64 bits).
-
-// Rounding Control Field, 2 bits / 0x0C00
-// RCN 0x0000 Rounding to nearest (even).
-// RCD 0x0400 Rounding down (directed, minus).
-// RCU 0x0800 Rounding up (directed plus).
-// RC0 0x0C00 Rounding towards zero (chop mode).
-
-
-// rounding towards nearest (even)
-cw027F: .long 0x027F
-cw037F: .long 0x037F
-
-// rounding towards zero (chop mode)
-cw0E7F: .long 0x0E7F
-cw0F7F: .long 0x0F7F
-
-
-.text
-
-//
-// int qftol( void ) - default control word
-//
-
-.globl C(qftol)
-
-C(qftol):
- fistpl temp
- movl temp,%eax
- ret
-
-
-//
-// int qftol027F( void ) - DirectX FPU
-//
-
-.globl C(qftol027F)
-
-C(qftol027F):
- fnstcw fpucw
- fldcw cw027F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
-
-//
-// int qftol037F( void ) - Linux FPU
-//
-
-.globl C(qftol037F)
-
-C(qftol037F):
- fnstcw fpucw
- fldcw cw037F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
-
-
-//
-// int qftol0F7F( void ) - ANSI
-//
-
-.globl C(qftol0F7F)
-
-C(qftol0F7F):
- fnstcw fpucw
- fldcw cw0F7F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
-
-//
-// int qftol0E7F( void )
-//
-
-.globl C(qftol0E7F)
-
-C(qftol0E7F):
- fnstcw fpucw
- fldcw cw0E7F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
-
-
-
-//
-// long Q_ftol( float q )
-//
-
-.globl C(Q_ftol)
-
-C(Q_ftol):
- flds 4(%esp)
- fistpl temp
- movl temp,%eax
- ret
-
-
-//
-// long qftol0F7F( float q ) - Linux FPU
-//
-
-.globl C(Q_ftol0F7F)
-
-C(Q_ftol0F7F):
- fnstcw fpucw
- flds 4(%esp)
- fldcw cw0F7F
- fistpl temp
- fldcw fpucw
- movl temp,%eax
- ret
diff --git a/src/asm/snapvector.asm b/src/asm/snapvector.asm
new file mode 100644
index 00000000..87c77372
--- /dev/null
+++ b/src/asm/snapvector.asm
@@ -0,0 +1,107 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+;
+; This file is part of Quake III Arena source code.
+;
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+;
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+; ===========================================================================
+
+; MASM version of snapvector conversion function using SSE or FPU
+; assume __cdecl calling convention is being used for x86, __fastcall for x64
+;
+; function prototype:
+; void qsnapvector(vec3_t vec)
+
+IFNDEF idx64
+.model flat, c
+ENDIF
+
+.data
+
+ ALIGN 16
+ ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h
+ ssecw DWORD 00001F80h
+
+IFNDEF idx64
+ fpucw WORD 037Fh
+ENDIF
+
+.code
+
+IFDEF idx64
+; qsnapvector using SSE
+
+ qsnapvectorsse PROC ; void qsnapvectorsse(vec3_t vec): x64 variant, vec pointer arrives in rcx
+ sub rsp, 4 ; scratch slot for the caller's SSE control word
+ stmxcsr [rsp] ; save SSE control word
+ ldmxcsr ssecw ; set to round nearest
+
+ push rdi ; rdi is clobbered below and restored before returning
+ mov rdi, rcx ; maskmovdqu uses rdi as implicit memory operand
+ movaps xmm1, ssemask ; initialize the mask register for maskmovdqu (first three dwords selected)
+ movups xmm0, [rdi] ; load 4 floats from vec in one go (reads one dword past the 3-float vector)
+ cvtps2dq xmm0, xmm0 ; convert 4 single fp to int
+ cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
+ maskmovdqu xmm0, xmm1 ; write 3 values back to memory
+ pop rdi
+
+ ldmxcsr [rsp] ; restore sse control word to old value
+ add rsp, 4
+ ret
+ qsnapvectorsse ENDP
+
+ELSE
+
+ qsnapvectorsse PROC ; void qsnapvectorsse(vec3_t vec), cdecl: rounds the three floats of vec in place
+ sub esp, 4 ; scratch slot for the caller's SSE control word
+ stmxcsr [esp] ; save SSE control word
+ ldmxcsr ssecw ; set to round nearest
+
+ push edi ; edi is clobbered below and restored before returning
+ mov edi, dword ptr 12[esp] ; vec argument: 4 (scratch slot) + 4 (saved edi) + 4 (return address); maskmovdqu uses edi as implicit memory operand
+ movaps xmm1, ssemask ; initialize the mask register for maskmovdqu (first three dwords selected)
+ movups xmm0, [edi] ; load 4 floats from vec in one go (reads one dword past the 3-float vector)
+ cvtps2dq xmm0, xmm0 ; convert 4 single fp to int
+ cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
+ maskmovdqu xmm0, xmm1 ; write 3 values back to memory
+ pop edi
+
+ ldmxcsr [esp] ; restore sse control word to old value
+ add esp, 4
+ ret
+ qsnapvectorsse ENDP
+
+ qroundx87 macro src ; rounds the float stored at src to an integral value, in place
+ fld dword ptr src ; load the float
+ fistp dword ptr src ; store it back as an integer, rounded per the current FPU control word
+ fild dword ptr src ; reload that integer
+ fstp dword ptr src ; and store it back as a float
+ endm
+
+ qsnapvectorx87 PROC ; void qsnapvectorx87(vec3_t vec), cdecl: x87 version, rounds the three floats of vec in place
+ mov eax, dword ptr 4[esp] ; fetch the vec pointer before esp is adjusted
+ sub esp, 2 ; scratch slot for the caller's FPU control word
+ fnstcw word ptr [esp] ; save the caller's FPU control word
+ fldcw fpucw ; switch to 037Fh (round to nearest) while rounding
+ qroundx87 [eax]
+ qroundx87 4[eax]
+ qroundx87 8[eax]
+ fldcw [esp] ; restore the caller's FPU control word
+ add esp, 2
+ ret ; return to the caller; without it execution runs off the end of the procedure
+ qsnapvectorx87 ENDP
+
+ENDIF
+
+end
diff --git a/src/asm/snapvectora.s b/src/asm/snapvectora.s
deleted file mode 100644
index bc10c757..00000000
--- a/src/asm/snapvectora.s
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
-
-Quake III Arena source code is distributed in the hope that it will be
-useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with Quake III Arena source code; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-===========================================================================
-*/
-
-//
-// Sys_SnapVector NASM code (Andrew Henderson)
-// See win32/win_shared.c for the Win32 equivalent
-// This code is provided to ensure that the
-// rounding behavior (and, if necessary, the
-// precision) of DLL and QVM code are identical
-// e.g. for network-visible operations.
-// See ftol.nasm for operations on a single float,
-// as used in compiled VM and DLL code that does
-// not use this system trap.
-//
-
-// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus
-// <tim@ngus.net>
-
-#include "qasm.h"
-
-#if id386
-.data
-
-fpucw: .long 0
-cw037F: .long 0x037F
-
-.text
-
-// void Sys_SnapVector( float *v )
-.globl C(Sys_SnapVector)
-C(Sys_SnapVector):
- pushl %eax
- pushl %ebp
- movl %esp,%ebp
-
- fnstcw fpucw
- movl 12(%ebp),%eax
- fldcw cw037F
- flds (%eax)
- fistpl (%eax)
- fildl (%eax)
- fstps (%eax)
- flds 4(%eax)
- fistpl 4(%eax)
- fildl 4(%eax)
- fstps 4(%eax)
- flds 8(%eax)
- fistpl 8(%eax)
- fildl 8(%eax)
- fstps 8(%eax)
- fldcw fpucw
-
- popl %ebp
- popl %eax
- ret
-
-// void Sys_SnapVectorCW( float *v, unsigned short int cw )
-.globl C(Sys_SnapVectorCW)
-C(Sys_SnapVectorCW):
- pushl %eax
- pushl %ebp
- movl %esp,%ebp
-
- fnstcw fpucw
- movl 12(%ebp),%eax
- fldcw 16(%ebp)
- flds (%eax)
- fistpl (%eax)
- fildl (%eax)
- fstps (%eax)
- flds 4(%eax)
- fistpl 4(%eax)
- fildl 4(%eax)
- fstps 4(%eax)
- flds 8(%eax)
- fistpl 8(%eax)
- fildl 8(%eax)
- fstps 8(%eax)
- fldcw fpucw
-
- popl %ebp
- popl %eax
- ret
-#endif
diff --git a/src/asm/vm_x86_64.asm b/src/asm/vm_x86_64.asm
new file mode 100644
index 00000000..030b6987
--- /dev/null
+++ b/src/asm/vm_x86_64.asm
@@ -0,0 +1,76 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+;
+; This file is part of Quake III Arena source code.
+;
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+;
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+; ===========================================================================
+
+; Call wrapper for vm_x86 when built with MSVC in 64 bit mode,
+; since MSVC does not support inline x64 assembler code anymore.
+;
+; assumes __fastcall calling convention
+
+DoSyscall PROTO
+
+.code
+
+; Call to void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg)
+
+qsyscall64 PROC
+ sub rsp, 28h ; after this rsp will be aligned to 16 byte boundary
+ mov qword ptr [rsp + 20h], rcx ; 5th parameter "arg" is passed on stack (stored before ecx is overwritten below)
+ mov r9b, bl ; opStackOfs
+ mov r8, rdi ; opStackBase
+ mov edx, esi ; programStack
+ mov ecx, eax ; syscallNum
+ mov rax, DoSyscall ; store call address of DoSyscall in rax
+ call rax
+ add rsp, 28h
+ ret
+qsyscall64 ENDP
+
+
+; Call to compiled code after setting up the register environment for the VM
+; prototype:
+; uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
+
+qvmcall64 PROC
+ push rsi ; push non-volatile registers to stack
+ push rdi
+ push rbx
+ ; need to save pointer in rcx so we can write back the programStack value to caller
+ push rcx
+
+ ; registers r8 (instructionPointers) and r9 (dataBase) have the correct value already thanks to __fastcall
+ xor rbx, rbx ; opStackOfs starts out being 0
+ mov rdi, rdx ; opStack
+ mov esi, dword ptr [rcx] ; programStack
+
+ call qword ptr [r8] ; instructionPointers[0] is also the entry point
+
+ pop rcx
+
+ mov dword ptr [rcx], esi ; write back the programStack value
+ mov al, bl ; return opStack offset
+
+ pop rbx
+ pop rdi
+ pop rsi
+
+ ret
+qvmcall64 ENDP
+
+end
diff --git a/src/client/cl_cgame.c b/src/client/cl_cgame.c
index c1b58f2c..c86eb248 100644
--- a/src/client/cl_cgame.c
+++ b/src/client/cl_cgame.c
@@ -698,7 +698,7 @@ intptr_t CL_CgameSystemCalls( intptr_t *args ) {
case CG_REAL_TIME:
return Com_RealTime( VMA(1) );
case CG_SNAPVECTOR:
- Sys_SnapVector( VMA(1) );
+ Q_SnapVector(VMA(1));
return 0;
case CG_CIN_PLAYCINEMATIC:
diff --git a/src/client/snd_wavelet.c b/src/client/snd_wavelet.c
index e75323c0..8c392cdc 100644
--- a/src/client/snd_wavelet.c
+++ b/src/client/snd_wavelet.c
@@ -23,8 +23,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include "snd_local.h"
-long myftol( float f );
-
#define C0 0.4829629131445341
#define C1 0.8365163037378079
#define C2 0.2241438680420134
diff --git a/src/qcommon/common.c b/src/qcommon/common.c
index 069eb922..c5f49ace 100644
--- a/src/qcommon/common.c
+++ b/src/qcommon/common.c
@@ -87,6 +87,14 @@ cvar_t *com_abnormalExit;
cvar_t *com_homepath;
cvar_t *com_busyWait;
+#if defined(idx64)
+ void (*Q_VMftol)(void);
+#elif defined(id386)
+ long (QDECL *Q_ftol)(float f);
+ void (QDECL *Q_VMftol)(void);
+ void (QDECL *Q_SnapVector)(vec3_t vec);
+#endif
+
// com_speeds times
int time_game;
int time_frontend; // renderer frontend time
@@ -2444,6 +2452,53 @@ static void Com_DetectAltivec(void)
/*
=================
+Com_DetectSSE
+Find out whether we have SSE support for Q_ftol function
+=================
+*/
+
+#if defined(id386) || defined(idx64)
+
+static void Com_DetectSSE(void)
+{
+#ifndef idx64 // 32 bit x86 only: pick the SSE or x87 routines at runtime
+ cpuFeatures_t feat;
+
+ feat = Sys_GetProcessorFeatures();
+
+ if(feat & CF_SSE)
+ {
+ if(feat & CF_SSE2) // the SSE snapvector routine is only selected when SSE2 is reported
+ Q_SnapVector = qsnapvectorsse;
+ else
+ Q_SnapVector = qsnapvectorx87;
+
+ Q_ftol = qftolsse;
+#endif
+ Q_VMftol = qvmftolsse; // unconditional on x86_64; on x86 only reached inside the CF_SSE branch
+
+ Com_Printf("Have SSE support\n");
+#ifndef idx64
+ }
+ else
+ { // no SSE at all: use the x87 implementations for everything
+ Q_ftol = qftolx87;
+ Q_VMftol = qvmftolx87;
+ Q_SnapVector = qsnapvectorx87;
+
+ Com_Printf("No SSE support on this machine\n");
+ }
+#endif
+}
+
+#else
+
+#define Com_DetectSSE()
+
+#endif
+
+/*
+=================
Com_InitRand
Seed the random number generator, if possible with an OS supplied random seed.
=================
@@ -2492,6 +2547,8 @@ void Com_Init( char *commandLine ) {
// Swap_Init ();
Cbuf_Init ();
+ Com_DetectSSE();
+
// override anything from the config files with command line args
Com_StartupVariable( NULL );
diff --git a/src/qcommon/q_platform.h b/src/qcommon/q_platform.h
index 79b1c0c8..ddc8039d 100644
--- a/src/qcommon/q_platform.h
+++ b/src/qcommon/q_platform.h
@@ -88,7 +88,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#define OS_STRING "win_mingw64"
#endif
-#define ID_INLINE inline
+#define ID_INLINE __inline
#define PATH_SEP '\\'
#if defined( __WIN64__ )
diff --git a/src/qcommon/q_shared.h b/src/qcommon/q_shared.h
index 2002bcc9..389412b0 100644
--- a/src/qcommon/q_shared.h
+++ b/src/qcommon/q_shared.h
@@ -419,6 +419,58 @@ extern vec3_t axisDefault[3];
#define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask)
+int Q_isnan(float x);
+
+#ifdef idx64 // x86_64: the SSE routines are used directly, only the VM helper goes through a pointer
+ extern long qftolsse(float f);
+ extern void qvmftolsse(void);
+ extern void qsnapvectorsse(vec3_t vec);
+
+ #define Q_ftol qftolsse
+ #define Q_SnapVector qsnapvectorsse
+
+ extern void (*Q_VMftol)(void);
+#elif defined(id386) // x86: function pointers, assigned by Com_DetectSSE() in common.c
+ extern long QDECL qftolx87(float f);
+ extern long QDECL qftolsse(float f);
+ extern void QDECL qvmftolx87(void);
+ extern void QDECL qvmftolsse(void);
+ extern void QDECL qsnapvectorx87(vec3_t vec);
+ extern void QDECL qsnapvectorsse(vec3_t vec);
+
+ extern long (QDECL *Q_ftol)(float f);
+ extern void (QDECL *Q_VMftol)(void);
+ extern void (QDECL *Q_SnapVector)(vec3_t vec);
+#else // every other architecture: C library calls, no Q_VMftol is declared here
+ #define Q_ftol(f) lrintf((f))
+ #define Q_SnapVector(vec)\
+ do\
+ {\
+ vec3_t *temp = (vec);\
+ \
+ (*temp)[0] = round((*temp)[0]);\
+ (*temp)[1] = round((*temp)[1]);\
+ (*temp)[2] = round((*temp)[2]);\
+ } while(0)
+#endif
+/*
+// if your system does not have lrintf() and round() you can try this block. Please also open a bug report at bugzilla.icculus.org
+// or write a mail to the ioq3 mailing list.
+#else
+ #define Q_ftol(f) ((long) (f))
+ #define Q_round(f) do { if((f) < 0) (f) -= 0.5f; else (f) += 0.5f; (f) = Q_ftol((f)); } while(0)
+ #define Q_SnapVector(vec) \
+ do\
+ {\
+ vec3_t *temp = (vec);\
+ \
+ Q_round((*temp)[0]);\
+ Q_round((*temp)[1]);\
+ Q_round((*temp)[2]);\
+ } while(0)
+#endif
+*/
+
#if idppc
static ID_INLINE float Q_rsqrt( float number ) {
@@ -667,7 +719,6 @@ void MatrixMultiply(float in1[3][3], float in2[3][3], float out[3][3]);
void VectorMatrixMultiply( const vec3_t p, vec3_t m[ 3 ], vec3_t out );
void AngleVectors( const vec3_t angles, vec3_t forward, vec3_t right, vec3_t up);
void PerpendicularVector( vec3_t dst, const vec3_t src );
-int Q_isnan( float x );
void GetPerpendicularViewVector( const vec3_t point, const vec3_t p1,
const vec3_t p2, vec3_t up );
diff --git a/src/qcommon/vm_x86.c b/src/qcommon/vm_x86.c
index e609bc1e..72225473 100644
--- a/src/qcommon/vm_x86.c
+++ b/src/qcommon/vm_x86.c
@@ -68,29 +68,6 @@ static int pc = 0;
#define FTOL_PTR
-#ifdef _MSC_VER
-
-#if defined( FTOL_PTR )
-int _ftol( float );
-static void *ftolPtr = _ftol;
-#endif
-
-#else // _MSC_VER
-
-#if defined( FTOL_PTR )
-
-int qftol( void );
-int qftol027F( void );
-int qftol037F( void );
-int qftol0E7F( void );
-int qftol0F7F( void );
-
-
-static void *ftolPtr = qftol0F7F;
-#endif // FTOL_PTR
-
-#endif
-
static int instruction, pass;
static int lastConst = 0;
static int oc0, oc1, pop0, pop1;
@@ -112,15 +89,17 @@ typedef enum
static ELastCommand LastCommand;
-static inline int iss8(int32_t v)
+static int iss8(int32_t v)
{
return (SCHAR_MIN <= v && v <= SCHAR_MAX);
}
-static inline int isu8(uint32_t v)
+#if 0
+static int isu8(uint32_t v)
{
return (v <= UCHAR_MAX);
}
+#endif
static int NextConstant4(void)
{
@@ -437,30 +416,37 @@ Uses asm to retrieve arguments from registers to work around different calling c
=================
*/
+#if defined(_MSC_VER) && defined(idx64)
+
+extern void qsyscall64(void);
+extern uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
+
+// Microsoft does not support inline assembler on x64 platforms. Meh.
+void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg)
+{
+#else
static void DoSyscall(void)
{
- vm_t *savedVM;
-
int syscallNum;
int programStack;
int *opStackBase;
- int opStackOfs;
+ uint8_t opStackOfs;
intptr_t arg;
+#endif
+
+ vm_t *savedVM;
#ifdef _MSC_VER
+ #ifndef idx64
__asm
{
mov dword ptr syscallNum, eax
mov dword ptr programStack, esi
- mov dword ptr opStackOfs, ebx
-#ifdef idx64
- mov qword ptr opStackBase, rdi
- mov qword ptr arg, rcx
-#else
+ mov byte ptr opStackOfs, bl
mov dword ptr opStackBase, edi
mov dword ptr arg, ecx
-#endif
}
+ #endif
#else
__asm__ volatile(
""
@@ -540,8 +526,13 @@ Call to DoSyscall()
int EmitCallDoSyscall(vm_t *vm)
{
// use edx register to store DoSyscall address
+#if defined(_MSC_VER) && defined(idx64)
+ EmitRexString(0x48, "BA"); // mov edx, qsyscall64
+ EmitPtr(qsyscall64);
+#else
EmitRexString(0x48, "BA"); // mov edx, DoSyscall
EmitPtr(DoSyscall);
+#endif
// Push important registers to stack as we can't really make
// any assumptions about calling conventions.
@@ -1630,9 +1621,8 @@ void VM_Compile(vm_t *vm, vmHeader_t *header)
EmitString("DB 1C 9F"); // fistp dword ptr [edi + ebx * 4]
#else // FTOL_PTR
// call the library conversion function
- EmitString("D9 04 9F"); // fld dword ptr [edi + ebx * 4]
- EmitRexString(0x48, "BA"); // mov edx, ftolPtr
- EmitPtr(ftolPtr);
+ EmitRexString(0x48, "BA"); // mov edx, Q_VMftol
+ EmitPtr(Q_VMftol);
EmitRexString(0x48, "FF D2"); // call edx
EmitCommand(LAST_COMMAND_MOV_STACK_EAX); // mov dword ptr [edi + ebx * 4], eax
#endif
@@ -1747,12 +1737,12 @@ This function is called directly by the generated code
int VM_CallCompiled(vm_t *vm, int *args)
{
- int stack[OPSTACK_SIZE + 7];
+ byte stack[OPSTACK_SIZE * 4 + 15];
void *entryPoint;
int programCounter;
int programStack, stackOnEntry;
byte *image;
- int *opStack, *opStackOnEntry;
+ int *opStack;
int opStackOfs;
currentVM = vm;
@@ -1785,35 +1775,16 @@ int VM_CallCompiled(vm_t *vm, int *args)
// off we go into generated code...
entryPoint = vm->codeBase + vm->entryOfs;
- opStack = opStackOnEntry = PADP(stack, 8);
+ opStack = PADP(stack, 16);
*opStack = 0xDEADBEEF;
opStackOfs = 0;
#ifdef _MSC_VER
+ #ifdef idx64
+ opStackOfs = qvmcall64(&programStack, opStack, vm->instructionPointers, vm->dataBase);
+ #else
__asm
{
-#ifdef idx64
- // non-volatile registers according to x64 calling convention
- push rsi
- push rdi
- push rbx
-
- mov esi, dword ptr programStack
- mov rdi, qword ptr opStack
- mov ebx, dword ptr opStackOfs
- mov r8, qword ptr vm->instructionPointers
- mov r9, qword ptr vm->dataBase
-
- call entryPoint
-
- mov dword ptr opStackOfs, ebx
- mov qword ptr opStack, rdi
- mov dword ptr programStack, esi
-
- pop rbx
- pop rdi
- pop rsi
-#else
pushad
mov esi, dword ptr programStack
@@ -1827,8 +1798,8 @@ int VM_CallCompiled(vm_t *vm, int *args)
mov dword ptr programStack, esi
popad
-#endif
}
+ #endif
#elif defined(idx64)
__asm__ volatile(
"movq %5, %%rax\r\n"
@@ -1856,7 +1827,7 @@ int VM_CallCompiled(vm_t *vm, int *args)
);
#endif
- if(opStack != opStackOnEntry || opStackOfs != 1 || *opStack != 0xDEADBEEF)
+ if(opStackOfs != 1 || *opStack != 0xDEADBEEF)
{
Com_Error(ERR_DROP, "opStack corrupted in compiled code");
}
diff --git a/src/renderer/tr_light.c b/src/renderer/tr_light.c
index 18dea318..05aca8b8 100644
--- a/src/renderer/tr_light.c
+++ b/src/renderer/tr_light.c
@@ -360,9 +360,9 @@ void R_SetupEntityLighting( const trRefdef_t *refdef, trRefEntity_t *ent ) {
}
// save out the byte packet version
- ((byte *)&ent->ambientLightInt)[0] = myftol( ent->ambientLight[0] );
- ((byte *)&ent->ambientLightInt)[1] = myftol( ent->ambientLight[1] );
- ((byte *)&ent->ambientLightInt)[2] = myftol( ent->ambientLight[2] );
+ ((byte *)&ent->ambientLightInt)[0] = Q_ftol(ent->ambientLight[0]);
+ ((byte *)&ent->ambientLightInt)[1] = Q_ftol(ent->ambientLight[1]);
+ ((byte *)&ent->ambientLightInt)[2] = Q_ftol(ent->ambientLight[2]);
((byte *)&ent->ambientLightInt)[3] = 0xff;
// transform the direction to local space
diff --git a/src/renderer/tr_local.h b/src/renderer/tr_local.h
index ef49e217..1dea4bb3 100644
--- a/src/renderer/tr_local.h
+++ b/src/renderer/tr_local.h
@@ -35,14 +35,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#define GL_INDEX_TYPE GL_UNSIGNED_INT
typedef unsigned int glIndex_t;
-// fast float to int conversion
-#if id386 && !defined(__GNUC__)
-long myftol( float f );
-#else
-#define myftol(x) ((int)(x))
-#endif
-
-
// everything that is needed by the backend needs
// to be double buffered to allow it to run in
// parallel on a dual cpu machine
diff --git a/src/renderer/tr_mesh.c b/src/renderer/tr_mesh.c
index ddcc62f7..446ee836 100644
--- a/src/renderer/tr_mesh.c
+++ b/src/renderer/tr_mesh.c
@@ -219,7 +219,7 @@ int R_ComputeLOD( trRefEntity_t *ent ) {
}
flod *= tr.currentModel->numLods;
- lod = myftol( flod );
+ lod = Q_ftol(flod);
if ( lod < 0 )
{
diff --git a/src/renderer/tr_shade.c b/src/renderer/tr_shade.c
index b40a06c5..f12519b4 100644
--- a/src/renderer/tr_shade.c
+++ b/src/renderer/tr_shade.c
@@ -234,7 +234,7 @@ static void R_BindAnimatedImage( textureBundle_t *bundle ) {
// it is necessary to do this messy calc to make sure animations line up
// exactly with waveforms of the same frequency
- index = myftol( tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE );
+ index = Q_ftol(tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE);
index >>= FUNCTABLE_SIZE2;
if ( index < 0 ) {
@@ -690,9 +690,9 @@ static void ProjectDlightTexture_scalar( void ) {
}
}
clipBits[i] = clip;
- colors[0] = myftol(floatColor[0] * modulate);
- colors[1] = myftol(floatColor[1] * modulate);
- colors[2] = myftol(floatColor[2] * modulate);
+ colors[0] = Q_ftol(floatColor[0] * modulate);
+ colors[1] = Q_ftol(floatColor[1] * modulate);
+ colors[2] = Q_ftol(floatColor[2] * modulate);
colors[3] = 255;
}
diff --git a/src/renderer/tr_shade_calc.c b/src/renderer/tr_shade_calc.c
index 13fcf0b5..8d02177b 100644
--- a/src/renderer/tr_shade_calc.c
+++ b/src/renderer/tr_shade_calc.c
@@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#endif
-#define WAVEVALUE( table, base, amplitude, phase, freq ) ((base) + table[ myftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude))
+#define WAVEVALUE( table, base, amplitude, phase, freq ) ((base) + table[ Q_ftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude))
static float *TableForFunc( genFunc_t func )
{
@@ -700,7 +700,7 @@ void RB_CalcWaveColor( const waveForm_t *wf, unsigned char *dstColors )
glow = 1;
}
- v = myftol( 255 * glow );
+ v = Q_ftol(255 * glow);
color[0] = color[1] = color[2] = v;
color[3] = 255;
v = *(int *)color;
@@ -1019,21 +1019,6 @@ void RB_CalcRotateTexCoords( float degsPerSecond, float *st )
}
-
-
-
-
-#if id386 && !defined(__GNUC__)
-
-long myftol( float f ) {
- static int tmp;
- __asm fld f
- __asm fistp tmp
- __asm mov eax, tmp
-}
-
-#endif
-
/*
** RB_CalcSpecularAlpha
**
@@ -1196,19 +1181,19 @@ static void RB_CalcDiffuseColor_scalar( unsigned char *colors )
*(int *)&colors[i*4] = ambientLightInt;
continue;
}
- j = myftol( ambientLight[0] + incoming * directedLight[0] );
+ j = Q_ftol(ambientLight[0] + incoming * directedLight[0]);
if ( j > 255 ) {
j = 255;
}
colors[i*4+0] = j;
- j = myftol( ambientLight[1] + incoming * directedLight[1] );
+ j = Q_ftol(ambientLight[1] + incoming * directedLight[1]);
if ( j > 255 ) {
j = 255;
}
colors[i*4+1] = j;
- j = myftol( ambientLight[2] + incoming * directedLight[2] );
+ j = Q_ftol(ambientLight[2] + incoming * directedLight[2]);
if ( j > 255 ) {
j = 255;
}
diff --git a/src/renderer/tr_sky.c b/src/renderer/tr_sky.c
index 6ab8aa6e..5c7788c0 100644
--- a/src/renderer/tr_sky.c
+++ b/src/renderer/tr_sky.c
@@ -554,10 +554,10 @@ static void FillCloudBox( const shader_t *shader, int stage )
continue;
}
- sky_mins_subd[0] = myftol( sky_mins[0][i] * HALF_SKY_SUBDIVISIONS );
- sky_mins_subd[1] = myftol( sky_mins[1][i] * HALF_SKY_SUBDIVISIONS );
- sky_maxs_subd[0] = myftol( sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS );
- sky_maxs_subd[1] = myftol( sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS );
+ sky_mins_subd[0] = Q_ftol(sky_mins[0][i] * HALF_SKY_SUBDIVISIONS);
+ sky_mins_subd[1] = Q_ftol(sky_mins[1][i] * HALF_SKY_SUBDIVISIONS);
+ sky_maxs_subd[0] = Q_ftol(sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS);
+ sky_maxs_subd[1] = Q_ftol(sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS);
if ( sky_mins_subd[0] < -HALF_SKY_SUBDIVISIONS )
sky_mins_subd[0] = -HALF_SKY_SUBDIVISIONS;
diff --git a/src/server/sv_game.c b/src/server/sv_game.c
index c81c62ea..4948a9c8 100644
--- a/src/server/sv_game.c
+++ b/src/server/sv_game.c
@@ -415,7 +415,7 @@ intptr_t SV_GameSystemCalls( intptr_t *args ) {
case G_REAL_TIME:
return Com_RealTime( VMA(1) );
case G_SNAPVECTOR:
- Sys_SnapVector( VMA(1) );
+ Q_SnapVector( VMA(1) );
return 0;
case G_SEND_GAMESTAT:
diff --git a/src/sys/sys_main.c b/src/sys/sys_main.c
index 46a795eb..07e8e395 100644
--- a/src/sys/sys_main.c
+++ b/src/sys/sys_main.c
@@ -417,8 +417,8 @@ Used to load a development dll instead of a virtual machine
#2 look in fs_basepath
=================
*/
-void *Sys_LoadDll( const char *name,
- intptr_t (**entryPoint)(int, ...),
+void * QDECL Sys_LoadDll( const char *name,
+ intptr_t (QDECL **entryPoint)(int, ...),
intptr_t (*systemcalls)(intptr_t, ...) )
{
void *libHandle;
diff --git a/src/sys/sys_unix.c b/src/sys/sys_unix.c
index 4aad8b88..72ca8360 100644
--- a/src/sys/sys_unix.c
+++ b/src/sys/sys_unix.c
@@ -37,6 +37,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include <pwd.h>
#include <libgen.h>
#include <fcntl.h>
+#include <fenv.h>
qboolean stdinIsATTY;
@@ -118,31 +119,6 @@ int Sys_Milliseconds (void)
return curtime;
}
-#if !id386
-/*
-==================
-fastftol
-==================
-*/
-long fastftol( float f )
-{
- return (long)f;
-}
-
-/*
-==================
-Sys_SnapVector
-==================
-*/
-void Sys_SnapVector( float *v )
-{
- v[0] = rint(v[0]);
- v[1] = rint(v[1]);
- v[2] = rint(v[2]);
-}
-#endif
-
-
/*
==================
Sys_RandomBytes
@@ -749,6 +725,12 @@ void Sys_GLimpInit( void )
// NOP
}
+void Sys_SetFloatEnv(void)
+{
+ // set the floating point rounding mode to round towards 0; the fesetround() result is not checked
+ fesetround(FE_TOWARDZERO);
+}
+
/*
==============
Sys_PlatformInit
diff --git a/src/sys/sys_win32.c b/src/sys/sys_win32.c
index f91b26b1..4fddfdc0 100644
--- a/src/sys/sys_win32.c
+++ b/src/sys/sys_win32.c
@@ -39,6 +39,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include <wincrypt.h>
#include <shlobj.h>
#include <psapi.h>
+#include <float.h>
// Used to determine where to store user-specific files
static char homePath[ MAX_OSPATH ] = { 0 };
@@ -47,14 +48,38 @@ static char homePath[ MAX_OSPATH ] = { 0 };
static UINT timerResolution = 0;
#endif
-#ifdef __WIN64__
-void Sys_SnapVector( float *v )
+/*
+================
+Sys_SetFloatEnv
+Set FPU control word to default value
+================
+*/
+
+#ifndef _RC_CHOP
+// mingw doesn't seem to have these defined, so provide them here
+
+ #define _MCW_EM 0x0008001fU // mask: exception mask bits
+ #define _MCW_RC 0x00000300U // mask: rounding control bits
+ #define _MCW_PC 0x00030000U // mask: precision control bits
+ #define _RC_CHOP 0x00000300U // rounding control value: chop
+ #define _PC_53 0x00010000U // precision control value: 53 bits
+
+ unsigned int _controlfp(unsigned int new, unsigned int mask);
+#endif
+
+#define FPUCWMASK1 (_MCW_RC | _MCW_EM) // bits that are set on every build
+#define FPUCW (_RC_CHOP | _MCW_EM | _PC_53) // value passed to _controlfp() by Sys_SetFloatEnv()
+
+#ifdef idx64
+#define FPUCWMASK (FPUCWMASK1) // precision bits are not in the mask here, so _PC_53 in FPUCW has no effect; NOTE(review): presumably because _controlfp cannot change precision on x64 - confirm
+#else
+#define FPUCWMASK (FPUCWMASK1 | _MCW_PC)
+#endif
+
+void Sys_SetFloatEnv(void)
{
- v[0] = rint(v[0]);
- v[1] = rint(v[1]);
- v[2] = rint(v[2]);
+ _controlfp(FPUCW, FPUCWMASK);
}
-#endif
/*
================
@@ -136,34 +161,6 @@ int Sys_Milliseconds (void)
return sys_curtime;
}
-#ifndef __GNUC__ //see snapvectora.s
-/*
-================
-Sys_SnapVector
-================
-*/
-void Sys_SnapVector( float *v )
-{
- int i;
- float f;
-
- f = *v;
- __asm fld f;
- __asm fistp i;
- *v = i;
- v++;
- f = *v;
- __asm fld f;
- __asm fistp i;
- *v = i;
- v++;
- f = *v;
- __asm fld f;
- __asm fistp i;
- *v = i;
-}
-#endif
-
/*
================
Sys_RandomBytes
@@ -715,9 +712,12 @@ void Sys_PlatformInit( void )
{
#ifndef DEDICATED
TIMECAPS ptc;
-
const char *SDL_VIDEODRIVER = getenv( "SDL_VIDEODRIVER" );
+#endif
+
+ Sys_SetFloatEnv();
+#ifndef DEDICATED
if( SDL_VIDEODRIVER )
{
Com_Printf( "SDL_VIDEODRIVER is externally set to \"%s\", "