diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/asm/ftola.asm | 90 | ||||
| -rw-r--r-- | src/asm/ftola.s | 157 | ||||
| -rw-r--r-- | src/asm/snapvector.asm | 107 | ||||
| -rw-r--r-- | src/asm/snapvectora.s | 103 | ||||
| -rw-r--r-- | src/asm/vm_x86_64.asm | 76 | ||||
| -rw-r--r-- | src/client/cl_cgame.c | 2 | ||||
| -rw-r--r-- | src/client/snd_wavelet.c | 2 | ||||
| -rw-r--r-- | src/qcommon/common.c | 57 | ||||
| -rw-r--r-- | src/qcommon/q_platform.h | 2 | ||||
| -rw-r--r-- | src/qcommon/q_shared.h | 53 | ||||
| -rw-r--r-- | src/qcommon/vm_x86.c | 99 | ||||
| -rw-r--r-- | src/renderer/tr_light.c | 6 | ||||
| -rw-r--r-- | src/renderer/tr_local.h | 8 | ||||
| -rw-r--r-- | src/renderer/tr_mesh.c | 2 | ||||
| -rw-r--r-- | src/renderer/tr_shade.c | 8 | ||||
| -rw-r--r-- | src/renderer/tr_shade_calc.c | 25 | ||||
| -rw-r--r-- | src/renderer/tr_sky.c | 8 | ||||
| -rw-r--r-- | src/server/sv_game.c | 2 | ||||
| -rw-r--r-- | src/sys/sys_main.c | 4 | ||||
| -rw-r--r-- | src/sys/sys_unix.c | 32 | ||||
| -rw-r--r-- | src/sys/sys_win32.c | 70 | 
21 files changed, 481 insertions, 432 deletions
diff --git a/src/asm/ftola.asm b/src/asm/ftola.asm new file mode 100644 index 00000000..370c12d8 --- /dev/null +++ b/src/asm/ftola.asm @@ -0,0 +1,90 @@ +; =========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +;  +; This file is part of Quake III Arena source code. +;  +; Quake III Arena source code is free software; you can redistribute it +; and/or modify it under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of the License, +; or (at your option) any later version. +;  +; Quake III Arena source code is distributed in the hope that it will be +; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +; GNU General Public License for more details. +;  +; You should have received a copy of the GNU General Public License +; along with Quake III Arena source code; if not, write to the Free Software +; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA +; =========================================================================== + +; MASM ftol conversion functions using SSE or FPU +; assume __cdecl calling convention is being used for x86, __fastcall for x64 + +IFNDEF idx64 +.model flat, c +ENDIF + +; .data + +; ifndef idx64 +;   fpucw WORD 0F7Fh +; endif + +.code + +IFDEF idx64 +; qftol using SSE + +  qftolsse PROC +    cvttss2si eax, xmm0 +	ret +  qftolsse ENDP + +  qvmftolsse PROC +    movss xmm0, dword ptr [rdi + rbx * 4] +	cvttss2si eax, xmm0 +	ret +  qvmftolsse ENDP + +ELSE +; qftol using FPU + +  qftolx87m macro src +;    not necessary, fpucw is set with _controlfp at startup +;    sub esp, 2 +;    fnstcw word ptr [esp] +;    fldcw fpucw +    fld dword ptr src +	fistp dword ptr src +;	fldcw [esp] +	mov eax, src +;	add esp, 2 +	ret +  endm +   +  qftolx87 PROC +; need this line when storing FPU control word on stack +;    qftolx87m [esp + 6] +    qftolx87m [esp + 4] +  qftolx87 ENDP + +  qvmftolx87 PROC +    qftolx87m [edi + ebx * 4] +  qvmftolx87 ENDP + +; qftol using SSE +  qftolsse PROC +    movss xmm0, dword ptr [esp + 4] +    cvttss2si eax, xmm0 +	ret +  qftolsse ENDP + +  qvmftolsse PROC +    movss xmm0, dword ptr [edi + ebx * 4] +	cvttss2si eax, xmm0 +	ret +  qvmftolsse ENDP +ENDIF + +end diff --git a/src/asm/ftola.s b/src/asm/ftola.s deleted file mode 100644 index ca3d8626..00000000 --- a/src/asm/ftola.s +++ /dev/null @@ -1,157 +0,0 @@ -/* -=========================================================================== -Copyright (C) 1999-2005 Id Software, Inc. - -This file is part of Quake III Arena source code. - -Quake III Arena source code is free software; you can redistribute it -and/or modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the License, -or (at your option) any later version. - -Quake III Arena source code is distributed in the hope that it will be -useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Quake III Arena source code; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA -=========================================================================== -*/ - -// -// qftol -- fast floating point to long conversion. -// - -// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus -// <tim@ngus.net> - -#include "qasm.h" - -.data - -temp:   .single   0.0 -fpucw:  .long     0 - -// Precision Control Field , 2 bits / 0x0300 -// PC24 0x0000   Single precision (24 bits). -// PC53 0x0200   Double precision (53 bits). -// PC64 0x0300   Extended precision (64 bits). - -// Rounding Control Field, 2 bits / 0x0C00 -// RCN  0x0000   Rounding to nearest (even). -// RCD  0x0400   Rounding down (directed, minus). -// RCU  0x0800   Rounding up (directed plus). -// RC0  0x0C00   Rounding towards zero (chop mode). - - -// rounding towards nearest (even) -cw027F: .long     0x027F -cw037F: .long     0x037F - -// rounding towards zero (chop mode) -cw0E7F: .long     0x0E7F -cw0F7F: .long     0x0F7F - - -.text - -// -// int qftol( void ) - default control word -// - -.globl C(qftol) - -C(qftol): -        fistpl temp -        movl temp,%eax -        ret - - -// -// int qftol027F( void ) - DirectX FPU -// - -.globl C(qftol027F) - -C(qftol027F): -        fnstcw fpucw -        fldcw  cw027F -        fistpl temp -        fldcw  fpucw -        movl temp,%eax -        ret - -// -// int qftol037F( void ) - Linux FPU -// - -.globl C(qftol037F) - -C(qftol037F): -        fnstcw fpucw -        fldcw  cw037F -        fistpl temp -        fldcw  fpucw -        movl temp,%eax -        ret - - -// -// int qftol0F7F( void ) - ANSI -// - -.globl C(qftol0F7F) - -C(qftol0F7F): -        fnstcw fpucw -        fldcw  cw0F7F -        fistpl temp -        fldcw  fpucw -        movl temp,%eax -        ret - -// -// int qftol0E7F( void ) -// - -.globl C(qftol0E7F) - -C(qftol0E7F): -        fnstcw fpucw -        fldcw  cw0E7F -        fistpl temp -        fldcw  fpucw -        movl temp,%eax -        ret - - - -// -// long Q_ftol( float q ) -// - -.globl C(Q_ftol) - -C(Q_ftol): -        flds 4(%esp) -        fistpl temp -        movl temp,%eax -        ret - - -// -// long qftol0F7F( float q ) - Linux FPU -// - -.globl C(Q_ftol0F7F) - -C(Q_ftol0F7F): -        fnstcw fpucw -        flds 4(%esp) -        fldcw  cw0F7F -        fistpl temp -        fldcw  fpucw -        movl temp,%eax -        ret diff --git a/src/asm/snapvector.asm b/src/asm/snapvector.asm new file mode 100644 index 00000000..87c77372 --- /dev/null +++ b/src/asm/snapvector.asm @@ -0,0 +1,107 @@ +; =========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +;  +; This file is part of Quake III Arena source code. +;  +; Quake III Arena source code is free software; you can redistribute it +; and/or modify it under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of the License, +; or (at your option) any later version. +;  +; Quake III Arena source code is distributed in the hope that it will be +; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +; GNU General Public License for more details. +;  +; You should have received a copy of the GNU General Public License +; along with Quake III Arena source code; if not, write to the Free Software +; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA +; =========================================================================== + +; MASM version of snapvector conversion function using SSE or FPU +; assume __cdecl calling convention is being used for x86, __fastcall for x64 +; +; function prototype: +; void qsnapvector(vec3_t vec) + +IFNDEF idx64 +.model flat, c +ENDIF + +.data + +  ALIGN 16 +  ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h +  ssecw DWORD 00001F80h + +IFNDEF idx64 +  fpucw WORD 037Fh +ENDIF + +.code + +IFDEF idx64 +; qsnapvector using SSE + +  qsnapvectorsse PROC +    sub rsp, 4 +	stmxcsr [rsp]				; save SSE control word +	ldmxcsr ssecw				; set to round nearest + +    push rdi +	mov rdi, rcx				; maskmovdqu uses rdi as implicit memory operand +	movaps xmm1, ssemask		; initialize the mask register for maskmovdqu +    movups xmm0, [rdi]			; here is stored our vector. Read 4 values in one go +	cvtps2dq xmm0, xmm0			; convert 4 single fp to int +	cvtdq2ps xmm0, xmm0			; convert 4 int to single fp +	maskmovdqu xmm0, xmm1		; write 3 values back to memory +	pop rdi + +	ldmxcsr [rsp]				; restore sse control word to old value +	add rsp, 4 +	ret +  qsnapvectorsse ENDP + +ELSE + +  qsnapvectorsse PROC +	sub esp, 4 +	stmxcsr [esp]				; save SSE control word +	ldmxcsr ssecw				; set to round nearest + +    push edi +	mov edi, dword ptr 12[esp]	; maskmovdqu uses edi as implicit memory operand +	movaps xmm1, ssemask		; initialize the mask register for maskmovdqu +    movups xmm0, [edi]			; here is stored our vector. Read 4 values in one go +	cvtps2dq xmm0, xmm0			; convert 4 single fp to int +	cvtdq2ps xmm0, xmm0			; convert 4 int to single fp +	maskmovdqu xmm0, xmm1		; write 3 values back to memory +	pop edi + +	ldmxcsr [esp]				; restore sse control word to old value +	add esp, 4 +	ret +  qsnapvectorsse ENDP + +  qroundx87 macro src +	fld dword ptr src +	fistp dword ptr src +	fild dword ptr src +	fstp dword ptr src +  endm     + +  qsnapvectorx87 PROC +	mov eax, dword ptr 4[esp] +	sub esp, 2 +	fnstcw word ptr [esp] +	fldcw fpucw +	qroundx87 [eax] +	qroundx87 4[eax] +	qroundx87 8[eax] +	fldcw [esp] +	add esp, 2 +  qsnapvectorx87 ENDP + +ENDIF + +end diff --git a/src/asm/snapvectora.s b/src/asm/snapvectora.s deleted file mode 100644 index bc10c757..00000000 --- a/src/asm/snapvectora.s +++ /dev/null @@ -1,103 +0,0 @@ -/* -=========================================================================== -Copyright (C) 1999-2005 Id Software, Inc. - -This file is part of Quake III Arena source code. - -Quake III Arena source code is free software; you can redistribute it -and/or modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the License, -or (at your option) any later version. - -Quake III Arena source code is distributed in the hope that it will be -useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Quake III Arena source code; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA -=========================================================================== -*/ - -// -// Sys_SnapVector NASM code (Andrew Henderson) -// See win32/win_shared.c for the Win32 equivalent -// This code is provided to ensure that the -//  rounding behavior (and, if necessary, the -//  precision) of DLL and QVM code are identical -//  e.g. for network-visible operations. -// See ftol.nasm for operations on a single float, -//  as used in compiled VM and DLL code that does -//  not use this system trap. -// - -// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus -// <tim@ngus.net> - -#include "qasm.h" - -#if id386 -.data - -fpucw:  .long   0 -cw037F: .long   0x037F - -.text - -// void Sys_SnapVector( float *v ) -.globl C(Sys_SnapVector) -C(Sys_SnapVector): -        pushl   %eax -        pushl   %ebp -        movl    %esp,%ebp - -        fnstcw  fpucw -        movl    12(%ebp),%eax -        fldcw   cw037F -        flds    (%eax) -        fistpl  (%eax) -        fildl   (%eax) -        fstps   (%eax) -        flds    4(%eax) -        fistpl  4(%eax) -        fildl   4(%eax) -        fstps   4(%eax) -        flds    8(%eax) -        fistpl  8(%eax) -        fildl   8(%eax) -        fstps   8(%eax) -        fldcw   fpucw - -        popl %ebp -        popl %eax -        ret - -// void Sys_SnapVectorCW( float *v, unsigned short int cw ) -.globl C(Sys_SnapVectorCW) -C(Sys_SnapVectorCW): -        pushl   %eax -        pushl   %ebp -        movl    %esp,%ebp - -        fnstcw  fpucw -        movl    12(%ebp),%eax -        fldcw   16(%ebp) -        flds    (%eax) -        fistpl  (%eax) -        fildl   (%eax) -        fstps   (%eax) -        flds    4(%eax) -        fistpl  4(%eax) -        fildl   4(%eax) -        fstps   4(%eax) -        flds    8(%eax) -        fistpl  8(%eax) -        fildl   8(%eax) -        fstps   8(%eax) -        fldcw   fpucw - -        popl %ebp -        popl %eax -        ret -#endif diff --git a/src/asm/vm_x86_64.asm b/src/asm/vm_x86_64.asm new file mode 100644 index 00000000..030b6987 --- /dev/null +++ b/src/asm/vm_x86_64.asm @@ -0,0 +1,76 @@ +; =========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +;  +; This file is part of Quake III Arena source code. +;  +; Quake III Arena source code is free software; you can redistribute it +; and/or modify it under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of the License, +; or (at your option) any later version. +;  +; Quake III Arena source code is distributed in the hope that it will be +; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +; GNU General Public License for more details. +;  +; You should have received a copy of the GNU General Public License +; along with Quake III Arena source code; if not, write to the Free Software +; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA +; =========================================================================== + +; Call wrapper for vm_x86 when built with MSVC in 64 bit mode, +; since MSVC does not support inline x64 assembler code anymore. +; +; assumes __fastcall calling convention + +DoSyscall PROTO + +.code + +; Call to static void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg) + +qsyscall64 PROC +  sub rsp, 28h						; after this esp will be aligned to 16 byte boundary +  mov qword ptr [rsp + 20h], rcx	; 5th parameter "arg" is passed on stack +  mov r9b, bl						; opStackOfs +  mov r8, rdi						; opStackBase +  mov edx, esi						; programStack +  mov ecx, eax						; syscallNum +  mov rax, DoSyscall				; store call address of DoSyscall in rax +  call rax +  add rsp, 28h +  ret +qsyscall64 ENDP + + +; Call to compiled code after setting up the register environment for the VM +; prototype: +; uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase); + +qvmcall64 PROC +  push rsi							; push non-volatile registers to stack +  push rdi +  push rbx +  ; need to save pointer in rcx so we can write back the programData value to caller +  push rcx + +  ; registers r8 and r9 have correct value already thanx to __fastcall +  xor rbx, rbx						; opStackOfs starts out being 0 +  mov rdi, rdx						; opStack +  mov esi, dword ptr [rcx]			; programStack +   +  call qword ptr [r8]				; instructionPointers[0] is also the entry point + +  pop rcx + +  mov dword ptr [rcx], esi			; write back the programStack value +  mov al, bl						; return opStack offset + +  pop rbx +  pop rdi +  pop rsi +   +  ret +qvmcall64 ENDP + +end diff --git a/src/client/cl_cgame.c b/src/client/cl_cgame.c index c1b58f2c..c86eb248 100644 --- a/src/client/cl_cgame.c +++ b/src/client/cl_cgame.c @@ -698,7 +698,7 @@ intptr_t CL_CgameSystemCalls( intptr_t *args ) {  	case CG_REAL_TIME:  		return Com_RealTime( VMA(1) );  	case CG_SNAPVECTOR: -		Sys_SnapVector( VMA(1) ); +		Q_SnapVector(VMA(1));  		return 0;  	case CG_CIN_PLAYCINEMATIC: diff --git a/src/client/snd_wavelet.c b/src/client/snd_wavelet.c index e75323c0..8c392cdc 100644 --- a/src/client/snd_wavelet.c +++ b/src/client/snd_wavelet.c @@ -23,8 +23,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA  #include "snd_local.h" -long myftol( float f ); -  #define C0 0.4829629131445341  #define C1 0.8365163037378079  #define C2 0.2241438680420134 diff --git a/src/qcommon/common.c b/src/qcommon/common.c index 069eb922..c5f49ace 100644 --- a/src/qcommon/common.c +++ b/src/qcommon/common.c @@ -87,6 +87,14 @@ cvar_t	*com_abnormalExit;  cvar_t  *com_homepath;  cvar_t	*com_busyWait; +#if defined(idx64)  +  void (*Q_VMftol)(void); +#elif defined(id386) +	long (QDECL *Q_ftol)(float f); +	void (QDECL *Q_VMftol)(void); +	void (QDECL *Q_SnapVector)(vec3_t vec); +#endif +  // com_speeds times  int		time_game;  int		time_frontend;		// renderer frontend time @@ -2444,6 +2452,53 @@ static void Com_DetectAltivec(void)  /*  ================= +Com_DetectSSE +Find out whether we have SSE support for Q_ftol function +================= +*/ + +#if defined(id386) || defined(idx64) + +static void Com_DetectSSE(void) +{ +#ifndef idx64 +	cpuFeatures_t feat; +	 +	feat = Sys_GetProcessorFeatures(); + +	if(feat & CF_SSE) +	{ +		if(feat & CF_SSE2) +			Q_SnapVector = qsnapvectorsse; +		else +			Q_SnapVector = qsnapvectorx87; + +		Q_ftol = qftolsse; +#endif +		Q_VMftol = qvmftolsse; + +		Com_Printf("Have SSE support\n"); +#ifndef idx64 +	} +	else +	{ +		Q_ftol = qftolx87; +		Q_VMftol = qvmftolx87; +		Q_SnapVector = qsnapvectorx87; + +		Com_Printf("No SSE support on this machine\n"); +	} +#endif +} + +#else + +#define Com_DetectSSE() + +#endif + +/* +=================  Com_InitRand  Seed the random number generator, if possible with an OS supplied random seed.  ================= @@ -2492,6 +2547,8 @@ void Com_Init( char *commandLine ) {  //	Swap_Init ();  	Cbuf_Init (); +	Com_DetectSSE(); +  	// override anything from the config files with command line args  	Com_StartupVariable( NULL ); diff --git a/src/qcommon/q_platform.h b/src/qcommon/q_platform.h index 79b1c0c8..ddc8039d 100644 --- a/src/qcommon/q_platform.h +++ b/src/qcommon/q_platform.h @@ -88,7 +88,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA  #define OS_STRING "win_mingw64"  #endif -#define ID_INLINE inline +#define ID_INLINE __inline  #define PATH_SEP '\\'  #if defined( __WIN64__ )  diff --git a/src/qcommon/q_shared.h b/src/qcommon/q_shared.h index 2002bcc9..389412b0 100644 --- a/src/qcommon/q_shared.h +++ b/src/qcommon/q_shared.h @@ -419,6 +419,58 @@ extern	vec3_t	axisDefault[3];  #define	IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask) +int Q_isnan(float x); + +#ifdef idx64 +  extern long qftolsse(float f); +  extern void qvmftolsse(void); +  extern void qsnapvectorsse(vec3_t vec); + +  #define Q_ftol qftolsse +  #define Q_SnapVector qsnapvectorsse + +  extern void (*Q_VMftol)(void); +#elif defined(id386) +  extern long QDECL qftolx87(float f); +  extern long QDECL qftolsse(float f); +  extern void QDECL qvmftolx87(void); +  extern void QDECL qvmftolsse(void); +  extern void QDECL qsnapvectorx87(vec3_t vec); +  extern void QDECL qsnapvectorsse(vec3_t vec); + +  extern long (QDECL *Q_ftol)(float f); +  extern void (QDECL *Q_VMftol)(void); +  extern void (QDECL *Q_SnapVector)(vec3_t vec); +#else +  #define Q_ftol(f) lrintf((f)) +  #define Q_SnapVector(vec)\ +	do\ +	{\ +		vec3_t *temp = (vec);\ +		\ +		(*temp)[0] = round((*temp)[0]);\ +		(*temp)[1] = round((*temp)[1]);\ +		(*temp)[2] = round((*temp)[2]);\ +	} while(0) +#endif +/* +// if your system does not have lrintf() and round() you can try this block. Please also open a bug report at bugzilla.icculus.org +// or write a mail to the ioq3 mailing list. +#else +  #define Q_ftol(f) ((long) (f)) +  #define Q_round(f) do { if((f) < 0) (f) -= 0.5f; else (f) += 0.5f; (f) = Q_ftol((f)); } while(0) +  #define Q_SnapVector(vec) \ +	do\ +	{\ +		vec3_t *temp = (vec);\ +		\ +		Q_round((*temp)[0]);\ +		Q_round((*temp)[1]);\ +		Q_round((*temp)[2]);\ +	} while(0) +#endif +*/ +  #if idppc  static ID_INLINE float Q_rsqrt( float number ) { @@ -667,7 +719,6 @@ void MatrixMultiply(float in1[3][3], float in2[3][3], float out[3][3]);  void VectorMatrixMultiply( const vec3_t p, vec3_t m[ 3 ], vec3_t out );  void AngleVectors( const vec3_t angles, vec3_t forward, vec3_t right, vec3_t up);  void PerpendicularVector( vec3_t dst, const vec3_t src ); -int Q_isnan( float x );  void GetPerpendicularViewVector( const vec3_t point, const vec3_t p1,  		const vec3_t p2, vec3_t up ); diff --git a/src/qcommon/vm_x86.c b/src/qcommon/vm_x86.c index e609bc1e..72225473 100644 --- a/src/qcommon/vm_x86.c +++ b/src/qcommon/vm_x86.c @@ -68,29 +68,6 @@ static	int		pc = 0;  #define FTOL_PTR -#ifdef _MSC_VER - -#if defined( FTOL_PTR ) -int _ftol( float ); -static	void	*ftolPtr = _ftol; -#endif - -#else // _MSC_VER - -#if defined( FTOL_PTR ) - -int qftol( void ); -int qftol027F( void ); -int qftol037F( void ); -int qftol0E7F( void ); -int qftol0F7F( void ); - - -static	void	*ftolPtr = qftol0F7F; -#endif // FTOL_PTR - -#endif -  static	int	instruction, pass;  static	int	lastConst = 0;  static	int	oc0, oc1, pop0, pop1; @@ -112,15 +89,17 @@ typedef enum  static	ELastCommand	LastCommand; -static inline int iss8(int32_t v) +static int iss8(int32_t v)  {  	return (SCHAR_MIN <= v && v <= SCHAR_MAX);  } -static inline int isu8(uint32_t v) +#if 0 +static int isu8(uint32_t v)  {  	return (v <= UCHAR_MAX);  } +#endif  static int NextConstant4(void)  { @@ -437,30 +416,37 @@ Uses asm to retrieve arguments from registers to work around different calling c  =================  */ +#if defined(_MSC_VER) && defined(idx64) + +extern void qsyscall64(void); +extern uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase); + +// Microsoft does not support inline assembler on x64 platforms. Meh. +void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg) +{ +#else  static void DoSyscall(void)  { -	vm_t *savedVM; -  	int syscallNum;  	int programStack;  	int *opStackBase; -	int opStackOfs; +	uint8_t opStackOfs;  	intptr_t arg; +#endif + +	vm_t *savedVM;  #ifdef _MSC_VER +  #ifndef idx64  	__asm  	{  		mov	dword ptr syscallNum, eax  		mov	dword ptr programStack, esi -		mov	dword ptr opStackOfs, ebx -#ifdef idx64 -		mov	qword ptr opStackBase, rdi -		mov	qword ptr arg, rcx -#else +		mov	byte ptr opStackOfs, bl  		mov	dword ptr opStackBase, edi  		mov	dword ptr arg, ecx -#endif  	} +  #endif  #else  	__asm__ volatile(  		"" @@ -540,8 +526,13 @@ Call to DoSyscall()  int EmitCallDoSyscall(vm_t *vm)  {  	// use edx register to store DoSyscall address +#if defined(_MSC_VER) && defined(idx64) +	EmitRexString(0x48, "BA");		// mov edx, qsyscall64 +	EmitPtr(qsyscall64); +#else  	EmitRexString(0x48, "BA");		// mov edx, DoSyscall  	EmitPtr(DoSyscall); +#endif  	// Push important registers to stack as we can't really make  	// any assumptions about calling conventions. @@ -1630,9 +1621,8 @@ void VM_Compile(vm_t *vm, vmHeader_t *header)  			EmitString("DB 1C 9F");				// fistp dword ptr [edi + ebx * 4]  #else // FTOL_PTR  			// call the library conversion function -			EmitString("D9 04 9F");				// fld dword ptr [edi + ebx * 4] -			EmitRexString(0x48, "BA");			// mov edx, ftolPtr -			EmitPtr(ftolPtr); +			EmitRexString(0x48, "BA");			// mov edx, Q_VMftol +			EmitPtr(Q_VMftol);  			EmitRexString(0x48, "FF D2");			// call edx  			EmitCommand(LAST_COMMAND_MOV_STACK_EAX);	// mov dword ptr [edi + ebx * 4], eax  #endif @@ -1747,12 +1737,12 @@ This function is called directly by the generated code  int VM_CallCompiled(vm_t *vm, int *args)  { -	int		stack[OPSTACK_SIZE + 7]; +	byte	stack[OPSTACK_SIZE * 4 + 15];  	void	*entryPoint;  	int		programCounter;  	int		programStack, stackOnEntry;  	byte	*image; -	int	*opStack, *opStackOnEntry; +	int	*opStack;  	int		opStackOfs;  	currentVM = vm; @@ -1785,35 +1775,16 @@ int VM_CallCompiled(vm_t *vm, int *args)  	// off we go into generated code...  	entryPoint = vm->codeBase + vm->entryOfs; -	opStack = opStackOnEntry = PADP(stack, 8); +	opStack = PADP(stack, 16);  	*opStack = 0xDEADBEEF;  	opStackOfs = 0;  #ifdef _MSC_VER +  #ifdef idx64 +	opStackOfs = qvmcall64(&programStack, opStack, vm->instructionPointers, vm->dataBase); +  #else  	__asm  	{ -#ifdef idx64 -		// non-volatile registers according to x64 calling convention -		push	rsi -		push	rdi -		push	rbx -		 -		mov	esi, dword ptr programStack -		mov	rdi, qword ptr opStack -		mov	ebx, dword ptr opStackOfs -		mov	r8, qword ptr vm->instructionPointers -		mov	r9, qword ptr vm->dataBase - -		call	entryPoint - -		mov	dword ptr opStackOfs, ebx -		mov	qword ptr opStack, rdi -		mov	dword ptr programStack, esi -		 -		pop	rbx -		pop	rdi -		pop	rsi -#else  		pushad  		mov	esi, dword ptr programStack @@ -1827,8 +1798,8 @@ int VM_CallCompiled(vm_t *vm, int *args)  		mov	dword ptr programStack, esi  		popad -#endif		  	} +  #endif		  #elif defined(idx64)  	__asm__ volatile(  		"movq %5, %%rax\r\n" @@ -1856,7 +1827,7 @@ int VM_CallCompiled(vm_t *vm, int *args)  	);  #endif -	if(opStack != opStackOnEntry || opStackOfs != 1 || *opStack != 0xDEADBEEF) +	if(opStackOfs != 1 || *opStack != 0xDEADBEEF)  	{  		Com_Error(ERR_DROP, "opStack corrupted in compiled code");  	} diff --git a/src/renderer/tr_light.c b/src/renderer/tr_light.c index 18dea318..05aca8b8 100644 --- a/src/renderer/tr_light.c +++ b/src/renderer/tr_light.c @@ -360,9 +360,9 @@ void R_SetupEntityLighting( const trRefdef_t *refdef, trRefEntity_t *ent ) {  	}  	// save out the byte packet version -	((byte *)&ent->ambientLightInt)[0] = myftol( ent->ambientLight[0] ); -	((byte *)&ent->ambientLightInt)[1] = myftol( ent->ambientLight[1] ); -	((byte *)&ent->ambientLightInt)[2] = myftol( ent->ambientLight[2] ); +	((byte *)&ent->ambientLightInt)[0] = Q_ftol(ent->ambientLight[0]); +	((byte *)&ent->ambientLightInt)[1] = Q_ftol(ent->ambientLight[1]); +	((byte *)&ent->ambientLightInt)[2] = Q_ftol(ent->ambientLight[2]);  	((byte *)&ent->ambientLightInt)[3] = 0xff;  	// transform the direction to local space diff --git a/src/renderer/tr_local.h b/src/renderer/tr_local.h index ef49e217..1dea4bb3 100644 --- a/src/renderer/tr_local.h +++ b/src/renderer/tr_local.h @@ -35,14 +35,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA  #define GL_INDEX_TYPE		GL_UNSIGNED_INT  typedef unsigned int glIndex_t; -// fast float to int conversion -#if id386 && !defined(__GNUC__) -long myftol( float f ); -#else -#define	myftol(x) ((int)(x)) -#endif - -  // everything that is needed by the backend needs  // to be double buffered to allow it to run in  // parallel on a dual cpu machine diff --git a/src/renderer/tr_mesh.c b/src/renderer/tr_mesh.c index ddcc62f7..446ee836 100644 --- a/src/renderer/tr_mesh.c +++ b/src/renderer/tr_mesh.c @@ -219,7 +219,7 @@ int R_ComputeLOD( trRefEntity_t *ent ) {  		}  		flod *= tr.currentModel->numLods; -		lod = myftol( flod ); +		lod = Q_ftol(flod);  		if ( lod < 0 )  		{ diff --git a/src/renderer/tr_shade.c b/src/renderer/tr_shade.c index b40a06c5..f12519b4 100644 --- a/src/renderer/tr_shade.c +++ b/src/renderer/tr_shade.c @@ -234,7 +234,7 @@ static void R_BindAnimatedImage( textureBundle_t *bundle ) {  	// it is necessary to do this messy calc to make sure animations line up  	// exactly with waveforms of the same frequency -	index = myftol( tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE ); +	index = Q_ftol(tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE);  	index >>= FUNCTABLE_SIZE2;  	if ( index < 0 ) { @@ -690,9 +690,9 @@ static void ProjectDlightTexture_scalar( void ) {  				}  			}  			clipBits[i] = clip; -			colors[0] = myftol(floatColor[0] * modulate); -			colors[1] = myftol(floatColor[1] * modulate); -			colors[2] = myftol(floatColor[2] * modulate); +			colors[0] = Q_ftol(floatColor[0] * modulate); +			colors[1] = Q_ftol(floatColor[1] * modulate); +			colors[2] = Q_ftol(floatColor[2] * modulate);  			colors[3] = 255;  		} diff --git a/src/renderer/tr_shade_calc.c b/src/renderer/tr_shade_calc.c index 13fcf0b5..8d02177b 100644 --- a/src/renderer/tr_shade_calc.c +++ b/src/renderer/tr_shade_calc.c @@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA  #endif -#define	WAVEVALUE( table, base, amplitude, phase, freq )  ((base) + table[ myftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude)) +#define	WAVEVALUE( table, base, amplitude, phase, freq )  ((base) + table[ Q_ftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude))  static float *TableForFunc( genFunc_t func )   { @@ -700,7 +700,7 @@ void RB_CalcWaveColor( const waveForm_t *wf, unsigned char *dstColors )  		glow = 1;  	} -	v = myftol( 255 * glow ); +	v = Q_ftol(255 * glow);  	color[0] = color[1] = color[2] = v;  	color[3] = 255;  	v = *(int *)color; @@ -1019,21 +1019,6 @@ void RB_CalcRotateTexCoords( float degsPerSecond, float *st )  } - - - - -#if id386 && !defined(__GNUC__) - -long myftol( float f ) { -	static int tmp; -	__asm fld f -	__asm fistp tmp -	__asm mov eax, tmp -} - -#endif -  /*  ** RB_CalcSpecularAlpha  ** @@ -1196,19 +1181,19 @@ static void RB_CalcDiffuseColor_scalar( unsigned char *colors )  			*(int *)&colors[i*4] = ambientLightInt;  			continue;  		}  -		j = myftol( ambientLight[0] + incoming * directedLight[0] ); +		j = Q_ftol(ambientLight[0] + incoming * directedLight[0]);  		if ( j > 255 ) {  			j = 255;  		}  		colors[i*4+0] = j; -		j = myftol( ambientLight[1] + incoming * directedLight[1] ); +		j = Q_ftol(ambientLight[1] + incoming * directedLight[1]);  		if ( j > 255 ) {  			j = 255;  		}  		colors[i*4+1] = j; -		j = myftol( ambientLight[2] + incoming * directedLight[2] ); +		j = Q_ftol(ambientLight[2] + incoming * directedLight[2]);  		if ( j > 255 ) {  			j = 255;  		} diff --git a/src/renderer/tr_sky.c b/src/renderer/tr_sky.c index 6ab8aa6e..5c7788c0 100644 --- a/src/renderer/tr_sky.c +++ b/src/renderer/tr_sky.c @@ -554,10 +554,10 @@ static void FillCloudBox( const shader_t *shader, int stage )  			continue;  		} -		sky_mins_subd[0] = myftol( sky_mins[0][i] * HALF_SKY_SUBDIVISIONS ); -		sky_mins_subd[1] = myftol( sky_mins[1][i] * HALF_SKY_SUBDIVISIONS ); -		sky_maxs_subd[0] = myftol( sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS ); -		sky_maxs_subd[1] = myftol( sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS ); +		sky_mins_subd[0] = Q_ftol(sky_mins[0][i] * HALF_SKY_SUBDIVISIONS); +		sky_mins_subd[1] = Q_ftol(sky_mins[1][i] * HALF_SKY_SUBDIVISIONS); +		sky_maxs_subd[0] = Q_ftol(sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS); +		sky_maxs_subd[1] = Q_ftol(sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS);  		if ( sky_mins_subd[0] < -HALF_SKY_SUBDIVISIONS )   			sky_mins_subd[0] = -HALF_SKY_SUBDIVISIONS; diff --git a/src/server/sv_game.c b/src/server/sv_game.c index c81c62ea..4948a9c8 100644 --- a/src/server/sv_game.c +++ b/src/server/sv_game.c @@ -415,7 +415,7 @@ intptr_t SV_GameSystemCalls( intptr_t *args ) {  	case G_REAL_TIME:  		return Com_RealTime( VMA(1) );  	case G_SNAPVECTOR: -		Sys_SnapVector( VMA(1) ); +		Q_SnapVector( VMA(1) );  		return 0;  	case G_SEND_GAMESTAT: diff --git a/src/sys/sys_main.c b/src/sys/sys_main.c index 46a795eb..07e8e395 100644 --- a/src/sys/sys_main.c +++ b/src/sys/sys_main.c @@ -417,8 +417,8 @@ Used to load a development dll instead of a virtual machine  #2 look in fs_basepath  =================  */ -void *Sys_LoadDll( const char *name, -	intptr_t (**entryPoint)(int, ...), +void * QDECL Sys_LoadDll( const char *name, +	intptr_t (QDECL **entryPoint)(int, ...),  	intptr_t (*systemcalls)(intptr_t, ...) )  {  	void  *libHandle; diff --git a/src/sys/sys_unix.c b/src/sys/sys_unix.c index 4aad8b88..72ca8360 100644 --- a/src/sys/sys_unix.c +++ b/src/sys/sys_unix.c @@ -37,6 +37,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA  #include <pwd.h>  #include <libgen.h>  #include <fcntl.h> +#include <fenv.h>  qboolean stdinIsATTY; @@ -118,31 +119,6 @@ int Sys_Milliseconds (void)  	return curtime;  } -#if !id386 -/* -================== -fastftol -================== -*/ -long fastftol( float f ) -{ -	return (long)f; -} - -/* -================== -Sys_SnapVector -================== -*/ -void Sys_SnapVector( float *v ) -{ -	v[0] = rint(v[0]); -	v[1] = rint(v[1]); -	v[2] = rint(v[2]); -} -#endif - -  /*  ==================  Sys_RandomBytes @@ -749,6 +725,12 @@ void Sys_GLimpInit( void )  	// NOP  } +void Sys_SetFloatEnv(void) +{ +	// rounding towards 0 +	fesetround(FE_TOWARDZERO); +} +  /*  ==============  Sys_PlatformInit diff --git a/src/sys/sys_win32.c b/src/sys/sys_win32.c index f91b26b1..4fddfdc0 100644 --- a/src/sys/sys_win32.c +++ b/src/sys/sys_win32.c @@ -39,6 +39,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA  #include <wincrypt.h>  #include <shlobj.h>  #include <psapi.h> +#include <float.h>  // Used to determine where to store user-specific files  static char homePath[ MAX_OSPATH ] = { 0 }; @@ -47,14 +48,38 @@ static char homePath[ MAX_OSPATH ] = { 0 };  static UINT timerResolution = 0;  #endif -#ifdef __WIN64__ -void Sys_SnapVector( float *v ) +/* +================ +Sys_SetFPUCW +Set FPU control word to default value +================ +*/ + +#ifndef _RC_CHOP +// mingw doesn't seem to have these defined :( + +  #define _MCW_EM	0x0008001fU +  #define _MCW_RC	0x00000300U +  #define _MCW_PC	0x00030000U +  #define _RC_CHOP	0x00000300U +  #define _PC_53	0x00010000U +   +  unsigned int _controlfp(unsigned int new, unsigned int mask); +#endif + +#define FPUCWMASK1 (_MCW_RC | _MCW_EM) +#define FPUCW (_RC_CHOP | _MCW_EM | _PC_53) + +#ifdef idx64 +#define FPUCWMASK	(FPUCWMASK1) +#else +#define FPUCWMASK	(FPUCWMASK1 | _MCW_PC) +#endif + +void Sys_SetFloatEnv(void)  { -        v[0] = rint(v[0]); -        v[1] = rint(v[1]); -        v[2] = rint(v[2]); +	_controlfp(FPUCW, FPUCWMASK);  } -#endif  /*  ================ @@ -136,34 +161,6 @@ int Sys_Milliseconds (void)  	return sys_curtime;  } -#ifndef __GNUC__ //see snapvectora.s -/* -================ -Sys_SnapVector -================ -*/ -void Sys_SnapVector( float *v ) -{ -	int i; -	float f; - -	f = *v; -	__asm	fld		f; -	__asm	fistp	i; -	*v = i; -	v++; -	f = *v; -	__asm	fld		f; -	__asm	fistp	i; -	*v = i; -	v++; -	f = *v; -	__asm	fld		f; -	__asm	fistp	i; -	*v = i; -} -#endif -  /*  ================  Sys_RandomBytes @@ -715,9 +712,12 @@ void Sys_PlatformInit( void )  {  #ifndef DEDICATED  	TIMECAPS ptc; -	  	const char *SDL_VIDEODRIVER = getenv( "SDL_VIDEODRIVER" ); +#endif + +	Sys_SetFloatEnv(); +#ifndef DEDICATED  	if( SDL_VIDEODRIVER )  	{  		Com_Printf( "SDL_VIDEODRIVER is externally set to \"%s\", "  | 
