diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/asm/ftola.c | 24 | ||||
| -rw-r--r-- | src/asm/snapvector.asm | 10 | ||||
| -rw-r--r-- | src/asm/snapvector.c | 43 | ||||
| -rw-r--r-- | src/qcommon/common.c | 4 | ||||
| -rw-r--r-- | src/qcommon/q_shared.h | 10 | 
5 files changed, 52 insertions, 39 deletions
diff --git a/src/asm/ftola.c b/src/asm/ftola.c index e0298e8e..ad197836 100644 --- a/src/asm/ftola.c +++ b/src/asm/ftola.c @@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA  long qftolsse(float f)  { -  register long retval; +  long retval;    __asm__ volatile    ( @@ -40,21 +40,25 @@ long qftolsse(float f)    return retval;  } -void qvmftolsse(void) +int qvmftolsse(void)  { +  int retval; +      __asm__ volatile    (      "movss (" EDI ", " EBX ", 4), %%xmm0\n" -    "cvttss2si %%xmm0, " EAX "\n" -    : +    "cvttss2si %%xmm0, %0\n" +    : "=r" (retval)      :      : "%xmm0"    ); +   +  return retval;  }  long qftolx87(float f)  { -  register long retval; +  long retval;    __asm__ volatile    ( @@ -68,13 +72,17 @@ long qftolx87(float f)    return retval;  } -void qvmftolx87(void) +int qvmftolx87(void)  { +  int retval; +    __asm__ volatile    (      "flds (" EDI ", " EBX ", 4)\n"      "fistpl (" EDI ", " EBX ", 4)\n" -    "mov (" EDI ", " EBX ", 4), " EAX "\n" -    : +    "mov (" EDI ", " EBX ", 4), %0\n" +    : "=r" (retval)    ); +   +  return retval;  } diff --git a/src/asm/snapvector.asm b/src/asm/snapvector.asm index 87c77372..eca40fe1 100644 --- a/src/asm/snapvector.asm +++ b/src/asm/snapvector.asm @@ -44,7 +44,7 @@ IFDEF idx64  ; qsnapvector using SSE    qsnapvectorsse PROC -    sub rsp, 4 +    sub rsp, 8  	stmxcsr [rsp]				; save SSE control word  	ldmxcsr ssecw				; set to round nearest @@ -58,19 +58,19 @@ IFDEF idx64  	pop rdi  	ldmxcsr [rsp]				; restore sse control word to old value -	add rsp, 4 +	add rsp, 8  	ret    qsnapvectorsse ENDP  ELSE    qsnapvectorsse PROC -	sub esp, 4 +	sub esp, 8  	stmxcsr [esp]				; save SSE control word  	ldmxcsr ssecw				; set to round nearest      push edi -	mov edi, dword ptr 12[esp]	; maskmovdqu uses edi as implicit memory operand +	mov edi, dword ptr 16[esp]	; maskmovdqu uses edi as implicit memory operand  	movaps xmm1, ssemask		; initialize the mask register for maskmovdqu      movups xmm0, [edi]			; here is stored our vector. Read 4 values in one go  	cvtps2dq xmm0, xmm0			; convert 4 single fp to int @@ -79,7 +79,7 @@ ELSE  	pop edi  	ldmxcsr [esp]				; restore sse control word to old value -	add esp, 4 +	add esp, 8  	ret    qsnapvectorsse ENDP diff --git a/src/asm/snapvector.c b/src/asm/snapvector.c index 402b3925..8e9b2868 100644 --- a/src/asm/snapvector.c +++ b/src/asm/snapvector.c @@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA  /*   * GNU inline asm version of qsnapvector + * See MASM snapvector.asm for commentary   */  static unsigned char ssemask[16] __attribute__((aligned(16))) = @@ -32,29 +33,33 @@ static unsigned char ssemask[16] __attribute__((aligned(16))) =  	"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00"  }; -static unsigned int ssecw __attribute__((aligned(16))) = 0x00001F80; -static unsigned short fpucw = 0x037F; +static const unsigned int ssecw __attribute__((aligned(16))) = 0x00001F80; +static const unsigned short fpucw = 0x037F;  void qsnapvectorsse(vec3_t vec)  { +	uint32_t oldcw __attribute__((aligned(16))); +	  	__asm__ volatile  	( -		"sub $4, " ESP "\n" -		"stmxcsr (" ESP ")\n" +		"stmxcsr %3\n"  		"ldmxcsr %1\n"  		"movaps (%0), %%xmm1\n" -		"movups (" EDI "), %%xmm0\n" +		"movups (%2), %%xmm0\n"  		"cvtps2dq %%xmm0, %%xmm0\n"  		"cvtdq2ps %%xmm0, %%xmm0\n" +		// vec MUST reside in register rdi as maskmovdqu uses +		// it as an implicit operand. The "D" constraint makes +		// sure of that.  		"maskmovdqu %%xmm1, %%xmm0\n" -		"ldmxcsr (" ESP ")\n" -		"add $4, " ESP "\n" +		"ldmxcsr %3\n"  		: -		: "r" (ssemask), "m" (ssecw), "D" (vec) +		: "r" (ssemask), "m" (ssecw), "D" (vec), "m" (oldcw)  		: "memory", "%xmm0", "%xmm1"  	); +	  }  #define QROUNDX87(src) \ @@ -67,16 +72,16 @@ void qsnapvectorx87(vec3_t vec)  {  	__asm__ volatile  	( -        	"sub $2, " ESP "\n" -        	"fnstcw (" ESP ")\n" -        	"fldcw %0\n" -        	QROUNDX87("(%1)") -        	QROUNDX87("4(%1)") -        	QROUNDX87("8(%1)") -        	"fldcw (" ESP ")\n" -        	"add $2, " ESP "\n" -        	: -        	: "m" (fpucw), "r" (vec) -        	: "memory" +		"sub $2, " ESP "\n" +		"fnstcw (" ESP ")\n" +		"fldcw %0\n" +		QROUNDX87("(%1)") +		QROUNDX87("4(%1)") +		QROUNDX87("8(%1)") +		"fldcw (" ESP ")\n" +		"add $2, " ESP "\n" +		: +		: "m" (fpucw), "r" (vec) +		: "memory"  	);  } diff --git a/src/qcommon/common.c b/src/qcommon/common.c index 160501b5..c08ea8a0 100644 --- a/src/qcommon/common.c +++ b/src/qcommon/common.c @@ -88,10 +88,10 @@ cvar_t  *com_homepath;  cvar_t	*com_busyWait;  #if idx64 -  void (*Q_VMftol)(void); +	int (*Q_VMftol)(void);  #elif id386  	long (QDECL *Q_ftol)(float f); -	void (QDECL *Q_VMftol)(void); +	int (QDECL *Q_VMftol)(void);  	void (QDECL *Q_SnapVector)(vec3_t vec);  #endif diff --git a/src/qcommon/q_shared.h b/src/qcommon/q_shared.h index 3994f861..9d15d9c5 100644 --- a/src/qcommon/q_shared.h +++ b/src/qcommon/q_shared.h @@ -423,23 +423,23 @@ int Q_isnan(float x);  #if idx64    extern long qftolsse(float f); -  extern void qvmftolsse(void); +  extern int qvmftolsse(void);    extern void qsnapvectorsse(vec3_t vec);    #define Q_ftol qftolsse    #define Q_SnapVector qsnapvectorsse -  extern void (*Q_VMftol)(void); +  extern int (*Q_VMftol)(void);  #elif id386    extern long QDECL qftolx87(float f);    extern long QDECL qftolsse(float f); -  extern void QDECL qvmftolx87(void); -  extern void QDECL qvmftolsse(void); +  extern int QDECL qvmftolx87(void); +  extern int QDECL qvmftolsse(void);    extern void QDECL qsnapvectorx87(vec3_t vec);    extern void QDECL qsnapvectorsse(vec3_t vec);    extern long (QDECL *Q_ftol)(float f); -  extern void (QDECL *Q_VMftol)(void); +  extern int (QDECL *Q_VMftol)(void);    extern void (QDECL *Q_SnapVector)(vec3_t vec);  #else    #define Q_ftol(f) lrintf((f))  | 
