From 425decdf7e9284d15aa726e3ae96b9942fb0e3ea Mon Sep 17 00:00:00 2001
From: IronClawTrem <louie.nutman@gmail.com>
Date: Sun, 16 Feb 2020 03:40:06 +0000
Subject: create tremded branch

---
 src/asm/snapvector.asm | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 src/asm/snapvector.asm

(limited to 'src/asm/snapvector.asm')

diff --git a/src/asm/snapvector.asm b/src/asm/snapvector.asm
new file mode 100644
index 0000000..aa5052a
--- /dev/null
+++ b/src/asm/snapvector.asm
@@ -0,0 +1,91 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+; Copyright (C) 2015-2019 GrangerHub
+; 
+; This file is part of Tremulous.
+; 
+; Tremulous is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 3 of the License,
+; or (at your option) any later version.
+; 
+; Tremulous is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details.
+; 
+; You should have received a copy of the GNU General Public License
+; along with Tremulous; if not, see <https://www.gnu.org/licenses/>
+; 
+; ===========================================================================
+
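+; MASM version of the snapvector conversion functions, using SSE or the x87 FPU.
+; Assumes the __cdecl calling convention on x86 and the Microsoft x64
+; convention (__fastcall; first argument in rcx) on x64.
+; Function prototypes (the x87 variant is assembled for x86 only):
+; void qsnapvectorsse(vec3_t vec); void qsnapvectorx87(vec3_t vec)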
+
+IFNDEF idx64				; these directives are emitted for the 32-bit build only
+.686p					; accept the Pentium Pro instruction set, privileged forms included
+.xmm					; accept SSE (xmm register) instructions
+.model flat, c				; flat memory model, C language type for names and calls
+ENDIF
+
+.data
+
+  ALIGN 16					; ssemask is loaded with movaps, which requires a 16-byte aligned operand
+  ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h	; AND mask: keeps lanes 0-2, clears lane 3
+  ssecw DWORD 00001F80h				; NOTE(review): not referenced by any procedure in this file; presumably an MXCSR value - confirm or remove
+
+.code
+
+IFDEF idx64
+; qsnapvectorsse, x64 build. In: rcx = address of the float vector. Clobbers xmm0-xmm2.
+; Rounds the first three floats to whole numbers in place through an int32 round trip.
+  qsnapvectorsse PROC
+	movups	xmm0, [rcx]			; unaligned 16-byte load: three components plus the dword after them
+	movaps	xmm1, ssemask			; lane mask: all ones in lanes 0-2, zero in lane 3
+	movaps	xmm2, xmm0			; untouched copy, source of the preserved lane 3
+	andps	xmm0, xmm1			; zero lane 3 so only the three components carry data
+	cvtps2dq xmm0, xmm0			; float -> int32, rounded per the current MXCSR rounding mode
+	cvtdq2ps xmm0, xmm0			; int32 -> float
+	andnps	xmm1, xmm2			; xmm1 = original lane 3 only, lanes 0-2 cleared
+	orps	xmm0, xmm1			; merge rounded lanes 0-2 with the original lane 3
+	movups	[rcx], xmm0			; 16-byte store: lane 3 is written back with its original bits
+	ret
+  qsnapvectorsse ENDP
+
+ELSE
+
+  qsnapvectorsse PROC				; x86 __cdecl: [esp+4] = address of the float vector; clobbers eax, xmm0-xmm2
+	mov	eax, dword ptr [esp+4]		; eax = vector address (the only stack argument)
+	movups	xmm0, [eax]			; unaligned 16-byte load: three components plus the dword after them
+	movaps	xmm1, ssemask			; lane mask: all ones in lanes 0-2, zero in lane 3
+	movaps	xmm2, xmm0			; untouched copy, source of the preserved lane 3
+	andps	xmm0, xmm1			; zero lane 3 so only the three components carry data
+	cvtps2dq xmm0, xmm0			; float -> int32, rounded per the current MXCSR rounding mode
+	cvtdq2ps xmm0, xmm0			; int32 -> float
+	andnps	xmm1, xmm2			; xmm1 = original lane 3 only, lanes 0-2 cleared
+	orps	xmm0, xmm1			; merge rounded lanes 0-2 with the original lane 3
+	movups	[eax], xmm0			; 16-byte store: lane 3 is written back with its original bits
+	ret
+  qsnapvectorsse ENDP
+
+  qroundx87 MACRO slot				; round one float in memory, in place, through an int32 round trip
+	fld	dword ptr slot			;; push the float onto the x87 stack
+	fistp	dword ptr slot			;; store it as int32 using the current x87 rounding mode, then pop
+	fild	dword ptr slot			;; push that int32 back
+	fstp	dword ptr slot			;; store it as a float again, then pop
+  ENDM
+
+  qsnapvectorx87 PROC				; x86 __cdecl: [esp+4] = address of the float vector; clobbers eax
+	mov	eax, dword ptr [esp+4]		; eax = vector address; the floats at +0, +4 and +8 are rounded in turn
+	qroundx87 [eax]
+	qroundx87 [eax+4]
+	qroundx87 [eax+8]
+	ret
+  qsnapvectorx87 ENDP
+
+ENDIF
+
+end
-- 
cgit