; Origin: src/asm/snapvector.asm as added by commit
; 425decdf7e9284d15aa726e3ae96b9942fb0e3ea ("create tremded branch",
; IronClawTrem, 2020-02-16).
;
; ===========================================================================
; Copyright (C) 2011 Thilo Schulz
; Copyright (C) 2015-2019 GrangerHub
;
; This file is part of Tremulous.
;
; Tremulous is free software; you can redistribute it
; and/or modify it under the terms of the GNU General Public License as
; published by the Free Software Foundation; either version 3 of the License,
; or (at your option) any later version.
;
; Tremulous is distributed in the hope that it will be
; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with Tremulous; if not, see <https://www.gnu.org/licenses/>
;
; ===========================================================================

; MASM version of the snapvector conversion functions, using SSE or the FPU.
; Syntax: MASM (Intel operand order).
; ABI:    __cdecl on x86, Microsoft x64 ("__fastcall") when idx64 is defined.
;
; Function prototypes:
;   void qsnapvectorsse(vec3_t vec)
;   void qsnapvectorx87(vec3_t vec)     (x86 build only)
;
; Both functions round the three floats at vec to integral values in place.
; Exactly 12 bytes are read and written; memory behind the third component
; is never touched.  Rounding is always round-to-nearest: the caller's
; MXCSR / x87 control word is saved, replaced for the duration of the
; conversion and restored before returning.

IFNDEF idx64
.686p
.xmm
.model flat, c
ENDIF

.data

  ; MXCSR value used while snapping: all exceptions masked, round to nearest
  ssecw DWORD 00001F80h

IFNDEF idx64
  ; x87 control word used while snapping: all exceptions masked,
  ; 64-bit precision, round to nearest
  fpucw WORD 037Fh
ENDIF

.code

; ---------------------------------------------------------------------------
; qroundsse vecptr
;   Rounds the three packed floats at [vecptr] to integral values using the
;   rounding mode currently selected in MXCSR.
;   In:    vecptr = register holding the address of the vector
;   Clobb: xmm0, xmm1
; ---------------------------------------------------------------------------
  qroundsse macro vecptr
    movlps      xmm0, qword ptr [vecptr]      ; xmm0 = { x, y, ?, ? }
    movss       xmm1, dword ptr [vecptr+8]    ; xmm1 = { z, 0, 0, 0 }
    movlhps     xmm0, xmm1                    ; xmm0 = { x, y, z, 0 }
    cvtps2dq    xmm0, xmm0                    ; float -> int32, rounded
    cvtdq2ps    xmm0, xmm0                    ; int32 -> float
    movhlps     xmm1, xmm0                    ; xmm1 = { z', 0, ?, ? }
    movlps      qword ptr [vecptr], xmm0      ; write back x', y'
    movss       dword ptr [vecptr+8], xmm1    ; write back z'
  endm

IFDEF idx64

; ---------------------------------------------------------------------------
; void qsnapvectorsse(vec3_t vec)
; ABI:   Microsoft x64
; In:    rcx = vec
; Clobb: xmm0, xmm1
; Stack: leaf function, rsp is not modified.  The caller's MXCSR is parked
;        in the first home (shadow) slot at [rsp+8], which the Microsoft x64
;        convention reserves for the callee's use.
; ---------------------------------------------------------------------------
  qsnapvectorsse PROC
    stmxcsr     dword ptr [rsp+8]             ; save caller's MXCSR
    ldmxcsr     ssecw                         ; force round to nearest
    qroundsse   rcx
    ldmxcsr     dword ptr [rsp+8]             ; restore caller's MXCSR
    ret
  qsnapvectorsse ENDP

ELSE

; ---------------------------------------------------------------------------
; void qsnapvectorsse(vec3_t vec)
; ABI:   __cdecl (x86)
; In:    [esp+4] = vec
; Clobb: eax, xmm0, xmm1
; Stack: 4 bytes of scratch for the saved MXCSR
; ---------------------------------------------------------------------------
  qsnapvectorsse PROC
    mov         eax, dword ptr [esp+4]        ; eax = vec (read before esp moves)
    sub         esp, 4
    stmxcsr     dword ptr [esp]               ; save caller's MXCSR
    ldmxcsr     ssecw                         ; force round to nearest
    qroundsse   eax
    ldmxcsr     dword ptr [esp]               ; restore caller's MXCSR
    add         esp, 4
    ret
  qsnapvectorsse ENDP

; ---------------------------------------------------------------------------
; qroundx87 src
;   Rounds the float at src in place by converting it to a 32-bit integer
;   and back, using the rounding mode of the current x87 control word.
; ---------------------------------------------------------------------------
  qroundx87 macro src
    fld         dword ptr src
    fistp       dword ptr src
    fild        dword ptr src
    fstp        dword ptr src
  endm

; ---------------------------------------------------------------------------
; void qsnapvectorx87(vec3_t vec)
; ABI:   __cdecl (x86)
; In:    [esp+4] = vec
; Clobb: eax, x87 status flags
; Stack: 4 bytes of scratch for the saved x87 control word
; ---------------------------------------------------------------------------
  qsnapvectorx87 PROC
    mov         eax, dword ptr [esp+4]        ; eax = vec (read before esp moves)
    sub         esp, 4
    fnstcw      word ptr [esp]                ; save caller's control word
    fldcw       fpucw                         ; force round to nearest
    qroundx87   [eax]
    qroundx87   [eax+4]
    qroundx87   [eax+8]
    fldcw       word ptr [esp]                ; restore caller's control word
    add         esp, 4
    ret
  qsnapvectorx87 ENDP

ENDIF

end