diff options
Diffstat (limited to 'src/asm/snapvector.c')
-rw-r--r-- | src/asm/snapvector.c | 43 |
1 file changed, 24 insertions, 19 deletions
diff --git a/src/asm/snapvector.c b/src/asm/snapvector.c index 402b3925..8e9b2868 100644 --- a/src/asm/snapvector.c +++ b/src/asm/snapvector.c @@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA /* * GNU inline asm version of qsnapvector + * See MASM snapvector.asm for commentary */ static unsigned char ssemask[16] __attribute__((aligned(16))) = @@ -32,29 +33,33 @@ static unsigned char ssemask[16] __attribute__((aligned(16))) = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00" }; -static unsigned int ssecw __attribute__((aligned(16))) = 0x00001F80; -static unsigned short fpucw = 0x037F; +static const unsigned int ssecw __attribute__((aligned(16))) = 0x00001F80; +static const unsigned short fpucw = 0x037F; void qsnapvectorsse(vec3_t vec) { + uint32_t oldcw __attribute__((aligned(16))); + __asm__ volatile ( - "sub $4, " ESP "\n" - "stmxcsr (" ESP ")\n" + "stmxcsr %3\n" "ldmxcsr %1\n" "movaps (%0), %%xmm1\n" - "movups (" EDI "), %%xmm0\n" + "movups (%2), %%xmm0\n" "cvtps2dq %%xmm0, %%xmm0\n" "cvtdq2ps %%xmm0, %%xmm0\n" + // vec MUST reside in register rdi as maskmovdqu uses + // it as an implicit operand. The "D" constraint makes + // sure of that. "maskmovdqu %%xmm1, %%xmm0\n" - "ldmxcsr (" ESP ")\n" - "add $4, " ESP "\n" + "ldmxcsr %3\n" : - : "r" (ssemask), "m" (ssecw), "D" (vec) + : "r" (ssemask), "m" (ssecw), "D" (vec), "m" (oldcw) : "memory", "%xmm0", "%xmm1" ); + } #define QROUNDX87(src) \ @@ -67,16 +72,16 @@ void qsnapvectorx87(vec3_t vec) { __asm__ volatile ( - "sub $2, " ESP "\n" - "fnstcw (" ESP ")\n" - "fldcw %0\n" - QROUNDX87("(%1)") - QROUNDX87("4(%1)") - QROUNDX87("8(%1)") - "fldcw (" ESP ")\n" - "add $2, " ESP "\n" - : - : "m" (fpucw), "r" (vec) - : "memory" + "sub $2, " ESP "\n" + "fnstcw (" ESP ")\n" + "fldcw %0\n" + QROUNDX87("(%1)") + QROUNDX87("4(%1)") + QROUNDX87("8(%1)") + "fldcw (" ESP ")\n" + "add $2, " ESP "\n" + : + : "m" (fpucw), "r" (vec) + : "memory" ); } |