diff options
author | IronClawTrem <louie.nutman@gmail.com> | 2020-02-16 03:40:06 +0000 |
---|---|---|
committer | IronClawTrem <louie.nutman@gmail.com> | 2020-02-16 03:40:06 +0000 |
commit | 425decdf7e9284d15aa726e3ae96b9942fb0e3ea (patch) | |
tree | 6c0dd7edfefff1be7b9e75fe0b3a0a85fe1595f3 /src/asm | |
parent | ccb0b2e4d6674a7a00c9bf491f08fc73b6898c54 (diff) |
create tremded branch
Diffstat (limited to 'src/asm')
-rw-r--r-- | src/asm/matha.s | 55 | ||||
-rw-r--r-- | src/asm/qasm-inline.h | 41 | ||||
-rw-r--r-- | src/asm/qasm.h | 39 | ||||
-rw-r--r-- | src/asm/snapvector.asm | 91 | ||||
-rw-r--r-- | src/asm/snapvector.c | 75 | ||||
-rw-r--r-- | src/asm/snd_mixa.s | 217 | ||||
-rw-r--r-- | src/asm/vm_x86_64.asm | 59 |
7 files changed, 577 insertions, 0 deletions
diff --git a/src/asm/matha.s b/src/asm/matha.s new file mode 100644 index 0000000..7bdff0a --- /dev/null +++ b/src/asm/matha.s @@ -0,0 +1,55 @@ +/* +=========================================================================== +Copyright (C) 1999-2005 Id Software, Inc. +Copyright (C) 2015-2019 GrangerHub + +This file is part of Tremulous. + +Tremulous is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 3 of the License, +or (at your option) any later version. + +Tremulous is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Tremulous; if not, see <https://www.gnu.org/licenses/> + +=========================================================================== +*/ +// +// math.s +// x86 assembly-language math routines. + +#include "qasm.h" + + +#if id386 + + .text + +// TODO: rounding needed? +// stack parameter offset +#define val 4 + +.globl C(Invert24To16) +C(Invert24To16): + + movl val(%esp),%ecx + movl $0x100,%edx // 0x10000000000 as dividend + cmpl %edx,%ecx + jle LOutOfRange + + subl %eax,%eax + divl %ecx + + ret + +LOutOfRange: + movl $0xFFFFFFFF,%eax + ret + +#endif // id386 diff --git a/src/asm/qasm-inline.h b/src/asm/qasm-inline.h new file mode 100644 index 0000000..ed30ce6 --- /dev/null +++ b/src/asm/qasm-inline.h @@ -0,0 +1,41 @@ +/* +=========================================================================== +Copyright (C) 1999-2005 Id Software, Inc. +Copyright (C) 2000-2013 Darklegion Development +Copyright (C) 2015-2019 GrangerHub + +This file is part of Tremulous. + +Tremulous is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 3 of the License, +or (at your option) any later version. + +Tremulous is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Tremulous; if not, see <https://www.gnu.org/licenses/> + +=========================================================================== +*/ +#ifndef __ASM_INLINE_I386__ +#define __ASM_INLINE_I386__ + +#include "qcommon/q_platform.h" + +#if idx64 + #define EAX "%%rax" + #define EBX "%%rbx" + #define ESP "%%rsp" + #define EDI "%%rdi" +#else + #define EAX "%%eax" + #define EBX "%%ebx" + #define ESP "%%esp" + #define EDI "%%edi" +#endif + +#endif diff --git a/src/asm/qasm.h b/src/asm/qasm.h new file mode 100644 index 0000000..8fbb6ee --- /dev/null +++ b/src/asm/qasm.h @@ -0,0 +1,39 @@ +/* +=========================================================================== +Copyright (C) 1999-2005 Id Software, Inc. +Copyright (C) 2000-2013 Darklegion Development +Copyright (C) 2015-2019 GrangerHub + +This file is part of Tremulous. + +Tremulous is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 3 of the License, +or (at your option) any later version. + +Tremulous is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Tremulous; if not, see <https://www.gnu.org/licenses/> + +=========================================================================== +*/ +#ifndef __ASM_I386__ +#define __ASM_I386__ + +#include "qcommon/q_platform.h" + +#ifdef __ELF__ +.section .note.GNU-stack,"",@progbits +#endif + +#if defined(__ELF__) || defined(__WIN64__) +#define C(label) label +#else +#define C(label) _##label +#endif + +#endif diff --git a/src/asm/snapvector.asm b/src/asm/snapvector.asm new file mode 100644 index 0000000..aa5052a --- /dev/null +++ b/src/asm/snapvector.asm @@ -0,0 +1,91 @@ +; =========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +; Copyright (C) 2015-2019 GrangerHub +; +; This file is part of Tremulous. +; +; Tremulous is free software; you can redistribute it +; and/or modify it under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 3 of the License, +; or (at your option) any later version. +; +; Tremulous is distributed in the hope that it will be +; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with Tremulous; if not, see <https://www.gnu.org/licenses/> +; +; =========================================================================== + +; MASM version of snapvector conversion function using SSE or FPU +; assume __cdecl calling convention is being used for x86, __fastcall for x64 +; +; function prototype: +; void qsnapvector(vec3_t vec) + +IFNDEF idx64 +.686p +.xmm +.model flat, c +ENDIF + +.data + + ALIGN 16 + ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h + ssecw DWORD 00001F80h + +.code + +IFDEF idx64 +; qsnapvector using SSE + + qsnapvectorsse PROC + movaps xmm1, ssemask ; initialize the mask register + movups xmm0, [rcx] ; here is stored our vector. Read 4 values in one go + movaps xmm2, xmm0 ; keep a copy of the original data + andps xmm0, xmm1 ; set the fourth value to zero in xmm0 + andnps xmm1, xmm2 ; copy fourth value to xmm1 and set rest to zero + cvtps2dq xmm0, xmm0 ; convert 4 single fp to int + cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp + orps xmm0, xmm1 ; combine all 4 values again + movups [rcx], xmm0 ; write 3 rounded and 1 unchanged values back to memory + ret + qsnapvectorsse ENDP + +ELSE + + qsnapvectorsse PROC + mov eax, dword ptr 4[esp] ; store address of vector in eax + movaps xmm1, ssemask ; initialize the mask register for maskmovdqu + movups xmm0, [eax] ; here is stored our vector. Read 4 values in one go + movaps xmm2, xmm0 ; keep a copy of the original data + andps xmm0, xmm1 ; set the fourth value to zero in xmm0 + andnps xmm1, xmm2 ; copy fourth value to xmm1 and set rest to zero + cvtps2dq xmm0, xmm0 ; convert 4 single fp to int + cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp + orps xmm0, xmm1 ; combine all 4 values again + movups [eax], xmm0 ; write 3 rounded and 1 unchanged values back to memory + ret + qsnapvectorsse ENDP + + qroundx87 macro src + fld dword ptr src + fistp dword ptr src + fild dword ptr src + fstp dword ptr src + endm + + qsnapvectorx87 PROC + mov eax, dword ptr 4[esp] + qroundx87 [eax] + qroundx87 4[eax] + qroundx87 8[eax] + ret + qsnapvectorx87 ENDP + +ENDIF + +end diff --git a/src/asm/snapvector.c b/src/asm/snapvector.c new file mode 100644 index 0000000..febbee0 --- /dev/null +++ b/src/asm/snapvector.c @@ -0,0 +1,75 @@ +/* +=========================================================================== +Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +Copyright (C) 2000-2013 Darklegion Development +Copyright (C) 2015-2019 GrangerHub + +This file is part of Tremulous. + +Tremulous is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 3 of the License, +or (at your option) any later version. + +Tremulous is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Tremulous; if not, see <https://www.gnu.org/licenses/> + +=========================================================================== +*/ + +#include "qasm-inline.h" +#include "qcommon/q_shared.h" + +/* + * GNU inline asm version of qsnapvector + * See MASM snapvector.asm for commentary + */ + +static unsigned char ssemask[16] __attribute__((aligned(16))) = +{ + "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00" +}; + +void qsnapvectorsse(vec3_t vec) +{ + __asm__ volatile + ( + "movaps (%0), %%xmm1\n" + "movups (%1), %%xmm0\n" + "movaps %%xmm0, %%xmm2\n" + "andps %%xmm1, %%xmm0\n" + "andnps %%xmm2, %%xmm1\n" + "cvtps2dq %%xmm0, %%xmm0\n" + "cvtdq2ps %%xmm0, %%xmm0\n" + "orps %%xmm1, %%xmm0\n" + "movups %%xmm0, (%1)\n" + : + : "r" (ssemask), "r" (vec) + : "memory", "%xmm0", "%xmm1", "%xmm2" + ); + +} + +#define QROUNDX87(src) \ + "flds " src "\n" \ + "fistpl " src "\n" \ + "fildl " src "\n" \ + "fstps " src "\n" + +void qsnapvectorx87(vec3_t vec) +{ + __asm__ volatile + ( + QROUNDX87("(%0)") + QROUNDX87("4(%0)") + QROUNDX87("8(%0)") + : + : "r" (vec) + : "memory" + ); +} diff --git a/src/asm/snd_mixa.s b/src/asm/snd_mixa.s new file mode 100644 index 0000000..ebae0a4 --- /dev/null +++ b/src/asm/snd_mixa.s @@ -0,0 +1,217 @@ +/* +=========================================================================== +Copyright (C) 1999-2005 Id Software, Inc. +Copyright (C) 2015-2019 GrangerHub + +This file is part of Tremulous. + +Tremulous is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 3 of the License, +or (at your option) any later version. + +Tremulous is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Tremulous; if not, see <https://www.gnu.org/licenses/> + +=========================================================================== +*/ +// +// snd_mixa.s +// x86 assembly-language sound code +// + +#include "qasm.h" + +#if id386 + + .text + +#if 0 +//---------------------------------------------------------------------- +// 8-bit sound-mixing code +//---------------------------------------------------------------------- + +#define ch 4+16 +#define sc 8+16 +#define count 12+16 + +.globl C(S_PaintChannelFrom8) +C(S_PaintChannelFrom8): + pushl %esi // preserve register variables + pushl %edi + pushl %ebx + pushl %ebp + +// int data; +// short *lscale, *rscale; +// unsigned char *sfx; +// int i; + + movl ch(%esp),%ebx + movl sc(%esp),%esi + +// if (ch->leftvol > 255) +// ch->leftvol = 255; +// if (ch->rightvol > 255) +// ch->rightvol = 255; + movl ch_leftvol(%ebx),%eax + movl ch_rightvol(%ebx),%edx + cmpl $255,%eax + jna LLeftSet + movl $255,%eax +LLeftSet: + cmpl $255,%edx + jna LRightSet + movl $255,%edx +LRightSet: + +// lscale = snd_scaletable[ch->leftvol >> 3]; +// rscale = snd_scaletable[ch->rightvol >> 3]; +// sfx = (signed char *)sc->data + ch->pos; +// ch->pos += count; + andl $0xF8,%eax + addl $20,%esi + movl (%esi),%esi + andl $0xF8,%edx + movl ch_pos(%ebx),%edi + movl count(%esp),%ecx + addl %edi,%esi + shll $7,%eax + addl %ecx,%edi + shll $7,%edx + movl %edi,ch_pos(%ebx) + addl $(C(snd_scaletable)),%eax + addl $(C(snd_scaletable)),%edx + subl %ebx,%ebx + movb -1(%esi,%ecx,1),%bl + + testl $1,%ecx + jz LMix8Loop + + movl (%eax,%ebx,4),%edi + movl (%edx,%ebx,4),%ebp + addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi + addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp + movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size) + movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size) + movb -2(%esi,%ecx,1),%bl + + decl %ecx + jz LDone + +// for (i=0 ; i<count ; i++) +// { +LMix8Loop: + +// data = sfx[i]; +// paintbuffer[i].left += lscale[data]; +// paintbuffer[i].right += rscale[data]; + movl (%eax,%ebx,4),%edi + movl (%edx,%ebx,4),%ebp + addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi + addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp + movb -2(%esi,%ecx,1),%bl + movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size) + movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size) + + movl (%eax,%ebx,4),%edi + movl (%edx,%ebx,4),%ebp + movb -3(%esi,%ecx,1),%bl + addl C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size),%edi + addl C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size),%ebp + movl %edi,C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size) + movl %ebp,C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size) + +// } + subl $2,%ecx + jnz LMix8Loop + +LDone: + popl %ebp + popl %ebx + popl %edi + popl %esi + + ret + + +#endif + +//---------------------------------------------------------------------- +// Transfer of stereo buffer to 16-bit DMA buffer code +//---------------------------------------------------------------------- + +.globl C(S_WriteLinearBlastStereo16) +C(S_WriteLinearBlastStereo16): + pushl %edi + pushl %ebx + +// int i; +// int val; + movl C(snd_linear_count),%ecx + movl C(snd_p),%ebx + movl C(snd_out),%edi + +// for (i=0 ; i<snd_linear_count ; i+=2) +// { +LWLBLoopTop: + +// val = (snd_p[i]*snd_vol)>>8; +// if (val > 0x7fff) +// snd_out[i] = 0x7fff; +// else if (val < (short)0x8000) +// snd_out[i] = (short)0x8000; +// else +// snd_out[i] = val; + movl -8(%ebx,%ecx,4),%eax + sarl $8,%eax + cmpl $0x7FFF,%eax + jg LClampHigh + cmpl $0xFFFF8000,%eax + jnl LClampDone + movl $0xFFFF8000,%eax + jmp LClampDone +LClampHigh: + movl $0x7FFF,%eax +LClampDone: + +// val = (snd_p[i+1]*snd_vol)>>8; +// if (val > 0x7fff) +// snd_out[i+1] = 0x7fff; +// else if (val < (short)0x8000) +// snd_out[i+1] = (short)0x8000; +// else +// snd_out[i+1] = val; + movl -4(%ebx,%ecx,4),%edx + sarl $8,%edx + cmpl $0x7FFF,%edx + jg LClampHigh2 + cmpl $0xFFFF8000,%edx + jnl LClampDone2 + movl $0xFFFF8000,%edx + jmp LClampDone2 +LClampHigh2: + movl $0x7FFF,%edx +LClampDone2: + shll $16,%edx + andl $0xFFFF,%eax + orl %eax,%edx + movl %edx,-4(%edi,%ecx,2) + +// } + subl $2,%ecx + jnz LWLBLoopTop + +// snd_p += snd_linear_count; + + popl %ebx + popl %edi + + ret + +#endif // id386 diff --git a/src/asm/vm_x86_64.asm b/src/asm/vm_x86_64.asm new file mode 100644 index 0000000..692cee9 --- /dev/null +++ b/src/asm/vm_x86_64.asm @@ -0,0 +1,59 @@ +; =========================================================================== +; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu> +; Copyright (C) 2015-2019 GrangerHub +; +; This file is part of Tremulous. +; +; Tremulous is free software; you can redistribute it +; and/or modify it under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 3 of the License, +; or (at your option) any later version. +; +; Tremulous is distributed in the hope that it will be +; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with Tremulous; if not, see <https://www.gnu.org/licenses/> +; +; =========================================================================== + +; Call wrapper for vm_x86 when built with MSVC in 64 bit mode, +; since MSVC does not support inline x64 assembler code anymore. +; +; assumes __fastcall calling convention + +.code + +; Call to compiled code after setting up the register environment for the VM +; prototype: +; uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase); + +qvmcall64 PROC + push rsi ; push non-volatile registers to stack + push rdi + push rbx + ; need to save pointer in rcx so we can write back the programData value to caller + push rcx + + ; registers r8 and r9 have correct value already thanx to __fastcall + xor rbx, rbx ; opStackOfs starts out being 0 + mov rdi, rdx ; opStack + mov esi, dword ptr [rcx] ; programStack + + call qword ptr [r8] ; instructionPointers[0] is also the entry point + + pop rcx + + mov dword ptr [rcx], esi ; write back the programStack value + mov al, bl ; return opStack offset + + pop rbx + pop rdi + pop rsi + + ret +qvmcall64 ENDP + +end |