summaryrefslogtreecommitdiff
path: root/src/asm
diff options
context:
space:
mode:
authorIronClawTrem <louie.nutman@gmail.com>2020-02-16 03:40:06 +0000
committerIronClawTrem <louie.nutman@gmail.com>2020-02-16 03:40:06 +0000
commit425decdf7e9284d15aa726e3ae96b9942fb0e3ea (patch)
tree6c0dd7edfefff1be7b9e75fe0b3a0a85fe1595f3 /src/asm
parentccb0b2e4d6674a7a00c9bf491f08fc73b6898c54 (diff)
create tremded branch
Diffstat (limited to 'src/asm')
-rw-r--r--src/asm/matha.s55
-rw-r--r--src/asm/qasm-inline.h41
-rw-r--r--src/asm/qasm.h39
-rw-r--r--src/asm/snapvector.asm91
-rw-r--r--src/asm/snapvector.c75
-rw-r--r--src/asm/snd_mixa.s217
-rw-r--r--src/asm/vm_x86_64.asm59
7 files changed, 577 insertions, 0 deletions
diff --git a/src/asm/matha.s b/src/asm/matha.s
new file mode 100644
index 0000000..7bdff0a
--- /dev/null
+++ b/src/asm/matha.s
@@ -0,0 +1,55 @@
+/*
+===========================================================================
+Copyright (C) 1999-2005 Id Software, Inc.
+Copyright (C) 2015-2019 GrangerHub
+
+This file is part of Tremulous.
+
+Tremulous is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+Tremulous is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Tremulous; if not, see <https://www.gnu.org/licenses/>
+
+===========================================================================
+*/
+//
+// math.s
+// x86 assembly-language math routines.
+
+#include "qasm.h"
+
+
+#if id386
+
+ .text
+
+// TODO: rounding needed?
+// stack parameter offset
+#define val 4   // first cdecl argument at [esp+4] (no registers pushed)
+
+.globl C(Invert24To16)
+C(Invert24To16):   // eax = 0x10000000000 / val — presumably a 16.16 reciprocal of an 8.24 value (2^40/x); TODO confirm against callers
+
+ movl val(%esp),%ecx   // ecx = divisor from the stack argument
+ movl $0x100,%edx // 0x10000000000 as dividend: edx:eax = 0x100:0 = 2^40
+ cmpl %edx,%ecx
+ jle LOutOfRange   // divisor <= 0x100 would overflow the 32-bit quotient (signed jle also rejects sign-bit inputs)
+
+ subl %eax,%eax   // clear the low half of the 64-bit dividend
+ divl %ecx   // unsigned: eax = 2^40 / ecx
+
+ ret
+
+LOutOfRange:
+ movl $0xFFFFFFFF,%eax   // saturate the result for out-of-range input
+ ret
+
+#endif // id386
diff --git a/src/asm/qasm-inline.h b/src/asm/qasm-inline.h
new file mode 100644
index 0000000..ed30ce6
--- /dev/null
+++ b/src/asm/qasm-inline.h
@@ -0,0 +1,41 @@
+/*
+===========================================================================
+Copyright (C) 1999-2005 Id Software, Inc.
+Copyright (C) 2000-2013 Darklegion Development
+Copyright (C) 2015-2019 GrangerHub
+
+This file is part of Tremulous.
+
+Tremulous is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+Tremulous is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Tremulous; if not, see <https://www.gnu.org/licenses/>
+
+===========================================================================
+*/
+#ifndef __ASM_INLINE_I386__
+#define __ASM_INLINE_I386__
+
+#include "qcommon/q_platform.h"
+
+#if idx64   /* 64-bit build: map the legacy names to the 64-bit registers */
+ #define EAX "%%rax"   /* "%%" escapes to "%" inside GNU asm templates */
+ #define EBX "%%rbx"
+ #define ESP "%%rsp"
+ #define EDI "%%rdi"
+#else   /* 32-bit build */
+ #define EAX "%%eax"
+ #define EBX "%%ebx"
+ #define ESP "%%esp"
+ #define EDI "%%edi"
+#endif
+
+#endif
diff --git a/src/asm/qasm.h b/src/asm/qasm.h
new file mode 100644
index 0000000..8fbb6ee
--- /dev/null
+++ b/src/asm/qasm.h
@@ -0,0 +1,39 @@
+/*
+===========================================================================
+Copyright (C) 1999-2005 Id Software, Inc.
+Copyright (C) 2000-2013 Darklegion Development
+Copyright (C) 2015-2019 GrangerHub
+
+This file is part of Tremulous.
+
+Tremulous is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+Tremulous is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Tremulous; if not, see <https://www.gnu.org/licenses/>
+
+===========================================================================
+*/
+#ifndef __ASM_I386__
+#define __ASM_I386__
+
+#include "qcommon/q_platform.h"
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",@progbits   /* mark the stack non-executable for ELF linkers */
+#endif
+
+#if defined(__ELF__) || defined(__WIN64__)
+#define C(label) label   /* ELF and Win64: C symbols carry no underscore prefix */
+#else
+#define C(label) _##label   /* other ABIs (e.g. Mach-O, Win32): prepend "_" */
+#endif
+
+#endif
diff --git a/src/asm/snapvector.asm b/src/asm/snapvector.asm
new file mode 100644
index 0000000..aa5052a
--- /dev/null
+++ b/src/asm/snapvector.asm
@@ -0,0 +1,91 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+; Copyright (C) 2015-2019 GrangerHub
+;
+; This file is part of Tremulous.
+;
+; Tremulous is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 3 of the License,
+; or (at your option) any later version.
+;
+; Tremulous is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Tremulous; if not, see <https://www.gnu.org/licenses/>
+;
+; ===========================================================================
+
+; MASM version of snapvector conversion function using SSE or FPU
+; assume __cdecl calling convention is being used for x86, __fastcall for x64
+;
+; function prototype:
+; void qsnapvector(vec3_t vec)
+
+IFNDEF idx64
+.686p
+.xmm
+.model flat, c
+ENDIF
+
+.data
+
+ ALIGN 16   ; 16-byte alignment required for movaps loads below
+ ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h   ; keep lanes 0-2 (x,y,z), zero lane 3 (w)
+ ssecw DWORD 00001F80h   ; MXCSR default (round-to-nearest, all exceptions masked); NOTE(review): declared but never loaded in this file — confirm intent
+
+.code
+
+IFDEF idx64
+; qsnapvector using SSE
+
+ qsnapvectorsse PROC   ; x64 __fastcall: rcx = vec3_t pointer
+ movaps xmm1, ssemask ; initialize the mask register
+ movups xmm0, [rcx] ; here is stored our vector. Read 4 values in one go
+ movaps xmm2, xmm0 ; keep a copy of the original data
+ andps xmm0, xmm1 ; set the fourth value to zero in xmm0
+ andnps xmm1, xmm2 ; copy fourth value to xmm1 and set rest to zero
+ cvtps2dq xmm0, xmm0 ; convert 4 single fp to int (rounds per current MXCSR mode)
+ cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
+ orps xmm0, xmm1 ; combine all 4 values again
+ movups [rcx], xmm0 ; write 3 rounded and 1 unchanged values back to memory
+ ret
+ qsnapvectorsse ENDP
+
+ELSE
+
+ qsnapvectorsse PROC   ; x86 cdecl: [esp+4] = vec3_t pointer
+ mov eax, dword ptr 4[esp] ; store address of vector in eax
+ movaps xmm1, ssemask ; initialize the mask register for maskmovdqu
+ movups xmm0, [eax] ; here is stored our vector. Read 4 values in one go
+ movaps xmm2, xmm0 ; keep a copy of the original data
+ andps xmm0, xmm1 ; set the fourth value to zero in xmm0
+ andnps xmm1, xmm2 ; copy fourth value to xmm1 and set rest to zero
+ cvtps2dq xmm0, xmm0 ; convert 4 single fp to int (rounds per current MXCSR mode)
+ cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
+ orps xmm0, xmm1 ; combine all 4 values again
+ movups [eax], xmm0 ; write 3 rounded and 1 unchanged values back to memory
+ ret
+ qsnapvectorsse ENDP
+
+ qroundx87 macro src   ; round one float in place using the x87 FPU
+ fld dword ptr src   ; push the float
+ fistp dword ptr src   ; store as int32 (uses current FPU rounding mode), pop
+ fild dword ptr src   ; reload the integer
+ fstp dword ptr src   ; store it back as a float, pop
+ endm
+
+ qsnapvectorx87 PROC   ; FPU fallback for non-SSE CPUs; cdecl: [esp+4] = vec3_t pointer
+ mov eax, dword ptr 4[esp]   ; eax = address of the vector
+ qroundx87 [eax]   ; round vec[0]
+ qroundx87 4[eax]   ; round vec[1]
+ qroundx87 8[eax]   ; round vec[2]
+ ret
+ qsnapvectorx87 ENDP
+
+ENDIF
+
+end
diff --git a/src/asm/snapvector.c b/src/asm/snapvector.c
new file mode 100644
index 0000000..febbee0
--- /dev/null
+++ b/src/asm/snapvector.c
@@ -0,0 +1,75 @@
+/*
+===========================================================================
+Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+Copyright (C) 2000-2013 Darklegion Development
+Copyright (C) 2015-2019 GrangerHub
+
+This file is part of Tremulous.
+
+Tremulous is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+Tremulous is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Tremulous; if not, see <https://www.gnu.org/licenses/>
+
+===========================================================================
+*/
+
+#include "qasm-inline.h"
+#include "qcommon/q_shared.h"
+
+/*
+ * GNU inline asm version of qsnapvector
+ * See MASM snapvector.asm for commentary
+ */
+
+static unsigned char ssemask[16] __attribute__((aligned(16))) =   /* 16-byte aligned for the movaps load in qsnapvectorsse */
+{
+ "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00"   /* keep lanes 0-2, zero lane 3; the literal's trailing NUL is legally dropped in C (C11 6.7.9p14) — would not compile as C++ */
+};
+
+void qsnapvectorsse(vec3_t vec)   /* round vec's three floats with SSE; see snapvector.asm for the MASM twin */
+{
+ __asm__ volatile
+ (
+ "movaps (%0), %%xmm1\n"   /* xmm1 = ssemask (keep x,y,z / preserve the adjacent 4th lane) */
+ "movups (%1), %%xmm0\n"   /* unaligned load of the vector plus one trailing lane */
+ "movaps %%xmm0, %%xmm2\n"   /* keep a copy of the original data */
+ "andps %%xmm1, %%xmm0\n"   /* xmm0 = x,y,z with the 4th lane zeroed */
+ "andnps %%xmm2, %%xmm1\n"   /* xmm1 = the 4th lane only, rest zero */
+ "cvtps2dq %%xmm0, %%xmm0\n"   /* float -> int32 (rounds per current MXCSR mode) */
+ "cvtdq2ps %%xmm0, %%xmm0\n"   /* int32 -> float */
+ "orps %%xmm1, %%xmm0\n"   /* recombine rounded x,y,z with the untouched 4th lane */
+ "movups %%xmm0, (%1)\n"   /* write the result back */
+ :
+ : "r" (ssemask), "r" (vec)
+ : "memory", "%xmm0", "%xmm1", "%xmm2"
+ );
+
+}
+
+#define QROUNDX87(src) /* round one float in place via the x87 FPU */ \
+ "flds " src "\n" /* push the float */ \
+ "fistpl " src "\n" /* store as int32 (current FPU rounding mode), pop */ \
+ "fildl " src "\n" /* reload the integer */ \
+ "fstps " src "\n" /* store it back as a float, pop */
+
+void qsnapvectorx87(vec3_t vec)   /* x87 fallback: round vec's three floats in place */
+{
+ __asm__ volatile
+ (
+ QROUNDX87("(%0)")   /* vec[0] */
+ QROUNDX87("4(%0)")   /* vec[1] */
+ QROUNDX87("8(%0)")   /* vec[2] */
+ :
+ : "r" (vec)
+ : "memory"
+ );
+}
diff --git a/src/asm/snd_mixa.s b/src/asm/snd_mixa.s
new file mode 100644
index 0000000..ebae0a4
--- /dev/null
+++ b/src/asm/snd_mixa.s
@@ -0,0 +1,217 @@
+/*
+===========================================================================
+Copyright (C) 1999-2005 Id Software, Inc.
+Copyright (C) 2015-2019 GrangerHub
+
+This file is part of Tremulous.
+
+Tremulous is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+Tremulous is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Tremulous; if not, see <https://www.gnu.org/licenses/>
+
+===========================================================================
+*/
+//
+// snd_mixa.s
+// x86 assembly-language sound code
+//
+
+#include "qasm.h"
+
+#if id386
+
+ .text
+
+#if 0
+//----------------------------------------------------------------------
+// 8-bit sound-mixing code
+//----------------------------------------------------------------------
+
+#define ch 4+16   // arg offsets: +16 skips the four saved registers below
+#define sc 8+16
+#define count 12+16
+
+.globl C(S_PaintChannelFrom8)
+C(S_PaintChannelFrom8):   // NOTE(review): dead code (#if 0) — relies on ch_*/psp_* struct offsets not defined in this tree
+ pushl %esi // preserve register variables
+ pushl %edi
+ pushl %ebx
+ pushl %ebp
+
+// int data;
+// short *lscale, *rscale;
+// unsigned char *sfx;
+// int i;
+
+ movl ch(%esp),%ebx   // ebx = ch (channel pointer)
+ movl sc(%esp),%esi   // esi = sc (sfx pointer)
+
+// if (ch->leftvol > 255)
+// ch->leftvol = 255;
+// if (ch->rightvol > 255)
+// ch->rightvol = 255;
+ movl ch_leftvol(%ebx),%eax
+ movl ch_rightvol(%ebx),%edx
+ cmpl $255,%eax
+ jna LLeftSet
+ movl $255,%eax
+LLeftSet:
+ cmpl $255,%edx
+ jna LRightSet
+ movl $255,%edx
+LRightSet:
+
+// lscale = snd_scaletable[ch->leftvol >> 3];
+// rscale = snd_scaletable[ch->rightvol >> 3];
+// sfx = (signed char *)sc->data + ch->pos;
+// ch->pos += count;
+ andl $0xF8,%eax   // (vol & 0xF8) << 7 below == (vol >> 3) * row size — scale-table row offset
+ addl $20,%esi   // presumably &sc->data at offset 20 — TODO confirm against the struct
+ movl (%esi),%esi
+ andl $0xF8,%edx
+ movl ch_pos(%ebx),%edi
+ movl count(%esp),%ecx   // ecx = sample count, walked downward
+ addl %edi,%esi   // esi = sfx = sc->data + ch->pos
+ shll $7,%eax
+ addl %ecx,%edi
+ shll $7,%edx
+ movl %edi,ch_pos(%ebx)   // ch->pos += count
+ addl $(C(snd_scaletable)),%eax   // eax = lscale row
+ addl $(C(snd_scaletable)),%edx   // edx = rscale row
+ subl %ebx,%ebx   // ebx = 0; only bl is written below, so the table index stays zero-extended
+ movb -1(%esi,%ecx,1),%bl   // bl = last sample byte (loop runs back to front)
+
+ testl $1,%ecx
+ jz LMix8Loop   // even count: straight into the 2-per-iteration loop
+
+ movl (%eax,%ebx,4),%edi   // odd count: mix one sample first
+ movl (%edx,%ebx,4),%ebp
+ addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
+ addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
+ movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
+ movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
+ movb -2(%esi,%ecx,1),%bl   // prefetch the next sample byte
+
+ decl %ecx
+ jz LDone
+
+// for (i=0 ; i<count ; i++)
+// {
+LMix8Loop:   // two samples per iteration; ecx = samples remaining (even, > 0)
+
+// data = sfx[i];
+// paintbuffer[i].left += lscale[data];
+// paintbuffer[i].right += rscale[data];
+ movl (%eax,%ebx,4),%edi
+ movl (%edx,%ebx,4),%ebp
+ addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
+ addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
+ movb -2(%esi,%ecx,1),%bl   // load the next sample while the adds retire
+ movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
+ movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
+
+ movl (%eax,%ebx,4),%edi
+ movl (%edx,%ebx,4),%ebp
+ movb -3(%esi,%ecx,1),%bl
+ addl C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size),%edi
+ addl C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size),%ebp
+ movl %edi,C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size)
+ movl %ebp,C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size)
+
+// }
+ subl $2,%ecx
+ jnz LMix8Loop
+
+LDone:
+ popl %ebp   // restore callee-saved registers
+ popl %ebx
+ popl %edi
+ popl %esi
+
+ ret
+
+
+#endif
+
+//----------------------------------------------------------------------
+// Transfer of stereo buffer to 16-bit DMA buffer code
+//----------------------------------------------------------------------
+
+.globl C(S_WriteLinearBlastStereo16)
+C(S_WriteLinearBlastStereo16):   // clamp snd_p[] ints to signed 16-bit and pack stereo pairs into snd_out[]
+ pushl %edi
+ pushl %ebx
+
+// int i;
+// int val;
+ movl C(snd_linear_count),%ecx   // ecx = sample count, walked downward two at a time
+ movl C(snd_p),%ebx   // ebx = source (32-bit samples)
+ movl C(snd_out),%edi   // edi = destination (16-bit samples)
+
+// for (i=0 ; i<snd_linear_count ; i+=2)
+// {
+LWLBLoopTop:
+
+// val = snd_p[i]>>8;   (NOTE(review): older comment claimed *snd_vol, but no multiply occurs below)
+// if (val > 0x7fff)
+// snd_out[i] = 0x7fff;
+// else if (val < (short)0x8000)
+// snd_out[i] = (short)0x8000;
+// else
+// snd_out[i] = val;
+ movl -8(%ebx,%ecx,4),%eax   // eax = left sample of this pair
+ sarl $8,%eax
+ cmpl $0x7FFF,%eax
+ jg LClampHigh
+ cmpl $0xFFFF8000,%eax   // signed compare against -32768
+ jnl LClampDone
+ movl $0xFFFF8000,%eax
+ jmp LClampDone
+LClampHigh:
+ movl $0x7FFF,%eax
+LClampDone:
+
+// val = snd_p[i+1]>>8;   (same stale-comment note as above)
+// if (val > 0x7fff)
+// snd_out[i+1] = 0x7fff;
+// else if (val < (short)0x8000)
+// snd_out[i+1] = (short)0x8000;
+// else
+// snd_out[i+1] = val;
+ movl -4(%ebx,%ecx,4),%edx   // edx = right sample of this pair
+ sarl $8,%edx
+ cmpl $0x7FFF,%edx
+ jg LClampHigh2
+ cmpl $0xFFFF8000,%edx
+ jnl LClampDone2
+ movl $0xFFFF8000,%edx
+ jmp LClampDone2
+LClampHigh2:
+ movl $0x7FFF,%edx
+LClampDone2:
+ shll $16,%edx   // right sample to the high word
+ andl $0xFFFF,%eax   // left sample in the low word
+ orl %eax,%edx
+ movl %edx,-4(%edi,%ecx,2)   // store both 16-bit samples with one 32-bit write
+
+// }
+ subl $2,%ecx
+ jnz LWLBLoopTop
+
+// snd_p += snd_linear_count;
+
+ popl %ebx
+ popl %edi
+
+ ret
+
+
+#endif // id386
diff --git a/src/asm/vm_x86_64.asm b/src/asm/vm_x86_64.asm
new file mode 100644
index 0000000..692cee9
--- /dev/null
+++ b/src/asm/vm_x86_64.asm
@@ -0,0 +1,59 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
+; Copyright (C) 2015-2019 GrangerHub
+;
+; This file is part of Tremulous.
+;
+; Tremulous is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 3 of the License,
+; or (at your option) any later version.
+;
+; Tremulous is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Tremulous; if not, see <https://www.gnu.org/licenses/>
+;
+; ===========================================================================
+
+; Call wrapper for vm_x86 when built with MSVC in 64 bit mode,
+; since MSVC does not support inline x64 assembler code anymore.
+;
+; assumes __fastcall calling convention
+
+.code
+
+; Call to compiled code after setting up the register environment for the VM
+; prototype:
+; uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
+
+qvmcall64 PROC   ; MS x64 __fastcall: rcx = &programStack, rdx = opStack, r8 = instructionPointers, r9 = dataBase
+ push rsi ; push non-volatile registers to stack
+ push rdi
+ push rbx
+ ; need to save pointer in rcx so we can write back the programStack value to caller
+ push rcx
+
+ ; registers r8 and r9 have correct value already thanks to __fastcall
+ xor rbx, rbx ; opStackOfs starts out being 0
+ mov rdi, rdx ; opStack
+ mov esi, dword ptr [rcx] ; programStack
+
+ call qword ptr [r8] ; instructionPointers[0] is also the entry point
+
+ pop rcx   ; recover the saved &programStack
+
+ mov dword ptr [rcx], esi ; write back the programStack value
+ mov al, bl ; return opStack offset (low byte of rbx)
+
+ pop rbx   ; restore non-volatile registers
+ pop rdi
+ pop rsi
+
+ ret
+qvmcall64 ENDP
+
+end