summaryrefslogtreecommitdiff
path: root/src/unix/matha.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/unix/matha.s')
-rw-r--r--src/unix/matha.s425
1 files changed, 425 insertions, 0 deletions
diff --git a/src/unix/matha.s b/src/unix/matha.s
new file mode 100644
index 0000000..d8ddb55
--- /dev/null
+++ b/src/unix/matha.s
@@ -0,0 +1,425 @@
+/*
+===========================================================================
+Copyright (C) 1999-2005 Id Software, Inc.
+Copyright (C) 2000-2006 Tim Angus
+
+This file is part of Tremulous.
+
+Tremulous is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Tremulous is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Foobar; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+===========================================================================
+*/
+//
+// math.s
+// x86 assembly-language math routines.
+
+#include "qasm.h"
+
+
+#if id386
+
+ .data
+
+ .align 4
+Ljmptab: .long Lcase0, Lcase1, Lcase2, Lcase3
+ .long Lcase4, Lcase5, Lcase6, Lcase7
+
+ .text
+
+// TODO: rounding needed?
+// stack parameter offset
+#define val 4
+
+.globl C(Invert24To16)
+C(Invert24To16):
+
+ movl val(%esp),%ecx
+ movl $0x100,%edx // 0x10000000000 as dividend
+ cmpl %edx,%ecx
+ jle LOutOfRange
+
+ subl %eax,%eax
+ divl %ecx
+
+ ret
+
+LOutOfRange:
+ movl $0xFFFFFFFF,%eax
+ ret
+
+#if 0
+
+#define in 4
+#define out 8
+
+ .align 2
+.globl C(TransformVector)
+C(TransformVector):
+ movl in(%esp),%eax
+ movl out(%esp),%edx
+
+ flds (%eax) // in[0]
+ fmuls C(vright) // in[0]*vright[0]
+ flds (%eax) // in[0] | in[0]*vright[0]
+ fmuls C(vup) // in[0]*vup[0] | in[0]*vright[0]
+ flds (%eax) // in[0] | in[0]*vup[0] | in[0]*vright[0]
+ fmuls C(vpn) // in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]
+
+ flds 4(%eax) // in[1] | ...
+ fmuls C(vright)+4 // in[1]*vright[1] | ...
+ flds 4(%eax) // in[1] | in[1]*vright[1] | ...
+ fmuls C(vup)+4 // in[1]*vup[1] | in[1]*vright[1] | ...
+ flds 4(%eax) // in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
+ fmuls C(vpn)+4 // in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
+ fxch %st(2) // in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...
+
+ faddp %st(0),%st(5) // in[1]*vup[1] | in[1]*vpn[1] | ...
+ faddp %st(0),%st(3) // in[1]*vpn[1] | ...
+ faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
+
+ flds 8(%eax) // in[2] | ...
+ fmuls C(vright)+8 // in[2]*vright[2] | ...
+ flds 8(%eax) // in[2] | in[2]*vright[2] | ...
+ fmuls C(vup)+8 // in[2]*vup[2] | in[2]*vright[2] | ...
+ flds 8(%eax) // in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
+ fmuls C(vpn)+8 // in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
+ fxch %st(2) // in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...
+
+ faddp %st(0),%st(5) // in[2]*vup[2] | in[2]*vpn[2] | ...
+ faddp %st(0),%st(3) // in[2]*vpn[2] | ...
+ faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
+
+ fstps 8(%edx) // out[2]
+ fstps 4(%edx) // out[1]
+ fstps (%edx) // out[0]
+
+ ret
+
+#endif
+
+#define EMINS 4+4
+#define EMAXS 4+8
+#define P 4+12
+
+ .align 2
+.globl C(BoxOnPlaneSide)
+C(BoxOnPlaneSide):
+ pushl %ebx
+
+ movl P(%esp),%edx
+ movl EMINS(%esp),%ecx
+ xorl %eax,%eax
+ movl EMAXS(%esp),%ebx
+ movb pl_signbits(%edx),%al
+ cmpb $8,%al
+ jge Lerror
+ flds pl_normal(%edx) // p->normal[0]
+ fld %st(0) // p->normal[0] | p->normal[0]
+ // bk000422 - warning: missing prefix `*' in absolute indirect address, maybe misassembled!
+ // bk001129 - fix from Andrew Henderson, was: Ljmptab(,%eax,4)
+ jmp *Ljmptab(,%eax,4)
+
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+Lcase0:
+ fmuls (%ebx) // p->normal[0]*emaxs[0] | p->normal[0]
+ flds pl_normal+4(%edx) // p->normal[1] | p->normal[0]*emaxs[0] |
+ // p->normal[0]
+ fxch %st(2) // p->normal[0] | p->normal[0]*emaxs[0] |
+ // p->normal[1]
+ fmuls (%ecx) // p->normal[0]*emins[0] |
+ // p->normal[0]*emaxs[0] | p->normal[1]
+ fxch %st(2) // p->normal[1] | p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fld %st(0) // p->normal[1] | p->normal[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fmuls 4(%ebx) // p->normal[1]*emaxs[1] | p->normal[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ flds pl_normal+8(%edx) // p->normal[2] | p->normal[1]*emaxs[1] |
+ // p->normal[1] | p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fxch %st(2) // p->normal[1] | p->normal[1]*emaxs[1] |
+ // p->normal[2] | p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fmuls 4(%ecx) // p->normal[1]*emins[1] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[2] | p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fxch %st(2) // p->normal[2] | p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fld %st(0) // p->normal[2] | p->normal[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fmuls 8(%ebx) // p->normal[2]*emaxs[2] |
+ // p->normal[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fxch %st(5) // p->normal[0]*emins[0] |
+ // p->normal[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[2]*emaxs[2]
+ faddp %st(0),%st(3) //p->normal[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0] |
+ // p->normal[2]*emaxs[2]
+ fmuls 8(%ecx) //p->normal[2]*emins[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0] |
+ // p->normal[2]*emaxs[2]
+ fxch %st(1) //p->normal[1]*emaxs[1] |
+ // p->normal[2]*emins[2] |
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0] |
+ // p->normal[2]*emaxs[2]
+ faddp %st(0),%st(3) //p->normal[2]*emins[2] |
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
+ // p->normal[2]*emaxs[2]
+ fxch %st(3) //p->normal[2]*emaxs[2] +
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
+ // p->normal[2]*emins[2]
+ faddp %st(0),%st(2) //p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // dist1 | p->normal[2]*emins[2]
+
+ jmp LSetSides
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+Lcase1:
+ fmuls (%ecx) // emins[0]
+ flds pl_normal+4(%edx)
+ fxch %st(2)
+ fmuls (%ebx) // emaxs[0]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 4(%ebx) // emaxs[1]
+ flds pl_normal+8(%edx)
+ fxch %st(2)
+ fmuls 4(%ecx) // emins[1]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(5)
+ faddp %st(0),%st(3)
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(1)
+ faddp %st(0),%st(3)
+ fxch %st(3)
+ faddp %st(0),%st(2)
+
+ jmp LSetSides
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+Lcase2:
+ fmuls (%ebx) // emaxs[0]
+ flds pl_normal+4(%edx)
+ fxch %st(2)
+ fmuls (%ecx) // emins[0]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 4(%ecx) // emins[1]
+ flds pl_normal+8(%edx)
+ fxch %st(2)
+ fmuls 4(%ebx) // emaxs[1]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(5)
+ faddp %st(0),%st(3)
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(1)
+ faddp %st(0),%st(3)
+ fxch %st(3)
+ faddp %st(0),%st(2)
+
+ jmp LSetSides
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+Lcase3:
+ fmuls (%ecx) // emins[0]
+ flds pl_normal+4(%edx)
+ fxch %st(2)
+ fmuls (%ebx) // emaxs[0]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 4(%ecx) // emins[1]
+ flds pl_normal+8(%edx)
+ fxch %st(2)
+ fmuls 4(%ebx) // emaxs[1]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(5)
+ faddp %st(0),%st(3)
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(1)
+ faddp %st(0),%st(3)
+ fxch %st(3)
+ faddp %st(0),%st(2)
+
+ jmp LSetSides
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+Lcase4:
+ fmuls (%ebx) // emaxs[0]
+ flds pl_normal+4(%edx)
+ fxch %st(2)
+ fmuls (%ecx) // emins[0]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 4(%ebx) // emaxs[1]
+ flds pl_normal+8(%edx)
+ fxch %st(2)
+ fmuls 4(%ecx) // emins[1]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(5)
+ faddp %st(0),%st(3)
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(1)
+ faddp %st(0),%st(3)
+ fxch %st(3)
+ faddp %st(0),%st(2)
+
+ jmp LSetSides
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+Lcase5:
+ fmuls (%ecx) // emins[0]
+ flds pl_normal+4(%edx)
+ fxch %st(2)
+ fmuls (%ebx) // emaxs[0]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 4(%ebx) // emaxs[1]
+ flds pl_normal+8(%edx)
+ fxch %st(2)
+ fmuls 4(%ecx) // emins[1]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(5)
+ faddp %st(0),%st(3)
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(1)
+ faddp %st(0),%st(3)
+ fxch %st(3)
+ faddp %st(0),%st(2)
+
+ jmp LSetSides
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+Lcase6:
+ fmuls (%ebx) // emaxs[0]
+ flds pl_normal+4(%edx)
+ fxch %st(2)
+ fmuls (%ecx) // emins[0]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 4(%ecx) // emins[1]
+ flds pl_normal+8(%edx)
+ fxch %st(2)
+ fmuls 4(%ebx) // emaxs[1]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(5)
+ faddp %st(0),%st(3)
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(1)
+ faddp %st(0),%st(3)
+ fxch %st(3)
+ faddp %st(0),%st(2)
+
+ jmp LSetSides
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+Lcase7:
+ fmuls (%ecx) // emins[0]
+ flds pl_normal+4(%edx)
+ fxch %st(2)
+ fmuls (%ebx) // emaxs[0]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 4(%ecx) // emins[1]
+ flds pl_normal+8(%edx)
+ fxch %st(2)
+ fmuls 4(%ebx) // emaxs[1]
+ fxch %st(2)
+ fld %st(0)
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(5)
+ faddp %st(0),%st(3)
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(1)
+ faddp %st(0),%st(3)
+ fxch %st(3)
+ faddp %st(0),%st(2)
+
+LSetSides:
+
+// sides = 0;
+// if (dist1 >= p->dist)
+// sides = 1;
+// if (dist2 < p->dist)
+// sides |= 2;
+
+ faddp %st(0),%st(2) // dist1 | dist2
+ fcomps pl_dist(%edx)
+ xorl %ecx,%ecx
+ fnstsw %ax
+ fcomps pl_dist(%edx)
+ andb $1,%ah
+ xorb $1,%ah
+ addb %ah,%cl
+
+ fnstsw %ax
+ andb $1,%ah
+ addb %ah,%ah
+ addb %ah,%cl
+
+// return sides;
+
+ popl %ebx
+ movl %ecx,%eax // return status
+
+ ret
+
+
+Lerror:
+ movl 1, %eax
+ ret
+
+#endif // id386