diff options
Diffstat (limited to 'ioq3-r437/src/unix/linux_common.c')
-rw-r--r-- | ioq3-r437/src/unix/linux_common.c | 346 |
1 files changed, 346 insertions, 0 deletions
diff --git a/ioq3-r437/src/unix/linux_common.c b/ioq3-r437/src/unix/linux_common.c new file mode 100644 index 00000000..ab1eafa6 --- /dev/null +++ b/ioq3-r437/src/unix/linux_common.c @@ -0,0 +1,346 @@ +#if 0 // not used anymore +/* +=========================================================================== +Copyright (C) 1999-2005 Id Software, Inc. + +This file is part of Quake III Arena source code. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ +/** + * GAS syntax equivalents of the MSVC asm memory calls in common.c + * + * The following changes have been made to the asm: + * 1. Registers are loaded by the inline asm arguments when possible + * 2. Labels have been changed to local label format (0,1,etc.) to allow inlining + * + * HISTORY: + * AH - Created on 08 Dec 2000 + */ + +#include <unistd.h> // AH - for size_t +#include <string.h> + +// bk001207 - we need something under Linux, too. Mac? +#if 1 // defined(C_ONLY) // bk010102 - dedicated? +void Com_Memcpy (void* dest, const void* src, const size_t count) { + memcpy(dest, src, count); +} + +void Com_Memset (void* dest, const int val, const size_t count) { + memset(dest, val, count); +} + +#else + +typedef enum { + PRE_READ, // prefetch assuming that buffer is used for reading only + PRE_WRITE, // prefetch assuming that buffer is used for writing only + PRE_READ_WRITE // prefetch assuming that buffer is used for both reading and writing +} e_prefetch; + +void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type); + +void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) { + // MMX version not used on standard Pentium MMX + // because the dword version is faster (with + // proper destination prefetching) + __asm__ __volatile__ (" \ + //mov eax,constant // eax = val \ + //mov edx,dest // dest \ + //mov ecx,count \ + movd %%eax, %%mm0 \ + punpckldq %%mm0, %%mm0 \ +\ + // ensure that destination is qword aligned \ +\ + testl $7, %%edx // qword padding?\ + jz 0f \ + movl %%eax, (%%edx) \ + decl %%ecx \ + addl $4, %%edx \ +\ +0: movl %%ecx, %%ebx \ + andl $0xfffffff0, %%ecx \ + jz 2f \ + jmp 1f \ + .align 16 \ +\ + // funny ordering here to avoid commands \ + // that cross 32-byte boundaries (the \ + // [edx+0] version has a special 3-byte opcode... \ +1: movq %%mm0, 8(%%edx) \ + movq %%mm0, 16(%%edx) \ + movq %%mm0, 24(%%edx) \ + movq %%mm0, 32(%%edx) \ + movq %%mm0, 40(%%edx) \ + movq %%mm0, 48(%%edx) \ + movq %%mm0, 56(%%edx) \ + movq %%mm0, (%%edx)\ + addl $64, %%edx \ + subl $16, %%ecx \ + jnz 1b \ +2: \ + movl %%ebx, %%ecx // ebx = cnt \ + andl $0xfffffff0, %%ecx // ecx = cnt&~15 \ + subl %%ecx, %%ebx \ + jz 6f \ + cmpl $8, %%ebx \ + jl 3f \ +\ + movq %%mm0, (%%edx) \ + movq %%mm0, 8(%%edx) \ + movq %%mm0, 16(%%edx) \ + movq %%mm0, 24(%%edx) \ + addl $32, %%edx \ + subl $8, %%ebx \ + jz 6f \ +\ +3: cmpl $4, %%ebx \ + jl 4f \ + \ + movq %%mm0, (%%edx) \ + movq %%mm0, 8(%%edx) \ + addl $16, %%edx \ + subl $4, %%ebx \ +\ +4: cmpl $2, %%ebx \ + jl 5f \ + movq %%mm0, (%%edx) \ + addl $8, %%edx \ + subl $2, %%ebx \ +\ +5: cmpl $1, %%ebx \ + jl 6f \ + movl %%eax, (%%edx) \ +6: \ + emms \ + " + : : "a" (constant), "c" (count), "d" (dest) + : "%ebx", "%edi", "%esi", "cc", "memory"); +} + +// optimized memory copy routine that handles all alignment +// cases and block sizes efficiently +void Com_Memcpy (void* dest, const void* src, const size_t count) { + Com_Prefetch (src, count, PRE_READ); + __asm__ __volatile__ (" \ + pushl %%edi \ + pushl %%esi \ + //mov ecx,count \ + cmpl $0, %%ecx // count = 0 check (just to be on the safe side) \ + je 6f \ + //mov edx,dest \ + movl %0, %%ebx \ + cmpl $32, %%ecx // padding only? \ + jl 1f \ +\ + movl %%ecx, %%edi \ + andl $0xfffffe00, %%edi // edi = count&~31 \ + subl $32, %%edi \ +\ + .align 16 \ +0: \ + movl (%%ebx, %%edi, 1), %%eax \ + movl 4(%%ebx, %%edi, 1), %%esi \ + movl %%eax, (%%edx, %%edi, 1) \ + movl %%esi, 4(%%edx, %%edi, 1) \ + movl 8(%%ebx, %%edi, 1), %%eax \ + movl 12(%%ebx, %%edi, 1), %%esi \ + movl %%eax, 8(%%edx, %%edi, 1) \ + movl %%esi, 12(%%edx, %%edi, 1) \ + movl 16(%%ebx, %%edi, 1), %%eax \ + movl 20(%%ebx, %%edi, 1), %%esi \ + movl %%eax, 16(%%edx, %%edi, 1) \ + movl %%esi, 20(%%edx, %%edi, 1) \ + movl 24(%%ebx, %%edi, 1), %%eax \ + movl 28(%%ebx, %%edi, 1), %%esi \ + movl %%eax, 24(%%edx, %%edi, 1) \ + movl %%esi, 28(%%edx, %%edi, 1) \ + subl $32, %%edi \ + jge 0b \ + \ + movl %%ecx, %%edi \ + andl $0xfffffe00, %%edi \ + addl %%edi, %%ebx // increase src pointer \ + addl %%edi, %%edx // increase dst pointer \ + andl $31, %%ecx // new count \ + jz 6f // if count = 0, get outta here \ +\ +1: \ + cmpl $16, %%ecx \ + jl 2f \ + movl (%%ebx), %%eax \ + movl %%eax, (%%edx) \ + movl 4(%%ebx), %%eax \ + movl %%eax, 4(%%edx) \ + movl 8(%%ebx), %%eax \ + movl %%eax, 8(%%edx) \ + movl 12(%%ebx), %%eax \ + movl %%eax, 12(%%edx) \ + subl $16, %%ecx \ + addl $16, %%ebx \ + addl $16, %%edx \ +2: \ + cmpl $8, %%ecx \ + jl 3f \ + movl (%%ebx), %%eax \ + movl %%eax, (%%edx) \ + movl 4(%%ebx), %%eax \ + subl $8, %%ecx \ + movl %%eax, 4(%%edx) \ + addl $8, %%ebx \ + addl $8, %%edx \ +3: \ + cmpl $4, %%ecx \ + jl 4f \ + movl (%%ebx), %%eax // here 4-7 bytes \ + addl $4, %%ebx \ + subl $4, %%ecx \ + movl %%eax, (%%edx) \ + addl $4, %%edx \ +4: // 0-3 remaining bytes \ + cmpl $2, %%ecx \ + jl 5f \ + movw (%%ebx), %%ax // two bytes \ + cmpl $3, %%ecx // less than 3? \ + movw %%ax, (%%edx) \ + jl 6f \ + movb 2(%%ebx), %%al // last byte \ + movb %%al, 2(%%edx) \ + jmp 6f \ +5: \ + cmpl $1, %%ecx \ + jl 6f \ + movb (%%ebx), %%al \ + movb %%al, (%%edx) \ +6: \ + popl %%esi \ + popl %%edi \ + " + : : "m" (src), "d" (dest), "c" (count) + : "%eax", "%ebx", "%edi", "%esi", "cc", "memory"); +} + +void Com_Memset (void* dest, const int val, const size_t count) +{ + unsigned int fillval; + + if (count < 8) + { + __asm__ __volatile__ (" \ + //mov edx,dest \ + //mov eax, val \ + movb %%al, %%ah \ + movl %%eax, %%ebx \ + andl $0xffff, %%ebx \ + shll $16, %%eax \ + addl %%ebx, %%eax // eax now contains pattern \ + //mov ecx,count \ + cmpl $4, %%ecx \ + jl 0f \ + movl %%eax, (%%edx) // copy first dword \ + addl $4, %%edx \ + subl $4, %%ecx \ + 0: cmpl $2, %%ecx \ + jl 1f \ + movw %%ax, (%%edx) // copy 2 bytes \ + addl $2, %%edx \ + subl $2, %%ecx \ + 1: cmpl $0, %%ecx \ + je 2f \ + movb %%al, (%%edx) // copy single byte \ + 2: \ + " + : : "d" (dest), "a" (val), "c" (count) + : "%ebx", "%edi", "%esi", "cc", "memory"); + + return; + } + + fillval = val; + + fillval = fillval|(fillval<<8); + fillval = fillval|(fillval<<16); // fill dword with 8-bit pattern + + _copyDWord ((unsigned int*)(dest),fillval, count/4); + + __asm__ __volatile__ (" // padding of 0-3 bytes \ + //mov ecx,count \ + movl %%ecx, %%eax \ + andl $3, %%ecx \ + jz 1f \ + andl $0xffffff00, %%eax \ + //mov ebx,dest \ + addl %%eax, %%edx \ + movl %0, %%eax \ + cmpl $2, %%ecx \ + jl 0f \ + movw %%ax, (%%edx) \ + cmpl $2, %%ecx \ + je 1f \ + movb %%al, 2(%%edx) \ + jmp 1f \ +0: \ + cmpl $0, %%ecx\ + je 1f\ + movb %%al, (%%edx)\ +1: \ + " + : : "m" (fillval), "c" (count), "d" (dest) + : "%eax", "%ebx", "%edi", "%esi", "cc", "memory"); +} + +void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type) +{ + // write buffer prefetching is performed only if + // the processor benefits from it. Read and read/write + // prefetching is always performed. + + switch (type) + { + case PRE_WRITE : break; + case PRE_READ: + case PRE_READ_WRITE: + + __asm__ __volatile__ ("\ + //mov ebx,s\ + //mov ecx,bytes\ + cmpl $4096, %%ecx // clamp to 4kB\ + jle 0f\ + movl $4096, %%ecx\ + 0:\ + addl $0x1f, %%ecx\ + shrl $5, %%ecx // number of cache lines\ + jz 2f\ + jmp 1f\ +\ + .align 16\ + 1: testb %%al, (%%edx)\ + addl $32, %%edx\ + decl %%ecx\ + jnz 1b\ + 2:\ + " + : : "d" (s), "c" (bytes) + : "%eax", "%ebx", "%edi", "%esi", "memory", "cc"); + + break; + } +} + +#endif +#endif |